From e69c456ae21d8b76dd72a6a341bfb921475833c4 Mon Sep 17 00:00:00 2001 From: ktksgit Date: Sun, 30 May 2021 20:43:02 +0200 Subject: [PATCH] Prepare release v0.2 * cpptcl.eval is now a C++ implementation * cpptcl.interpreter.get returns non owning pointer * native to python and python to native dispatching * Create directory per example Bugfixes * import from tcl did not work on nested packages * Python GIL is acquired and released appropriately Library updates * Add winappdbg library wrappers * Update pybind11 to 2.6.2 * Update asmjit to 5bc166efdb419f8 * Update PolyHook 2 to 84d6be2a20 * Update cpptcl to 9384cf9551520 --- SConstruct | 18 +- examples/bootstrap.tcl | 47 +- examples/{gui => ex01}/__init__.py | 0 examples/ex01/ipy_kernel.py | 93 + examples/ex01/jupyter/__init__.py | 0 examples/ex01/jupyter/qt_console.py | 7 + examples/ex02/__init__.py | 0 examples/{ => ex02}/zwerg.py | 0 examples/gui/console.py | 77 - examples/hook.py | 205 + examples/install_requirements.bat | 4 +- examples/ipy_kernel.py | 41 - examples/test_python.tcl | 52 +- libs/asmjit/.travis.yml | 159 +- libs/asmjit/CMakeLists.txt | 125 +- libs/asmjit/COMMIT.md | 2 +- libs/asmjit/README.md | 1945 +---- .../src/{SConstruct => asmjit.SConscript} | 10 +- libs/asmjit/src/asmjit.natvis | 146 +- libs/asmjit/src/asmjit/asmjit.h | 21 - libs/asmjit/src/asmjit/core.h | 2014 +++++- libs/asmjit/src/asmjit/core/api-build_p.h | 2 +- libs/asmjit/src/asmjit/core/api-config.h | 351 +- libs/asmjit/src/asmjit/core/arch.cpp | 176 - libs/asmjit/src/asmjit/core/arch.h | 204 - libs/asmjit/src/asmjit/core/archcommons.h | 164 + libs/asmjit/src/asmjit/core/archtraits.cpp | 155 + libs/asmjit/src/asmjit/core/archtraits.h | 174 + libs/asmjit/src/asmjit/core/assembler.cpp | 346 +- libs/asmjit/src/asmjit/core/assembler.h | 72 +- libs/asmjit/src/asmjit/core/builder.cpp | 544 +- libs/asmjit/src/asmjit/core/builder.h | 462 +- libs/asmjit/src/asmjit/core/callconv.h | 411 -- libs/asmjit/src/asmjit/core/codebuffer.h | 126 + libs/asmjit/src/asmjit/core/codeholder.cpp | 251 +- libs/asmjit/src/asmjit/core/codeholder.h | 747 +- libs/asmjit/src/asmjit/core/codewriter.cpp | 151 + .../{codebufferwriter_p.h => codewriter_p.h} | 30 +- libs/asmjit/src/asmjit/core/compiler.cpp | 347 +- libs/asmjit/src/asmjit/core/compiler.h | 547 +- libs/asmjit/src/asmjit/core/compilerdefs.h | 170 + libs/asmjit/src/asmjit/core/constpool.cpp | 4 +- libs/asmjit/src/asmjit/core/constpool.h | 37 +- libs/asmjit/src/asmjit/core/cpuinfo.h | 32 +- libs/asmjit/src/asmjit/core/datatypes.h | 14 +- libs/asmjit/src/asmjit/core/emithelper.cpp | 351 + libs/asmjit/src/asmjit/core/emithelper_p.h | 83 + libs/asmjit/src/asmjit/core/emitter.cpp | 296 +- libs/asmjit/src/asmjit/core/emitter.h | 569 +- libs/asmjit/src/asmjit/core/emitterutils.cpp | 150 + libs/asmjit/src/asmjit/core/emitterutils_p.h | 109 + libs/asmjit/src/asmjit/core/environment.cpp | 64 + libs/asmjit/src/asmjit/core/environment.h | 612 ++ .../core/{callconv.cpp => errorhandler.cpp} | 32 +- libs/asmjit/src/asmjit/core/errorhandler.h | 267 + libs/asmjit/src/asmjit/core/features.h | 28 +- libs/asmjit/src/asmjit/core/formatter.cpp | 481 ++ libs/asmjit/src/asmjit/core/formatter.h | 256 + libs/asmjit/src/asmjit/core/func.cpp | 280 +- libs/asmjit/src/asmjit/core/func.h | 700 +- .../src/asmjit/core/funcargscontext.cpp | 315 + .../src/asmjit/core/funcargscontext_p.h | 224 + libs/asmjit/src/asmjit/core/globals.cpp | 150 +- libs/asmjit/src/asmjit/core/globals.h | 178 +- libs/asmjit/src/asmjit/core/inst.cpp | 56 +- libs/asmjit/src/asmjit/core/inst.h 
| 474 +- libs/asmjit/src/asmjit/core/jitallocator.cpp | 6 +- libs/asmjit/src/asmjit/core/jitallocator.h | 26 +- libs/asmjit/src/asmjit/core/jitruntime.cpp | 54 +- libs/asmjit/src/asmjit/core/jitruntime.h | 2 +- libs/asmjit/src/asmjit/core/logger.cpp | 124 + .../src/asmjit/core/{logging.h => logger.h} | 207 +- libs/asmjit/src/asmjit/core/logging.cpp | 535 -- libs/asmjit/src/asmjit/core/misc_p.h | 6 +- libs/asmjit/src/asmjit/core/operand.cpp | 17 +- libs/asmjit/src/asmjit/core/operand.h | 593 +- libs/asmjit/src/asmjit/core/osutils.h | 56 +- libs/asmjit/src/asmjit/core/osutils_p.h | 94 + libs/asmjit/src/asmjit/core/raassignment_p.h | 51 +- libs/asmjit/src/asmjit/core/rabuilders_p.h | 100 +- libs/asmjit/src/asmjit/core/radefs_p.h | 183 +- libs/asmjit/src/asmjit/core/ralocal.cpp | 135 +- libs/asmjit/src/asmjit/core/ralocal_p.h | 29 +- libs/asmjit/src/asmjit/core/rapass.cpp | 280 +- libs/asmjit/src/asmjit/core/rapass_p.h | 246 +- libs/asmjit/src/asmjit/core/rastack.cpp | 9 +- libs/asmjit/src/asmjit/core/rastack_p.h | 26 +- libs/asmjit/src/asmjit/core/string.cpp | 30 +- libs/asmjit/src/asmjit/core/string.h | 186 +- libs/asmjit/src/asmjit/core/support.cpp | 10 +- libs/asmjit/src/asmjit/core/support.h | 447 +- libs/asmjit/src/asmjit/core/target.cpp | 3 +- libs/asmjit/src/asmjit/core/target.h | 121 +- libs/asmjit/src/asmjit/core/type.cpp | 54 +- libs/asmjit/src/asmjit/core/type.h | 215 +- libs/asmjit/src/asmjit/core/virtmem.cpp | 12 +- libs/asmjit/src/asmjit/core/virtmem.h | 2 +- libs/asmjit/src/asmjit/core/zone.cpp | 6 +- libs/asmjit/src/asmjit/core/zone.h | 7 + libs/asmjit/src/asmjit/core/zonehash.h | 3 +- libs/asmjit/src/asmjit/core/zonelist.h | 50 +- libs/asmjit/src/asmjit/core/zonestack.h | 2 +- libs/asmjit/src/asmjit/core/zonestring.h | 24 +- libs/asmjit/src/asmjit/core/zonetree.h | 2 +- libs/asmjit/src/asmjit/core/zonevector.cpp | 2 + libs/asmjit/src/asmjit/core/zonevector.h | 87 +- libs/asmjit/src/asmjit/x86.h | 79 +- libs/asmjit/src/asmjit/x86/x86archtraits_p.h | 150 + libs/asmjit/src/asmjit/x86/x86assembler.cpp | 642 +- libs/asmjit/src/asmjit/x86/x86assembler.h | 651 +- libs/asmjit/src/asmjit/x86/x86builder.cpp | 13 +- libs/asmjit/src/asmjit/x86/x86builder.h | 310 +- libs/asmjit/src/asmjit/x86/x86callconv.cpp | 163 - libs/asmjit/src/asmjit/x86/x86compiler.cpp | 11 +- libs/asmjit/src/asmjit/x86/x86compiler.h | 465 +- libs/asmjit/src/asmjit/x86/x86emithelper.cpp | 603 ++ libs/asmjit/src/asmjit/x86/x86emithelper_p.h | 78 + libs/asmjit/src/asmjit/x86/x86emitter.h | 5171 +++++--------- libs/asmjit/src/asmjit/x86/x86features.cpp | 158 +- libs/asmjit/src/asmjit/x86/x86features.h | 28 +- .../x86/{x86logging.cpp => x86formatter.cpp} | 447 +- .../x86/{x86logging_p.h => x86formatter_p.h} | 60 +- libs/asmjit/src/asmjit/x86/x86func.cpp | 531 ++ .../x86/{x86callconv_p.h => x86func_p.h} | 23 +- libs/asmjit/src/asmjit/x86/x86globals.h | 236 +- libs/asmjit/src/asmjit/x86/x86instapi.cpp | 462 +- libs/asmjit/src/asmjit/x86/x86instapi_p.h | 10 +- libs/asmjit/src/asmjit/x86/x86instdb.cpp | 6245 +++++++++-------- libs/asmjit/src/asmjit/x86/x86instdb.h | 53 +- libs/asmjit/src/asmjit/x86/x86instdb_p.h | 19 +- libs/asmjit/src/asmjit/x86/x86internal.cpp | 1633 ----- libs/asmjit/src/asmjit/x86/x86internal_p.h | 87 - libs/asmjit/src/asmjit/x86/x86opcode_p.h | 62 +- libs/asmjit/src/asmjit/x86/x86operand.cpp | 25 +- libs/asmjit/src/asmjit/x86/x86operand.h | 1044 +-- libs/asmjit/src/asmjit/x86/x86rapass.cpp | 476 +- libs/asmjit/src/asmjit/x86/x86rapass_p.h | 34 +- libs/asmjit/test/asmjit_bench_x86.cpp | 41 +- 
libs/asmjit/test/asmjit_test_compiler.cpp | 247 + libs/asmjit/test/asmjit_test_compiler.h | 103 + libs/asmjit/test/asmjit_test_compiler_x86.cpp | 4238 +++++++++++ libs/asmjit/test/asmjit_test_misc.h | 6 +- libs/asmjit/test/asmjit_test_opcode.cpp | 40 +- libs/asmjit/test/asmjit_test_opcode.h | 5 - libs/asmjit/test/asmjit_test_unit.cpp | 166 +- libs/asmjit/test/asmjit_test_x86_asm.cpp | 23 +- libs/asmjit/test/asmjit_test_x86_cc.cpp | 666 +- libs/asmjit/test/asmjit_test_x86_instinfo.cpp | 195 + libs/asmjit/test/asmjit_test_x86_sections.cpp | 31 +- libs/asmjit/test/cmdline.h | 83 + libs/asmjit/tools/ci-run.sh | 39 + libs/asmjit/tools/configure-makefiles.sh | 2 +- libs/asmjit/tools/configure-ninja.sh | 2 +- libs/asmjit/tools/configure-sanitizers.sh | 12 +- libs/asmjit/tools/enumgen.js | 417 ++ libs/asmjit/tools/enumgen.sh | 3 + libs/asmjit/tools/tablegen-x86.js | 108 +- libs/asmjit/tools/tablegen.sh | 6 +- libs/cpptcl/SCons_cpptcl.py | 23 - libs/cpptcl/cpptcl.SConscript | 29 + libs/cpptcl/cpptcl/cpptcl.cpp | 5 + libs/cpptcl/cpptcl/cpptcl.h | 137 +- libs/cpptcl/cpptcl/details/dispatchers.h | 42 +- libs/cpptcl/cpptcl/function_traits.h | 69 +- libs/libs.SConscript | 13 +- libs/polyhook2.0/.gitignore | 6 + libs/polyhook2.0/CMakeLists.txt | 76 +- libs/polyhook2.0/CMakeSettings.json | 2 +- libs/polyhook2.0/COMMIT.md | 2 +- libs/polyhook2.0/MainTests.cpp | 5 + libs/polyhook2.0/Polyhook_2-config.cmake.in | 3 +- libs/polyhook2.0/README.md | 18 +- .../UnitTests/TestDetourNoTDx64.cpp | 59 +- .../UnitTests/TestDetourNoTDx86.cpp | 63 +- libs/polyhook2.0/UnitTests/TestDetourx64.cpp | 75 +- libs/polyhook2.0/UnitTests/TestDetourx86.cpp | 55 +- .../UnitTests/TestDisassembler.cpp | 294 +- libs/polyhook2.0/UnitTests/TestEatHook.cpp | 24 +- libs/polyhook2.0/UnitTests/TestIatHook.cpp | 12 +- .../UnitTests/TestMemProtector.cpp | 17 +- .../UnitTests/TestVFuncSwapHook.cpp | 27 +- .../UnitTests/TestVTableSwapHook.cpp | 26 +- .../UnitTests/TestVTableSwapHook2.cpp | 79 + libs/polyhook2.0/_config.yml | 1 + libs/polyhook2.0/polyhook.SConscript | 5 +- libs/polyhook2.0/polyhook2/ADisassembler.hpp | 25 +- .../polyhook2/CapstoneDisassembler.hpp | 2 +- libs/polyhook2.0/polyhook2/Detour/ADetour.hpp | 39 +- .../polyhook2/Detour/ILCallback.hpp | 14 +- .../polyhook2/Detour/PyCallback.hpp | 36 +- .../polyhook2/Detour/x64Detour.hpp | 8 +- libs/polyhook2.0/polyhook2/Enums.hpp | 1 + libs/polyhook2.0/polyhook2/ErrorLog.hpp | 71 +- .../polyhook2.0/polyhook2/EventDispatcher.hpp | 27 + .../polyhook2/Exceptions/AVehHook.hpp | 59 +- .../polyhook2/Exceptions/BreakPointHook.hpp | 9 +- .../polyhook2/Exceptions/HWBreakPointHook.hpp | 9 +- libs/polyhook2.0/polyhook2/IHook.hpp | 88 +- libs/polyhook2.0/polyhook2/Instruction.hpp | 70 +- libs/polyhook2.0/polyhook2/MemAccessor.hpp | 39 + libs/polyhook2.0/polyhook2/MemProtector.hpp | 18 +- libs/polyhook2.0/polyhook2/Misc.hpp | 34 +- libs/polyhook2.0/polyhook2/PE/EatHook.hpp | 10 +- libs/polyhook2.0/polyhook2/PE/IatHook.hpp | 9 +- libs/polyhook2.0/polyhook2/PageAllocator.hpp | 13 + .../polyhook2/Tests/StackCanary.hpp | 12 + .../polyhook2/Tests/TestEffectTracker.hpp | 2 +- libs/polyhook2.0/polyhook2/UID.hpp | 22 +- .../polyhook2/Virtuals/VFuncSwapHook.hpp | 7 +- .../polyhook2/Virtuals/VTableSwapHook.hpp | 41 +- .../polyhook2/ZydisDisassembler.hpp | 2 +- libs/polyhook2.0/sources/ADetour.cpp | 90 +- libs/polyhook2.0/sources/AVehHook.cpp | 47 +- libs/polyhook2.0/sources/BreakPointHook.cpp | 27 +- .../sources/CapstoneDisassembler.cpp | 68 +- libs/polyhook2.0/sources/EatHook.cpp | 28 +- 
libs/polyhook2.0/sources/ErrorLog.cpp | 59 + libs/polyhook2.0/sources/HWBreakPointHook.cpp | 34 +- libs/polyhook2.0/sources/ILCallback.cpp | 64 +- libs/polyhook2.0/sources/IatHook.cpp | 13 +- libs/polyhook2.0/sources/MemAccessor.cpp | 26 + libs/polyhook2.0/sources/Misc.cpp | 75 + libs/polyhook2.0/sources/PyCallback.cpp | 604 +- libs/polyhook2.0/sources/StackCanary.cpp | 20 + .../polyhook2.0/sources/TestEffectTracker.cpp | 2 +- libs/polyhook2.0/sources/UID.cpp | 11 + libs/polyhook2.0/sources/VFuncSwapHook.cpp | 18 +- libs/polyhook2.0/sources/VTableSwapHook.cpp | 42 +- .../polyhook2.0/sources/ZydisDisassembler.cpp | 50 +- libs/polyhook2.0/sources/x64Detour.cpp | 255 +- libs/polyhook2.0/sources/x86Detour.cpp | 43 +- libs/pybind11/LICENSE | 2 +- libs/pybind11/attr.h | 80 +- libs/pybind11/buffer_info.h | 52 +- libs/pybind11/cast.h | 240 +- libs/pybind11/chrono.h | 35 +- libs/pybind11/complex.h | 8 +- libs/pybind11/detail/class.h | 141 +- libs/pybind11/detail/common.h | 149 +- libs/pybind11/detail/descr.h | 8 +- libs/pybind11/detail/init.h | 13 +- libs/pybind11/detail/internals.h | 98 +- libs/pybind11/detail/typeid.h | 8 +- libs/pybind11/eigen.h | 22 +- libs/pybind11/embed.h | 51 +- libs/pybind11/eval.h | 41 +- libs/pybind11/functional.h | 13 +- libs/pybind11/iostream.h | 45 +- libs/pybind11/numpy.h | 211 +- libs/pybind11/operators.h | 13 +- libs/pybind11/options.h | 4 +- libs/pybind11/pybind11.SConscript | 7 + libs/pybind11/pybind11.h | 684 +- libs/pybind11/pytypes.h | 308 +- libs/pybind11/stl.h | 18 +- libs/pybind11/stl_bind.h | 50 +- libs/tcl2python/module_cpptcl.cpp | 58 +- libs/tcl2python/module_native.cpp | 492 +- libs/tcl2python/module_tcl.cpp | 18 +- libs/tcl2python/tcl2python.SConscript | 31 +- libs/tcl2python/tcl_globals.cpp | 23 + libs/tcl2python/tcl_globals.h | 6 +- libs/tcl2python/tclandpython.cpp | 146 +- libs/tcl8.3.2/SConscript_dll.py | 11 +- libs/tcl8.3.2/SConscript_main.py | 16 +- libs/tcl8.3.2/SConscript_staticlib.py | 34 - scripts/abi.py | 80 + scripts/process.py | 18 + {examples => scripts}/pytcl.py | 40 +- scripts/win32/LICENSE | 26 + scripts/win32/__init__.py | 219 + scripts/win32/advapi32.py | 3246 +++++++++ scripts/win32/context_amd64.py | 762 ++ scripts/win32/context_i386.py | 447 ++ scripts/win32/dbghelp.py | 1271 ++++ scripts/win32/defines.py | 741 ++ scripts/win32/gdi32.py | 362 + scripts/win32/kernel32.py | 4741 +++++++++++++ scripts/win32/ntdll.py | 537 ++ scripts/win32/peb_teb.py | 3433 +++++++++ scripts/win32/psapi.py | 385 + scripts/win32/shell32.py | 406 ++ scripts/win32/shlwapi.py | 754 ++ scripts/win32/user32.py | 1756 +++++ scripts/win32/version.py | 1257 ++++ scripts/win32/wtsapi32.py | 335 + 286 files changed, 54116 insertions(+), 19848 deletions(-) rename examples/{gui => ex01}/__init__.py (100%) create mode 100644 examples/ex01/ipy_kernel.py create mode 100644 examples/ex01/jupyter/__init__.py create mode 100644 examples/ex01/jupyter/qt_console.py create mode 100644 examples/ex02/__init__.py rename examples/{ => ex02}/zwerg.py (100%) delete mode 100644 examples/gui/console.py create mode 100644 examples/hook.py delete mode 100644 examples/ipy_kernel.py rename libs/asmjit/src/{SConstruct => asmjit.SConscript} (77%) delete mode 100644 libs/asmjit/src/asmjit/core/arch.cpp delete mode 100644 libs/asmjit/src/asmjit/core/arch.h create mode 100644 libs/asmjit/src/asmjit/core/archcommons.h create mode 100644 libs/asmjit/src/asmjit/core/archtraits.cpp create mode 100644 libs/asmjit/src/asmjit/core/archtraits.h delete mode 100644 
libs/asmjit/src/asmjit/core/callconv.h create mode 100644 libs/asmjit/src/asmjit/core/codebuffer.h create mode 100644 libs/asmjit/src/asmjit/core/codewriter.cpp rename libs/asmjit/src/asmjit/core/{codebufferwriter_p.h => codewriter_p.h} (84%) create mode 100644 libs/asmjit/src/asmjit/core/compilerdefs.h create mode 100644 libs/asmjit/src/asmjit/core/emithelper.cpp create mode 100644 libs/asmjit/src/asmjit/core/emithelper_p.h create mode 100644 libs/asmjit/src/asmjit/core/emitterutils.cpp create mode 100644 libs/asmjit/src/asmjit/core/emitterutils_p.h create mode 100644 libs/asmjit/src/asmjit/core/environment.cpp create mode 100644 libs/asmjit/src/asmjit/core/environment.h rename libs/asmjit/src/asmjit/core/{callconv.cpp => errorhandler.cpp} (68%) create mode 100644 libs/asmjit/src/asmjit/core/errorhandler.h create mode 100644 libs/asmjit/src/asmjit/core/formatter.cpp create mode 100644 libs/asmjit/src/asmjit/core/formatter.h create mode 100644 libs/asmjit/src/asmjit/core/funcargscontext.cpp create mode 100644 libs/asmjit/src/asmjit/core/funcargscontext_p.h create mode 100644 libs/asmjit/src/asmjit/core/logger.cpp rename libs/asmjit/src/asmjit/core/{logging.h => logger.h} (54%) delete mode 100644 libs/asmjit/src/asmjit/core/logging.cpp create mode 100644 libs/asmjit/src/asmjit/core/osutils_p.h create mode 100644 libs/asmjit/src/asmjit/x86/x86archtraits_p.h delete mode 100644 libs/asmjit/src/asmjit/x86/x86callconv.cpp create mode 100644 libs/asmjit/src/asmjit/x86/x86emithelper.cpp create mode 100644 libs/asmjit/src/asmjit/x86/x86emithelper_p.h rename libs/asmjit/src/asmjit/x86/{x86logging.cpp => x86formatter.cpp} (67%) rename libs/asmjit/src/asmjit/x86/{x86logging_p.h => x86formatter_p.h} (64%) create mode 100644 libs/asmjit/src/asmjit/x86/x86func.cpp rename libs/asmjit/src/asmjit/x86/{x86callconv_p.h => x86func_p.h} (73%) delete mode 100644 libs/asmjit/src/asmjit/x86/x86internal.cpp delete mode 100644 libs/asmjit/src/asmjit/x86/x86internal_p.h create mode 100644 libs/asmjit/test/asmjit_test_compiler.cpp create mode 100644 libs/asmjit/test/asmjit_test_compiler.h create mode 100644 libs/asmjit/test/asmjit_test_compiler_x86.cpp create mode 100644 libs/asmjit/test/asmjit_test_x86_instinfo.cpp create mode 100644 libs/asmjit/test/cmdline.h create mode 100644 libs/asmjit/tools/ci-run.sh create mode 100644 libs/asmjit/tools/enumgen.js create mode 100644 libs/asmjit/tools/enumgen.sh delete mode 100644 libs/cpptcl/SCons_cpptcl.py create mode 100644 libs/cpptcl/cpptcl.SConscript create mode 100644 libs/polyhook2.0/UnitTests/TestVTableSwapHook2.cpp create mode 100644 libs/polyhook2.0/_config.yml create mode 100644 libs/polyhook2.0/polyhook2/EventDispatcher.hpp create mode 100644 libs/polyhook2.0/polyhook2/MemAccessor.hpp create mode 100644 libs/polyhook2.0/polyhook2/Tests/StackCanary.hpp create mode 100644 libs/polyhook2.0/sources/ErrorLog.cpp create mode 100644 libs/polyhook2.0/sources/MemAccessor.cpp create mode 100644 libs/polyhook2.0/sources/Misc.cpp create mode 100644 libs/polyhook2.0/sources/StackCanary.cpp create mode 100644 libs/polyhook2.0/sources/UID.cpp create mode 100644 libs/pybind11/pybind11.SConscript delete mode 100644 libs/tcl8.3.2/SConscript_staticlib.py create mode 100644 scripts/abi.py create mode 100644 scripts/process.py rename {examples => scripts}/pytcl.py (74%) create mode 100644 scripts/win32/LICENSE create mode 100644 scripts/win32/__init__.py create mode 100644 scripts/win32/advapi32.py create mode 100644 scripts/win32/context_amd64.py create mode 100644 
scripts/win32/context_i386.py create mode 100644 scripts/win32/dbghelp.py create mode 100644 scripts/win32/defines.py create mode 100644 scripts/win32/gdi32.py create mode 100644 scripts/win32/kernel32.py create mode 100644 scripts/win32/ntdll.py create mode 100644 scripts/win32/peb_teb.py create mode 100644 scripts/win32/psapi.py create mode 100644 scripts/win32/shell32.py create mode 100644 scripts/win32/shlwapi.py create mode 100644 scripts/win32/user32.py create mode 100644 scripts/win32/version.py create mode 100644 scripts/win32/wtsapi32.py diff --git a/SConstruct b/SConstruct index 5258db4..cc2d2d1 100644 --- a/SConstruct +++ b/SConstruct @@ -1,8 +1,8 @@ import os -AddOption('--mingw', +AddOption('--nomingw', dest='mingw', - action='store_true', + action='store_false', default=True, help='mingw build (defaults to gcc)') @@ -51,11 +51,13 @@ if is_mingw == True: variables=vars, tools = ['mingw'] ) + env["is_mingw"] = True else: env = Environment( variables=vars, TARGET_ARCH='x86' ) + env["is_mingw"] = False print ('Visual Studio build') env.AppendUnique(CXXFLAGS =['/EHsc']) @@ -98,6 +100,8 @@ if is_mingw: env.AppendUnique(CXXFLAGS=['-g']) env.AppendUnique(CFLAGS=['-g']) else: + env.AppendUnique(CXXFLAGS=['/std:c++17'] + ) if nodebug or releasedebug: env.AppendUnique(CPPFLAGS =['/W3', '/MD', '/Od'],#, '/Gs'], LINKFLAGS=['/RELEASE'] @@ -131,8 +135,6 @@ tclStubLib_obj)= env.SConscript('libs/tcl8.3.2/SConscript_main.py', exports='nodebug is_mingw windows' ) -tcl_dll_full_path = os.path.join(tcl_bin_install_dir, tcl_lib_name + '.dll') - env.AppendUnique(CPPPATH = [capstone_include_path, tcl_includes], # CPPDEFINES = {'__thiscall' : ''}, @@ -149,23 +151,21 @@ Export('pyenv') (tclpython_dll, static_cpptcl, static_cpptcl_no_stubs, -py3_obj, -cpptcl_includes, -pybind11_includes) = pyenv.SConscript(f'{variant_dir}/libs/libs.SConscript', +py3_static) = pyenv.SConscript(f'{variant_dir}/libs/libs.SConscript', exports='tclStubLib_obj', ) all_targets = [ + tclpython_dll, static_cpptcl, static_cpptcl_no_stubs, - py3_obj + py3_static ] env.CompileCommands('object', all_targets) if is_mingw and env["windows"]: install_dir = Dir('#/bin').abspath - env.Install(install_dir, tcl_dll_full_path) path = os.environ['PATH'] path = [x for x in path.split(';') if 'mingw32' in x][0] diff --git a/examples/bootstrap.tcl b/examples/bootstrap.tcl index d14d2c5..6fe3de0 100644 --- a/examples/bootstrap.tcl +++ b/examples/bootstrap.tcl @@ -1,3 +1,20 @@ +# This file will bootstrap the current directory +# It adds python path to the environment variable PATH +# It adds this SDKs bin path, namely ../bin, to the environment variable PATH +# +# A python interpreter is created +# It adds this path to Python's sys.path +# It adds this SDKs scripts path, namely ../scripts, to Python's sys.path +# +# How to use +# ---------- +# Add line +# source /examples/bootstrap.tcl +# to the very beginning of /data/systeminit.tcl +# where +# is to be replaced with the path where you unpacked the SDK +# is to be replaced with the path where you're Diggles installation (Diggles.exe) is located + set info_script [ info script ] if { $info_script == "" } { error "Use \"source bootstrap.tcl\"" @@ -11,32 +28,34 @@ if { [catch {puts "Writing stdout to console"} fid] } { set bootstrap_path [ file dirname $info_script ] set package_bin_path $bootstrap_path/../bin -set python_path $package_bin_path/python36 -set jupyter_qtconsole $python_path/scripts/jupyter-qtconsole.exe +set package_scripts_path $bootstrap_path/../scripts + + +if { [ info exists 
python_path] } { + # Switch to user supplied python path +} else { + # Use self-contained python 3.6 32-bit installation + set python_path $package_bin_path/python36 +} + puts $stdout "bootstrap_path $bootstrap_path" puts $stdout "package_bin_path $package_bin_path" +puts $stdout "package_scripts_path $package_scripts_path" puts $stdout "python_path $python_path" -set env(PATH) "$package_bin_path;$python_path;$env(PATH);" +set env(PATH) "$package_bin_path;$python_path;" set env(PYTHONHOME) $python_path +set env(PYTHONPATH) "$python_path/lib;$python_path/dlls;" load tclandpython.dll set py [PythonInterpreter] $py exec " import sys +import site # This makes import from the example directory possible sys.path.append('$bootstrap_path') +sys.path.append('$package_scripts_path') +site.addsitedir('$python_path/lib/site-packages') " - -# $py import pytcl -# $py import cpptcl - -# Use exclusively first: -# $py import gui.console -# $py gui.console.main() - -# or second: -# $py import ipy_kernel -# $py eval ipy_kernel.main('$jupyter_qtconsole') diff --git a/examples/gui/__init__.py b/examples/ex01/__init__.py similarity index 100% rename from examples/gui/__init__.py rename to examples/ex01/__init__.py diff --git a/examples/ex01/ipy_kernel.py b/examples/ex01/ipy_kernel.py new file mode 100644 index 0000000..cf40377 --- /dev/null +++ b/examples/ex01/ipy_kernel.py @@ -0,0 +1,93 @@ +#print("Waiting for Visual Python debugger to attach") +# import ptvsd; ptvsd.enable_attach(); ptvsd.wait_for_attach() + +# import pydevd +# pydevd.settrace() + +import os +import sys +import subprocess +import tempfile +import threading +import time + +from pathlib import Path + +import IPython + +connection_file = Path(tempfile.gettempdir()) / 'connection-{:d}.json'.format(os.getpid()) +jupyter_qtconsole_exe = Path(__file__).parent / 'jupyter' / 'qt_console.py' + + +def clean_up_environment_path(): + os_path = [] + for p in os.environ['PATH'].split(';'): + if Path(p).exists() and len(p): + os_path.append(str(p)) + else: + print("Removed from PATH:", str(p)) + os.environ['PATH'] = ';'.join(os_path) + ';' + + +def runner(python_exe, timeout=10): + print(f"Waiting {timeout} sec for {connection_file} to be created") + count_down = timeout + while count_down >= 1 and not connection_file.exists(): + time.sleep(1) + count_down -= 1 + + if not connection_file.exists(): + print(f"{connection_file} does not exist after {timeout} seconds") + return + + count_down = timeout + while count_down >= 1 and len(connection_file.read_text()) < 2: + time.sleep(1) + count_down -= 1 + + if len(connection_file.read_text()) < 2: + print(f"{connection_file} is empty after {timeout} seconds") + return + + # print(os.system(f"cmd.exe /c {jupyter_qtconsole_exe} --existing {connection_file}")) + command = [python_exe, str(jupyter_qtconsole_exe), "--existing", str(connection_file)] + print('Starting:', ' '.join(command)) + proc = subprocess.Popen( + args=command, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + universal_newlines=True) + + # we need to read. 
read is a blocking call + print("Process stderr\n", proc.stderr.read()) + print("Process stdout\n", proc.stdout.read()) + + print(f"Closed {jupyter_qtconsole_exe}") + + +def main(python_exe, throw_on_error = False): + clean_up_environment_path() + + t = threading.Thread( + target=runner, + args=[python_exe]) + t.start() + + print("Running IPython embeded kernel") + try: + IPython.embed_kernel( + local_ns=sys._getframe(1).f_locals, + connection_file=str(connection_file), + ) + except Exception as exp: + print("Unable to embed IPython. Exception occured:") + print(exp) + if throw_on_error: + raise exp + finally: + if sys.stdout: + sys.stdout.flush() + t.join() + + +if __name__ == "__main__": + main(sys.executable) diff --git a/examples/ex01/jupyter/__init__.py b/examples/ex01/jupyter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/ex01/jupyter/qt_console.py b/examples/ex01/jupyter/qt_console.py new file mode 100644 index 0000000..e4f6517 --- /dev/null +++ b/examples/ex01/jupyter/qt_console.py @@ -0,0 +1,7 @@ +import sys +from qtconsole.qtconsoleapp import main + +if __name__ == '__main__': + import re + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/examples/ex02/__init__.py b/examples/ex02/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/zwerg.py b/examples/ex02/zwerg.py similarity index 100% rename from examples/zwerg.py rename to examples/ex02/zwerg.py diff --git a/examples/gui/console.py b/examples/gui/console.py deleted file mode 100644 index 3c0737d..0000000 --- a/examples/gui/console.py +++ /dev/null @@ -1,77 +0,0 @@ -import tkinter as tk -import sys -import threading - - -class StdPrintRedirector: - def __init__(self, widget, tag="stdout"): - self.widget = widget - self.tag = tag - - def write(self, string): - self.widget.configure(state="normal") - self.widget.insert("end", string, (self.tag,)) - if self.widget.count('0.0', 'end', 'lines')[0] > 20: - self.widget.delete('0.0', f'2.0') - self.widget.configure(state="disabled") - - -class Console(tk.Tk): - def __init__(self): - tk.Tk.__init__(self) - toolbar = tk.Frame(self) - toolbar.pack(side="top", fill="x") - b3 = tk.Button(self, text="disable print", command=self.disable_print) - b3.pack(in_=toolbar, side="left") - self.text = tk.Text(self, wrap="word") - self.text.pack(side="top", fill="both", expand=True) - self.text.tag_configure("stderr", foreground="#b22222") - - self._stdout = sys.stdout - self._stderr = sys.stderr - - sys.stdout = StdPrintRedirector(self.text, "stdout") - sys.stderr = StdPrintRedirector(self.text, "stderr") - - def disable_print(self): - print('Disabling printing of', __file__) - sys.stdout = self._stdout - sys.stderr = self._stderr - - -class Runner: - app = None - t = None - - @classmethod - def close(cls): - if cls.app: - cls.app.disable_print() - try: - cls.app.quit() - cls.app.destroy() - except RuntimeError as excp: - print(excp) - cls.app = None - - if cls.t: - cls.t.join() - cls.t = None - - @classmethod - def open(cls): - cls.t = threading.Thread( - target=Runner(), - ) - cls.t.start() - - while not cls.app: - pass - - def __call__(self): - Runner.app = Console() - Runner.app.mainloop() - - -def main(): - Runner.open() diff --git a/examples/hook.py b/examples/hook.py new file mode 100644 index 0000000..b7b16ef --- /dev/null +++ b/examples/hook.py @@ -0,0 +1,205 @@ +from ctypes import POINTER, c_int +import ctypes + +import _native # tclandpython.dll +from _native.asmjit.Type import Id # 
tclandpython.dll + +import cpptcl # tclandpython.dll +import abi + +# Uncomment the two line bellow to debug this code +# print("Waiting for Visual Python debugger to attach") +# import ptvsd; ptvsd.enable_attach(); ptvsd.wait_for_attach() + +ABI = abi.detect_abi() + +TCL_OK = 0 +TCL_ERROR = 1 + + +def _create_native_dispatch(my_abi: abi.Abi): + return _native.NativeFunction(my_abi[0], my_abi[1], my_abi[2], my_abi[3]) + +class TclCache_get: + def __init__(self): + func = _create_native_dispatch(ABI[type(self).__name__]) + arg_count = abi.arg_count(ABI[type(self).__name__]) + + if arg_count == 2: + self._func = lambda _, b, c, _2: func(b, c) + else: + self._func = func + + def __call__ (self, tcl_cache: int, code_out, filename: str, run_preprocessor): + filename_as_bytes = ctypes.create_string_buffer(filename.encode("ascii")) + raw_Tcl_Obj = ctypes.c_int(0) + self._func(tcl_cache, ctypes.addressof(raw_Tcl_Obj), ctypes.addressof(filename_as_bytes), run_preprocessor) + code = cpptcl.object_.reinterpret_cast(raw_Tcl_Obj.value) + code_out.swap(code) + +TclCache_get = TclCache_get() +g_tclCache = ctypes.cast(ABI['g_tclCache'][0], ctypes.POINTER(ctypes.c_int)) +g_tclCache: int = g_tclCache[0] + + +def sTclObject_loadTclFile(code_out, filename: str, run_preprocessor, crc_flags): + if g_tclCache: + TclCache_get(g_tclCache, code_out, filename, run_preprocessor) + else: + sTclObject_preprocessFile(code_out, filename, run_preprocessor, crc_flags) + return code_out + + +class sTclObject_preprocessFile: + def __init__(self): + func = _create_native_dispatch(ABI[type(self).__name__]) + arg_count = abi.arg_count(ABI[type(self).__name__]) + + if arg_count == 2: + self._func = lambda a, b, _1, _2: func(a, b) + else: + self._func = func + + def __call__ (self, code_out, filename: str, run_preprocessor, crc_flags): + filename_as_bytes = ctypes.create_string_buffer(filename.encode("ascii")) + raw_Tcl_Obj = ctypes.c_int(0) + self._func(ctypes.addressof(raw_Tcl_Obj), ctypes.addressof(filename_as_bytes), run_preprocessor, crc_flags) + code = cpptcl.object_.reinterpret_cast(raw_Tcl_Obj.value) + code_out.swap(code) + +sTclObject_preprocessFile = sTclObject_preprocessFile() + +class BuiltinTclData(ctypes.Structure): + _fields_ = [ + ("interp", ctypes.c_void_p), + ("owning", ctypes.c_char), + ("long_error_desc", ctypes.c_void_p), + ("short_error_desc", ctypes.c_void_p), + ] + +class StringRef(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ("refCount", ctypes.c_int), + ("buffer_size", ctypes.c_int), + ("length", ctypes.c_int), + ] + +class ValueReference: + def __init__(self, value=None): + self._value = None + + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + self._value = value + + +class CObj_tclEval: + def __init__(self): + self._func = _create_native_dispatch(ABI[type(self).__name__]) + + def __call__ (self, issf, code_in): + return self._func(ctypes.addressof(issf), cpptcl.object_.addressof(code_in)) + +CObj_tclEval = CObj_tclEval() + + +class BuiltinTclData_ctor_Tcl_Interp: + def __init__(self): + self._func = _create_native_dispatch(ABI[type(self).__name__]) + + def __call__ (self, issf, interp): + return self._func(ctypes.addressof(issf), cpptcl.interpreter.reinterpret_cast(interp)) + +BuiltinTclData_ctor_Tcl_Interp = BuiltinTclData_ctor_Tcl_Interp() + + +class findFileLocation: + def __init__(self): + self._func = _create_native_dispatch(ABI[type(self).__name__]) + + def __call__ (self, fileLocation: ValueReference, filename: str) -> str: + filename_as_bytes 
= ctypes.create_string_buffer(filename.encode("ascii")) + raw_string_ref = ctypes.c_int(0) + self._func(ctypes.addressof(raw_string_ref), ctypes.addressof(filename_as_bytes)) + + s = ctypes.cast(raw_string_ref.value, ctypes.POINTER(StringRef)) # TODO call destructor for StringRef this will leak memory + if s[0].length > 0: + char_str = ctypes.cast(raw_string_ref.value + 12, ctypes.c_char_p) + fileLocation.value = char_str.value.decode('ascii') + + return fileLocation.value + + +findFileLocation = findFileLocation() + + +def func(clientdata, interp, objc, objv): + i = cpptcl.interpreter.reinterpret_cast(interp) + + v = ctypes.cast(objv, POINTER(c_int)) + + ARG_COUNT = 2 + if objc < ARG_COUNT: + command = [] + for idx in range(objc): + command.append(str(cpptcl.object_.reinterpret_cast(v[idx]))) + command = ' '.join(command) + i.setResult(cpptcl.object_(f"invalid arg count: {command} ({ARG_COUNT} expected)")) + return TCL_ERROR + + issf = BuiltinTclData() + BuiltinTclData_ctor_Tcl_Interp(issf, i) # interpreter is not owned by us + + func_name = str(cpptcl.object_.reinterpret_cast(v[0])) + filename = str(cpptcl.object_.reinterpret_cast(v[1])) + original_filename = filename + + if not filename: + i.setResult(cpptcl.object_(f"call filename was empty !")) + return TCL_ERROR + + filename = filename.lower() + + if filename.startswith("scripts/"): + filename = "data/" + filename + elif filename.startswith("templates/"): + filename = "data/" + filename + + code = cpptcl.object_() + + try: + if not filename.startswith("data/templates"): + sTclObject_loadTclFile(code, filename, 1, 0) + else: + sTclObject_preprocessFile(code, filename, 1, 0) + except ValueError as exc: + try: + file_location = ValueReference('') + findFileLocation(file_location, filename) + filename = file_location.value.lower() + + if not filename.startswith("data/templates"): + sTclObject_loadTclFile(code, filename, 1, 0) + else: + sTclObject_preprocessFile(code, filename, 1, 0) + except ValueError as exc: + i.setResult(cpptcl.object_(f"file not found.\n{original_filename}")) + return TCL_OK # I don't know why OK is returned + + is_ok = CObj_tclEval(issf, code) + if not is_ok: + return TCL_ERROR + # TODO call issf destructor + return TCL_OK + +mock = _native.createNativeToPythonFunction(ABI["script_call"][0], ABI["script_call"][1], ABI["script_call"][2], ABI["script_call"][3], func, False) + +# This is the entry point to the native function via trampoline hook +mock_address = mock.getTrampolineAddress() +# This would call the native function +script_call = _native.NativeFunction(mock_address, ABI["script_call"][1], ABI["script_call"][2], ABI["script_call"][3]) diff --git a/examples/install_requirements.bat b/examples/install_requirements.bat index 8eb0849..1935e8d 100644 --- a/examples/install_requirements.bat +++ b/examples/install_requirements.bat @@ -1,6 +1,8 @@ cd ../bin/python36 python.exe -m pip install --upgrade --force-reinstall pip cd scripts -pip install jupyter +rem Workaround for https://github.com/ipython/ipython/issues/12740 +pip install pip install jedi==0.17.2 +pip install qtconsole pip install pyqt5 pip install ptvsd diff --git a/examples/ipy_kernel.py b/examples/ipy_kernel.py deleted file mode 100644 index f142c77..0000000 --- a/examples/ipy_kernel.py +++ /dev/null @@ -1,41 +0,0 @@ -# import ptvsd; ptvsd.enable_attach(); ptvsd.wait_for_attach() - -# import pydevd -# pydevd.settrace() - -import os -import sys -import tempfile -import threading -import time - -import gui.console - -import IPython - -connection_file = 
os.path.join( - tempfile.gettempdir(), - 'connection-{:d}.json'.format(os.getpid())) - - -def runner(jupyter_qtconsole): - while not os.path.exists(connection_file): - time.sleep(1) - print(f"Starting {connection_file}") - os.system(f"cmd /c {jupyter_qtconsole} --existing {connection_file}") - print("runner started") - - -def main(jupyter_qtconsole): - gui.console.Runner.close() - - t = threading.Thread( - target=runner, - args=[jupyter_qtconsole]) - t.start() - - IPython.embed_kernel( - local_ns=sys._getframe(1).f_locals, - connection_file=connection_file, - ) - t.join() diff --git a/examples/test_python.tcl b/examples/test_python.tcl index c1a3c5a..ee5f5e7 100644 --- a/examples/test_python.tcl +++ b/examples/test_python.tcl @@ -1,17 +1,25 @@ set bootstrap_path [ file dirname [ info script ] ] set package_bin_path $bootstrap_path/../bin -set python_path $package_bin_path/python36 -set jupyter_qtconsole $python_path/scripts/jupyter-qtconsole.exe +set package_scripts_path $bootstrap_path/../scripts +set python_path [lindex $argv 0] set env(PATH) "$package_bin_path;$python_path;$env(PATH);" +# Sometimes env(PATH) contains non-accessable paths +# set env(PATH) "$package_bin_path;$python_path;" set env(PYTHONHOME) $python_path +set env(PYTHONPATH) "$python_path/lib;$python_path/dlls" load tclandpython.dll set py [PythonInterpreter] $py import sys +$py import site $py eval sys.path.append('$bootstrap_path') $py eval sys.path.append('$package_bin_path') +$py eval sys.path.append('$package_scripts_path') +$py eval site.addsitedir('$python_path/lib/site-packages') + +$py eval {print('sys.path:\n', '\n'.join(sys.path), '\n') } $py import pytcl $py import cpptcl @@ -48,29 +56,49 @@ set result [multiply $actual 5.1] puts $result proc call_ipython {} { - global py jupyter_qtconsole - $py import ipy_kernel - $py eval ipy_kernel.main('$jupyter_qtconsole') + global py python_path + $py import ex01.ipy_kernel + $py eval ex01.ipy_kernel.main('$python_path/python.exe') } call_ipython +puts "Test cpptcl.object_.addressof" +$py exec { +import cpptcl +import ctypes +o = cpptcl.object_("text") +pp_Tcl_Obj = cpptcl.object_.addressof(o) +LPLP_Tcl_Obj = ctypes.POINTER(ctypes.c_void_p) +p = ctypes.cast(pp_Tcl_Obj, LPLP_Tcl_Obj) +o2 = cpptcl.object_.reinterpret_cast(p[0]) +print(str(o2)) +} + +puts "Test raw access with ctypes" $py exec { -import tcl83d import ctypes -t_interp = cpptcl.get_current_interpreter_raw() -LP_Tcl_Interp = ctypes.POINTER(tcl83d.Tcl_Interp) + +tcl83d = ctypes.CDLL('tcl83d') +t_interp = cpptcl.interpreter.reinterpret_cast(cpptcl.get_current_interpreter()) +LP_Tcl_Interp = ctypes.POINTER(ctypes.c_void_p) p = ctypes.cast(t_interp, LP_Tcl_Interp) -tcl83d.Tcl_Eval(p, 'set x Text') +command = ctypes.create_string_buffer(b'set x Text') +tcl83d.Tcl_Eval(p, command) +result = cpptcl.result(cpptcl.get_current_interpreter().get()) } puts $x +proc tclInit {} { + +} + interp create sub_child interp eval sub_child { load tclandpython.dll set py [PythonInterpreter] $py import sys - $py import tcl + $py import pytcl set actual [$py eval {"%d.%d.%d" % sys.version_info[0:3]}] set copy $actual @@ -83,8 +111,8 @@ def func(text: str): def multiply(value, value2: float): return value * value2 } - $py eval {tcl.Tcl.proc("name", func)} - $py eval {tcl.Tcl.proc("multiply", multiply)} + $py eval {pytcl.Tcl.proc("name", func)} + $py eval {pytcl.Tcl.proc("multiply", multiply)} name adsf set result [multiply $actual 5.1] diff --git a/libs/asmjit/.travis.yml b/libs/asmjit/.travis.yml index b64f9eb..42e79a6 100644 --- 
a/libs/asmjit/.travis.yml +++ b/libs/asmjit/.travis.yml @@ -12,7 +12,14 @@ dist: bionic matrix: include: - - name: "Linux Clang Default [64-bit] [DBG]" + - name: "Source Code Check" + env: BUILD_MATRIX="SOURCE_CODE_CHECK=1" + os: linux + language: node_js + node_js: + - node + + - name: "Linux Clang Default [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=clang-9 && CXX=clang++-9" os: linux addons: @@ -21,7 +28,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL]" + - name: "Linux Clang Default [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" os: linux addons: @@ -30,7 +37,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL] [Valgrind]" + - name: "Linux Clang Default [X64] [REL] [Valgrind]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" USE_VALGRIND=1 os: linux addons: @@ -39,7 +46,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9, valgrind] - - name: "Linux Clang Default [64-bit] [REL] [Sanitize=Address]" + - name: "Linux Clang Default [X64] [REL] [Sanitize=Address]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_SANITIZE=address" os: linux addons: @@ -48,7 +55,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL] [Sanitize=Undefined]" + - name: "Linux Clang Default [X64] [REL] [Sanitize=Undefined]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_SANITIZE=undefined" os: linux addons: @@ -57,8 +64,8 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL] [NoBuilder]" - env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_BUILDER=1" + - name: "Linux Clang Default [X64] [REL] [NoDeprecated]" + env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_DEPRECATED=1" os: linux addons: apt: @@ -66,8 +73,8 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL] [NoCompiler]" - env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_COMPILER=1" + - name: "Linux Clang Default [X64] [REL] [NoIntrinsics]" + env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_INTRINSICS=1" os: linux addons: apt: @@ -75,7 +82,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux Clang Default [64-bit] [REL] [NoLogging]" + - name: "Linux Clang Default [X64] [REL] [NoLogging]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_LOGGING=1" os: linux addons: @@ -84,77 +91,95 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [clang++-9] - - name: "Linux GCC 4.8 [32-bit] [DBG]" + - name: "Linux Clang Default [X64] [REL] [NoBuilder]" + env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_BUILDER=1" + os: linux + addons: + apt: + sources: + - sourceline: "ppa:ubuntu-toolchain-r/test" + packages: [clang++-9] + + - name: "Linux Clang Default [X64] [REL] [NoCompiler]" + env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" EXTRA_OPTIONS="-DASMJIT_NO_COMPILER=1" + os: linux + addons: 
+ apt: + sources: + - sourceline: "ppa:ubuntu-toolchain-r/test" + packages: [clang++-9] + + - name: "Linux GCC 4.8 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-4.8 && CXX=g++-4.8" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: apt: packages: [g++-4.8, g++-4.8-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 4.8 [64-bit] [DBG]" + - name: "Linux GCC 4.8 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-4.8 && CXX=g++-4.8" os: linux addons: apt: packages: [g++-4.8] - - name: "Linux GCC 5 [32-bit] [DBG]" + - name: "Linux GCC 5 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-5 && CXX=g++-5" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: apt: packages: [g++-5, g++-5-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 5 [64-bit] [DBG]" + - name: "Linux GCC 5 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-5 && CXX=g++-5" os: linux addons: apt: packages: [g++-5] - - name: "Linux GCC 6 [32-bit] [DBG]" + - name: "Linux GCC 6 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-6 && CXX=g++-6" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: apt: packages: [g++-6, g++-6-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 6 [64-bit] [DBG]" + - name: "Linux GCC 6 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-6 && CXX=g++-6" os: linux addons: apt: packages: [g++-6] - - name: "Linux GCC 7 [32-bit] [DBG]" + - name: "Linux GCC 7 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-7 && CXX=g++-7" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: apt: packages: [g++-7, g++-7-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 7 [64-bit] [DBG]" + - name: "Linux GCC 7 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-7 && CXX=g++-7" os: linux addons: apt: packages: [g++-7] - - name: "Linux GCC 8 [32-bit] [DBG]" + - name: "Linux GCC 8 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-8 && CXX=g++-8" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: apt: packages: [g++-8, g++-8-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 8 [64-bit] [DBG]" + - name: "Linux GCC 8 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-8 && CXX=g++-8" os: linux addons: apt: packages: [g++-8] - - name: "Linux GCC 9 [32-bit] [DBG]" + - name: "Linux GCC 9 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-9 && CXX=g++-9" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: @@ -163,7 +188,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-9, g++-9-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 9 [64-bit] [DBG]" + - name: "Linux GCC 9 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-9 && CXX=g++-9" os: linux addons: @@ -172,7 +197,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-9] - - name: "Linux GCC 10 [32-bit] [DBG]" + - name: "Linux GCC 10 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-10 && CXX=g++-10" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: @@ -181,7 +206,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-10, g++-10-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 10 [32-bit] [REL]" + - name: "Linux GCC 10 [X86] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-10 && CXX=g++-10" CXXFLAGS=-m32 LDFLAGS=-m32 os: linux addons: @@ -190,7 +215,7 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-10, g++-10-multilib, "linux-libc-dev:i386"] - - name: "Linux GCC 10 [64-bit] [DBG]" + - name: "Linux GCC 10 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-10 && CXX=g++-10" os: linux addons: @@ -199,7 +224,7 @@ 
matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-10] - - name: "Linux GCC 10 [64-bit] [REL]" + - name: "Linux GCC 10 [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-10 && CXX=g++-10" os: linux addons: @@ -208,59 +233,69 @@ matrix: - sourceline: "ppa:ubuntu-toolchain-r/test" packages: [g++-10] - - name: "OSX Clang XCode 9.4 [32-bit] [DBG]" + - name: "Linux GCC Default [ARM64] [DBG]" + env: BUILD_MATRIX="BUILD_TYPE=Debug" + os: linux + arch: arm64 + + - name: "Linux GCC Default [ARM64] [REL]" + env: BUILD_MATRIX="BUILD_TYPE=Release" + os: linux + arch: arm64 + + - name: "OSX Clang XCode 9.4 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" CXXFLAGS=-m32 LDFLAGS=-m32 os: osx osx_image: xcode9.4 - - name: "OSX Clang XCode 9.4 [32-bit] [REL]" + - name: "OSX Clang XCode 9.4 [X86] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" CXXFLAGS=-m32 LDFLAGS=-m32 os: osx osx_image: xcode9.4 - - name: "OSX Clang XCode 9.4 [64-bit] [DBG]" + - name: "OSX Clang XCode 9.4 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" os: osx osx_image: xcode9.4 - - name: "OSX Clang XCode 9.4 [64-bit] [REL]" + - name: "OSX Clang XCode 9.4 [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" os: osx osx_image: xcode9.4 - - name: "OSX Clang XCode 10.2 [64-bit] [DBG]" + - name: "OSX Clang XCode 10.2 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" os: osx osx_image: xcode10.2 - - name: "OSX Clang XCode 10.2 [64-bit] [REL]" + - name: "OSX Clang XCode 10.2 [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" os: osx osx_image: xcode10.2 - - name: "OSX Clang XCode 11 [64-bit] [DBG]" + - name: "OSX Clang XCode 11 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" os: osx osx_image: xcode11 - - name: "OSX Clang XCode 11 [64-bit] [REL]" + - name: "OSX Clang XCode 11 [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" os: osx osx_image: xcode11 - - name: "Windows VS2017 [32-bit] [DBG]" + - name: "Windows VS2017 [X86] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" BUILD_TOOLCHAIN="Visual Studio 15 2017" os: windows - - name: "Windows VS2017 [32-bit] [REL]" + - name: "Windows VS2017 [X86] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" BUILD_TOOLCHAIN="Visual Studio 15 2017" os: windows - - name: "Windows VS2017 [64-bit] [DBG]" + - name: "Windows VS2017 [X64] [DBG]" env: BUILD_MATRIX="BUILD_TYPE=Debug" BUILD_TOOLCHAIN="Visual Studio 15 2017 Win64" os: windows - - name: "Windows VS2017 [64-bit] [REL]" + - name: "Windows VS2017 [X64] [REL]" env: BUILD_MATRIX="BUILD_TYPE=Release" BUILD_TOOLCHAIN="Visual Studio 15 2017 Win64" os: windows @@ -268,36 +303,32 @@ before_install: - eval "$BUILD_MATRIX" before_script: - - mkdir build - - cd build - | - if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then - cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 ${EXTRA_OPTIONS} - else - cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 ${EXTRA_OPTIONS} -DCMAKE_PREFIX_PATH="${MINGW_PATH}" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" + if [ -z $SOURCE_CODE_CHECK ]; then + mkdir build + cd build + if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then + cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 ${EXTRA_OPTIONS} + else + cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 ${EXTRA_OPTIONS} -DCMAKE_PREFIX_PATH="${MINGW_PATH}" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" + fi + cd .. fi - - cd .. script: - - cd build - | - if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then - cmake --build . 
--config ${BUILD_TYPE} -- -nologo -v:minimal - cd ${BUILD_TYPE} + if [ -z $SOURCE_CODE_CHECK ]; then + ROOT_DIR=`pwd` + cd build + if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then + cmake --build . --config ${BUILD_TYPE} -- -nologo -v:minimal || exit 1 + cd ${BUILD_TYPE} + else + cmake --build . || exit 1 + fi + ${ROOT_DIR}/tools/ci-run.sh . || exit 1 else - cmake --build . - fi - - - | - if [ "$USE_VALGRIND" = "1" ]; then - RUN_CMD="valgrind --leak-check=full --show-reachable=yes --track-origins=yes" - fi - - - eval "$RUN_CMD ./asmjit_test_unit --quick" - - eval "$RUN_CMD ./asmjit_test_opcode > /dev/null" - - eval "$RUN_CMD ./asmjit_test_x86_asm" - - eval "$RUN_CMD ./asmjit_test_x86_sections" - - | - if [ -f ./asmjit_test_x86_cc ]; then - eval "$RUN_CMD ./asmjit_test_x86_cc" + cd tools + ./enumgen.sh --verify || exit 1 + cd .. fi diff --git a/libs/asmjit/CMakeLists.txt b/libs/asmjit/CMakeLists.txt index 7b6c90b..7644171 100644 --- a/libs/asmjit/CMakeLists.txt +++ b/libs/asmjit/CMakeLists.txt @@ -1,9 +1,17 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) cmake_policy(PUSH) -cmake_policy(SET CMP0063 NEW) # Honor visibility properties. + +if(POLICY CMP0063) + cmake_policy(SET CMP0063 NEW) # Honor visibility properties. +endif() + +if(POLICY CMP0092) + cmake_policy(SET CMP0092 NEW) # Don't add -W3 warning level by default. +endif() include(CheckCXXCompilerFlag) +include(GNUInstallDirs) # Don't create a project if it was already created by another CMakeLists.txt. # This allows one library to embed another library without making a collision. @@ -37,10 +45,6 @@ if (NOT DEFINED ASMJIT_STATIC) set(ASMJIT_STATIC ${ASMJIT_EMBED}) endif() -if (NOT DEFINED ASMJIT_BUILD_ARM) - set(ASMJIT_BUILD_ARM FALSE) -endif() - if (NOT DEFINED ASMJIT_BUILD_X86) set(ASMJIT_BUILD_X86 FALSE) endif() @@ -62,9 +66,8 @@ set(ASMJIT_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE PATH "Location of 'asmji set(ASMJIT_TEST ${ASMJIT_TEST} CACHE BOOL "Build 'asmjit' test applications") set(ASMJIT_EMBED ${ASMJIT_EMBED} CACHE BOOL "Embed 'asmjit' library (no targets)") set(ASMJIT_STATIC ${ASMJIT_STATIC} CACHE BOOL "Build 'asmjit' library as static") -set(ASMJIT_SANITIZE ${ASMJIT_SANITIZE} CACHE BOOL "Build with C/C++ sanitizers enabled") +set(ASMJIT_SANITIZE ${ASMJIT_SANITIZE} CACHE STRING "Build with sanitizers: 'address', 'undefined', etc...") set(ASMJIT_BUILD_X86 ${ASMJIT_BUILD_X86} CACHE BOOL "Build X86 backends (X86 and X86_64)") -set(ASMJIT_BUILD_ARM ${ASMJIT_BUILD_ARM} CACHE BOOL "Build ARM backends") # ============================================================================= # [AsmJit - Project] @@ -132,6 +135,7 @@ function(asmjit_add_target target target_type) add_library(${target} ${target_type} ${X_SOURCES}) endif() + set_target_properties(${target} PROPERTIES DEFINE_SYMBOL "") target_link_libraries(${target} PRIVATE ${X_LIBRARIES}) # target_link_options was added in cmake v3.13, don't use it for now... @@ -167,14 +171,13 @@ set(ASMJIT_PRIVATE_CFLAGS_REL "") # Private compiler flags used b set(ASMJIT_SANITIZE_CFLAGS "") # Compiler flags required by currently enabled sanitizers. set(ASMJIT_SANITIZE_LFLAGS "") # Linker flags required by currently enabled sanitizers. -# TODO: Backward compatibility. +# We will have to keep this most likely forever as some users may still be using it. 
set(ASMJIT_INCLUDE_DIR "${ASMJIT_INCLUDE_DIRS}") if (NOT ASMJIT_NO_CUSTOM_FLAGS) if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") list(APPEND ASMJIT_PRIVATE_CFLAGS -MP # [+] Multi-Process Compilation. - -GR- # [-] Runtime type information. -GF # [+] Eliminate duplicate strings. -Zc:inline # [+] Remove unreferenced COMDAT. -Zc:strictStrings # [+] Strict const qualification of string literals. @@ -189,7 +192,7 @@ if (NOT ASMJIT_NO_CUSTOM_FLAGS) -O2 # [+] Favor speed over size. -Oi) # [+] Generate intrinsic functions. elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(GNU|Clang|AppleClang)$") - list(APPEND ASMJIT_PRIVATE_CFLAGS -Wall -Wextra) + list(APPEND ASMJIT_PRIVATE_CFLAGS -Wall -Wextra -Wconversion) list(APPEND ASMJIT_PRIVATE_CFLAGS -fno-math-errno) list(APPEND ASMJIT_PRIVATE_CFLAGS_REL -O2) @@ -197,8 +200,10 @@ if (NOT ASMJIT_NO_CUSTOM_FLAGS) -fno-threadsafe-statics -fno-semantic-interposition) + # The following flags can save few bytes in the resulting binary. asmjit_detect_cflags(ASMJIT_PRIVATE_CFLAGS_REL - -fmerge-all-constants) + -fmerge-all-constants # Merge all constants even if it violates ISO C++. + -fno-enforce-eh-specs) # Don't enforce termination if noexcept function throws. endif() endif() @@ -246,8 +251,7 @@ endif() foreach(build_option ASMJIT_STATIC ASMJIT_BUILD_X86 - #ASMJIT_BUILD_ARM - ASMJIT_BUILD_A64 + ASMJIT_NO_DEPRECATED ASMJIT_NO_JIT ASMJIT_NO_LOGGING ASMJIT_NO_BUILDER @@ -281,29 +285,43 @@ set(ASMJIT_SRC_LIST asmjit/core.h asmjit/core/api-build_p.h asmjit/core/api-config.h - asmjit/core/arch.cpp - asmjit/core/arch.h + asmjit/core/archtraits.cpp + asmjit/core/archtraits.h + asmjit/core/archcommons.h asmjit/core/assembler.cpp asmjit/core/assembler.h asmjit/core/builder.cpp asmjit/core/builder.h - asmjit/core/callconv.cpp - asmjit/core/callconv.h - asmjit/core/codebufferwriter_p.h + asmjit/core/codebuffer.h asmjit/core/codeholder.cpp asmjit/core/codeholder.h + asmjit/core/codewriter.cpp + asmjit/core/codewriter_p.h asmjit/core/compiler.cpp asmjit/core/compiler.h + asmjit/core/compilerdefs.h asmjit/core/constpool.cpp asmjit/core/constpool.h asmjit/core/cpuinfo.cpp asmjit/core/cpuinfo.h asmjit/core/datatypes.h + asmjit/core/emithelper.cpp + asmjit/core/emithelper_p.h asmjit/core/emitter.cpp asmjit/core/emitter.h + asmjit/core/emitterutils.cpp + asmjit/core/emitterutils_p.h + asmjit/core/environment.cpp + asmjit/core/environment.h + asmjit/core/errorhandler.cpp + asmjit/core/errorhandler.h asmjit/core/features.h + asmjit/core/formatter.cpp + asmjit/core/formatter.h asmjit/core/func.cpp asmjit/core/func.h + asmjit/core/funcargscontext.cpp + asmjit/core/funcargscontext_p.h asmjit/core/globals.cpp asmjit/core/globals.h asmjit/core/inst.cpp @@ -312,8 +330,8 @@ set(ASMJIT_SRC_LIST asmjit/core/jitallocator.h asmjit/core/jitruntime.cpp asmjit/core/jitruntime.h - asmjit/core/logging.cpp - asmjit/core/logging.h + asmjit/core/logger.cpp + asmjit/core/logger.h asmjit/core/misc_p.h asmjit/core/operand.cpp asmjit/core/operand.h @@ -353,27 +371,28 @@ set(ASMJIT_SRC_LIST asmjit/core/zonevector.h asmjit/x86.h + asmjit/x86/x86archtraits_p.h asmjit/x86/x86assembler.cpp asmjit/x86/x86assembler.h asmjit/x86/x86builder.cpp asmjit/x86/x86builder.h - asmjit/x86/x86callconv.cpp - asmjit/x86/x86callconv_p.h asmjit/x86/x86compiler.cpp asmjit/x86/x86compiler.h + asmjit/x86/x86emithelper.cpp + asmjit/x86/x86emithelper_p.h asmjit/x86/x86emitter.h asmjit/x86/x86features.cpp asmjit/x86/x86features.h + asmjit/x86/x86formatter.cpp + asmjit/x86/x86formatter_p.h 
+ asmjit/x86/x86func.cpp + asmjit/x86/x86func_p.h asmjit/x86/x86globals.h - asmjit/x86/x86internal.cpp - asmjit/x86/x86internal_p.h asmjit/x86/x86instdb.cpp asmjit/x86/x86instdb.h asmjit/x86/x86instdb_p.h asmjit/x86/x86instapi.cpp asmjit/x86/x86instapi_p.h - asmjit/x86/x86logging.cpp - asmjit/x86/x86logging_p.h asmjit/x86/x86operand.cpp asmjit/x86/x86operand.h asmjit/x86/x86rapass.cpp @@ -427,19 +446,33 @@ if (NOT ASMJIT_EMBED) CFLAGS ${ASMJIT_PRIVATE_CFLAGS} CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) - target_include_directories(asmjit BEFORE INTERFACE ${ASMJIT_INCLUDE_DIRS}) + target_compile_options(asmjit INTERFACE ${ASMJIT_CFLAGS}) + target_include_directories(asmjit BEFORE INTERFACE + $ + $) + + # Add blend2d::blend2d alias. + add_library(asmjit::asmjit ALIAS asmjit) + # TODO: [CMAKE] Deprecated alias - we use projectname::libraryname convention now. add_library(AsmJit::AsmJit ALIAS asmjit) # Add AsmJit install instructions (library and public headers). if (NOT ASMJIT_NO_INSTALL) - install(TARGETS asmjit RUNTIME DESTINATION "bin" - LIBRARY DESTINATION "lib${LIB_SUFFIX}" - ARCHIVE DESTINATION "lib${LIB_SUFFIX}") + install(TARGETS asmjit + EXPORT asmjit-config + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + install(EXPORT asmjit-config + NAMESPACE asmjit:: + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/asmjit") + foreach(_src_file ${ASMJIT_SRC_LIST}) if ("${_src_file}" MATCHES "\\.h$" AND NOT "${_src_file}" MATCHES "_p\\.h$") get_filename_component(_src_dir ${_src_file} PATH) - install(FILES "${ASMJIT_DIR}/src/${_src_file}" DESTINATION "include/${_src_dir}") + install(FILES "${ASMJIT_DIR}/src/${_src_file}" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${_src_dir}") endif() endforeach() endif() @@ -467,25 +500,43 @@ if (NOT ASMJIT_EMBED) asmjit_test_x86_sections) asmjit_add_target(${_target} TEST SOURCES test/${_target}.cpp - LIBRARIES AsmJit::AsmJit + LIBRARIES asmjit::asmjit CFLAGS ${ASMJIT_PRIVATE_CFLAGS} CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) endforeach() - if (NOT (ASMJIT_NO_BUILDER OR ASMJIT_NO_COMPILER)) - asmjit_add_target(asmjit_test_x86_cc TEST - SOURCES test/asmjit_test_x86_cc.cpp - LIBRARIES AsmJit::AsmJit + if (NOT ASMJIT_NO_INTROSPECTION) + asmjit_add_target(asmjit_test_x86_instinfo TEST + SOURCES test/asmjit_test_x86_instinfo.cpp + LIBRARIES asmjit::asmjit CFLAGS ${ASMJIT_PRIVATE_CFLAGS} CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) endif() + if (NOT (ASMJIT_NO_BUILDER OR ASMJIT_NO_COMPILER)) + # Vectorcall tests and XMM tests require at least SSE2 in 32-bit mode (in 64-bit mode it's implicit). 
+ set(sse2_flags "") + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") + asmjit_detect_cflags(sse2_flags "-arch:SSE2") + else() + asmjit_detect_cflags(sse2_flags "-msse2") + endif() + asmjit_add_target(asmjit_test_compiler TEST + SOURCES test/asmjit_test_compiler.cpp + test/asmjit_test_compiler_x86.cpp + test/asmjit_test_compiler.h + LIBRARIES asmjit::asmjit + CFLAGS ${ASMJIT_PRIVATE_CFLAGS} ${sse2_flags} + CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} + CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) + endif() + foreach(_target asmjit_bench_x86) asmjit_add_target(${_target} EXECUTABLE SOURCES test/${_target}.cpp - LIBRARIES AsmJit::AsmJit + LIBRARIES asmjit::asmjit CFLAGS ${ASMJIT_PRIVATE_CFLAGS} CFLAGS_DBG ${ASMJIT_PRIVATE_CFLAGS_DBG} CFLAGS_REL ${ASMJIT_PRIVATE_CFLAGS_REL}) diff --git a/libs/asmjit/COMMIT.md b/libs/asmjit/COMMIT.md index 25cbac7..3956c7d 100644 --- a/libs/asmjit/COMMIT.md +++ b/libs/asmjit/COMMIT.md @@ -1,2 +1,2 @@ Source: -https://github.com/asmjit/asmjit/commit/e78bba83da7fed4d8d3d5ed87cd23d64d9fcbb55 +https://github.com/asmjit/asmjit/commit/5bc166efdb419f88bd5b5774c62cfc4d08a0bfa4 diff --git a/libs/asmjit/README.md b/libs/asmjit/README.md index 24c726b..8662566 100644 --- a/libs/asmjit/README.md +++ b/libs/asmjit/README.md @@ -1,140 +1,33 @@ AsmJit ------ -Machine code generation for C++. +AsmJit is a lightweight library for machine code generation written in C++ language. + * [Official Home Page (asmjit.com)](https://asmjit.com) * [Official Repository (asmjit/asmjit)](https://github.com/asmjit/asmjit) - * [Official Blog (asmbits)](https://asmbits.blogspot.com/ncr) - * [Official Chat (gitter)](https://gitter.im/asmjit/asmjit) - * [Permissive ZLIB license](./LICENSE.md) + * [Public Chat Channel](https://gitter.im/asmjit/asmjit) + * [Zlib License](./LICENSE.md) +See [asmjit.com](https://asmjit.com) page for more details, examples, and documentation. -Introduction ------------- +Documentation +------------- -AsmJit is a complete JIT and AOT assembler for C++ language. It can generate native code for x86 and x64 architectures and supports the whole x86/x64 instruction set - from legacy MMX to the newest AVX512. It has a type-safe API that allows C++ compiler to do semantic checks at compile-time even before the assembled code is generated and/or executed. + * [Documentation Index](https://asmjit.com/doc/index.html) + * [Build Instructions](https://asmjit.com/doc/group__asmjit__build.html) -AsmJit, as the name implies, started as a project that provided JIT code-generation and execution. However, AsmJit evolved and it now contains features that are far beyond the scope of a simple JIT compilation. To keep the library small and lightweight the functionality not strictly related to JIT is provided by a sister project called [asmtk](https://github.com/asmjit/asmtk). +Breaking Changes +---------------- +Breaking the API is sometimes inevitable, what to do? -Minimal Example ---------------- - -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef int (*Func)(void); - -int main(int argc, char* argv[]) { - JitRuntime rt; // Runtime specialized for JIT code execution. - - CodeHolder code; // Holds code and relocation information. - code.init(rt.codeInfo()); // Initialize to the same arch as JIT runtime. - - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - a.mov(x86::eax, 1); // Move one to 'eax' register. - a.ret(); // Return from function. 
- // ----> x86::Assembler is no longer needed from here and can be destroyed <---- - - Func fn; - Error err = rt.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - int result = fn(); // Execute the generated code. - printf("%d\n", result); // Print the resulting "1". - - // All classes use RAII, all resources will be released before `main()` returns, - // the generated function can be, however, released explicitly if you intend to - // reuse or keep the runtime alive, which you should in a production-ready code. - rt.release(fn); - - return 0; -} -``` - - -AsmJit Summary --------------- - - * Complete x86/x64 instruction set - MMX, SSE+, BMI+, ADX, TBM, XOP, AVX+, FMA+, and AVX512+. - * Different emitters providing various abstraction levels (Assembler, Builder, Compiler). - * Support for sections for separating code and data. - * Built-in CPU vendor and features detection. - * Advanced logging, formatting, and error handling. - * JIT memory allocator - interface similar to malloc/free for JIT code-generation and execution. - * Lightweight and easily embeddable - ~300kB compiled with all built-in features. - * Modular design - unneeded features can be disabled at compile-time to make the library smaller. - * Zero dependencies - no external libraries, no STL/RTTI - easy to embed and/or link statically. - * Doesn't use exceptions internally, but allows to attach a "throwable" error handler of your choice. - - -Advanced Features ------------------ - - * AsmJit contains a highly compressed instruction database: - * Instruction names - allows to convert instruction id to its name and vice versa. - * Instruction metadata - access (read|write) of all operand combinations of all instructions. - * Instruction signatures - allows to strictly validate if an instruction (with all its operands) is valid. - * AsmJit allows to precisely control how instructions are encoded if there are multiple variations. - * AsmJit is highly dynamic, constructing operands at runtime is a common practice. - * Multiple emitters with the same interface - emit machine code directly or to a representation that can be post-processed. - - -Important ---------- - -Breaking the official API is sometimes inevitable, what to do? - * See asmjit tests, they always compile and provide an implementation of a lot of use-cases: + * See [Breaking Changes Guide](https://asmjit.com/doc/group__asmjit__breaking__changes.html), which is now part of AsmJit documentation. + * See asmjit tests, they always compile and provide implementation of many use-cases: * [asmjit_test_x86_asm.cpp](./test/asmjit_test_x86_asm.cpp) - Tests that demonstrate the purpose of emitters. * [asmjit_test_x86_cc.cpp](./test/asmjit_test_x86_cc.cpp) - A lot of tests targeting Compiler infrastructure. * [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) - Multiple sections test. * Visit our [Official Chat](https://gitter.im/asmjit/asmjit) if you need a quick help. - -TODO ----- - - * [ ] Add support for user external buffers in CodeHolder. - - -Supported Environments ----------------------- - -### C++ Compilers: - - * Requirements: - * AsmJit won't build without C++11 enabled. If you use older GCC or Clang you would have to enable at least c++11 through compiler flags. 
- * Tested: - * **Clang** - tested by Travis-CI - Clang 3.9+ (with C++11 enabled) is officially supported (older Clang versions having C++11 support are probably fine, but are not regularly tested). - * **GNU** - tested by Travis-CI - GCC 4.8+ (with C++11 enabled) is officially supported. - * **MINGW** - tested by Travis-CI - Use the latest version, if possible. - * **MSVC** - tested by Travis-CI - VS2017+ is officially supported, VS2015 is reported to work. - * Untested: - * **Intel** - no maintainers and no CI environment to regularly test this compiler. - * Other c++ compilers would require basic support in [core/build.h](./src/asmjit/core/build.h). - -### Operating Systems: - - * Tested: - * **Linux** - tested by Travis-CI - any distribution is generally supported. - * **OSX** - tested by Travis-CI - any version is supported. - * **Windows** - tested by Travis-CI - Windows 7+ is officially supported. - * Untested: - * **BSDs** - no maintainers, no CI environment to regularly test these OSes. - * **Haiku** - not regularly tested, but reported to work. - * Other operating systems would require some testing and support in [core/build.h](./src/asmjit/core/build.h), [core/osutils.cpp](./src/asmjit/core/osutils.cpp), and [core/virtmem.cpp](./src/asmjit/core/virtmem.cpp). - -### Backends: - - * **X86** - tested by both Travis-CI - both 32-bit and 64-bit backends are fully functional. - * **ARM** - work-in-progress (not public at the moment). - - Project Organization -------------------- @@ -145,1809 +38,31 @@ Project Organization * **arm** - ARM specific API, used only by ARM and AArch64 backends. * **x86** - X86 specific API, used only by X86 and X64 backends. * **test** - Unit and integration tests (don't embed in your project). - * **tools** - Tools used for configuring, documenting and generating data files. - - -Configuring & Feature Selection -------------------------------- - -AsmJit is designed to be easy embeddable in any project. However, it depends on some compile-time macros that can be used to build a specific version of AsmJit that includes or excludes certain features. A typical way of building AsmJit is to use [cmake](https://www.cmake.org), but it's also possible to just include AsmJit source code in your project and just build it. The easiest way to include AsmJit in your project is to just include **src** directory in your project and to define `ASMJIT_STATIC`. AsmJit can be just updated from time to time without any changes to this integration process. Do not embed AsmJit's [/test](./test) files in such case as these are used for testing. - -### Build Type: - - * `ASMJIT_BUILD_DEBUG` - Define to always turn debugging on (regardless of compile-time options detected). - * `ASMJIT_BUILD_RELEASE` - Define to always turn debugging off (regardless of compile-time options detected). - -By default none of these is defined, AsmJit detects build-type based on compile-time macros and supports most IDE and compiler settings out of box. By default AsmJit switches to release mode when `NDEBUG` is defined. - -### Build Mode: - - * `ASMJIT_STATIC` - Define to build AsmJit statically - either as a static library or as a part of another project. No symbols are exported in such case. - -By default AsmJit build is configured to be built as a shared library, this means `ASMJIT_STATIC` must be explicitly enabled if you want to compile AsmJit statically. - -### Build Backends: - - * `ASMJIT_BUILD_ARM` - Build ARM backends (not ready, work-in-progress). 
- * `ASMJIT_BUILD_X86` - Build X86 backends (X86 and X86_64). - * `ASMJIT_BUILD_HOST` - Build only the host backend (default). - -If none of `ASMJIT_BUILD_...` is defined AsmJit bails to `ASMJIT_BUILD_HOST`, which will detect the target architecture at compile-time. Each backend automatically supports 32-bit and 64-bit targets, so for example AsmJit with X86 support can generate both 32-bit and 64-bit code. - -### Disabling Features: - - * `ASMJIT_NO_BUILDER` - Disables both `Builder` and `Compiler` emitters (only `Assembler` will be available). Ideal for users that don't use `Builder` concept and want to have AsmJit a bit smaller. - * `ASMJIT_NO_COMPILER` - Disables `Compiler` emitter. For users that use `Builder`, but not `Compiler`. - * `ASMJIT_NO_JIT` - Disables JIT execution engine, which includes `JitUtils`, `JitAllocator`, and `JitRuntime`. - * `ASMJIT_NO_LOGGING` - Disables logging (`Logger` and all classes that inherit it) and instruction formatting. - * `ASMJIT_NO_TEXT` - Disables everything that uses text-representation and that causes certain strings to be stored in the resulting binary. For example when this flag is set all instruction and error names (and related APIs) will not be available. This flag has to be disabled together with `ASMJIT_NO_LOGGING`. This option is suitable for deployment builds or builds that don't want to reveal the use of AsmJit. - * `ASMJIT_NO_INST_API` - Disables instruction query features, strict validation, read/write information, and all additional data and APIs that can output information about instructions. - -NOTE: Please don't disable any features if you plan to build AsmJit as a shared library that will be used by multiple projects that you don't control (for example asmjit in a Linux distribution). The possibility to disable certain features exists mainly for customized builds of AsmJit. - - -Using AsmJit ------------- - -AsmJit library uses one global namespace called `asmjit` that provides the whole functionality. Architecture specific code is prefixed by the architecture name and architecture specific registers and operand builders have their own namespace. For example API targeting both X86 and X64 architectures is prefixed with `X86` and registers & operand builders are accessible through `x86` namespace. This design is very different from the initial version of AsmJit and it seems now as the most convenient one. - -### CodeHolder & Emitters - -AsmJit provides two classes that are used together for code generation: - - * `CodeHolder` - Provides functionality to hold generated code and stores all necessary information about code sections, labels, symbols, and possible relocations. - * `BaseEmitter` - Provides functionality to emit code into `CodeHolder`. `BaseEmitter` is abstract and provides just basic building blocks that are then implemented by `BaseAssembler`, `BaseBuilder`, `BaseCompiler`, and their architecture-specific implementations like `x86::Assembler`, `x86::Builder`, and `x86::Compiler`. - -Code emitters: - - * `[Base]Assembler` - Emitter designed to emit machine code directly into a `CodeBuffer` held by `CodeHolder`. - * `[Base]Builder` - Emitter designed to emit code into a representation that can be processed afterwards. It stores the whole code in a double linked list consisting of nodes (`BaseNode` and all derived classes). There are nodes that represent instructions (`InstNode`), labels (`LabelNode`), and other building blocks (`AlignNode`, `DataNode`, ...). 
Some nodes are used as markers (`SentinelNode` and comments (`CommentNode`). - * `[Base]Compiler` - High-level code emitter that uses virtual registers and contains high-level function building features. Compiler extends `[Base]Builder` functionality and introduces new nodes like `FuncNode`, `FuncRetNode`, and `FuncCallNode`. Compiler is the simplest way to start with AsmJit as it abstracts lots of details required to generate a function that can be called from a C/C++ language. - -### Targets and JitRuntime - -AsmJit's `Target` class is an interface that provides basic target abstraction. At the moment only one implementation called `JitRuntime` is provided, which as the name suggests provides JIT code target and execution runtime. `JitRuntime` provides all the necessary functionality to implement a simple JIT functionality with basic memory management. It only provides `add()` and `release()` functions that are used to either add code to the runtime or release it. The `JitRuntime` doesn't do any decisions on when the code should be released. Once you add new code into it you must decide when that code is no longer needed and should be released. - -### Instructions & Operands - -Instructions specify operations performed by the CPU, and operands specify the operation's input(s) and output(s). Each AsmJit's instruction has it's own unique id (`Inst::Id` for example) and platform specific code emitters always provide a type safe intrinsic (or multiple overloads) to emit such instruction. There are two ways of emitting an instruction: - - * Using `BaseEmitter::inst(operands...)` - A type-safe way provided by platform specific emitters - for example `x86::Assembler` provides `x86::Assembler::mov(x86::Gp, x86::Gp)`. - * Using `BaseEmitter::emit(instId, operands...)` - Allows to emit an instruction in a dynamic way - you just need to know instruction's id and provide its operands. - -AsmJit's operands all inherit from a base class called `Operand` and then specialize its type to: - - * **None** (not used or uninitialized operand). - * **Register** (`BaseReg`) - Describes either physical or virtual register. Physical registers have id that matches the target's machine id directly whereas virtual registers must be allocated into physical registers by a register allocator pass. Register operand provides: - * **Register Type** - Unique id that describes each possible register provided by the target architecture - for example X86 backend provides `x86::Reg::RegType`, which defines all variations of general purpose registers (GPB-LO, GPB-HI, GPW, GPD, and GPQ) and all types of other registers like K, MM, BND, XMM, YMM, and ZMM. - * **Register Group** - Groups multiple register types under a single group - for example all general-purpose registers (of all sizes) on X86 are `x86::Reg::kGroupGp`, all SIMD registers (XMM, YMM, ZMM) are `x86::Reg::kGroupVec`, etc. - * **Register Size** - Contains the size of the register in bytes. If the size depends on the mode (32-bit vs 64-bit) then generally the higher size is used (for example RIP register has size 8 by default). - * **Register ID** - Contains physical or virtual id of the register. - * Each architecture provides its own register that adds a architecture-specific API to `BaseReg`. - * **Memory Address** (`BaseMem`) - Used to reference a memory location. Memory operand provides: - * **Base Register** - A base register type and id (physical or virtual). - * **Index Register** - An index register type and id (physical or virtual). 
- * **Offset** - Displacement or absolute address to be referenced (32-bit if base register is used and 64-bit if base register is not used). - * **Flags** that can describe various architecture dependent information (like scale and segment-override on X86). - * Each architecture provides its own register that adds a architecture-specific API to `BaseMem`. - * **Immediate Value** (`Imm`) - Immediate values are usually part of instructions (encoded within the instruction itself) or data. - * **Label** - used to reference a location in code or data. Labels must be created by the `BaseEmitter` or by `CodeHolder`. Each label has its unique id per `CodeHolder` instance. - -AsmJit allows to construct operands dynamically, to store them, and to query a complete information about them at run-time. Operands are small (always 16 bytes per `Operand`) and should be always copied (by value) if you intend to store them (don't create operands by using `new` keyword, it's not recommended). Operands are safe to be `memcpy()`ed and `memset()`ed if you need to work with arrays of operands. - -Small example of manipulating and using operands: - -```c++ -#include - -using namespace asmjit; - -x86::Gp dstRegByValue() { return x86::ecx; } - -void usingOperandsExample(x86::Assembler& a) { - // Create some operands. - x86::Gp dst = dstRegByValue(); // Get `ecx` register returned by a function. - x86::Gp src = x86::rax; // Get `rax` register directly from the provided `x86` namespace. - x86::Gp idx = x86::gpq(10); // Construct `r10` dynamically. - x86::Mem m = x86::ptr(src, idx); // Construct [src + idx] memory address - referencing [rax + r10]. - - // Examine `m`: - m.indexType(); // Returns `x86::Reg::kTypeGpq`. - m.indexId(); // Returns 10 (`r10`). - - // Reconstruct `idx` stored in mem: - x86::Gp idx_2 = x86::Gp::fromTypeAndId(m.indexType(), m.indexId()); - idx == idx_2; // True, `idx` and idx_2` are identical. - - Operand op = m; // Possible. - op.isMem(); // True (can be casted to BaseMem or architecture-specific Mem). - - m == op; // True, `op` is just a copy of `m`. - static_cast(op).addOffset(1); // Static cast is fine and valid here. - op.as().addOffset(1); // However, using `as()` to cast to a derived type is preferred. - m == op; // False, `op` now points to [rax + r10 + 1], which is not [rax + r10]. - - // Emitting 'mov' - a.mov(dst, m); // Type-safe way. - a.mov(dst, op); // Not possible, `mov` doesn't provide `mov(x86::Gp, Operand)` overload. - - a.emit(x86::Inst::kIdMov, dst, m); // Type-unsafe, but possible. - a.emit(x86::Inst::kIdMov, dst, op); // Also possible, `emit()` is typeless and can be used with raw `Operand`s. -} -``` - -Some operands have to be created explicitly by `BaseEmitter`. For example labels must be created by `newLabel()` before they are used. - -### Assembler Example - -`x86::Assembler` is a code emitter that emits machine code into a CodeBuffer directly. It's capable of targeting both 32-bit and 64-bit instruction sets and it's possible to target both instruction sets within the same code-base. The following example shows how to generate a function that works in both 32-bit and 64-bit modes, and how to use JitRuntime, `CodeHolder`, and `x86::Assembler` together. - -The example handles 3 calling conventions manually just to show how it could be done, however, AsmJit contains utilities that can be used to create function prologs and epilogs automatically, but these concepts will be explained later. 
- -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef int (*SumFunc)(const int* arr, size_t count); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Create a runtime specialized for JIT. - CodeHolder code; // Create a CodeHolder. - - code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`. - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions: - // 32-BIT - passed all arguments by stack. - // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9. - // UNIX64 - passes first 6 arguments by RDI, RSI, RCX, RDX, R8, and R9. - x86::Gp arr, cnt; - x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register. - - if (ASMJIT_ARCH_BITS == 64) { - #if defined(_WIN32) - arr = x86::rcx; // First argument (array ptr). - cnt = x86::rdx; // Second argument (number of elements) - #else - arr = x86::rdi; // First argument (array ptr). - cnt = x86::rsi; // Second argument (number of elements) - #endif - } - else { - arr = x86::edx; // Use EDX to hold the array pointer. - cnt = x86::ecx; // Use ECX to hold the counter. - a.mov(arr, x86::ptr(x86::esp, 4)); // Fetch first argument from [ESP + 4]. - a.mov(cnt, x86::ptr(x86::esp, 8)); // Fetch second argument from [ESP + 8]. - } - - Label Loop = a.newLabel(); // To construct the loop, we need some labels. - Label Exit = a.newLabel(); - - a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov'). - a.test(cnt, cnt); // Border case: - a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now. - - a.bind(Loop); // Start of a loop iteration. - a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'. - a.add(arr, 4); // Increment 'arr' pointer. - a.dec(cnt); // Decrease 'cnt'. - a.jnz(Loop); // If not zero jump to 'Loop'. - - a.bind(Exit); // Exit to handle the border case. - a.ret(); // Return from function ('sum' == 'eax'). - // ----> x86::Assembler is no longer needed from here and can be destroyed <---- - - SumFunc fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - static const int array[6] = { 4, 8, 15, 16, 23, 42 }; - - int result = fn(array, 6); // Execute the generated code. - printf("%d\n", result); // Print sum of array (108). - - jit.release(fn); // Remove the function from the runtime. - return 0; -} -``` - -The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64, and SysV64 caling conventions and will work on most X86 environments. - -### More About Memory Addresses - -X86 provides a complex memory addressing model that allows to encode addresses having a BASE register, INDEX register with a possible scale (left shift), and displacement (called offset in AsmJit). Memory address can also specify memory segment (segment-override in X86 terminology) and some instructions (gather / scatter) require INDEX to be a VECTOR register instead of a general-purpose register. AsmJit allows to encode and work with all forms of addresses mentioned and implemented by X86. It also allows to construct a 64-bit memory address, which is only allowed in one form of 'mov' instruction. 
- -```c++ -#include - -// Memory operand construction is provided by x86 namespace. -using namespace asmjit; -using namespace asmjit::x86; // Easier to access x86 regs. - -// BASE + OFFSET. -x86::Mem a = ptr(rax); // a = [rax] -x86::Mem b = ptr(rax, 15) // b = [rax + 15] - -// BASE + INDEX << SHIFT - Shift is in BITS as used by X86! -x86::Mem c = ptr(rax, rbx) // c = [rax + rbx] -x86::Mem d = ptr(rax, rbx, 2) // d = [rax + rbx << 2] -x86::Mem e = ptr(rax, rbx, 2, 15) // e = [rax + rbx << 2 + 15] - -// BASE + VM (Vector Index) (encoded as MOD+VSIB). -x86::Mem f = ptr(rax, xmm1) // f = [rax + xmm1] -x86::Mem g = ptr(rax, xmm1, 2) // g = [rax + xmm1 << 2] -x86::Mem h = ptr(rax, xmm1, 2, 15) // h = [rax + xmm1 << 2 + 15] - -// WITHOUT BASE: -uint64_t ADDR = (uint64_t)0x1234; -x86::Mem i = ptr(ADDR); // i = [0x1234] -x86::Mem j = ptr(ADDR, rbx); // j = [0x1234 + rbx] -x86::Mem k = ptr(ADDR, rbx, 2); // k = [0x1234 + rbx << 2] - -// LABEL - Will be encoded as RIP (64-bit) or absolute address (32-bit). -Label L = ...; -x86::Mem m = ptr(L); // m = [L] -x86::Mem n = ptr(L, rbx); // n = [L + rbx] -x86::Mem o = ptr(L, rbx, 2); // o = [L + rbx << 2] -x86::Mem p = ptr(L, rbx, 2, 15); // p = [L + rbx << 2 + 15] - -// RIP - 64-bit only (RIP can't use INDEX). -x86::Mem q = ptr(rip, 24); // q = [rip + 24] -``` - -Memory operands can optionally contain memory size. This is required by instructions where the memory size cannot be deduced from other operands, like `inc` and `dec`: - -```c++ -x86::Mem a = x86::dword_ptr(rax, rbx); // dword ptr [rax + rbx]. -x86::Mem b = x86::qword_ptr(rdx, rsi, 0, 1); // qword ptr [rdx + rsi << 0 + 1]. -``` - -Memory operands provide API that can be used to work with them: - -```c++ -x86::Mem mem = x86::dword_ptr(rax, 12); // dword ptr [rax + 12]. - -mem.hasBase(); // true. -mem.hasIndex(); // false. -mem.size(); // 4. -mem.offset(); // 12. - -mem.setSize(0); // Sets the size to 0 (makes it sizeless). -mem.addOffset(-1); // Adds -1 to the offset and makes it 11. -mem.setOffset(0); // Sets the offset to 0. -mem.setBase(rcx); // Changes BASE to RCX. -mem.setIndex(rax); // Changes INDEX to RAX. -mem.hasIndex(); // true. - -// ... -``` - -Making changes to memory operand is very comfortable when emitting loads and stores: - -```c++ -#include - -using namespace asmjit; - -x86::Assembler a(...); // Your initialized x86::Assembler. -x86::Mem m = x86::ptr(eax); // Construct [eax] memory operand. - -// One way of emitting bunch of loads is to use `mem.adjusted()`. It returns -// a new memory operand and keeps the source operand unchanged. -a.movaps(x86::xmm0, m); // No adjustment needed to load [eax]. -a.movaps(x86::xmm1, m.adjusted(16)); // Loads from [eax + 16]. -a.movaps(x86::xmm2, m.adjusted(32)); // Loads from [eax + 32]. -a.movaps(x86::xmm3, m.adjusted(48)); // Loads from [eax + 48]. - -// ... do something with xmm0-3 ... - -// Another way of adjusting memory is to change the operand in-place. If you -// want to keep the original operand you can simply clone it. -x86::Mem mx = m.clone(); -a.movaps(mx, x86::xmm0); mx.addOffset(16);// Stores to [eax] (and adds 16 to mx). -a.movaps(mx, x86::xmm1); mx.addOffset(16);// Stores to [eax + 16] (and adds 16 to mx). -a.movaps(mx, x86::xmm2); mx.addOffset(16);// Stores to [eax + 32] (and adds 16 to mx). -a.movaps(mx, x86::xmm3); // Stores to [eax + 48]. -``` - -You can explore the possibilities by taking a look at: - - * [core/operand.h](./src/asmjit/core/operand.h) - * [x86/x86operand.h](./src/asmjit/x86/x86operand.h). 
- -### More About CodeInfo - -In the first complete example the `CodeInfo` is retrieved from `JitRuntime`. It's logical as `JitRuntime` will always return a `CodeInfo` that is compatible with the runtime environment. For example if your application runs in 64-bit mode the `CodeInfo` will use `ArchInfo::kIdX64` architecture in contrast to `ArchInfo::kIdX86`, which will be used in 32-bit mode. AsmJit also allows to setup `CodeInfo` manually, and to select a different architecture when needed. So let's do something else this time, let's always generate a 32-bit code and print it's binary representation. To do that, we create our own `CodeInfo` and initialize it to `ArchInfo::kIdX86` architecture. CodeInfo will populate all basic fields just based on the architecture we provide, so it's super-easy: - -```c++ -#include -#include - -using namespace asmjit; - -int main(int argc, char* argv[]) { - using namespace asmjit::x86; // Easier access to x86/x64 registers. - - CodeHolder code; // Create a CodeHolder. - code.init(CodeInfo(ArchInfo::kIdX86));// Initialize it for a 32-bit X86 target. + * **tools** - Tools used for configuring, documenting, and generating files. - // Generate a 32-bit function that sums 4 floats and looks like: - // void func(float* dst, const float* a, const float* b) - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - a.mov(eax, dword_ptr(esp, 4)); // Load the destination pointer. - a.mov(ecx, dword_ptr(esp, 8)); // Load the first source pointer. - a.mov(edx, dword_ptr(esp, 12)); // Load the second source pointer. - - a.movups(xmm0, ptr(ecx)); // Load 4 floats from [ecx] to XMM0. - a.movups(xmm1, ptr(edx)); // Load 4 floats from [edx] to XMM1. - a.addps(xmm0, xmm1); // Add 4 floats in XMM1 to XMM0. - a.movups(ptr(eax), xmm0); // Store the result to [eax]. - a.ret(); // Return from function. - - // We have no Runtime this time, it's on us what we do with the code. - // CodeHolder stores code in `Section`, which provides some basic properties - // and CodeBuffer structure. We are interested in section's CodeBuffer only. - // - // NOTE: The first section is always '.text', so it's safe to just use 0 index. - // Get it by using either `code.sectionById(0)` or `code.textSection()`. - CodeBuffer& buffer = code.sectionById(0)->buffer(); - - // Print the machine-code generated or do something more interesting with it? - // 8B4424048B4C24048B5424040F28010F58010F2900C3 - for (size_t i = 0; i < buffer.length; i++) - printf("%02X", buffer.data[i]); - - return 0; -} -``` - -### Explicit Code Relocation - -CodeInfo contains much more information than just the target architecture. It can be configured to specify a base-address (or a virtual base-address in a linker terminology), which could be static (useful when you know the location of the target's machine code) or dynamic. AsmJit assumes dynamic base-address by default and relocates the code held by `CodeHolder` to a user-provided address on-demand. To be able to relocate to a user-provided address it needs to store some information about relocations, which is represented by `RelocEntry`. Relocation entries are only required if you call external functions from the generated code that cannot be encoded by using a 32-bit displacement (X64 architecture doesn't provide an encodable 64-bit displacement). - -There is also a concept called `LabelLink` - label links are lightweight structs that don't have any identifier and are stored per label in a single-linked list. 
Label links represent either unbound yet used labels (that are valid in cases in which label was not bound but was already referenced by an instruction) and links that cross-sections (only relevant to code that uses multiple sections). Since crossing sections is something that cannot be resolved immediately these links persist until offsets of these sections are assigned and `CodeHolder::resolveUnresolvedLinks()` is called. It's an error if you end up with code that has unresolved label links after flattening. You can verify it by calling `CodeHolder::hasUnresolvedLinks()` and `CodeHolder::unresolvedLinkCount()`. - -AsmJit can flatten code that uses multiple sections by assigning each section an incrementing offset that respects its alignment. Use `CodeHolder::flatten()` to do that. After the sections are flattened their offsets and virtual-sizes were adjusted to respect section's buffer size and alignment. You must call `CodeHolder::resolveUnresolvedLinks()` before relocating the code held by it. You can also flatten your code manually by iterating over all sections and calculating their offsets (relative to base) by your own algorithm. In that case you don't have to call `CodeHolder::flatten()`, but you must still call `CodeHolder::resolveUnresolvedLinks()`. - -Next example shows how to use a built-in virtual memory allocator `JitAllocator` instead of using `JitRuntime` (just in case you want to use your own memory management) and how to relocate the generated code into your own memory block - you can use your own virtual memory allocator if you prefer that, but that's OS specific and it's already provided by AsmJit, so we will use what AsmJit offers instead of going deep into OS specific APIs. - -The following code is similar to the previous one, but implements a function working in both 32-bit and 64-bit environments: - -```c++ -#include -#include - -using namespace asmjit; - -typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b); - -int main(int argc, char* argv[]) { - CodeHolder code; // Create a CodeHolder. - code.init(CodeInfo(ArchInfo::kIdHost)); // Initialize it for the host architecture. - - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - // Generate a function runnable in both 32-bit and 64-bit architectures: - bool isX86 = ASMJIT_ARCH_X86 == 32; - - // Signature: 'void func(int* dst, const int* a, const int* b)'. - x86::Gp dst; - x86::Gp src_a; - x86::Gp src_b; - - // Handle the difference between 32-bit and 64-bit calling convention. - // (arguments passed through stack vs. arguments passed by registers). - if (isX86) { - dst = x86::eax; - src_a = x86::ecx; - src_b = x86::edx; - a.mov(dst , x86::dword_ptr(x86::esp, 4)); // Load the destination pointer. - a.mov(src_a, x86::dword_ptr(x86::esp, 8)); // Load the first source pointer. - a.mov(src_b, x86::dword_ptr(x86::esp, 12)); // Load the second source pointer. - } - else { - #if defined(_WIN32) - dst = x86::rcx; // First argument (destination pointer). - src_a = x86::rdx; // Second argument (source 'a' pointer). - src_b = x86::r8; // Third argument (source 'b' pointer). - #else - dst = x86::rdi; // First argument (destination pointer). - src_a = x86::rsi; // Second argument (source 'a' pointer). - src_b = x86::rdx; // Third argument (source 'b' pointer). - #endif - } - - a.movdqu(x86::xmm0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0. - a.movdqu(x86::xmm1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1. - a.paddd(x86::xmm0, x86::xmm1); // Add 4 ints in XMM1 to XMM0. 
- a.movdqu(x86::ptr(dst), x86::xmm0); // Store the result to [dst]. - a.ret(); // Return from function. - - // Even when we didn't use multiple sections AsmJit could insert one section - // called '.addrtab' (address table section), which would be filled by data - // required by relocations (absolute jumps and calls). You can omit this code - // if you are 100% sure your code doesn't contain multiple sections and - // such relocations. You can use `CodeHolder::hasAddressTable()` to verify - // whether the address table section does exist. - code.flatten(); - code.resolveUnresolvedLinks(); - - // After the code was generated it can be relocated manually to any memory - // location, however, we need to know it's size before we perform memory - // allocation. `CodeHolder::codeSize()` returns the worst estimated code - // size in case that relocations are not possible without trampolines (in - // that case some extra code at the end of the current code buffer is - // generated during relocation). - size_t estimatedSize = code.codeSize(); - - // Instead of rolling up our own memory allocator we can use the one AsmJit - // provides. It's decoupled so you don't need to use `JitRuntime` for that. - JitAllocator allocator; - - // Allocate an executable virtual memory and handle a possible failure. - void* p = allocator.alloc(estimatedSize); - if (!p) return 0; - - // Now relocate the code to the address provided by the memory allocator. - // Please note that this DOESN'T COPY anything to `p`. This function will - // store the address in CodeInfo and use relocation entries to patch the - // existing code in all sections to respect the base address provided. - code.relocateToBase((uint64_t)p); - - // This is purely optional. There are cases in which the relocation can - // omit unneeded data, which would shrink the size of address table. If - // that happened the `codeSize` returned after `relocateToBase()` would - // be smaller than the originally `estimatedSize`. - size_t codeSize = code.codeSize(); - - // This will copy code from all sections to `p`. Iterating over all - // sections and calling `memcpy()` would work as well, however, this - // function supports additional options that can be used to also zero - // pad sections' virtual size, etc. - // - // With some additional features, copyFlattenData() does roughly this: - // for (Section* section : code.sections()) - // memcpy((uint8_t*)p + section->offset(), - // section->data(), - // section->bufferSize()); - code.copyFlattenedData(p, codeSize, CodeHolder::kCopyWithPadding); - - // Execute the generated function. - int inA[4] = { 4, 3, 2, 1 }; - int inB[4] = { 1, 5, 2, 8 }; - int out[4]; - - // This code uses AsmJit's ptr_as_func<> to cast between void* and SumIntsFunc. - ptr_as_func(p)(out, inA, inB); - - // Prints {5 8 4 9} - printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]); - - // Release 'p' is it's no longer needed. It will be destroyed with 'vm' - // instance anyway, but it's a good practice to release it explicitly - // when you know that the function will not be needed anymore. - allocator.release(p); - - return 0; -} -``` - -If you know your base-address in advance (before code generation) you can use `CodeInfo::setBaseAddress()` to setup its initial value. In that case Assembler will know the absolute position of each instruction and would be able to use it during instruction encoding and prevent relocations in case the instruction is encodable. 
The following example shows how to configure the base address: - -```c++ -// Configure CodeInfo with base address. -CodeInfo ci(...); -ci.setBaseAddress(uint64_t(0x1234)); - -// Then initialize CodeHolder with it. -CodeHolder code; -code.init(ci); -``` - -### Using Native Registers - zax, zbx, zcx, ... - -AsmJit's X86 code emitters always provide functions to construct machine-size registers depending on the target. This feature is for people that want to write code targeting both 32-bit and 64-bit at the same time. In AsmJit terminology these registers are named **zax**, **zcx**, **zdx**, **zbx**, **zsp**, **zbp**, **zsi**, and **zdi** (they are defined in this exact order by X86). They are accessible through `x86::Assembler`, `x86::Builder`, and `x86::Compiler`. The following example illustrates how to use this feature: - -```c++ -#include -#include - -using namespace asmjit; - -typedef int (*Func)(void); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Create a runtime specialized for JIT. - CodeHolder code; // Create a CodeHolder. - - code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`. - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - // Let's get these registers from x86::Assembler. - x86::Gp zbp = a.zbp(); - x86::Gp zsp = a.zsp(); - - int stackSize = 32; - - // Function prolog. - a.push(zbp); - a.mov(zbp, zsp); - a.sub(zsp, stackSize); - - // ... emit some code (this just sets return value to zero) ... - a.xor_(x86::eax, x86::eax); - - // Function epilog and return. - a.mov(zsp, zbp); - a.pop(zbp); - a.ret(); - - // To make the example complete let's call it. - Func fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - - int result = fn(); // Execute the generated code. - printf("%d\n", result); // Print the resulting "0". - - jit.release(fn); // Remove the function from the runtime. - return 0; -} -``` - -The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible: - -```c++ -void example(x86::Assembler& a) { - x86::Gp zax = a.gpz(x86::Gp::kIdAx); - x86::Gp zbx = a.gpz(x86::Gp::kIdBx); - x86::Gp zcx = a.gpz(x86::Gp::kIdCx); - x86::Gp zdx = a.gpz(x86::Gp::kIdDx); - - // You can also change register's id easily. - x86::Gp zsp = zax; - zsp.setId(4); // or x86::Gp::kIdSp. -} -``` - -Cloning existing registers and chaning their IDs is fine in AsmJit; and this technique is used internally in many places. - -### Using Assembler as Code-Patcher - -This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine-code it generates into a `CodeBuffer`, however, it also allows to set the offset in `CodeBuffer` explicitly and to overwrite its content. This technique is extremely dangerous for asm beginners as X86 instructions have variable length (see below), so you should in general only patch code to change instruction's offset or some basic other details you didn't know about the first time you emitted it. A typical scenario that requires code-patching is when you start emitting function and you don't know how much stack you want to reserve for it. 
- -Before we go further it's important to introduce instruction options, because they can help with code-patching (and not only patching, but that will be explained in AVX-512 section): - - * Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit this is usually called 'short form' and 'long form'. - * AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always good - this decision is used by majority of assemblers out there. - * AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit the long form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note the underscore after each function name as it avoids collision with built-in C++ types. - -To illustrate what short form and long form means in binary let's assume we want to emit `add esp, 16` instruction, which has two possible binary encodings: - - * `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x8C), MOD/RM byte (0xC4) and an 8-bit immediate value representing `16`. - * `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`. - -If you generate an instruction in a short form and then patch it in a long form or vice-versa then something really bad will happen when you try to execute such code. The following example illustrates how to patch the code properly (it just extends the previous example): - -```c++ -#include -#include - -using namespace asmjit; - -typedef int (*Func)(void); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Create a runtime specialized for JIT. - CodeHolder code; // Create a CodeHolder. - - code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`. - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - // Let's get these registers from x86::Assembler. - x86::Gp zbp = a.zbp(); - x86::Gp zsp = a.zsp(); - - // Function prolog. - a.push(zbp); - a.mov(zbp, zsp); - - // This is where we are gonna patch the code later, so let's get the offset - // (the current location) from the beginning of the code-buffer. - size_t patchOffset = a.offset(); - // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form. - a.long_().sub(zsp, 0); - - // ... emit some code (this just sets return value to zero) ... - a.xor_(x86::eax, x86::eax); - - // Function epilog and return. - a.mov(zsp, zbp); - a.pop(zbp); - a.ret(); - - // Now we know how much stack size we want to reserve. I have chosen 128 - // bytes on purpose as it's encodable only in long form that we have used. - - int stackSize = 128; // Number of bytes to reserve on the stack. - a.setOffset(patchOffset); // Move the current cursor to `patchOffset`. - a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form. - - // Now the code is ready to be called - Func fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - - int result = fn(); // Execute the generated code. - printf("%d\n", result); // Print the resulting "0". - - jit.release(fn); // Remove the function from the runtime. - return 0; -} -``` - -If you run the example it would just work. 
As an experiment you can try removing `long_()` form to see what happens when wrong code is generated. - -### Code Patching and REX Prefix - -In 64-bit mode there is one more thing to worry about when patching code - REX prefix. It's a single byte prefix designed to address registers with ids from 9 to 15 and to override the default width of operation from 32 to 64 bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation width then it's important to take care of REX prefix as well. - -AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the instruction emitted will always use REX prefix even when it's encodable without it. The following list contains some instructions and their binary representations to illustrate when it's emitted: - - * `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix. - * `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40). - * `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48). - * `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41). - * `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49). - -### Generic Function API - -So far all examples shown above handled creating function prologs and epilogs manually. While it's possible to do it that way it's much better to automate such process as function calling conventions vary across architectures and also across operating systems. - -AsmJit contains a functionality that can be used to define function signatures and to calculate automatically optimal function frame that can be used directly by a prolog and epilog inserter. This feature was exclusive to AsmJit's Compiler for a very long time, but was abstracted out and is now available for all users regardless of BaseEmitter they use. The design of handling functions prologs and epilogs allows generally two use cases: - - * Calculate function frame before the function is generated - this is the only way if you use pure `Assembler` emitter and shown in the next example. - * Calculate function frame after the function is generated - this way is generally used by `Builder` and `Compiler` emitters(will be described together with `x86::Compiler`). - -The following concepts are used to describe and create functions in AsmJit: - - * `Type` - Type is an 8-bit value that describes a platform independent type as we know from C/C++. It provides abstractions for most common types like `int8_t`, `uint32_t`, `uintptr_t`, `float`, `double`, and all possible vector types to match ISAs up to AVX512. `Type::Id` was introduced originally to be used with the Compiler infrastucture, but is now used by `FuncSignature` as well. - - * `CallConv` - Describes a calling convention - this class contains instructions to assign registers and stack addresses to function arguments and return value(s), but doesn't specify any function signature. Calling conventions are architecture and OS dependent. - - * `FuncSignature` - Describes a function signature, for example `int func(int, int)`. 
`FuncSignature` contains a function calling convention id, return value type, and function arguments. The signature itself is platform independent and uses `Type::Id` to describe types of function arguments and its return value(s). - - * `FuncDetail` - Architecture and ABI dependent information that describes `CallConv` and expanded `FuncSignature`. Each function argument and return value is represented as `FuncValue` that contains the original `Type::Id` enriched by additional information that specifies if the value is passed/returned by register (and which register) or by stack. Each value also contains some other metadata that provide additional information required to handle it properly (for example if a vector value is passed indirectly by a pointer as required by WIN64 calling convention, etc...). - - * `FuncFrame` - Contains information about the function frame that can be used by prolog/epilog inserter (PEI). Holds call stack size size and alignment, local stack size and alignment, and various attributes that describe how prolog and epilog should be constructed. `FuncFrame` doesn't know anything about function's arguments or return values, it hold only information necessary to create a valid and ABI conforming function prologs and epilogs. - - * `FuncArgsAssignment` - A helper class that can be used to reassign function arguments into user specified registers. It's architecture and ABI dependent mapping from function arguments described by CallConv and FuncDetail into registers specified by the user. - -It's a lot of concepts where each represents one step in the function frame calculation. In addition, the whole machinery can also be used to create function calls, instead of function prologs and epilogs. The next example shows how AsmJit can be used to create functions for both 32-bit and 64-bit targets and various calling conventions: - -```c++ -#include -#include - -using namespace asmjit; - -typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Create JIT Runtime. - CodeHolder code; // Create a CodeHolder. - - code.init(jit.codeInfo()); // Initialize it to match `jit`. - x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. - - // Decide which registers will be mapped to function arguments. Try changing - // registers of `dst`, `src_a`, and `src_b` and see what happens in function's - // prolog and epilog. - x86::Gp dst = a.zax(); - x86::Gp src_a = a.zcx(); - x86::Gp src_b = a.zdx(); - - X86::Xmm vec0 = x86::xmm0; - X86::Xmm vec1 = x86::xmm1; - - // Create and initialize `FuncDetail` and `FuncFrame`. - FuncDetail func; - func.init(FuncSignatureT(CallConv::kIdHost)); - - FuncFrame frame; - frame.init(func); - - // Make XMM0 and XMM1 dirty; `kGroupVec` describes XMM|YMM|ZMM registers. - frame.setDirtyRegs(x86::Reg::kGroupVec, IntUtils::mask(0, 1)); - - // Alternatively, if you don't want to use register masks you can pass `BaseReg` - // to `addDirtyRegs()`. The following code would add both `xmm0` and `xmm1`. - frame.addDirtyRegs(x86::xmm0, x86::xmm1); - - FuncArgsAssignment args(&func); // Create arguments assignment context. - args.assignAll(dst, src_a, src_b); // Assign our registers to arguments. - args.updateFrameInfo(frame); // Reflect our args in FuncFrame. - frame.finalize(); // Finalize the FuncFrame (updates it). - - a.emitProlog(frame); // Emit function prolog. - a.emitArgsAssignment(frame, args); // Assign arguments to registers. 
- a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0. - a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1. - a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0. - a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst]. - a.emitEpilog(frame); // Emit function epilog and return. - - SumIntsFunc fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error case. - - // Execute the generated function. - int inA[4] = { 4, 3, 2, 1 }; - int inB[4] = { 1, 5, 2, 8 }; - int out[4]; - fn(out, inA, inB); - - // Prints {5 8 4 9} - printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]); - - jit.release(fn); // Remove the function from the runtime. - return 0; -} -``` - - -Builder Interface ------------------ - -Both `Builder` and `Compiler` are emitters that emit everything to a representation that allows further processing. The code stored in such representation is completely safe to be patched, simplified, reordered, obfuscated, removed, injected, analyzed, and 'think-of-anything-else'. Each instruction, label, directive, etc... is stored in `BaseNode` (or derived class like `InstNode` or `LabelNode`) and contains all the information required to pass it later to the `Assembler`. - -There is a huge difference between `Builder` and `Compiler`: - - * `Builder` (low-level): - * Maximum compatibility with `Assembler`, easy to switch from `Assembler` to `Builder` and vice versa. - * Doesn't generate machine code directly, allows to serialize to `Assembler` when the whole code is ready to be encoded. - - * `Compiler` (high-level): - * Virtual registers - allows to use unlimited number of virtual registers which are allocated into physical registers by a built-in register allocator. - * Function nodes - allows to create functions by specifying their signatures and assigning virtual registers to function arguments and return value(s). - * Function calls - allows to call other functions within the generated code by using the same interface that is used to create functions. - -There are multiple node types used by both `Builder` and `Compiler`: - - * Basic nodes: - * `BaseNode` - Base class for all nodes. - * `InstNode` - Instruction node. - * `AlignNode` - Alignment directive (.align). - * `LabelNode` - Label (location where to bound it). - - * Data nodes: - * `DataNode` - Data embedded into the code. - * `ConstPoolNode` - Constant pool data. - * `LabelDataNode` - Label address embedded as data. - - * Informative nodes: - * `CommentNode` - Contains a comment string, doesn't affect code generation. - * `SentinelNode` - A marker that can be used to remember certain position, doesn't affect code generation. - - * Compiler-only nodes: - * `FuncNode` - Start of a function. - * `FuncRetNode` - Return from a function. - * `FuncCallNode` - Function call. - -### Using Builder - -The Builder interface was designed to be used as an `Assembler` replacement in case that post-processing of the generated code is required. The code can be modified during or after code generation. The post processing can be done manually or through `Pass` (Code-Builder Pass) object. Builder stores the emitted code as a double-linked list, which allows O(1) insertion and removal. - -The code representation used by `Builder` is compatible with everything AsmJit provides. Each instruction is stored as `InstNode`, which contains instruction id, options, and operands. 
Each instruction emitted will create a new `InstNode` instance and add it to the current cursor in the double-linked list of nodes. Since the instruction stream used by `Builder` can be manipulated, we can rewrite the **SumInts** example into the following: +TODO +---- -```c++ -#include -#include + * [ ] Core: + * [ ] Add support for user external buffers in CodeBuffer / CodeHolder. + * [ ] Register allocator doesn't understand register pairs, affected instructions: + * [ ] v4fmaddps, v4fmaddss, v4fnmaddps, v4fnmaddss + * [ ] vp4dpwssd, vp4dpwssds + * [ ] vp2intersectd, vp2intersectq + * [ ] Ports: + * [ ] ARM/Thumb/AArch64 support. -using namespace asmjit; +Support +------- -typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b); + * AsmJit project has both community and commercial support, see [AsmJit's Support Page](https://asmjit.com/support.html) + * You can help the development and maintenance through Petr Kobalicek's [GitHub sponsors Profile](https://github.com/sponsors/kobalicek) -// Small helper function to print the current content of `cb`. -static void dumpCode(BaseBuilder& cb, const char* phase) { - StringBuilder sb; - cb.dump(sb); - printf("%s:\n%s\n", phase, sb.data()); -} - -int main(int argc, char* argv[]) { - JitRuntime jit; // Create JIT Runtime. - CodeHolder code; // Create a CodeHolder. - - code.init(jit.codeInfo()); // Initialize it to match `jit`. - x86::Builder cb(&code); // Create and attach x86::Builder to `code`. - - // Decide which registers will be mapped to function arguments. Try changing - // registers of `dst`, `src_a`, and `src_b` and see what happens in function's - // prolog and epilog. - x86::Gp dst = cb.zax(); - x86::Gp src_a = cb.zcx(); - x86::Gp src_b = cb.zdx(); - - X86::Xmm vec0 = x86::xmm0; - X86::Xmm vec1 = x86::xmm1; - - // Create and initialize `FuncDetail`. - FuncDetail func; - func.init(FuncSignatureT(CallConv::kIdHost)); - - // Remember prolog insertion point. - BaseNode* prologInsertionPoint = cb.cursor(); - - // Emit function body: - cb.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0. - cb.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1. - cb.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0. - cb.movdqu(x86::ptr(dst), vec0); // Store the result to [dst]. - - // Remember epilog insertion point. - BaseNode* epilogInsertionPoint = cb.cursor(); - - // Let's see what we have now. - dumpCode(cb, "Raw Function"); - - // Now, after we emitted the function body, we can insert the prolog, arguments - // allocation, and epilog. This is not possible with using pure x86::Assembler. - FuncFrame frame; - frame.init(func); - - // Make XMM0 and XMM1 dirty; `kGroupVec` describes XMM|YMM|ZMM registers. - frame.setDirtyRegs(x86::Reg::kGroupVec, IntUtils::mask(0, 1)); - - FuncArgsAssignment args(&func); // Create arguments assignment context. - args.assignAll(dst, src_a, src_b); // Assign our registers to arguments. - args.updateFrame(frame); // Reflect our args in FuncFrame. - frame.finalize(); // Finalize the FuncFrame (updates it). - - // Insert function prolog and allocate arguments to registers. - cb.setCursor(prologInsertionPoint); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); - - // Insert function epilog. - cb.setCursor(epilogInsertionPoint); - cb.emitEpilog(frame); - - // Let's see how the function's prolog and epilog looks. 
- dumpCode(cb, "Prolog & Epilog"); - - // IMPORTANT: Builder requires `finalize()` to be called to serialize the code - // to the Assembler (it automatically creates one if not attached). - cb.finalize(); - - SumIntsFunc fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error case. - - // Execute the generated function. - int inA[4] = { 4, 3, 2, 1 }; - int inB[4] = { 1, 5, 2, 8 }; - int out[4]; - fn(out, inA, inB); - - // Prints {5 8 4 9} - printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]); - - jit.release(fn); // Remove the function from the runtime. - return 0; -} -``` - -When the example is executed it should output the following (this one using AMD64-SystemV ABI): - -``` -Raw Function: -movdqu xmm0, [rcx] -movdqu xmm1, [rdx] -paddd xmm0, xmm1 -movdqu [rax], xmm0 - -Prolog & Epilog: -mov rax, rdi -mov rcx, rsi -movdqu xmm0, [rcx] -movdqu xmm1, [rdx] -paddd xmm0, xmm1 -movdqu [rax], xmm0 -ret - -{5 8 4 9} -``` - -The number of use-cases of **x86::Builder** is not limited and highly depends on your creativity and experience. The previous example can be easily improved to collect all dirty registers inside the function programmatically and to pass them to `frame.setDirtyRegs()`: - -```c++ -#include - -using namespace asmjit; - -// NOTE: This function doesn't cover all possible constructs. It ignores -// instructions that write to implicit registers that are not part of the -// operand list. It also counts read-only registers. Real implementation -// would be a bit more complicated, but still relatively easy to implement. -static void collectDirtyRegs(const BaseNode* first, const BaseNode* last, uint32_t regMask[BaseReg::kGroupVirt]) { - const BaseNode* node = first; - while (node) { - if (node->actsAsInst()) { - const InstNode* inst = node->as(); - const Operand* opArray = inst->operands(); - - for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) { - const Operand& op = opArray[i]; - if (op.isReg()) { - const x86::Reg& reg = op.as(); - if (reg.group() < BaseReg::kGroupVirt) - regMask[reg.group()] |= 1u << reg.id(); - } - } - } - - if (node == last) break; - node = node->next(); - n} - -static void setDirtyRegsOfFuncFrame(const x86::Builder& cb, FuncFrame& frame) { - uint32_t regMask[BaseReg::kGroupVirt] = { 0 }; - collectDirtyRegs(cb.firstNode(), cb.lastNode(), regMask); - - // X86/X64 ABIs only require to save GP/XMM registers: - frame.setDirtyRegs(x86::Reg::kGroupGp , regMask[x86::Reg::kGroupGp ]); - frame.setDirtyRegs(x86::Reg::kGroupVec, regMask[x86::Reg::kGroupVec]); -} -``` - -### Using x86::Assembler or x86::Builder through X86::Emitter - -Even when **Assembler** and **Builder** provide the same interface as defined by **BaseEmitter** their platform dependent variants (**x86::Assembler** and **x86::Builder**, respective) cannot be interchanged or casted to each other by using C++'s `static_cast<>`. 
The main reason is the inheritance graph of these classes is different and cast-incompatible, as illustrated in the following graph: - -``` - +--------------+ +=========================+ - +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+ - | +--------------+ | +=========================+ | - | (abstract) | (mixin) | - | +--------------+ +~~~~~~~~~~~~~~+ | | - +-->| BaseAssembler|---->|x86::Assembler|<--+ | - | +--------------+ +~~~~~~~~~~~~~~+ | | - | (abstract) (final) | | -+===============+ | +--------------+ +~~~~~~~~~~~~~~+ | | -# BaseEmitter #--+-->| BaseBuilder |--+->| x86::Builder |<--+ | -+===============+ +--------------+ | +~~~~~~~~~~~~~~+ | - (abstract) (abstract) | (final) | - +---------------------+ | - | | - | +--------------+ +~~~~~~~~~~~~~~+ +=========================+ | - +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+ - +--------------+ +~~~~~~~~~~~~~~+ +=========================+ - (abstract) (final) (mixin) -``` - -The graph basically shows that it's not possible to cast `x86::Assembler` to `x86::Builder` and vice versa. However, since both `x86::Assembler` and `x86::Builder` share the same interface defined by both `BaseEmitter` and `x86::EmmiterImplicitT` a class called `x86::Emitter` was introduced to make it possible to write a function that can emit to both `x86::Assembler` and `x86::Builder`. Note that `x86::Emitter` cannot be created, it's abstract and has private constructors and destructors; it was only designed to be casted to and used as an interface. - -Each X86 emitter implements a member function called `as()`, which casts the instance to the `x86::Emitter`, as illustrated on the next example: - -```c++ -#include - -using namespace asmjit; - -static void emitSomething(x86::Emitter* e) { - e->mov(x86::eax, x86::ebx); -} - -static void assemble(CodeHolder& code, bool useAsm) { - if (useAsm) { - x86::Assembler a(&code); - emitSomething(a.as()); - } - else { - x86::Builder cb(&code); - emitSomething(cb.as()); - - // IMPORTANT: Builder requires `finalize()` to be called to serialize the - // code to the Assembler (it automatically creates one if not attached). - cb.finalize(); - } -} -``` - -The example above shows how to create a function that can emit code to either **x86::Assembler** or **x86::Builder** through **x86::Emitter**, which provides emitter-neutral functionality. **x86::Emitter**, however, doesn't provide any emitter **x86::Assembler** or **x86::Builder** specific functionality like **setCursor()**. - - -Compiler Interface ------------------- - -**Compiler** is a high-level code emitter that provides virtual registers and automatically handles function calling conventions. It's still architecture dependent, but makes the code generation much easier by offering a built-in register allocator and function builder. Functions are essential; the first-step to generate some code is to define the signature of the function you want to generate (before generating the function body). Function arguments and return value(s) are handled by assigning virtual registers to them. Similarly, function calls are handled the same way. - -**Compiler** also makes the use of passes (introduced by **Builder**) and automatically adds an architecture-dependent register allocator pass to the list of passes when attached to **CodeHolder**. - -### Compiler Basics - -The first **Compiler** example shows how to generate a function that simply returns an integer value. 
It's an analogy to the very first example: - -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef int (*Func)(void); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Runtime specialized for JIT code execution. - CodeHolder code; // Holds code and relocation information. - - code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime. - x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`. - - cc.addFunc(FuncSignatureT()); // Begin a function of `int fn(void)` signature. - - x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register. - cc.mov(vReg, 1); // Move one to our virtual register `vReg`. - cc.ret(vReg); // Return `vReg` from the function. - - cc.endFunc(); // End of the function body. - cc.finalize(); // Translate and assemble the whole `cc` content. - // ----> x86::Compiler is no longer needed from here and can be destroyed <---- - - Func fn; - Error err = jit.add(&fn, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - int result = fn(); // Execute the generated code. - printf("%d\n", result); // Print the resulting "1". - - jit.release(fn); // RAII, but let's make it explicit. - return 0; -} -``` - -The **addFunc()** and **endFunc()** methods define the body of the function. Both functions must be called per function, but the body doesn't have to be generated in sequence. An example of generating two functions will be shown later. The next example shows more complicated code that contain a loop and generates a **memcpy32()** function: - -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Runtime specialized for JIT code execution. - CodeHolder code; // Holds code and relocation information. - - code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime. - x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`. - - cc.addFunc( // Begin the function of the following signature: - FuncSignatureT()); // 3rd argument - size_t (machine reg-size). - - Label L_Loop = cc.newLabel(); // Start of the loop. - Label L_Exit = cc.newLabel(); // Used to exit early. - - x86::Gp dst = cc.newIntPtr("dst"); // Create `dst` register (destination pointer). - x86::Gp src = cc.newIntPtr("src"); // Create `src` register (source pointer). - x86::Gp cnt = cc.newUIntPtr("cnt"); // Create `cnt` register (loop counter). - - cc.setArg(0, dst); // Assign `dst` argument. - cc.setArg(1, src); // Assign `src` argument. - cc.setArg(2, cnt); // Assign `cnt` argument. - - cc.test(cnt, cnt); // Early exit if length is zero. - cc.jz(L_Exit); - - cc.bind(L_Loop); // Bind the beginning of the loop here. - - x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes). - cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address. - cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address. - - cc.add(src, 4); // Increment `src`. - cc.add(dst, 4); // Increment `dst`. - - cc.dec(cnt); // Loop until `cnt` is non-zero. - cc.jnz(L_Loop); - - cc.bind(L_Exit); // Label used by early exit. - cc.endFunc(); // End of the function body. - - cc.finalize(); // Translate and assemble the whole `cc` content. 
- // ----> x86::Compiler is no longer needed from here and can be destroyed <---- - - MemCpy32 memcpy32; - Error err = jit.add(&memcpy32, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - // Test the generated code. - uint32_t input[6] = { 1, 2, 3, 5, 8, 13 }; - uint32_t output[6]; - memcpy32(output, input, 6); - - for (uint32_t i = 0; i < 6; i++) - printf("%d\n", output[i]); - - jit.release(memcpy32); // RAII, but let's make it explicit. - return 0; -} -``` - -### Recursive Functions - -It's possible to create more functions by using the same `x86::Compiler` instance and make links between them. In such case it's important to keep the pointer to the `FuncNode` node. The first example creates a simple Fibonacci function that calls itself recursively: - -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef uint32_t (*Fibonacci)(uint32_t x); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Runtime specialized for JIT code execution. - CodeHolder code; // Holds code and relocation information. - - code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime. - x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`. - - FuncNode* func = cc.addFunc( // Begin of the Fibonacci function, `addFunc()` - FuncSignatureT()); // Returns a pointer to the `FuncNode` node. - - Label L_Exit = cc.newLabel() // Exit label. - x86::Gp x = cc.newU32(); // Function `x` argument. - x86::Gp y = cc.newU32(); // Temporary. - - cc.setArg(0, x); - - cc.cmp(x, 3); // Return `x` if less than 3. - cc.jb(L_Exit); - - cc.mov(y, x); // Make copy of the original `x`. - cc.dec(x); // Decrease `x`. - - FuncCallNode* call = cc.call( // Function call: - func->label(), // Function address or Label. - FuncSignatureT()); // Function signature. - - call->setArg(0, x); // Assign `x` as the first argument and - call->setRet(0, x); // assign `x` as a return value as well. - - cc.add(x, y); // Combine the return value with `y`. - - cc.bind(L_Exit); - cc.ret(x); // Return `x`. - cc.endFunc(); // End of the function body. - - cc.finalize(); // Translate and assemble the whole `cc` content. - // ----> x86::Compiler is no longer needed from here and can be destroyed <---- - - Fibonacci fib; - Error err = jit.add(&fib, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - printf("Fib(%u) -> %u\n", 8, fib(8)); // Test the generated code. - - jit.release(fib); // RAII, but let's make it explicit. - return 0; -} -``` - -### Stack Management - -**Compiler** manages function's stack-frame, which is used by the register allocator to spill virtual registers. It also provides an interface to allocate user-defined block of the stack, which can be used as a temporary storage by the generated function. In the following example a stack of 256 bytes size is allocated, filled by bytes starting from 0 to 255 and then iterated again to sum all the values. - -```c++ -#include -#include - -using namespace asmjit; - -// Signature of the generated function. -typedef int (*Func)(void); - -int main(int argc, char* argv[]) { - JitRuntime jit; // Runtime specialized for JIT code execution. - CodeHolder code; // Holds code and relocation information. 
- - code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime. - x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`. - - cc.addFunc(FuncSignatureT()); // Create a function that returns 'int'. - - x86::Gp p = cc.newIntPtr("p"); - x86::Gp i = cc.newIntPtr("i"); - - x86::Mem stack = cc.newStack(256, 4); // Allocate 256 bytes on the stack aligned to 4 bytes. - x86::Mem stackIdx(stack); // Copy of `stack` with `i` added. - stackIdx.setIndex(i); // stackIdx <- stack[i]. - stackIdx.setSize(1); // stackIdx <- byte ptr stack[i]. - - // Load a stack address to `p`. This step is purely optional and shows - // that `lea` is useful to load a memory operands address (even absolute) - // to a general purpose register. - cc.lea(p, stack); - - // Clear `i` (`xor` as it's C++ keyword, hence `xor_` is used instead). - cc.xor_(i, i); - - Label L1 = cc.newLabel(); - Label L2 = cc.newLabel(); - - cc.bind(L1); // First loop, fill the stack. - cc.mov(stackIdx, i.r8()); // stack[i] = uint8_t(i). - - cc.inc(i); // i++; - cc.cmp(i, 256); // if (i < 256) - cc.jb(L1); // goto L1; - - // Second loop, sum all bytes stored in `stack`. - x86::Gp sum = cc.newI32("sum"); - x86::Gp val = cc.newI32("val"); - - cc.xor_(i, i); - cc.xor_(sum, sum); - - cc.bind(L2); - - cc.movzx(val, stackIdx); // val = uint32_t(stack[i]); - cc.add(sum, val); // sum += val; - - cc.inc(i); // i++; - cc.cmp(i, 256); // if (i < 256) - cc.jb(L2); // goto L2; - - cc.ret(sum); // Return the `sum` of all values. - cc.endFunc(); // End of the function body. - - cc.finalize(); // Translate and assemble the whole `cc` content. - // ----> x86::Compiler is no longer needed from here and can be destroyed <---- - - Func func; - Error err = jit.add(&func, &code); // Add the generated code to the runtime. - if (err) return 1; // Handle a possible error returned by AsmJit. - // ----> CodeHolder is no longer needed from here and can be destroyed <---- - - printf("Func() -> %d\n", func()); // Test the generated code. - - jit.release(func); // RAII, but let's make it explicit. - return 0; -} -``` - -### Constant Pool - -**Compiler** provides two constant pools for a general purpose code generation - local and global. Local constant pool is related to a single **FuncNode** node and is generally flushed after the function body, and global constant pool is flushed at the end of the generated code by **Compiler::finalize()**. - -```c++ -#include - -using namespace asmjit; - -static void exampleUseOfConstPool(x86::Compiler& cc) { - cc.addFunc(FuncSignatureT()); - - x86::Gp v0 = cc.newGpd("v0"); - x86::Gp v1 = cc.newGpd("v1"); - - x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200); - x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeLocal, 33); - - cc.mov(v0, c0); - cc.mov(v1, c1); - cc.add(v0, v1); - - cc.ret(v0); - cc.endFunc(); -} -``` - -### Jump Tables - -**Compiler** supports `jmp` instruction with reg/mem operand, which is a commonly used pattern to implement indirect jumps within a function, for example to implement `switch()` statement in a programming languages. By default AsmJit assumes that every basic block can be a possible jump target as it's unable to deduce targets from instruction's operands. This is a very pessimistic default that should be avoided if possible as it's costly and very unfriendly to liveness analysis and register allocation. 
So instead of relying on such pessimistic default, use **JumpAnnotation** to annotate indirect jumps: - -```c++ -#include - -using namespace asmjit; - -static void exampleUseOfIndirectJump(x86::Compiler& cc) { - cc.addFunc(FuncSignatureT(CallConv::kIdHost)); - - // Function arguments - x86::Xmm a = cc.newXmmSs("a"); - x86::Xmm b = cc.newXmmSs("b"); - x86::Gp op = cc.newUInt32("op"); - - x86::Gp target = cc.newIntPtr("target"); - x86::Gp offset = cc.newIntPtr("offset"); - - Label L_Table = cc.newLabel(); - Label L_Add = cc.newLabel(); - Label L_Sub = cc.newLabel(); - Label L_Mul = cc.newLabel(); - Label L_Div = cc.newLabel(); - Label L_End = cc.newLabel(); - - cc.setArg(0, a); - cc.setArg(1, b); - cc.setArg(2, op); - - // Jump annotation is a building block that allows to annotate all - // possible targets where `jmp()` can jump. It then drives the CFG - // contruction and liveness analysis, which impacts register allocation. - JumpAnnotation* annotation = cc.newJumpAnnotation(); - annotation->addLabel(L_Add); - annotation->addLabel(L_Sub); - annotation->addLabel(L_Mul); - annotation->addLabel(L_Div); - - // Most likely not the common indirect jump approach, but it - // doesn't really matter how final address is calculated. The - // most important path using JumpAnnotation with `jmp()`. - cc.lea(offset, x86::ptr(L_Table)); - if (cc.is64Bit()) - cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); - else - cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); - cc.add(target, offset); - cc.jmp(target, annotation); - - // Acts like a switch() statement in C. - cc.bind(L_Add); - cc.addss(a, b); - cc.jmp(L_End); - - cc.bind(L_Sub); - cc.subss(a, b); - cc.jmp(L_End); - - cc.bind(L_Mul); - cc.mulss(a, b); - cc.jmp(L_End); - - cc.bind(L_Div); - cc.divss(a, b); - - cc.bind(L_End); - cc.ret(a); - - cc.endFunc(); - - // Relative int32_t offsets of `L_XXX - L_Table`. - cc.bind(L_Table); - cc.embedLabelDelta(L_Add, L_Table, 4); - cc.embedLabelDelta(L_Sub, L_Table, 4); - cc.embedLabelDelta(L_Mul, L_Table, 4); - cc.embedLabelDelta(L_Div, L_Table, 4); -} -``` - - -Advanced Features ------------------ - -### Logging - -The initial phase of any project that generates machine code is not always smooth. Failure cases are common especially at the beginning of the project and AsmJit provides a logging functionality to address this issue. AsmJit does already a good job with function overloading to prevent from emitting semantically incorrect instructions, but it can't prevent from emitting machine code that is semantically correct, but doesn't work when it's executed. Logging has always been an important part of AsmJit's infrastructure and looking at logs can sometimes reveal code generation issues quickly. - -AsmJit provides API for logging and formatting: - * `Logger` - A logger that you can pass to `CodeHolder` and all emitters that inherit `BaseEmitter`. - * `FormatOptions` - Formatting options that can change how instructions and operands are formatted. - -AsmJit's `Logger` serves the following purposes: - * Provides a basic foundation for logging. - * Abstract class leaving the implementation (destination) on users. Two backends are built-in for simplicity: - * `FileLogger` implements logging into a standard `std::FILE` stream. - * `StringLogger` stores the logged text in `StringBuilder` instance. 
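A minimal sketch of the `StringLogger` variant, assuming this revision's `StringLogger::data()` accessor, captures the log in memory instead of writing it to a stream:

```c++
#include <asmjit/x86.h>
#include <stdio.h>

using namespace asmjit;

int main(int argc, char* argv[]) {
  JitRuntime jit;                  // Runtime specialized for JIT code execution.
  StringLogger logger;             // Logger that keeps the formatted text in memory.

  CodeHolder code;                 // Holds code and relocation information.
  code.init(jit.codeInfo());       // Initialize to the same arch as JIT runtime.
  code.setLogger(&logger);         // Attach the `logger` to `code` holder.

  x86::Assembler a(&code);
  a.mov(x86::eax, 1);              // Logged as text by `logger`.
  a.ret();

  printf("%s", logger.data());     // Inspect or store the captured log.
  return 0;
}
```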
- -AsmJit's `FormatOptions` provides the following to customize the formatting of instructions and operands: - * Flags: - * `FormatOptions::kFlagMachineCode` - Show a machine code of each encoded instruction. - * `FormatOptions::kFlagExplainConsts` - Show a text explanation of some immediate values that are used as predicates. - * `FormatOptions::kFlagHexImms` - Use hexadecimal notation to output immediates. - * `FormatOptions::kFlagHexOffsets` - Use hexadecimal notation to output offsets. - * `FormatOptions::kFlagRegCasts` - Show casts between various register types (compiler). - * `FormatOptions::kFlagPositions` - Show positions associated with nodes (compiler). - * Indentation: - * `FormatOptions::kIndentationCode` - Indentation of instructions and directives. - * `FormatOptions::kIndentationLabel` - Indentation of labels. - * `FormatOptions::kIndentationComment` - Indentation of whole-line comments. - -**Logger** is typically attached to **CodeHolder** and all attached code emitters automatically use it: - -```c++ -#include -#include - -using namespace asmjit; - -int main(int argc, char* argv[]) { - JitRuntime jit; // Runtime specialized for JIT code execution. - FileLogger logger(stdout); // Logger should always survive the CodeHolder. - - CodeHolder code; // Holds code and relocation information. - code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime. - code.setLogger(&logger); // Attach the `logger` to `code` holder. - - // ... code as usual, everything you emit will be logged to `stdout` ... - - return 0; -} -``` - -### Error Handling - -AsmJit uses error codes to represent and return errors. Every function where error can occur returns **Error**. Exceptions are never thrown by AsmJit even in extreme conditions like out-of-memory. Errors should never be ignored, however, checking errors after each asmjit API call would simply overcomplicate the whole code generation experience. To make life simpler AsmJit provides **ErrorHandler**, which provides **handleError()** function: - - `virtual bool handleError(Error err, const char* message, BaseEmitter* origin) = 0;` - -That can be overridden by AsmJit users and do the following: - - * 1. Record the error and continue (the way how the error is user-implemented). - * 2. Throw an exception. AsmJit doesn't use exceptions and is completely exception-safe, but it's perfectly legal to throw an exception from the error handler. - * 3. Use plain old C's `setjmp()` and `longjmp()`. Asmjit always puts `Assembler` and `Compiler` to a consistent state before calling the `handleError()` so `longjmp()` can be used without issues to cancel the code-generation if an error occurred. This method can be used if exception handling in your project is turned off and you still want some comfort. In most cases it should be safe as AsmJit uses Zone memory and the ownership of memory it allocates always ends with the instance that allocated it. If using this approach please never jump outside the life-time of **CodeHolder** and **BaseEmitter**. - -**ErrorHandler** can be attached to **CodeHolder** and/or **BaseEmitter** (which has a priority). The first example uses error handler that just prints the error, but lets AsmJit continue: - -```c++ -// Error handling #1: -#include - -#include - -// Error handler that just prints the error and lets AsmJit ignore it. 
-class SimpleErrorHandler : public asmjit::ErrorHandler {
-public:
-  inline SimpleErrorHandler() : err(asmjit::kErrorOk) {}
-
-  void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
-    this->err = err;
-    fprintf(stderr, "ERROR: %s\n", message);
-  }
-
-  asmjit::Error err;
-};
-
-int main(int argc, char* argv[]) {
-  using namespace asmjit;
-
-  JitRuntime jit;
-  SimpleErrorHandler eh;
-
-  CodeHolder code;
-  code.init(jit.codeInfo());
-  code.setErrorHandler(&eh);
-
-  // Try to emit instruction that doesn't exist.
-  x86::Assembler a(&code);
-  a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
-
-  if (eh.err) {
-    // Assembler failed!
-  }
-
-  return 0;
-}
-```
-
-If an error happens during instruction emitting / encoding, the assembler behaves transactionally - the output buffer won't advance if the encoding failed, so either a fully encoded instruction or nothing at all is emitted. The error handling shown above is useful, but it's still not the best way of dealing with errors in AsmJit. The following example shows how to use exception handling to handle errors in a more C++ way:
-
-```c++
-// Error handling #2:
-#include <asmjit/x86.h>
-
-#include <exception>
-#include <string>
-#include <stdio.h>
-
-// Error handler that throws a user-defined `AsmJitException`.
-class AsmJitException : public std::exception {
-public:
-  AsmJitException(asmjit::Error err, const char* message) noexcept
-    : err(err),
-      message(message) {}
-
-  const char* what() const noexcept override { return message.c_str(); }
-
-  asmjit::Error err;
-  std::string message;
-};
-
-class ThrowableErrorHandler : public asmjit::ErrorHandler {
-public:
-  // Throw is possible, functions that use ErrorHandler are never 'noexcept'.
-  void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
-    throw AsmJitException(err, message);
-  }
-};
-
-int main(int argc, char* argv[]) {
-  using namespace asmjit;
-
-  JitRuntime jit;
-  ThrowableErrorHandler eh;
-
-  CodeHolder code;
-  code.init(jit.codeInfo());
-  code.setErrorHandler(&eh);
-
-  x86::Assembler a(&code);
-
-  // Try to emit instruction that doesn't exist.
-  try {
-    a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
-  }
-  catch (const AsmJitException& ex) {
-    printf("EXCEPTION THROWN: %s\n", ex.what());
-  }
-
-  return 0;
-}
-```
-
-If C++ exceptions are not what you like, or your project turns them off completely, there is still a way of reducing the error handling to a minimum by using a standard `setjmp/longjmp` approach. AsmJit is exception-safe and cleans up everything before calling the **ErrorHandler**, so any approach is safe. You can simply jump from the error handler without causing any side-effects or memory leaks. The following example demonstrates how it could be done:
-
-```c++
-// Error handling #3:
-#include <asmjit/x86.h>
-
-#include <setjmp.h>
-#include <stdio.h>
-
-class LongJmpErrorHandler : public asmjit::ErrorHandler {
-public:
-  inline LongJmpErrorHandler() : err(asmjit::kErrorOk) {}
-
-  void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
-    this->err = err;
-    longjmp(state, 1);
-  }
-
-  jmp_buf state;
-  asmjit::Error err;
-};
-
-int main(int argc, char* argv[]) {
-  using namespace asmjit;
-
-  JitRuntime jit;
-  LongJmpErrorHandler eh;
-
-  CodeHolder code;
-  code.init(jit.codeInfo());
-  code.setErrorHandler(&eh);
-
-  x86::Assembler a(&code);
-
-  if (!setjmp(eh.state)) {
-    // Try to emit instruction that doesn't exist.
-    a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
-  }
-  else {
-    Error err = eh.err;
-    printf("ASMJIT ERROR: 0x%08X [%s]\n", err, DebugUtils::errorAsString(err));
-  }
-
-  return 0;
-}
-```
-
-### Code Injection
-
-Both `Builder` and `Compiler` emitters store their nodes in a double-linked list, which makes it easy to manipulate that list during or after code generation. Each node is always emitted next to the current `cursor` and the cursor is changed to that newly emitted node. The cursor can be explicitly retrieved and changed by `cursor()` and `setCursor()`, respectively.
-
-The following example shows how to inject code at the beginning of the function by implementing an `XmmConstInjector` helper class.
-
-```c++
-```
-
-### TODO
-
-...More documentation...
-
-
-
-Other Topics
-------------
-
-This section provides quick answers to some recurring questions and topics.
-
-### Instruction Validation
-
-AsmJit by default prefers performance when it comes to instruction encoding. The Assembler implementation only validates the operands that must be validated to select a proper encoding of the instruction. This means that by default it would accept instructions that do not really exist, like `mov rax, ebx`. This is great in release mode as it makes the assembler faster; however, it's not that great for development as it lets incorrect operands pass silently. To fix this, AsmJit provides a feature called **Strict Validation**, which validates each instruction before the Assembler tries to encode it. This feature can also be used without an Assembler instance through the `BaseInst::validate()` API.
-
-Emitter options are configured through CodeHolder:
-
-```c++
-CodeHolder code;
-
-// Enables strict instruction validation for all emitters attached to `code`.
-code.addEmitterOptions(BaseEmitter::kOptionStrictValidation);
-
-// Use either ErrorHandler attached to CodeHolder or Error code returned by
-// the Assembler.
-x86::Assembler a(&code);
-Error err = a.emit(x86::Inst::kIdMov, x86::eax, x86::al);
-if (err) { /* failed */ }
-```
-
-### Label Offsets and Links
-
-When you use a label that is not yet bound, the Assembler creates a `LabelLink`, which is then added to CodeHolder's `LabelEntry`. These links are also created for labels that are bound but reference some location in a different section. First, here are some functions that can be used to check some basics:
-
-```c++
-CodeHolder code = ...;
-Label L = ...;
-
-// Returns whether the Label `L` is bound.
-bool bound = code.isLabelBound(L or L.id());
-
-// Returns true if the code contains either referenced, but unbound labels,
-// or cross-section label links that are not resolved yet.
-bool value = code.hasUnresolvedLinks();   // Boolean answer.
-size_t count = code.unresolvedLinkCount(); // Count of links.
-```
-
-Please note that there is no API to return a count of unbound labels as this is completely unimportant from CodeHolder's perspective. If a label is not used then it doesn't matter whether it's bound or not; only used labels matter. After a Label is bound you can query its offset relative to the start of the section where it was bound:
-
-```c++
-CodeHolder code = ...;
-Label L = ...;
-
-// After you are done you can check the offset. The offset provided
-// is relative to the start of the section, see below for alternative.
-// If the given label is not bound then the offset returned will be zero.
-uint64_t offset = code.labelOffset(L or L.id()); - -// If you use multiple sections and want the offset relative to the base. -// NOTE: This function expects that the section has already an offset and -// the label-link was resolved (if this is not true you will still get an -// offset relative to the start of the section). -uint64_t offset = code.labelOffsetFromBase(L or L.id()); -``` - -### Sections - -Sections is a relatively new feature that allows to create multiple sections. It's supported by Assembler, Builder, and Compiler. Please note that using multiple sections is advanced and requires more understanding about how AsmJit works. There is a test-case [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) that shows how sections can be used. - -```c++ -CodeHolder code = ...; - -// Text section is always provided as the first section. -Section* text = code.textSection(); // or code.sectionById(0); - -// To create another section use `code.newSection()`. -Section* data; -Error err = code.newSection(&data, - ".data", // Section name - SIZE_MAX, // Name length if the name is not null terminated (or SIZE_MAX). - 0, // Section flags, see Section::Flags. - 8); // Section alignment, must be power of 2. - -// When you switch sections in Assembler, Builder, or Compiler the cursor -// will always move to the end of that section. When you create an Assembler -// the cursor would be placed at the end of the first (.text) section, which -// is initially empty. -x86::Assembler a(&code); -Label L_Data = a.newLabel(); - -a.mov(x86::eax, x86::ebx); // Emits in .text section. - -a.section(data); // Switches to the end of .data section. -a.bind(L_Data); // Binds label in this .data section -a.db(0x01); // Emits byte in .data section. - -a.section(text); // Switches to the end of .text section. -a.add(x86::ebx, x86::eax); // Emits in .text section. - -// References a label bound in .data section in .text section. This -// would create a LabelLink even when the L_Data is already bound, -// because the reference crosses sections. See below... -a.lea(x86::rsi, x86::ptr(L_Data)); -``` - -The last line in the example above shows that a LabelLink would be created even for bound labels that cross sections. In this case a referenced label was bound in another section, which means that the link couldn't be resolved at that moment. If your code uses sections, but you wish AsmJit to flatten these sections (you don't plan to flatten them manually) then there is an API for that. - -```c++ -// ... (continuing the previous example) ... -CodeHolder code = ...; - -// Suppose we have some code that contains multiple sections and -// we would like to flatten them by using AsmJit's built-in API: -Error err = code.flatten(); -if (err) { /* Error handling is necessary. */ } - -// After flattening all sections would contain assigned offsets -// relative to base. Offsets are 64-bit unsigned integers so we -// cast them to `size_t` for simplicity. On 32-bit targets it's -// guaranteed that the offset cannot be greater than `2^32 - 1`. -printf("Data section offset %zu", size_t(data->offset())); - -// The flattening doesn't resolve unresolved label links, this -// has to be done manually as flattening can be done separately. -err = code.resolveUnresolvedLinks(); -if (err) { /* Error handling is necessary. */ } - -if (code.hasUnresolvedLinks()) { - // This would mean either unbound label or some other issue. 
- printf("FAILED: UnresoledLinkCount=%zu\n", code.unresovedLinkCount()); -} -``` - -### Using AsmJit Data Structures - -AsmJit stores its data in data structures allocated by `ZoneAllocator`. It's a fast allocator that allows AsmJit to allocate a lot of small data structures fast and without `malloc()` overhead. The most common data structure that you will probably inspect is `ZoneVector`. It's like C++'s `std::vector`. but the implementation doesn't use exceptions and uses the mentioned `ZoneAllocator` for performance reasons. You don't have to worry about allocations as you should not need to add items to data structures that are managed by `CodeHolder` or advanced emitters like Builder/Compiler. - -APIs that return `ZoneVector`: - -```c++ -CodeHolder code = ...; - -// Contains all emitters attached to CodeHolder. -const ZoneVector& emitters = code.emitters(); - -// Contains all sections managed by CodeHolder. -const ZoneVector& sections = code.sections(); - -// Contains all LabelEntry records associated with created Labels. -const ZoneVector& labelEntries = code.labelEntries(); - -// Contains all RelocEntry records that describe relocations. -const ZoneVector& relocEntries = code.relocEntries(); -``` - -AsmJit's `ZoneVector` has overloaded array access operator to make it possible accessing its elements through operator[]. Some standard functions like `empty()`, `size()`, and `data()` are provided as well. Vectors are also iterable through range-based for loop: - -```c++ -CodeHolder code = ...; - -for (LabelEntry* le : code.labelEntries()) { - printf("Label #%u {Bound=%s Offset=%llu}", - le->id(), - le->isBound() ? "true" : "false", - (unsigned long long)le->offset()); -} -``` - - -Support -------- - -AsmJit is an open-source library released under a permissive ZLIB license, which makes it possible to use it freely in any open-source or commercial product. Free support is available through issues and gitter channel, which is very active. Commercial support is currently individual and can be negotiated on demand. It includes consultation, priority bug fixing, review of code that uses AsmJit, porting code to the latest AsmJit, and implementation of new AsmJit features. - -If you use AsmJit in a non-commercial project and would like to appreciate the library in the form of a donation you are welcome to support us. Donations are anonymous unless the donor lets us know otherwise. The order and format of listed donors is not guaranteed and may change in the future. Additionally, donations should be considered as an appreciation of past work and not used to gain special privileges in terms of future development. AsmJit authors reserve the right to remove a donor from the list in extreme cases of disruptive behavior against other community members. Diversity of opinions and constructive criticism will always be welcome in the AsmJit community. - -Donation Addresses: - - * BTC: 14dEp5h8jYSxgXB9vcjE8eh78uweD76o7W - * ETH: 0xd4f0b9424cF31DF5a5359D029CF3A65c500a581E - * Please contact us if you would like to donate through a different channel or to use a different crypto-currency. Wire transfers and SEPA payments are both possible. 
- -Donors: +Notable Donors List: * [ZehMatt](https://github.com/ZehMatt) - Authors & Maintainers --------------------- diff --git a/libs/asmjit/src/SConstruct b/libs/asmjit/src/asmjit.SConscript similarity index 77% rename from libs/asmjit/src/SConstruct rename to libs/asmjit/src/asmjit.SConscript index 9646373..d481407 100644 --- a/libs/asmjit/src/SConstruct +++ b/libs/asmjit/src/asmjit.SConscript @@ -4,16 +4,18 @@ Import('env') sources = Glob('asmjit/**/*.cpp', source=True) + +asmjit_includes = File('CMakeLists.txt').srcnode().get_abspath() +asmjit_includes = os.path.split(asmjit_includes)[0] +env['asmjit_includes'] = [ asmjit_includes ] + asmjit = env.SharedLibrary(target = 'asmjit', source = sources, CPPDEFINES = {'ASMJIT_BUILD_X86' : ''}, CPPPATH = Dir('#/test') ) -asmjit_includes = File('CMakeLists.txt').srcnode().get_abspath() -asmjit_includes = os.path.split(asmjit_includes)[0] +env['asmjit_lib_path'] = os.path.dirname(asmjit[0].get_abspath()) -env['asmjit'] = asmjit -env['asmjit_includes'] = asmjit_includes env.Install(Dir('#/bin'), asmjit) diff --git a/libs/asmjit/src/asmjit.natvis b/libs/asmjit/src/asmjit.natvis index 18a083c..b73d848 100644 --- a/libs/asmjit/src/asmjit.natvis +++ b/libs/asmjit/src/asmjit.natvis @@ -35,25 +35,25 @@ - - + + - - + + - - - - + + + + - - + + - - - + + + - + [None] [Reg] {{ id={_baseId, d} group={regGroup(), d} type={regType(), d} size={opSize(), d} }} @@ -80,4 +80,122 @@ _data[1] + + + + + + + + + + + + + [RegValue {{ regType={regType()} indirect={isIndirect()} done={isDone()} }}] + [StackValue {{ indirect={isIndirect()} done={isDone()} }}] + [Unknown] + + + _data + (asmjit::Type::Id)(typeId()) + (asmjit::BaseReg::RegType)regType() + regId() + stackOffset() + + + + + + + + + + + + + + + + + + + + + + + + + [InstNode] + [SectionNode] + [LabelNode] + [AlignNode] + [EmbedDataNode] + [EmbedLabelNode] + [EmbedLabelDeltaNode] + [ConstPoolNode] + [CommentNode] + [SentinelNode] + [JumpNode] + [FuncNode] + [FuncRetNode] + [InvokeNode] + [UnknownNode {nodeType(), d}] + + + _prev + _next + + (asmjit::BaseNode::NodeType)_any._nodeType + (asmjit::BaseNode::Flags)_any._nodeFlags + + _position + _userDataU64 + _userDataPtr + _passData + _inlineComment, s8 + + ((asmjit::InstNode*)this)->_baseInst + _inst._opCount + _inst._opCapacity + ((asmjit::InstNode*)this)->_opArray, [_inst._opCount] + + ((asmjit::SectionNode*)this)->_id + ((asmjit::SectionNode*)this)->_nextSection + + ((asmjit::LabelNode*)this)->_id + + ((asmjit::AlignNode*)this)->_alignMode + ((asmjit::AlignNode*)this)->_alignment + + _embed._typeId, d + _embed._typeSize, d + ((asmjit::EmbedDataNode*)this)->_itemCount + ((asmjit::EmbedDataNode*)this)->_repeatCount + ((asmjit::EmbedDataNode*)this)->_inlineData + ((asmjit::EmbedDataNode*)this)->_externalData + + ((asmjit::EmbedLabelNode*)this)->_id + + ((asmjit::EmbedLabelDeltaNode*)this)->_id + ((asmjit::EmbedLabelDeltaNode*)this)->_baseId + ((asmjit::EmbedLabelDeltaNode*)this)->_dataSize + + ((asmjit::ConstPoolNode*)this)->_constPool + + (asmjit::SentinelNode::SentinelType)_sentinel._sentinelType + + ((asmjit::JumpNode*)this)->_annotation + + ((asmjit::FuncNode*)this)->_funcDetail + ((asmjit::FuncNode*)this)->_frame + ((asmjit::FuncNode*)this)->_exitNode + ((asmjit::FuncNode*)this)->_end + ((asmjit::FuncNode*)this)->_args, [((asmjit::FuncNode*)this)->_funcDetail._argCount] + + ((asmjit::InvokeNode*)this)->_funcDetail + ((asmjit::InvokeNode*)this)->_rets, [((asmjit::InvokeNode*)this)->_funcDetail._retCount] + ((asmjit::InvokeNode*)this)->_args, 
[((asmjit::InvokeNode*)this)->_funcDetail._argCount] + + diff --git a/libs/asmjit/src/asmjit/asmjit.h b/libs/asmjit/src/asmjit/asmjit.h index e543a63..400426c 100644 --- a/libs/asmjit/src/asmjit/asmjit.h +++ b/libs/asmjit/src/asmjit/asmjit.h @@ -24,27 +24,6 @@ #ifndef ASMJIT_ASMJIT_H_INCLUDED #define ASMJIT_ASMJIT_H_INCLUDED -//! \mainpage API Reference -//! -//! AsmJit C++ API reference documentation generated by Doxygen. -//! -//! Introduction provided by the project page at https://github.com/asmjit/asmjit. -//! -//! \section main_groups Groups -//! -//! The documentation is split into the following groups: -//! -//! $$DOCS_GROUP_OVERVIEW$$ -//! -//! \section main_other Other Pages -//! -//! - Class List - List of classes sorted alphabetically -//! - AsmJit Namespace - List of symbols provided by `asmjit` namespace - -//! \namespace asmjit -//! -//! Root namespace used by AsmJit. - #include "./core.h" #ifdef ASMJIT_BUILD_X86 diff --git a/libs/asmjit/src/asmjit/core.h b/libs/asmjit/src/asmjit/core.h index f9a56fc..216d015 100644 --- a/libs/asmjit/src/asmjit/core.h +++ b/libs/asmjit/src/asmjit/core.h @@ -24,66 +24,2007 @@ #ifndef ASMJIT_CORE_H_INCLUDED #define ASMJIT_CORE_H_INCLUDED +//! Root namespace used by AsmJit. +namespace asmjit { + +// ============================================================================ +// [Documentation - mainpage] +// ============================================================================ + +//! \mainpage API Reference +//! +//! AsmJit C++ API reference documentation generated by Doxygen. +//! +//! AsmJit library uses one global namespace called \ref asmjit, which provides +//! the whole functionality. Core functionality is within \ref asmjit namespace +//! and architecture specific functionality is always in its own namespace. For +//! example \ref asmjit::x86 provides both 32-bit and 64-bit X86 code generation. +//! +//! \section main_groups Documentation Groups +//! +//! AsmJit documentation is structured into groups. Groups can be followed in +//! order to learn AsmJit, but knowledge from multiple groups is required to +//! use AsmJit properly: +//! +//! $$DOCS_GROUP_OVERVIEW$$ +//! +//! \note It's important to understand that in order to learn AsmJit all groups +//! are important. Some groups can be omitted if a particular tool is out of +//! interest - for example \ref asmjit_assembler users don't need to know about +//! \ref asmjit_builder, but it's not the opposite. \ref asmjit_builder users +//! must know about \ref asmjit_assembler as it also uses operands, labels, and +//! other concepts. Similarly \ref asmjit_compiler users must know how both \ref +//! asmjit_assembler and \ref asmjit_builder tools work. +//! +//! \section where_to_start Where To Start +//! +//! AsmJit \ref asmjit_core provides the following two classes that are essential +//! from the code generation perspective: +//! +//! - \ref CodeHolder provides functionality +//! to temporarily hold the generated code. It stores all the necessary +//! information about the code - code buffers, sections, labels, symbols, +//! and information about relocations. +//! +//! - \ref BaseEmitter provides interface used +//! by emitter implementations. The interface provides basic building blocks +//! that are then implemented by \ref BaseAssembler, \ref BaseBuilder, and +//! \ref BaseCompiler. +//! +//! Code emitters: +//! +//! - \ref asmjit_assembler - provides direct machine code generation. +//! +//! - \ref asmjit_builder - provides intermediate code generation that can +//! 
be processed before it's serialized to \ref BaseAssembler. +//! +//! - \ref asmjit_compiler - provides high-level code generation with built-in +//! register allocation. +//! +//! - \ref FuncNode - provides insight into how function looks from the Compiler +//! perspective and how it's stored in a node-list. +//! +//! \section main_recommendations Recommendations +//! +//! The following steps are recommended for all AsmJit users: +//! +//! - Make sure that you use \ref Logger, see \ref asmjit_logging. +//! +//! - Make sure that you use \ref ErrorHandler, see \ref asmjit_error_handling. +//! +//! - Instruction validation in your debug builds can reveal problems too. +//! AsmJit provides validation at instruction level, that can be enabled +//! by \ref BaseEmitter::addValidationOptions(). +//! +//! See \ref BaseEmitter::ValidationOptions for more details. +//! +//! - Make sure you put a breakpoint into \ref DebugUtils::errored() function +//! if you have a problem with AsmJit returning errors during instruction +//! encoding or register allocation. Having an active breakpoint there can +//! help to reveal the origin of the error, to inspect variables and other +//! conditions that caused to it. +//! +//! The reason for using \ref Logger and \ref ErrorHandler is that they provide +//! a very useful information about what's happening inside emitters. In many +//! cases the information provided by these two is crucial to quickly fix issues +//! that happen during development (for example wrong instruction, address, or +//! register used). In addition, output from \ref Logger is always necessary +//! when filling bug reports. In other words, using logging and proper error +//! handling can save a lot of time during the development. +//! +//! \section main_other Other Pages +//! +//! - Class List - List of classes sorted alphabetically +//! - AsmJit Namespace - List of symbols provided by `asmjit` namespace + +// ============================================================================ +// [Documentation - asmjit_build] +// ============================================================================ + +//! \defgroup asmjit_build Build Instructions +//! \brief Build instructions, supported environments, and feature selection. +//! +//! ### Overview +//! +//! AsmJit is designed to be easy embeddable in any project. However, it depends +//! on some compile-time definitions that can be used to enable or disable +//! features to decrease the resulting binary size. A typical way of building +//! AsmJit is to use [cmake](https://www.cmake.org), but it's also possible to +//! just include AsmJit source code in your project and to just build it. The +//! easiest way to include AsmJit in your project is to just include **src** +//! directory in your project and to define \ref ASMJIT_STATIC. AsmJit can be +//! just updated from time to time without any changes to this integration +//! process. Do not embed AsmJit's `test` files in such case as these are used +//! exclusively for testing. +//! +//! ### Supported C++ Compilers +//! +//! - Requirements: +//! +//! - AsmJit won't build without C++11 enabled. If you use older GCC or Clang +//! you would have to enable at least C++11 standard through compiler flags. +//! +//! - Tested: +//! +//! - **Clang** - Tested by Travis-CI - Clang 3.9+ (with C++11 enabled) is +//! officially supported (older Clang versions having C++11 support are +//! probably fine, but are not regularly tested). +//! +//! 
- **GNU** - Tested by Travis-CI - GCC 4.8+ (with C++11 enabled) is +//! officially supported. +//! +//! - **MINGW** - Tested by Travis-CI - Use the latest version, if possible. +//! +//! - **MSVC** - Tested by Travis-CI - VS2017+ is officially supported, VS2015 +//! is reported to work. +//! +//! - Untested: +//! +//! - **Intel** - No maintainers and no CI environment to regularly test +//! this compiler. +//! +//! - **Other** C++ compilers would require basic support in +//! [core/api-config.h](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/api-config.h). +//! +//! ### Supported Operating Systems and Platforms +//! +//! - Tested: +//! +//! - **Linux** - Tested by Travis-CI (any distribution is generally supported). +//! +//! - **OSX** - Tested by Travis-CI (any version is supported). +//! +//! - **Windows** - Tested by Travis-CI - (Windows 7+ is officially supported). +//! +//! - **Emscripten** - Works if compiled with \ref ASMJIT_NO_JIT. AsmJit +//! cannot generate WASM code, but can be used to generate X86/X64 code +//! within a browser, for example. +//! +//! - Untested: +//! +//! - **BSDs** - No maintainers, no CI environment to regularly test BSDs, +//! but they should work out of box. +//! +//! - **Haiku** - Not regularly tested, but reported to work. +//! +//! - **Other** operating systems would require some testing and support in +//! the following files: +//! - [core/api-config.h](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/api-config.h) +//! - [core/osutils.cpp](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/osutils.cpp) +//! - [core/virtmem.cpp](https://github.com/asmjit/asmjit/tree/master/src/asmjit/core/virtmem.cpp) +//! +//! ### Supported Backends / Architectures +//! +//! - **X86** - Both 32-bit and 64-bit backends tested by Travis-CI. +//! - **ARM** - Work-in-progress (not public at the moment). +//! +//! ### Static Builds and Embedding +//! +//! These definitions can be used to enable static library build. Embed is used +//! when AsmJit's source code is embedded directly in another project, implies +//! static build as well. +//! +//! - \ref ASMJIT_EMBED - Asmjit is embedded, implies \ref ASMJIT_STATIC. +//! - \ref ASMJIT_STATIC - Enable static-library build. +//! +//! \note Projects that use AsmJit statically must define \ref ASMJIT_STATIC in +//! all compilation units that use AsmJit, otherwise AsmJit would use dynamic +//! library imports in \ref ASMJIT_API decorator. The recommendation is to +//! define this macro across the whole project that uses AsmJit this way. +//! +//! ### Build Configuration +//! +//! These definitions control whether asserts are active or not. By default +//! AsmJit would autodetect build configuration from existing pre-processor +//! definitions, but this behavior can be overridden, for example to enable +//! debug asserts in release configuration. +//! +//! - \ref ASMJIT_BUILD_DEBUG - Overrides build configuration to debug, +//! asserts will be enabled in this case. +//! - \ref ASMJIT_BUILD_RELEASE - Overrides build configuration to release, +//! asserts will be disabled in this case. +//! +//! \note There is usually no need to override the build configuration. AsmJit +//! detects the build configuration by checking whether `NDEBUG` is defined and +//! automatically defines \ref ASMJIT_BUILD_RELEASE if configuration overrides +//! were not used. We only recommend using build configuration overrides in +//! special situations, like using AsmJit in release configuration with asserts +//! 
enabled for whatever reason. +//! +//! ### AsmJit Backends +//! +//! AsmJit currently supports only X86/X64 backend, but the plan is to add more +//! backends in the future. By default AsmJit builds only the host backend, which +//! is autodetected at compile-time, but this can be overridden. +//! +//! - \ref ASMJIT_BUILD_X86 - Always build X86 backend (X86 and X86_64). +//! - \ref ASMJIT_BUILD_ARM - Always build ARM backend (ARM and AArch64). +//! - \ref ASMJIT_BUILD_HOST - Always build the host backend. +//! +//! ### Features Selection +//! +//! AsmJit builds by defaults all supported features, which includes all emitters, +//! logging, instruction validation and introspection, and JIT memory allocation. +//! Features can be disabled at compile time by using `ASMJIT_NO_...` definitions. +//! +//! - \ref ASMJIT_NO_DEPRECATED - Disables deprecated API at compile time +//! so it won't be available and the compilation will fail if there is +//! attempt to use such API. This includes deprecated classes, namespaces, +//! enumerations, and functions. +//! +//! - \ref ASMJIT_NO_FOREIGN - Disables the support for foreign architectures. +//! If defined, it would internally set \ref ASMJIT_BUILD_HOST to true. +//! +//! - \ref ASMJIT_NO_BUILDER - Disables \ref asmjit_builder functionality +//! completely. This implies \ref ASMJIT_NO_COMPILER as \ref asmjit_compiler +//! cannot be used without \ref asmjit_builder. +//! +//! - \ref ASMJIT_NO_COMPILER - Disables \ref asmjit_compiler functionality +//! completely. +//! +//! - \ref ASMJIT_NO_JIT - Disables JIT memory management and \ref JitRuntime. +//! +//! - \ref ASMJIT_NO_LOGGING - Disables \ref Logger and \ref Formatter. +//! +//! - \ref ASMJIT_NO_TEXT - Disables everything that contains string +//! representation of AsmJit constants, should be used together with +//! \ref ASMJIT_NO_LOGGING as logging doesn't make sense without the +//! ability to quiry instruction names, register names, etc... +//! +//! - \ref ASMJIT_NO_VALIDATION - Disables validation API. +//! +//! - \ref ASMJIT_NO_INTROSPECTION - Disables instruction introspection API, +//! must be used together with \ref ASMJIT_NO_COMPILER as \ref asmjit_compiler +//! requires introspection for its liveness analysis and register allocation. +//! +//! \note It's not recommended to disable features if you plan to build AsmJit +//! as a shared library that will be used by multiple projects that you don't +//! control how AsmJit was built (for example AsmJit in a Linux distribution). +//! The possibility to disable certain features exists mainly for customized +//! AsmJit builds. + +// ============================================================================ +// [Documentation - asmjit_breaking_changes] +// ============================================================================ + +//! \defgroup asmjit_breaking_changes Breaking Changes +//! \brief Documentation of breaking changes +//! +//! ### Overview +//! +//! AsmJit is a live project that is being actively developed. Deprecating the +//! existing API in favor of a new one is preferred, but it's not always +//! possible if the changes are significant. AsmJit authors prefer to do +//! accumulated breaking changes at once instead of breaking the API often. +//! This page documents deprecated and removed APIs and should serve as a how-to +//! guide for people that want to port existing code to work with the newest AsmJit. +//! +//! ### Tips +//! +//! Useful tips before you start: +//! +//! 
- Visit our [Public Gitter Channel](https://gitter.im/asmjit/asmjit) if +//! you need a quick help. +//! +//! - Build AsmJit with `ASMJIT_NO_DEPRECATED` macro defined to make sure that +//! you are not using deprecated functionality at all. Deprecated functions +//! are decorated with `ASMJIT_DEPRECATED()` macro, but sometimes it's not +//! possible to decorate everything like classes, which are used by deprecated +//! functions as well, because some compilers would warn about that. If your +//! project compiles fine with `ASMJIT_NO_DEPRECATED` it's not using anything, +//! which was deprecated. +//! +//! ### Changes committed at 2020-05-30 +//! +//! AsmJit has been cleaned up significantly, many todo items have been fixed +//! and many functions and classes have been redesigned, some in an incompatible +//! way. +//! +//! Core changes: +//! +//! - \ref Imm operand has now only \ref Imm::value() and \ref Imm::valueAs() +//! functions that return its value content, and \ref Imm::setValue() function +//! that sets the content. Functions like `setI8()`, `setU8()` were deprecated. +//! +//! Old functions were deprecated, but code using them should still compile. +//! +//! - `ArchInfo` has been replaced with \ref Environment. Environment provides +//! more details about the architecture, but drops some properties that +//! were used by arch info - `gpSize(`) and `gpCount()`. `gpSize()` can +//! be replaced with `registerSize()` getter, which returns a native register +//! size of the architecture the environment uses. However, `gpCount()` was +//! removed - at the moment \ref ArchRegs can be used to access such properties. +//! +//! Some other functions were renamed, like `ArchInfo::isX86Family()` is +//! now \ref Environment::isFamilyX86(), etc. The reason for changing the +//! order was support for more propertries and all the accessors now +//! start with the type of the property, like \ref Environment::isPlatformWindows(). +//! +//! This function causes many other classes to provide `environment()` getter +//! instead of `archInfo()` getter. In addition, AsmJit now uses `arch()` to +//! get an architecture instead of `archId()`. `ArchInfo::kIdXXX` was renamed +//! to `Environment::kArchXXX`. +//! +//! Some functions were deprecated, some removed... +//! +//! - `CodeInfo` has been removed in favor of \ref Environment. If you used +//! `CodeInfo` to set architecture and base address, this is now possible +//! with \ref Environment and setting base address explicitly by \ref +//! CodeHolder::init() - the first argument is \ref Environment, and the +//! second argument is base address, which defaults to \ref +//! Globals::kNoBaseAddress. +//! +//! CodeInfo class was deprecated, but the code using it should still +//! compile with warnings. +//! +//! - \ref CallConv has been updated to offer a more unified way of representing +//! calling conventions - many calling conventions were abstracted to follow +//! standard naming like \ref CallConv::kIdCDecl or \ref CallConv::kIdStdCall. +//! +//! This change means that other APIs like \ref FuncDetail::init() now +//! require both, calling convention and target \ref Environment. +//! +//! - `Logging` namespace has been renamed to \ref Formatter, which now +//! provides general functionality for formatting in AsmJit. +//! +//! Logging namespace should still work, but its use is deprecated. +//! Unfortunately this will be without deprecation warnings, so please +//! make sure you don't use it. +//! +//! 
- `Data64`, `Data128`, and `Data256` structs were deprecated and should +//! no longer be used. There is no replacement, AsmJit users should simply +//! create their own structures if they need them or use the new repeated +//! embed API in emitters, see \ref BaseEmitter::embedDataArray(). +//! +//! Emitter changes: +//! +//! - \ref BaseEmitter::emit() function signature has been changed to accept +//! 3 operands by reference and the rest 3 operands as a continuous array. +//! This change is purely cosmetic and shouldn't affect users as emit() +//! has many overloads that dispatch to the right function. +//! +//! - \ref x86::Emitter (Assembler, Builder, Compiler) deprecates embed +//! utilities like `dint8()`, `duint8()`, `duint16()`, `dxmm()`, etc... +//! in favor of a new and more powerful \ref BaseEmitter::embedDataArray(). +//! This function also allows emitting repeated values and/or patterns, +//! which is used by helpers \ref BaseEmitter::embedUInt8(), and others... +//! +//! - Validation is now available through \ref BaseEmitter::ValidationOptions, +//! which can be enabled/disabled through \ref BaseEmitter::addValidationOptions() +//! and \ref BaseEmitter::clearValidationOptions(), respectively. Validation +//! options now separate between encoding and Builder/Compiler so it's possible +//! to choose the granularity required. +//! +//! Builder changes: +//! +//! - Internal functions for creating nodes were redesigned. They now accept +//! a pointer to the node created as a first parameter. These changes should +//! not affect AsmJit users as these functions were used internally. +//! +//! Compiler changes: +//! +//! - `FuncCallNode` has been renamed to \ref InvokeNode. Additionally, function +//! calls should now use \ref x86::Compiler::invoke() instead of `call()`. +//! The reason behind this is to remove the confusion between a `call` +//! instruction and AsmJit's `call()` intrinsic, which is now `invoke()`. +//! +//! - Creating new nodes also changed. Now the preferred way of invoking a +//! function is to call \ref x86::Compiler::invoke() where the first +//! argument is `InvokeNode**`. The function now returns an error and would +//! call \ref ErrorHandler in case of a failure. Error handling was +//! unspecified in the past - the function was marked noexcept, but called +//! error handler, which could throw. +//! +//! The reason behind this change is to make the API consistent with other +//! changes and to also make it possible to inspect the possible error. In +//! the previous API it returned a new node or `nullptr` in case of error, +//! which the user couldn't inspect unless there was an attached \ref +//! ErrorHandler. +//! +//! Samples: +//! +//! ``` +//! #include +//! using namespace asmjit; +//! +//! // The basic setup of JitRuntime and CodeHolder changed, use environment() +//! // instead of codeInfo(). +//! void basicSetup() { +//! JitRuntime rt; +//! CodeHolder code(rt.environment()); +//! } +//! +//! // Calling a function (Compiler) changed - use invoke() instead of call(). +//! void functionInvocation(x86::Compiler& cc) { +//! InvokeNode* invokeNode; +//! cc.invoke(&invokeNode, targetOperand, FuncSignatureT<...>(...)); +//! } +//! ``` + +// ============================================================================ +// [Documentation - asmjit_core] +// ============================================================================ + //! \defgroup asmjit_core Core -//! \brief Core API. +//! \brief Globals, code storage, and emitter interface. +//! +//! 
### Overview +//! +//! AsmJit library uses \ref CodeHolder to hold code during code generation and +//! emitters inheriting from \ref BaseEmitter to emit code. CodeHolder uses +//! containers to manage its data: +//! +//! - \ref Section - stores information about a code or data section. +//! - \ref CodeBuffer - stores actual code or data, part of \ref Section. +//! - \ref LabelEntry - stores information about a label - its name, offset, +//! section where it belongs to, and other bits. +//! - \ref LabelLink - stores information about yet unbound label, which was +//! already used by the assembler. +//! - \ref RelocEntry - stores information about a relocation. +//! - \ref AddressTableEntry - stores information about an address, which was +//! used in a jump or call. Such address may need relocation. +//! +//! To generate code you would need to instantiate at least the following classes: +//! +//! - \ref CodeHolder - to hold code during code generation. +//! - \ref BaseEmitter - to emit code into \ref CodeHolder. +//! - \ref Target (optional) - most likely \ref JitRuntime to keep the generated +//! code in executable memory. \ref Target can be customized by inheriting from +//! it. +//! +//! There are also other core classes that are important: +//! +//! - \ref Environment - describes where the code will run. Environment brings +//! the concept of target triples or tuples into AsmJit, which means that users +//! can specify target architecture, platform, and ABI. +//! - \ref Type - encapsulates lightweight type functionality that can be used +//! to describe primitive and vector types. Types are used by higher level +//! utilities, for example by \ref asmjit_function and \ref asmjit_compiler. +//! - \ref CpuInfo - encapsulates CPU information - stores both CPU information +//! and features described by \ref BaseFeatures. +//! +//! AsmJit also provides global constants: +//! +//! - \ref Globals - namespace that provides global constants. +//! - \ref ByteOrder - byte-order constants and functionality. +//! +//! \note CodeHolder examples use \ref x86::Assembler as abstract interfaces cannot +//! be used to generate code. +//! +//! ### CodeHolder & Emitters +//! +//! The example below shows how the mentioned classes interact to generate X86 code: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef int (*Func)(void); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! +//! CodeHolder code; // Holds code and relocation information. +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! +//! x86::Assembler a(&code); // Create and attach x86::Assembler to code. +//! a.mov(x86::eax, 1); // Move one to eax register. +//! a.ret(); // Return from function. +//! // ===== x86::Assembler is no longer needed from here and can be destroyed ===== +//! +//! Func fn; // Holds address to the generated function. +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ===== CodeHolder is no longer needed from here and can be destroyed ===== +//! +//! int result = fn(); // Execute the generated code. +//! printf("%d\n", result); // Print the resulting "1". +//! +//! // All classes use RAII, all resources will be released before `main()` returns, +//! // the generated function can be, however, released explicitly if you intend to +//! 
// reuse or keep the runtime alive, which you should in a production-ready code. +//! rt.release(fn); +//! +//! return 0; +//! } +//! ``` +//! +//! The example above used \ref x86::Assembler as an emitter. AsmJit provides the +//! following emitters that offer various levels of abstraction: +//! +//! - \ref asmjit_assembler - Low-level emitter that emits directly to \ref CodeBuffer. +//! - \ref asmjit_builder - Low-level emitter that emits to a \ref BaseNode list. +//! - \ref asmjit_compiler - High-level emitter that provides register allocation. +//! +//! ### Targets and JitRuntime +//! +//! AsmJit's \ref Target is an interface that provides basic target abstraction. +//! At the moment AsmJit provides only one implementation called \ref JitRuntime, +//! which as the name suggests provides JIT code target and execution runtime. +//! \ref JitRuntime provides all the necessary stuff to implement a simple JIT +//! compiler with basic memory management. It only provides \ref JitRuntime::add() +//! and \ref JitRuntime::release() functions that are used to either add code +//! to the runtime or release it. \ref JitRuntime doesn't do any decisions on +//! when the code should be released, the decision is up to the developer. +//! +//! See more at \ref asmjit_virtual_memory group. +//! +//! ### More About Environment +//! +//! In the previous example the \ref Environment is retrieved from \ref JitRuntime. +//! It's logical as \ref JitRuntime always returns an \ref Environment that is +//! compatible with the host. For example if your application runs in 64-bit mode +//! the \ref Environment returned will use \ref Environment::kArchX64 architecture +//! in contrast to \ref Environment::kArchX86, which will be used in 32-bit mode on +//! any X86 platform. +//! +//! AsmJit allows to setup the \ref Environment manually and to select a different +//! architecture and ABI when necessary. So let's do something else this time, let's +//! always generate a 32-bit code and print its binary representation. To do that, we +//! can create our own \ref Environment and initialize it to \ref Environment::kArchX86. +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! int main(int argc, char* argv[]) { +//! using namespace asmjit::x86; +//! +//! // Create a custom environment initialized to 32-bit X86 architecture. +//! Environment env; +//! env.setArch(Environment::kArchX86); +//! +//! CodeHolder code; // Create a CodeHolder. +//! code.init(env); // Initialize CodeHolder with custom environment. +//! +//! // Generate a 32-bit function that sums 4 floats and looks like: +//! // void func(float* dst, const float* a, const float* b) +//! x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. +//! +//! a.mov(eax, dword_ptr(esp, 4)); // Load the destination pointer. +//! a.mov(ecx, dword_ptr(esp, 8)); // Load the first source pointer. +//! a.mov(edx, dword_ptr(esp, 12)); // Load the second source pointer. +//! +//! a.movups(xmm0, ptr(ecx)); // Load 4 floats from [ecx] to XMM0. +//! a.movups(xmm1, ptr(edx)); // Load 4 floats from [edx] to XMM1. +//! a.addps(xmm0, xmm1); // Add 4 floats in XMM1 to XMM0. +//! a.movups(ptr(eax), xmm0); // Store the result to [eax]. +//! a.ret(); // Return from function. +//! +//! // We have no Runtime this time, it's on us what we do with the code. +//! // CodeHolder stores code in Section, which provides some basic properties +//! // and CodeBuffer structure. We are interested in section's CodeBuffer. +//! // +//! 
// NOTE: The first section is always '.text', it can be retrieved by
+//!   // code.sectionById(0) or simply by code.textSection().
+//!   CodeBuffer& buffer = code.textSection()->buffer();
+//!
+//!   // Print the machine-code generated or do something else with it...
+//!   // 8B4424048B4C24048B5424040F28010F58010F2900C3
+//!   for (size_t i = 0; i < buffer.length; i++)
+//!     printf("%02X", buffer.data[i]);
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Explicit Code Relocation
+//!
+//! In addition to \ref Environment, \ref CodeHolder can be configured to
+//! specify a base-address (or a virtual base-address in a linker terminology),
+//! which could be static (useful when you know the location where the target's
+//! machine code will be) or dynamic. AsmJit assumes dynamic base-address by
+//! default and relocates the code held by \ref CodeHolder to a user provided
+//! address on-demand. To be able to relocate to a user provided address it needs
+//! to store some information about relocations, which is represented by \ref
+//! RelocEntry. Relocation entries are only required if you call external functions
+//! from the generated code that cannot be encoded by using a 32-bit displacement
+//! (64-bit displacements are not provided by any supported architecture).
+//!
+//! There is also a concept called \ref LabelLink - a label link is a lightweight
+//! data structure that doesn't have any identifier and is stored in \ref LabelEntry
+//! as a single-linked list. A label link represents either a label that is used but
+//! not yet bound, or a cross-section link (only relevant to code that uses multiple
+//! sections). Since crossing sections is something that cannot be resolved immediately
+//! these links persist until offsets of these sections are assigned and until
+//! \ref CodeHolder::resolveUnresolvedLinks() is called. It's an error if you end
+//! up with code that has unresolved label links after flattening. You can verify
+//! it by calling \ref CodeHolder::hasUnresolvedLinks(), which inspects the value
+//! returned by \ref CodeHolder::unresolvedLinkCount().
+//!
+//! AsmJit can flatten code that uses multiple sections by assigning each section
+//! an incrementing offset that respects its alignment. Use \ref CodeHolder::flatten()
+//! to do that. After the sections are flattened their offsets and virtual-sizes
+//! are adjusted to respect each section's buffer size and alignment. The \ref
+//! CodeHolder::resolveUnresolvedLinks() function must be called before relocating
+//! the code held by \ref CodeHolder. You can also flatten your code manually by
+//! iterating over all sections and calculating their offsets (relative to base)
+//! by your own algorithm. In that case \ref CodeHolder::flatten() should not be
+//! called, however, \ref CodeHolder::resolveUnresolvedLinks() should be.
+//!
+//! The example below shows how to use a built-in virtual memory allocator
+//! \ref JitAllocator instead of using \ref JitRuntime (just in case you want
+//! to use your own memory management) and how to relocate the generated code
+//! into your own memory block - you can use your own virtual memory allocator
+//! if you prefer that, but that's OS specific and not covered by the documentation.
+//!
+//! The following code is similar to the previous one, but implements a function
+//! working in both 32-bit and 64-bit environments:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
+//!
+//! int main() {
+//!
// Create a custom environment that matches the current host environment. +//! Environment env = hostEnvironment(); +//! +//! CodeHolder code; // Create a CodeHolder. +//! code.init(env); // Initialize CodeHolder with environment. +//! +//! x86::Assembler a(&code); // Create and attach x86::Assembler to `code`. +//! +//! // Signature: 'void func(int* dst, const int* a, const int* b)'. +//! x86::Gp dst; +//! x86::Gp src_a; +//! x86::Gp src_b; +//! +//! // Handle the difference between 32-bit and 64-bit calling conventions +//! // (arguments passed through stack vs. arguments passed by registers). +//! if (env.is32Bit()) { +//! dst = x86::eax; +//! src_a = x86::ecx; +//! src_b = x86::edx; +//! a.mov(dst , x86::dword_ptr(x86::esp, 4)); +//! a.mov(src_a, x86::dword_ptr(x86::esp, 8)); +//! a.mov(src_b, x86::dword_ptr(x86::esp, 12)); +//! } +//! else { +//! if (env.isPlatformWindows()) { +//! dst = x86::rcx; // First argument (destination pointer). +//! src_a = x86::rdx; // Second argument (source 'a' pointer). +//! src_b = x86::r8; // Third argument (source 'b' pointer). +//! } +//! else { +//! dst = x86::rdi; // First argument (destination pointer). +//! src_a = x86::rsi; // Second argument (source 'a' pointer). +//! src_b = x86::rdx; // Third argument (source 'b' pointer). +//! } +//! } +//! +//! a.movdqu(x86::xmm0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0. +//! a.movdqu(x86::xmm1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1. +//! a.paddd(x86::xmm0, x86::xmm1); // Add 4 ints in XMM1 to XMM0. +//! a.movdqu(x86::ptr(dst), x86::xmm0); // Store the result to [dst]. +//! a.ret(); // Return from function. +//! +//! // Even when we didn't use multiple sections AsmJit could insert one section +//! // called '.addrtab' (address table section), which would be filled by data +//! // required by relocations (absolute jumps and calls). You can omit this code +//! // if you are 100% sure your code doesn't contain multiple sections and +//! // such relocations. You can use `CodeHolder::hasAddressTable()` to verify +//! // whether the address table section does exist. +//! code.flatten(); +//! code.resolveUnresolvedLinks(); +//! +//! // After the code was generated it can be relocated manually to any memory +//! // location, however, we need to know it's size before we perform memory +//! // allocation. `CodeHolder::codeSize()` returns the worst estimated code +//! // size in case that relocations are not possible without trampolines (in +//! // that case some extra code at the end of the current code buffer is +//! // generated during relocation). +//! size_t estimatedSize = code.codeSize(); +//! +//! // Instead of rolling up our own memory allocator we can use the one AsmJit +//! // provides. It's decoupled so you don't need to use `JitRuntime` for that. +//! JitAllocator allocator; +//! +//! // Allocate an executable virtual memory and handle a possible failure. +//! void* p = allocator.alloc(estimatedSize); +//! if (!p) +//! return 0; +//! +//! // Now relocate the code to the address provided by the memory allocator. +//! // Please note that this DOESN'T COPY anything to `p`. This function will +//! // store the address in CodeHolder and use relocation entries to patch the +//! // existing code in all sections to respect the base address provided. +//! code.relocateToBase((uint64_t)p); +//! +//! // This is purely optional. There are cases in which the relocation can omit +//! // unneeded data, which would shrink the size of address table. If that +//! 
// happened the codeSize returned after relocateToBase() would be smaller
+//!   // than the original `estimatedSize`.
+//!   size_t codeSize = code.codeSize();
+//!
+//!   // This will copy code from all sections to `p`. Iterating over all sections
+//!   // and calling `memcpy()` would work as well, however, this function supports
+//!   // additional options that can be used to also zero pad sections' virtual
+//!   // size, etc.
+//!   //
+//!   // With some additional features, copyFlattenedData() does roughly this:
+//!   //   for (Section* section : code.sections())
+//!   //     memcpy((uint8_t*)p + section->offset(),
+//!   //            section->data(),
+//!   //            section->bufferSize());
+//!   code.copyFlattenedData(p, codeSize, CodeHolder::kCopyPadSectionBuffer);
+//!
+//!   // Execute the generated function.
+//!   int inA[4] = { 4, 3, 2, 1 };
+//!   int inB[4] = { 1, 5, 2, 8 };
+//!   int out[4];
+//!
+//!   // This code uses AsmJit's ptr_as_func<> to cast between void* and SumIntsFunc.
+//!   ptr_as_func<SumIntsFunc>(p)(out, inA, inB);
+//!
+//!   // Prints {5 8 4 9}
+//!   printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
+//!
+//!   // Release 'p' if it's no longer needed. It will be destroyed with the
+//!   // 'allocator' instance anyway, but it's a good practice to release it
+//!   // explicitly when you know that the function will not be needed anymore.
+//!   allocator.release(p);
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If you know the base-address in advance (before the code generation) it can
+//! be passed as a second argument to \ref CodeHolder::init(). In that case the
+//! Assembler will know the absolute position of each instruction and would be
+//! able to use it during instruction encoding to prevent relocations where
+//! possible. The following example shows how to configure the base address:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! void initializeCodeHolder(CodeHolder& code) {
+//!   Environment env = hostEnvironment();
+//!   uint64_t baseAddress = uint64_t(0x1234);
+//!
+//!   // Initialize CodeHolder with environment and custom base address.
+//!   code.init(env, baseAddress);
+//! }
+//! ```
+//!
+//! ### Label Offsets and Links
+//!
+//! When a label that is not yet bound is used by the Assembler, it creates a
+//! \ref LabelLink, which is then added to a \ref LabelEntry. These links are
+//! also created if a label is used in a different section than in which it
+//! was bound. Let's examine some functions that can be used to check whether
+//! there are any unresolved links.
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! void labelLinksExample(CodeHolder& code, const Label& label) {
+//!   // Tests whether the `label` is bound.
+//!   bool isBound = code.isLabelBound(label);
+//!   printf("Label %u is %s\n", label.id(), isBound ? "bound" : "not bound");
+//!
+//!   // Returns true if the code contains either referenced, but unbound
+//!   // labels, or cross-section label links that are not resolved yet.
+//!   bool hasUnresolved = code.hasUnresolvedLinks();  // Boolean answer.
+//!   size_t nUnresolved = code.unresolvedLinkCount(); // Count of unresolved links.
+//!
+//!   printf("Number of unresolved links: %zu\n", nUnresolved);
+//! }
+//! ```
+//!
+//! There is no function that would return the number of unbound labels as this
+//! is completely unimportant from CodeHolder's perspective. If a label is not
+//! used then it doesn't matter whether it's bound or not, only actually used
+//! labels matter. After a Label is bound it's possible to query its
+//!
offset relative to the start of the section where it was bound: +//! +//! ``` +//! #include +//! #include +//! +//! void labelOffsetExample(CodeHolder& code, const Label& label) { +//! // Label offset is known after it's bound. The offset provided is relative +//! // to the start of the section, see below for alternative. If the given +//! // label is not bound the offset returned will be zero. It's recommended +//! // to always check whether the label is bound before using its offset. +//! uint64_t sectionOffset = code.labelOffset(label); +//! printf("Label offset relative to section: %llu\n", (unsigned long long)sectionOffset); +//! +//! // If you use multiple sections and want the offset relative to the base. +//! // NOTE: This function expects that the section has already an offset and +//! // the label-link was resolved (if this is not true you will still get an +//! // offset relative to the start of the section). +//! uint64_t baseOffset = code.labelOffsetFromBase(label); +//! printf("Label offset relative to base: %llu\n", (unsigned long long)baseOffset); +//! } +//! ``` +//! +//! ### Sections +//! +//! AsmJit allows to create multiple sections within the same \ref CodeHolder. +//! A test-case [asmjit_test_x86_sections.cpp](https://github.com/asmjit/asmjit/blob/master/test/asmjit_test_x86_sections.cpp) +//! can be used as a reference point although the following example should +//! also provide a useful insight: +//! +//! ``` +//! #include +//! #include +//! +//! void sectionsExample(CodeHolder& code) { +//! // Text section is always provided as the first section. +//! Section* text = code.textSection(); // or code.sectionById(0); +//! +//! // To create another section use CodeHolder::newSection(). +//! Section* data; +//! Error err = code.newSection(&data, +//! ".data", // Section name +//! SIZE_MAX, // Name length if the name is not null terminated (or SIZE_MAX). +//! 0, // Section flags, see Section::Flags. +//! 8, // Section alignment, must be power of 2. +//! 0); // Section order value (optional, default 0). +//! +//! // When you switch sections in Assembler, Builder, or Compiler the cursor +//! // will always move to the end of that section. When you create an Assembler +//! // the cursor would be placed at the end of the first (.text) section, which +//! // is initially empty. +//! x86::Assembler a(&code); +//! Label L_Data = a.newLabel(); +//! +//! a.mov(x86::eax, x86::ebx); // Emits in .text section. +//! +//! a.section(data); // Switches to the end of .data section. +//! a.bind(L_Data); // Binds label in this .data section +//! a.db(0x01); // Emits byte in .data section. +//! +//! a.section(text); // Switches to the end of .text section. +//! a.add(x86::ebx, x86::eax); // Emits in .text section. +//! +//! // References a label in .text section, which was bound in .data section. +//! // This would create a LabelLink even when the L_Data is already bound, +//! // because the reference crosses sections. See below... +//! a.lea(x86::rsi, x86::ptr(L_Data)); +//! } +//! ``` +//! +//! The last line in the example above shows that a LabelLink would be created +//! even for bound labels that cross sections. In this case a referenced label +//! was bound in another section, which means that the link couldn't be resolved +//! at that moment. If your code uses sections, but you wish AsmJit to flatten +//! these sections (you don't plan to flatten them manually) then there is an +//! API for that. +//! +//! ``` +//! #include +//! #include +//! +//! // ... 
(continuing the previous example) ...
+//! void sectionsExampleContinued(CodeHolder& code) {
+//!   // Suppose we have some code that contains multiple sections and
+//!   // we would like to flatten it by using AsmJit's built-in API:
+//!   Error err = code.flatten();
+//!   if (err) {
+//!     // There are many reasons it can fail, so always handle a possible error.
+//!     printf("Failed to flatten the code: %s\n", DebugUtils::errorAsString(err));
+//!     exit(1);
+//!   }
+//!
+//!   // After flattening all sections would contain assigned offsets
+//!   // relative to base. Offsets are 64-bit unsigned integers so we
+//!   // cast them to `size_t` for simplicity. On 32-bit targets it's
+//!   // guaranteed that the offset cannot be greater than `2^32 - 1`.
+//!   printf("Data section offset %zu", size_t(data->offset()));
+//!
+//!   // The flattening doesn't resolve unresolved label links, this
+//!   // has to be done manually as flattening can be done separately.
+//!   err = code.resolveUnresolvedLinks();
+//!   if (err) {
+//!     // This is the kind of error that should always be handled...
+//!     printf("Failed to resolve label links: %s\n", DebugUtils::errorAsString(err));
+//!     exit(1);
+//!   }
+//!
+//!   if (code.hasUnresolvedLinks()) {
+//!     // This would mean either unbound label or some other issue.
+//!     printf("The code has %zu unbound labels\n", code.unresolvedLinkCount());
+//!     exit(1);
+//!   }
+//! }
+//! ```
+
+// ============================================================================
+// [Documentation - asmjit_assembler]
+// ============================================================================
+
+//! \defgroup asmjit_assembler Assembler
+//! \brief Assembler interface and operands.
+//!
+//! ### Overview
+//!
+//! AsmJit's Assembler is used to emit machine code directly into a \ref
+//! CodeBuffer. In general, code generation with assembler requires the knowledge
+//! of the following:
+//!
+//! - \ref BaseAssembler and architecture-specific assemblers:
+//!   - \ref x86::Assembler - Assembler specific to X86 architecture
+//! - \ref Operand and its variations:
+//!   - \ref BaseReg - Base class for a register operand, inherited by:
+//!     - \ref x86::Reg - Register operand specific to X86 architecture.
+//!   - \ref BaseMem - Base class for a memory operand, inherited by:
+//!     - \ref x86::Mem - Memory operand specific to X86 architecture.
+//!   - \ref Imm - Immediate (value) operand.
+//!   - \ref Label - Label operand.
+//!
+//! \note Assembler examples use \ref x86::Assembler as abstract interfaces cannot
+//! be used to generate code.
+//!
+//! ### Operand Basics
+//!
+//! Let's start with operands. \ref Operand is a data structure that defines a
+//! data layout of any operand. It can be inherited, but any class inheriting
+//! it cannot add any members to it, only the existing layout can be reused.
+//! AsmJit allows you to construct operands dynamically, to store them, and to query
+//! complete information about them at run-time. Operands are small (always 16
+//! bytes per \ref Operand) and can be copied and passed by value. Please never
+//! allocate individual operands dynamically by using the `new` keyword - it would
+//! work, but then you would have to be responsible for deleting such operands.
+//! In AsmJit operands are always part of some other data structures like \ref
+//! InstNode, which is part of the \ref asmjit_builder tool.
+//!
+//! Operands contain only identifiers, but not pointers to any code-generation data.
+//!
For example \ref Label operand only provides label identifier, but not a pointer +//! to \ref LabelEntry structure. In AsmJit such IDs are used to link stuff together +//! without having to deal with pointers. +//! +//! AsmJit's operands all inherit from a base class called \ref Operand. Operands +//! have the following properties that are commonly accessible by getters and setters: +//! +//! - \ref Operand - Base operand, which only provides accessors that are common +//! to all operand types. +//! - \ref BaseReg - Describes either physical or virtual register. Physical +//! registers have id that matches the target's machine id directly whereas +//! virtual registers must be allocated into physical registers by a register +//! allocator pass. Register operand provides: +//! - Register Type - Unique id that describes each possible register provided +//! by the target architecture - for example X86 backend provides \ref +//! x86::Reg::RegType, which defines all variations of general purpose registers +//! (GPB-LO, GPB-HI, GPW, GPD, and GPQ) and all types of other registers like K, +//! MM, BND, XMM, YMM, and ZMM. +//! - Register Group - Groups multiple register types under a single group - for +//! example all general-purpose registers (of all sizes) on X86 are part of +//! \ref x86::Reg::kGroupGp and all SIMD registers (XMM, YMM, ZMM) are part +//! of \ref x86::Reg::kGroupVec. +//! - Register Size - Contains the size of the register in bytes. If the size +//! depends on the mode (32-bit vs 64-bit) then generally the higher size is +//! used (for example RIP register has size 8 by default). +//! - Register Id - Contains physical or virtual id of the register. +//! - \ref BaseMem - Used to reference a memory location. Memory operand provides: +//! - Base Register - A base register type and id (physical or virtual). +//! - Index Register - An index register type and id (physical or virtual). +//! - Offset - Displacement or absolute address to be referenced (32-bit if base +//! register is used and 64-bit if base register is not used). +//! - Flags that can describe various architecture dependent information (like +//! scale and segment-override on X86). +//! - \ref Imm - Immediate values are usually part of instructions (encoded within +//! the instruction itself) or data. +//! - \ref Label - used to reference a location in code or data. Labels must be +//! created by the \ref BaseEmitter or by \ref CodeHolder. Each label has its +//! unique id per \ref CodeHolder instance. +//! +//! ### Operand Manipulation +//! +//! AsmJit allows to construct operands dynamically, to store them, and to query +//! a complete information about them at run-time. Operands are small (always 16 +//! bytes per `Operand`) and should be always copied (by value) if you intend to +//! store them (don't create operands by using `new` keyword, it's not recommended). +//! Operands are safe to be passed to `memcpy()` and `memset()`, which becomes +//! handy when working with arrays of operands. If you set all members of an \ref +//! Operand to zero the operand would become NONE operand, which is the same as a +//! default constructed Operand. +//! +//! The example below illustrates how operands can be used and modified even +//! without using any other code generation classes. The example uses X86 +//! architecture-specific operands. +//! +//! ``` +//! #include +//! +//! using namespace asmjit; //! -//! API that provides classes and functions not specific to any architecture. +//! 
// Registers can be copied, it's a common practice.
+//! x86::Gp dstRegByValue() { return x86::ecx; }
+//!
+//! void usingOperandsExample(x86::Assembler& a) {
+//!   // Gets `ecx` register returned by a function.
+//!   x86::Gp dst = dstRegByValue();
+//!   // Gets `rax` register directly from the provided `x86` namespace.
+//!   x86::Gp src = x86::rax;
+//!   // Constructs `r10` dynamically.
+//!   x86::Gp idx = x86::gpq(10);
+//!   // Constructs [src + idx] memory address - referencing [rax + r10].
+//!   x86::Mem m = x86::ptr(src, idx);
+//!
+//!   // Examine `m`: Returns `x86::Reg::kTypeGpq`.
+//!   m.indexType();
+//!   // Examine `m`: Returns 10 (`r10`).
+//!   m.indexId();
+//!
+//!   // Reconstruct `idx` stored in mem:
+//!   x86::Gp idx_2 = x86::Gp::fromTypeAndId(m.indexType(), m.indexId());
+//!
+//!   // True, `idx` and `idx_2` are identical.
+//!   idx == idx_2;
+//!
+//!   // Possible - op will still be the same as `m`.
+//!   Operand op = m;
+//!   // True (can be cast to BaseMem or architecture-specific Mem).
+//!   op.isMem();
+//!
+//!   // True, `op` is just a copy of `m`.
+//!   m == op;
+//!
+//!   // Static cast is fine and valid here.
+//!   static_cast<x86::Mem&>(op).addOffset(1);
+//!   // However, using `as<x86::Mem>()` to cast to a derived type is preferred.
+//!   op.as<x86::Mem>().addOffset(1);
+//!   // False, `op` now points to [rax + r10 + 2], which is not [rax + r10].
+//!   m == op;
+//!
+//!   // Emitting 'mov' - type safe way.
+//!   a.mov(dst, m);
+//!   // Not possible, `mov` doesn't provide mov(x86::Gp, Operand) overload.
+//!   a.mov(dst, op);
+//!
+//!   // Type-unsafe, but possible.
+//!   a.emit(x86::Inst::kIdMov, dst, m);
+//!   // Also possible, `emit()` is typeless and can be used with raw Operand.
+//!   a.emit(x86::Inst::kIdMov, dst, op);
+//! }
+//! ```
+//!
+//! Some operands have to be created explicitly by emitters. For example labels
+//! must be created by \ref BaseEmitter::newLabel(), which creates a label entry
+//! and returns a \ref Label operand with the id that refers to it. Such label
+//! can then be used by emitters.
+//!
+//! ### Memory Operands
+//!
+//! Some architectures like X86 provide a complex memory addressing model that
+//! allows encoding addresses that have a BASE register, an INDEX register with a
+//! possible scale (left shift), and displacement (called offset in AsmJit).
+//! Memory address on X86 can also specify memory segment (segment-override in
+//! X86 terminology) and some instructions (gather / scatter) require INDEX to
+//! be a \ref x86::Vec register instead of a general-purpose register.
+//!
+//! AsmJit allows encoding and working with all forms of addresses mentioned and
+//! implemented by X86. In addition, it also allows constructing absolute 64-bit
+//! memory address operands, which is only allowed in one form of the 'mov' instruction.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // Makes it easier to access x86 stuff...
+//!   using namespace asmjit::x86;
+//!
+//!   // BASE + OFFSET.
+//!   Mem a = ptr(rax);              // a = [rax]
+//!   Mem b = ptr(rax, 15);          // b = [rax + 15]
+//!
+//!   // BASE + INDEX << SHIFT - Shift is in BITS as used by X86!
+//!   Mem c = ptr(rax, rbx);         // c = [rax + rbx]
+//!   Mem d = ptr(rax, rbx, 2);      // d = [rax + rbx << 2]
+//!   Mem e = ptr(rax, rbx, 2, 15);  // e = [rax + rbx << 2 + 15]
+//!
+//!   // BASE + VM (Vector Index) (encoded as MOD+VSIB).
+//!   Mem f = ptr(rax, xmm1);        // f = [rax + xmm1]
+//!   Mem g = ptr(rax, xmm1, 2);     // g = [rax + xmm1 << 2]
+//!   Mem h = ptr(rax, xmm1, 2, 15); // h = [rax + xmm1 << 2 + 15]
+//!
+//!
// Absolute address:
+//!   uint64_t addr = (uint64_t)0x1234;
+//!   Mem i = ptr(addr);             // i = [0x1234]
+//!   Mem j = ptr(addr, rbx);        // j = [0x1234 + rbx]
+//!   Mem k = ptr(addr, rbx, 2);     // k = [0x1234 + rbx << 2]
+//!
+//!   // LABEL - Will be encoded as RIP (64-bit) or absolute address (32-bit).
+//!   Label L = ...;
+//!   Mem m = ptr(L);                // m = [L]
+//!   Mem n = ptr(L, rbx);           // n = [L + rbx]
+//!   Mem o = ptr(L, rbx, 2);        // o = [L + rbx << 2]
+//!   Mem p = ptr(L, rbx, 2, 15);    // p = [L + rbx << 2 + 15]
+//!
+//!   // RIP - 64-bit only (RIP can't use INDEX).
+//!   Mem q = ptr(rip, 24);          // q = [rip + 24]
+//! }
+//! ```
+//!
+//! Memory operands can optionally contain memory size. This is required by
+//! instructions where the memory size cannot be deduced from other operands,
+//! like `inc` and `dec` on X86:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // The same as: dword ptr [rax + rbx].
+//!   x86::Mem a = x86::dword_ptr(x86::rax, x86::rbx);
+//!
+//!   // The same as: qword ptr [rdx + rsi << 0 + 1].
+//!   x86::Mem b = x86::qword_ptr(x86::rdx, x86::rsi, 0, 1);
+//! }
+//! ```
+//!
+//! Memory operands provide an API that can be used to access their properties:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem() {
+//!   // The same as: dword ptr [rax + 12].
+//!   x86::Mem mem = x86::dword_ptr(x86::rax, 12);
+//!
+//!   mem.hasBase();           // true.
+//!   mem.hasIndex();          // false.
+//!   mem.size();              // 4.
+//!   mem.offset();            // 12.
+//!
+//!   mem.setSize(0);          // Sets the size to 0 (makes it sizeless).
+//!   mem.addOffset(-1);       // Adds -1 to the offset and makes it 11.
+//!   mem.setOffset(0);        // Sets the offset to 0.
+//!   mem.setBase(x86::rcx);   // Changes BASE to RCX.
+//!   mem.setIndex(x86::rax);  // Changes INDEX to RAX.
+//!   mem.hasIndex();          // true.
+//! }
+//! // ...
+//! ```
+//!
+//! Making changes to a memory operand is convenient when emitting loads
+//! and stores:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! void testX86Mem(CodeHolder& code) {
+//!   x86::Assembler a(&code);                 // Your initialized x86::Assembler.
+//!   x86::Mem mSrc = x86::ptr(x86::eax);      // Construct [eax] memory operand.
+//!
+//!   // One way of emitting a bunch of loads is to use `mem.adjusted()`, which
+//!   // returns a new memory operand and keeps the source operand unchanged.
+//!   a.movaps(x86::xmm0, mSrc);               // No adjustment needed to load [eax].
+//!   a.movaps(x86::xmm1, mSrc.adjusted(16));  // Loads from [eax + 16].
+//!   a.movaps(x86::xmm2, mSrc.adjusted(32));  // Loads from [eax + 32].
+//!   a.movaps(x86::xmm3, mSrc.adjusted(48));  // Loads from [eax + 48].
+//!
+//!   // ... do something with xmm0-3 ...
+//!
+//!   // Another way of adjusting memory is to change the operand in-place.
+//!   // If you want to keep the original operand you can simply clone it.
+//!   x86::Mem mDst = mSrc.clone();            // Clone mSrc.
+//!
+//!   a.movaps(mDst, x86::xmm0);               // Stores xmm0 to [eax].
+//!   mDst.addOffset(16);                      // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm1);               // Stores to [eax + 16].
+//!   mDst.addOffset(16);                      // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm2);               // Stores to [eax + 32].
+//!   mDst.addOffset(16);                      // Adds 16 to `mDst`.
+//!
+//!   a.movaps(mDst, x86::xmm3);               // Stores to [eax + 48].
+//! }
+//! ```
+//!
+//! ### Assembler Examples
+//!
+//! - \ref x86::Assembler provides many X86/X64 examples.
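+//!
+//! As a quick orientation before diving into those, the minimal sketch below
+//! (an illustrative example - the register choice and the loop shape are
+//! arbitrary) shows how a label created by the emitter is typically bound and
+//! referenced when emitting a simple counted loop with \ref x86::Assembler:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Emits the equivalent of: do { counter--; } while (counter != 0);
+//! void emitCountdownLoop(x86::Assembler& a) {
+//!   Label L_Loop = a.newLabel();   // Create an unbound label.
+//!
+//!   a.mov(x86::ecx, 10);           // counter = 10.
+//!   a.bind(L_Loop);                // Bind the label at the loop entry.
+//!   a.dec(x86::ecx);               // counter--.
+//!   a.jnz(L_Loop);                 // Jump back while counter != 0.
+//! }
+//! ```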
+ +// ============================================================================ +// [Documentation - asmjit_builder] +// ============================================================================ //! \defgroup asmjit_builder Builder -//! \brief Builder API. +//! \brief Builder interface, nodes, and passes. +//! +//! ### Overview +//! +//! Both \ref BaseBuilder and \ref BaseCompiler interfaces describe emitters +//! that emit into a representation that allows further processing. The code +//! stored in such representation is completely safe to be patched, simplified, +//! reordered, obfuscated, removed, injected, analyzed, or processed some other +//! way. Each instruction, label, directive, or other building block is stored +//! as \ref BaseNode (or derived class like \ref InstNode or \ref LabelNode) +//! and contains all the information necessary to pass that node later to the +//! assembler. +//! +//! \ref BaseBuilder is an emitter that inherits from \ref BaseEmitter interface. +//! It was designed to provide a maximum compatibility with the existing \ref +//! BaseAssembler emitter so users can move from assembler to builder when needed, +//! for example to implement post-processing, which is not possible with Assembler. +//! +//! ### Builder Nodes +//! +//! \ref BaseBuilder doesn't generate machine code directly, it uses an intermediate +//! representation based on nodes, however, it allows to serialize to \ref BaseAssembler +//! when the code is ready to be encoded. +//! +//! There are multiple node types used by both \ref BaseBuilder and \ref BaseCompiler : +//! +//! - Basic nodes: +//! - \ref BaseNode - Base class for all nodes. +//! - \ref InstNode - Represents an instruction node. +//! - \ref AlignNode - Represents an alignment directive (.align). +//! - \ref LabelNode - Represents a location where to bound a \ref Label. +//! +//! - Data nodes: +//! - \ref EmbedDataNode - Represents data. +//! - \ref EmbedLabelNode - Represents \ref Label address embedded as data. +//! - \ref EmbedLabelDeltaNode - Represents a difference of two labels +//! embedded in data. +//! - \ref ConstPoolNode - Represents a constant pool data embedded as data. +//! +//! - Informative nodes: +//! - \ref CommentNode - Represents a comment string, doesn't affect code +//! generation. +//! - \ref SentinelNode - A marker that can be used to remember certain +//! position in code or data, doesn't affect code generation. Used by +//! \ref FuncNode to mark the end of a function. +//! +//! - Other nodes are provided by \ref asmjit_compiler infrastructure. +//! +//! ### Builder Examples //! -//! Both Builder and Compiler are emitters that emit everything to a representation -//! that allows further processing. The code stored in such representation is -//! completely safe to be patched, simplified, reordered, obfuscated, removed, -//! injected, analyzed, or processed some other way. Each instruction, label, -//! directive, or other building block is stored as \ref BaseNode (or derived -//! class like \ref InstNode or \ref LabelNode) and contains all the information -//! necessary to pass that node later to the Assembler. +//! - \ref x86::Builder provides many X86/X64 examples. + +// ============================================================================ +// [Documentation - asmjit_compiler] +// ============================================================================ //! \defgroup asmjit_compiler Compiler -//! \brief Compiler API. +//! \brief Compiler interface. +//! +//! ### Overview +//! +//! 
\ref BaseCompiler is a high-level interface built on top of the \ref BaseBuilder
+//! interface, which provides register allocation and support for defining and
+//! invoking functions. At the moment it's the easiest way of generating code
+//! in AsmJit as most architecture and OS specifics are properly abstracted and
+//! handled by AsmJit automatically. However, abstractions also mean restrictions,
+//! which means that \ref BaseCompiler has more limitations than \ref BaseAssembler
+//! or \ref BaseBuilder.
+//!
+//! Since \ref BaseCompiler provides register allocation it also establishes the
+//! concept of functions - a function in the Compiler sense is a unit in which virtual
+//! registers are allocated into physical registers by the register allocator.
+//! In addition, it enables using such virtual registers in function invocations.
+//!
+//! \ref BaseCompiler automatically handles function calling conventions. It's
+//! still architecture dependent, but makes code generation much easier.
+//! Functions are essential; the first step to generate some code is to define a
+//! signature of the function to be generated (before generating the function body
+//! itself). Function arguments and return value(s) are handled by assigning
+//! virtual registers to them. Similarly, function calls are handled the same way.
+//!
+//! ### Compiler Nodes
+//!
+//! \ref BaseCompiler adds some nodes that are required for function generation
+//! and invocation:
+//!
+//! - \ref FuncNode - Represents a function definition.
+//! - \ref FuncRetNode - Represents a function return.
+//! - \ref InvokeNode - Represents a function invocation.
 //!
-//! Compiler tool is built on top of a \ref asmjit_builder API and adds register
-//! allocation and support for defining and calling functions into it. At the
-//! moment it's the easiest way to generate some code as most architecture and
-//! OS specific stuff is properly abstracted, however, abstractions also mean
-//! that not everything is possible with the Compiler.
+//! \ref BaseCompiler also makes use of passes (\ref Pass) and automatically
+//! adds an architecture-dependent register allocator pass to the list of passes
+//! when attached to \ref CodeHolder.
+//!
+//! ### Compiler Examples
+//!
+//! - \ref x86::Compiler provides many X86/X64 examples.
+//!
+//! ### Compiler Tips
+//!
+//! Users of AsmJit have made mistakes in the past, so this section should provide
+//! some useful tips for beginners:
+//!
+//! - Virtual registers in Compiler are bound to a single function. At the
+//!   moment the implementation doesn't care whether a single virtual register
+//!   is used in multiple functions, but it sees it as two independent virtual
+//!   registers in that case. This means that virtual registers cannot be used
+//!   to implement global variables. Global variables are basically memory
+//!   addresses which functions can read from and write to, and they have to
+//!   be implemented in the same way.
+//!
+//! - Compiler provides useful debugging functionality, which can be turned
+//!   on through \ref FormatOptions::Flags. Use \ref Logger::addFlags() to
+//!   turn on additional logging features when using Compiler.
-//! \defgroup asmjit_func Function
-//! \brief Function API.
+// ============================================================================
+// [Documentation - asmjit_function]
+// ============================================================================
-//! \defgroup asmjit_jit JIT
-//! \brief JIT API and Virtual Memory Management.
+//!
\defgroup asmjit_function Function
+//! \brief Function definitions.
+//!
+//! ### Overview
+//!
+//! AsmJit provides functionality that can be used to define function signatures
+//! and to automatically calculate an optimal function frame that can be used directly
+//! by prolog and epilog insertion. This feature was exclusive to AsmJit's Compiler
+//! for a very long time, but was abstracted out and is now available for all users
+//! regardless of the emitter they use. The following use cases are possible:
+//!
+//! - Calculate function frame before the function is generated - this is the
+//!   only way available to \ref BaseAssembler users and it will be described
+//!   in this section.
+//!
+//! - Calculate function frame after the function is generated - this way is
+//!   generally used by \ref BaseBuilder and \ref BaseCompiler emitters and
+//!   is generally described in the \ref asmjit_compiler section.
+//!
+//! The following concepts are used to describe and create functions in AsmJit:
+//!
+//! - \ref Type::Id - Type-id is an 8-bit value that describes a platform
+//!   independent type as we know it from C/C++. It provides abstractions for
+//!   most common types like `int8_t`, `uint32_t`, `uintptr_t`, `float`,
+//!   `double`, and all possible vector types to match ISAs up to AVX512.
+//!   \ref Type::Id was introduced originally for \ref asmjit_compiler, but
+//!   it's now used by \ref FuncSignature as well.
+//!
+//! - \ref CallConv - Describes a calling convention - this class contains
+//!   instructions to assign registers and stack addresses to function
+//!   arguments and return value(s), but doesn't specify any function
+//!   signature itself. Calling conventions are architecture and OS dependent.
+//!
+//! - \ref FuncSignature - Describes a function signature, for example
+//!   `int func(int, int)`. FuncSignature contains a function calling convention
+//!   id, return value type, and function arguments. The signature itself is
+//!   platform independent and uses \ref Type::Id to describe types of function
+//!   arguments and function return value(s).
+//!
+//! - \ref FuncDetail - Architecture and ABI dependent information that describes
+//!   \ref CallConv and expanded \ref FuncSignature. Each function argument and
+//!   return value is represented as \ref FuncValue that contains the original
+//!   \ref Type::Id enriched with additional information that specifies whether
+//!   the value is passed or returned by register (and which register) or by
+//!   stack. Each value also contains some other metadata that provide additional
+//!   information required to handle it properly (for example whether a vector is
+//!   passed indirectly by a pointer as required by WIN64 calling convention).
+//!
+//! - \ref FuncFrame - Contains information about the function frame that can
+//!   be used by the prolog/epilog inserter (PEI). Holds call stack size and
+//!   alignment, local stack size and alignment, and various attributes that
+//!   describe how prolog and epilog should be constructed. `FuncFrame` doesn't
+//!   know anything about the function's arguments or return values, it holds only
+//!   the information necessary to create valid and ABI-conforming function prologs
+//!   and epilogs.
+//!
+//! - \ref FuncArgsAssignment - A helper class that can be used to reassign
+//!   function arguments into user specified registers. It's an architecture- and
+//!   ABI-dependent mapping from function arguments described by \ref CallConv
+//!   and \ref FuncDetail into registers specified by the user.
+//!
+//!
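+//! How these classes fit together when driving \ref BaseAssembler directly is
+//! shown in the condensed sketch below (an illustrative example - the signature,
+//! the chosen registers, and the dirty registers are arbitrary and depend on the
+//! actual function body and target ABI):
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Prepares the frame of 'void fn(int* dst, const int* src)' and emits
+//! // its prolog, argument assignment, and epilog.
+//! void emitFrameExample(x86::Assembler& a) {
+//!   FuncDetail func;
+//!   func.init(FuncSignatureT<void, int*, const int*>(CallConv::kIdHost),
+//!             a.environment());
+//!
+//!   FuncFrame frame;
+//!   frame.init(func);                    // Initialize the frame from FuncDetail.
+//!   frame.addDirtyRegs(x86::xmm0);       // Registers the function body clobbers.
+//!
+//!   x86::Gp dst = x86::rdi;              // Illustrative registers only - pick
+//!   x86::Gp src = x86::rsi;              // registers valid for your target ABI.
+//!
+//!   FuncArgsAssignment args(&func);      // Map function arguments to registers.
+//!   args.assignAll(dst, src);
+//!   args.updateFuncFrame(frame);
+//!   frame.finalize();                    // The frame is now ready to be used.
+//!
+//!   a.emitProlog(frame);                 // Emit function prolog.
+//!   a.emitArgsAssignment(frame, args);   // Move arguments to 'dst' and 'src'.
+//!   // ... the function body would be emitted here ...
+//!   a.emitEpilog(frame);                 // Emit function epilog and return.
+//! }
+//! ```
+//!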
That's a lot of concepts, where each represents one step in the function frame
+//! calculation. Together they can be used to create function prologs and epilogs,
+//! and to calculate the information necessary to perform function calls.
-//! \defgroup asmjit_zone Zone
-//! \brief Zone allocator and zone allocated containers.
+// ============================================================================
+// [Documentation - asmjit_logging]
+// ============================================================================
-//! \defgroup asmjit_support Support
-//! \brief Support API.
+//! \defgroup asmjit_logging Logging
+//! \brief Logging and formatting.
+//!
+//! ### Overview
+//!
+//! The initial phase of a project that generates machine code is not always smooth.
+//! Failure cases are common not just at the beginning phase, but also during
+//! development or refactoring. AsmJit provides logging functionality to address
+//! this issue. AsmJit already does a good job with function overloading to prevent
+//! emitting unencodable instructions, but it can't prevent you from emitting machine
+//! code that is correct at the instruction level, but doesn't work when it's executed
+//! as a whole. Logging has always been an important part of AsmJit's infrastructure and
+//! looking at logs can sometimes reveal code generation issues quickly.
+//!
+//! AsmJit provides an API for logging and formatting:
+//! - \ref Logger - A logger that you can pass to \ref CodeHolder and all emitters
+//!   that inherit from \ref BaseEmitter.
+//! - \ref FormatOptions - Formatting options that can change how instructions and
+//!   operands are formatted.
+//! - \ref Formatter - A namespace that provides functions that can format input
+//!   data like \ref Operand, \ref BaseReg, \ref Label, and \ref BaseNode into
+//!   \ref String.
+//!
+//! AsmJit's \ref Logger serves the following purposes:
+//! - Provides a basic foundation for logging.
+//! - It's an abstract class that leaves the implementation to users. The following
+//!   built-in implementations are provided for simplicity:
+//!   - \ref FileLogger implements logging into a standard `FILE` stream.
+//!   - \ref StringLogger serializes all logs into a \ref String instance.
+//!
+//! AsmJit's \ref FormatOptions allows customizing the formatting of
+//! instructions and operands through:
+//! - \ref FormatOptions::Flags
+//! - \ref FormatOptions::IndentationType
+//!
+//! ### Logging
+//!
+//! A \ref Logger is typically attached to a \ref CodeHolder, which propagates it
+//! to all attached emitters automatically. The example below illustrates how to
+//! use a \ref FileLogger that outputs to standard output:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! int main() {
+//!   JitRuntime rt;                 // Runtime specialized for JIT code execution.
+//!   FileLogger logger(stdout);     // Logger should always survive CodeHolder.
+//!
+//!   CodeHolder code;               // Holds code and relocation information.
+//!   code.init(rt.environment());   // Initialize to the same arch as JIT runtime.
+//!   code.setLogger(&logger);       // Attach the `logger` to `code` holder.
+//!
+//!   // ... code as usual, everything emitted will be logged to `stdout` ...
+//!   return 0;
+//! }
+//! ```
+//!
+//! If output to a FILE stream is not desired it's possible to use \ref StringLogger,
+//! which concatenates everything into a multi-line string:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//! #include <utility>
+//!
+//! using namespace asmjit;
+//!
+//! int main() {
+//!
JitRuntime rt;                 // Runtime specialized for JIT code execution.
+//!   StringLogger logger;           // Logger should always survive CodeHolder.
+//!
+//!   CodeHolder code;               // Holds code and relocation information.
+//!   code.init(rt.environment());   // Initialize to the same arch as JIT runtime.
+//!   code.setLogger(&logger);       // Attach the `logger` to `code` holder.
+//!
+//!   // ... code as usual, logging will be concatenated to logger string ...
+//!
+//!   // You can either use the string from StringLogger directly or you can
+//!   // move it. Logger::data() returns its content as null terminated char[].
+//!   printf("Logger content: %s\n", logger.data());
+//!
+//!   // It can be moved into your own string like this:
+//!   String content = std::move(logger.content());
+//!   printf("The same content: %s\n", content.data());
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! ### Formatting
+//!
+//! AsmJit uses \ref Formatter to format inputs that are then passed to \ref
+//! Logger. Formatting is public and can be used by AsmJit users as well. The
+//! most important thing to know regarding formatting is that \ref Formatter
+//! always appends to the output string, so it can be used to build complex
+//! strings without having to concatenate intermediate strings.
+//!
+//! The first example illustrates how to format operands:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void logOperand(uint32_t arch, const Operand_& op) {
+//!   // The emitter is optional (named labels and virtual registers need it).
+//!   BaseEmitter* emitter = nullptr;
+//!
+//!   // No flags by default.
+//!   uint32_t formatFlags = FormatOptions::kNoFlags;
+//!
+//!   StringTmp<128> sb;
+//!   Formatter::formatOperand(sb, formatFlags, emitter, arch, op);
+//!   printf("%s\n", sb.data());
+//! }
+//!
+//! void formattingExample() {
+//!   using namespace x86;
+//!
+//!   // Architecture is not part of the operand, so it must be passed explicitly
+//!   // to 'logOperand' as well, which makes it compatible with what AsmJit
+//!   // normally does.
+//!   uint32_t arch = Environment::kArchX64;
+//!
+//!   logOperand(arch, rax);                    // Prints 'rax'.
+//!   logOperand(arch, ptr(rax, rbx, 2));       // Prints '[rax + rbx * 4]'.
+//!   logOperand(arch, dword_ptr(rax, rbx, 2)); // Prints 'dword [rax + rbx * 4]'.
+//!   logOperand(arch, imm(42));                // Prints '42'.
+//! }
+//! ```
+//!
+//! The next example illustrates how to format whole instructions:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//! #include <utility>
+//!
+//! using namespace asmjit;
+//!
+//! template<typename... Args>
+//! void logInstruction(uint32_t arch, const BaseInst& inst, Args&&... args) {
+//!   // The emitter is optional (named labels and virtual registers need it).
+//!   BaseEmitter* emitter = nullptr;
+//!
+//!   // No flags by default.
+//!   uint32_t formatFlags = FormatOptions::kNoFlags;
+//!
+//!   // The formatter expects operands in an array.
+//!   Operand_ operands[] { std::forward<Args>(args)... };
+//!
+//!   StringTmp<128> sb;
+//!   Formatter::formatInstruction(
+//!     sb, formatFlags, emitter, arch, inst, operands, sizeof...(args));
+//!   printf("%s\n", sb.data());
+//! }
+//!
+//! void formattingExample() {
+//!   using namespace x86;
+//!
+//!   // Architecture is not part of the operand, so it must be passed explicitly
+//!   // to 'logInstruction' as well, which makes it compatible with what AsmJit
+//!   // normally does.
+//!   uint32_t arch = Environment::kArchX64;
+//!
+//!   // Prints 'mov rax, rcx'.
+//!   logInstruction(arch, BaseInst(Inst::kIdMov), rax, rcx);
+//!
+//!   // Prints 'vaddpd zmm0, zmm1, [rax] {1to8}'.
+//!
logInstruction(arch,
+//!     BaseInst(Inst::kIdVaddpd),
+//!     zmm0, zmm1, ptr(rax)._1toN());
+//!
+//!   // BaseInst abstracts instruction id, instruction options, and extraReg.
+//!   // Prints 'lock add [rax], rcx'.
+//!   logInstruction(arch,
+//!     BaseInst(Inst::kIdAdd, Inst::kOptionLock),
+//!     x86::ptr(rax), rcx);
+//!
+//!   // Similarly an extra register (like AVX-512 selector) can be used.
+//!   // Prints 'vaddpd zmm0 {k2} {z}, zmm1, [rax]'.
+//!   logInstruction(arch,
+//!     BaseInst(Inst::kIdVaddpd, Inst::kOptionZMask, k2),
+//!     zmm0, zmm1, ptr(rax));
+//! }
+//! ```
+//!
+//! And finally, the example below illustrates how to use a built-in function
+//! to format the content of \ref BaseBuilder, which consists of nodes:
+//!
+//! ```
+//! #include <asmjit/core.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! void formattingExample(BaseBuilder* builder) {
+//!   uint32_t formatFlags = FormatOptions::kNoFlags;
+//!
+//!   // This also shows how temporary strings can be used.
+//!   StringTmp<512> sb;
+//!
+//!   // formatNodeList() requires the String for output, formatting flags, which
+//!   // were zero (no extra flags), and the builder instance, which we have
+//!   // provided. An overloaded version also exists, which accepts begin and
+//!   // end nodes and can be used to only format a range of nodes.
+//!   Formatter::formatNodeList(sb, formatFlags, builder);
+//!
+//!   // You can do whatever else with the string, it's always null terminated,
+//!   // so it can be passed to C functions like printf().
+//!   printf("%s\n", sb.data());
+//! }
+//! ```
+
+// ============================================================================
+// [Documentation - asmjit_error_handling]
+// ============================================================================
+
+//! \defgroup asmjit_error_handling Error Handling
+//! \brief Error handling.
+//!
+//! ### Overview
+//!
+//! AsmJit uses error codes to represent and return errors. Every function that
+//! can fail returns an \ref Error code. Exceptions are never thrown by AsmJit
+//! itself even in extreme conditions like out-of-memory, but it's possible to
+//! override \ref ErrorHandler::handleError() to throw; in that case no error
+//! will be returned and an exception will be thrown instead. All functions where
+//! this can happen are not marked `noexcept`.
+//!
+//! Errors should never be ignored, however, checking errors after each AsmJit
+//! API call would simply overcomplicate the whole code generation experience.
+//! \ref ErrorHandler exists to make the use of AsmJit API simpler as it allows
+//! customizing how errors are handled:
+//!
+//! - Record the error and continue (how the error is recorded is up to the
+//!   user's implementation).
+//! - Throw an exception. AsmJit doesn't use exceptions and is completely
+//!   exception-safe, but it's perfectly legal to throw an exception from
+//!   the error handler.
+//! - Use plain old C's `setjmp()` and `longjmp()`. AsmJit always puts Assembler,
+//!   Builder and Compiler into a consistent state before calling \ref
+//!   ErrorHandler::handleError(), so `longjmp()` can be used without issues to
+//!   cancel the code-generation if an error occurred. This method can be used if
+//!   exception handling in your project is turned off and you still want some
+//!   comfort. In most cases it should be safe as AsmJit uses \ref Zone memory
+//!   and the ownership of memory it allocates always ends with the instance that
+//!   allocated it. If using this approach please never jump outside the lifetime
+//!   of \ref CodeHolder and \ref BaseEmitter.
+//!
+//!
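+//! The minimal sketch below illustrates the `setjmp()` / `longjmp()` approach
+//! described above (the handler and function names are only illustrative):
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <setjmp.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! static jmp_buf errorJumpBuffer;
+//!
+//! // Error handler that cancels code generation by jumping back to setjmp().
+//! class LongJmpErrorHandler : public ErrorHandler {
+//! public:
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     longjmp(errorJumpBuffer, 1);
+//!   }
+//! };
+//!
+//! int generate(CodeHolder& code) {
+//!   LongJmpErrorHandler errorHandler;
+//!   code.setErrorHandler(&errorHandler);
+//!
+//!   if (setjmp(errorJumpBuffer)) {
+//!     printf("Code generation was canceled by the error handler\n");
+//!     return 1;
+//!   }
+//!
+//!   x86::Assembler a(&code);
+//!   // ... code generation - any error would longjmp() back to the setjmp() above ...
+//!   return 0;
+//! }
+//! ```
+//!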
+//! ### Using ErrorHandler
+//!
+//! An example of attaching \ref ErrorHandler to \ref CodeHolder.
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // A simple error handler implementation, extend according to your needs.
+//! class MyErrorHandler : public ErrorHandler {
+//! public:
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     printf("AsmJit error: %s\n", message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!
+//!   MyErrorHandler myErrorHandler;
+//!   CodeHolder code;
+//!
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&myErrorHandler);
+//!
+//!   x86::Assembler a(&code);
+//!   // ... code generation ...
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! Useful classes in the error handling group:
+//!
+//!   - See \ref DebugUtils that provides utilities useful for debugging.
+//!   - See \ref Error that lists error codes that AsmJit uses.
+//!   - See \ref ErrorHandler for more details about error handling.
+
+// ============================================================================
+// [Documentation - asmjit_instruction_db]
+// ============================================================================
+
+//! \defgroup asmjit_instruction_db Instruction DB
+//! \brief Instruction database (introspection, read/write, validation, ...).
+//!
+//! ### Overview
+//!
+//! AsmJit provides a public instruction database that can be used to query
+//! information about a complete instruction. Querying the database requires
+//! knowledge of the following:
+//!
+//!   - \ref BaseInst - Base instruction that contains instruction id, options,
+//!     and a possible extra-register that represents either a REP prefix counter
+//!     or an AVX-512 selector (mask).
+//!   - \ref Operand - Represents operands of an instruction.
+//!
+//! Each instruction can then be queried for the following information:
+//!
+//!   - \ref InstRWInfo - Read/write information of an instruction and its operands.
+//!   - \ref OpRWInfo - Read/write information of a single operand, part of
+//!     the \ref InstRWInfo data structure.
+//!   - \ref BaseFeatures - CPU features required to execute the instruction.
+//!
+//! In addition to the query functionality, AsmJit is also able to validate whether
+//! an instruction and its operands are valid. This is useful for making sure
+//! that what the user tries to emit is correct, and it can also be used by other
+//! projects that parse user input, like the AsmTK project.
+//!
+//! ### Query API
+//!
+//! The instruction query API is provided by the \ref InstAPI namespace. The
+//! following queries are possible:
+//!
+//!   - \ref InstAPI::queryRWInfo() - queries read/write information of the
+//!     given instruction and its operands, including CPU flags read/written.
+//!
+//!   - \ref InstAPI::queryFeatures() - queries CPU features that are required
+//!     to execute the given instruction. A full instruction with operands must
+//!     be given, as some architectures like X86 may require different features
+//!     for the same instruction depending on its operands.
+//!
+//!   - asmjit_test_x86_instinfo.cpp
+//!     can also be used as a reference for accessing instruction information.
+//!
+//! ### Validation API
+//!
+//! The instruction validation API is provided by the \ref InstAPI namespace in
+//! a similar fashion to the Query API; however, validation can also be turned
+//! on at the \ref BaseEmitter level. The following is possible:
+//!
+//!   - \ref InstAPI::validate() - low-level instruction validation function
+//!     that is used internally by emitters if strict validation is enabled.
+//!
+//!   - \ref BaseEmitter::addValidationOptions() - can be used to enable
+//!     validation at the emitter level, see \ref BaseEmitter::ValidationOptions.
+
+
+// ============================================================================
+// [Documentation - asmjit_virtual_memory]
+// ============================================================================
+
+//! \defgroup asmjit_virtual_memory Virtual Memory
+//! \brief Virtual memory management.
+//!
+//! ### Overview
+//!
+//! AsmJit's virtual memory management is divided into two main categories:
+//!
+//!   - Low-level API that provides cross-platform abstractions for virtual
+//!     memory allocation. Implemented in the \ref VirtMem namespace.
+//!   - High-level API that makes it very easy to store generated code for
+//!     execution. See \ref JitRuntime, which is used by many examples for its
+//!     simplicity and easy integration with \ref CodeHolder. There is also
+//!     \ref JitAllocator, which lies somewhere between raw memory allocation
+//!     and \ref JitRuntime.
+
+// ============================================================================
+// [Documentation - asmjit_zone_memory]
+// ============================================================================
+
+//! \defgroup asmjit_zone Zone Memory
+//! \brief Zone memory allocator and containers.
+//!
+//! ### Overview
+//!
+//! AsmJit uses zone memory allocation (also known as arena allocation) to allocate
+//! most of the data it uses. It's a fast allocator that allows AsmJit to allocate
+//! a lot of small data structures quickly and without `malloc()` overhead. Since
+//! code generators and all related classes are usually short-lived, this approach
+//! decreases memory usage and fragmentation, as arena-based allocators always
+//! allocate larger blocks of memory, which are then split into smaller chunks.
+//!
+//! Another advantage of zone memory allocation is that, since the whole library
+//! uses this strategy, it's very easy to deallocate everything that a particular
+//! instance is holding by simply releasing the memory the allocator holds. This
+//! improves the destruction time of such objects, as there is no per-object
+//! destruction at all. Long-lived objects just reset their data in the destructor
+//! or in their reset() member function for future reuse. For this purpose all
+//! containers in AsmJit are also zone allocated.
+//!
+//! ### Zone Allocation
+//!
+//!   - \ref Zone - Incremental zone memory allocator with minimum features. It
+//!     can only allocate memory; there is no way to return memory back to
+//!     the allocator.
+//!
+//!   - \ref ZoneTmp - A temporary \ref Zone with some initial static storage.
+//!     If the allocation requests fit the preallocated static storage, there
+//!     will be no dynamic memory allocation during the lifetime of \ref ZoneTmp;
+//!     otherwise it acts as a \ref Zone with one preallocated block on the
+//!     stack.
+//!
+//!   - \ref ZoneAllocator - A wrapper of \ref Zone that provides the capability
+//!     of returning memory to the allocator. Such memory is stored in a pool for
+//!     later reuse.
+//!
+//! ### Zone Allocated Containers
+//!
+//!   - \ref ZoneString - Zone allocated string.
+//!   - \ref ZoneHash - Zone allocated hash table.
+//!   - \ref ZoneTree - Zone allocated red-black tree.
+//!   - \ref ZoneList - Zone allocated double-linked list.
+//!   - \ref ZoneStack - Zone allocated stack.
+//!   - \ref ZoneVector - Zone allocated vector.
+//!   - \ref ZoneBitVector - Zone allocated vector of bits.
+//!
+//! ### Using Zone Allocated Containers
+//!
+//! The most common data structure exposed by AsmJit is \ref ZoneVector. It's very
+//! similar to `std::vector`, but the implementation doesn't use exceptions and
+//! uses the mentioned \ref ZoneAllocator for performance reasons. You don't have
+//! to worry about allocations, as you should not need to add items to AsmJit's
+//! data structures directly; there should be an API for all required operations.
+//!
+//! The following APIs in \ref CodeHolder return a \ref ZoneVector reference:
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! void example(CodeHolder& code) {
+//!   // Contains all emitters attached to CodeHolder.
+//!   const ZoneVector<BaseEmitter*>& emitters = code.emitters();
+//!
+//!   // Contains all section entries managed by CodeHolder.
+//!   const ZoneVector<Section*>& sections = code.sections();
+//!
+//!   // Contains all label entries managed by CodeHolder.
+//!   const ZoneVector<LabelEntry*>& labelEntries = code.labelEntries();
+//!
+//!   // Contains all relocation entries managed by CodeHolder.
+//!   const ZoneVector<RelocEntry*>& relocEntries = code.relocEntries();
+//! }
+//! ```
+//!
+//! \ref ZoneVector overloads the array access operator so that its elements
+//! can be accessed through operator[]. Some standard functions like
+//! \ref ZoneVector::empty(), \ref ZoneVector::size(), and \ref ZoneVector::data()
+//! are provided as well. Vectors are also iterable through a range-based for loop:
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! void example(CodeHolder& code) {
+//!   for (LabelEntry* le : code.labelEntries()) {
+//!     printf("Label #%u {Bound=%s Offset=%llu}",
+//!            le->id(),
+//!            le->isBound() ? "true" : "false",
+//!            (unsigned long long)le->offset());
+//!   }
+//! }
+//! ```
+//!
+//! ### Design Considerations
+//!
+//! Zone-allocated containers do not store the allocator within the container.
+//! This decision was made to reduce the footprint of such containers, as AsmJit
+//! tooling, especially Compiler's register allocation, may use many instances
+//! of such containers to perform code analysis and register allocation.
+//!
+//! For example, to append an item to a \ref ZoneVector it's required to pass
+//! the allocator as the first argument, so it can be used in case the
+//! vector needs a reallocation. Such a function also returns an error, which
+//! must be propagated to the caller.
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! Error example(ZoneAllocator* allocator) {
+//!   ZoneVector<int> vector;
+//!
+//!   // Unfortunately, the allocator must be provided to all functions that
+//!   // mutate the vector. However, AsmJit users should never need to do this
+//!   // as all manipulation should be done through the public API, which takes
+//!   // care of that.
+//!   for (int i = 0; i < 100; i++) {
+//!     ASMJIT_PROPAGATE(vector.append(allocator, i));
+//!   }
+//!
+//!   // By default the vector's destructor doesn't release anything as it knows
+//!   // that its content is zone allocated. However, \ref ZoneVector::release()
+//!   // can be used to explicitly release the vector data to the allocator if
+//!   // necessary.
+//!   vector.release(allocator);
+//!
+//!   return kErrorOk;
+//! }
+//! ```
+//!
+//! Containers like \ref ZoneVector also provide functionality to reserve space
+//! for a certain number of items before any items are added. This approach is
+//! used internally in most places, as it allows preparing space for data that
+//! will be added to a container before the data itself is created.
+//!
+//! ```
+//! using namespace asmjit;
+//!
+//! Error example(ZoneAllocator* allocator) {
+//!   ZoneVector<int> vector;
+//!
+//!   // Reserve space for 100 items so the following appends cannot fail.
+//!   ASMJIT_PROPAGATE(vector.willGrow(allocator, 100));
+//!   for (int i = 0; i < 100; i++) {
+//!     // Cannot fail.
+//!     vector.appendUnsafe(allocator, i);
+//!   }
+//!
+//!   vector.release(allocator);
+//!
+//!   return kErrorOk;
+//! }
+//! ```
+
+// ============================================================================
+// [Documentation - asmjit_utilities]
+// ============================================================================
+
+//! \defgroup asmjit_utilities Utilities
+//! \brief Utility classes and functions.
+//!
+//! ### Overview
+//!
+//! AsmJit uses and provides utility classes and functions that can also be
+//! used by AsmJit users. The functionality can be divided into the following
+//! topics:
+//!
+//! ### String Functionality
+//!
+//!   - \ref String - AsmJit's string container, which is used internally,
+//!     doesn't use exceptions, and has a stable layout that is not dependent
+//!     on the C++ standard library.
+//!   - \ref StringTmp - String that can have its base storage allocated on
+//!     the stack. The amount of stack storage can be specified as a template
+//!     parameter.
+//!   - \ref FixedString - Fixed string container limited to N characters.
+//!
+//! ### Code Generation Utilities
+//!
+//!   - \ref ConstPool - Constant pool used by \ref BaseCompiler, but also
+//!     available to users who may find use of it.
+//!
+//! ### Support Functionality Used by AsmJit
+//!
+//!   - The \ref Support namespace provides many other utility functions and
+//!     classes that are used by AsmJit and made public.
+
+// ============================================================================
+// [Documentation - asmjit_x86]
+// ============================================================================
+
+//! \defgroup asmjit_x86 X86 Backend
+//! \brief X86/X64 backend.
+
+// ============================================================================
+// [Documentation - asmjit_ra]
+// ============================================================================
 //! \cond INTERNAL
 //! \defgroup asmjit_ra RA
 //! \brief Register allocator internals.
 //!
\endcond +} // {asmjit} + +// ============================================================================ +// [Core Headers] +// ============================================================================ + #include "./core/globals.h" -#include "./core/arch.h" +#include "./core/archtraits.h" #include "./core/assembler.h" #include "./core/builder.h" -#include "./core/callconv.h" #include "./core/codeholder.h" #include "./core/compiler.h" #include "./core/constpool.h" #include "./core/cpuinfo.h" #include "./core/datatypes.h" #include "./core/emitter.h" +#include "./core/environment.h" +#include "./core/errorhandler.h" #include "./core/features.h" +#include "./core/formatter.h" #include "./core/func.h" #include "./core/inst.h" #include "./core/jitallocator.h" #include "./core/jitruntime.h" -#include "./core/logging.h" +#include "./core/logger.h" #include "./core/operand.h" #include "./core/osutils.h" #include "./core/string.h" @@ -99,4 +2040,23 @@ #include "./core/zonestring.h" #include "./core/zonevector.h" +// ============================================================================ +// [Deprecated] +// ============================================================================ + +#ifndef ASMJIT_NO_DEPRECATED +namespace asmjit { + +#ifndef ASMJIT_NO_COMPILER +ASMJIT_DEPRECATED("Use InvokeNode instead of FuncCallNode") +typedef InvokeNode FuncCallNode; +#endif // !ASMJIT_NO_COMPILER + +#ifndef ASMJIT_NO_LOGGING +namespace Logging { using namespace Formatter; } +#endif //! ASMJIT_NO_LOGGING + +} // {asmjit} +#endif // !ASMJIT_NO_DEPRECATED + #endif // ASMJIT_CORE_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/api-build_p.h b/libs/asmjit/src/asmjit/core/api-build_p.h index 714107c..db37ca7 100644 --- a/libs/asmjit/src/asmjit/core/api-build_p.h +++ b/libs/asmjit/src/asmjit/core/api-build_p.h @@ -53,7 +53,7 @@ #include "./api-config.h" -#if !defined(ASMJIT_BUILD_DEBUG) && ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 4, 0) +#if !defined(ASMJIT_BUILD_DEBUG) && defined(__GNUC__) && !defined(__clang__) #define ASMJIT_FAVOR_SIZE __attribute__((__optimize__("Os"))) #define ASMJIT_FAVOR_SPEED __attribute__((__optimize__("O3"))) #elif ASMJIT_CXX_HAS_ATTRIBUTE(__minsize__, 0) diff --git a/libs/asmjit/src/asmjit/core/api-config.h b/libs/asmjit/src/asmjit/core/api-config.h index de083ff..aab3473 100644 --- a/libs/asmjit/src/asmjit/core/api-config.h +++ b/libs/asmjit/src/asmjit/core/api-config.h @@ -28,77 +28,95 @@ // [asmjit::Version] // ============================================================================ -#define ASMJIT_LIBRARY_VERSION 0x010200 /* 1.2.0 */ +//! \addtogroup asmjit_core +//! \{ + +//! AsmJit library version in `(Major << 16) | (Minor << 8) | (Patch)` format. +#define ASMJIT_LIBRARY_VERSION 0x010400 /* 1.4.0 */ + +//! \} // ============================================================================ -// [asmjit::Options] +// [asmjit::Build - Documentation] // ============================================================================ -// AsmJit Static Builds and Embedding -// ---------------------------------- -// -// These definitions can be used to enable static library build. Embed is used -// when AsmJit's source code is embedded directly in another project, implies -// static build as well. -// -// #define ASMJIT_EMBED // Asmjit is embedded (implies ASMJIT_BUILD_STATIC). -// #define ASMJIT_STATIC // Enable static-library build. 
+// NOTE: Doxygen cannot document macros that are not defined, that's why we have +// to define them and then undefine them, so it won't use the macros with its +// own preprocessor. +#ifdef _DOXYGEN +namespace asmjit { -// AsmJit Build Mode -// ----------------- -// -// These definitions control the build mode and tracing support. The build mode -// should be auto-detected at compile time, but it's possible to override it in -// case that the auto-detection fails. -// -// Tracing is a feature that is never compiled by default and it's only used to -// debug AsmJit itself. -// -// #define ASMJIT_BUILD_DEBUG // Always use debug-mode (ASMJIT_ASSERT enabled). -// #define ASMJIT_BUILD_RELEASE // Always use release-mode (ASMJIT_ASSERT disabled). +//! \addtogroup asmjit_build +//! \{ -// AsmJit Build Backends -// --------------------- -// -// These definitions control which backends to compile. If none of these is -// defined AsmJit will use host architecture by default (for JIT code generation). -// -// #define ASMJIT_BUILD_X86 // Enable X86 targets (X86 and X86_64). -// #define ASMJIT_BUILD_ARM // Enable ARM targets (ARM and AArch64). -// #define ASMJIT_BUILD_HOST // Enable targets based on target arch (default). +//! Asmjit is embedded, implies \ref ASMJIT_STATIC. +#define ASMJIT_EMBED -// AsmJit Build Options -// -------------------- -// -// Flags can be defined to disable standard features. These are handy especially -// when building AsmJit statically and some features are not needed or unwanted -// (like BaseCompiler). -// -// AsmJit features are enabled by default. -// #define ASMJIT_NO_BUILDER // Disable Builder (completely). -// #define ASMJIT_NO_COMPILER // Disable Compiler (completely). -// #define ASMJIT_NO_JIT // Disable JIT memory manager and JitRuntime. -// #define ASMJIT_NO_LOGGING // Disable logging and formatting (completely). -// #define ASMJIT_NO_TEXT // Disable everything that contains text -// // representation (instructions, errors, ...). -// #define ASMJIT_NO_VALIDATION // Disable validation API and options. -// #define ASMJIT_NO_INTROSPECTION // Disable API related to instruction database. -// // (validation, cpu features, rw-info, etc). +//! Enables static-library build. +#define ASMJIT_STATIC -// ASMJIT_NO_BUILDER implies ASMJIT_NO_COMPILER. -#if defined(ASMJIT_NO_BUILDER) && !defined(ASMJIT_NO_COMPILER) - #define ASMJIT_NO_COMPILER -#endif +//! Defined when AsmJit's build configuration is 'Debug'. +//! +//! \note Can be defined explicitly to bypass autodetection. +#define ASMJIT_BUILD_DEBUG -// Prevent compile-time errors caused by misconfiguration. -#if defined(ASMJIT_NO_TEXT) && !defined(ASMJIT_NO_LOGGING) - #pragma "ASMJIT_NO_TEXT can only be defined when ASMJIT_NO_LOGGING is defined." - #undef ASMJIT_NO_TEXT -#endif +//! Defined when AsmJit's build configuration is 'Release'. +//! +//! \note Can be defined explicitly to bypass autodetection. +#define ASMJIT_BUILD_RELEASE -#if defined(ASMJIT_NO_INTROSPECTION) && !defined(ASMJIT_NO_COMPILER) - #pragma message("ASMJIT_NO_INTROSPECTION can only be defined when ASMJIT_NO_COMPILER is defined") - #undef ASMJIT_NO_INTROSPECTION +//! Defined to build X86/X64 backend. +#define ASMJIT_BUILD_X86 + +//! Defined to build host backend autodetected at compile-time. +#define ASMJIT_BUILD_HOST + +//! Disables deprecated API at compile time. +#define ASMJIT_NO_DEPRECATED + +//! Disable non-host architectures entirely. +#define ASMJIT_NO_FOREIGN + +//! Disables \ref asmjit_builder functionality completely. 
+#define ASMJIT_NO_BUILDER + +//! Disables \ref asmjit_compiler functionality completely. +#define ASMJIT_NO_COMPILER + +//! Disables JIT memory management and \ref JitRuntime. +#define ASMJIT_NO_JIT + +//! Disables \ref Logger and \ref Formatter. +#define ASMJIT_NO_LOGGING + +//! Disables everything that contains text. +#define ASMJIT_NO_TEXT + +//! Disables instruction validation API. +#define ASMJIT_NO_VALIDATION + +//! Disables instruction introspection API. +#define ASMJIT_NO_INTROSPECTION + +// Avoid doxygen preprocessor using feature-selection definitions. +#undef ASMJIT_NO_BUILDER +#undef ASMJIT_NO_COMPILER +#undef ASMJIT_NO_JIT +#undef ASMJIT_NO_LOGGING +#undef ASMJIT_NO_TEXT +#undef ASMJIT_NO_VALIDATION +#undef ASMJIT_NO_INTROSPECTION + +//! \} + +} // {asmjit} +#endif // _DOXYGEN + +// Enable all features at IDE level, so it's properly highlighted and indexed. +#ifdef __INTELLISENSE__ + #ifndef ASMJIT_BUILD_X86 + #define ASMJIT_BUILD_X86 + #endif #endif // ============================================================================ @@ -113,8 +131,9 @@ #include #include -#include +#include #include +#include #include #include @@ -122,23 +141,45 @@ #include #endif + +// ============================================================================ +// [asmjit::Options] +// ============================================================================ + +// ASMJIT_NO_BUILDER implies ASMJIT_NO_COMPILER. +#if defined(ASMJIT_NO_BUILDER) && !defined(ASMJIT_NO_COMPILER) + #define ASMJIT_NO_COMPILER +#endif + +// Prevent compile-time errors caused by misconfiguration. +#if defined(ASMJIT_NO_TEXT) && !defined(ASMJIT_NO_LOGGING) + #pragma "ASMJIT_NO_TEXT can only be defined when ASMJIT_NO_LOGGING is defined." + #undef ASMJIT_NO_TEXT +#endif + +#if defined(ASMJIT_NO_INTROSPECTION) && !defined(ASMJIT_NO_COMPILER) + #pragma message("ASMJIT_NO_INTROSPECTION can only be defined when ASMJIT_NO_COMPILER is defined") + #undef ASMJIT_NO_INTROSPECTION +#endif + // ============================================================================ // [asmjit::Build - Globals - Deprecated] // ============================================================================ -// DEPRECATED: Will be removed in the future. 
-#if defined(ASMJIT_BUILD_EMBED) || defined(ASMJIT_BUILD_STATIC) - #if defined(ASMJIT_BUILD_EMBED) - #pragma message("'ASMJIT_BUILD_EMBED' is deprecated, use 'ASMJIT_STATIC'") - #endif - #if defined(ASMJIT_BUILD_STATIC) - #pragma message("'ASMJIT_BUILD_STATIC' is deprecated, use 'ASMJIT_STATIC'") - #endif +#ifndef ASMJIT_NO_DEPRECATED + #if defined(ASMJIT_BUILD_EMBED) || defined(ASMJIT_BUILD_STATIC) + #if defined(ASMJIT_BUILD_EMBED) + #pragma message("'ASMJIT_BUILD_EMBED' is deprecated, use 'ASMJIT_STATIC'") + #endif + #if defined(ASMJIT_BUILD_STATIC) + #pragma message("'ASMJIT_BUILD_STATIC' is deprecated, use 'ASMJIT_STATIC'") + #endif - #if !defined(ASMJIT_STATIC) - #define ASMJIT_STATIC + #if !defined(ASMJIT_STATIC) + #define ASMJIT_STATIC + #endif #endif -#endif +#endif // !ASMJIT_NO_DEPRECATED // ============================================================================ // [asmjit::Build - Globals - Build Mode] @@ -154,7 +195,7 @@ #endif // ============================================================================ -// [asmjit::Build - Globals - Target Architecture] +// [asmjit::Build - Globals - Target Architecture Information] // ============================================================================ #if defined(_M_X64) || defined(__x86_64__) @@ -181,7 +222,7 @@ #define ASMJIT_ARCH_MIPS 0 #endif -#define ASMJIT_ARCH_BITS (ASMJIT_ARCH_X86 | ASMJIT_ARCH_ARM | ASMJIT_ARCH_MIPS) +#define ASMJIT_ARCH_BITS (ASMJIT_ARCH_X86 | ASMJIT_ARCH_ARM | ASMJIT_ARCH_MIPS) #if ASMJIT_ARCH_BITS == 0 #undef ASMJIT_ARCH_BITS #if defined (__LP64__) || defined(_LP64) @@ -201,31 +242,33 @@ #define ASMJIT_ARCH_BE 0 #endif -// Build host architecture if no architecture is selected. -#if !defined(ASMJIT_BUILD_HOST) && \ - !defined(ASMJIT_BUILD_X86) && \ - !defined(ASMJIT_BUILD_ARM) - #define ASMJIT_BUILD_HOST -#endif +// ============================================================================ +// [asmjit::Build - Globals - Build Architectures Definitions] +// ============================================================================ -// Detect host architecture if building only for host. -#if ASMJIT_ARCH_X86 && defined(ASMJIT_BUILD_HOST) && !defined(ASMJIT_BUILD_X86) - #define ASMJIT_BUILD_X86 +#if !defined(ASMJIT_NO_FOREIGN) + // If 'ASMJIT_NO_FOREIGN' is not defined then all architectures will be built. + #if !defined(ASMJIT_BUILD_X86) + #define ASMJIT_BUILD_X86 + #endif +#else + // Detect architectures to build if building only for the host architecture. + #if ASMJIT_ARCH_X86 && !defined(ASMJIT_BUILD_X86) + #define ASMJIT_BUILD_X86 + #endif #endif -#if ASMJIT_ARCH_ARM && defined(ASMJIT_BUILD_HOST) && !defined(ASMJIT_BUILD_ARM) - #define ASMJIT_BUILD_ARM +// Define 'ASMJIT_BUILD_HOST' if we know that host architecture will be built. 
+#if !defined(ASMJIT_BUILD_HOST) && ASMJIT_ARCH_X86 && defined(ASMJIT_BUILD_X86) + #define ASMJIT_BUILD_HOST #endif // ============================================================================ // [asmjit::Build - Globals - C++ Compiler and Features Detection] // ============================================================================ -#define ASMJIT_CXX_CLANG 0 -#define ASMJIT_CXX_GNU 0 -#define ASMJIT_CXX_INTEL 0 -#define ASMJIT_CXX_MSC 0 -#define ASMJIT_CXX_MAKE_VER(MAJOR, MINOR, PATCH) ((MAJOR) * 10000000 + (MINOR) * 100000 + (PATCH)) +#define ASMJIT_CXX_GNU 0 +#define ASMJIT_CXX_MAKE_VER(MAJOR, MINOR) ((MAJOR) * 1000 + (MINOR)) // Intel Compiler [pretends to be GNU or MSC, so it must be checked first]: // - https://software.intel.com/en-us/articles/c0x-features-supported-by-intel-c-compiler @@ -233,9 +276,6 @@ // - https://software.intel.com/en-us/articles/c17-features-supported-by-intel-c-compiler #if defined(__INTEL_COMPILER) - #undef ASMJIT_CXX_INTEL - #define ASMJIT_CXX_INTEL ASMJIT_CXX_MAKE_VER(__INTEL_COMPILER / 100, (__INTEL_COMPILER / 10) % 10, __INTEL_COMPILER % 10) - // MSC Compiler: // - https://msdn.microsoft.com/en-us/library/hh567368.aspx // @@ -247,72 +287,26 @@ // - 19.10.0 == VS2017 #elif defined(_MSC_VER) && defined(_MSC_FULL_VER) - #undef ASMJIT_CXX_MSC - #if _MSC_VER == _MSC_FULL_VER / 10000 - #define ASMJIT_CXX_MSC ASMJIT_CXX_MAKE_VER(_MSC_VER / 100, _MSC_VER % 100, _MSC_FULL_VER % 10000) - #else - #define ASMJIT_CXX_MSC ASMJIT_CXX_MAKE_VER(_MSC_VER / 100, (_MSC_FULL_VER / 100000) % 100, _MSC_FULL_VER % 100000) - #endif - // Clang Compiler [Pretends to be GNU, so it must be checked before]: // - https://clang.llvm.org/cxx_status.html #elif defined(__clang_major__) && defined(__clang_minor__) && defined(__clang_patchlevel__) - #undef ASMJIT_CXX_CLANG - #define ASMJIT_CXX_CLANG ASMJIT_CXX_MAKE_VER(__clang_major__, __clang_minor__, __clang_patchlevel__) - // GNU Compiler: // - https://gcc.gnu.org/projects/cxx-status.html #elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) #undef ASMJIT_CXX_GNU - #define ASMJIT_CXX_GNU ASMJIT_CXX_MAKE_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) + #define ASMJIT_CXX_GNU ASMJIT_CXX_MAKE_VER(__GNUC__, __GNUC_MINOR__) #endif // Compiler features detection macros. -#if ASMJIT_CXX_CLANG && defined(__has_builtin) - #define ASMJIT_CXX_HAS_BUILTIN(NAME, CHECK) (__has_builtin(NAME)) -#else - #define ASMJIT_CXX_HAS_BUILTIN(NAME, CHECK) (!(!(CHECK))) -#endif - -#if ASMJIT_CXX_CLANG && defined(__has_extension) - #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (__has_extension(NAME)) -#elif ASMJIT_CXX_CLANG && defined(__has_feature) - #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (__has_feature(NAME)) -#else - #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (!(!(CHECK))) -#endif - -#if ASMJIT_CXX_CLANG && defined(__has_attribute) +#if defined(__clang__) && defined(__has_attribute) #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (__has_attribute(NAME)) #else #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (!(!(CHECK))) #endif -#if ASMJIT_CXX_CLANG && defined(__has_cpp_attribute) - #define ASMJIT_CXX_HAS_CPP_ATTRIBUTE(NAME, CHECK) (__has_cpp_attribute(NAME)) -#else - #define ASMJIT_CXX_HAS_CPP_ATTRIBUTE(NAME, CHECK) (!(!(CHECK))) -#endif - -// Compiler features by vendor. 
-#if defined(_MSC_VER) && !defined(_NATIVE_WCHAR_T_DEFINED) - #define ASMJIT_CXX_HAS_NATIVE_WCHAR_T 0 -#else - #define ASMJIT_CXX_HAS_NATIVE_WCHAR_T 1 -#endif - -#if ASMJIT_CXX_HAS_FEATURE(cxx_unicode_literals, ( \ - (ASMJIT_CXX_INTEL >= ASMJIT_CXX_MAKE_VER(14, 0, 0)) || \ - (ASMJIT_CXX_MSC >= ASMJIT_CXX_MAKE_VER(19, 0, 0)) || \ - (ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4 , 5, 0) && __cplusplus >= 201103L) )) - #define ASMJIT_CXX_HAS_UNICODE_LITERALS 1 -#else - #define ASMJIT_CXX_HAS_UNICODE_LITERALS 0 -#endif - // ============================================================================ // [asmjit::Build - Globals - API Decorators & Language Extensions] // ============================================================================ @@ -394,6 +388,15 @@ #define ASMJIT_REGPARM(N) #endif +#if ASMJIT_ARCH_X86 && defined(_WIN32) && defined(_MSC_VER) + #define ASMJIT_VECTORCALL __vectorcall +#elif ASMJIT_ARCH_X86 && defined(_WIN32) + #define ASMJIT_VECTORCALL __attribute__((__vectorcall__)) +#else + #define ASMJIT_VECTORCALL +#endif + + // Type alignment (not allowed by C++11 'alignas' keyword). #if defined(__GNUC__) #define ASMJIT_ALIGN_TYPE(TYPE, N) __attribute__((__aligned__(N))) TYPE @@ -403,13 +406,22 @@ #define ASMJIT_ALIGN_TYPE(TYPE, N) TYPE #endif +//! \def ASMJIT_MAY_ALIAS +//! +//! Expands to `__attribute__((__may_alias__))` if supported. #if defined(__GNUC__) #define ASMJIT_MAY_ALIAS __attribute__((__may_alias__)) #else #define ASMJIT_MAY_ALIAS #endif -// Annotations. +//! \def ASMJIT_LIKELY(...) +//! +//! Condition is likely to be taken (mostly error handling and edge cases). + +//! \def ASMJIT_UNLIKELY(...) +//! +//! Condition is unlikely to be taken (mostly error handling and edge cases). #if defined(__GNUC__) #define ASMJIT_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), 1) #define ASMJIT_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), 0) @@ -418,29 +430,42 @@ #define ASMJIT_UNLIKELY(...) (__VA_ARGS__) #endif +//! \def ASMJIT_FALLTHROUGH +//! +//! Portable [[fallthrough]] attribute. #if defined(__clang__) && __cplusplus >= 201103L #define ASMJIT_FALLTHROUGH [[clang::fallthrough]] -#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(7, 0, 0) +#elif defined(__GNUC__) && __GNUC__ >= 7 #define ASMJIT_FALLTHROUGH __attribute__((__fallthrough__)) #else #define ASMJIT_FALLTHROUGH ((void)0) /* fallthrough */ #endif +//! \def ASMJIT_DEPRECATED +//! +//! Marks function, class, struct, enum, or anything else as deprecated. +#if defined(__GNUC__) + #define ASMJIT_DEPRECATED(MESSAGE) __attribute__((__deprecated__(MESSAGE))) + #if defined(__clang__) + #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) __attribute__((__deprecated__(MESSAGE))) + #else + #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) /* not usable if a deprecated function uses it */ + #endif +#elif defined(_MSC_VER) + #define ASMJIT_DEPRECATED(MESSAGE) __declspec(deprecated(MESSAGE)) + #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) /* not usable if a deprecated function uses it */ +#else + #define ASMJIT_DEPRECATED(MESSAGE) + #define ASMJIT_DEPRECATED_STRUCT(MESSAGE) +#endif + // Utilities. 
#define ASMJIT_OFFSET_OF(STRUCT, MEMBER) ((int)(intptr_t)((const char*)&((const STRUCT*)0x100)->MEMBER) - 0x100) #define ASMJIT_ARRAY_SIZE(X) uint32_t(sizeof(X) / sizeof(X[0])) -#if ASMJIT_CXX_HAS_ATTRIBUTE(attribute_deprecated_with_message, ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 5, 0)) - #define ASMJIT_DEPRECATED(DECL, MESSAGE) DECL __attribute__((__deprecated__(MESSAGE))) -#elif ASMJIT_MSC - #define ASMJIT_DEPRECATED(DECL, MESSAGE) __declspec(deprecated(MESSAGE)) DECL -#else - #define ASMJIT_DEPRECATED(DECL, MESSAGE) DECL -#endif - #if ASMJIT_CXX_HAS_ATTRIBUTE(no_sanitize, 0) #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize__("undefined"))) -#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 9, 0) +#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 9) #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize_undefined__)) #else #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF @@ -459,8 +484,7 @@ #define ASMJIT_END_NAMESPACE \ _Pragma("clang diagnostic pop") \ } -#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 0, 0) && \ - ASMJIT_CXX_GNU < ASMJIT_CXX_MAKE_VER(5, 0, 0) +#elif defined(__GNUC__) && __GNUC__ == 4 #define ASMJIT_BEGIN_NAMESPACE \ namespace asmjit { \ _Pragma("GCC diagnostic push") \ @@ -468,7 +492,7 @@ #define ASMJIT_END_NAMESPACE \ _Pragma("GCC diagnostic pop") \ } -#elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(8, 0, 0) +#elif defined(__GNUC__) && __GNUC__ >= 8 #define ASMJIT_BEGIN_NAMESPACE \ namespace asmjit { \ _Pragma("GCC diagnostic push") \ @@ -480,8 +504,8 @@ #define ASMJIT_BEGIN_NAMESPACE \ namespace asmjit { \ __pragma(warning(push)) \ - __pragma(warning(disable: 4127)) /* conditional expression is constant*/\ - __pragma(warning(disable: 4201)) /* nameless struct/union */ + __pragma(warning(disable: 4127)) /* conditional expression is const */ \ + __pragma(warning(disable: 4201)) /* nameless struct/union */ #define ASMJIT_END_NAMESPACE \ __pragma(warning(pop)) \ } @@ -521,13 +545,8 @@ // [asmjit::Build - Globals - Cleanup] // ============================================================================ -// Try to cleanup things not used in other public headers. -#ifndef ASMJIT_EXPORTS - #undef ASMJIT_CXX_CLANG - #undef ASMJIT_CXX_GNU - #undef ASMJIT_CXX_INTEL - #undef ASMJIT_CXX_MSC - #undef ASMJIT_CXX_MAKE_VER -#endif +// Cleanup definitions that are only used within this header file. +#undef ASMJIT_CXX_GNU +#undef ASMJIT_CXX_MAKE_VER #endif // ASMJIT_CORE_API_CONFIG_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/arch.cpp b/libs/asmjit/src/asmjit/core/arch.cpp deleted file mode 100644 index 97fca9d..0000000 --- a/libs/asmjit/src/asmjit/core/arch.cpp +++ /dev/null @@ -1,176 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. 
If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#include "../core/api-build_p.h" -#include "../core/arch.h" -#include "../core/support.h" -#include "../core/type.h" - -#ifdef ASMJIT_BUILD_X86 - #include "../x86/x86operand.h" -#endif - -#ifdef ASMJIT_BUILD_ARM - #include "../arm/armoperand.h" -#endif - -ASMJIT_BEGIN_NAMESPACE - -// ============================================================================ -// [asmjit::ArchInfo] -// ============================================================================ - -// NOTE: Keep `const constexpr` otherwise MSC would not compile this code correctly. -static const constexpr uint32_t archInfoTable[] = { - // <--------------------+---------------------+-------------------+-------+ - // | Type | SubType | GPInfo| - // <--------------------+---------------------+-------------------+-------+ - Support::bytepack32_4x8(ArchInfo::kIdNone , ArchInfo::kSubIdNone, 0, 0), - Support::bytepack32_4x8(ArchInfo::kIdX86 , ArchInfo::kSubIdNone, 4, 8), - Support::bytepack32_4x8(ArchInfo::kIdX64 , ArchInfo::kSubIdNone, 8, 16), - Support::bytepack32_4x8(ArchInfo::kIdA32 , ArchInfo::kSubIdNone, 4, 16), - Support::bytepack32_4x8(ArchInfo::kIdA64 , ArchInfo::kSubIdNone, 8, 32) -}; - -ASMJIT_FAVOR_SIZE void ArchInfo::init(uint32_t id, uint32_t subId) noexcept { - uint32_t index = id < ASMJIT_ARRAY_SIZE(archInfoTable) ? id : uint32_t(0); - - // Make sure the `archInfoTable` array is correctly indexed. - _signature = archInfoTable[index]; - ASMJIT_ASSERT(_id == index); - - // Even if the architecture is not known we setup its id and sub-id, - // however, such architecture is not really useful. - _id = uint8_t(id); - _subId = uint8_t(subId); -} - -// ============================================================================ -// [asmjit::ArchUtils] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error ArchUtils::typeIdToRegInfo(uint32_t archId, uint32_t& typeIdInOut, RegInfo& regInfo) noexcept { - uint32_t typeId = typeIdInOut; - - // Zero the signature so it's clear in case that typeId is not invalid. - regInfo._signature = 0; - - // TODO: Move to X86 backend. -#ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) { - // Passed RegType instead of TypeId? - if (typeId <= BaseReg::kTypeMax) - typeId = x86::opData.archRegs.regTypeToTypeId[typeId]; - - if (ASMJIT_UNLIKELY(!Type::isValid(typeId))) - return DebugUtils::errored(kErrorInvalidTypeId); - - // First normalize architecture dependent types. - if (Type::isAbstract(typeId)) { - if (typeId == Type::kIdIntPtr) - typeId = (archId == ArchInfo::kIdX86) ? Type::kIdI32 : Type::kIdI64; - else - typeId = (archId == ArchInfo::kIdX86) ? Type::kIdU32 : Type::kIdU64; - } - - // Type size helps to construct all groupss of registers. If the size is zero - // then the TypeId is invalid. 
- uint32_t size = Type::sizeOf(typeId); - if (ASMJIT_UNLIKELY(!size)) - return DebugUtils::errored(kErrorInvalidTypeId); - - if (ASMJIT_UNLIKELY(typeId == Type::kIdF80)) - return DebugUtils::errored(kErrorInvalidUseOfF80); - - uint32_t regType = 0; - - switch (typeId) { - case Type::kIdI8: - case Type::kIdU8: - regType = x86::Reg::kTypeGpbLo; - break; - - case Type::kIdI16: - case Type::kIdU16: - regType = x86::Reg::kTypeGpw; - break; - - case Type::kIdI32: - case Type::kIdU32: - regType = x86::Reg::kTypeGpd; - break; - - case Type::kIdI64: - case Type::kIdU64: - if (archId == ArchInfo::kIdX86) - return DebugUtils::errored(kErrorInvalidUseOfGpq); - - regType = x86::Reg::kTypeGpq; - break; - - // F32 and F64 are always promoted to use vector registers. - case Type::kIdF32: - typeId = Type::kIdF32x1; - regType = x86::Reg::kTypeXmm; - break; - - case Type::kIdF64: - typeId = Type::kIdF64x1; - regType = x86::Reg::kTypeXmm; - break; - - // Mask registers {k}. - case Type::kIdMask8: - case Type::kIdMask16: - case Type::kIdMask32: - case Type::kIdMask64: - regType = x86::Reg::kTypeKReg; - break; - - // MMX registers. - case Type::kIdMmx32: - case Type::kIdMmx64: - regType = x86::Reg::kTypeMm; - break; - - // XMM|YMM|ZMM registers. - default: - if (size <= 16) - regType = x86::Reg::kTypeXmm; - else if (size == 32) - regType = x86::Reg::kTypeYmm; - else - regType = x86::Reg::kTypeZmm; - break; - } - - typeIdInOut = typeId; - regInfo._signature = x86::opData.archRegs.regInfo[regType].signature(); - return kErrorOk; - } -#endif - - return DebugUtils::errored(kErrorInvalidArch); -} - -ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/arch.h b/libs/asmjit/src/asmjit/core/arch.h deleted file mode 100644 index b0a27fd..0000000 --- a/libs/asmjit/src/asmjit/core/arch.h +++ /dev/null @@ -1,204 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#ifndef ASMJIT_CORE_ARCH_H_INCLUDED -#define ASMJIT_CORE_ARCH_H_INCLUDED - -#include "../core/globals.h" -#include "../core/operand.h" - -ASMJIT_BEGIN_NAMESPACE - -//! \addtogroup asmjit_core -//! \{ - -// ============================================================================ -// [asmjit::ArchInfo] -// ============================================================================ - -class ArchInfo { -public: - union { - struct { - //! Architecture id. - uint8_t _id; - //! Architecture sub-id. - uint8_t _subId; - //! Default size of a general purpose register. - uint8_t _gpSize; - //! Count of all general purpose registers. 
- uint8_t _gpCount; - }; - //! Architecture signature (32-bit int). - uint32_t _signature; - }; - - //! Architecture id. - enum Id : uint32_t { - kIdNone = 0, //!< No/Unknown architecture. - - // X86 architectures. - kIdX86 = 1, //!< X86 architecture (32-bit). - kIdX64 = 2, //!< X64 architecture (64-bit) (AMD64). - - // ARM architectures. - kIdA32 = 3, //!< ARM 32-bit architecture (AArch32/ARM/THUMB). - kIdA64 = 4, //!< ARM 64-bit architecture (AArch64). - - //! Architecture detected at compile-time (architecture of the host). - kIdHost = ASMJIT_ARCH_X86 == 32 ? kIdX86 : - ASMJIT_ARCH_X86 == 64 ? kIdX64 : - ASMJIT_ARCH_ARM == 32 ? kIdA32 : - ASMJIT_ARCH_ARM == 64 ? kIdA64 : kIdNone - }; - - //! Architecture sub-type or execution mode. - enum SubType : uint32_t { - kSubIdNone = 0, //!< Default mode (or no specific mode). - - // X86 sub-types. - kSubIdX86_AVX = 1, //!< Code generation uses AVX by default (VEC instructions). - kSubIdX86_AVX2 = 2, //!< Code generation uses AVX2 by default (VEC instructions). - kSubIdX86_AVX512 = 3, //!< Code generation uses AVX-512F by default (+32 vector regs). - kSubIdX86_AVX512VL = 4, //!< Code generation uses AVX-512F-VL by default (+VL extensions). - - // ARM sub-types. - kSubIdA32_Thumb = 8, //!< THUMB|THUMBv2 sub-type (only ARM in 32-bit mode). - -#if (ASMJIT_ARCH_X86) && defined(__AVX512VL__) - kSubIdHost = kSubIdX86_AVX512VL -#elif (ASMJIT_ARCH_X86) && defined(__AVX512F__) - kSubIdHost = kSubIdX86_AVX512 -#elif (ASMJIT_ARCH_X86) && defined(__AVX2__) - kSubIdHost = kSubIdX86_AVX2 -#elif (ASMJIT_ARCH_X86) && defined(__AVX__) - kSubIdHost = kSubIdX86_AVX -#elif (ASMJIT_ARCH_ARM == 32) && (defined(_M_ARMT) || defined(__thumb__) || defined(__thumb2__)) - kSubIdHost = kSubIdA32_Thumb -#else - kSubIdHost = 0 -#endif - }; - - //! \name Construction & Destruction - //! \{ - - inline ArchInfo() noexcept : _signature(0) {} - inline ArchInfo(const ArchInfo& other) noexcept : _signature(other._signature) {} - inline explicit ArchInfo(uint32_t type, uint32_t subType = kSubIdNone) noexcept { init(type, subType); } - inline explicit ArchInfo(Globals::NoInit_) noexcept {} - - inline static ArchInfo host() noexcept { return ArchInfo(kIdHost, kSubIdHost); } - - inline bool isInitialized() const noexcept { return _id != kIdNone; } - - ASMJIT_API void init(uint32_t type, uint32_t subType = kSubIdNone) noexcept; - inline void reset() noexcept { _signature = 0; } - - //! \} - - //! \name Overloaded Operators - //! \{ - - inline ArchInfo& operator=(const ArchInfo& other) noexcept = default; - - inline bool operator==(const ArchInfo& other) const noexcept { return _signature == other._signature; } - inline bool operator!=(const ArchInfo& other) const noexcept { return _signature != other._signature; } - - //! \} - - //! \name Accessors - //! \{ - - //! Returns the architecture id, see `Id`. - inline uint32_t archId() const noexcept { return _id; } - - //! Returns the architecture sub-id, see `SubType`. - //! - //! X86 & X64 - //! --------- - //! - //! Architecture subtype describe the highest instruction-set level that can - //! be used. - //! - //! A32 & A64 - //! --------- - //! - //! Architecture mode means the instruction encoding to be used when generating - //! machine code, thus mode can be used to force generation of THUMB and THUMBv2 - //! encoding or regular ARM encoding. - inline uint32_t archSubId() const noexcept { return _subId; } - - //! Tests whether this architecture is 32-bit. - inline bool is32Bit() const noexcept { return _gpSize == 4; } - //! 
Tests whether this architecture is 64-bit. - inline bool is64Bit() const noexcept { return _gpSize == 8; } - - //! Tests whether this architecture is X86, X64. - inline bool isX86Family() const noexcept { return isX86Family(_id); } - //! Tests whether this architecture is ARM32 or ARM64. - inline bool isArmFamily() const noexcept { return isArmFamily(_id); } - - //! Returns the native size of a general-purpose register. - inline uint32_t gpSize() const noexcept { return _gpSize; } - //! Returns number of general-purpose registers. - inline uint32_t gpCount() const noexcept { return _gpCount; } - - //! \} - - //! \name Static Functions - //! \{ - - static inline bool isX86Family(uint32_t archId) noexcept { return archId >= kIdX86 && archId <= kIdX64; } - static inline bool isArmFamily(uint32_t archId) noexcept { return archId >= kIdA32 && archId <= kIdA64; } - - //! \} -}; - -// ============================================================================ -// [asmjit::ArchRegs] -// ============================================================================ - -//! Information about all architecture registers. -struct ArchRegs { - //! Register information and signatures indexed by `BaseReg::RegType`. - RegInfo regInfo[BaseReg::kTypeMax + 1]; - //! Count (maximum) of registers per `BaseReg::RegType`. - uint8_t regCount[BaseReg::kTypeMax + 1]; - //! Converts RegType to TypeId, see `Type::Id`. - uint8_t regTypeToTypeId[BaseReg::kTypeMax + 1]; -}; - -// ============================================================================ -// [asmjit::ArchUtils] -// ============================================================================ - -struct ArchUtils { - ASMJIT_API static Error typeIdToRegInfo(uint32_t archId, uint32_t& typeIdInOut, RegInfo& regInfo) noexcept; -}; - -//! \} - -ASMJIT_END_NAMESPACE - -#endif // ASMJIT_CORE_ARCH_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/archcommons.h b/libs/asmjit/src/asmjit/core/archcommons.h new file mode 100644 index 0000000..fda2451 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/archcommons.h @@ -0,0 +1,164 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED +#define ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED + +// This file provides architecture-specific classes that are required in the +// core library. For example Imm operand allows to be created from arm::Shift +// in a const-expr way, so the arm::Shift must be provided. 
So this header +// file provides everything architecture-specific that is used by the Core API. + +#include "../core/globals.h" + +// ============================================================================ +// [asmjit::arm] +// ============================================================================ + +ASMJIT_BEGIN_SUB_NAMESPACE(arm) + +//! \addtogroup asmjit_arm +//! \{ + +//! Represents ARM immediate shift operation type and value. +class Shift { +public: + //! Operation predicate (ARM) describes either SHIFT or EXTEND operation. + //! + //! \note The constants are AsmJit specific. The first 5 values describe real + //! constants on ARM32 and AArch64 hardware, however, the addition constants + //! that describe extend modes are specific to AsmJit and would be translated + //! to the AArch64 specific constants by the assembler. + enum Op : uint32_t { + //! Shift left logical operation (default). + //! + //! Available to all ARM architectures. + kOpLSL = 0x00u, + + //! Shift right logical operation. + //! + //! Available to all ARM architectures. + kOpLSR = 0x01u, + + //! Shift right arithmetic operation. + //! + //! Available to all ARM architectures. + kOpASR = 0x02u, + + //! Rotate right operation. + //! + //! \note Not available in AArch64 mode. + kOpROR = 0x03u, + + //! Rotate right with carry operation (encoded as `kShiftROR` with zero). + //! + //! \note Not available in AArch64 mode. + kOpRRX = 0x04u, + + //! Shift left by filling low order bits with ones. + kOpMSL = 0x05u, + + //! UXTN extend register operation (AArch64 only). + kOpUXTB = 0x06u, + //! UXTH extend register operation (AArch64 only). + kOpUXTH = 0x07u, + //! UXTW extend register operation (AArch64 only). + kOpUXTW = 0x08u, + //! UXTX extend register operation (AArch64 only). + kOpUXTX = 0x09u, + + //! SXTB extend register operation (AArch64 only). + kOpSXTB = 0x0Au, + //! SXTH extend register operation (AArch64 only). + kOpSXTH = 0x0Bu, + //! SXTW extend register operation (AArch64 only). + kOpSXTW = 0x0Cu, + //! SXTX extend register operation (AArch64 only). + kOpSXTX = 0x0Du + + // NOTE: 0xE and 0xF are used by memory operand to specify POST|PRE offset mode. + }; + + //! Shift operation. + uint32_t _op; + //! Shift Value. + uint32_t _value; + + //! Default constructed Shift is not initialized. + inline Shift() noexcept = default; + + //! Copy constructor (default) + constexpr Shift(const Shift& other) noexcept = default; + + //! Constructs Shift from operation `op` and shift `value`. + constexpr Shift(uint32_t op, uint32_t value) noexcept + : _op(op), + _value(value) {} + + //! Returns the shift operation. + constexpr uint32_t op() const noexcept { return _op; } + //! Returns the shift smount. + constexpr uint32_t value() const noexcept { return _value; } + + //! Sets shift operation to `op`. + inline void setOp(uint32_t op) noexcept { _op = op; } + //! Sets shift amount to `value`. + inline void setValue(uint32_t value) noexcept { _value = value; } +}; + +//! Constructs a `LSL #value` shift (logical shift left). +static constexpr Shift lsl(uint32_t value) noexcept { return Shift(Shift::kOpLSL, value); } +//! Constructs a `LSR #value` shift (logical shift right). +static constexpr Shift lsr(uint32_t value) noexcept { return Shift(Shift::kOpLSR, value); } +//! Constructs a `ASR #value` shift (arithmetic shift right). +static constexpr Shift asr(uint32_t value) noexcept { return Shift(Shift::kOpASR, value); } +//! Constructs a `ROR #value` shift (rotate right). 
+static constexpr Shift ror(uint32_t value) noexcept { return Shift(Shift::kOpROR, value); } +//! Constructs a `RRX` shift (rotate with carry by 1). +static constexpr Shift rrx() noexcept { return Shift(Shift::kOpRRX, 0); } +//! Constructs a `MSL #value` shift (logical shift left filling ones). +static constexpr Shift msl(uint32_t value) noexcept { return Shift(Shift::kOpMSL, value); } + +//! Constructs a `UXTB #value` extend and shift (unsigned byte extend). +static constexpr Shift uxtb(uint32_t value) noexcept { return Shift(Shift::kOpUXTB, value); } +//! Constructs a `UXTH #value` extend and shift (unsigned hword extend). +static constexpr Shift uxth(uint32_t value) noexcept { return Shift(Shift::kOpUXTH, value); } +//! Constructs a `UXTW #value` extend and shift (unsigned word extend). +static constexpr Shift uxtw(uint32_t value) noexcept { return Shift(Shift::kOpUXTW, value); } +//! Constructs a `UXTX #value` extend and shift (unsigned dword extend). +static constexpr Shift uxtx(uint32_t value) noexcept { return Shift(Shift::kOpUXTX, value); } + +//! Constructs a `SXTB #value` extend and shift (signed byte extend). +static constexpr Shift sxtb(uint32_t value) noexcept { return Shift(Shift::kOpSXTB, value); } +//! Constructs a `SXTH #value` extend and shift (signed hword extend). +static constexpr Shift sxth(uint32_t value) noexcept { return Shift(Shift::kOpSXTH, value); } +//! Constructs a `SXTW #value` extend and shift (signed word extend). +static constexpr Shift sxtw(uint32_t value) noexcept { return Shift(Shift::kOpSXTW, value); } +//! Constructs a `SXTX #value` extend and shift (signed dword extend). +static constexpr Shift sxtx(uint32_t value) noexcept { return Shift(Shift::kOpSXTX, value); } + +//! \} + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_CORE_ARCHCOMMONS_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/archtraits.cpp b/libs/asmjit/src/asmjit/core/archtraits.cpp new file mode 100644 index 0000000..f069354 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/archtraits.cpp @@ -0,0 +1,155 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
+ +#include "../core/api-build_p.h" +#include "../core/archtraits.h" +#include "../core/misc_p.h" + +#ifdef ASMJIT_BUILD_X86 + #include "../x86/x86archtraits_p.h" +#endif + +#ifdef ASMJIT_BUILD_ARM + #include "../arm/armarchtraits_p.h" +#endif + +ASMJIT_BEGIN_NAMESPACE + +// ============================================================================ +// [asmjit::ArchTraits] +// ============================================================================ + +static const constexpr ArchTraits noArchTraits = { + 0xFF, // SP. + 0xFF, // FP. + 0xFF, // LR. + 0xFF, // PC. + { 0, 0, 0 }, // Reserved. + 0, // HW stack alignment. + 0, // Min stack offset. + 0, // Max stack offset. + { 0, 0, 0, 0}, // ISA features [Gp, Vec, Other0, Other1]. + { { 0 } }, // RegTypeToSignature. + { 0 }, // RegTypeToTypeId. + { 0 } // TypeIdToRegType. +}; + +ASMJIT_VARAPI const ArchTraits _archTraits[Environment::kArchCount] = { + // No architecture. + noArchTraits, + + // X86/X86 architectures. +#ifdef ASMJIT_BUILD_X86 + x86::x86ArchTraits, + x86::x64ArchTraits, +#else + noArchTraits, + noArchTraits, +#endif + + // RISCV32/RISCV64 architectures. + noArchTraits, + noArchTraits, + + // ARM architecture + noArchTraits, + + // AArch64 architecture. +#ifdef ASMJIT_BUILD_ARM + arm::a64ArchTraits, +#else + noArchTraits, +#endif + + // ARM/Thumb architecture. + noArchTraits, + + // Reserved. + noArchTraits, + + // MIPS32/MIPS64 + noArchTraits, + noArchTraits +}; + +// ============================================================================ +// [asmjit::ArchUtils] +// ============================================================================ + +ASMJIT_FAVOR_SIZE Error ArchUtils::typeIdToRegInfo(uint32_t arch, uint32_t typeId, uint32_t* typeIdOut, RegInfo* regInfoOut) noexcept { + const ArchTraits& archTraits = ArchTraits::byArch(arch); + + // Passed RegType instead of TypeId? + if (typeId <= BaseReg::kTypeMax) + typeId = archTraits.regTypeToTypeId(typeId); + + if (ASMJIT_UNLIKELY(!Type::isValid(typeId))) + return DebugUtils::errored(kErrorInvalidTypeId); + + // First normalize architecture dependent types. + if (Type::isAbstract(typeId)) { + bool is32Bit = Environment::is32Bit(arch); + if (typeId == Type::kIdIntPtr) + typeId = is32Bit ? Type::kIdI32 : Type::kIdI64; + else + typeId = is32Bit ? Type::kIdU32 : Type::kIdU64; + } + + // Type size helps to construct all groups of registers. + // TypeId is invalid if the size is zero. 
+ uint32_t size = Type::sizeOf(typeId); + if (ASMJIT_UNLIKELY(!size)) + return DebugUtils::errored(kErrorInvalidTypeId); + + if (ASMJIT_UNLIKELY(typeId == Type::kIdF80)) + return DebugUtils::errored(kErrorInvalidUseOfF80); + + uint32_t regType = 0; + if (typeId >= Type::_kIdBaseStart && typeId < Type::_kIdVec32Start) { + regType = archTraits._typeIdToRegType[typeId - Type::_kIdBaseStart]; + if (!regType) { + if (typeId == Type::kIdI64 || typeId == Type::kIdU64) + return DebugUtils::errored(kErrorInvalidUseOfGpq); + else + return DebugUtils::errored(kErrorInvalidTypeId); + } + } + else { + if (size <= 8 && archTraits._regInfo[BaseReg::kTypeVec64].isValid()) + regType = BaseReg::kTypeVec64; + else if (size <= 16 && archTraits._regInfo[BaseReg::kTypeVec128].isValid()) + regType = BaseReg::kTypeVec128; + else if (size == 32 && archTraits._regInfo[BaseReg::kTypeVec256].isValid()) + regType = BaseReg::kTypeVec256; + else if (archTraits._regInfo[BaseReg::kTypeVec512].isValid()) + regType = BaseReg::kTypeVec512; + else + return DebugUtils::errored(kErrorInvalidTypeId); + } + + *typeIdOut = typeId; + regInfoOut->reset(archTraits.regTypeToSignature(regType)); + return kErrorOk; +} + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/archtraits.h b/libs/asmjit/src/asmjit/core/archtraits.h new file mode 100644 index 0000000..5af6c7e --- /dev/null +++ b/libs/asmjit/src/asmjit/core/archtraits.h @@ -0,0 +1,174 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_ARCHTRAITS_H_INCLUDED +#define ASMJIT_CORE_ARCHTRAITS_H_INCLUDED + +#include "../core/environment.h" +#include "../core/operand.h" +#include "../core/type.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [asmjit::ArchTraits] +// ============================================================================ + +//! Architecture traits used by Function API and Compiler's register allocator. +struct ArchTraits { + //! ISA features for each register group. + enum IsaFeatures : uint32_t { + //! ISA features a register swap by using a single instruction. + kIsaFeatureSwap = 0x01u, + //! ISA features a push/pop like instruction for this register group. + kIsaFeaturePushPop = 0x02u, + }; + + //! Stack pointer register id. + uint8_t _spRegId; + //! Frame pointer register id. + uint8_t _fpRegId; + //! Link register id. + uint8_t _linkRegId; + //! 
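// Sketch (not from the upstream sources) of how ArchUtils::typeIdToRegInfo()
// resolves an abstract type id. Type::kIdIntPtr, RegInfo and ASMJIT_PROPAGATE
// appear in this patch; Environment::kArchX64 and the <asmjit/core.h> include
// path are assumptions about the surrounding headers added elsewhere in it.
#include <asmjit/core.h>
using namespace asmjit;

static Error intPtrRegInfoSketch() {
  uint32_t finalTypeId;
  RegInfo regInfo;

  // kIdIntPtr is abstract - on a 64-bit target it normalizes to kIdI64 and the
  // returned RegInfo describes a general purpose register of that architecture.
  ASMJIT_PROPAGATE(
    ArchUtils::typeIdToRegInfo(Environment::kArchX64, Type::kIdIntPtr, &finalTypeId, &regInfo));

  // Here regInfo.size() would report 8 and regInfo.group() the GP group.
  return kErrorOk;
}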
Instruction pointer (or program counter) register id, if accessible. + uint8_t _ipRegId; + + // Reserved. + uint8_t _reserved[3]; + //! Hardware stack alignment requirement. + uint8_t _hwStackAlignment; + //! Minimum addressable offset on stack guaranteed for all instructions. + uint32_t _minStackOffset; + //! Maximum addressable offset on stack depending on specific instruction. + uint32_t _maxStackOffset; + + //! Flags for each virtual register group (always covers GP and Vec groups). + uint8_t _isaFlags[BaseReg::kGroupVirt]; + + //! Maps register type into a signature, that provides group, size and can + //! be used to construct register operands. + RegInfo _regInfo[BaseReg::kTypeMax + 1]; + //! Maps a register to type-id, see \ref Type::Id. + uint8_t _regTypeToTypeId[BaseReg::kTypeMax + 1]; + //! Maps base TypeId values (from TypeId::_kIdBaseStart) to register types, see \ref Type::Id. + uint8_t _typeIdToRegType[32]; + + //! Resets all members to zeros. + inline void reset() noexcept { memset(this, 0, sizeof(*this)); } + + //! \name Accessors + //! \{ + + //! Returns stack pointer register id. + inline constexpr uint32_t spRegId() const noexcept { return _spRegId; } + //! Returns stack frame register id. + inline constexpr uint32_t fpRegId() const noexcept { return _fpRegId; } + //! Returns link register id, if the architecture provides it. + inline constexpr uint32_t linkRegId() const noexcept { return _linkRegId; } + //! Returns instruction pointer register id, if the architecture provides it. + inline constexpr uint32_t ipRegId() const noexcept { return _ipRegId; } + + //! Returns a hardware stack alignment requirement. + //! + //! \note This is a hardware constraint. Architectures that don't constrain + //! it would return the lowest alignment (1), however, some architectures may + //! constrain the alignment, for example AArch64 requires 16-byte alignment. + inline constexpr uint32_t hwStackAlignment() const noexcept { return _hwStackAlignment; } + + //! Tests whether the architecture provides link register, which is used across + //! function calls. If the link register is not provided then a function call + //! pushes the return address on stack (X86/X64). + inline constexpr bool hasLinkReg() const noexcept { return _linkRegId != BaseReg::kIdBad; } + + //! Returns minimum addressable offset on stack guaranteed for all instructions. + inline constexpr uint32_t minStackOffset() const noexcept { return _minStackOffset; } + //! Returns maximum addressable offset on stack depending on specific instruction. + inline constexpr uint32_t maxStackOffset() const noexcept { return _maxStackOffset; } + + //! Returns ISA flags of the given register `group`. + inline constexpr uint32_t isaFlags(uint32_t group) const noexcept { return _isaFlags[group]; } + //! Tests whether the given register `group` has the given `flag` set. + inline constexpr bool hasIsaFlag(uint32_t group, uint32_t flag) const noexcept { return (_isaFlags[group] & flag) != 0; } + //! Tests whether the ISA provides register swap instruction for the given register `group`. + inline constexpr bool hasSwap(uint32_t group) const noexcept { return hasIsaFlag(group, kIsaFeatureSwap); } + //! Tests whether the ISA provides push/pop instructions for the given register `group`. 
+ inline constexpr bool hasPushPop(uint32_t group) const noexcept { return hasIsaFlag(group, kIsaFeaturePushPop); } + + inline uint32_t hasRegType(uint32_t rType) const noexcept { + return rType <= BaseReg::kTypeMax && _regInfo[rType].signature() != 0; + } + + inline uint32_t regTypeToSignature(uint32_t rType) const noexcept { + ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); + return _regInfo[rType].signature(); + } + + inline uint32_t regTypeToGroup(uint32_t rType) const noexcept { + ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); + return _regInfo[rType].group(); + } + + inline uint32_t regTypeToSize(uint32_t rType) const noexcept { + ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); + return _regInfo[rType].size(); + } + + inline uint32_t regTypeToTypeId(uint32_t rType) const noexcept { + ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); + return _regTypeToTypeId[rType]; + } + + //! \} + + //! \name Statics + //! \{ + + //! Returns a const reference to `ArchTraits` for the given architecture `arch`. + static inline const ArchTraits& byArch(uint32_t arch) noexcept; + + //! \} +}; + +ASMJIT_VARAPI const ArchTraits _archTraits[Environment::kArchCount]; + +inline const ArchTraits& ArchTraits::byArch(uint32_t arch) noexcept { return _archTraits[arch & ~Environment::kArchBigEndianMask]; } + +// ============================================================================ +// [asmjit::ArchUtils] +// ============================================================================ + +//! Architecture utilities. +namespace ArchUtils { + +ASMJIT_API Error typeIdToRegInfo(uint32_t arch, uint32_t typeId, uint32_t* typeIdOut, RegInfo* regInfo) noexcept; + +} // {ArchUtils} + +//! \} + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_ARCHTRAITS_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/assembler.cpp b/libs/asmjit/src/asmjit/core/assembler.cpp index 35c39ab..4cf706f 100644 --- a/libs/asmjit/src/asmjit/core/assembler.cpp +++ b/libs/asmjit/src/asmjit/core/assembler.cpp @@ -23,9 +23,11 @@ #include "../core/api-build_p.h" #include "../core/assembler.h" -#include "../core/codebufferwriter_p.h" +#include "../core/codewriter_p.h" #include "../core/constpool.h" -#include "../core/logging.h" +#include "../core/emitterutils_p.h" +#include "../core/formatter.h" +#include "../core/logger.h" #include "../core/support.h" ASMJIT_BEGIN_NAMESPACE @@ -35,13 +37,8 @@ ASMJIT_BEGIN_NAMESPACE // ============================================================================ BaseAssembler::BaseAssembler() noexcept - : BaseEmitter(kTypeAssembler), - _section(nullptr), - _bufferData(nullptr), - _bufferEnd(nullptr), - _bufferPtr(nullptr), - _op4(), - _op5() {} + : BaseEmitter(kTypeAssembler) {} + BaseAssembler::~BaseAssembler() noexcept {} // ============================================================================ @@ -50,7 +47,7 @@ BaseAssembler::~BaseAssembler() noexcept {} Error BaseAssembler::setOffset(size_t offset) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); size_t size = Support::max(_section->bufferSize(), this->offset()); if (ASMJIT_UNLIKELY(offset > size)) @@ -60,25 +57,6 @@ Error BaseAssembler::setOffset(size_t offset) { return kErrorOk; } -// ============================================================================ -// [asmjit::BaseAssembler - Logging] -// ============================================================================ - -#ifndef ASMJIT_NO_LOGGING -static void BaseAssembler_logLabel(BaseAssembler* self, const Label& 
label) noexcept { - Logger* logger = self->_code->_logger; - - StringTmp<512> sb; - size_t binSize = logger->hasFlag(FormatOptions::kFlagMachineCode) ? size_t(0) : std::numeric_limits::max(); - - sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationLabel)); - Logging::formatLabel(sb, logger->flags(), self, label.id()); - sb.appendChar(':'); - Logging::formatLine(sb, nullptr, binSize, 0, 0, self->_inlineComment); - logger->log(sb.data(), sb.size()); -} -#endif - // ============================================================================ // [asmjit::BaseAssembler - Section Management] // ============================================================================ @@ -100,8 +78,8 @@ Error BaseAssembler::section(Section* section) { return reportError(DebugUtils::errored(kErrorInvalidSection)); #ifndef ASMJIT_NO_LOGGING - if (hasEmitterOption(kOptionLoggingEnabled)) - _code->_logger->logf(".section %s {#%u}\n", section->name(), section->id()); + if (_logger) + _logger->logf(".section %s {#%u}\n", section->name(), section->id()); #endif BaseAssembler_initSection(this, section); @@ -119,7 +97,8 @@ Label BaseAssembler::newLabel() { Error err = _code->newLabelEntry(&le); if (ASMJIT_UNLIKELY(err)) reportError(err); - labelId = le->id(); + else + labelId = le->id(); } return Label(labelId); } @@ -131,20 +110,21 @@ Label BaseAssembler::newNamedLabel(const char* name, size_t nameSize, uint32_t t Error err = _code->newNamedLabelEntry(&le, name, nameSize, type, parentId); if (ASMJIT_UNLIKELY(err)) reportError(err); - labelId = le->id(); + else + labelId = le->id(); } return Label(labelId); } Error BaseAssembler::bind(const Label& label) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); Error err = _code->bindLabel(label, _section->id(), offset()); #ifndef ASMJIT_NO_LOGGING - if (hasEmitterOption(kOptionLoggingEnabled)) - BaseAssembler_logLabel(this, label); + if (_logger) + EmitterUtils::logLabelBound(this, label); #endif resetInlineComment(); @@ -155,165 +135,112 @@ Error BaseAssembler::bind(const Label& label) { } // ============================================================================ -// [asmjit::BaseAssembler - Emit (Low-Level)] +// [asmjit::BaseAssembler - Embed] // ============================================================================ -Error BaseAssembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) { - _op4 = o4; - _op5 = o5; - _instOptions |= BaseInst::kOptionOp4Op5Used; - return _emit(instId, o0, o1, o2, o3); -} - -Error BaseAssembler::_emitOpArray(uint32_t instId, const Operand_* operands, size_t count) { - const Operand_* o0 = &operands[0]; - const Operand_* o1 = &operands[1]; - const Operand_* o2 = &operands[2]; - const Operand_* o3 = &operands[3]; - - switch (count) { - case 0: o0 = &Globals::none; ASMJIT_FALLTHROUGH; - case 1: o1 = &Globals::none; ASMJIT_FALLTHROUGH; - case 2: o2 = &Globals::none; ASMJIT_FALLTHROUGH; - case 3: o3 = &Globals::none; ASMJIT_FALLTHROUGH; - case 4: - return _emit(instId, *o0, *o1, *o2, *o3); - - case 5: - _op4 = operands[4]; - _op5.reset(); - _instOptions |= BaseInst::kOptionOp4Op5Used; - return _emit(instId, *o0, *o1, *o2, *o3); - - case 6: - _op4 = operands[4]; - _op5 = operands[5]; - _instOptions |= BaseInst::kOptionOp4Op5Used; - return _emit(instId, *o0, *o1, *o2, *o3); - - default: - return 
DebugUtils::errored(kErrorInvalidArgument); - } -} - #ifndef ASMJIT_NO_LOGGING -void BaseAssembler::_emitLog( - uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, - uint32_t relSize, uint32_t immSize, uint8_t* afterCursor) { +struct DataSizeByPower { + char str[4]; +}; - Logger* logger = _code->logger(); - ASMJIT_ASSERT(logger != nullptr); - ASMJIT_ASSERT(options & BaseEmitter::kOptionLoggingEnabled); +static const DataSizeByPower dataSizeByPowerTable[] = { + { "db" }, + { "dw" }, + { "dd" }, + { "dq" } +}; +#endif - StringTmp<256> sb; - uint32_t flags = logger->flags(); +Error BaseAssembler::embed(const void* data, size_t dataSize) { + if (ASMJIT_UNLIKELY(!_code)) + return reportError(DebugUtils::errored(kErrorNotInitialized)); - uint8_t* beforeCursor = _bufferPtr; - intptr_t emittedSize = (intptr_t)(afterCursor - beforeCursor); + if (dataSize == 0) + return kErrorOk; - Operand_ operands[Globals::kMaxOpCount]; - operands[0].copyFrom(o0); - operands[1].copyFrom(o1); - operands[2].copyFrom(o2); - operands[3].copyFrom(o3); + CodeWriter writer(this); + ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize)); - if (options & BaseInst::kOptionOp4Op5Used) { - operands[4].copyFrom(_op4); - operands[5].copyFrom(_op5); - } - else { - operands[4].reset(); - operands[5].reset(); - } + writer.emitData(data, dataSize); - sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationCode)); - Logging::formatInstruction(sb, flags, this, archId(), BaseInst(instId, options, _extraReg), operands, Globals::kMaxOpCount); +#ifndef ASMJIT_NO_LOGGING + if (_logger) + _logger->logBinary(data, dataSize); +#endif - if ((flags & FormatOptions::kFlagMachineCode) != 0) - Logging::formatLine(sb, _bufferPtr, size_t(emittedSize), relSize, immSize, inlineComment()); - else - Logging::formatLine(sb, nullptr, std::numeric_limits::max(), 0, 0, inlineComment()); - logger->log(sb); + writer.done(this); + return kErrorOk; } -Error BaseAssembler::_emitFailed( - Error err, - uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) { +Error BaseAssembler::embedDataArray(uint32_t typeId, const void* data, size_t itemCcount, size_t repeatCount) { + uint32_t deabstractDelta = Type::deabstractDeltaOfSize(registerSize()); + uint32_t finalTypeId = Type::deabstract(typeId, deabstractDelta); - StringTmp<256> sb; - sb.appendString(DebugUtils::errorAsString(err)); - sb.appendString(": "); + if (ASMJIT_UNLIKELY(!Type::isValid(finalTypeId))) + return reportError(DebugUtils::errored(kErrorInvalidArgument)); - Operand_ operands[Globals::kMaxOpCount]; - operands[0].copyFrom(o0); - operands[1].copyFrom(o1); - operands[2].copyFrom(o2); - operands[3].copyFrom(o3); + if (itemCcount == 0 || repeatCount == 0) + return kErrorOk; - if (options & BaseInst::kOptionOp4Op5Used) { - operands[4].copyFrom(_op4); - operands[5].copyFrom(_op5); - } - else { - operands[4].reset(); - operands[5].reset(); - } + uint32_t typeSize = Type::sizeOf(finalTypeId); + Support::FastUInt8 of = 0; - Logging::formatInstruction(sb, 0, this, archId(), BaseInst(instId, options, _extraReg), operands, Globals::kMaxOpCount); + size_t dataSize = Support::mulOverflow(itemCcount, size_t(typeSize), &of); + size_t totalSize = Support::mulOverflow(dataSize, repeatCount, &of); - if (inlineComment()) { - sb.appendString(" ; "); - sb.appendString(inlineComment()); - } + if (ASMJIT_UNLIKELY(of)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); - 
resetInstOptions(); - resetExtraReg(); - resetInlineComment(); - return reportError(err, sb.data()); -} + CodeWriter writer(this); + ASMJIT_PROPAGATE(writer.ensureSpace(this, totalSize)); + +#ifndef ASMJIT_NO_LOGGING + const uint8_t* start = writer.cursor(); #endif -// ============================================================================ -// [asmjit::BaseAssembler - Embed] -// ============================================================================ + for (size_t i = 0; i < repeatCount; i++) { + writer.emitData(data, dataSize); + } #ifndef ASMJIT_NO_LOGGING -struct DataSizeByPower { - char str[4]; -}; - -static const DataSizeByPower dataSizeByPowerTable[] = { - { "db" }, - { "dw" }, - { "dd" }, - { "dq" } -}; + if (_logger) + _logger->logBinary(start, totalSize); #endif -Error BaseAssembler::embed(const void* data, uint32_t dataSize) { + writer.done(this); + return kErrorOk; +} + +Error BaseAssembler::embedConstPool(const Label& label, const ConstPool& pool) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); - if (dataSize == 0) - return DebugUtils::errored(kErrorInvalidArgument); + if (ASMJIT_UNLIKELY(!isLabelValid(label))) + return reportError(DebugUtils::errored(kErrorInvalidLabel)); - CodeBufferWriter writer(this); - ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize)); + ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment()))); + ASMJIT_PROPAGATE(bind(label)); - writer.emitData(data, dataSize); + size_t size = pool.size(); + CodeWriter writer(this); + ASMJIT_PROPAGATE(writer.ensureSpace(this, size)); + + pool.fill(writer.cursor()); #ifndef ASMJIT_NO_LOGGING - if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) - _code->_logger->logBinary(data, dataSize); + if (_logger) + _logger->logBinary(writer.cursor(), size); #endif + writer.advance(size); writer.done(this); + return kErrorOk; } -Error BaseAssembler::embedLabel(const Label& label) { +Error BaseAssembler::embedLabel(const Label& label, size_t dataSize) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); ASMJIT_ASSERT(_code != nullptr); RelocEntry* re; @@ -322,45 +249,45 @@ Error BaseAssembler::embedLabel(const Label& label) { if (ASMJIT_UNLIKELY(!le)) return reportError(DebugUtils::errored(kErrorInvalidLabel)); - uint32_t dataSize = gpSize(); - ASMJIT_ASSERT(dataSize <= 8); + if (dataSize == 0) + dataSize = registerSize(); - CodeBufferWriter writer(this); + if (ASMJIT_UNLIKELY(!Support::isPowerOf2(dataSize) || dataSize > 8)) + return reportError(DebugUtils::errored(kErrorInvalidOperandSize)); + + CodeWriter writer(this); ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize)); #ifndef ASMJIT_NO_LOGGING - if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) { + if (_logger) { StringTmp<256> sb; - sb.appendFormat(".%s ", dataSizeByPowerTable[Support::ctz(dataSize)].str); - Logging::formatLabel(sb, 0, this, label.id()); - sb.appendChar('\n'); - _code->_logger->log(sb); + sb.appendFormat("%s ", dataSizeByPowerTable[Support::ctz(dataSize)].str); + Formatter::formatLabel(sb, 0, this, label.id()); + sb.append('\n'); + _logger->log(sb); } #endif - // TODO: Does it make sense to calculate the address here if everything is known? 
- /* - if (_code->hasBaseAddress() && currentSection() == _code->textSection() && le->isBound()) { - uint64_t addr = _code->baseAddress() + _code->textSection()->offset() + le->offset(); - writer.emitValueLE(addr, dataSize); - } - */ - - Error err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, dataSize); + Error err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs); if (ASMJIT_UNLIKELY(err)) return reportError(err); re->_sourceSectionId = _section->id(); re->_sourceOffset = offset(); + re->_format.resetToDataValue(uint32_t(dataSize)); if (le->isBound()) { re->_targetSectionId = le->section()->id(); re->_payload = le->offset(); } else { - LabelLink* link = _code->newLabelLink(le, _section->id(), offset(), 0); + OffsetFormat of; + of.resetToDataValue(uint32_t(dataSize)); + + LabelLink* link = _code->newLabelLink(le, _section->id(), offset(), 0, of); if (ASMJIT_UNLIKELY(!link)) return reportError(DebugUtils::errored(kErrorOutOfMemory)); + link->relocId = re->id(); } @@ -371,9 +298,9 @@ Error BaseAssembler::embedLabel(const Label& label) { return kErrorOk; } -Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) { +Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, size_t dataSize) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); LabelEntry* labelEntry = _code->labelEntry(label); LabelEntry* baseEntry = _code->labelEntry(base); @@ -382,23 +309,23 @@ Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint return reportError(DebugUtils::errored(kErrorInvalidLabel)); if (dataSize == 0) - dataSize = gpSize(); + dataSize = registerSize(); if (ASMJIT_UNLIKELY(!Support::isPowerOf2(dataSize) || dataSize > 8)) return reportError(DebugUtils::errored(kErrorInvalidOperandSize)); - CodeBufferWriter writer(this); + CodeWriter writer(this); ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize)); #ifndef ASMJIT_NO_LOGGING - if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) { + if (_logger) { StringTmp<256> sb; sb.appendFormat(".%s (", dataSizeByPowerTable[Support::ctz(dataSize)].str); - Logging::formatLabel(sb, 0, this, label.id()); - sb.appendString(" - "); - Logging::formatLabel(sb, 0, this, base.id()); - sb.appendString(")\n"); - _code->_logger->log(sb); + Formatter::formatLabel(sb, 0, this, label.id()); + sb.append(" - "); + Formatter::formatLabel(sb, 0, this, base.id()); + sb.append(")\n"); + _logger->log(sb); } #endif @@ -409,7 +336,7 @@ Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint } else { RelocEntry* re; - Error err = _code->newRelocEntry(&re, RelocEntry::kTypeExpression, dataSize); + Error err = _code->newRelocEntry(&re, RelocEntry::kTypeExpression); if (ASMJIT_UNLIKELY(err)) return reportError(err); @@ -422,6 +349,7 @@ Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint exp->setValueAsLabel(0, labelEntry); exp->setValueAsLabel(1, baseEntry); + re->_format.resetToDataValue(dataSize); re->_sourceSectionId = _section->id(); re->_sourceOffset = offset(); re->_payload = (uint64_t)(uintptr_t)exp; @@ -433,46 +361,18 @@ Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint return kErrorOk; } -Error BaseAssembler::embedConstPool(const Label& label, const ConstPool& pool) { - if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); - - if (ASMJIT_UNLIKELY(!isLabelValid(label))) - return 
DebugUtils::errored(kErrorInvalidLabel); - - ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment()))); - ASMJIT_PROPAGATE(bind(label)); - - size_t size = pool.size(); - CodeBufferWriter writer(this); - ASMJIT_PROPAGATE(writer.ensureSpace(this, size)); - - pool.fill(writer.cursor()); - -#ifndef ASMJIT_NO_LOGGING - if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) - _code->_logger->logBinary(writer.cursor(), size); -#endif - - writer.advance(size); - writer.done(this); - - return kErrorOk; -} - // ============================================================================ // [asmjit::BaseAssembler - Comment] // ============================================================================ Error BaseAssembler::comment(const char* data, size_t size) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); #ifndef ASMJIT_NO_LOGGING - if (hasEmitterOption(kOptionLoggingEnabled)) { - Logger* logger = _code->logger(); - logger->log(data, size); - logger->log("\n", 1); + if (_logger) { + _logger->log(data, size); + _logger->log("\n", 1); return kErrorOk; } #else @@ -492,10 +392,6 @@ Error BaseAssembler::onAttach(CodeHolder* code) noexcept { // Attach to the end of the .text section. BaseAssembler_initSection(this, code->_sections[0]); - // And reset everything that is used temporarily. - _op4.reset(); - _op5.reset(); - return kErrorOk; } @@ -504,10 +400,6 @@ Error BaseAssembler::onDetach(CodeHolder* code) noexcept { _bufferData = nullptr; _bufferEnd = nullptr; _bufferPtr = nullptr; - - _op4.reset(); - _op5.reset(); - return Base::onDetach(code); } diff --git a/libs/asmjit/src/asmjit/core/assembler.h b/libs/asmjit/src/asmjit/core/assembler.h index fd2c1c3..6e38bc5 100644 --- a/libs/asmjit/src/asmjit/core/assembler.h +++ b/libs/asmjit/src/asmjit/core/assembler.h @@ -31,31 +31,36 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_core +//! \addtogroup asmjit_assembler //! \{ // ============================================================================ // [asmjit::BaseAssembler] // ============================================================================ -//! Base encoder (assembler). +//! Base assembler. +//! +//! This is a base class that provides interface used by architecture specific +//! assembler implementations. Assembler doesn't hold any data, instead it's +//! attached to \ref CodeHolder, which provides all the data that Assembler +//! needs and which can be altered by it. +//! +//! Check out architecture specific assemblers for more details and examples: +//! +//! - \ref x86::Assembler - X86/X64 assembler implementation. class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter { public: ASMJIT_NONCOPYABLE(BaseAssembler) typedef BaseEmitter Base; //! Current section where the assembling happens. - Section* _section; + Section* _section = nullptr; //! Start of the CodeBuffer of the current section. - uint8_t* _bufferData; + uint8_t* _bufferData = nullptr; //! End (first invalid byte) of the current section. - uint8_t* _bufferEnd; + uint8_t* _bufferEnd = nullptr; //! Pointer in the CodeBuffer of the current section. - uint8_t* _bufferPtr; - //! 5th operand data, used only temporarily. - Operand_ _op4; - //! 6th operand data, used only temporarily. - Operand_ _op5; + uint8_t* _bufferPtr = nullptr; //! \name Construction & Destruction //! \{ @@ -77,10 +82,11 @@ class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter { //! Returns the current position in the CodeBuffer. 
inline size_t offset() const noexcept { return (size_t)(_bufferPtr - _bufferData); } + //! Sets the current position in the CodeBuffer to `offset`. //! - //! \note The `offset` cannot be outside of the buffer size (even if it's - //! within buffer's capacity). + //! \note The `offset` cannot be greater than buffer size even if it's + //! within the buffer's capacity. ASMJIT_API Error setOffset(size_t offset); //! Returns the start of the CodeBuffer in the current section. @@ -95,6 +101,7 @@ class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter { //! \name Section Management //! \{ + //! Returns the current section. inline Section* currentSection() const noexcept { return _section; } ASMJIT_API Error section(Section* section) override; @@ -110,47 +117,16 @@ class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter { //! \} - //! \cond INTERNAL - //! \name Emit - //! \{ - - using BaseEmitter::_emit; - - ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) override; - ASMJIT_API Error _emitOpArray(uint32_t instId, const Operand_* operands, size_t count) override; - -protected: -#ifndef ASMJIT_NO_LOGGING - void _emitLog( - uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, - uint32_t relSize, uint32_t immSize, uint8_t* afterCursor); - - Error _emitFailed( - Error err, - uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3); -#else - inline Error _emitFailed( - uint32_t err, - uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) { - - DebugUtils::unused(instId, options, o0, o1, o2, o3); - resetInstOptions(); - resetInlineComment(); - return reportError(err); - } -#endif -public: - //! \} - //! \endcond - //! \name Embed //! \{ - ASMJIT_API Error embed(const void* data, uint32_t dataSize) override; - ASMJIT_API Error embedLabel(const Label& label) override; - ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) override; + ASMJIT_API Error embed(const void* data, size_t dataSize) override; + ASMJIT_API Error embedDataArray(uint32_t typeId, const void* data, size_t itemCcount, size_t repeatCount = 1) override; ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override; + ASMJIT_API Error embedLabel(const Label& label, size_t dataSize = 0) override; + ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) override; + //! \} //! 
\name Comment diff --git a/libs/asmjit/src/asmjit/core/builder.cpp b/libs/asmjit/src/asmjit/core/builder.cpp index 35d7127..ad89f1d 100644 --- a/libs/asmjit/src/asmjit/core/builder.cpp +++ b/libs/asmjit/src/asmjit/core/builder.cpp @@ -25,7 +25,10 @@ #ifndef ASMJIT_NO_BUILDER #include "../core/builder.h" -#include "../core/logging.h" +#include "../core/emitterutils_p.h" +#include "../core/errorhandler.h" +#include "../core/formatter.h" +#include "../core/logger.h" #include "../core/support.h" ASMJIT_BEGIN_NAMESPACE @@ -41,12 +44,22 @@ class PostponedErrorHandler : public ErrorHandler { public: void handleError(Error err, const char* message, BaseEmitter* origin) override { DebugUtils::unused(err, origin); - _message.assignString(message); + _message.assign(message); } StringTmp<128> _message; }; +// ============================================================================ +// [asmjit::BaseBuilder - Utilities] +// ============================================================================ + +static void BaseBuilder_deletePasses(BaseBuilder* self) noexcept { + for (Pass* pass : self->_passes) + pass->~Pass(); + self->_passes.reset(); +} + // ============================================================================ // [asmjit::BaseBuilder - Construction / Destruction] // ============================================================================ @@ -56,139 +69,102 @@ BaseBuilder::BaseBuilder() noexcept _codeZone(32768 - Zone::kBlockOverhead), _dataZone(16384 - Zone::kBlockOverhead), _passZone(65536 - Zone::kBlockOverhead), - _allocator(&_codeZone), - _passes(), - _labelNodes(), - _cursor(nullptr), - _firstNode(nullptr), - _lastNode(nullptr), - _nodeFlags(0) {} -BaseBuilder::~BaseBuilder() noexcept {} + _allocator(&_codeZone) {} + +BaseBuilder::~BaseBuilder() noexcept { + BaseBuilder_deletePasses(this); +} // ============================================================================ // [asmjit::BaseBuilder - Node Management] // ============================================================================ -LabelNode* BaseBuilder::newLabelNode() noexcept { - LabelNode* node = newNodeT(); - if (!node || registerLabelNode(node) != kErrorOk) - return nullptr; - return node; -} +Error BaseBuilder::_newInstNode(InstNode** out, uint32_t instId, uint32_t instOptions, uint32_t opCount) { + uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); + ASMJIT_ASSERT(opCapacity >= InstNode::kBaseOpCapacity); + + InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); + if (ASMJIT_UNLIKELY(!node)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); -AlignNode* BaseBuilder::newAlignNode(uint32_t alignMode, uint32_t alignment) noexcept { - return newNodeT(alignMode, alignment); + *out = new(node) InstNode(this, instId, instOptions, opCount, opCapacity); + return kErrorOk; } -EmbedDataNode* BaseBuilder::newEmbedDataNode(const void* data, uint32_t size) noexcept { - if (size > EmbedDataNode::kInlineBufferSize) { - void* cloned = _dataZone.alloc(size); - if (ASMJIT_UNLIKELY(!cloned)) - return nullptr; - if (data) - memcpy(cloned, data, size); - data = cloned; - } +Error BaseBuilder::_newLabelNode(LabelNode** out) { + *out = nullptr; - return newNodeT(const_cast(data), size); + ASMJIT_PROPAGATE(_newNodeT(out)); + return registerLabelNode(*out); } -ConstPoolNode* BaseBuilder::newConstPoolNode() noexcept { - ConstPoolNode* node = newNodeT(); - if (!node || registerLabelNode(node) != kErrorOk) - return nullptr; - return node; +Error BaseBuilder::_newAlignNode(AlignNode** 
out, uint32_t alignMode, uint32_t alignment) { + *out = nullptr; + return _newNodeT(out, alignMode, alignment); } -CommentNode* BaseBuilder::newCommentNode(const char* data, size_t size) noexcept { - if (data) { - if (size == SIZE_MAX) - size = strlen(data); +Error BaseBuilder::_newEmbedDataNode(EmbedDataNode** out, uint32_t typeId, const void* data, size_t itemCount, size_t repeatCount) { + *out = nullptr; - if (size > 0) { - data = static_cast(_dataZone.dup(data, size, true)); - if (!data) return nullptr; - } - } + uint32_t deabstractDelta = Type::deabstractDeltaOfSize(registerSize()); + uint32_t finalTypeId = Type::deabstract(typeId, deabstractDelta); - return newNodeT(data); -} + if (ASMJIT_UNLIKELY(!Type::isValid(finalTypeId))) + return reportError(DebugUtils::errored(kErrorInvalidArgument)); -InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0) noexcept { - uint32_t opCount = 1; - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); + uint32_t typeSize = Type::sizeOf(finalTypeId); + Support::FastUInt8 of = 0; - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - if (ASMJIT_UNLIKELY(!node)) - return nullptr; + size_t dataSize = Support::mulOverflow(itemCount, size_t(typeSize), &of); + if (ASMJIT_UNLIKELY(of)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); - node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity); - node->setOp(0, o0); - node->resetOps(opCount, opCapacity); - return node; -} + EmbedDataNode* node; + ASMJIT_PROPAGATE(_newNodeT(&node)); -InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1) noexcept { - uint32_t opCount = 2; - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); + node->_embed._typeId = uint8_t(typeId); + node->_embed._typeSize = uint8_t(typeSize); + node->_itemCount = itemCount; + node->_repeatCount = repeatCount; - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - if (ASMJIT_UNLIKELY(!node)) - return nullptr; + uint8_t* dstData = node->_inlineData; + if (dataSize > EmbedDataNode::kInlineBufferSize) { + dstData = static_cast(_dataZone.alloc(dataSize, 8)); + if (ASMJIT_UNLIKELY(!dstData)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); + node->_externalData = dstData; + } - node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity); - node->setOp(0, o0); - node->setOp(1, o1); - node->resetOps(opCount, opCapacity); - return node; -} + if (data) + memcpy(dstData, data, dataSize); -InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept { - uint32_t opCount = 3; - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); + *out = node; + return kErrorOk; +} - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - if (ASMJIT_UNLIKELY(!node)) - return nullptr; +Error BaseBuilder::_newConstPoolNode(ConstPoolNode** out) { + *out = nullptr; - node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity); - node->setOp(0, o0); - node->setOp(1, o1); - node->setOp(2, o2); - node->resetOps(opCount, opCapacity); - return node; + ASMJIT_PROPAGATE(_newNodeT(out)); + return registerLabelNode(*out); } -InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& 
o1, const Operand_& o2, const Operand_& o3) noexcept { - uint32_t opCount = 4; - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); +Error BaseBuilder::_newCommentNode(CommentNode** out, const char* data, size_t size) { + *out = nullptr; - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - if (ASMJIT_UNLIKELY(!node)) - return nullptr; - - node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity); - node->setOp(0, o0); - node->setOp(1, o1); - node->setOp(2, o2); - node->setOp(3, o3); - node->resetOps(opCount, opCapacity); - return node; -} + if (data) { + if (size == SIZE_MAX) + size = strlen(data); -InstNode* BaseBuilder::newInstNodeRaw(uint32_t instId, uint32_t instOptions, uint32_t opCount) noexcept { - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); + if (size > 0) { + data = static_cast(_dataZone.dup(data, size, true)); + if (ASMJIT_UNLIKELY(!data)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); + } + } - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - if (ASMJIT_UNLIKELY(!node)) - return nullptr; - return new(node) InstNode(this, instId, instOptions, opCount, opCapacity); + return _newNodeT(out, data); } BaseNode* BaseBuilder::addNode(BaseNode* node) noexcept { @@ -368,34 +344,42 @@ BaseNode* BaseBuilder::setCursor(BaseNode* node) noexcept { // [asmjit::BaseBuilder - Section] // ============================================================================ -Error BaseBuilder::sectionNodeOf(SectionNode** pOut, uint32_t sectionId) noexcept { +Error BaseBuilder::sectionNodeOf(SectionNode** out, uint32_t sectionId) { + *out = nullptr; + if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); if (ASMJIT_UNLIKELY(!_code->isSectionValid(sectionId))) - return DebugUtils::errored(kErrorInvalidSection); + return reportError(DebugUtils::errored(kErrorInvalidSection)); + + if (sectionId >= _sectionNodes.size()) { + Error err = _sectionNodes.reserve(&_allocator, sectionId + 1); + if (ASMJIT_UNLIKELY(err != kErrorOk)) + return reportError(err); + } - if (sectionId >= _sectionNodes.size()) - ASMJIT_PROPAGATE(_sectionNodes.resize(&_allocator, sectionId + 1)); + SectionNode* node = nullptr; + if (sectionId < _sectionNodes.size()) + node = _sectionNodes[sectionId]; - SectionNode* node = _sectionNodes[sectionId]; if (!node) { - node = newNodeT(sectionId); - if (ASMJIT_UNLIKELY(!node)) - return DebugUtils::errored(kErrorOutOfMemory); + ASMJIT_PROPAGATE(_newNodeT(&node, sectionId)); + + // We have already reserved enough space, this cannot fail now. + if (sectionId >= _sectionNodes.size()) + _sectionNodes.resize(&_allocator, sectionId + 1); + _sectionNodes[sectionId] = node; } - *pOut = node; + *out = node; return kErrorOk; } Error BaseBuilder::section(Section* section) { SectionNode* node; - Error err = sectionNodeOf(&node, section->id()); - - if (ASMJIT_UNLIKELY(err)) - return reportError(err); + ASMJIT_PROPAGATE(sectionNodeOf(&node, section->id())); if (!node->isActive()) { // Insert the section at the end if it was not part of the code. 
@@ -444,7 +428,9 @@ void BaseBuilder::updateSectionLinks() noexcept { // [asmjit::BaseBuilder - Labels] // ============================================================================ -Error BaseBuilder::labelNodeOf(LabelNode** pOut, uint32_t labelId) noexcept { +Error BaseBuilder::labelNodeOf(LabelNode** out, uint32_t labelId) { + *out = nullptr; + if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); @@ -457,22 +443,18 @@ Error BaseBuilder::labelNodeOf(LabelNode** pOut, uint32_t labelId) noexcept { LabelNode* node = _labelNodes[index]; if (!node) { - node = newNodeT(labelId); - if (ASMJIT_UNLIKELY(!node)) - return DebugUtils::errored(kErrorOutOfMemory); + ASMJIT_PROPAGATE(_newNodeT(&node, labelId)); _labelNodes[index] = node; } - *pOut = node; + *out = node; return kErrorOk; } -Error BaseBuilder::registerLabelNode(LabelNode* node) noexcept { +Error BaseBuilder::registerLabelNode(LabelNode* node) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - // Don't call `reportError()` from here, we are noexcept and we are called - // by `newLabelNode()` and `newFuncNode()`, which are noexcept as well. LabelEntry* le; ASMJIT_PROPAGATE(_code->newLabelEntry(&le)); uint32_t labelId = le->id(); @@ -482,68 +464,58 @@ Error BaseBuilder::registerLabelNode(LabelNode* node) noexcept { ASMJIT_PROPAGATE(_labelNodes.resize(&_allocator, labelId + 1)); _labelNodes[labelId] = node; - node->_id = labelId; + node->_labelId = labelId; return kErrorOk; } -static Error BaseBuilder_newLabelInternal(BaseBuilder* self, uint32_t labelId) noexcept { +static Error BaseBuilder_newLabelInternal(BaseBuilder* self, uint32_t labelId) { ASMJIT_ASSERT(self->_labelNodes.size() < labelId + 1); - LabelNode* node = self->newNodeT(labelId); - if (ASMJIT_UNLIKELY(!node)) - return DebugUtils::errored(kErrorOutOfMemory); + uint32_t growBy = labelId - self->_labelNodes.size(); + Error err = self->_labelNodes.willGrow(&self->_allocator, growBy); + + if (ASMJIT_UNLIKELY(err)) + return self->reportError(err); - ASMJIT_PROPAGATE(self->_labelNodes.resize(&self->_allocator, labelId + 1)); + LabelNode* node; + ASMJIT_PROPAGATE(self->_newNodeT(&node, labelId)); + + self->_labelNodes.resize(&self->_allocator, labelId + 1); self->_labelNodes[labelId] = node; - node->_id = labelId; + node->_labelId = labelId; return kErrorOk; } Label BaseBuilder::newLabel() { uint32_t labelId = Globals::kInvalidId; - if (_code) { - LabelEntry* le; - Error err = _code->newLabelEntry(&le); - if (ASMJIT_UNLIKELY(err)) { - reportError(err); - } - else { - err = BaseBuilder_newLabelInternal(this, le->id()); - if (ASMJIT_UNLIKELY(err)) - reportError(err); - else - labelId = le->id(); - } + LabelEntry* le; + + if (_code && + _code->newLabelEntry(&le) == kErrorOk && + BaseBuilder_newLabelInternal(this, le->id()) == kErrorOk) { + labelId = le->id(); } + return Label(labelId); } Label BaseBuilder::newNamedLabel(const char* name, size_t nameSize, uint32_t type, uint32_t parentId) { uint32_t labelId = Globals::kInvalidId; - if (_code) { - LabelEntry* le; - Error err = _code->newNamedLabelEntry(&le, name, nameSize, type, parentId); - if (ASMJIT_UNLIKELY(err)) { - reportError(err); - } - else { - err = BaseBuilder_newLabelInternal(this, le->id()); - if (ASMJIT_UNLIKELY(err)) - reportError(err); - else - labelId = le->id(); - } + LabelEntry* le; + + if (_code && + _code->newNamedLabelEntry(&le, name, nameSize, type, parentId) == kErrorOk && + BaseBuilder_newLabelInternal(this, le->id()) == kErrorOk) { + labelId = le->id(); } 
+ return Label(labelId); } Error BaseBuilder::bind(const Label& label) { LabelNode* node; - Error err = labelNodeOf(&node, label); - - if (ASMJIT_UNLIKELY(err)) - return reportError(err); + ASMJIT_PROPAGATE(labelNodeOf(&node, label)); addNode(node); return kErrorOk; @@ -610,7 +582,6 @@ Error BaseBuilder::runPasses() { if (_passes.empty()) return kErrorOk; - Logger* logger = code()->logger(); ErrorHandler* prev = errorHandler(); PostponedErrorHandler postponed; @@ -619,8 +590,9 @@ Error BaseBuilder::runPasses() { for (Pass* pass : _passes) { _passZone.reset(); - err = pass->run(&_passZone, logger); - if (err) break; + err = pass->run(&_passZone, _logger); + if (err) + break; } _passZone.reset(); setErrorHandler(prev); @@ -635,104 +607,21 @@ Error BaseBuilder::runPasses() { // [asmjit::BaseBuilder - Emit] // ============================================================================ -Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) { - uint32_t opCount = 4; - - if (o3.isNone()) { - opCount = 3; - if (o2.isNone()) { - opCount = 2; - if (o1.isNone()) { - opCount = 1; - if (o0.isNone()) - opCount = 0; - } - } - } +Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) { + uint32_t opCount = EmitterUtils::opCountFromEmitArgs(o0, o1, o2, opExt); + uint32_t options = instOptions() | forcedInstOptions(); - uint32_t options = instOptions() | globalInstOptions(); if (options & BaseInst::kOptionReserved) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); #ifndef ASMJIT_NO_VALIDATION // Strict validation. - if (hasEmitterOption(kOptionStrictValidation)) { - Operand_ opArray[4]; - opArray[0].copyFrom(o0); - opArray[1].copyFrom(o1); - opArray[2].copyFrom(o2); - opArray[3].copyFrom(o3); - - Error err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, opCount); - if (ASMJIT_UNLIKELY(err)) { - resetInstOptions(); - resetExtraReg(); - resetInlineComment(); - return reportError(err); - } - } -#endif - - // Clear options that should never be part of `InstNode`. 
- options &= ~BaseInst::kOptionReserved; - } - - uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= 4); - - InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); - const char* comment = inlineComment(); - - resetInstOptions(); - resetInlineComment(); - - if (ASMJIT_UNLIKELY(!node)) { - resetExtraReg(); - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - } - - node = new(node) InstNode(this, instId, options, opCount, opCapacity); - node->setExtraReg(extraReg()); - node->setOp(0, o0); - node->setOp(1, o1); - node->setOp(2, o2); - node->setOp(3, o3); - node->resetOps(4, opCapacity); - - if (comment) - node->setInlineComment(static_cast(_dataZone.dup(comment, strlen(comment), true))); - - addNode(node); - resetExtraReg(); - return kErrorOk; -} - -Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) { - uint32_t opCount = Globals::kMaxOpCount; - if (o5.isNone()) { - opCount = 5; - if (o4.isNone()) - return _emit(instId, o0, o1, o2, o3); - } - - uint32_t options = instOptions() | globalInstOptions(); - if (ASMJIT_UNLIKELY(options & BaseInst::kOptionReserved)) { - if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); - -#ifndef ASMJIT_NO_VALIDATION - // Strict validation. - if (hasEmitterOption(kOptionStrictValidation)) { + if (hasValidationOption(kValidationOptionIntermediate)) { Operand_ opArray[Globals::kMaxOpCount]; - opArray[0].copyFrom(o0); - opArray[1].copyFrom(o1); - opArray[2].copyFrom(o2); - opArray[3].copyFrom(o3); - opArray[4].copyFrom(o4); - opArray[5].copyFrom(o5); - - Error err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, opCount); + EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt); + + Error err = InstAPI::validate(arch(), BaseInst(instId, options, _extraReg), opArray, opCount); if (ASMJIT_UNLIKELY(err)) { resetInstOptions(); resetExtraReg(); @@ -747,7 +636,7 @@ Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1 } uint32_t opCapacity = InstNode::capacityOfOpCount(opCount); - ASMJIT_ASSERT(opCapacity >= opCount); + ASMJIT_ASSERT(opCapacity >= InstNode::kBaseOpCapacity); InstNode* node = _allocator.allocT(InstNode::nodeSizeOfOpCapacity(opCapacity)); const char* comment = inlineComment(); @@ -765,11 +654,9 @@ Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1 node->setOp(0, o0); node->setOp(1, o1); node->setOp(2, o2); - node->setOp(3, o3); - node->setOp(4, o4); - - if (opCapacity > 5) - node->setOp(5, o5); + for (uint32_t i = 3; i < opCount; i++) + node->setOp(i, opExt[i - 3]); + node->resetOpRange(opCount, opCapacity); if (comment) node->setInlineComment(static_cast(_dataZone.dup(comment, strlen(comment), true))); @@ -787,9 +674,8 @@ Error BaseBuilder::align(uint32_t alignMode, uint32_t alignment) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - AlignNode* node = newAlignNode(alignMode, alignment); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + AlignNode* node; + ASMJIT_PROPAGATE(_newAlignNode(&node, alignMode, alignment)); addNode(node); return kErrorOk; @@ -799,57 +685,81 @@ Error BaseBuilder::align(uint32_t alignMode, uint32_t alignment) { // [asmjit::BaseBuilder - Embed] // ============================================================================ -Error BaseBuilder::embed(const void* 
data, uint32_t dataSize) { +Error BaseBuilder::embed(const void* data, size_t dataSize) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - EmbedDataNode* node = newEmbedDataNode(data, dataSize); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + EmbedDataNode* node; + ASMJIT_PROPAGATE(_newEmbedDataNode(&node, Type::kIdU8, data, dataSize)); addNode(node); return kErrorOk; } -Error BaseBuilder::embedLabel(const Label& label) { +Error BaseBuilder::embedDataArray(uint32_t typeId, const void* data, size_t itemCount, size_t itemRepeat) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - EmbedLabelNode* node = newNodeT(label.id()); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + EmbedDataNode* node; + ASMJIT_PROPAGATE(_newEmbedDataNode(&node, typeId, data, itemCount, itemRepeat)); addNode(node); return kErrorOk; } -Error BaseBuilder::embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) { +Error BaseBuilder::embedConstPool(const Label& label, const ConstPool& pool) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - EmbedLabelDeltaNode* node = newNodeT(label.id(), base.id(), dataSize); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + if (!isLabelValid(label)) + return reportError(DebugUtils::errored(kErrorInvalidLabel)); + + ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment()))); + ASMJIT_PROPAGATE(bind(label)); + + EmbedDataNode* node; + ASMJIT_PROPAGATE(_newEmbedDataNode(&node, Type::kIdU8, nullptr, pool.size())); + pool.fill(node->data()); addNode(node); return kErrorOk; } -Error BaseBuilder::embedConstPool(const Label& label, const ConstPool& pool) { +// EmbedLabel / EmbedLabelDelta +// ---------------------------- +// +// If dataSize is zero it means that the size is the same as target register +// width, however, if it's provided we really want to validate whether it's +// within the possible range. 
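// Minimal usage sketch of the dataSize contract documented above, assuming an
// x86::Assembler already attached to a CodeHolder (both types appear elsewhere
// in this patch); <asmjit/x86.h> is the assumed public include. A dataSize of
// zero embeds a register-width value; an explicit size must be a power of 2
// no larger than 8.
#include <asmjit/x86.h>
using namespace asmjit;

static Error embedLabelSketch(x86::Assembler& a) {
  Label base = a.newLabel();
  Label target = a.newLabel();

  ASMJIT_PROPAGATE(a.bind(base));
  ASMJIT_PROPAGATE(a.bind(target));

  ASMJIT_PROPAGATE(a.embedLabel(target));               // register width (8 bytes on X64)
  ASMJIT_PROPAGATE(a.embedLabel(target, 4));            // explicit 32-bit value
  ASMJIT_PROPAGATE(a.embedLabelDelta(target, base, 4)); // 32-bit distance `target - base`
  return kErrorOk;
}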
+ +static inline bool BaseBuilder_checkDataSize(size_t dataSize) noexcept { + return !dataSize || (Support::isPowerOf2(dataSize) && dataSize <= 8); +} + +Error BaseBuilder::embedLabel(const Label& label, size_t dataSize) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - if (!isLabelValid(label)) - return reportError(DebugUtils::errored(kErrorInvalidLabel)); + if (!BaseBuilder_checkDataSize(dataSize)) + return reportError(DebugUtils::errored(kErrorInvalidArgument)); - ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment()))); - ASMJIT_PROPAGATE(bind(label)); + EmbedLabelNode* node; + ASMJIT_PROPAGATE(_newNodeT(&node, label.id(), uint32_t(dataSize))); - EmbedDataNode* node = newEmbedDataNode(nullptr, uint32_t(pool.size())); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + addNode(node); + return kErrorOk; +} + +Error BaseBuilder::embedLabelDelta(const Label& label, const Label& base, size_t dataSize) { + if (ASMJIT_UNLIKELY(!_code)) + return DebugUtils::errored(kErrorNotInitialized); + + if (!BaseBuilder_checkDataSize(dataSize)) + return reportError(DebugUtils::errored(kErrorInvalidArgument)); + + EmbedLabelDeltaNode* node; + ASMJIT_PROPAGATE(_newNodeT(&node, label.id(), base.id(), uint32_t(dataSize))); - pool.fill(node->data()); addNode(node); return kErrorOk; } @@ -862,9 +772,8 @@ Error BaseBuilder::comment(const char* data, size_t size) { if (ASMJIT_UNLIKELY(!_code)) return DebugUtils::errored(kErrorNotInitialized); - CommentNode* node = newCommentNode(data, size); - if (ASMJIT_UNLIKELY(!node)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); + CommentNode* node; + ASMJIT_PROPAGATE(_newCommentNode(&node, data, size)); addNode(node); return kErrorOk; @@ -874,16 +783,42 @@ Error BaseBuilder::comment(const char* data, size_t size) { // [asmjit::BaseBuilder - Serialize] // ============================================================================ -Error BaseBuilder::serialize(BaseEmitter* dst) { +Error BaseBuilder::serializeTo(BaseEmitter* dst) { Error err = kErrorOk; BaseNode* node_ = _firstNode; + Operand_ opArray[Globals::kMaxOpCount]; + do { dst->setInlineComment(node_->inlineComment()); if (node_->isInst()) { InstNode* node = node_->as(); - err = dst->emitInst(node->baseInst(), node->operands(), node->opCount()); + + // NOTE: Inlined to remove one additional call per instruction. 
+ dst->setInstOptions(node->instOptions()); + dst->setExtraReg(node->extraReg()); + + const Operand_* op = node->operands(); + const Operand_* opExt = EmitterUtils::noExt; + + uint32_t opCount = node->opCount(); + if (opCount > 3) { + uint32_t i = 4; + opArray[3] = op[3]; + + while (i < opCount) { + opArray[i].copyFrom(op[i]); + i++; + } + while (i < Globals::kMaxOpCount) { + opArray[i].reset(); + i++; + } + opExt = opArray + 3; + } + + err = dst->_emit(node->id(), op[0], op[1], op[2], opExt); } else if (node_->isLabel()) { if (node_->isConstPool()) { @@ -901,11 +836,11 @@ Error BaseBuilder::serialize(BaseEmitter* dst) { } else if (node_->isEmbedData()) { EmbedDataNode* node = node_->as(); - err = dst->embed(node->data(), node->size()); + err = dst->embedDataArray(node->typeId(), node->data(), node->itemCount(), node->repeatCount()); } else if (node_->isEmbedLabel()) { EmbedLabelNode* node = node_->as(); - err = dst->embedLabel(node->label()); + err = dst->embedLabel(node->label(), node->dataSize()); } else if (node_->isEmbedLabelDelta()) { EmbedLabelDeltaNode* node = node_->as(); @@ -927,23 +862,6 @@ Error BaseBuilder::serialize(BaseEmitter* dst) { return err; } -// ============================================================================ -// [asmjit::BaseBuilder - Logging] -// ============================================================================ - -#ifndef ASMJIT_NO_LOGGING -Error BaseBuilder::dump(String& sb, uint32_t flags) const noexcept { - BaseNode* node = _firstNode; - while (node) { - ASMJIT_PROPAGATE(Logging::formatNode(sb, flags, this, node)); - sb.appendChar('\n'); - node = node->next(); - } - - return kErrorOk; -} -#endif - // ============================================================================ // [asmjit::BaseBuilder - Events] // ============================================================================ @@ -962,7 +880,6 @@ Error BaseBuilder::onAttach(CodeHolder* code) noexcept { return err; } - _cursor = initialSection; _firstNode = initialSection; _lastNode = initialSection; @@ -972,7 +889,7 @@ Error BaseBuilder::onAttach(CodeHolder* code) noexcept { } Error BaseBuilder::onDetach(CodeHolder* code) noexcept { - _passes.reset(); + BaseBuilder_deletePasses(this); _sectionNodes.reset(); _labelNodes.reset(); @@ -995,8 +912,7 @@ Error BaseBuilder::onDetach(CodeHolder* code) noexcept { // ============================================================================ Pass::Pass(const char* name) noexcept - : _cb(nullptr), - _name(name) {} + : _name(name) {} Pass::~Pass() noexcept {} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/builder.h b/libs/asmjit/src/asmjit/core/builder.h index c6d3599..317bda1 100644 --- a/libs/asmjit/src/asmjit/core/builder.h +++ b/libs/asmjit/src/asmjit/core/builder.h @@ -30,10 +30,12 @@ #include "../core/assembler.h" #include "../core/codeholder.h" #include "../core/constpool.h" +#include "../core/formatter.h" #include "../core/inst.h" #include "../core/operand.h" #include "../core/string.h" #include "../core/support.h" +#include "../core/type.h" #include "../core/zone.h" #include "../core/zonevector.h" @@ -68,6 +70,18 @@ class JumpAnnotation; // [asmjit::BaseBuilder] // ============================================================================ +//! Builder interface. +//! +//! `BaseBuilder` interface was designed to be used as a \ref BaseAssembler +//! replacement in case pre-processing or post-processing of the generated code +//! is required. The code can be modified during or after code generation. Pre +//! 
or post processing can be done manually or through a \ref Pass object. \ref +//! BaseBuilder stores the emitted code as a double-linked list of nodes, which +//! allows O(1) insertion and removal during processing. +//! +//! Check out architecture specific builders for more details and examples: +//! +//! - \ref x86::Builder - X86/X64 builder implementation. class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { public: ASMJIT_NONCOPYABLE(BaseBuilder) @@ -83,23 +97,23 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { ZoneAllocator _allocator; //! Array of `Pass` objects. - ZoneVector _passes; + ZoneVector _passes {}; //! Maps section indexes to `LabelNode` nodes. - ZoneVector _sectionNodes; + ZoneVector _sectionNodes {}; //! Maps label indexes to `LabelNode` nodes. - ZoneVector _labelNodes; + ZoneVector _labelNodes {}; //! Current node (cursor). - BaseNode* _cursor; + BaseNode* _cursor = nullptr; //! First node of the current section. - BaseNode* _firstNode; + BaseNode* _firstNode = nullptr; //! Last node of the current section. - BaseNode* _lastNode; + BaseNode* _lastNode = nullptr; //! Flags assigned to each new node. - uint32_t _nodeFlags; + uint32_t _nodeFlags = 0; //! The sections links are dirty (used internally). - bool _dirtySectionLinks; + bool _dirtySectionLinks = false; //! \name Construction & Destruction //! \{ @@ -127,33 +141,26 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! \remarks The pointer returned (if non-null) is owned by the Builder or //! Compiler. When the Builder/Compiler is destroyed it destroys all nodes //! it created so no manual memory management is required. - template - inline T* newNodeT() noexcept { - return _allocator.newT(this); - } - - //! \overload template - inline T* newNodeT(Args&&... args) noexcept { - return _allocator.newT(this, std::forward(args)...); + inline Error _newNodeT(T** out, Args&&... args) { + *out = _allocator.newT(this, std::forward(args)...); + if (ASMJIT_UNLIKELY(!*out)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); + return kErrorOk; } - //! Creates a new `LabelNode`. - ASMJIT_API LabelNode* newLabelNode() noexcept; - //! Creates a new `AlignNode`. - ASMJIT_API AlignNode* newAlignNode(uint32_t alignMode, uint32_t alignment) noexcept; - //! Creates a new `EmbedDataNode`. - ASMJIT_API EmbedDataNode* newEmbedDataNode(const void* data, uint32_t size) noexcept; - //! Creates a new `ConstPoolNode`. - ASMJIT_API ConstPoolNode* newConstPoolNode() noexcept; - //! Creates a new `CommentNode`. - ASMJIT_API CommentNode* newCommentNode(const char* data, size_t size) noexcept; - - ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0) noexcept; - ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1) noexcept; - ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept; - ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept; - ASMJIT_API InstNode* newInstNodeRaw(uint32_t instId, uint32_t instOptions, uint32_t opCount) noexcept; + //! Creates a new \ref InstNode. + ASMJIT_API Error _newInstNode(InstNode** out, uint32_t instId, uint32_t instOptions, uint32_t opCount); + //! Creates a new \ref LabelNode. + ASMJIT_API Error _newLabelNode(LabelNode** out); + //! Creates a new \ref AlignNode. 
+ ASMJIT_API Error _newAlignNode(AlignNode** out, uint32_t alignMode, uint32_t alignment); + //! Creates a new \ref EmbedDataNode. + ASMJIT_API Error _newEmbedDataNode(EmbedDataNode** out, uint32_t typeId, const void* data, size_t itemCount, size_t repeatCount = 1); + //! Creates a new \ref ConstPoolNode. + ASMJIT_API Error _newConstPoolNode(ConstPoolNode** out); + //! Creates a new \ref CommentNode. + ASMJIT_API Error _newCommentNode(CommentNode** out, const char* data, size_t size); //! Adds `node` after the current and sets the current node to the given `node`. ASMJIT_API BaseNode* addNode(BaseNode* node) noexcept; @@ -173,7 +180,9 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! added they are always added after the cursor and the cursor is changed //! to be that newly added node. Use `setCursor()` to change where new nodes //! are inserted. - inline BaseNode* cursor() const noexcept { return _cursor; } + inline BaseNode* cursor() const noexcept { + return _cursor; + } //! Sets the current node to `node` and return the previous one. ASMJIT_API BaseNode* setCursor(BaseNode* node) noexcept; @@ -182,7 +191,9 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! //! Only use this function if you are concerned about performance and want //! this inlined (for example if you set the cursor in a loop, etc...). - inline void _setCursor(BaseNode* node) noexcept { _cursor = node; } + inline void _setCursor(BaseNode* node) noexcept { + _cursor = node; + } //! \} @@ -193,7 +204,9 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! //! \note If a section of some id is not associated with the Builder/Compiler //! it would be null, so always check for nulls if you iterate over the vector. - inline const ZoneVector& sectionNodes() const noexcept { return _sectionNodes; } + inline const ZoneVector& sectionNodes() const noexcept { + return _sectionNodes; + } //! Tests whether the `SectionNode` of the given `sectionId` was registered. inline bool hasRegisteredSectionNode(uint32_t sectionId) const noexcept { @@ -205,7 +218,7 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! \remarks This function will either get the existing `SectionNode` or create //! it in case it wasn't created before. You can check whether a section has a //! registered `SectionNode` by using `BaseBuilder::hasRegisteredSectionNode()`. - ASMJIT_API Error sectionNodeOf(SectionNode** pOut, uint32_t sectionId) noexcept; + ASMJIT_API Error sectionNodeOf(SectionNode** out, uint32_t sectionId); ASMJIT_API Error section(Section* section) override; @@ -221,7 +234,7 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! \name Label Management //! \{ - //! Returns a vector of LabelNode nodes. + //! Returns a vector of \ref LabelNode nodes. //! //! \note If a label of some id is not associated with the Builder/Compiler //! it would be null, so always check for nulls if you iterate over the vector. @@ -237,24 +250,24 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { return hasRegisteredLabelNode(label.id()); } - //! Gets or creates a `LabelNode` that matches the given `labelId`. + //! Gets or creates a \ref LabelNode that matches the given `labelId`. //! //! \remarks This function will either get the existing `LabelNode` or create //! it in case it wasn't created before. You can check whether a label has a - //! registered `LabelNode` by using `BaseBuilder::hasRegisteredLabelNode()`. - ASMJIT_API Error labelNodeOf(LabelNode** pOut, uint32_t labelId) noexcept; + //! 
registered `LabelNode` by calling \ref BaseBuilder::hasRegisteredLabelNode(). + ASMJIT_API Error labelNodeOf(LabelNode** out, uint32_t labelId); //! \overload - inline Error labelNodeOf(LabelNode** pOut, const Label& label) noexcept { - return labelNodeOf(pOut, label.id()); + inline Error labelNodeOf(LabelNode** out, const Label& label) { + return labelNodeOf(out, label.id()); } - //! Registers this label node [Internal]. + //! Registers this \ref LabelNode (internal). //! //! This function is used internally to register a newly created `LabelNode` - //! with this instance of Builder/Compiler. Use `labelNodeOf()` functions to - //! get back `LabelNode` from a label or its identifier. - ASMJIT_API Error registerLabelNode(LabelNode* node) noexcept; + //! with this instance of Builder/Compiler. Use \ref labelNodeOf() functions + //! to get back \ref LabelNode from a label or its identifier. + ASMJIT_API Error registerLabelNode(LabelNode* node); ASMJIT_API Label newLabel() override; ASMJIT_API Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, uint32_t type = Label::kTypeGlobal, uint32_t parentId = Globals::kInvalidId) override; @@ -284,12 +297,14 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { inline T* newPassT(Args&&... args) noexcept { return _codeZone.newT(std::forward(args)...); } template - inline Error addPassT() noexcept { return addPass(newPassT()); } + inline Error addPassT() { return addPass(newPassT()); } template - inline Error addPassT(Args&&... args) noexcept { return addPass(newPassT(std::forward(args)...)); } + inline Error addPassT(Args&&... args) { return addPass(newPassT(std::forward(args)...)); } //! Returns `Pass` by name. + //! + //! If the pass having the given `name` doesn't exist `nullptr` is returned. ASMJIT_API Pass* passByName(const char* name) const noexcept; //! Adds `pass` to the list of passes. ASMJIT_API Error addPass(Pass* pass) noexcept; @@ -304,8 +319,7 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! \name Emit //! \{ - ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) override; - ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) override; + ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override; //! \} @@ -319,11 +333,13 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! \name Embed //! \{ - ASMJIT_API Error embed(const void* data, uint32_t dataSize) override; - ASMJIT_API Error embedLabel(const Label& label) override; - ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) override; + ASMJIT_API Error embed(const void* data, size_t dataSize) override; + ASMJIT_API Error embedDataArray(uint32_t typeId, const void* data, size_t count, size_t repeat = 1) override; ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override; + ASMJIT_API Error embedLabel(const Label& label, size_t dataSize = 0) override; + ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) override; + //! \} //! \name Comment @@ -341,16 +357,7 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! Although not explicitly required the emitter will most probably be of //! Assembler type. The reason is that there is no known use of serializing //! 
nodes held by Builder/Compiler into another Builder-like emitter. - ASMJIT_API Error serialize(BaseEmitter* dst); - - //! \} - - //! \name Logging - //! \{ - -#ifndef ASMJIT_NO_LOGGING - ASMJIT_API Error dump(String& sb, uint32_t flags = 0) const noexcept; -#endif + ASMJIT_API Error serializeTo(BaseEmitter* dst); //! \} @@ -361,6 +368,20 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { ASMJIT_API Error onDetach(CodeHolder* code) noexcept override; //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use serializeTo() instead, serialize() is now also an instruction.") + inline Error serialize(BaseEmitter* dst) { + return serializeTo(dst); + } + +#ifndef ASMJIT_NO_LOGGING + ASMJIT_DEPRECATED("Use Formatter::formatNodeList(sb, formatFlags, builder)") + inline Error dump(String& sb, uint32_t formatFlags = 0) const noexcept { + return Formatter::formatNodeList(sb, formatFlags, this); + } +#endif // !ASMJIT_NO_LOGGING +#endif // !ASMJIT_NO_DEPRECATED }; // ============================================================================ @@ -369,11 +390,11 @@ class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter { //! Base node. //! -//! Every node represents a building-block used by `BaseBuilder`. It can be -//! instruction, data, label, comment, directive, or any other high-level +//! Every node represents a building-block used by \ref BaseBuilder. It can +//! be instruction, data, label, comment, directive, or any other high-level //! representation that can be transformed to the building blocks mentioned. -//! Every class that inherits `BaseBuilder` can define its own nodes that it -//! can lower to basic nodes. +//! Every class that inherits \ref BaseBuilder can define its own high-level +//! nodes that can be later lowered to basic nodes like instructions. class BaseNode { public: ASMJIT_NONCOPYABLE(BaseNode) @@ -385,7 +406,7 @@ class BaseNode { //! Next node. BaseNode* _next; }; - //! Links (previous and next nodes). + //! Links (an alternative view to previous and next nodes). BaseNode* _links[2]; }; @@ -401,6 +422,7 @@ class BaseNode { uint8_t _reserved1; }; + //! Data used by \ref InstNode. struct InstData { //! Node type, see \ref NodeType. uint8_t _nodeType; @@ -412,6 +434,19 @@ class BaseNode { uint8_t _opCapacity; }; + //! Data used by \ref EmbedDataNode. + struct EmbedData { + //! Node type, see \ref NodeType. + uint8_t _nodeType; + //! Node flags, see \ref Flags. + uint8_t _nodeFlags; + //! Type id, see \ref Type::Id. + uint8_t _typeId; + //! Size of `_typeId`. + uint8_t _typeSize; + }; + + //! Data used by \ref SentinelNode. struct SentinelData { //! Node type, see \ref NodeType. uint8_t _nodeType; @@ -423,9 +458,15 @@ class BaseNode { uint8_t _reserved1; }; + //! Data that can have different meaning dependning on \ref NodeType. union { + //! Data useful by any node type. AnyData _any; + //! Data specific to \ref InstNode. InstData _inst; + //! Data specific to \ref EmbedDataNode. + EmbedData _embed; + //! Data specific to \ref SentinelNode. SentinelData _sentinel; }; @@ -434,7 +475,9 @@ class BaseNode { //! Value reserved for AsmJit users never touched by AsmJit itself. union { + //! User data as 64-bit integer. uint64_t _userDataU64; + //! User data as pointer. void* _userDataPtr; }; @@ -451,54 +494,66 @@ class BaseNode { // [BaseBuilder] - //! Node is `InstNode` or `InstExNode`. + //! Node is \ref InstNode or \ref InstExNode. kNodeInst = 1, - //! Node is `SectionNode`. + //! Node is \ref SectionNode. kNodeSection = 2, - //! Node is `LabelNode`. + //! 
Node is \ref LabelNode. kNodeLabel = 3, - //! Node is `AlignNode`. + //! Node is \ref AlignNode. kNodeAlign = 4, - //! Node is `EmbedDataNode`. + //! Node is \ref EmbedDataNode. kNodeEmbedData = 5, - //! Node is `EmbedLabelNode`. + //! Node is \ref EmbedLabelNode. kNodeEmbedLabel = 6, - //! Node is `EmbedLabelDeltaNode`. + //! Node is \ref EmbedLabelDeltaNode. kNodeEmbedLabelDelta = 7, - //! Node is `ConstPoolNode`. + //! Node is \ref ConstPoolNode. kNodeConstPool = 8, - //! Node is `CommentNode`. + //! Node is \ref CommentNode. kNodeComment = 9, - //! Node is `SentinelNode`. + //! Node is \ref SentinelNode. kNodeSentinel = 10, // [BaseCompiler] - //! Node is `JumpNode` (acts as InstNode). + //! Node is \ref JumpNode (acts as InstNode). kNodeJump = 15, - //! Node is `FuncNode` (acts as LabelNode). + //! Node is \ref FuncNode (acts as LabelNode). kNodeFunc = 16, - //! Node is `FuncRetNode` (acts as InstNode). + //! Node is \ref FuncRetNode (acts as InstNode). kNodeFuncRet = 17, - //! Node is `FuncCallNode` (acts as InstNode). - kNodeFuncCall = 18, + //! Node is \ref InvokeNode (acts as InstNode). + kNodeInvoke = 18, // [UserDefined] //! First id of a user-defined node. - kNodeUser = 32 + kNodeUser = 32, + +#ifndef ASMJIT_NO_DEPRECATED + kNodeFuncCall = kNodeInvoke +#endif // !ASMJIT_NO_DEPRECATED }; //! Node flags, specify what the node is and/or does. enum Flags : uint32_t { - kFlagIsCode = 0x01u, //!< Node is code that can be executed (instruction, label, align, etc...). - kFlagIsData = 0x02u, //!< Node is data that cannot be executed (data, const-pool, etc...). - kFlagIsInformative = 0x04u, //!< Node is informative, can be removed and ignored. - kFlagIsRemovable = 0x08u, //!< Node can be safely removed if unreachable. - kFlagHasNoEffect = 0x10u, //!< Node does nothing when executed (label, align, explicit nop). - kFlagActsAsInst = 0x20u, //!< Node is an instruction or acts as it. - kFlagActsAsLabel = 0x40u, //!< Node is a label or acts as it. - kFlagIsActive = 0x80u //!< Node is active (part of the code). + //! Node is code that can be executed (instruction, label, align, etc...). + kFlagIsCode = 0x01u, + //! Node is data that cannot be executed (data, const-pool, etc...). + kFlagIsData = 0x02u, + //! Node is informative, can be removed and ignored. + kFlagIsInformative = 0x04u, + //! Node can be safely removed if unreachable. + kFlagIsRemovable = 0x08u, + //! Node does nothing when executed (label, align, explicit nop). + kFlagHasNoEffect = 0x10u, + //! Node is an instruction or acts as it. + kFlagActsAsInst = 0x20u, + //! Node is a label or acts as it. + kFlagActsAsLabel = 0x40u, + //! Node is active (part of the code). + kFlagIsActive = 0x80u }; //! \name Construction & Destruction @@ -572,8 +627,13 @@ class BaseNode { inline bool isFunc() const noexcept { return type() == kNodeFunc; } //! Tests whether this node is `FuncRetNode`. inline bool isFuncRet() const noexcept { return type() == kNodeFuncRet; } - //! Tests whether this node is `FuncCallNode`. - inline bool isFuncCall() const noexcept { return type() == kNodeFuncCall; } + //! Tests whether this node is `InvokeNode`. + inline bool isInvoke() const noexcept { return type() == kNodeInvoke; } + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use isInvoke") + inline bool isFuncCall() const noexcept { return isInvoke(); } +#endif // !ASMJIT_NO_DEPRECATED //! Returns the node flags, see \ref Flags. 
inline uint32_t flags() const noexcept { return _any._nodeFlags; } @@ -697,12 +757,13 @@ class InstNode : public BaseNode { _inst._opCount = uint8_t(opCount); } + //! \cond INTERNAL //! Reset all built-in operands, including `extraReg`. inline void _resetOps() noexcept { _baseInst.resetExtraReg(); - for (uint32_t i = 0, count = opCapacity(); i < count; i++) - _opArray[i].reset(); + resetOpRange(0, opCapacity()); } + //! \endcond //! \} @@ -739,12 +800,12 @@ class InstNode : public BaseNode { //! Resets extra register operand. inline void resetExtraReg() noexcept { _baseInst.resetExtraReg(); } - //! Returns operands count. + //! Returns operand count. inline uint32_t opCount() const noexcept { return _inst._opCount; } - //! Returns operands capacity. + //! Returns operand capacity. inline uint32_t opCapacity() const noexcept { return _inst._opCapacity; } - //! Sets operands count. + //! Sets operand count. inline void setOpCount(uint32_t opCount) noexcept { _inst._opCount = uint8_t(opCount); } //! Returns operands array. @@ -752,27 +813,32 @@ class InstNode : public BaseNode { //! Returns operands array (const). inline const Operand* operands() const noexcept { return (const Operand*)_opArray; } - inline Operand& opType(uint32_t index) noexcept { + //! Returns operand at the given `index`. + inline Operand& op(uint32_t index) noexcept { ASMJIT_ASSERT(index < opCapacity()); return _opArray[index].as(); } - inline const Operand& opType(uint32_t index) const noexcept { + //! Returns operand at the given `index` (const). + inline const Operand& op(uint32_t index) const noexcept { ASMJIT_ASSERT(index < opCapacity()); return _opArray[index].as(); } + //! Sets operand at the given `index` to `op`. inline void setOp(uint32_t index, const Operand_& op) noexcept { ASMJIT_ASSERT(index < opCapacity()); _opArray[index].copyFrom(op); } + //! Resets operand at the given `index` to none. inline void resetOp(uint32_t index) noexcept { ASMJIT_ASSERT(index < opCapacity()); _opArray[index].reset(); } - inline void resetOps(uint32_t start, uint32_t end) noexcept { + //! Resets operands at `[start, end)` range. + inline void resetOpRange(uint32_t start, uint32_t end) noexcept { for (uint32_t i = start; i < end; i++) _opArray[i].reset(); } @@ -816,6 +882,7 @@ class InstNode : public BaseNode { //! \name Rewriting //! \{ + //! \cond INTERNAL inline uint32_t* _getRewriteArray() noexcept { return &_baseInst._extraReg._id; } inline const uint32_t* _getRewriteArray() const noexcept { return &_baseInst._extraReg._id; } @@ -833,12 +900,14 @@ class InstNode : public BaseNode { uint32_t* array = _getRewriteArray(); array[index] = id; } + //! \endcond //! \} //! \name Static Functions //! \{ + //! \cond INTERNAL static inline uint32_t capacityOfOpCount(uint32_t opCount) noexcept { return opCount <= kBaseOpCapacity ? kBaseOpCapacity : Globals::kMaxOpCount; } @@ -847,6 +916,7 @@ class InstNode : public BaseNode { size_t base = sizeof(InstNode) - kBaseOpCapacity * sizeof(Operand); return base + opCapacity * sizeof(Operand); } + //! \endcond //! \} }; @@ -855,7 +925,7 @@ class InstNode : public BaseNode { // [asmjit::InstExNode] // ============================================================================ -//! Instruction node with maximum number of operands.. +//! Instruction node with maximum number of operands. //! //! This node is created automatically by Builder/Compiler in case that the //! required number of operands exceeds the default capacity of `InstNode`. 
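// Illustrative sketch (not part of the patch): how the size-aware embedLabel() /
// embedLabelDelta() and the renamed serializeTo() introduced above fit together.
// JitRuntime, x86::Builder and x86::Assembler are standard asmjit types; error
// handling is omitted for brevity.

#include <asmjit/x86.h>
using namespace asmjit;

void buildAndSerialize() {
  JitRuntime rt;
  CodeHolder code;
  code.init(rt.environment());

  x86::Builder cb(&code);
  Label entry = cb.newLabel();
  Label table = cb.newLabel();

  cb.bind(entry);
  cb.mov(x86::eax, 1);
  cb.ret();

  cb.bind(table);
  cb.embedLabel(entry, 8);              // 8-byte absolute address of `entry`
  cb.embedLabelDelta(table, entry, 4);  // 4-byte distance `table - entry`

  // Flatten the node list into a real emitter; serialize() still compiles but
  // is deprecated in favor of serializeTo().
  x86::Assembler a(&code);
  cb.serializeTo(&a);
}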
@@ -925,27 +995,33 @@ class LabelNode : public BaseNode { public: ASMJIT_NONCOPYABLE(LabelNode) - uint32_t _id; + //! Label identifier. + uint32_t _labelId; //! \name Construction & Destruction //! \{ //! Creates a new `LabelNode` instance. - inline LabelNode(BaseBuilder* cb, uint32_t id = 0) noexcept + inline LabelNode(BaseBuilder* cb, uint32_t labelId = 0) noexcept : BaseNode(cb, kNodeLabel, kFlagHasNoEffect | kFlagActsAsLabel), - _id(id) {} + _labelId(labelId) {} //! \} //! \name Accessors //! \{ + //! Returns \ref Label representation of the \ref LabelNode. + inline Label label() const noexcept { return Label(_labelId); } //! Returns the id of the label. - inline uint32_t id() const noexcept { return _id; } - //! Returns the label as `Label` operand. - inline Label label() const noexcept { return Label(_id); } + inline uint32_t labelId() const noexcept { return _labelId; } //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use labelId() instead") + inline uint32_t id() const noexcept { return labelId(); } +#endif // !ASMJIT_NO_DEPRECATED }; // ============================================================================ @@ -1005,37 +1081,28 @@ class EmbedDataNode : public BaseNode { ASMJIT_NONCOPYABLE(EmbedDataNode) enum : uint32_t { - kInlineBufferSize = uint32_t(64 - sizeof(BaseNode) - 4) + kInlineBufferSize = 128 - (sizeof(BaseNode) + sizeof(size_t) * 2) }; + size_t _itemCount; + size_t _repeatCount; + union { - struct { - //! Embedded data buffer. - uint8_t _buf[kInlineBufferSize]; - //! Size of the data. - uint32_t _size; - }; - struct { - //! Pointer to external data. - uint8_t* _externalPtr; - }; + uint8_t* _externalData; + uint8_t _inlineData[kInlineBufferSize]; }; //! \name Construction & Destruction //! \{ //! Creates a new `EmbedDataNode` instance. - inline EmbedDataNode(BaseBuilder* cb, void* data, uint32_t size) noexcept - : BaseNode(cb, kNodeEmbedData, kFlagIsData) { - - if (size <= kInlineBufferSize) { - if (data) - memcpy(_buf, data, size); - } - else { - _externalPtr = static_cast(data); - } - _size = size; + inline EmbedDataNode(BaseBuilder* cb) noexcept + : BaseNode(cb, kNodeEmbedData, kFlagIsData), + _itemCount(0), + _repeatCount(0) { + _embed._typeId = uint8_t(Type::kIdU8), + _embed._typeSize = uint8_t(1); + memset(_inlineData, 0, kInlineBufferSize); } //! \} @@ -1043,10 +1110,32 @@ class EmbedDataNode : public BaseNode { //! \name Accessors //! \{ - //! Returns pointer to the data. - inline uint8_t* data() const noexcept { return _size <= kInlineBufferSize ? const_cast(_buf) : _externalPtr; } - //! Returns size of the data. - inline uint32_t size() const noexcept { return _size; } + //! Returns \ref Type::Id of the data. + inline uint32_t typeId() const noexcept { return _embed._typeId; } + //! Returns the size of a single data element. + inline uint32_t typeSize() const noexcept { return _embed._typeSize; } + + //! Returns a pointer to the data casted to `uint8_t`. + inline uint8_t* data() const noexcept { + return dataSize() <= kInlineBufferSize ? const_cast(_inlineData) : _externalData; + } + + //! Returns a pointer to the data casted to `T`. + template + inline T* dataAs() const noexcept { return reinterpret_cast(data()); } + + //! Returns the number of (typed) items in the array. + inline size_t itemCount() const noexcept { return _itemCount; } + + //! Returns how many times the data is repeated (default 1). + //! + //! Repeated data is useful when defining constants for SIMD, for example. 
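+  //! Embedding a single 32-bit constant with a repeat count of 4 produces the
+  //! same 16 bytes that a 128-bit (XMM) constant occupies.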
+ inline size_t repeatCount() const noexcept { return _repeatCount; } + + //! Returns the size of the data, not considering the number of times it repeats. + //! + //! \note The returned value is the same as `typeSize() * itemCount()`. + inline size_t dataSize() const noexcept { return typeSize() * _itemCount; } //! \} }; @@ -1060,32 +1149,44 @@ class EmbedLabelNode : public BaseNode { public: ASMJIT_NONCOPYABLE(EmbedLabelNode) - uint32_t _id; + uint32_t _labelId; + uint32_t _dataSize; //! \name Construction & Destruction //! \{ //! Creates a new `EmbedLabelNode` instance. - inline EmbedLabelNode(BaseBuilder* cb, uint32_t id = 0) noexcept + inline EmbedLabelNode(BaseBuilder* cb, uint32_t labelId = 0, uint32_t dataSize = 0) noexcept : BaseNode(cb, kNodeEmbedLabel, kFlagIsData), - _id(id) {} + _labelId(labelId), + _dataSize(dataSize) {} //! \} //! \name Accessors //! \{ + //! Returns the label to embed as \ref Label operand. + inline Label label() const noexcept { return Label(_labelId); } //! Returns the id of the label. - inline uint32_t id() const noexcept { return _id; } - //! Sets the label id (use with caution, improper use can break a lot of things). - inline void setId(uint32_t id) noexcept { _id = id; } + inline uint32_t labelId() const noexcept { return _labelId; } - //! Returns the label as `Label` operand. - inline Label label() const noexcept { return Label(_id); } //! Sets the label id from `label` operand. - inline void setLabel(const Label& label) noexcept { setId(label.id()); } + inline void setLabel(const Label& label) noexcept { setLabelId(label.id()); } + //! Sets the label id (use with caution, improper use can break a lot of things). + inline void setLabelId(uint32_t labelId) noexcept { _labelId = labelId; } + + //! Returns the data size. + inline uint32_t dataSize() const noexcept { return _dataSize; } + //! Sets the data size. + inline void setDataSize(uint32_t dataSize) noexcept { _dataSize = dataSize; } //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use labelId() instead") + inline uint32_t id() const noexcept { return labelId(); } +#endif // !ASMJIT_NO_DEPRECATED }; // ============================================================================ @@ -1097,18 +1198,18 @@ class EmbedLabelDeltaNode : public BaseNode { public: ASMJIT_NONCOPYABLE(EmbedLabelDeltaNode) - uint32_t _id; - uint32_t _baseId; + uint32_t _labelId; + uint32_t _baseLabelId; uint32_t _dataSize; //! \name Construction & Destruction //! \{ //! Creates a new `EmbedLabelDeltaNode` instance. - inline EmbedLabelDeltaNode(BaseBuilder* cb, uint32_t id = 0, uint32_t baseId = 0, uint32_t dataSize = 0) noexcept + inline EmbedLabelDeltaNode(BaseBuilder* cb, uint32_t labelId = 0, uint32_t baseLabelId = 0, uint32_t dataSize = 0) noexcept : BaseNode(cb, kNodeEmbedLabelDelta, kFlagIsData), - _id(id), - _baseId(baseId), + _labelId(labelId), + _baseLabelId(baseLabelId), _dataSize(dataSize) {} //! \} @@ -1116,28 +1217,46 @@ class EmbedLabelDeltaNode : public BaseNode { //! \name Accessors //! \{ - //! Returns the id of the label. - inline uint32_t id() const noexcept { return _id; } - //! Sets the label id. - inline void setId(uint32_t id) noexcept { _id = id; } //! Returns the label as `Label` operand. - inline Label label() const noexcept { return Label(_id); } + inline Label label() const noexcept { return Label(_labelId); } + //! Returns the id of the label. + inline uint32_t labelId() const noexcept { return _labelId; } + //! Sets the label id from `label` operand. 
- inline void setLabel(const Label& label) noexcept { setId(label.id()); } + inline void setLabel(const Label& label) noexcept { setLabelId(label.id()); } + //! Sets the label id. + inline void setLabelId(uint32_t labelId) noexcept { _labelId = labelId; } - //! Returns the id of the base label. - inline uint32_t baseId() const noexcept { return _baseId; } - //! Sets the base label id. - inline void setBaseId(uint32_t baseId) noexcept { _baseId = baseId; } //! Returns the base label as `Label` operand. - inline Label baseLabel() const noexcept { return Label(_baseId); } + inline Label baseLabel() const noexcept { return Label(_baseLabelId); } + //! Returns the id of the base label. + inline uint32_t baseLabelId() const noexcept { return _baseLabelId; } + //! Sets the base label id from `label` operand. - inline void setBaseLabel(const Label& baseLabel) noexcept { setBaseId(baseLabel.id()); } + inline void setBaseLabel(const Label& baseLabel) noexcept { setBaseLabelId(baseLabel.id()); } + //! Sets the base label id. + inline void setBaseLabelId(uint32_t baseLabelId) noexcept { _baseLabelId = baseLabelId; } + //! Returns the size of the embedded label address. inline uint32_t dataSize() const noexcept { return _dataSize; } + //! Sets the size of the embedded label address. inline void setDataSize(uint32_t dataSize) noexcept { _dataSize = dataSize; } //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use labelId() instead") + inline uint32_t id() const noexcept { return labelId(); } + + ASMJIT_DEPRECATED("Use setLabelId() instead") + inline void setId(uint32_t id) noexcept { setLabelId(id); } + + ASMJIT_DEPRECATED("Use baseLabelId() instead") + inline uint32_t baseId() const noexcept { return baseLabelId(); } + + ASMJIT_DEPRECATED("Use setBaseLabelId() instead") + inline void setBaseId(uint32_t id) noexcept { setBaseLabelId(id); } +#endif // !ASMJIT_NO_DEPRECATED }; // ============================================================================ @@ -1229,7 +1348,9 @@ class SentinelNode : public BaseNode { //! Type of the sentinel (purery informative purpose). enum SentinelType : uint32_t { + //! Type of the sentinel is not known. kSentinelUnknown = 0u, + //! This is a sentinel used at the end of \ref FuncNode. kSentinelFuncEnd = 1u }; @@ -1248,8 +1369,15 @@ class SentinelNode : public BaseNode { //! \name Accessors //! \{ - inline uint32_t sentinelType() const noexcept { return _sentinel._sentinelType; } - inline void setSentinelType(uint32_t type) noexcept { _sentinel._sentinelType = uint8_t(type); } + //! Returns the type of the sentinel. + inline uint32_t sentinelType() const noexcept { + return _sentinel._sentinelType; + } + + //! Sets the type of the sentinel. + inline void setSentinelType(uint32_t type) noexcept { + _sentinel._sentinelType = uint8_t(type); + } //! \} }; @@ -1265,9 +1393,9 @@ class ASMJIT_VIRTAPI Pass { ASMJIT_NONCOPYABLE(Pass) //! BaseBuilder this pass is assigned to. - BaseBuilder* _cb; + BaseBuilder* _cb = nullptr; //! Name of the pass. - const char* _name; + const char* _name = nullptr; //! \name Construction & Destruction //! \{ @@ -1280,7 +1408,9 @@ class ASMJIT_VIRTAPI Pass { //! \name Accessors //! \{ + //! Returns \ref BaseBuilder associated with the pass. inline const BaseBuilder* cb() const noexcept { return _cb; } + //! Returns the name of the pass. inline const char* name() const noexcept { return _name; } //! \} @@ -1292,7 +1422,7 @@ class ASMJIT_VIRTAPI Pass { //! //! This is the only function that is called by the `BaseBuilder` to process //! 
the code. It passes `zone`, which will be reset after the `run()` finishes. - virtual Error run(Zone* zone, Logger* logger) noexcept = 0; + virtual Error run(Zone* zone, Logger* logger) = 0; //! \} }; diff --git a/libs/asmjit/src/asmjit/core/callconv.h b/libs/asmjit/src/asmjit/core/callconv.h deleted file mode 100644 index f3dc385..0000000 --- a/libs/asmjit/src/asmjit/core/callconv.h +++ /dev/null @@ -1,411 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#ifndef ASMJIT_CORE_CALLCONV_H_INCLUDED -#define ASMJIT_CORE_CALLCONV_H_INCLUDED - -#include "../core/arch.h" -#include "../core/operand.h" -#include "../core/support.h" - -ASMJIT_BEGIN_NAMESPACE - -//! \addtogroup asmjit_func -//! \{ - -// ============================================================================ -// [asmjit::CallConv] -// ============================================================================ - -//! Function calling convention. -//! -//! Function calling convention is a scheme that defines how function parameters -//! are passed and how function returns its result. AsmJit defines a variety of -//! architecture and OS specific calling conventions and also provides a compile -//! time detection to make the code-generation easier. -struct CallConv { - //! Calling convention id, see `Id`. - uint8_t _id; - //! Architecture id (see `ArchInfo::Id`). - uint8_t _archId; - //! Register assignment strategy. - uint8_t _strategy; - //! Flags. - uint8_t _flags; - - //! Red zone size (AMD64 == 128 bytes). - uint8_t _redZoneSize; - //! Spill zone size (WIN64 == 32 bytes). - uint8_t _spillZoneSize; - //! Natural stack alignment as defined by OS/ABI. - uint8_t _naturalStackAlignment; - uint8_t _reserved[1]; - - //! Mask of all passed registers, per group. - uint32_t _passedRegs[BaseReg::kGroupVirt]; - //! Mask of all preserved registers, per group. - uint32_t _preservedRegs[BaseReg::kGroupVirt]; - - //! Internal limits of AsmJit's CallConv. - enum Limits : uint32_t { - kMaxRegArgsPerGroup = 16 - }; - - //! Passed registers' order. - union RegOrder { - //! Passed registers, ordered. - uint8_t id[kMaxRegArgsPerGroup]; - uint32_t packed[(kMaxRegArgsPerGroup + 3) / 4]; - }; - - //! Passed registers' order, per group. - RegOrder _passedOrder[BaseReg::kGroupVirt]; - - //! Calling convention id. - enum Id : uint32_t { - //! None or invalid (can't be used). 
- kIdNone = 0, - - // ------------------------------------------------------------------------ - // [Universal] - // ------------------------------------------------------------------------ - - // TODO: To make this possible we need to know target ARCH and ABI. - - /* - - // Universal calling conventions are applicable to any target and are - // converted to target dependent conventions at runtime. The purpose of - // these conventions is to make using functions less target dependent. - - kIdCDecl = 1, - kIdStdCall = 2, - kIdFastCall = 3, - - //! AsmJit specific calling convention designed for calling functions - //! inside a multimedia code that don't use many registers internally, - //! but are long enough to be called and not inlined. These functions are - //! usually used to calculate trigonometric functions, logarithms, etc... - kIdLightCall2 = 10, - kIdLightCall3 = 11, - kIdLightCall4 = 12, - */ - - // ------------------------------------------------------------------------ - // [X86] - // ------------------------------------------------------------------------ - - //! X86 `__cdecl` calling convention (used by C runtime and libraries). - kIdX86CDecl = 16, - //! X86 `__stdcall` calling convention (used mostly by WinAPI). - kIdX86StdCall = 17, - //! X86 `__thiscall` calling convention (MSVC/Intel). - kIdX86MsThisCall = 18, - //! X86 `__fastcall` convention (MSVC/Intel). - kIdX86MsFastCall = 19, - //! X86 `__fastcall` convention (GCC and Clang). - kIdX86GccFastCall = 20, - //! X86 `regparm(1)` convention (GCC and Clang). - kIdX86GccRegParm1 = 21, - //! X86 `regparm(2)` convention (GCC and Clang). - kIdX86GccRegParm2 = 22, - //! X86 `regparm(3)` convention (GCC and Clang). - kIdX86GccRegParm3 = 23, - - kIdX86LightCall2 = 29, - kIdX86LightCall3 = 30, - kIdX86LightCall4 = 31, - - //! X64 calling convention - WIN64-ABI. - kIdX86Win64 = 32, - //! X64 calling convention - SystemV / AMD64-ABI. - kIdX86SysV64 = 33, - - kIdX64LightCall2 = 45, - kIdX64LightCall3 = 46, - kIdX64LightCall4 = 47, - - // ------------------------------------------------------------------------ - // [ARM] - // ------------------------------------------------------------------------ - - //! Legacy calling convention, floating point arguments are passed via GP registers. - kIdArm32SoftFP = 48, - //! Modern calling convention, uses VFP registers to pass floating point arguments. - kIdArm32HardFP = 49, - - // ------------------------------------------------------------------------ - // [Internal] - // ------------------------------------------------------------------------ - - //! \cond INTERNAL - - _kIdX86Start = 16, - _kIdX86End = 31, - - _kIdX64Start = 32, - _kIdX64End = 47, - - _kIdArmStart = 48, - _kIdArmEnd = 49, - - //! \endcond - - // ------------------------------------------------------------------------ - // [Host] - // ------------------------------------------------------------------------ - -#if defined(ASMJIT_DOCGEN) - - //! Default calling convention based on the current C++ compiler's settings. - //! - //! \note This should be always the same as `kIdHostCDecl`, but some - //! compilers allow to override the default calling convention. Overriding - //! is not detected at the moment. - kIdHost = DETECTED_AT_COMPILE_TIME, - - //! Default CDECL calling convention based on the current C++ compiler's settings. - kIdHostCDecl = DETECTED_AT_COMPILE_TIME, - - //! Default STDCALL calling convention based on the current C++ compiler's settings. - //! - //! 
\note If not defined by the host then it's the same as `kIdHostCDecl`. - kIdHostStdCall = DETECTED_AT_COMPILE_TIME, - - //! Compatibility for `__fastcall` calling convention. - //! - //! \note If not defined by the host then it's the same as `kIdHostCDecl`. - kIdHostFastCall = DETECTED_AT_COMPILE_TIME - -#elif ASMJIT_ARCH_X86 == 32 - - kIdHost = kIdX86CDecl, - kIdHostCDecl = kIdX86CDecl, - kIdHostStdCall = kIdX86StdCall, - -# if defined(_MSC_VER) - kIdHostFastCall = kIdX86MsFastCall, -# elif defined(__GNUC__) - kIdHostFastCall = kIdX86GccFastCall, -# else - kIdHostFastCall = kIdHost, -# endif - - kIdHostLightCall2 = kIdX86LightCall2, - kIdHostLightCall3 = kIdX86LightCall3, - kIdHostLightCall4 = kIdX86LightCall4 - -#elif ASMJIT_ARCH_X86 == 64 - -# if defined(_WIN32) - kIdHost = kIdX86Win64, -# else - kIdHost = kIdX86SysV64, -# endif - - kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host. - kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host. - kIdHostFastCall = kIdHost, // Doesn't exist, redirected to host. - - kIdHostLightCall2 = kIdX64LightCall2, - kIdHostLightCall3 = kIdX64LightCall3, - kIdHostLightCall4 = kIdX64LightCall4 - -#elif ASMJIT_ARCH_ARM == 32 - -# if defined(__SOFTFP__) - kIdHost = kIdArm32SoftFP, -# else - kIdHost = kIdArm32HardFP, -# endif - // These don't exist on ARM. - kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host. - kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host. - kIdHostFastCall = kIdHost // Doesn't exist, redirected to host. - -#else - - kIdHost = kIdNone, - kIdHostCDecl = kIdHost, - kIdHostStdCall = kIdHost, - kIdHostFastCall = kIdHost - -#endif - }; - - //! Strategy used to assign registers to function arguments. - //! - //! This is AsmJit specific. It basically describes how AsmJit should convert - //! the function arguments defined by `FuncSignature` into register IDs and - //! stack offsets. The default strategy `kStrategyDefault` assigns registers - //! and then stack whereas `kStrategyWin64` strategy does register shadowing - //! as defined by WIN64 calling convention - it applies to 64-bit calling - //! conventions only. - enum Strategy : uint32_t { - kStrategyDefault = 0, //!< Default register assignment strategy. - kStrategyWin64 = 1 //!< WIN64 specific register assignment strategy. - }; - - //! Calling convention flags. - enum Flags : uint32_t { - kFlagCalleePopsStack = 0x01, //!< Callee is responsible for cleaning up the stack. - kFlagPassFloatsByVec = 0x02, //!< Pass F32 and F64 arguments by VEC128 register. - kFlagVectorCall = 0x04, //!< This is a '__vectorcall' calling convention. - kFlagIndirectVecArgs = 0x08 //!< Pass vector arguments indirectly (as a pointer). - }; - - //! \name Construction & Destruction - //! \{ - - ASMJIT_API Error init(uint32_t ccId) noexcept; - - inline void reset() noexcept { - memset(this, 0, sizeof(*this)); - memset(_passedOrder, 0xFF, sizeof(_passedOrder)); - } - - //! \} - - //! \name Accessors - //! \{ - - //! Returns the calling convention id, see `Id`. - inline uint32_t id() const noexcept { return _id; } - //! Sets the calling convention id, see `Id`. - inline void setId(uint32_t id) noexcept { _id = uint8_t(id); } - - //! Returns the calling function architecture id. - inline uint32_t archId() const noexcept { return _archId; } - //! Sets the calling function architecture id. - inline void setArchType(uint32_t archId) noexcept { _archId = uint8_t(archId); } - - //! Returns the strategy used to assign registers to arguments, see `Strategy`. 
- inline uint32_t strategy() const noexcept { return _strategy; } - //! Sets the strategy used to assign registers to arguments, see `Strategy`. - inline void setStrategy(uint32_t strategy) noexcept { _strategy = uint8_t(strategy); } - - //! Tests whether the calling convention has the given `flag` set. - inline bool hasFlag(uint32_t flag) const noexcept { return (uint32_t(_flags) & flag) != 0; } - //! Returns the calling convention flags, see `Flags`. - inline uint32_t flags() const noexcept { return _flags; } - //! Adds the calling convention flags, see `Flags`. - inline void setFlags(uint32_t flag) noexcept { _flags = uint8_t(flag); }; - //! Adds the calling convention flags, see `Flags`. - inline void addFlags(uint32_t flags) noexcept { _flags = uint8_t(_flags | flags); }; - - //! Tests whether this calling convention specifies 'RedZone'. - inline bool hasRedZone() const noexcept { return _redZoneSize != 0; } - //! Tests whether this calling convention specifies 'SpillZone'. - inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; } - - //! Returns size of 'RedZone'. - inline uint32_t redZoneSize() const noexcept { return _redZoneSize; } - //! Returns size of 'SpillZone'. - inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; } - - //! Sets size of 'RedZone'. - inline void setRedZoneSize(uint32_t size) noexcept { _redZoneSize = uint8_t(size); } - //! Sets size of 'SpillZone'. - inline void setSpillZoneSize(uint32_t size) noexcept { _spillZoneSize = uint8_t(size); } - - //! Returns a natural stack alignment. - inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; } - //! Sets a natural stack alignment. - //! - //! This function can be used to override the default stack alignment in case - //! that you know that it's alignment is different. For example it allows to - //! implement custom calling conventions that guarantee higher stack alignment. - inline void setNaturalStackAlignment(uint32_t value) noexcept { _naturalStackAlignment = uint8_t(value); } - - inline const uint8_t* passedOrder(uint32_t group) const noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - return _passedOrder[group].id; - } - - inline uint32_t passedRegs(uint32_t group) const noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - return _passedRegs[group]; - } - - inline void _setPassedPacked(uint32_t group, uint32_t p0, uint32_t p1, uint32_t p2, uint32_t p3) noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - - _passedOrder[group].packed[0] = p0; - _passedOrder[group].packed[1] = p1; - _passedOrder[group].packed[2] = p2; - _passedOrder[group].packed[3] = p3; - } - - inline void setPassedToNone(uint32_t group) noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - - _setPassedPacked(group, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); - _passedRegs[group] = 0u; - } - - inline void setPassedOrder(uint32_t group, uint32_t a0, uint32_t a1 = 0xFF, uint32_t a2 = 0xFF, uint32_t a3 = 0xFF, uint32_t a4 = 0xFF, uint32_t a5 = 0xFF, uint32_t a6 = 0xFF, uint32_t a7 = 0xFF) noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - - // NOTE: This should always be called with all arguments known at compile time, - // so even if it looks scary it should be translated into few instructions. - _setPassedPacked(group, Support::bytepack32_4x8(a0, a1, a2, a3), - Support::bytepack32_4x8(a4, a5, a6, a7), - 0xFFFFFFFFu, - 0xFFFFFFFFu); - - _passedRegs[group] = (a0 != 0xFF ? 1u << a0 : 0u) | - (a1 != 0xFF ? 1u << a1 : 0u) | - (a2 != 0xFF ? 
1u << a2 : 0u) | - (a3 != 0xFF ? 1u << a3 : 0u) | - (a4 != 0xFF ? 1u << a4 : 0u) | - (a5 != 0xFF ? 1u << a5 : 0u) | - (a6 != 0xFF ? 1u << a6 : 0u) | - (a7 != 0xFF ? 1u << a7 : 0u) ; - } - - inline uint32_t preservedRegs(uint32_t group) const noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - return _preservedRegs[group]; - } - - inline void setPreservedRegs(uint32_t group, uint32_t regs) noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - _preservedRegs[group] = regs; - } - - //! \} - - //! \name Static Functions - //! \{ - - static inline bool isX86Family(uint32_t ccId) noexcept { return ccId >= _kIdX86Start && ccId <= _kIdX64End; } - static inline bool isArmFamily(uint32_t ccId) noexcept { return ccId >= _kIdArmStart && ccId <= _kIdArmEnd; } - - //! \} -}; - -//! \} - -ASMJIT_END_NAMESPACE - -#endif // ASMJIT_CORE_CALLCONV_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/codebuffer.h b/libs/asmjit/src/asmjit/core/codebuffer.h new file mode 100644 index 0000000..76c86b1 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/codebuffer.h @@ -0,0 +1,126 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_CODEBUFFER_H_INCLUDED +#define ASMJIT_CORE_CODEBUFFER_H_INCLUDED + +#include "../core/globals.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [asmjit::CodeBuffer] +// ============================================================================ + +//! Code or data buffer. +struct CodeBuffer { + //! The content of the buffer (data). + uint8_t* _data; + //! Number of bytes of `data` used. + size_t _size; + //! Buffer capacity (in bytes). + size_t _capacity; + //! Buffer flags. + uint32_t _flags; + + //! Code buffer flags. + enum Flags : uint32_t { + //! Buffer is external (not allocated by asmjit). + kFlagIsExternal = 0x00000001u, + //! Buffer is fixed (cannot be reallocated). + kFlagIsFixed = 0x00000002u + }; + + //! \name Overloaded Operators + //! \{ + + //! Returns a referebce to the byte at the given `index`. + inline uint8_t& operator[](size_t index) noexcept { + ASMJIT_ASSERT(index < _size); + return _data[index]; + } + //! \overload + inline const uint8_t& operator[](size_t index) const noexcept { + ASMJIT_ASSERT(index < _size); + return _data[index]; + } + + //! \} + + //! \name Accessors + //! \{ + + //! Returns code buffer flags, see \ref Flags. + inline uint32_t flags() const noexcept { return _flags; } + //! 
Tests whether the code buffer has the given `flag` set. + inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + + //! Tests whether this code buffer has a fixed size. + //! + //! Fixed size means that the code buffer is fixed and cannot grow. + inline bool isFixed() const noexcept { return hasFlag(kFlagIsFixed); } + + //! Tests whether the data in this code buffer is external. + //! + //! External data can only be provided by users, it's never used by AsmJit. + inline bool isExternal() const noexcept { return hasFlag(kFlagIsExternal); } + + //! Tests whether the data in this code buffer is allocated (non-null). + inline bool isAllocated() const noexcept { return _data != nullptr; } + + //! Tests whether the code buffer is empty. + inline bool empty() const noexcept { return !_size; } + + //! Returns the size of the data. + inline size_t size() const noexcept { return _size; } + //! Returns the capacity of the data. + inline size_t capacity() const noexcept { return _capacity; } + + //! Returns the pointer to the data the buffer references. + inline uint8_t* data() noexcept { return _data; } + //! \overload + inline const uint8_t* data() const noexcept { return _data; } + + //! \} + + //! \name Iterators + //! \{ + + inline uint8_t* begin() noexcept { return _data; } + inline const uint8_t* begin() const noexcept { return _data; } + + inline uint8_t* end() noexcept { return _data + _size; } + inline const uint8_t* end() const noexcept { return _data + _size; } + + //! \} +}; + +//! \} + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_CODEBUFFER_H_INCLUDED + diff --git a/libs/asmjit/src/asmjit/core/codeholder.cpp b/libs/asmjit/src/asmjit/core/codeholder.cpp index 93c9a99..3c4154e 100644 --- a/libs/asmjit/src/asmjit/core/codeholder.cpp +++ b/libs/asmjit/src/asmjit/core/codeholder.cpp @@ -23,9 +23,13 @@ #include "../core/api-build_p.h" #include "../core/assembler.h" -#include "../core/logging.h" +#include "../core/codewriter_p.h" +#include "../core/logger.h" #include "../core/support.h" +#include +#include + ASMJIT_BEGIN_NAMESPACE // ============================================================================ @@ -77,13 +81,6 @@ class LabelLinkIterator { LabelLink* _link; }; -// ============================================================================ -// [asmjit::ErrorHandler] -// ============================================================================ - -ErrorHandler::ErrorHandler() noexcept {} -ErrorHandler::~ErrorHandler() noexcept {} - // ============================================================================ // [asmjit::CodeHolder - Utilities] // ============================================================================ @@ -97,8 +94,8 @@ static void CodeHolder_resetInternal(CodeHolder* self, uint32_t resetPolicy) noe self->detach(emitters[--i]); // Reset everything into its construction state. 
- self->_codeInfo.reset(); - self->_emitterOptions = 0; + self->_environment.reset(); + self->_baseAddress = Globals::kNoBaseAddress; self->_logger = nullptr; self->_errorHandler = nullptr; @@ -120,6 +117,7 @@ static void CodeHolder_resetInternal(CodeHolder* self, uint32_t resetPolicy) noe self->_relocations.reset(); self->_labelEntries.reset(); self->_sections.reset(); + self->_sectionsByOrder.reset(); self->_unresolvedLinkCount = 0; self->_addressTableSection = nullptr; @@ -129,20 +127,10 @@ static void CodeHolder_resetInternal(CodeHolder* self, uint32_t resetPolicy) noe self->_zone.reset(resetPolicy); } -static void CodeHolder_modifyEmitterOptions(CodeHolder* self, uint32_t clear, uint32_t add) noexcept { - uint32_t oldOpt = self->_emitterOptions; - uint32_t newOpt = (oldOpt & ~clear) | add; - - if (oldOpt == newOpt) - return; - - // Modify emitter options of `CodeHolder` itself. - self->_emitterOptions = newOpt; - - // Modify emitter options of all attached emitters. +static void CodeHolder_onSettingsUpdated(CodeHolder* self) noexcept { + // Notify all attached emitters about a settings update. for (BaseEmitter* emitter : self->emitters()) { - emitter->_emitterOptions = (emitter->_emitterOptions & ~clear) | add; - emitter->onUpdateGlobalInstOptions(); + emitter->onSettingsUpdated(); } } @@ -151,8 +139,8 @@ static void CodeHolder_modifyEmitterOptions(CodeHolder* self, uint32_t clear, ui // ============================================================================ CodeHolder::CodeHolder() noexcept - : _codeInfo(), - _emitterOptions(0), + : _environment(), + _baseAddress(Globals::kNoBaseAddress), _logger(nullptr), _errorHandler(nullptr), _zone(16384 - Zone::kBlockOverhead), @@ -177,7 +165,7 @@ inline void CodeHolder_setSectionDefaultName( section->_name.u32[1] = Support::bytepack32_4x8(uint8_t(c4), uint8_t(c5), uint8_t(c6), uint8_t(c7)); } -Error CodeHolder::init(const CodeInfo& info) noexcept { +Error CodeHolder::init(const Environment& environment, uint64_t baseAddress) noexcept { // Cannot reinitialize if it's locked or there is one or more emitter attached. if (isInitialized()) return DebugUtils::errored(kErrorAlreadyInitialized); @@ -185,14 +173,16 @@ Error CodeHolder::init(const CodeInfo& info) noexcept { // If we are just initializing there should be no emitters attached. ASMJIT_ASSERT(_emitters.empty()); - // Create the default section and insert it to the `_sections` array. - Error err = _sections.willGrow(&_allocator); + // Create a default section and insert it to the `_sections` array. + Error err = _sections.willGrow(&_allocator) | + _sectionsByOrder.willGrow(&_allocator); if (err == kErrorOk) { Section* section = _allocator.allocZeroedT
(); if (ASMJIT_LIKELY(section)) { section->_flags = Section::kFlagExec | Section::kFlagConst; CodeHolder_setSectionDefaultName(section, '.', 't', 'e', 'x', 't'); _sections.appendUnsafe(section); + _sectionsByOrder.appendUnsafe(section); } else { err = DebugUtils::errored(kErrorOutOfMemory); @@ -204,7 +194,8 @@ Error CodeHolder::init(const CodeInfo& info) noexcept { return err; } else { - _codeInfo = info; + _environment = environment; + _baseAddress = baseAddress; return kErrorOk; } } @@ -271,33 +262,27 @@ Error CodeHolder::detach(BaseEmitter* emitter) noexcept { } // ============================================================================ -// [asmjit::CodeHolder - Emitter Options] -// ============================================================================ - -static constexpr uint32_t kEmitterOptionsFilter = ~uint32_t(BaseEmitter::kOptionLoggingEnabled); - -void CodeHolder::addEmitterOptions(uint32_t options) noexcept { - CodeHolder_modifyEmitterOptions(this, 0, options & kEmitterOptionsFilter); -} - -void CodeHolder::clearEmitterOptions(uint32_t options) noexcept { - CodeHolder_modifyEmitterOptions(this, options & kEmitterOptionsFilter, 0); -} - -// ============================================================================ -// [asmjit::CodeHolder - Logging & Error Handling] +// [asmjit::CodeHolder - Logging] // ============================================================================ void CodeHolder::setLogger(Logger* logger) noexcept { #ifndef ASMJIT_NO_LOGGING _logger = logger; - uint32_t option = !logger ? uint32_t(0) : uint32_t(BaseEmitter::kOptionLoggingEnabled); - CodeHolder_modifyEmitterOptions(this, BaseEmitter::kOptionLoggingEnabled, option); + CodeHolder_onSettingsUpdated(this); #else DebugUtils::unused(logger); #endif } +// ============================================================================ +// [asmjit::CodeHolder - Error Handling] +// ============================================================================ + +void CodeHolder::setErrorHandler(ErrorHandler* errorHandler) noexcept { + _errorHandler = errorHandler; + CodeHolder_onSettingsUpdated(this); +} + // ============================================================================ // [asmjit::CodeHolder - Code Buffer] // ============================================================================ @@ -373,7 +358,9 @@ Error CodeHolder::growBuffer(CodeBuffer* cb, size_t n) noexcept { Error CodeHolder::reserveBuffer(CodeBuffer* cb, size_t n) noexcept { size_t capacity = cb->capacity(); - if (n <= capacity) return kErrorOk; + + if (n <= capacity) + return kErrorOk; if (cb->isFixed()) return DebugUtils::errored(kErrorTooLarge); @@ -385,7 +372,7 @@ Error CodeHolder::reserveBuffer(CodeBuffer* cb, size_t n) noexcept { // [asmjit::CodeHolder - Sections] // ============================================================================ -Error CodeHolder::newSection(Section** sectionOut, const char* name, size_t nameSize, uint32_t flags, uint32_t alignment) noexcept { +Error CodeHolder::newSection(Section** sectionOut, const char* name, size_t nameSize, uint32_t flags, uint32_t alignment, int32_t order) noexcept { *sectionOut = nullptr; if (nameSize == SIZE_MAX) @@ -405,16 +392,24 @@ Error CodeHolder::newSection(Section** sectionOut, const char* name, size_t name return DebugUtils::errored(kErrorTooManySections); ASMJIT_PROPAGATE(_sections.willGrow(&_allocator)); - Section* section = _allocator.allocZeroedT
(); + ASMJIT_PROPAGATE(_sectionsByOrder.willGrow(&_allocator)); + Section* section = _allocator.allocZeroedT
<Section>
(); if (ASMJIT_UNLIKELY(!section)) return DebugUtils::errored(kErrorOutOfMemory); section->_id = sectionId; section->_flags = flags; section->_alignment = alignment; + section->_order = order; memcpy(section->_name.str, name, nameSize); + + Section** insertPosition = std::lower_bound(_sectionsByOrder.begin(), _sectionsByOrder.end(), section, [](const Section* a, const Section* b) { + return std::make_tuple(a->order(), a->id()) < std::make_tuple(b->order(), b->id()); + }); + _sections.appendUnsafe(section); + _sectionsByOrder.insertUnsafe((size_t)(insertPosition - _sectionsByOrder.data()), section); *sectionOut = section; return kErrorOk; @@ -427,7 +422,7 @@ Section* CodeHolder::sectionByName(const char* name, size_t nameSize) const noex // This could be also put in a hash-table similarly like we do with labels, // however it's questionable as the number of sections should be pretty low // in general. Create an issue if this becomes a problem. - if (ASMJIT_UNLIKELY(nameSize <= Globals::kMaxSectionNameSize)) { + if (nameSize <= Globals::kMaxSectionNameSize) { for (Section* section : _sections) if (memcmp(section->_name.str, name, nameSize) == 0 && section->_name.str[nameSize] == '\0') return section; @@ -440,7 +435,7 @@ Section* CodeHolder::ensureAddressTableSection() noexcept { if (_addressTableSection) return _addressTableSection; - newSection(&_addressTableSection, CodeHolder_addrTabName, sizeof(CodeHolder_addrTabName) - 1, 0, _codeInfo.gpSize()); + newSection(&_addressTableSection, CodeHolder_addrTabName, sizeof(CodeHolder_addrTabName) - 1, 0, _environment.registerSize(), std::numeric_limits::max()); return _addressTableSection; } @@ -458,7 +453,7 @@ Error CodeHolder::addAddressToAddressTable(uint64_t address) noexcept { return DebugUtils::errored(kErrorOutOfMemory); _addressTableEntries.insert(entry); - section->_virtualSize += _codeInfo.gpSize(); + section->_virtualSize += _environment.registerSize(); return kErrorOk; } @@ -470,20 +465,24 @@ Error CodeHolder::addAddressToAddressTable(uint64_t address) noexcept { //! Only used to lookup a label from `_namedLabels`. class LabelByName { public: - inline LabelByName(const char* key, size_t keySize, uint32_t hashCode) noexcept + inline LabelByName(const char* key, size_t keySize, uint32_t hashCode, uint32_t parentId) noexcept : _key(key), _keySize(uint32_t(keySize)), - _hashCode(hashCode) {} + _hashCode(hashCode), + _parentId(parentId) {} inline uint32_t hashCode() const noexcept { return _hashCode; } inline bool matches(const LabelEntry* entry) const noexcept { - return entry->nameSize() == _keySize && ::memcmp(entry->name(), _key, _keySize) == 0; + return entry->nameSize() == _keySize && + entry->parentId() == _parentId && + ::memcmp(entry->name(), _key, _keySize) == 0; } const char* _key; uint32_t _keySize; uint32_t _hashCode; + uint32_t _parentId; }; // Returns a hash of `name` and fixes `nameSize` if it's `SIZE_MAX`. 
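// A minimal sketch of the parent-id aware label lookup above ("code" is a
// CodeHolder, the label names are illustrative):
//
//   LabelEntry* fn;
//   code.newNamedLabelEntry(&fn, "Func", SIZE_MAX, Label::kTypeGlobal);
//
//   LabelEntry* loop;
//   code.newNamedLabelEntry(&loop, "Loop", SIZE_MAX, Label::kTypeLocal, fn->id());
//
//   // Finds the local label only when the same parent id is passed; otherwise
//   // Globals::kInvalidId is returned.
//   uint32_t loopId = code.labelIdByName("Loop", SIZE_MAX, fn->id());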
@@ -509,20 +508,7 @@ static uint32_t CodeHolder_hashNameAndGetSize(const char* name, size_t& nameSize return hashCode; } -static bool CodeHolder_writeDisplacement(void* dst, int64_t displacement, uint32_t displacementSize) { - if (displacementSize == 4 && Support::isInt32(displacement)) { - Support::writeI32uLE(dst, int32_t(displacement)); - return true; - } - else if (displacementSize == 1 && Support::isInt8(displacement)) { - Support::writeI8(dst, int8_t(displacement)); - return true; - } - - return false; -} - -LabelLink* CodeHolder::newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel) noexcept { +LabelLink* CodeHolder::newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel, const OffsetFormat& format) noexcept { LabelLink* link = _allocator.allocT(); if (ASMJIT_UNLIKELY(!link)) return nullptr; @@ -533,13 +519,14 @@ LabelLink* CodeHolder::newLabelLink(LabelEntry* le, uint32_t sectionId, size_t o link->relocId = Globals::kInvalidId; link->offset = offset; link->rel = rel; + link->format = format; _unresolvedLinkCount++; return link; } Error CodeHolder::newLabelEntry(LabelEntry** entryOut) noexcept { - *entryOut = 0; + *entryOut = nullptr; uint32_t labelId = _labelEntries.size(); if (ASMJIT_UNLIKELY(labelId == Globals::kInvalidId)) @@ -561,7 +548,7 @@ Error CodeHolder::newLabelEntry(LabelEntry** entryOut) noexcept { } Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, uint32_t type, uint32_t parentId) noexcept { - *entryOut = 0; + *entryOut = nullptr; uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize); if (ASMJIT_UNLIKELY(nameSize == 0)) @@ -579,9 +566,9 @@ Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, si break; case Label::kTypeGlobal: + case Label::kTypeExternal: if (ASMJIT_UNLIKELY(parentId != Globals::kInvalidId)) - return DebugUtils::errored(kErrorNonLocalLabelCantHaveParent); - + return DebugUtils::errored(kErrorNonLocalLabelCannotHaveParent); break; default: @@ -591,7 +578,7 @@ Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, si // Don't allow to insert duplicates. Local labels allow duplicates that have // different id, this is already accomplished by having a different hashes // between the same label names having different parent labels. - LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode)); + LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode, parentId)); if (ASMJIT_UNLIKELY(le)) return DebugUtils::errored(kErrorLabelAlreadyDefined); @@ -610,7 +597,7 @@ Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, si le->_hashCode = hashCode; le->_setId(labelId); le->_type = uint8_t(type); - le->_parentId = Globals::kInvalidId; + le->_parentId = parentId; le->_offset = 0; ASMJIT_PROPAGATE(le->_name.setData(&_zone, name, nameSize)); @@ -622,13 +609,14 @@ Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, si } uint32_t CodeHolder::labelIdByName(const char* name, size_t nameSize, uint32_t parentId) noexcept { - // TODO: Finalize - parent id is not used here? 
- DebugUtils::unused(parentId); - uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize); - if (ASMJIT_UNLIKELY(!nameSize)) return 0; + if (ASMJIT_UNLIKELY(!nameSize)) + return 0; + + if (parentId != Globals::kInvalidId) + hashCode ^= parentId; - LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode)); + LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode, parentId)); return le ? le->id() : uint32_t(Globals::kInvalidId); } @@ -657,18 +645,16 @@ ASMJIT_API Error CodeHolder::resolveUnresolvedLinks() noexcept { ASMJIT_ASSERT(linkOffset < buf.size()); // Calculate the offset relative to the start of the virtual base. - uint64_t fromOffset = Support::addOverflow(fromSection->offset(), linkOffset, &of); + Support::FastUInt8 localOF = of; + uint64_t fromOffset = Support::addOverflow(fromSection->offset(), linkOffset, &localOF); int64_t displacement = int64_t(toOffset - fromOffset + uint64_t(int64_t(link->rel))); - if (!of) { + if (!localOF) { ASMJIT_ASSERT(size_t(linkOffset) < buf.size()); - - // Size of the value we are going to patch. Only BYTE/DWORD is allowed. - uint32_t displacementSize = buf._data[linkOffset]; - ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= displacementSize); + ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= link->format.valueSize()); // Overwrite a real displacement in the CodeBuffer. - if (CodeHolder_writeDisplacement(buf._data + linkOffset, displacement, displacementSize)) { + if (CodeWriterUtils::writeOffset(buf._data + linkOffset, displacement, link->format)) { link.resolveAndNext(this); continue; } @@ -731,11 +717,10 @@ ASMJIT_API Error CodeHolder::bindLabel(const Label& label, uint32_t toSectionId, int64_t displacement = int64_t(toOffset - uint64_t(linkOffset) + uint64_t(int64_t(link->rel))); // Size of the value we are going to patch. Only BYTE/DWORD is allowed. - uint32_t displacementSize = buf._data[linkOffset]; - ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= displacementSize); + ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= link->format.regionSize()); // Overwrite a real displacement in the CodeBuffer. 
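// CodeWriterUtils::writeOffset() encodes the displacement according to the
// link's OffsetFormat (value size, bit count, bit shift, discarded LSBs) and
// ORs the result into the existing word, so offsets occupying only part of an
// instruction word are handled as well.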
- if (!CodeHolder_writeDisplacement(buf._data + linkOffset, displacement, displacementSize)) { + if (!CodeWriterUtils::writeOffset(buf._data + linkOffset, displacement, link->format)) { err = DebugUtils::errored(kErrorInvalidDisplacement); link.next(); continue; @@ -752,7 +737,7 @@ ASMJIT_API Error CodeHolder::bindLabel(const Label& label, uint32_t toSectionId, // [asmjit::BaseEmitter - Relocations] // ============================================================================ -Error CodeHolder::newRelocEntry(RelocEntry** dst, uint32_t relocType, uint32_t valueSize) noexcept { +Error CodeHolder::newRelocEntry(RelocEntry** dst, uint32_t relocType) noexcept { ASMJIT_PROPAGATE(_relocations.willGrow(&_allocator)); uint32_t relocId = _relocations.size(); @@ -765,7 +750,6 @@ Error CodeHolder::newRelocEntry(RelocEntry** dst, uint32_t relocType, uint32_t v re->_id = relocId; re->_relocType = uint8_t(relocType); - re->_valueSize = uint8_t(valueSize); re->_sourceSectionId = Globals::kInvalidId; re->_targetSectionId = Globals::kInvalidId; _relocations.appendUnsafe(re); @@ -857,7 +841,7 @@ static Error CodeHolder_evaluateExpression(CodeHolder* self, Expression* exp, ui Error CodeHolder::flatten() noexcept { uint64_t offset = 0; - for (Section* section : _sections) { + for (Section* section : _sectionsByOrder) { uint64_t realSize = section->realSize(); if (realSize) { uint64_t alignedOffset = Support::alignUp(offset, section->alignment()); @@ -875,7 +859,7 @@ Error CodeHolder::flatten() noexcept { // Now we know that we can assign offsets of all sections properly. Section* prev = nullptr; offset = 0; - for (Section* section : _sections) { + for (Section* section : _sectionsByOrder) { uint64_t realSize = section->realSize(); if (realSize) offset = Support::alignUp(offset, section->alignment()); @@ -896,7 +880,7 @@ size_t CodeHolder::codeSize() const noexcept { Support::FastUInt8 of = 0; uint64_t offset = 0; - for (Section* section : _sections) { + for (Section* section : _sectionsByOrder) { uint64_t realSize = section->realSize(); if (realSize) { @@ -906,7 +890,6 @@ size_t CodeHolder::codeSize() const noexcept { } } - // TODO: Not nice, maybe changing `codeSize()` to return `uint64_t` instead? if ((sizeof(uint64_t) > sizeof(size_t) && offset > SIZE_MAX) || of) return SIZE_MAX; @@ -918,8 +901,8 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { if (ASMJIT_UNLIKELY(baseAddress == Globals::kNoBaseAddress)) return DebugUtils::errored(kErrorInvalidArgument); - _codeInfo.setBaseAddress(baseAddress); - uint32_t gpSize = _codeInfo.gpSize(); + _baseAddress = baseAddress; + uint32_t addressSize = _environment.registerSize(); Section* addressTableSection = _addressTableSection; uint32_t addressTableEntryCount = 0; @@ -948,13 +931,13 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { uint64_t sourceOffset = re->sourceOffset(); // Make sure that the `RelocEntry` doesn't go out of bounds. 
- size_t regionSize = re->leadingSize() + re->valueSize() + re->trailingSize(); + size_t regionSize = re->format().regionSize(); if (ASMJIT_UNLIKELY(re->sourceOffset() >= sourceSection->bufferSize() || sourceSection->bufferSize() - size_t(re->sourceOffset()) < regionSize)) return DebugUtils::errored(kErrorInvalidRelocEntry); uint8_t* buffer = sourceSection->data(); - size_t valueOffset = size_t(re->sourceOffset()) + re->leadingSize(); + size_t valueOffset = size_t(re->sourceOffset()) + re->format().valueOffset(); switch (re->relocType()) { case RelocEntry::kTypeExpression: { @@ -980,13 +963,13 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { case RelocEntry::kTypeAbsToRel: { value -= baseAddress + sectionOffset + sourceOffset + regionSize; - if (gpSize > 4 && !Support::isInt32(int64_t(value))) + if (addressSize > 4 && !Support::isInt32(int64_t(value))) return DebugUtils::errored(kErrorRelocOffsetOutOfRange); break; } case RelocEntry::kTypeX64AddressEntry: { - if (re->valueSize() != 4 || re->leadingSize() < 2) + if (re->format().valueSize() != 4 || valueOffset < 2) return DebugUtils::errored(kErrorInvalidRelocEntry); // First try whether a relative 32-bit displacement would work. @@ -1003,7 +986,7 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { if (!atEntry->hasAssignedSlot()) atEntry->_slot = addressTableEntryCount++; - size_t atEntryIndex = size_t(atEntry->slot()) * gpSize; + size_t atEntryIndex = size_t(atEntry->slot()) * addressSize; uint64_t addrSrc = sectionOffset + sourceOffset + regionSize; uint64_t addrDst = addressTableSection->offset() + uint64_t(atEntryIndex); @@ -1040,7 +1023,7 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { return DebugUtils::errored(kErrorInvalidRelocEntry); } - switch (re->valueSize()) { + switch (re->format().valueSize()) { case 1: Support::writeU8(buffer + valueOffset, uint32_t(value & 0xFFu)); break; @@ -1063,8 +1046,8 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { } // Fixup the virtual size of the address table if it's the last section. 
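// Only addresses that actually received a slot (absolute targets that could
// not be reached with a 32-bit displacement) occupy the table, so its buffer
// is trimmed to the number of assigned slots here.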
- if (_sections.last() == addressTableSection) { - size_t addressTableSize = addressTableEntryCount * gpSize; + if (_sectionsByOrder.last() == addressTableSection) { + size_t addressTableSize = addressTableEntryCount * addressSize; addressTableSection->_buffer._size = addressTableSize; addressTableSection->_virtualSize = addressTableSize; } @@ -1072,7 +1055,7 @@ Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept { return kErrorOk; } -Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t options) noexcept { +Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t copyOptions) noexcept { if (ASMJIT_UNLIKELY(!isSectionValid(sectionId))) return DebugUtils::errored(kErrorInvalidSection); @@ -1084,7 +1067,7 @@ Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, memcpy(dst, section->data(), bufferSize); - if (bufferSize < dstSize && (options & kCopyWithPadding)) { + if (bufferSize < dstSize && (copyOptions & kCopyPadSectionBuffer)) { size_t paddingSize = dstSize - bufferSize; memset(static_cast(dst) + bufferSize, 0, paddingSize); } @@ -1092,9 +1075,9 @@ Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, return kErrorOk; } -Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, uint32_t options) noexcept { +Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, uint32_t copyOptions) noexcept { size_t end = 0; - for (Section* section : _sections) { + for (Section* section : _sectionsByOrder) { if (section->offset() > dstSize) return DebugUtils::errored(kErrorInvalidArgument); @@ -1108,7 +1091,7 @@ Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, uint32_t options) size_t paddingSize = 0; memcpy(dstTarget, section->data(), bufferSize); - if ((options & kCopyWithPadding) && bufferSize < section->virtualSize()) { + if ((copyOptions & kCopyPadSectionBuffer) && bufferSize < section->virtualSize()) { paddingSize = Support::min(dstSize - offset, size_t(section->virtualSize())) - bufferSize; memset(dstTarget + bufferSize, 0, paddingSize); } @@ -1116,10 +1099,52 @@ Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, uint32_t options) end = Support::max(end, offset + bufferSize + paddingSize); } - // TODO: `end` is not used atm, we need an option to also pad anything beyond - // the code in case that the destination was much larger (for example page-size). 
+ if (end < dstSize && (copyOptions & kCopyPadTargetBuffer)) { + memset(static_cast(dst) + end, 0, dstSize - end); + } return kErrorOk; } +// ============================================================================ +// [asmjit::CodeHolder - Unit] +// ============================================================================ + +#if defined(ASMJIT_TEST) +UNIT(code_holder) { + CodeHolder code; + + INFO("Verifying CodeHolder::init()"); + Environment env; + env.init(Environment::kArchX86); + + code.init(env); + EXPECT(code.arch() == Environment::kArchX86); + + INFO("Verifying named labels"); + LabelEntry* le; + EXPECT(code.newNamedLabelEntry(&le, "NamedLabel", SIZE_MAX, Label::kTypeGlobal) == kErrorOk); + EXPECT(strcmp(le->name(), "NamedLabel") == 0); + EXPECT(code.labelIdByName("NamedLabel") == le->id()); + + INFO("Verifying section ordering"); + Section* section1; + EXPECT(code.newSection(§ion1, "high-priority", SIZE_MAX, 0, 1, -1) == kErrorOk); + EXPECT(code.sections()[1] == section1); + EXPECT(code.sectionsByOrder()[0] == section1); + + Section* section0; + EXPECT(code.newSection(§ion0, "higher-priority", SIZE_MAX, 0, 1, -2) == kErrorOk); + EXPECT(code.sections()[2] == section0); + EXPECT(code.sectionsByOrder()[0] == section0); + EXPECT(code.sectionsByOrder()[1] == section1); + + Section* section3; + EXPECT(code.newSection(§ion3, "low-priority", SIZE_MAX, 0, 1, 2) == kErrorOk); + EXPECT(code.sections()[3] == section3); + EXPECT(code.sectionsByOrder()[3] == section3); + +} +#endif + ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/codeholder.h b/libs/asmjit/src/asmjit/core/codeholder.h index 5f6a21d..06bf3f9 100644 --- a/libs/asmjit/src/asmjit/core/codeholder.h +++ b/libs/asmjit/src/asmjit/core/codeholder.h @@ -24,8 +24,10 @@ #ifndef ASMJIT_CORE_CODEHOLDER_H_INCLUDED #define ASMJIT_CORE_CODEHOLDER_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" +#include "../core/codebuffer.h" #include "../core/datatypes.h" +#include "../core/errorhandler.h" #include "../core/operand.h" #include "../core/string.h" #include "../core/support.h" @@ -56,126 +58,91 @@ class Logger; //! Align mode. enum AlignMode : uint32_t { - kAlignCode = 0, //!< Align executable code. - kAlignData = 1, //!< Align non-executable code. - kAlignZero = 2, //!< Align by a sequence of zeros. - kAlignCount = 3 //!< Count of alignment modes. + //! Align executable code. + kAlignCode = 0, + //! Align non-executable code. + kAlignData = 1, + //! Align by a sequence of zeros. + kAlignZero = 2, + //! Count of alignment modes. + kAlignCount = 3 }; // ============================================================================ -// [asmjit::ErrorHandler] +// [asmjit::Expression] // ============================================================================ -//! Error handler can be used to override the default behavior of error handling -//! available to all classes that inherit `BaseEmitter`. -//! -//! Override `ErrorHandler::handleError()` to implement your own error handler. -class ASMJIT_VIRTAPI ErrorHandler { -public: - ASMJIT_BASE_CLASS(ErrorHandler) - - // -------------------------------------------------------------------------- - // [Construction / Destruction] - // -------------------------------------------------------------------------- - - //! Creates a new `ErrorHandler` instance. - ASMJIT_API ErrorHandler() noexcept; - //! Destroys the `ErrorHandler` instance. 
- ASMJIT_API virtual ~ErrorHandler() noexcept; - - // -------------------------------------------------------------------------- - // [Handle Error] - // -------------------------------------------------------------------------- - - //! Error handler (must be reimplemented). - //! - //! Error handler is called after an error happened and before it's propagated - //! to the caller. There are multiple ways how the error handler can be used: - //! - //! 1. User-based error handling without throwing exception or using C's - //! `longjmp()`. This is for users that don't use exceptions and want - //! customized error handling. - //! - //! 2. Throwing an exception. AsmJit doesn't use exceptions and is completely - //! exception-safe, but you can throw exception from your error handler if - //! this way is the preferred way of handling errors in your project. - //! - //! 3. Using plain old C's `setjmp()` and `longjmp()`. Asmjit always puts - //! `BaseEmitter` to a consistent state before calling `handleError()` - //! so `longjmp()` can be used without any issues to cancel the code - //! generation if an error occurred. There is no difference between - //! exceptions and `longjmp()` from AsmJit's perspective, however, - //! never jump outside of `CodeHolder` and `BaseEmitter` scope as you - //! would leak memory. - virtual void handleError(Error err, const char* message, BaseEmitter* origin) = 0; -}; - -// ============================================================================ -// [asmjit::CodeBuffer] -// ============================================================================ +//! Expression node that can reference constants, labels, and another expressions. +struct Expression { + //! Operation type. + enum OpType : uint8_t { + //! Addition. + kOpAdd = 0, + //! Subtraction. + kOpSub = 1, + //! Multiplication + kOpMul = 2, + //! Logical left shift. + kOpSll = 3, + //! Logical right shift. + kOpSrl = 4, + //! Arithmetic right shift. + kOpSra = 5 + }; -//! Code or data buffer. -struct CodeBuffer { - //! The content of the buffer (data). - uint8_t* _data; - //! Number of bytes of `data` used. - size_t _size; - //! Buffer capacity (in bytes). - size_t _capacity; - //! Buffer flags. - uint32_t _flags; + //! Type of \ref Value. + enum ValueType : uint8_t { + //! No value or invalid. + kValueNone = 0, + //! Value is 64-bit unsigned integer (constant). + kValueConstant = 1, + //! Value is \ref LabelEntry, which references a \ref Label. + kValueLabel = 2, + //! Value is \ref Expression + kValueExpression = 3 + }; - enum Flags : uint32_t { - //! Buffer is external (not allocated by asmjit). - kFlagIsExternal = 0x00000001u, - //! Buffer is fixed (cannot be reallocated). - kFlagIsFixed = 0x00000002u + //! Expression value. + union Value { + //! Constant. + uint64_t constant; + //! Pointer to another expression. + Expression* expression; + //! Poitner to \ref LabelEntry. + LabelEntry* label; }; - //! \name Overloaded Operators - //! \{ + //! Operation type. + uint8_t opType; + //! Value types of \ref value. + uint8_t valueType[2]; + //! Reserved for future use, should be initialized to zero. + uint8_t reserved[5]; + //! Expression left and right values. + Value value[2]; - inline uint8_t& operator[](size_t index) noexcept { - ASMJIT_ASSERT(index < _size); - return _data[index]; - } + //! Resets the whole expression. + //! + //! Changes both values to \ref kValueNone. 
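+  //!
+  //! A freshly reset expression is typically populated like this (illustrative
+  //! sketch; `labelEntry` stands for any valid \ref LabelEntry):
+  //!
+  //! ```
+  //! Expression exp;
+  //! exp.reset();
+  //! exp.opType = Expression::kOpAdd;
+  //! exp.setValueAsLabel(0, labelEntry);
+  //! exp.setValueAsConstant(1, 16);
+  //! ```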
+ inline void reset() noexcept { memset(this, 0, sizeof(*this)); } - inline const uint8_t& operator[](size_t index) const noexcept { - ASMJIT_ASSERT(index < _size); - return _data[index]; + //! Sets the value type at `index` to \ref kValueConstant and its content to `constant`. + inline void setValueAsConstant(size_t index, uint64_t constant) noexcept { + valueType[index] = kValueConstant; + value[index].constant = constant; } - //! \} - - //! \name Accessors - //! \{ - - inline uint32_t flags() const noexcept { return _flags; } - inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } - - inline bool isAllocated() const noexcept { return _data != nullptr; } - inline bool isFixed() const noexcept { return hasFlag(kFlagIsFixed); } - inline bool isExternal() const noexcept { return hasFlag(kFlagIsExternal); } - - inline uint8_t* data() noexcept { return _data; } - inline const uint8_t* data() const noexcept { return _data; } - - inline bool empty() const noexcept { return !_size; } - inline size_t size() const noexcept { return _size; } - inline size_t capacity() const noexcept { return _capacity; } - - //! \} - - //! \name Iterators - //! \{ - - inline uint8_t* begin() noexcept { return _data; } - inline const uint8_t* begin() const noexcept { return _data; } - - inline uint8_t* end() noexcept { return _data + _size; } - inline const uint8_t* end() const noexcept { return _data + _size; } + //! Sets the value type at `index` to \ref kValueLabel and its content to `labelEntry`. + inline void setValueAsLabel(size_t index, LabelEntry* labelEntry) noexcept { + valueType[index] = kValueLabel; + value[index].label = labelEntry; + } - //! \} + //! Sets the value type at `index` to \ref kValueExpression and its content to `expression`. + inline void setValueAsExpression(size_t index, Expression* expression) noexcept { + valueType[index] = kValueLabel; + value[index].expression = expression; + } }; // ============================================================================ @@ -191,8 +158,8 @@ class Section { uint32_t _flags; //! Section alignment requirements (0 if no requirements). uint32_t _alignment; - //! Reserved for future use (padding). - uint32_t _reserved; + //! Order (lower value means higher priority). + int32_t _order; //! Offset of this section from base-address. uint64_t _offset; //! Virtual size of the section (zero initialized sections). @@ -204,31 +171,51 @@ class Section { //! Section flags. enum Flags : uint32_t { - kFlagExec = 0x00000001u, //!< Executable (.text sections). - kFlagConst = 0x00000002u, //!< Read-only (.text and .data sections). - kFlagZero = 0x00000004u, //!< Zero initialized by the loader (BSS). - kFlagInfo = 0x00000008u, //!< Info / comment flag. - kFlagImplicit = 0x80000000u //!< Section created implicitly and can be deleted by `Target`. + //! Executable (.text sections). + kFlagExec = 0x00000001u, + //! Read-only (.text and .data sections). + kFlagConst = 0x00000002u, + //! Zero initialized by the loader (BSS). + kFlagZero = 0x00000004u, + //! Info / comment flag. + kFlagInfo = 0x00000008u, + //! Section created implicitly and can be deleted by \ref Target. + kFlagImplicit = 0x80000000u }; //! \name Accessors //! \{ + //! Returns the section id. inline uint32_t id() const noexcept { return _id; } + //! Returns the section name, as a null terminated string. inline const char* name() const noexcept { return _name.str; } + //! Returns the section data. inline uint8_t* data() noexcept { return _buffer.data(); } + //! 
\overload inline const uint8_t* data() const noexcept { return _buffer.data(); } + //! Returns the section flags, see \ref Flags. inline uint32_t flags() const noexcept { return _flags; } + //! Tests whether the section has the given `flag`. inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + //! Adds `flags` to the section flags. inline void addFlags(uint32_t flags) noexcept { _flags |= flags; } + //! Removes `flags` from the section flags. inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; } + //! Returns the minimum section alignment inline uint32_t alignment() const noexcept { return _alignment; } + //! Sets the minimum section alignment inline void setAlignment(uint32_t alignment) noexcept { _alignment = alignment; } + //! Returns the section order, which has a higher priority than section id. + inline int32_t order() const noexcept { return _order; } + + //! Returns the section offset, relative to base. inline uint64_t offset() const noexcept { return _offset; } + //! Set the section offset. inline void setOffset(uint64_t offset) noexcept { _offset = offset; } //! Returns the virtual size of the section. @@ -256,71 +243,220 @@ class Section { }; // ============================================================================ -// [asmjit::LabelLink] +// [asmjit::OffsetFormat] // ============================================================================ -//! Data structure used to link either unbound labels or cross-section links. -struct LabelLink { - //! Next link (single-linked list). - LabelLink* next; - //! Section id where the label is bound. - uint32_t sectionId; - //! Relocation id or Globals::kInvalidId. - uint32_t relocId; - //! Label offset relative to the start of the section. - size_t offset; - //! Inlined rel8/rel32. - intptr_t rel; +//! Provides information about formatting offsets, absolute addresses, or their +//! parts. Offset format is used by both \ref RelocEntry and \ref LabelLink. +//! +//! The illustration above describes the relation of region size and offset size. +//! Region size is the size of the whole unit whereas offset size is the size of +//! the unit that will be patched. +//! +//! ``` +//! +-> Code buffer | The subject of the relocation (region) | +//! | | (Word-Offset) (Word-Size) | +//! |xxxxxxxxxxxxxxx|................|*PATCHED*|................|xxxxxxxxxxxx-> +//! | | +//! [Word Offset points here]----+ +--- [WordOffset + WordSize] +//! ``` +//! +//! Once the offset word has been located it can be patched like this: +//! +//! ``` +//! |ImmDiscardLSB (discard LSB bits). +//! |.. +//! [0000000000000iiiiiiiiiiiiiiiiiDD] - Offset value (32-bit) +//! [000000000000000iiiiiiiiiiiiiiiii] - Offset value after discard LSB. +//! [00000000000iiiiiiiiiiiiiiiii0000] - Offset value shifted by ImmBitShift. +//! [xxxxxxxxxxxiiiiiiiiiiiiiiiiixxxx] - Patched word (32-bit) +//! |...............| +//! (ImmBitCount) +- ImmBitShift +//! ``` +struct OffsetFormat { + //! Type of the displacement. + uint8_t _type; + //! Encoding flags. + uint8_t _flags; + //! Size of the region (in bytes) containing the offset value, if the offset + //! value is part of an instruction, otherwise it would be the same as + //! `_valueSize`. + uint8_t _regionSize; + //! Size of the offset value, in bytes (1, 2, 4, or 8). + uint8_t _valueSize; + //! Offset of the offset value, in bytes, relative to the start of the region + //! or data. Value offset would be zero if both region size and value size are + //! equal. + uint8_t _valueOffset; + //! 
Size of the displacement immediate value in bits. + uint8_t _immBitCount; + //! Shift of the displacement immediate value in bits in the target word. + uint8_t _immBitShift; + //! Number of least significant bits to discard before writing the immediate + //! to the destination. All discarded bits must be zero otherwise the value + //! is invalid. + uint8_t _immDiscardLsb; + + //! Type of the displacement. + enum Type : uint8_t { + //! A value having `_immBitCount` bits and shifted by `_immBitShift`. + //! + //! This displacement type is sufficient for both X86/X64 and many other + //! architectures that store displacement as continuous bits within a machine + //! word. + kTypeCommon = 0, + //! AARCH64 ADR format of `[.|immlo:2|.....|immhi:19|.....]`. + kTypeAArch64_ADR, + //! AARCH64 ADRP format of `[.|immlo:2|.....|immhi:19|.....]` (4kB pages). + kTypeAArch64_ADRP, + + //! Count of displacement types. + kTypeCount + }; + + //! Returns the type of the displacement. + inline uint32_t type() const noexcept { return _type; } + + //! Returns flags. + inline uint32_t flags() const noexcept { return _flags; } + + //! Returns the size of the region/instruction where the displacement is encoded. + inline uint32_t regionSize() const noexcept { return _regionSize; } + + //! Returns the the offset of the word relative to the start of the region + //! where the displacement is. + inline uint32_t valueOffset() const noexcept { return _valueOffset; } + + //! Returns the size of the data-type (word) that contains the displacement, in bytes. + inline uint32_t valueSize() const noexcept { return _valueSize; } + //! Returns the count of bits of the displacement value in the data it's stored in. + inline uint32_t immBitCount() const noexcept { return _immBitCount; } + //! Returns the bit-shift of the displacement value in the data it's stored in. + inline uint32_t immBitShift() const noexcept { return _immBitShift; } + //! Returns the number of least significant bits of the displacement value, + //! that must be zero and that are not part of the encoded data. + inline uint32_t immDiscardLsb() const noexcept { return _immDiscardLsb; } + + //! Resets this offset format to a simple data value of `dataSize` bytes. + //! + //! The region will be the same size as data and immediate bits would correspond + //! to `dataSize * 8`. There will be no immediate bit shift or discarded bits. 
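+  //!
+  //! For example, a plain 32-bit value can be described as (illustrative sketch):
+  //!
+  //! ```
+  //! OffsetFormat format;
+  //! format.resetToDataValue(4);              // 4-byte value, region == value.
+  //! format.setLeadingAndTrailingSize(2, 0);  // Optionally widen the region if
+  //!                                          // the value sits 2 bytes into an
+  //!                                          // instruction.
+  //! ```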
+ inline void resetToDataValue(size_t dataSize) noexcept { + ASMJIT_ASSERT(dataSize <= 8u); + + _type = uint8_t(kTypeCommon); + _flags = uint8_t(0); + _regionSize = uint8_t(dataSize); + _valueSize = uint8_t(dataSize); + _valueOffset = uint8_t(0); + _immBitCount = uint8_t(dataSize * 8u); + _immBitShift = uint8_t(0); + _immDiscardLsb = uint8_t(0); + } + + inline void resetToImmValue(uint32_t type, size_t valueSize, uint32_t immBitShift, uint32_t immBitCount, uint32_t immDiscardLsb) noexcept { + ASMJIT_ASSERT(valueSize <= 8u); + ASMJIT_ASSERT(immBitShift < valueSize * 8u); + ASMJIT_ASSERT(immBitCount <= 64u); + ASMJIT_ASSERT(immDiscardLsb <= 64u); + + _type = uint8_t(type); + _flags = uint8_t(0); + _regionSize = uint8_t(valueSize); + _valueSize = uint8_t(valueSize); + _valueOffset = uint8_t(0); + _immBitCount = uint8_t(immBitCount); + _immBitShift = uint8_t(immBitShift); + _immDiscardLsb = uint8_t(immDiscardLsb); + } + + inline void setRegion(size_t regionSize, size_t valueOffset) noexcept { + _regionSize = uint8_t(regionSize); + _valueOffset = uint8_t(valueOffset); + } + + inline void setLeadingAndTrailingSize(size_t leadingSize, size_t trailingSize) noexcept { + _regionSize = uint8_t(leadingSize + trailingSize + _valueSize); + _valueOffset = uint8_t(leadingSize); + } }; // ============================================================================ -// [asmjit::Expression] +// [asmjit::RelocEntry] // ============================================================================ -struct Expression { - enum OpType : uint8_t { - kOpAdd = 0, - kOpSub = 1, - kOpMul = 2, - kOpSll = 3, - kOpSrl = 4, - kOpSra = 5 - }; +//! Relocation entry. +struct RelocEntry { + //! Relocation id. + uint32_t _id; + //! Type of the relocation. + uint32_t _relocType; + //! Format of the relocated value. + OffsetFormat _format; + //! Source section id. + uint32_t _sourceSectionId; + //! Target section id. + uint32_t _targetSectionId; + //! Source offset (relative to start of the section). + uint64_t _sourceOffset; + //! Payload (target offset, target address, expression, etc). + uint64_t _payload; - enum ValueType : uint8_t { - kValueNone = 0, - kValueConstant = 1, - kValueLabel = 2, - kValueExpression = 3 + //! Relocation type. + enum RelocType : uint32_t { + //! None/deleted (no relocation). + kTypeNone = 0, + //! Expression evaluation, `_payload` is pointer to `Expression`. + kTypeExpression = 1, + //! Relocate absolute to absolute. + kTypeAbsToAbs = 2, + //! Relocate relative to absolute. + kTypeRelToAbs = 3, + //! Relocate absolute to relative. + kTypeAbsToRel = 4, + //! Relocate absolute to relative or use trampoline. + kTypeX64AddressEntry = 5 }; - union Value { - uint64_t constant; - Expression* expression; - LabelEntry* label; - }; + //! \name Accessors + //! 
\{ - uint8_t opType; - uint8_t valueType[2]; - uint8_t reserved[5]; - Value value[2]; + inline uint32_t id() const noexcept { return _id; } - inline void reset() noexcept { memset(this, 0, sizeof(*this)); } + inline uint32_t relocType() const noexcept { return _relocType; } + inline const OffsetFormat& format() const noexcept { return _format; } - inline void setValueAsConstant(size_t index, uint64_t constant) noexcept { - valueType[index] = kValueConstant; - value[index].constant = constant; - } + inline uint32_t sourceSectionId() const noexcept { return _sourceSectionId; } + inline uint32_t targetSectionId() const noexcept { return _targetSectionId; } - inline void setValueAsLabel(size_t index, LabelEntry* label) noexcept { - valueType[index] = kValueLabel; - value[index].label = label; - } + inline uint64_t sourceOffset() const noexcept { return _sourceOffset; } + inline uint64_t payload() const noexcept { return _payload; } - inline void setValueAsExpression(size_t index, Expression* expression) noexcept { - valueType[index] = kValueLabel; - value[index].expression = expression; + Expression* payloadAsExpression() const noexcept { + return reinterpret_cast(uintptr_t(_payload)); } + + //! \} +}; + +// ============================================================================ +// [asmjit::LabelLink] +// ============================================================================ + +//! Data structure used to link either unbound labels or cross-section links. +struct LabelLink { + //! Next link (single-linked list). + LabelLink* next; + //! Section id where the label is bound. + uint32_t sectionId; + //! Relocation id or Globals::kInvalidId. + uint32_t relocId; + //! Label offset relative to the start of the section. + size_t offset; + //! Inlined rel8/rel32. + intptr_t rel; + //! Offset format information. + OffsetFormat format; }; // ============================================================================ @@ -347,8 +483,10 @@ class LabelEntry : public ZoneHashNode { // Let's round the size of `LabelEntry` to 64 bytes (as `ZoneAllocator` has // granularity of 32 bytes anyway). This gives `_name` the remaining space, // which is should be 16 bytes on 64-bit and 28 bytes on 32-bit architectures. - static constexpr uint32_t kStaticNameSize = - 64 - (sizeof(ZoneHashNode) + 8 + sizeof(Section*) + sizeof(size_t) + sizeof(LabelLink*)); + enum : uint32_t { + kStaticNameSize = + 64 - (sizeof(ZoneHashNode) + 8 + sizeof(Section*) + sizeof(size_t) + sizeof(LabelLink*)) + }; //! Label type, see `Label::LabelType`. uint8_t _type; @@ -430,91 +568,18 @@ class LabelEntry : public ZoneHashNode { //! \} }; -// ============================================================================ -// [asmjit::RelocEntry] -// ============================================================================ - -//! Relocation entry. -//! -//! We describe relocation data in the following way: -//! -//! ``` -//! +- Start of the buffer +- End of the data -//! | |*PATCHED*| | or instruction -//! |xxxxxxxxxxxxxxxxxxxxxx|LeadSize|ValueSize|TrailSize|xxxxxxxxxxxxxxxxxxxx-> -//! | -//! +- Source offset -//! ``` -struct RelocEntry { - //! Relocation id. - uint32_t _id; - //! Type of the relocation. - uint8_t _relocType; - //! Size of the relocation data/value (1, 2, 4 or 8 bytes). - uint8_t _valueSize; - //! Number of bytes after `_sourceOffset` to reach the value to be patched. - uint8_t _leadingSize; - //! Number of bytes after `_sourceOffset + _valueSize` to reach end of the - //! instruction. - uint8_t _trailingSize; - //! 
Source section id. - uint32_t _sourceSectionId; - //! Target section id. - uint32_t _targetSectionId; - //! Source offset (relative to start of the section). - uint64_t _sourceOffset; - //! Payload (target offset, target address, expression, etc). - uint64_t _payload; - - //! Relocation type. - enum RelocType : uint32_t { - //! None/deleted (no relocation). - kTypeNone = 0, - //! Expression evaluation, `_payload` is pointer to `Expression`. - kTypeExpression = 1, - //! Relocate absolute to absolute. - kTypeAbsToAbs = 2, - //! Relocate relative to absolute. - kTypeRelToAbs = 3, - //! Relocate absolute to relative. - kTypeAbsToRel = 4, - //! Relocate absolute to relative or use trampoline. - kTypeX64AddressEntry = 5 - }; - - //! \name Accessors - //! \{ - - inline uint32_t id() const noexcept { return _id; } - - inline uint32_t relocType() const noexcept { return _relocType; } - inline uint32_t valueSize() const noexcept { return _valueSize; } - - inline uint32_t leadingSize() const noexcept { return _leadingSize; } - inline uint32_t trailingSize() const noexcept { return _trailingSize; } - - inline uint32_t sourceSectionId() const noexcept { return _sourceSectionId; } - inline uint32_t targetSectionId() const noexcept { return _targetSectionId; } - - inline uint64_t sourceOffset() const noexcept { return _sourceOffset; } - inline uint64_t payload() const noexcept { return _payload; } - - Expression* payloadAsExpression() const noexcept { - return reinterpret_cast(uintptr_t(_payload)); - } - - //! \} -}; - // ============================================================================ // [asmjit::AddressTableEntry] // ============================================================================ +//! Entry in an address table. class AddressTableEntry : public ZoneTreeNodeT { public: ASMJIT_NONCOPYABLE(AddressTableEntry) + //! Address. uint64_t _address; + //! Slot. uint32_t _slot; //! \name Construction & Destruction @@ -547,22 +612,24 @@ class AddressTableEntry : public ZoneTreeNodeT { // [asmjit::CodeHolder] // ============================================================================ -//! Contains basic information about the target architecture plus its settings, -//! and holds code & data (including sections, labels, and relocation information). -//! CodeHolder can store both binary and intermediate representation of assembly, -//! which can be generated by `BaseAssembler` and/or `BaseBuilder`. +//! Contains basic information about the target architecture and its options. +//! +//! In addition, it holds assembled code & data (including sections, labels, and +//! relocation information). `CodeHolder` can store both binary and intermediate +//! representation of assembly, which can be generated by \ref BaseAssembler, +//! \ref BaseBuilder, and \ref BaseCompiler //! -//! \note `CodeHolder` has ability to attach an `ErrorHandler`, however, the -//! error handler is not triggered by `CodeHolder` itself, it's only used by -//! emitters attached to `CodeHolder`. +//! \note `CodeHolder` has an ability to attach an \ref ErrorHandler, however, +//! the error handler is not triggered by `CodeHolder` itself, it's instead +//! propagated to all emitters that attach to it. class CodeHolder { public: ASMJIT_NONCOPYABLE(CodeHolder) - //! Basic information about the code (architecture and other info). - CodeInfo _codeInfo; - //! Emitter options, propagated to all emitters when changed. - uint32_t _emitterOptions; + //! Environment information. + Environment _environment; + //! 
Base address or \ref Globals::kNoBaseAddress. + uint64_t _baseAddress; //! Attached `Logger`, used by all consumers. Logger* _logger; @@ -574,10 +641,12 @@ class CodeHolder { //! Zone allocator, used to manage internal containers. ZoneAllocator _allocator; - //! Attached code emitters. + //! Attached emitters. ZoneVector _emitters; //! Section entries. ZoneVector _sections; + //! Section entries sorted by section order and then section id. + ZoneVector _sectionsByOrder; //! Label entries. ZoneVector _labelEntries; //! Relocation entries. @@ -592,6 +661,25 @@ class CodeHolder { //! Address table entries. ZoneTree _addressTableEntries; + //! Options that can be used with \ref copySectionData() and \ref copyFlattenedData(). + enum CopyOptions : uint32_t { + //! If virtual size of a section is greater than the size of its \ref CodeBuffer + //! then all bytes between the buffer size and virtual size will be zeroed. + //! If this option is not set then those bytes would be left as is, which + //! means that if the user didn't initialize them they would have a previous + //! content, which may be unwanted. + kCopyPadSectionBuffer = 0x00000001u, + +#ifndef ASMJIT_NO_DEPRECATED + kCopyWithPadding = kCopyPadSectionBuffer, +#endif // !ASMJIT_NO_DEPRECATED + + //! Zeroes the target buffer if the flattened data is less than the destination + //! size. This option works only with \ref copyFlattenedData() as it processes + //! multiple sections. It is ignored by \ref copySectionData(). + kCopyPadTargetBuffer = 0x00000002u + }; + //! \name Construction & Destruction //! \{ @@ -600,10 +688,13 @@ class CodeHolder { //! Destroys the CodeHolder. ASMJIT_API ~CodeHolder() noexcept; - inline bool isInitialized() const noexcept { return _codeInfo.isInitialized(); } + //! Tests whether the `CodeHolder` has been initialized. + //! + //! Emitters can be only attached to initialized `CodeHolder` instances. + inline bool isInitialized() const noexcept { return _environment.isInitialized(); } - //! Initializes CodeHolder to hold code described by `codeInfo`. - ASMJIT_API Error init(const CodeInfo& info) noexcept; + //! Initializes CodeHolder to hold code described by code `info`. + ASMJIT_API Error init(const Environment& environment, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept; //! Detaches all code-generators attached and resets the `CodeHolder`. ASMJIT_API void reset(uint32_t resetPolicy = Globals::kResetSoft) noexcept; @@ -622,65 +713,62 @@ class CodeHolder { //! \name Allocators //! \{ + //! Returns the allocator that the `CodeHolder` uses. + //! + //! \note This should be only used for AsmJit's purposes. Code holder uses + //! arena allocator to allocate everything, so anything allocated through + //! this allocator will be invalidated by \ref CodeHolder::reset() or by + //! CodeHolder's destructor. inline ZoneAllocator* allocator() const noexcept { return const_cast(&_allocator); } //! \} - //! \name Code Emitter + //! \name Code & Architecture //! \{ - inline const ZoneVector& emitters() const noexcept { return _emitters; } - - //! Returns global emitter options, internally propagated to all attached emitters. - inline uint32_t emitterOptions() const noexcept { return _emitterOptions; } + //! Returns the target environment information, see \ref Environment. + inline const Environment& environment() const noexcept { return _environment; } - //! Enables the given global emitter `options` and propagates the resulting - //! options to all attached emitters. 
- ASMJIT_API void addEmitterOptions(uint32_t options) noexcept; + //! Returns the target architecture. + inline uint32_t arch() const noexcept { return environment().arch(); } + //! Returns the target sub-architecture. + inline uint32_t subArch() const noexcept { return environment().subArch(); } - //! Disables the given global emitter `options` and propagates the resulting - //! options to all attached emitters. - ASMJIT_API void clearEmitterOptions(uint32_t options) noexcept; + //! Tests whether a static base-address is set. + inline bool hasBaseAddress() const noexcept { return _baseAddress != Globals::kNoBaseAddress; } + //! Returns a static base-address or \ref Globals::kNoBaseAddress, if not set. + inline uint64_t baseAddress() const noexcept { return _baseAddress; } //! \} - //! \name Code & Architecture + //! \name Emitters //! \{ - //! Returns the target architecture information, see `ArchInfo`. - inline const ArchInfo& archInfo() const noexcept { return _codeInfo.archInfo(); } - //! Returns the target code information, see `CodeInfo`. - inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; } - - //! Returns the target architecture id. - inline uint32_t archId() const noexcept { return archInfo().archId(); } - //! Returns the target architecture sub-id. - inline uint32_t archSubId() const noexcept { return archInfo().archSubId(); } - - //! Tests whether a static base-address is set. - inline bool hasBaseAddress() const noexcept { return _codeInfo.hasBaseAddress(); } - //! Returns a static base-address (uint64_t). - inline uint64_t baseAddress() const noexcept { return _codeInfo.baseAddress(); } + //! Returns a vector of attached emitters. + inline const ZoneVector& emitters() const noexcept { return _emitters; } //! \} - //! \name Logging & Error Handling + //! \name Logging //! \{ - //! Returns the attached logger. + //! Returns the attached logger, see \ref Logger. inline Logger* logger() const noexcept { return _logger; } //! Attaches a `logger` to CodeHolder and propagates it to all attached emitters. ASMJIT_API void setLogger(Logger* logger) noexcept; //! Resets the logger to none. inline void resetLogger() noexcept { setLogger(nullptr); } - //! Tests whether the global error handler is attached. + //! \name Error Handling + //! \{ + + //! Tests whether the CodeHolder has an attached error handler, see \ref ErrorHandler. inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; } - //! Returns the global error handler. + //! Returns the attached error handler. inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; } - //! Sets the global error handler. - inline void setErrorHandler(ErrorHandler* handler) noexcept { _errorHandler = handler; } - //! Resets the global error handler to none. + //! Attach an error handler to this `CodeHolder`. + ASMJIT_API void setErrorHandler(ErrorHandler* errorHandler) noexcept; + //! Resets the error handler to none. inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); } //! \} @@ -688,7 +776,16 @@ class CodeHolder { //! \name Code Buffer //! \{ + //! Makes sure that at least `n` bytes can be added to CodeHolder's buffer `cb`. + //! + //! \note The buffer `cb` must be managed by `CodeHolder` - otherwise the + //! behavior of the function is undefined. ASMJIT_API Error growBuffer(CodeBuffer* cb, size_t n) noexcept; + + //! Reserves the size of `cb` to at least `n` bytes. + //! + //! \note The buffer `cb` must be managed by `CodeHolder` - otherwise the + //! 
behavior of the function is undefined. ASMJIT_API Error reserveBuffer(CodeBuffer* cb, size_t n) noexcept; //! \} @@ -698,6 +795,8 @@ class CodeHolder { //! Returns an array of `Section*` records. inline const ZoneVector& sections() const noexcept { return _sections; } + //! Returns an array of `Section*` records sorted according to section order first, then section id. + inline const ZoneVector& sectionsByOrder() const noexcept { return _sectionsByOrder; } //! Returns the number of sections. inline uint32_t sectionCount() const noexcept { return _sections.size(); } @@ -707,7 +806,7 @@ class CodeHolder { //! Creates a new section and return its pointer in `sectionOut`. //! //! Returns `Error`, does not report a possible error to `ErrorHandler`. - ASMJIT_API Error newSection(Section** sectionOut, const char* name, size_t nameSize = SIZE_MAX, uint32_t flags = 0, uint32_t alignment = 1) noexcept; + ASMJIT_API Error newSection(Section** sectionOut, const char* name, size_t nameSize = SIZE_MAX, uint32_t flags = 0, uint32_t alignment = 1, int32_t order = 0) noexcept; //! Returns a section entry of the given index. inline Section* sectionById(uint32_t sectionId) const noexcept { return _sections[sectionId]; } @@ -719,7 +818,7 @@ class CodeHolder { //! Returns '.text' section (section that commonly represents code). //! - //! \note Text section is always the first section in `CodeHolder::sections()` array. + //! \note Text section is always the first section in \ref CodeHolder::sections() array. inline Section* textSection() const noexcept { return _sections[0]; } //! Tests whether '.addrtab' section exists. @@ -729,6 +828,8 @@ class CodeHolder { //! //! This section is used exclusively by AsmJit to store absolute 64-bit //! addresses that cannot be encoded in instructions like 'jmp' or 'call'. + //! + //! \note This section is created on demand, the returned pointer can be null. inline Section* addressTableSection() const noexcept { return _addressTableSection; } //! Ensures that '.addrtab' section exists (creates it if it doesn't) and @@ -825,18 +926,38 @@ class CodeHolder { //! Returns `Error`, does not report error to `ErrorHandler`. ASMJIT_API Error newLabelEntry(LabelEntry** entryOut) noexcept; - //! Creates a new named label label-type `type`. + //! Creates a new named \ref LabelEntry of the given label `type`. //! - //! Returns `Error`, does not report a possible error to `ErrorHandler`. + //! \param entryOut Where to store the created \ref LabelEntry. + //! \param name The name of the label. + //! \param nameSize The length of `name` argument, or `SIZE_MAX` if `name` is + //! a null terminated string, which means that the `CodeHolder` will + //! use `strlen()` to determine the length. + //! \param type The type of the label to create, see \ref Label::LabelType. + //! \param parentId Parent id of a local label, otherwise it must be + //! \ref Globals::kInvalidId. + //! + //! \retval Always returns \ref Error, does not report a possible error to + //! the attached \ref ErrorHandler. + //! + //! AsmJit has a support for local labels (\ref Label::kTypeLocal) which + //! require a parent label id (parentId). The names of local labels can + //! conflict with names of other local labels that have a different parent. ASMJIT_API Error newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, uint32_t type, uint32_t parentId = Globals::kInvalidId) noexcept; - //! Returns a label id by name. 
- ASMJIT_API uint32_t labelIdByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept; - + //! Returns a label by name. + //! + //! If the named label doesn't a default constructed \ref Label is returned, + //! which has its id set to \ref Globals::kInvalidId. inline Label labelByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept { return Label(labelIdByName(name, nameSize, parentId)); } + //! Returns a label id by name. + //! + //! If the named label doesn't exist \ref Globals::kInvalidId is returned. + ASMJIT_API uint32_t labelIdByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept; + //! Tests whether there are any unresolved label links. inline bool hasUnresolvedLinks() const noexcept { return _unresolvedLinkCount != 0; } //! Returns the number of label links, which are unresolved. @@ -845,7 +966,7 @@ class CodeHolder { //! Creates a new label-link used to store information about yet unbound labels. //! //! Returns `null` if the allocation failed. - ASMJIT_API LabelLink* newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel) noexcept; + ASMJIT_API LabelLink* newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel, const OffsetFormat& format) noexcept; //! Resolves cross-section links (`LabelLink`) associated with each label that //! was used as a destination in code of a different section. It's only useful @@ -871,10 +992,10 @@ class CodeHolder { //! Returns a RelocEntry of the given `id`. inline RelocEntry* relocEntry(uint32_t id) const noexcept { return _relocations[id]; } - //! Creates a new relocation entry of type `relocType` and size `valueSize`. + //! Creates a new relocation entry of type `relocType`. //! //! Additional fields can be set after the relocation entry was created. - ASMJIT_API Error newRelocEntry(RelocEntry** dst, uint32_t relocType, uint32_t valueSize) noexcept; + ASMJIT_API Error newRelocEntry(RelocEntry** dst, uint32_t relocType) noexcept; //! \} @@ -903,24 +1024,34 @@ class CodeHolder { //! \note This should never be called more than once. ASMJIT_API Error relocateToBase(uint64_t baseAddress) noexcept; - //! Options that can be used with \ref copySectionData(). - enum CopyOptions : uint32_t { - //! If virtual size of the section is larger than the size of its buffer - //! then all bytes between buffer size and virtual size will be zeroed. - kCopyWithPadding = 0x1 - }; - //! Copies a single section into `dst`. - ASMJIT_API Error copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t options = 0) noexcept; + ASMJIT_API Error copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t copyOptions = 0) noexcept; //! Copies all sections into `dst`. //! //! This should only be used if the data was flattened and there are no gaps //! between the sections. The `dstSize` is always checked and the copy will //! never write anything outside the provided buffer. - ASMJIT_API Error copyFlattenedData(void* dst, size_t dstSize, uint32_t options = 0) noexcept; + ASMJIT_API Error copyFlattenedData(void* dst, size_t dstSize, uint32_t copyOptions = 0) noexcept; //! 
\} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use 'CodeHolder::init(const Environment& environment, uint64_t baseAddress)' instead") + inline Error init(const CodeInfo& codeInfo) noexcept { return init(codeInfo._environment, codeInfo._baseAddress); } + + ASMJIT_DEPRECATED("Use nevironment() instead") + inline CodeInfo codeInfo() const noexcept { return CodeInfo(_environment, _baseAddress); } + + ASMJIT_DEPRECATED("Use BaseEmitter::encodingOptions() - this function always returns zero") + inline uint32_t emitterOptions() const noexcept { return 0; } + + ASMJIT_DEPRECATED("Use BaseEmitter::addEncodingOptions() - this function does nothing") + inline void addEmitterOptions(uint32_t options) noexcept { DebugUtils::unused(options); } + + ASMJIT_DEPRECATED("Use BaseEmitter::clearEncodingOptions() - this function does nothing") + inline void clearEmitterOptions(uint32_t options) noexcept { DebugUtils::unused(options); } +#endif // !ASMJIT_NO_DEPRECATED }; //! \} diff --git a/libs/asmjit/src/asmjit/core/codewriter.cpp b/libs/asmjit/src/asmjit/core/codewriter.cpp new file mode 100644 index 0000000..6097c0e --- /dev/null +++ b/libs/asmjit/src/asmjit/core/codewriter.cpp @@ -0,0 +1,151 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#include "../core/codeholder.h" +#include "../core/codewriter_p.h" + +ASMJIT_BEGIN_NAMESPACE + +bool CodeWriterUtils::encodeOffset32(uint32_t* dst, int64_t offset64, const OffsetFormat& format) noexcept { + uint32_t bitCount = format.immBitCount(); + uint32_t bitShift = format.immBitShift(); + uint32_t discardLsb = format.immDiscardLsb(); + + if (!bitCount || bitCount > format.valueSize() * 8u) + return false; + + if (discardLsb) { + ASMJIT_ASSERT(discardLsb <= 32); + if ((offset64 & Support::lsbMask(discardLsb)) != 0) + return false; + offset64 >>= discardLsb; + } + + if (!Support::isInt32(offset64)) + return false; + + int32_t offset32 = int32_t(offset64); + if (!Support::isEncodableOffset32(offset32, bitCount)) + return false; + + switch (format.type()) { + case OffsetFormat::kTypeCommon: { + *dst = (uint32_t(offset32) & Support::lsbMask(bitCount)) << bitShift; + return true; + } + + case OffsetFormat::kTypeAArch64_ADR: + case OffsetFormat::kTypeAArch64_ADRP: { + // Sanity checks. 
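+      // ADR/ADRP encode a 21-bit immediate split into immlo (2 bits at [30:29])
+      // and immhi (19 bits at [23:5]); the masking and shifts below reproduce
+      // that layout.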
+ if (format.valueSize() != 4 || bitCount != 21 || bitShift != 5) + return false; + + uint32_t immLo = uint32_t(offset32) & 0x3u; + uint32_t immHi = uint32_t(offset32 >> 2) & Support::lsbMask(19); + + *dst = (immLo << 29) | (immHi << 5); + return true; + } + + default: + return false; + } +} + +bool CodeWriterUtils::encodeOffset64(uint64_t* dst, int64_t offset64, const OffsetFormat& format) noexcept { + uint32_t bitCount = format.immBitCount(); + uint32_t discardLsb = format.immDiscardLsb(); + + if (!bitCount || bitCount > format.valueSize() * 8u) + return false; + + if (discardLsb) { + ASMJIT_ASSERT(discardLsb <= 32); + if ((offset64 & Support::lsbMask(discardLsb)) != 0) + return false; + offset64 >>= discardLsb; + } + + if (!Support::isEncodableOffset64(offset64, bitCount)) + return false; + + switch (format.type()) { + case OffsetFormat::kTypeCommon: { + *dst = (uint64_t(offset64) & Support::lsbMask(bitCount)) << format.immBitShift(); + return true; + } + + default: + return false; + } +} + +bool CodeWriterUtils::writeOffset(void* dst, int64_t offset64, const OffsetFormat& format) noexcept { + // Offset the destination by ValueOffset so the `dst` points to the + // patched word instead of the beginning of the patched region. + dst = static_cast(dst) + format.valueOffset(); + + switch (format.valueSize()) { + case 1: { + uint32_t mask; + if (!encodeOffset32(&mask, offset64, format)) + return false; + + Support::writeU8(dst, Support::readU8(dst) | mask); + return true; + } + + case 2: { + uint32_t mask; + if (!encodeOffset32(&mask, offset64, format)) + return false; + + Support::writeU16uLE(dst, Support::readU16uLE(dst) | mask); + return true; + } + + case 4: { + uint32_t mask; + if (!encodeOffset32(&mask, offset64, format)) + return false; + + Support::writeU32uLE(dst, Support::readU32uLE(dst) | mask); + return true; + } + + case 8: { + uint64_t mask; + if (!encodeOffset64(&mask, offset64, format)) + return false; + + Support::writeU64uLE(dst, Support::readU64uLE(dst) | mask); + return true; + } + + default: + return false; + } +} + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/codebufferwriter_p.h b/libs/asmjit/src/asmjit/core/codewriter_p.h similarity index 84% rename from libs/asmjit/src/asmjit/core/codebufferwriter_p.h rename to libs/asmjit/src/asmjit/core/codewriter_p.h index ee75211..61c9101 100644 --- a/libs/asmjit/src/asmjit/core/codebufferwriter_p.h +++ b/libs/asmjit/src/asmjit/core/codewriter_p.h @@ -25,24 +25,31 @@ #define ASMJIT_CORE_CODEBUFFERWRITER_P_H_INCLUDED #include "../core/assembler.h" +#include "../core/codebuffer.h" #include "../core/support.h" ASMJIT_BEGIN_NAMESPACE //! \cond INTERNAL -//! \addtogroup asmjit_core +//! \addtogroup asmjit_assembler //! \{ // ============================================================================ -// [asmjit::CodeBufferWriter] +// [Forward Declarations] // ============================================================================ -//! Helper that is used to write into a `CodeBuffer` held by `BaseAssembler`. -class CodeBufferWriter { +struct OffsetFormat; + +// ============================================================================ +// [asmjit::CodeWriter] +// ============================================================================ + +//! Helper that is used to write into a \ref CodeBuffer held by \ref BaseAssembler. 
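Editor's note: the following standalone sketch is not part of the asmjit patch; it only restates the OffsetFormat::kTypeCommon branch of CodeWriterUtils::encodeOffset32 above (mask the offset to `bitCount` bits after dropping `discardLsb` granularity bits, then shift it to `bitShift`), with made-up field parameters, and omits the zero-bit and range checks the real code performs.

#include <cstdint>
#include <cstdio>

// Keep `bitCount` low bits of `offset` (after dropping `discardLsb` bits used
// for granularity, e.g. 4-byte units) and place them at bit position `bitShift`.
static uint32_t encodeCommonOffset(int64_t offset, uint32_t bitCount,
                                   uint32_t bitShift, uint32_t discardLsb) {
  offset >>= discardLsb;
  uint32_t mask = bitCount >= 32 ? ~0u : (1u << bitCount) - 1u;
  return (uint32_t(offset) & mask) << bitShift;
}

int main() {
  // Hypothetical 19-bit field at bit 5 with 4-byte granularity (values made up).
  std::printf("0x%08X\n", encodeCommonOffset(0x100, 19, 5, 2)); // prints 0x00000800
  return 0;
}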
+class CodeWriter { public: uint8_t* _cursor; - ASMJIT_INLINE explicit CodeBufferWriter(BaseAssembler* a) noexcept + ASMJIT_INLINE explicit CodeWriter(BaseAssembler* a) noexcept : _cursor(a->_bufferPtr) {} ASMJIT_INLINE Error ensureSpace(BaseAssembler* a, size_t n) noexcept { @@ -180,6 +187,19 @@ class CodeBufferWriter { } }; +// ============================================================================ +// [asmjit::CodeWriterUtils] +// ============================================================================ + +namespace CodeWriterUtils { + +bool encodeOffset32(uint32_t* dst, int64_t offset64, const OffsetFormat& format) noexcept; +bool encodeOffset64(uint64_t* dst, int64_t offset64, const OffsetFormat& format) noexcept; + +bool writeOffset(void* dst, int64_t offset64, const OffsetFormat& format) noexcept; + +} // {CodeWriterUtils} + //! \} //! \endcond diff --git a/libs/asmjit/src/asmjit/core/compiler.cpp b/libs/asmjit/src/asmjit/core/compiler.cpp index 13dbf54..3880986 100644 --- a/libs/asmjit/src/asmjit/core/compiler.cpp +++ b/libs/asmjit/src/asmjit/core/compiler.cpp @@ -27,7 +27,7 @@ #include "../core/assembler.h" #include "../core/compiler.h" #include "../core/cpuinfo.h" -#include "../core/logging.h" +#include "../core/logger.h" #include "../core/rapass_p.h" #include "../core/rastack_p.h" #include "../core/support.h" @@ -40,12 +40,12 @@ ASMJIT_BEGIN_NAMESPACE // ============================================================================ class GlobalConstPoolPass : public Pass { - ASMJIT_NONCOPYABLE(GlobalConstPoolPass) typedef Pass Base; + ASMJIT_NONCOPYABLE(GlobalConstPoolPass) GlobalConstPoolPass() noexcept : Pass("GlobalConstPoolPass") {} - Error run(Zone* zone, Logger* logger) noexcept override { + Error run(Zone* zone, Logger* logger) override { DebugUtils::unused(zone, logger); // Flush the global constant pool. 
@@ -54,30 +54,11 @@ class GlobalConstPoolPass : public Pass { compiler->addAfter(compiler->_globalConstPool, compiler->lastNode()); compiler->_globalConstPool = nullptr; } + return kErrorOk; } }; -// ============================================================================ -// [asmjit::FuncCallNode - Arg / Ret] -// ============================================================================ - -bool FuncCallNode::_setArg(uint32_t i, const Operand_& op) noexcept { - if ((i & ~kFuncArgHi) >= _funcDetail.argCount()) - return false; - - _args[i] = op; - return true; -} - -bool FuncCallNode::_setRet(uint32_t i, const Operand_& op) noexcept { - if (i >= 2) - return false; - - _rets[i] = op; - return true; -} - // ============================================================================ // [asmjit::BaseCompiler - Construction / Destruction] // ============================================================================ @@ -90,72 +71,81 @@ BaseCompiler::BaseCompiler() noexcept _localConstPool(nullptr), _globalConstPool(nullptr) { - _type = kTypeCompiler; + _emitterType = uint8_t(kTypeCompiler); + _validationFlags = uint8_t(InstAPI::kValidationFlagVirtRegs); } BaseCompiler::~BaseCompiler() noexcept {} // ============================================================================ -// [asmjit::BaseCompiler - Function API] +// [asmjit::BaseCompiler - Function Management] // ============================================================================ -FuncNode* BaseCompiler::newFunc(const FuncSignature& sign) noexcept { - Error err; - - FuncNode* func = newNodeT(); - if (ASMJIT_UNLIKELY(!func)) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } - - err = registerLabelNode(func); - if (ASMJIT_UNLIKELY(err)) { - // TODO: Calls reportError, maybe rethink noexcept? - reportError(err); - return nullptr; - } - - // Create helper nodes. - func->_exitNode = newLabelNode(); - func->_end = newNodeT(SentinelNode::kSentinelFuncEnd); +Error BaseCompiler::_newFuncNode(FuncNode** out, const FuncSignature& signature) { + *out = nullptr; - if (ASMJIT_UNLIKELY(!func->_exitNode || !func->_end)) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } + // Create FuncNode together with all the required surrounding nodes. + FuncNode* funcNode; + ASMJIT_PROPAGATE(_newNodeT(&funcNode)); + ASMJIT_PROPAGATE(_newLabelNode(&funcNode->_exitNode)); + ASMJIT_PROPAGATE(_newNodeT(&funcNode->_end, SentinelNode::kSentinelFuncEnd)); - // Initialize the function info. - err = func->detail().init(sign); - if (ASMJIT_UNLIKELY(err)) { - reportError(err); - return nullptr; - } + // Initialize the function's detail info. + Error err = funcNode->detail().init(signature, environment()); + if (ASMJIT_UNLIKELY(err)) + return reportError(err); // If the Target guarantees greater stack alignment than required by the // calling convention then override it as we can prevent having to perform // dynamic stack alignment - if (func->_funcDetail._callConv.naturalStackAlignment() < _codeInfo.stackAlignment()) - func->_funcDetail._callConv.setNaturalStackAlignment(_codeInfo.stackAlignment()); + uint32_t environmentStackAlignment = _environment.stackAlignment(); + + if (funcNode->_funcDetail._callConv.naturalStackAlignment() < environmentStackAlignment) + funcNode->_funcDetail._callConv.setNaturalStackAlignment(environmentStackAlignment); // Initialize the function frame. 
- err = func->_frame.init(func->_funcDetail); - if (ASMJIT_UNLIKELY(err)) { - reportError(err); - return nullptr; - } + err = funcNode->_frame.init(funcNode->_funcDetail); + if (ASMJIT_UNLIKELY(err)) + return reportError(err); // Allocate space for function arguments. - func->_args = nullptr; - if (func->argCount() != 0) { - func->_args = _allocator.allocT(func->argCount() * sizeof(VirtReg*)); - if (ASMJIT_UNLIKELY(!func->_args)) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } - - memset(func->_args, 0, func->argCount() * sizeof(VirtReg*)); + funcNode->_args = nullptr; + if (funcNode->argCount() != 0) { + funcNode->_args = _allocator.allocT(funcNode->argCount() * sizeof(FuncNode::ArgPack)); + if (ASMJIT_UNLIKELY(!funcNode->_args)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); + memset(funcNode->_args, 0, funcNode->argCount() * sizeof(FuncNode::ArgPack)); } - return func; + ASMJIT_PROPAGATE(registerLabelNode(funcNode)); + + *out = funcNode; + return kErrorOk; +} + +Error BaseCompiler::_addFuncNode(FuncNode** out, const FuncSignature& signature) { + ASMJIT_PROPAGATE(_newFuncNode(out, signature)); + addFunc(*out); + return kErrorOk; +} + +Error BaseCompiler::_newRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1) { + uint32_t opCount = !o1.isNone() ? 2u : !o0.isNone() ? 1u : 0u; + FuncRetNode* node; + + ASMJIT_PROPAGATE(_newNodeT(&node)); + node->setOpCount(opCount); + node->setOp(0, o0); + node->setOp(1, o1); + node->resetOpRange(2, node->opCapacity()); + + *out = node; + return kErrorOk; +} + +Error BaseCompiler::_addRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1) { + ASMJIT_PROPAGATE(_newRetNode(out, o0, o1)); + addNode(*out); + return kErrorOk; } FuncNode* BaseCompiler::addFunc(FuncNode* func) { @@ -165,25 +155,15 @@ FuncNode* BaseCompiler::addFunc(FuncNode* func) { addNode(func); // Function node. BaseNode* prev = cursor(); // {CURSOR}. addNode(func->exitNode()); // Function exit label. - addNode(func->endNode()); // Function end marker. + addNode(func->endNode()); // Function end sentinel. _setCursor(prev); return func; } -FuncNode* BaseCompiler::addFunc(const FuncSignature& sign) { - FuncNode* func = newFunc(sign); - - if (!func) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } - - return addFunc(func); -} - Error BaseCompiler::endFunc() { FuncNode* func = _func; + if (ASMJIT_UNLIKELY(!func)) return reportError(DebugUtils::errored(kErrorInvalidState)); @@ -199,10 +179,11 @@ Error BaseCompiler::endFunc() { SentinelNode* end = func->endNode(); setCursor(end); + return kErrorOk; } -Error BaseCompiler::setArg(uint32_t argIndex, const BaseReg& r) { +Error BaseCompiler::_setArg(size_t argIndex, size_t valueIndex, const BaseReg& r) { FuncNode* func = _func; if (ASMJIT_UNLIKELY(!func)) @@ -212,76 +193,48 @@ Error BaseCompiler::setArg(uint32_t argIndex, const BaseReg& r) { return reportError(DebugUtils::errored(kErrorInvalidVirtId)); VirtReg* vReg = virtRegByReg(r); - func->setArg(argIndex, vReg); + func->setArg(argIndex, valueIndex, vReg); return kErrorOk; } -FuncRetNode* BaseCompiler::newRet(const Operand_& o0, const Operand_& o1) noexcept { - FuncRetNode* node = newNodeT(); - if (!node) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } - - node->setOp(0, o0); - node->setOp(1, o1); - node->setOpCount(!o1.isNone() ? 2u : !o0.isNone() ? 
1u : 0u); - - return node; -} - -FuncRetNode* BaseCompiler::addRet(const Operand_& o0, const Operand_& o1) noexcept { - FuncRetNode* node = newRet(o0, o1); - if (!node) return nullptr; - return addNode(node)->as(); -} - // ============================================================================ -// [asmjit::BaseCompiler - Call] +// [asmjit::BaseCompiler - Function Invocation] // ============================================================================ -FuncCallNode* BaseCompiler::newCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept { - FuncCallNode* node = newNodeT(instId, 0u); - if (ASMJIT_UNLIKELY(!node)) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; - } +Error BaseCompiler::_newInvokeNode(InvokeNode** out, uint32_t instId, const Operand_& o0, const FuncSignature& signature) { + InvokeNode* node; + ASMJIT_PROPAGATE(_newNodeT(&node, instId, 0u)); node->setOpCount(1); node->setOp(0, o0); - node->resetOp(1); - node->resetOp(2); - node->resetOp(3); - - Error err = node->detail().init(sign); - if (ASMJIT_UNLIKELY(err)) { - reportError(err); - return nullptr; - } + node->resetOpRange(1, node->opCapacity()); - // If there are no arguments skip the allocation. - uint32_t nArgs = sign.argCount(); - if (!nArgs) return node; + Error err = node->detail().init(signature, environment()); + if (ASMJIT_UNLIKELY(err)) + return reportError(err); - node->_args = static_cast(_allocator.alloc(nArgs * sizeof(Operand))); - if (!node->_args) { - reportError(DebugUtils::errored(kErrorOutOfMemory)); - return nullptr; + // Skip the allocation if there are no arguments. + uint32_t argCount = signature.argCount(); + if (argCount) { + node->_args = static_cast(_allocator.alloc(argCount * sizeof(InvokeNode::OperandPack))); + if (!node->_args) + reportError(DebugUtils::errored(kErrorOutOfMemory)); + memset(node->_args, 0, argCount * sizeof(InvokeNode::OperandPack)); } - memset(node->_args, 0, nArgs * sizeof(Operand)); - return node; + *out = node; + return kErrorOk; } -FuncCallNode* BaseCompiler::addCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept { - FuncCallNode* node = newCall(instId, o0, sign); - if (!node) return nullptr; - return addNode(node)->as(); +Error BaseCompiler::_addInvokeNode(InvokeNode** out, uint32_t instId, const Operand_& o0, const FuncSignature& signature) { + ASMJIT_PROPAGATE(_newInvokeNode(out, instId, o0, signature)); + addNode(*out); + return kErrorOk; } // ============================================================================ -// [asmjit::BaseCompiler - Vars] +// [asmjit::BaseCompiler - Virtual Registers] // ============================================================================ static void BaseCompiler_assignGenericName(BaseCompiler* self, VirtReg* vReg) { @@ -294,16 +247,19 @@ static void BaseCompiler_assignGenericName(BaseCompiler* self, VirtReg* vReg) { vReg->_name.setData(&self->_dataZone, buf, unsigned(size)); } -VirtReg* BaseCompiler::newVirtReg(uint32_t typeId, uint32_t signature, const char* name) noexcept { +Error BaseCompiler::newVirtReg(VirtReg** out, uint32_t typeId, uint32_t signature, const char* name) { + *out = nullptr; uint32_t index = _vRegArray.size(); + if (ASMJIT_UNLIKELY(index >= uint32_t(Operand::kVirtIdCount))) - return nullptr; + return reportError(DebugUtils::errored(kErrorTooManyVirtRegs)); - if (_vRegArray.willGrow(&_allocator) != kErrorOk) - return nullptr; + if (ASMJIT_UNLIKELY(_vRegArray.willGrow(&_allocator) != kErrorOk)) + return 
reportError(DebugUtils::errored(kErrorOutOfMemory)); VirtReg* vReg = _vRegZone.allocZeroedT(); - if (ASMJIT_UNLIKELY(!vReg)) return nullptr; + if (ASMJIT_UNLIKELY(!vReg)) + return reportError(DebugUtils::errored(kErrorOutOfMemory)); uint32_t size = Type::sizeOf(typeId); uint32_t alignment = Support::min(size, 64); @@ -320,26 +276,27 @@ VirtReg* BaseCompiler::newVirtReg(uint32_t typeId, uint32_t signature, const cha #endif _vRegArray.appendUnsafe(vReg); - return vReg; + *out = vReg; + + return kErrorOk; } -Error BaseCompiler::_newReg(BaseReg& out, uint32_t typeId, const char* name) { +Error BaseCompiler::_newReg(BaseReg* out, uint32_t typeId, const char* name) { RegInfo regInfo; + out->reset(); - Error err = ArchUtils::typeIdToRegInfo(archId(), typeId, regInfo); - if (ASMJIT_UNLIKELY(err)) return reportError(err); + Error err = ArchUtils::typeIdToRegInfo(arch(), typeId, &typeId, ®Info); + if (ASMJIT_UNLIKELY(err)) + return reportError(err); - VirtReg* vReg = newVirtReg(typeId, regInfo.signature(), name); - if (ASMJIT_UNLIKELY(!vReg)) { - out.reset(); - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - } + VirtReg* vReg; + ASMJIT_PROPAGATE(newVirtReg(&vReg, typeId, regInfo.signature(), name)); - out._initReg(regInfo.signature(), vReg->id()); + out->_initReg(regInfo.signature(), vReg->id()); return kErrorOk; } -Error BaseCompiler::_newRegFmt(BaseReg& out, uint32_t typeId, const char* fmt, ...) { +Error BaseCompiler::_newRegFmt(BaseReg* out, uint32_t typeId, const char* fmt, ...) { va_list ap; StringTmp<256> sb; @@ -350,7 +307,9 @@ Error BaseCompiler::_newRegFmt(BaseReg& out, uint32_t typeId, const char* fmt, . return _newReg(out, typeId, sb.data()); } -Error BaseCompiler::_newReg(BaseReg& out, const BaseReg& ref, const char* name) { +Error BaseCompiler::_newReg(BaseReg* out, const BaseReg& ref, const char* name) { + out->reset(); + RegInfo regInfo; uint32_t typeId; @@ -412,20 +371,18 @@ Error BaseCompiler::_newReg(BaseReg& out, const BaseReg& ref, const char* name) typeId = ref.type(); } - Error err = ArchUtils::typeIdToRegInfo(archId(), typeId, regInfo); - if (ASMJIT_UNLIKELY(err)) return reportError(err); + Error err = ArchUtils::typeIdToRegInfo(arch(), typeId, &typeId, ®Info); + if (ASMJIT_UNLIKELY(err)) + return reportError(err); - VirtReg* vReg = newVirtReg(typeId, regInfo.signature(), name); - if (ASMJIT_UNLIKELY(!vReg)) { - out.reset(); - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - } + VirtReg* vReg; + ASMJIT_PROPAGATE(newVirtReg(&vReg, typeId, regInfo.signature(), name)); - out._initReg(regInfo.signature(), vReg->id()); + out->_initReg(regInfo.signature(), vReg->id()); return kErrorOk; } -Error BaseCompiler::_newRegFmt(BaseReg& out, const BaseReg& ref, const char* fmt, ...) { +Error BaseCompiler::_newRegFmt(BaseReg* out, const BaseReg& ref, const char* fmt, ...) 
{ va_list ap; StringTmp<256> sb; @@ -436,7 +393,9 @@ Error BaseCompiler::_newRegFmt(BaseReg& out, const BaseReg& ref, const char* fmt return _newReg(out, ref, sb.data()); } -Error BaseCompiler::_newStack(BaseMem& out, uint32_t size, uint32_t alignment, const char* name) { +Error BaseCompiler::_newStack(BaseMem* out, uint32_t size, uint32_t alignment, const char* name) { + out->reset(); + if (size == 0) return reportError(DebugUtils::errored(kErrorInvalidArgument)); @@ -449,22 +408,19 @@ Error BaseCompiler::_newStack(BaseMem& out, uint32_t size, uint32_t alignment, c if (alignment > 64) alignment = 64; - VirtReg* vReg = newVirtReg(0, 0, name); - if (ASMJIT_UNLIKELY(!vReg)) { - out.reset(); - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - } + VirtReg* vReg; + ASMJIT_PROPAGATE(newVirtReg(&vReg, 0, 0, name)); vReg->_virtSize = size; vReg->_isStack = true; vReg->_alignment = uint8_t(alignment); // Set the memory operand to GPD/GPQ and its id to VirtReg. - out = BaseMem(BaseMem::Decomposed { _gpRegInfo.type(), vReg->id(), BaseReg::kTypeNone, 0, 0, 0, BaseMem::kSignatureMemRegHomeFlag }); + *out = BaseMem(BaseMem::Decomposed { _gpRegInfo.type(), vReg->id(), BaseReg::kTypeNone, 0, 0, 0, BaseMem::kSignatureMemRegHomeFlag }); return kErrorOk; } -Error BaseCompiler::setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment) noexcept { +Error BaseCompiler::setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment) { if (!isVirtIdValid(virtId)) return DebugUtils::errored(kErrorInvalidVirtId); @@ -493,8 +449,10 @@ Error BaseCompiler::setStackSize(uint32_t virtId, uint32_t newSize, uint32_t new return kErrorOk; } -Error BaseCompiler::_newConst(BaseMem& out, uint32_t scope, const void* data, size_t size) { +Error BaseCompiler::_newConst(BaseMem* out, uint32_t scope, const void* data, size_t size) { + out->reset(); ConstPoolNode** pPool; + if (scope == ConstPool::kScopeLocal) pPool = &_localConstPool; else if (scope == ConstPool::kScopeGlobal) @@ -502,29 +460,26 @@ Error BaseCompiler::_newConst(BaseMem& out, uint32_t scope, const void* data, si else return reportError(DebugUtils::errored(kErrorInvalidArgument)); - ConstPoolNode* pool = *pPool; - if (!pool) { - pool = newConstPoolNode(); - if (ASMJIT_UNLIKELY(!pool)) - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - *pPool = pool; - } + if (!*pPool) + ASMJIT_PROPAGATE(_newConstPoolNode(pPool)); + ConstPoolNode* pool = *pPool; size_t off; Error err = pool->add(data, size, off); if (ASMJIT_UNLIKELY(err)) return reportError(err); - out = BaseMem(BaseMem::Decomposed { + *out = BaseMem(BaseMem::Decomposed { Label::kLabelTag, // Base type. - pool->id(), // Base id. + pool->labelId(), // Base id. 0, // Index type. 0, // Index id. int32_t(off), // Offset. uint32_t(size), // Size. 0 // Flags. }); + return kErrorOk; } @@ -553,38 +508,38 @@ void BaseCompiler::rename(const BaseReg& reg, const char* fmt, ...) 
{ // [asmjit::BaseCompiler - Jump Annotations] // ============================================================================ -JumpNode* BaseCompiler::newJumpNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, JumpAnnotation* annotation) noexcept { - uint32_t opCount = 1; +Error BaseCompiler::newJumpNode(JumpNode** out, uint32_t instId, uint32_t instOptions, const Operand_& o0, JumpAnnotation* annotation) { JumpNode* node = _allocator.allocT(); + uint32_t opCount = 1; + + *out = node; if (ASMJIT_UNLIKELY(!node)) - return nullptr; + return reportError(DebugUtils::errored(kErrorOutOfMemory)); node = new(node) JumpNode(this, instId, instOptions, opCount, annotation); node->setOp(0, o0); - node->resetOps(opCount, JumpNode::kBaseOpCapacity); - return node; + node->resetOpRange(opCount, JumpNode::kBaseOpCapacity); + + return kErrorOk; } Error BaseCompiler::emitAnnotatedJump(uint32_t instId, const Operand_& o0, JumpAnnotation* annotation) { - uint32_t options = instOptions() | globalInstOptions(); + uint32_t options = instOptions() | forcedInstOptions(); + RegOnly extra = extraReg(); const char* comment = inlineComment(); - JumpNode* node = newJumpNode(instId, options, o0, annotation); - resetInstOptions(); resetInlineComment(); + resetExtraReg(); - if (ASMJIT_UNLIKELY(!node)) { - resetExtraReg(); - return reportError(DebugUtils::errored(kErrorOutOfMemory)); - } + JumpNode* node; + ASMJIT_PROPAGATE(newJumpNode(&node, instId, options, o0, annotation)); - node->setExtraReg(extraReg()); + node->setExtraReg(extra); if (comment) node->setInlineComment(static_cast(_dataZone.dup(comment, strlen(comment), true))); addNode(node); - resetExtraReg(); return kErrorOk; } @@ -613,6 +568,10 @@ JumpAnnotation* BaseCompiler::newJumpAnnotation() { Error BaseCompiler::onAttach(CodeHolder* code) noexcept { ASMJIT_PROPAGATE(Base::onAttach(code)); + const ArchTraits& archTraits = ArchTraits::byArch(code->arch()); + uint32_t nativeRegType = Environment::is32Bit(code->arch()) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64; + _gpRegInfo.setSignature(archTraits.regTypeToSignature(nativeRegType)); + Error err = addPassT(); if (ASMJIT_UNLIKELY(err)) { onDetach(code); @@ -644,7 +603,7 @@ FuncPass::FuncPass(const char* name) noexcept // [asmjit::FuncPass - Run] // ============================================================================ -Error FuncPass::run(Zone* zone, Logger* logger) noexcept { +Error FuncPass::run(Zone* zone, Logger* logger) { BaseNode* node = cb()->firstNode(); if (!node) return kErrorOk; diff --git a/libs/asmjit/src/asmjit/core/compiler.h b/libs/asmjit/src/asmjit/core/compiler.h index 32b2a8b..eb2a5aa 100644 --- a/libs/asmjit/src/asmjit/core/compiler.h +++ b/libs/asmjit/src/asmjit/core/compiler.h @@ -30,6 +30,7 @@ #include "../core/assembler.h" #include "../core/builder.h" #include "../core/constpool.h" +#include "../core/compilerdefs.h" #include "../core/func.h" #include "../core/inst.h" #include "../core/operand.h" @@ -43,144 +44,15 @@ ASMJIT_BEGIN_NAMESPACE // [Forward Declarations] // ============================================================================ -struct RATiedReg; -class RAWorkReg; - class JumpAnnotation; - class JumpNode; class FuncNode; class FuncRetNode; -class FuncCallNode; +class InvokeNode; //! \addtogroup asmjit_compiler //! \{ -// ============================================================================ -// [asmjit::VirtReg] -// ============================================================================ - -//! Virtual register data (BaseCompiler). 
-class VirtReg { -public: - ASMJIT_NONCOPYABLE(VirtReg) - - //! Virtual register id. - uint32_t _id; - //! Virtual register info (signature). - RegInfo _info; - //! Virtual register size (can be smaller than `regInfo._size`). - uint32_t _virtSize; - //! Virtual register alignment (for spilling). - uint8_t _alignment; - //! Type-id. - uint8_t _typeId; - //! Virtual register weight for alloc/spill decisions. - uint8_t _weight; - //! True if this is a fixed register, never reallocated. - uint8_t _isFixed : 1; - //! True if the virtual register is only used as a stack (never accessed as register). - uint8_t _isStack : 1; - uint8_t _reserved : 6; - - //! Virtual register name (user provided or automatically generated). - ZoneString<16> _name; - - // ------------------------------------------------------------------------- - // The following members are used exclusively by RAPass. They are initialized - // when the VirtReg is created to NULL pointers and then changed during RAPass - // execution. RAPass sets them back to NULL before it returns. - // ------------------------------------------------------------------------- - - //! Reference to `RAWorkReg`, used during register allocation. - RAWorkReg* _workReg; - - //! \name Construction & Destruction - //! \{ - - inline VirtReg(uint32_t id, uint32_t signature, uint32_t virtSize, uint32_t alignment, uint32_t typeId) noexcept - : _id(id), - _virtSize(virtSize), - _alignment(uint8_t(alignment)), - _typeId(uint8_t(typeId)), - _weight(1), - _isFixed(false), - _isStack(false), - _reserved(0), - _name(), - _workReg(nullptr) { _info._signature = signature; } - - //! \} - - //! \name Accessors - //! \{ - - //! Returns the virtual register id. - inline uint32_t id() const noexcept { return _id; } - - //! Returns the virtual register name. - inline const char* name() const noexcept { return _name.data(); } - //! Returns the size of the virtual register name. - inline uint32_t nameSize() const noexcept { return _name.size(); } - - //! Returns a register information that wraps the register signature. - inline const RegInfo& info() const noexcept { return _info; } - //! Returns a virtual register type (maps to the physical register type as well). - inline uint32_t type() const noexcept { return _info.type(); } - //! Returns a virtual register group (maps to the physical register group as well). - inline uint32_t group() const noexcept { return _info.group(); } - - //! Returns a real size of the register this virtual register maps to. - //! - //! For example if this is a 128-bit SIMD register used for a scalar single - //! precision floating point value then its virtSize would be 4, however, the - //! `regSize` would still say 16 (128-bits), because it's the smallest size - //! of that register type. - inline uint32_t regSize() const noexcept { return _info.size(); } - - //! Returns a register signature of this virtual register. - inline uint32_t signature() const noexcept { return _info.signature(); } - - //! Returns the virtual register size. - //! - //! The virtual register size describes how many bytes the virtual register - //! needs to store its content. It can be smaller than the physical register - //! size, see `regSize()`. - inline uint32_t virtSize() const noexcept { return _virtSize; } - - //! Returns the virtual register alignment. - inline uint32_t alignment() const noexcept { return _alignment; } - - //! Returns the virtual register type id, see `Type::Id`. - inline uint32_t typeId() const noexcept { return _typeId; } - - //! 
Returns the virtual register weight - the register allocator can use it - //! as explicit hint for alloc/spill decisions. - inline uint32_t weight() const noexcept { return _weight; } - //! Sets the virtual register weight (0 to 255) - the register allocator can - //! use it as explicit hint for alloc/spill decisions and initial bin-packing. - inline void setWeight(uint32_t weight) noexcept { _weight = uint8_t(weight); } - - //! Returns whether the virtual register is always allocated to a fixed - //! physical register (and never reallocated). - //! - //! \note This is only used for special purposes and it's mostly internal. - inline bool isFixed() const noexcept { return bool(_isFixed); } - - //! Returns whether the virtual register is indeed a stack that only uses - //! the virtual register id for making it accessible. - //! - //! \note It's an error if a stack is accessed as a register. - inline bool isStack() const noexcept { return bool(_isStack); } - - inline bool hasWorkReg() const noexcept { return _workReg != nullptr; } - inline RAWorkReg* workReg() const noexcept { return _workReg; } - inline void setWorkReg(RAWorkReg* workReg) noexcept { _workReg = workReg; } - inline void resetWorkReg() noexcept { _workReg = nullptr; } - - //! \} -}; - // ============================================================================ // [asmjit::BaseCompiler] // ============================================================================ @@ -192,14 +64,18 @@ class VirtReg { //! primarily designed for merging multiple parts of code into a function //! without worrying about registers and function calling conventions. //! -//! BaseCompiler can be used, with a minimum effort, to handle 32-bit and 64-bit -//! code at the same time. +//! BaseCompiler can be used, with a minimum effort, to handle 32-bit and +//! 64-bit code generation within a single code base. //! //! BaseCompiler is based on BaseBuilder and contains all the features it //! provides. It means that the code it stores can be modified (removed, added, //! injected) and analyzed. When the code is finalized the compiler can emit //! the code into an Assembler to translate the abstract representation into a //! machine code. +//! +//! Check out architecture specific compilers for more details and examples: +//! +//! - \ref x86::Compiler - X86/X64 compiler implementation. class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { public: ASMJIT_NONCOPYABLE(BaseCompiler) @@ -229,38 +105,84 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \} - //! \name Function API + //! \name Function Management //! \{ //! Returns the current function. inline FuncNode* func() const noexcept { return _func; } - //! Creates a new `FuncNode`. - ASMJIT_API FuncNode* newFunc(const FuncSignature& sign) noexcept; - //! Adds a function `node` to the stream. + //! Creates a new \ref FuncNode. + ASMJIT_API Error _newFuncNode(FuncNode** out, const FuncSignature& signature); + //! Creates a new \ref FuncNode adds it to the compiler. + ASMJIT_API Error _addFuncNode(FuncNode** out, const FuncSignature& signature); + + //! Creates a new \ref FuncRetNode. + ASMJIT_API Error _newRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1); + //! Creates a new \ref FuncRetNode and adds it to the compiler. + ASMJIT_API Error _addRetNode(FuncRetNode** out, const Operand_& o0, const Operand_& o1); + + //! Creates a new \ref FuncNode with the given `signature` and returns it. 
+ inline FuncNode* newFunc(const FuncSignature& signature) { + FuncNode* node; + _newFuncNode(&node, signature); + return node; + } + + //! Creates a new \ref FuncNode with the given `signature`, adds it to the + //! compiler by using the \ref addFunc(FuncNode*) overload, and returns it. + inline FuncNode* addFunc(const FuncSignature& signature) { + FuncNode* node; + _addFuncNode(&node, signature); + return node; + } + + //! Adds a function `node` to the instruction stream. ASMJIT_API FuncNode* addFunc(FuncNode* func); - //! Adds a new function. - ASMJIT_API FuncNode* addFunc(const FuncSignature& sign); //! Emits a sentinel that marks the end of the current function. ASMJIT_API Error endFunc(); + ASMJIT_API Error _setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg); + //! Sets a function argument at `argIndex` to `reg`. - ASMJIT_API Error setArg(uint32_t argIndex, const BaseReg& reg); + inline Error setArg(size_t argIndex, const BaseReg& reg) { return _setArg(argIndex, 0, reg); } + //! Sets a function argument at `argIndex` at `valueIndex` to `reg`. + inline Error setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg) { return _setArg(argIndex, valueIndex, reg); } + + inline FuncRetNode* newRet(const Operand_& o0, const Operand_& o1) { + FuncRetNode* node; + _newRetNode(&node, o0, o1); + return node; + } - //! Creates a new `FuncRetNode`. - ASMJIT_API FuncRetNode* newRet(const Operand_& o0, const Operand_& o1) noexcept; - //! Adds a new `FuncRetNode`. - ASMJIT_API FuncRetNode* addRet(const Operand_& o0, const Operand_& o1) noexcept; + inline FuncRetNode* addRet(const Operand_& o0, const Operand_& o1) { + FuncRetNode* node; + _addRetNode(&node, o0, o1); + return node; + } //! \} - //! \name Function Calls + //! \name Function Invocation //! \{ - //! Creates a new `FuncCallNode`. - ASMJIT_API FuncCallNode* newCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept; - //! Adds a new `FuncCallNode`. - ASMJIT_API FuncCallNode* addCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept; + //! Creates a new \ref InvokeNode. + ASMJIT_API Error _newInvokeNode(InvokeNode** out, uint32_t instId, const Operand_& o0, const FuncSignature& signature); + //! Creates a new \ref InvokeNode and adds it to Compiler. + ASMJIT_API Error _addInvokeNode(InvokeNode** out, uint32_t instId, const Operand_& o0, const FuncSignature& signature); + + //! Creates a new `InvokeNode`. + inline InvokeNode* newCall(uint32_t instId, const Operand_& o0, const FuncSignature& signature) { + InvokeNode* node; + _newInvokeNode(&node, instId, o0, signature); + return node; + } + + //! Adds a new `InvokeNode`. + inline InvokeNode* addCall(uint32_t instId, const Operand_& o0, const FuncSignature& signature) { + InvokeNode* node; + _addInvokeNode(&node, instId, o0, signature); + return node; + } //! \} @@ -268,13 +190,27 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \{ //! Creates a new virtual register representing the given `typeId` and `signature`. - ASMJIT_API VirtReg* newVirtReg(uint32_t typeId, uint32_t signature, const char* name) noexcept; + //! + //! \note This function is public, but it's not generally recommended to be used + //! by AsmJit users, use architecture-specific `newReg()` functionality instead + //! or functions like \ref _newReg() and \ref _newRegFmt(). + ASMJIT_API Error newVirtReg(VirtReg** out, uint32_t typeId, uint32_t signature, const char* name); + + //! 
Creates a new virtual register of the given `typeId` and stores it to `out` operand. + ASMJIT_API Error _newReg(BaseReg* out, uint32_t typeId, const char* name = nullptr); - ASMJIT_API Error _newReg(BaseReg& out, uint32_t typeId, const char* name = nullptr); - ASMJIT_API Error _newRegFmt(BaseReg& out, uint32_t typeId, const char* fmt, ...); + //! Creates a new virtual register of the given `typeId` and stores it to `out` operand. + //! + //! \note This version accepts a snprintf() format `fmt` followed by a variadic arguments. + ASMJIT_API Error _newRegFmt(BaseReg* out, uint32_t typeId, const char* fmt, ...); + + //! Creates a new virtual register compatible with the provided reference register `ref`. + ASMJIT_API Error _newReg(BaseReg* out, const BaseReg& ref, const char* name = nullptr); - ASMJIT_API Error _newReg(BaseReg& out, const BaseReg& ref, const char* name = nullptr); - ASMJIT_API Error _newRegFmt(BaseReg& out, const BaseReg& ref, const char* fmt, ...); + //! Creates a new virtual register compatible with the provided reference register `ref`. + //! + //! \note This version accepts a snprintf() format `fmt` followed by a variadic arguments. + ASMJIT_API Error _newRegFmt(BaseReg* out, const BaseReg& ref, const char* fmt, ...); //! Tests whether the given `id` is a valid virtual register id. inline bool isVirtIdValid(uint32_t id) const noexcept { @@ -286,14 +222,20 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { return isVirtIdValid(reg.id()); } - //! Returns `VirtReg` associated with the given `id`. + //! Returns \ref VirtReg associated with the given `id`. inline VirtReg* virtRegById(uint32_t id) const noexcept { ASMJIT_ASSERT(isVirtIdValid(id)); return _vRegArray[Operand::virtIdToIndex(id)]; } - //! Returns `VirtReg` associated with the given `reg`. + + //! Returns \ref VirtReg associated with the given `reg`. inline VirtReg* virtRegByReg(const BaseReg& reg) const noexcept { return virtRegById(reg.id()); } - //! Returns `VirtReg` associated with the given `index`. + + //! Returns \ref VirtReg associated with the given virtual register `index`. + //! + //! \note This is not the same as virtual register id. The conversion between + //! id and its index is implemented by \ref Operand_::virtIdToIndex() and \ref + //! Operand_::indexToVirtId() functions. inline VirtReg* virtRegByIndex(uint32_t index) const noexcept { return _vRegArray[index]; } //! Returns an array of all virtual registers managed by the Compiler. @@ -302,13 +244,16 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \name Stack //! \{ - ASMJIT_API Error _newStack(BaseMem& out, uint32_t size, uint32_t alignment, const char* name = nullptr); + //! Creates a new stack of the given `size` and `alignment` and stores it to `out`. + //! + //! \note `name` can be used to give the stack a name, for debugging purposes. + ASMJIT_API Error _newStack(BaseMem* out, uint32_t size, uint32_t alignment, const char* name = nullptr); //! Updates the stack size of a stack created by `_newStack()` by its `virtId`. - ASMJIT_API Error setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment = 0) noexcept; + ASMJIT_API Error setStackSize(uint32_t virtId, uint32_t newSize, uint32_t newAlignment = 0); //! Updates the stack size of a stack created by `_newStack()`. 
- inline Error setStackSize(const BaseMem& mem, uint32_t newSize, uint32_t newAlignment = 0) noexcept { + inline Error setStackSize(const BaseMem& mem, uint32_t newSize, uint32_t newAlignment = 0) { return setStackSize(mem.id(), newSize, newAlignment); } @@ -317,7 +262,11 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \name Constants //! \{ - ASMJIT_API Error _newConst(BaseMem& out, uint32_t scope, const void* data, size_t size); + //! Creates a new constant of the given `scope` (see \ref ConstPool::Scope). + //! + //! This function adds a constant of the given `size` to the built-in \ref + //! ConstPool and stores the reference to that constant to the `out` operand. + ASMJIT_API Error _newConst(BaseMem* out, uint32_t scope, const void* data, size_t size); //! \} @@ -325,8 +274,6 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \{ //! Rename the given virtual register `reg` to a formatted string `fmt`. - //! - //! \note Only new name will appear in the logger. ASMJIT_API void rename(const BaseReg& reg, const char* fmt, ...); //! \} @@ -338,7 +285,7 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { return _jumpAnnotations; } - ASMJIT_API JumpNode* newJumpNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, JumpAnnotation* annotation) noexcept; + ASMJIT_API Error newJumpNode(JumpNode** out, uint32_t instId, uint32_t instOptions, const Operand_& o0, JumpAnnotation* annotation); ASMJIT_API Error emitAnnotatedJump(uint32_t instId, const Operand_& o0, JumpAnnotation* annotation); //! Returns a new `JumpAnnotation` instance, which can be used to aggregate @@ -348,9 +295,12 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { //! \} - // TODO: These should be removed - inline void alloc(BaseReg& reg) { DebugUtils::unused(reg); } - inline void spill(BaseReg& reg) { DebugUtils::unused(reg); } +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("alloc() has no effect, it will be removed in the future") + inline void alloc(BaseReg&) {} + ASMJIT_DEPRECATED("spill() has no effect, it will be removed in the future") + inline void spill(BaseReg&) {} +#endif // !ASMJIT_NO_DEPRECATED //! \name Events //! \{ @@ -365,26 +315,44 @@ class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder { // [asmjit::JumpAnnotation] // ============================================================================ +//! Jump annotation used to annotate jumps. +//! +//! \ref BaseCompiler allows emitting jumps whose target is either a register +//! or a memory operand. Such jumps cannot be trivially inspected, so instead +//! of relying on heuristics AsmJit allows such jumps to be annotated with +//! their possible targets. The register allocator then uses the annotation to +//! construct the control flow, which is then used by liveness analysis and +//! other tools to prepare the ground for register allocation. class JumpAnnotation { public: ASMJIT_NONCOPYABLE(JumpAnnotation) + //! Compiler that owns this JumpAnnotation. BaseCompiler* _compiler; + //! Annotation identifier. uint32_t _annotationId; + //! Vector of label identifiers, see \ref labelIds(). ZoneVector<uint32_t> _labelIds; inline JumpAnnotation(BaseCompiler* compiler, uint32_t annotationId) noexcept : _compiler(compiler), _annotationId(annotationId) {} + //! Returns the compiler that owns this JumpAnnotation. inline BaseCompiler* compiler() const noexcept { return _compiler; } + //! Returns the annotation id. inline uint32_t annotationId() const noexcept { return _annotationId; } + //!
Returns a vector of label identifiers that lists all targets of the jump. const ZoneVector& labelIds() const noexcept { return _labelIds; } + //! Tests whether the given `label` is a target of this JumpAnnotation. inline bool hasLabel(const Label& label) const noexcept { return hasLabelId(label.id()); } + //! Tests whether the given `labelId` is a target of this JumpAnnotation. inline bool hasLabelId(uint32_t labelId) const noexcept { return _labelIds.contains(labelId); } + //! Adds the `label` to the list of targets of this JumpAnnotation. inline Error addLabel(const Label& label) noexcept { return addLabelId(label.id()); } + //! Adds the `labelId` to the list of targets of this JumpAnnotation. inline Error addLabelId(uint32_t labelId) noexcept { return _labelIds.append(&_compiler->_allocator, labelId); } }; @@ -418,8 +386,11 @@ class JumpNode : public InstNode { //! \name Accessors //! \{ + //! Tests whether this JumpNode has associated a \ref JumpAnnotation. inline bool hasAnnotation() const noexcept { return _annotation != nullptr; } + //! Returns the \ref JumpAnnotation associated with this jump, or `nullptr`. inline JumpAnnotation* annotation() const noexcept { return _annotation; } + //! Sets the \ref JumpAnnotation associated with this jump to `annotation`. inline void setAnnotation(JumpAnnotation* annotation) noexcept { _annotation = annotation; } //! \} @@ -429,21 +400,79 @@ class JumpNode : public InstNode { // [asmjit::FuncNode] // ============================================================================ -//! Function entry (BaseCompiler). +//! Function node represents a function used by \ref BaseCompiler. +//! +//! A function is composed of the following: +//! +//! - Function entry, \ref FuncNode acts as a label, so the entry is implicit. +//! To get the entry, simply use \ref FuncNode::label(), which is the same +//! as \ref LabelNode::label(). +//! +//! - Function exit, which is represented by \ref FuncNode::exitNode(). A +//! helper function \ref FuncNode::exitLabel() exists and returns an exit +//! label instead of node. +//! +//! - Function \ref FuncNode::endNode() sentinel. This node marks the end of +//! a function - there should be no code that belongs to the function after +//! this node, but the Compiler doesn't enforce that at the moment. +//! +//! - Function detail, see \ref FuncNode::detail(). +//! +//! - Function frame, see \ref FuncNode::frame(). +//! +//! - Function arguments mapped to virtual registers, see \ref FuncNode::args(). +//! +//! In a node list, the function and its body looks like the following: +//! +//! \code{.unparsed} +//! [...] - Anything before the function. +//! +//! [FuncNode] - Entry point of the function, acts as a label as well. +//! - Prolog inserted by the register allocator. +//! {...} - Function body - user code basically. +//! [ExitLabel] - Exit label +//! - Epilog inserted by the register allocator. +//! - Return inserted by the register allocator. +//! {...} - Can contain data or user code (error handling, special cases, ...). +//! [FuncEnd] - End sentinel +//! +//! [...] - Anything after the function. +//! \endcode +//! +//! When a function is added to the compiler by \ref BaseCompiler::addFunc() it +//! actually inserts 3 nodes (FuncNode, ExitLabel, and FuncEnd) and sets the +//! current cursor to be FuncNode. When \ref BaseCompiler::endFunc() is called +//! the cursor is set to FuncEnd. This guarantees that user can use ExitLabel +//! as a marker after additional code or data can be placed, and it's a common +//! practice. 
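Editor's note: a minimal usage sketch, not part of the asmjit patch. Assuming the x86 backend and a JitRuntime (the names `rt`, `makeAddFunc`, and the int(int, int) signature are illustrative assumptions), the node layout described above is produced roughly like this:

#include <asmjit/x86.h>
using namespace asmjit;

int makeAddFunc(JitRuntime& rt) {
  CodeHolder code;
  code.init(rt.environment());
  x86::Compiler cc(&code);

  // [FuncNode] - entry point, acts as a label as well.
  FuncNode* funcNode = cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));
  x86::Gp a = cc.newInt32("a");
  x86::Gp b = cc.newInt32("b");
  cc.setArg(0, a);                    // bind virtual registers to arguments
  cc.setArg(1, b);
  cc.add(a, b);                       // {...} - function body, user code
  cc.ret(a);                          // FuncRetNode - returns 'a'
  cc.endFunc();                       // cursor moves to the [FuncEnd] sentinel
  cc.finalize();                      // serialize nodes into machine code

  typedef int (*AddFunc)(int, int);
  AddFunc fn;
  if (rt.add(&fn, &code) != kErrorOk)
    return 0;
  return fn(1, 2);                    // -> 3
}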
class FuncNode : public LabelNode { public: ASMJIT_NONCOPYABLE(FuncNode) + //! Arguments pack. + struct ArgPack { + VirtReg* _data[Globals::kMaxValuePack]; + + inline void reset() noexcept { + for (size_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) + _data[valueIndex] = nullptr; + } + + inline VirtReg*& operator[](size_t valueIndex) noexcept { return _data[valueIndex]; } + inline VirtReg* const& operator[](size_t valueIndex) const noexcept { return _data[valueIndex]; } + }; + //! Function detail. FuncDetail _funcDetail; //! Function frame. FuncFrame _frame; - //! Function exit (label). + //! Function exit label. LabelNode* _exitNode; //! Function end (sentinel). SentinelNode* _end; - //! Arguments array as `VirtReg`. - VirtReg** _args; + + //! Argument packs. + ArgPack* _args; //! \name Construction & Destruction //! \{ @@ -484,33 +513,47 @@ class FuncNode : public LabelNode { //! Returns function frame. inline const FuncFrame& frame() const noexcept { return _frame; } + //! Tests whether the function has a return value. + inline bool hasRet() const noexcept { return _funcDetail.hasRet(); } //! Returns arguments count. inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); } - //! Returns returns count. - inline uint32_t retCount() const noexcept { return _funcDetail.retCount(); } - //! Returns arguments list. - inline VirtReg** args() const noexcept { return _args; } + //! Returns argument packs. + inline ArgPack* argPacks() const noexcept { return _args; } - //! Returns argument at `i`. - inline VirtReg* arg(uint32_t i) const noexcept { - ASMJIT_ASSERT(i < argCount()); - return _args[i]; + //! Returns argument pack at `argIndex`. + inline ArgPack& argPack(size_t argIndex) const noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + return _args[argIndex]; } - //! Sets argument at `i`. - inline void setArg(uint32_t i, VirtReg* vReg) noexcept { - ASMJIT_ASSERT(i < argCount()); - _args[i] = vReg; + //! Sets argument at `argIndex`. + inline void setArg(size_t argIndex, VirtReg* vReg) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + _args[argIndex][0] = vReg; } - //! Resets argument at `i`. - inline void resetArg(uint32_t i) noexcept { - ASMJIT_ASSERT(i < argCount()); - _args[i] = nullptr; + //! Sets argument at `argIndex` and `valueIndex`. + inline void setArg(size_t argIndex, size_t valueIndex, VirtReg* vReg) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + _args[argIndex][valueIndex] = vReg; } + //! Resets argument pack at `argIndex`. + inline void resetArg(size_t argIndex) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + _args[argIndex].reset(); + } + + //! Resets argument pack at `argIndex`. + inline void resetArg(size_t argIndex, size_t valueIndex) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + _args[argIndex][valueIndex] = nullptr; + } + + //! Returns function attributes. inline uint32_t attributes() const noexcept { return _frame.attributes(); } + //! Adds `attrs` to the function attributes. inline void addAttributes(uint32_t attrs) noexcept { _frame.addAttributes(attrs); } //! \} @@ -520,7 +563,7 @@ class FuncNode : public LabelNode { // [asmjit::FuncRetNode] // ============================================================================ -//! Function return (BaseCompiler). +//! Function return, used by \ref BaseCompiler. 
class FuncRetNode : public InstNode { public: ASMJIT_NONCOPYABLE(FuncRetNode) @@ -537,33 +580,60 @@ }; // ============================================================================ -// [asmjit::FuncCallNode] +// [asmjit::InvokeNode] // ============================================================================ -//! Function call (BaseCompiler). -class FuncCallNode : public InstNode { +//! Function invocation, used by \ref BaseCompiler. +class InvokeNode : public InstNode { public: - ASMJIT_NONCOPYABLE(FuncCallNode) + ASMJIT_NONCOPYABLE(InvokeNode) + + //! Operand pack provides multiple operands that can be associated with a + //! single return value or function argument. Sometimes this is necessary to + //! express an argument or return value that requires multiple registers, for + //! example a 64-bit value in 32-bit mode or passing / returning homogeneous + //! data structures. + struct OperandPack { + //! Operands. + Operand_ _data[Globals::kMaxValuePack]; + + //! Reset the pack by resetting all operands in the pack. + inline void reset() noexcept { + for (size_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) + _data[valueIndex].reset(); + } + + //! Returns an operand at the given `valueIndex`. + inline Operand& operator[](size_t valueIndex) noexcept { + ASMJIT_ASSERT(valueIndex < Globals::kMaxValuePack); + return _data[valueIndex].as<Operand>(); + } + + //! Returns an operand at the given `valueIndex` (const). + const inline Operand& operator[](size_t valueIndex) const noexcept { + ASMJIT_ASSERT(valueIndex < Globals::kMaxValuePack); + return _data[valueIndex].as<Operand>(); + } + }; //! Function detail. FuncDetail _funcDetail; - //! Returns. - Operand_ _rets[2]; - //! Arguments. - Operand_* _args; + //! Function return value(s). + OperandPack _rets; + //! Function arguments. + OperandPack* _args; //! \name Construction & Destruction //! \{ - //! Creates a new `FuncCallNode` instance. - inline FuncCallNode(BaseBuilder* cb, uint32_t instId, uint32_t options) noexcept + //! Creates a new `InvokeNode` instance. + inline InvokeNode(BaseBuilder* cb, uint32_t instId, uint32_t options) noexcept : InstNode(cb, instId, options, kBaseOpCapacity), _funcDetail(), _args(nullptr) { - setType(kNodeFuncCall); + setType(kNodeInvoke); _resetOps(); - _rets[0].reset(); - _rets[1].reset(); + _rets.reset(); addFlags(kFlagIsRemovable); } @@ -573,8 +643,8 @@ class FuncCallNode : public InstNode { //! \{ //! Sets the function signature. - inline Error setSignature(const FuncSignature& sign) noexcept { - return _funcDetail.init(sign); + inline Error init(const FuncSignature& signature, const Environment& environment) noexcept { + return _funcDetail.init(signature, environment); } //! Returns the function detail. @@ -587,45 +657,63 @@ class FuncCallNode : public InstNode { //! \overload inline const Operand& target() const noexcept { return _opArray[0].as<Operand>(); } + //! Tests whether the function has a return value. + inline bool hasRet() const noexcept { return _funcDetail.hasRet(); } //! Returns the number of function arguments. inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); } - //! Returns the number of function return values. - inline uint32_t retCount() const noexcept { return _funcDetail.retCount(); } - //! Returns the return value at `i`. - inline Operand& ret(uint32_t i = 0) noexcept { - ASMJIT_ASSERT(i < 2); - return _rets[i].as<Operand>(); + //! Returns operand pack representing function return value(s).
+ inline OperandPack& retPack() noexcept { return _rets; } + //! Returns operand pack representing function return value(s). + inline const OperandPack& retPack() const noexcept { return _rets; } + + //! Returns the return value at the given `valueIndex`. + inline Operand& ret(size_t valueIndex = 0) noexcept { return _rets[valueIndex]; } + //! \overload + inline const Operand& ret(size_t valueIndex = 0) const noexcept { return _rets[valueIndex]; } + + //! Returns operand pack representing function return value(s). + inline OperandPack& argPack(size_t argIndex) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + return _args[argIndex]; } //! \overload - inline const Operand& ret(uint32_t i = 0) const noexcept { - ASMJIT_ASSERT(i < 2); - return _rets[i].as(); + inline const OperandPack& argPack(size_t argIndex) const noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + return _args[argIndex]; } - //! Returns the function argument at `i`. - inline Operand& arg(uint32_t i) noexcept { - ASMJIT_ASSERT(i < kFuncArgCountLoHi); - return _args[i].as(); + //! Returns a function argument at the given `argIndex`. + inline Operand& arg(size_t argIndex, size_t valueIndex) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + return _args[argIndex][valueIndex]; } //! \overload - inline const Operand& arg(uint32_t i) const noexcept { - ASMJIT_ASSERT(i < kFuncArgCountLoHi); - return _args[i].as(); + inline const Operand& arg(size_t argIndex, size_t valueIndex) const noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + return _args[argIndex][valueIndex]; } - //! Sets the function argument at `i` to `op`. - ASMJIT_API bool _setArg(uint32_t i, const Operand_& op) noexcept; //! Sets the function return value at `i` to `op`. - ASMJIT_API bool _setRet(uint32_t i, const Operand_& op) noexcept; + inline void _setRet(size_t valueIndex, const Operand_& op) noexcept { _rets[valueIndex] = op; } + //! Sets the function argument at `i` to `op`. + inline void _setArg(size_t argIndex, size_t valueIndex, const Operand_& op) noexcept { + ASMJIT_ASSERT(argIndex < argCount()); + _args[argIndex][valueIndex] = op; + } + + //! Sets the function return value at `valueIndex` to `reg`. + inline void setRet(size_t valueIndex, const BaseReg& reg) noexcept { _setRet(valueIndex, reg); } - //! Sets the function argument at `i` to `reg`. - inline bool setArg(uint32_t i, const BaseReg& reg) noexcept { return _setArg(i, reg); } - //! Sets the function argument at `i` to `imm`. - inline bool setArg(uint32_t i, const Imm& imm) noexcept { return _setArg(i, imm); } + //! Sets the first function argument in a value-pack at `argIndex` to `reg`. + inline void setArg(size_t argIndex, const BaseReg& reg) noexcept { _setArg(argIndex, 0, reg); } + //! Sets the first function argument in a value-pack at `argIndex` to `imm`. + inline void setArg(size_t argIndex, const Imm& imm) noexcept { _setArg(argIndex, 0, imm); } - //! Sets the function return value at `i` to `var`. - inline bool setRet(uint32_t i, const BaseReg& reg) noexcept { return _setRet(i, reg); } + //! Sets the function argument at `argIndex` and `valueIndex` to `reg`. + inline void setArg(size_t argIndex, size_t valueIndex, const BaseReg& reg) noexcept { _setArg(argIndex, valueIndex, reg); } + //! Sets the function argument at `argIndex` and `valueIndex` to `imm`. + inline void setArg(size_t argIndex, size_t valueIndex, const Imm& imm) noexcept { _setArg(argIndex, valueIndex, imm); } //! 
\} }; @@ -634,6 +722,7 @@ class FuncCallNode : public InstNode { // [asmjit::FuncPass] // ============================================================================ +//! Function pass extends \ref Pass with \ref FuncPass::runOnFunction(). class ASMJIT_VIRTAPI FuncPass : public Pass { public: ASMJIT_NONCOPYABLE(FuncPass) @@ -658,10 +747,10 @@ class ASMJIT_VIRTAPI FuncPass : public Pass { //! \{ //! Calls `runOnFunction()` on each `FuncNode` node found. - ASMJIT_API Error run(Zone* zone, Logger* logger) noexcept override; + ASMJIT_API Error run(Zone* zone, Logger* logger) override; //! Called once per `FuncNode`. - virtual Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept = 0; + virtual Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) = 0; //! \} }; diff --git a/libs/asmjit/src/asmjit/core/compilerdefs.h b/libs/asmjit/src/asmjit/core/compilerdefs.h new file mode 100644 index 0000000..32f0757 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/compilerdefs.h @@ -0,0 +1,170 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_COMPILERDEFS_H_INCLUDED +#define ASMJIT_CORE_COMPILERDEFS_H_INCLUDED + +#include "../core/api-config.h" +#include "../core/operand.h" +#include "../core/zonestring.h" + +ASMJIT_BEGIN_NAMESPACE + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +class RAWorkReg; + +//! \addtogroup asmjit_compiler +//! \{ + +// ============================================================================ +// [asmjit::VirtReg] +// ============================================================================ + +//! Virtual register data, managed by \ref BaseCompiler. +class VirtReg { +public: + ASMJIT_NONCOPYABLE(VirtReg) + + //! Virtual register id. + uint32_t _id = 0; + //! Virtual register info (signature). + RegInfo _info = {}; + //! Virtual register size (can be smaller than `regInfo._size`). + uint32_t _virtSize = 0; + //! Virtual register alignment (for spilling). + uint8_t _alignment = 0; + //! Type-id. + uint8_t _typeId = 0; + //! Virtual register weight for alloc/spill decisions. + uint8_t _weight = 1; + //! True if this is a fixed register, never reallocated. + uint8_t _isFixed : 1; + //! True if the virtual register is only used as a stack (never accessed as register). + uint8_t _isStack : 1; + uint8_t _reserved : 6; + + //! 
Virtual register name (user provided or automatically generated). + ZoneString<16> _name {}; + + // ------------------------------------------------------------------------- + // The following members are used exclusively by RAPass. They are initialized + // when the VirtReg is created to NULL pointers and then changed during RAPass + // execution. RAPass sets them back to NULL before it returns. + // ------------------------------------------------------------------------- + + //! Reference to `RAWorkReg`, used during register allocation. + RAWorkReg* _workReg = nullptr; + + //! \name Construction & Destruction + //! \{ + + inline VirtReg(uint32_t id, uint32_t signature, uint32_t virtSize, uint32_t alignment, uint32_t typeId) noexcept + : _id(id), + _info { signature }, + _virtSize(virtSize), + _alignment(uint8_t(alignment)), + _typeId(uint8_t(typeId)), + _isFixed(false), + _isStack(false), + _reserved(0) {} + + //! \} + + //! \name Accessors + //! \{ + + //! Returns the virtual register id. + inline uint32_t id() const noexcept { return _id; } + + //! Returns the virtual register name. + inline const char* name() const noexcept { return _name.data(); } + //! Returns the size of the virtual register name. + inline uint32_t nameSize() const noexcept { return _name.size(); } + + //! Returns a register information that wraps the register signature. + inline const RegInfo& info() const noexcept { return _info; } + //! Returns a virtual register type (maps to the physical register type as well). + inline uint32_t type() const noexcept { return _info.type(); } + //! Returns a virtual register group (maps to the physical register group as well). + inline uint32_t group() const noexcept { return _info.group(); } + + //! Returns a real size of the register this virtual register maps to. + //! + //! For example if this is a 128-bit SIMD register used for a scalar single + //! precision floating point value then its virtSize would be 4, however, the + //! `regSize` would still say 16 (128-bits), because it's the smallest size + //! of that register type. + inline uint32_t regSize() const noexcept { return _info.size(); } + + //! Returns a register signature of this virtual register. + inline uint32_t signature() const noexcept { return _info.signature(); } + + //! Returns the virtual register size. + //! + //! The virtual register size describes how many bytes the virtual register + //! needs to store its content. It can be smaller than the physical register + //! size, see `regSize()`. + inline uint32_t virtSize() const noexcept { return _virtSize; } + + //! Returns the virtual register alignment. + inline uint32_t alignment() const noexcept { return _alignment; } + + //! Returns the virtual register type id, see `Type::Id`. + inline uint32_t typeId() const noexcept { return _typeId; } + + //! Returns the virtual register weight - the register allocator can use it + //! as explicit hint for alloc/spill decisions. + inline uint32_t weight() const noexcept { return _weight; } + //! Sets the virtual register weight (0 to 255) - the register allocator can + //! use it as explicit hint for alloc/spill decisions and initial bin-packing. + inline void setWeight(uint32_t weight) noexcept { _weight = uint8_t(weight); } + + //! Returns whether the virtual register is always allocated to a fixed + //! physical register (and never reallocated). + //! + //! \note This is only used for special purposes and it's mostly internal. + inline bool isFixed() const noexcept { return bool(_isFixed); } + + //! 
Returns whether the virtual register is indeed a stack that only uses + //! the virtual register id for making it accessible. + //! + //! \note It's an error if a stack is accessed as a register. + inline bool isStack() const noexcept { return bool(_isStack); } + + inline bool hasWorkReg() const noexcept { return _workReg != nullptr; } + inline RAWorkReg* workReg() const noexcept { return _workReg; } + inline void setWorkReg(RAWorkReg* workReg) noexcept { _workReg = workReg; } + inline void resetWorkReg() noexcept { _workReg = nullptr; } + + //! \} +}; + +//! \} + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_COMPILERDEFS_H_INCLUDED + diff --git a/libs/asmjit/src/asmjit/core/constpool.cpp b/libs/asmjit/src/asmjit/core/constpool.cpp index 4db68e2..65c995b 100644 --- a/libs/asmjit/src/asmjit/core/constpool.cpp +++ b/libs/asmjit/src/asmjit/core/constpool.cpp @@ -258,7 +258,7 @@ UNIT(const_pool) { uint32_t i; uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 1000000; - INFO("Adding %u constants to the pool.", kCount); + INFO("Adding %u constants to the pool", kCount); { size_t prevOffset; size_t curOffset; @@ -278,7 +278,7 @@ UNIT(const_pool) { EXPECT(pool.alignment() == 8); } - INFO("Retrieving %u constants from the pool.", kCount); + INFO("Retrieving %u constants from the pool", kCount); { uint64_t c = 0x0101010101010101u; diff --git a/libs/asmjit/src/asmjit/core/constpool.h b/libs/asmjit/src/asmjit/core/constpool.h index 259615f..d9ac589 100644 --- a/libs/asmjit/src/asmjit/core/constpool.h +++ b/libs/asmjit/src/asmjit/core/constpool.h @@ -30,7 +30,7 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_core +//! \addtogroup asmjit_utilities //! \{ // ============================================================================ @@ -65,9 +65,12 @@ class ConstPool { //! Zone-allocated const-pool gap created by two differently aligned constants. struct Gap { - Gap* _next; //!< Pointer to the next gap - size_t _offset; //!< Offset of the gap. - size_t _size; //!< Remaining bytes of the gap (basically a gap size). + //! Pointer to the next gap + Gap* _next; + //! Offset of the gap. + size_t _offset; + //! Remaining bytes of the gap (basically a gap size). + size_t _size; }; //! Zone-allocated const-pool node. @@ -75,6 +78,11 @@ class ConstPool { public: ASMJIT_NONCOPYABLE(Node) + //! If this constant is shared with another. + uint32_t _shared : 1; + //! Data offset from the beginning of the pool. + uint32_t _offset; + inline Node(size_t offset, bool shared) noexcept : ZoneTreeNodeT<Node>(), _shared(shared), @@ -83,14 +91,13 @@ class ConstPool { inline void* data() const noexcept { return static_cast<void*>(const_cast<Node*>(this) + 1); } - - uint32_t _shared : 1; //!< If this constant is shared with another. - uint32_t _offset; //!< Data offset from the beginning of the pool. }; //! Data comparer used internally. class Compare { public: + size_t _dataSize; + inline Compare(size_t dataSize) noexcept : _dataSize(dataSize) {} @@ -101,12 +108,17 @@ class ConstPool { inline int operator()(const Node& a, const void* data) const noexcept { return ::memcmp(a.data(), data, _dataSize); } - - size_t _dataSize; }; //! Zone-allocated const-pool tree. struct Tree { + //! RB tree. + ZoneTree<Node> _tree; + //! Size of the tree (number of nodes). + size_t _size; + //! Size of the data. + size_t _dataSize; + inline explicit Tree(size_t dataSize = 0) noexcept : _tree(), _size(0), @@ -177,13 +189,6 @@ class ConstPool { memcpy(node->data(), data, size); return node; } - - //! RB tree. - ZoneTree<Node> _tree; - //!
Size of the tree (number of nodes). - size_t _size; - //! Size of the data. - size_t _dataSize; }; //! \endcond diff --git a/libs/asmjit/src/asmjit/core/cpuinfo.h b/libs/asmjit/src/asmjit/core/cpuinfo.h index d2defb9..83bb8c1 100644 --- a/libs/asmjit/src/asmjit/core/cpuinfo.h +++ b/libs/asmjit/src/asmjit/core/cpuinfo.h @@ -24,14 +24,14 @@ #ifndef ASMJIT_CORE_CPUINFO_H_INCLUDED #define ASMJIT_CORE_CPUINFO_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/features.h" #include "../core/globals.h" #include "../core/string.h" ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_support +//! \addtogroup asmjit_core //! \{ // ============================================================================ @@ -41,8 +41,12 @@ ASMJIT_BEGIN_NAMESPACE //! CPU information. class CpuInfo { public: - //! CPU architecture information. - ArchInfo _archInfo; + //! Architecture. + uint8_t _arch; + //! Sub-architecture. + uint8_t _subArch; + //! Reserved for future use. + uint16_t _reserved; //! CPU family ID. uint32_t _familyId; //! CPU model ID. @@ -74,15 +78,15 @@ class CpuInfo { inline CpuInfo(const CpuInfo& other) noexcept = default; inline explicit CpuInfo(Globals::NoInit_) noexcept - : _archInfo(Globals::NoInit), - _features(Globals::NoInit) {}; + : _features(Globals::NoInit) {}; //! Returns the host CPU information. ASMJIT_API static const CpuInfo& host() noexcept; - //! Initializes CpuInfo to the given architecture, see `ArchInfo`. - inline void initArch(uint32_t archId, uint32_t archMode = 0) noexcept { - _archInfo.init(archId, archMode); + //! Initializes CpuInfo to the given architecture, see \ref Environment. + inline void initArch(uint32_t arch, uint32_t subArch = 0u) noexcept { + _arch = uint8_t(arch); + _subArch = uint8_t(subArch); } inline void reset() noexcept { memset(this, 0, sizeof(*this)); } @@ -99,12 +103,10 @@ class CpuInfo { //! \name Accessors //! \{ - //! Returns the CPU architecture information. - inline const ArchInfo& archInfo() const noexcept { return _archInfo; } - //! Returns the CPU architecture id, see `ArchInfo::Id`. - inline uint32_t archId() const noexcept { return _archInfo.archId(); } - //! Returns the CPU architecture sub-id, see `ArchInfo::SubId`. - inline uint32_t archSubId() const noexcept { return _archInfo.archSubId(); } + //! Returns the CPU architecture id, see \ref Environment::Arch. + inline uint32_t arch() const noexcept { return _arch; } + //! Returns the CPU architecture sub-id, see \ref Environment::SubArch. + inline uint32_t subArch() const noexcept { return _subArch; } //! Returns the CPU family ID. inline uint32_t familyId() const noexcept { return _familyId; } diff --git a/libs/asmjit/src/asmjit/core/datatypes.h b/libs/asmjit/src/asmjit/core/datatypes.h index bee4572..2f6cc1e 100644 --- a/libs/asmjit/src/asmjit/core/datatypes.h +++ b/libs/asmjit/src/asmjit/core/datatypes.h @@ -26,17 +26,16 @@ #include "../core/globals.h" -ASMJIT_BEGIN_NAMESPACE +#ifndef ASMJIT_NO_DEPRECATED -//! \addtogroup asmjit_support -//! \{ +ASMJIT_BEGIN_NAMESPACE // ============================================================================ // [asmjit::Data64] // ============================================================================ //! 64-bit data useful for creating SIMD constants. -union Data64 { +union ASMJIT_DEPRECATED_STRUCT("Data64 is deprecated and will be removed in the future") Data64 { //! Array of eight 8-bit signed integers. int8_t sb[8]; //! Array of eight 8-bit unsigned integers. 
@@ -303,7 +302,7 @@ union Data64 { // ============================================================================ //! 128-bit data useful for creating SIMD constants. -union Data128 { +union ASMJIT_DEPRECATED_STRUCT("Data128 is deprecated and will be removed in the future") Data128 { //! Array of sixteen 8-bit signed integers. int8_t sb[16]; //! Array of sixteen 8-bit unsigned integers. @@ -648,7 +647,7 @@ union Data128 { // ============================================================================ //! 256-bit data useful for creating SIMD constants. -union Data256 { +union ASMJIT_DEPRECATED_STRUCT("Data256 is deprecated and will be removed in the future") Data256 { //! Array of thirty two 8-bit signed integers. int8_t sb[32]; //! Array of thirty two 8-bit unsigned integers. @@ -1066,8 +1065,7 @@ union Data256 { //! \} }; -//! \} - ASMJIT_END_NAMESPACE +#endif // !ASMJIT_NO_DEPRECATED #endif // ASMJIT_CORE_DATATYPES_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/emithelper.cpp b/libs/asmjit/src/asmjit/core/emithelper.cpp new file mode 100644 index 0000000..a77211e --- /dev/null +++ b/libs/asmjit/src/asmjit/core/emithelper.cpp @@ -0,0 +1,351 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#include "../core/archtraits.h" +#include "../core/emithelper_p.h" +#include "../core/formatter.h" +#include "../core/funcargscontext_p.h" +#include "../core/radefs_p.h" + +// Can be used for debugging... 
+// #define ASMJIT_DUMP_ARGS_ASSIGNMENT + +ASMJIT_BEGIN_NAMESPACE + +// ============================================================================ +// [asmjit::BaseEmitHelper - Formatting] +// ============================================================================ + +#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT +static void dumpFuncValue(String& sb, uint32_t arch, const FuncValue& value) noexcept { + Formatter::formatTypeId(sb, value.typeId()); + sb.append('@'); + + if (value.isIndirect()) + sb.append('['); + + if (value.isReg()) + Formatter::formatRegister(sb, 0, nullptr, arch, value.regType(), value.regId()); + else if (value.isStack()) + sb.appendFormat("[%d]", value.stackOffset()); + else + sb.append("<none>"); + + if (value.isIndirect()) + sb.append(']'); +} + +static void dumpAssignment(String& sb, const FuncArgsContext& ctx) noexcept { + typedef FuncArgsContext::Var Var; + + uint32_t arch = ctx.arch(); + uint32_t varCount = ctx.varCount(); + + for (uint32_t i = 0; i < varCount; i++) { + const Var& var = ctx.var(i); + const FuncValue& dst = var.out; + const FuncValue& cur = var.cur; + + sb.appendFormat("Var%u: ", i); + dumpFuncValue(sb, arch, dst); + sb.append(" <- "); + dumpFuncValue(sb, arch, cur); + + if (var.isDone()) + sb.append(" {Done}"); + + sb.append('\n'); + } +} +#endif + +// ============================================================================ +// [asmjit::BaseEmitHelper - EmitArgsAssignment] +// ============================================================================ + +ASMJIT_FAVOR_SIZE Error BaseEmitHelper::emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args) { + typedef FuncArgsContext::Var Var; + typedef FuncArgsContext::WorkData WorkData; + + enum WorkFlags : uint32_t { + kWorkNone = 0x00, + kWorkDidSome = 0x01, + kWorkPending = 0x02, + kWorkPostponed = 0x04 + }; + + uint32_t arch = frame.arch(); + const ArchTraits& archTraits = ArchTraits::byArch(arch); + + RAConstraints constraints; + FuncArgsContext ctx; + + ASMJIT_PROPAGATE(constraints.init(arch)); + ASMJIT_PROPAGATE(ctx.initWorkData(frame, args, &constraints)); + +#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT + { + String sb; + dumpAssignment(sb, ctx); + printf("%s\n", sb.data()); + } +#endif + + uint32_t varCount = ctx._varCount; + WorkData* workData = ctx._workData; + + uint32_t saVarId = ctx._saVarId; + BaseReg sp = BaseReg::fromSignatureAndId(_emitter->_gpRegInfo.signature(), archTraits.spRegId()); + BaseReg sa = sp; + + if (frame.hasDynamicAlignment()) { + if (frame.hasPreservedFP()) + sa.setId(archTraits.fpRegId()); + else + sa.setId(saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId()); + } + + // -------------------------------------------------------------------------- + // Register to stack and stack to stack moves must be first as now we have + // the biggest chance of having as many as possible unassigned registers. + // -------------------------------------------------------------------------- + + if (ctx._stackDstMask) { + // Base address of all arguments passed by stack.
+ BaseMem baseArgPtr(sa, int32_t(frame.saOffset(sa.id()))); + BaseMem baseStackPtr(sp, 0); + + for (uint32_t varId = 0; varId < varCount; varId++) { + Var& var = ctx._vars[varId]; + + if (!var.out.isStack()) + continue; + + FuncValue& cur = var.cur; + FuncValue& out = var.out; + + ASMJIT_ASSERT(cur.isReg() || cur.isStack()); + BaseReg reg; + + BaseMem dstStackPtr = baseStackPtr.cloneAdjusted(out.stackOffset()); + BaseMem srcStackPtr = baseArgPtr.cloneAdjusted(cur.stackOffset()); + + if (cur.isIndirect()) { + if (cur.isStack()) { + // TODO: Indirect stack. + return DebugUtils::errored(kErrorInvalidAssignment); + } + else { + srcStackPtr.setBaseId(cur.regId()); + } + } + + if (cur.isReg() && !cur.isIndirect()) { + WorkData& wd = workData[archTraits.regTypeToGroup(cur.regType())]; + uint32_t rId = cur.regId(); + + reg.setSignatureAndId(archTraits.regTypeToSignature(cur.regType()), rId); + wd.unassign(varId, rId); + } + else { + // Stack to reg move - tricky since we move stack to stack we can decide which + // register to use. In general we follow the rule that IntToInt moves will use + // GP regs with possibility to signature or zero extend, and all other moves will + // either use GP or VEC regs depending on the size of the move. + RegInfo rInfo = getSuitableRegForMemToMemMove(arch, out.typeId(), cur.typeId()); + if (ASMJIT_UNLIKELY(!rInfo.isValid())) + return DebugUtils::errored(kErrorInvalidState); + + WorkData& wd = workData[rInfo.group()]; + uint32_t availableRegs = wd.availableRegs(); + if (ASMJIT_UNLIKELY(!availableRegs)) + return DebugUtils::errored(kErrorInvalidState); + + uint32_t rId = Support::ctz(availableRegs); + reg.setSignatureAndId(rInfo.signature(), rId); + + ASMJIT_PROPAGATE(emitArgMove(reg, out.typeId(), srcStackPtr, cur.typeId())); + } + + if (cur.isIndirect() && cur.isReg()) + workData[BaseReg::kGroupGp].unassign(varId, cur.regId()); + + // Register to stack move. + ASMJIT_PROPAGATE(emitRegMove(dstStackPtr, reg, cur.typeId())); + var.markDone(); + } + } + + // -------------------------------------------------------------------------- + // Shuffle all registers that are currently assigned accordingly to target + // assignment. + // -------------------------------------------------------------------------- + + uint32_t workFlags = kWorkNone; + for (;;) { + for (uint32_t varId = 0; varId < varCount; varId++) { + Var& var = ctx._vars[varId]; + if (var.isDone() || !var.cur.isReg()) + continue; + + FuncValue& cur = var.cur; + FuncValue& out = var.out; + + uint32_t curGroup = archTraits.regTypeToGroup(cur.regType()); + uint32_t outGroup = archTraits.regTypeToGroup(out.regType()); + + uint32_t curId = cur.regId(); + uint32_t outId = out.regId(); + + if (curGroup != outGroup) { + // TODO: Conversion is not supported. 
+ return DebugUtils::errored(kErrorInvalidAssignment); + } + else { + WorkData& wd = workData[outGroup]; + if (!wd.isAssigned(outId)) { +EmitMove: + ASMJIT_PROPAGATE( + emitArgMove( + BaseReg::fromSignatureAndId(archTraits.regTypeToSignature(out.regType()), outId), out.typeId(), + BaseReg::fromSignatureAndId(archTraits.regTypeToSignature(cur.regType()), curId), cur.typeId())); + + wd.reassign(varId, outId, curId); + cur.initReg(out.regType(), outId, out.typeId()); + + if (outId == out.regId()) + var.markDone(); + workFlags |= kWorkDidSome | kWorkPending; + } + else { + uint32_t altId = wd._physToVarId[outId]; + Var& altVar = ctx._vars[altId]; + + if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) { + // Only few architectures provide swap operations, and only for few register groups. + if (archTraits.hasSwap(curGroup)) { + uint32_t highestType = Support::max(cur.regType(), altVar.cur.regType()); + if (Support::isBetween(highestType, BaseReg::kTypeGp8Lo, BaseReg::kTypeGp16)) + highestType = BaseReg::kTypeGp32; + + uint32_t signature = archTraits.regTypeToSignature(highestType); + ASMJIT_PROPAGATE( + emitRegSwap(BaseReg::fromSignatureAndId(signature, outId), + BaseReg::fromSignatureAndId(signature, curId))); + wd.swap(varId, curId, altId, outId); + cur.setRegId(outId); + var.markDone(); + altVar.cur.setRegId(curId); + + if (altVar.out.isInitialized()) + altVar.markDone(); + workFlags |= kWorkDidSome; + } + else { + // If there is a scratch register it can be used to perform the swap. + uint32_t availableRegs = wd.availableRegs(); + if (availableRegs) { + uint32_t inOutRegs = wd.dstRegs(); + if (availableRegs & ~inOutRegs) + availableRegs &= ~inOutRegs; + outId = Support::ctz(availableRegs); + goto EmitMove; + } + else { + workFlags |= kWorkPending; + } + } + } + else { + workFlags |= kWorkPending; + } + } + } + } + + if (!(workFlags & kWorkPending)) + break; + + // If we did nothing twice it means that something is really broken. + if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed) + return DebugUtils::errored(kErrorInvalidState); + + workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed; + } + + // -------------------------------------------------------------------------- + // Load arguments passed by stack into registers. This is pretty simple and + // it never requires multiple iterations like the previous phase. + // -------------------------------------------------------------------------- + + if (ctx._hasStackSrc) { + uint32_t iterCount = 1; + if (frame.hasDynamicAlignment() && !frame.hasPreservedFP()) + sa.setId(saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId()); + + // Base address of all arguments passed by stack. + BaseMem baseArgPtr(sa, int32_t(frame.saOffset(sa.id()))); + + for (uint32_t iter = 0; iter < iterCount; iter++) { + for (uint32_t varId = 0; varId < varCount; varId++) { + Var& var = ctx._vars[varId]; + if (var.isDone()) + continue; + + if (var.cur.isStack()) { + ASMJIT_ASSERT(var.out.isReg()); + + uint32_t outId = var.out.regId(); + uint32_t outType = var.out.regType(); + + uint32_t group = archTraits.regTypeToGroup(outType); + WorkData& wd = ctx._workData[group]; + + if (outId == sa.id() && group == BaseReg::kGroupGp) { + // This register will be processed last as we still need `saRegId`. 
+ if (iterCount == 1) { + iterCount++; + continue; + } + wd.unassign(wd._physToVarId[outId], outId); + } + + BaseReg dstReg = BaseReg::fromSignatureAndId(archTraits.regTypeToSignature(outType), outId); + BaseMem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset()); + + ASMJIT_PROPAGATE(emitArgMove( + dstReg, var.out.typeId(), + srcMem, var.cur.typeId())); + + wd.assign(varId, outId); + var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone); + } + } + } + } + + return kErrorOk; +} + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/emithelper_p.h b/libs/asmjit/src/asmjit/core/emithelper_p.h new file mode 100644 index 0000000..cb8ddf0 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/emithelper_p.h @@ -0,0 +1,83 @@ + +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_EMITHELPER_P_H_INCLUDED +#define ASMJIT_CORE_EMITHELPER_P_H_INCLUDED + +#include "../core/emitter.h" +#include "../core/operand.h" +#include "../core/type.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \cond INTERNAL +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [asmjit::BaseEmitHelper] +// ============================================================================ + +//! Helper class that provides utilities for each supported architecture. +class BaseEmitHelper { +public: + BaseEmitter* _emitter; + + inline explicit BaseEmitHelper(BaseEmitter* emitter = nullptr) noexcept + : _emitter(emitter) {} + + inline BaseEmitter* emitter() const noexcept { return _emitter; } + inline void setEmitter(BaseEmitter* emitter) noexcept { _emitter = emitter; } + + //! Emits a pure move operation between two registers or the same type or + //! between a register and its home slot. This function does not handle + //! register conversion. + virtual Error emitRegMove( + const Operand_& dst_, + const Operand_& src_, uint32_t typeId, const char* comment = nullptr) = 0; + + //! Emits swap between two registers. + virtual Error emitRegSwap( + const BaseReg& a, + const BaseReg& b, const char* comment = nullptr) = 0; + + //! Emits move from a function argument (either register or stack) to a register. + //! + //! This function can handle the necessary conversion from one argument to + //! another, and from one register type to another, if it's possible. Any + //! attempt of conversion that requires third register of a different group + //! (for example conversion from K to MMX on X86/X64) will fail. 
+ virtual Error emitArgMove( + const BaseReg& dst_, uint32_t dstTypeId, + const Operand_& src_, uint32_t srcTypeId, const char* comment = nullptr) = 0; + + Error emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args); +}; + +//! \} +//! \endcond + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_EMITHELPER_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/emitter.cpp b/libs/asmjit/src/asmjit/core/emitter.cpp index ebf8c17..23ac3b3 100644 --- a/libs/asmjit/src/asmjit/core/emitter.cpp +++ b/libs/asmjit/src/asmjit/core/emitter.cpp @@ -22,17 +22,19 @@ // 3. This notice may not be removed or altered from any source distribution. #include "../core/api-build_p.h" -#include "../core/logging.h" +#include "../core/emitterutils_p.h" +#include "../core/errorhandler.h" +#include "../core/logger.h" #include "../core/support.h" #ifdef ASMJIT_BUILD_X86 - #include "../x86/x86internal_p.h" + #include "../x86/x86emithelper_p.h" #include "../x86/x86instdb_p.h" #endif // ASMJIT_BUILD_X86 #ifdef ASMJIT_BUILD_ARM - #include "../arm/arminternal_p.h" - #include "../arm/arminstdb.h" + #include "../arm/a64emithelper_p.h" + #include "../arm/a64instdb.h" #endif // ASMJIT_BUILD_ARM ASMJIT_BEGIN_NAMESPACE @@ -41,93 +43,197 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::BaseEmitter - Construction / Destruction] // ============================================================================ -BaseEmitter::BaseEmitter(uint32_t type) noexcept - : _type(uint8_t(type)), - _reserved(0), - _flags(0), - _emitterOptions(0), - _code(nullptr), - _errorHandler(nullptr), - _codeInfo(), - _gpRegInfo(), - _privateData(0), - _instOptions(0), - _globalInstOptions(BaseInst::kOptionReserved), - _extraReg(), - _inlineComment(nullptr) {} +BaseEmitter::BaseEmitter(uint32_t emitterType) noexcept + : _emitterType(uint8_t(emitterType)) {} BaseEmitter::~BaseEmitter() noexcept { if (_code) { - _addFlags(kFlagDestroyed); + _addEmitterFlags(kFlagDestroyed); _code->detach(this); } } // ============================================================================ -// [asmjit::BaseEmitter - Code-Generation] +// [asmjit::BaseEmitter - Finalize] // ============================================================================ -Error BaseEmitter::_emitOpArray(uint32_t instId, const Operand_* operands, size_t count) { - const Operand_* op = operands; - const Operand& none_ = Globals::none; - - switch (count) { - case 0: return _emit(instId, none_, none_, none_, none_); - case 1: return _emit(instId, op[0], none_, none_, none_); - case 2: return _emit(instId, op[0], op[1], none_, none_); - case 3: return _emit(instId, op[0], op[1], op[2], none_); - case 4: return _emit(instId, op[0], op[1], op[2], op[3]); - case 5: return _emit(instId, op[0], op[1], op[2], op[3], op[4], none_); - case 6: return _emit(instId, op[0], op[1], op[2], op[3], op[4], op[5]); - default: return DebugUtils::errored(kErrorInvalidArgument); - } +Error BaseEmitter::finalize() { + // Does nothing by default, overridden by `BaseBuilder` and `BaseCompiler`. + return kErrorOk; } // ============================================================================ -// [asmjit::BaseEmitter - Finalize] +// [asmjit::BaseEmitter - Internals] // ============================================================================ -Label BaseEmitter::labelByName(const char* name, size_t nameSize, uint32_t parentId) noexcept { - return Label(_code ? 
_code->labelIdByName(name, nameSize, parentId) : uint32_t(Globals::kInvalidId)); +static constexpr uint32_t kEmitterPreservedFlags = + BaseEmitter::kFlagOwnLogger | + BaseEmitter::kFlagOwnErrorHandler ; + +static ASMJIT_NOINLINE void BaseEmitter_updateForcedOptions(BaseEmitter* self) noexcept { + bool hasLogger = self->_logger != nullptr; + bool hasValidationOptions; + + if (self->emitterType() == BaseEmitter::kTypeAssembler) + hasValidationOptions = self->hasValidationOption(BaseEmitter::kValidationOptionAssembler); + else + hasValidationOptions = self->hasValidationOption(BaseEmitter::kValidationOptionIntermediate); + + self->_forcedInstOptions &= ~BaseInst::kOptionReserved; + if (hasLogger || hasValidationOptions) + self->_forcedInstOptions |= BaseInst::kOptionReserved; } // ============================================================================ -// [asmjit::BaseEmitter - Finalize] +// [asmjit::BaseEmitter - Validation Options] // ============================================================================ -Error BaseEmitter::finalize() { - // Does nothing by default, overridden by `BaseBuilder` and `BaseCompiler`. - return kErrorOk; +void BaseEmitter::addValidationOptions(uint32_t options) noexcept { + _validationOptions = uint8_t(_validationOptions | options); + BaseEmitter_updateForcedOptions(this); +} + +void BaseEmitter::clearValidationOptions(uint32_t options) noexcept { + _validationOptions = uint8_t(_validationOptions | options); + BaseEmitter_updateForcedOptions(this); +} + +// ============================================================================ +// [asmjit::BaseEmitter - Logging] +// ============================================================================ + +void BaseEmitter::setLogger(Logger* logger) noexcept { +#ifndef ASMJIT_NO_LOGGING + if (logger) { + _logger = logger; + _addEmitterFlags(kFlagOwnLogger); + } + else { + _logger = nullptr; + _clearEmitterFlags(kFlagOwnLogger); + if (_code) + _logger = _code->logger(); + } + BaseEmitter_updateForcedOptions(this); +#else + DebugUtils::unused(logger); +#endif } // ============================================================================ // [asmjit::BaseEmitter - Error Handling] // ============================================================================ -Error BaseEmitter::reportError(Error err, const char* message) { - ErrorHandler* handler = errorHandler(); - if (!handler) { - if (code()) - handler = code()->errorHandler(); +void BaseEmitter::setErrorHandler(ErrorHandler* errorHandler) noexcept { + if (errorHandler) { + _errorHandler = errorHandler; + _addEmitterFlags(kFlagOwnErrorHandler); } + else { + _errorHandler = nullptr; + _clearEmitterFlags(kFlagOwnErrorHandler); + if (_code) + _errorHandler = _code->errorHandler(); + } +} - if (handler) { +Error BaseEmitter::reportError(Error err, const char* message) { + ErrorHandler* eh = _errorHandler; + if (eh) { if (!message) message = DebugUtils::errorAsString(err); - handler->handleError(err, message, this); + eh->handleError(err, message, this); } - return err; } // ============================================================================ -// [asmjit::BaseEmitter - Label Management] +// [asmjit::BaseEmitter - Labels] // ============================================================================ +Label BaseEmitter::labelByName(const char* name, size_t nameSize, uint32_t parentId) noexcept { + return Label(_code ? 
_code->labelIdByName(name, nameSize, parentId) : uint32_t(Globals::kInvalidId)); +} + bool BaseEmitter::isLabelValid(uint32_t labelId) const noexcept { return _code && labelId < _code->labelCount(); } +// ============================================================================ +// [asmjit::BaseEmitter - Emit (Low-Level)] +// ============================================================================ + +using EmitterUtils::noExt; + +Error BaseEmitter::_emitI(uint32_t instId) { + return _emit(instId, noExt[0], noExt[1], noExt[2], noExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0) { + return _emit(instId, o0, noExt[1], noExt[2], noExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0, const Operand_& o1) { + return _emit(instId, o0, o1, noExt[2], noExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2) { + return _emit(instId, o0, o1, o2, noExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) { + Operand_ opExt[3] = { o3 }; + return _emit(instId, o0, o1, o2, opExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4) { + Operand_ opExt[3] = { o3, o4 }; + return _emit(instId, o0, o1, o2, opExt); +} + +Error BaseEmitter::_emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) { + Operand_ opExt[3] = { o3, o4, o5 }; + return _emit(instId, o0, o1, o2, opExt); +} + +Error BaseEmitter::_emitOpArray(uint32_t instId, const Operand_* operands, size_t opCount) { + const Operand_* op = operands; + + Operand_ opExt[3]; + + switch (opCount) { + case 0: + return _emit(instId, noExt[0], noExt[1], noExt[2], noExt); + + case 1: + return _emit(instId, op[0], noExt[1], noExt[2], noExt); + + case 2: + return _emit(instId, op[0], op[1], noExt[2], noExt); + + case 3: + return _emit(instId, op[0], op[1], op[2], noExt); + + case 4: + opExt[0] = op[3]; + opExt[1].reset(); + opExt[2].reset(); + return _emit(instId, op[0], op[1], op[2], opExt); + + case 5: + opExt[0] = op[3]; + opExt[1] = op[4]; + opExt[2].reset(); + return _emit(instId, op[0], op[1], op[2], opExt); + + case 6: + return _emit(instId, op[0], op[1], op[2], op + 3); + + default: + return DebugUtils::errored(kErrorInvalidArgument); + } +} + // ============================================================================ // [asmjit::BaseEmitter - Emit (High-Level)] // ============================================================================ @@ -137,13 +243,17 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitProlog(const FuncFrame& frame) { return DebugUtils::errored(kErrorNotInitialized); #ifdef ASMJIT_BUILD_X86 - if (archInfo().isX86Family()) - return x86::X86Internal::emitProlog(as(), frame); + if (environment().isFamilyX86()) { + x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + return emitHelper.emitProlog(frame); + } #endif #ifdef ASMJIT_BUILD_ARM - if (archInfo().isArmFamily()) - return arm::ArmInternal::emitProlog(as(), frame); + if (environment().isArchAArch64()) { + a64::EmitHelper emitHelper(this); + return emitHelper.emitProlog(frame); + } #endif return DebugUtils::errored(kErrorInvalidArch); @@ -154,13 +264,17 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitEpilog(const FuncFrame& frame) { return DebugUtils::errored(kErrorNotInitialized); #ifdef ASMJIT_BUILD_X86 - if 
(archInfo().isX86Family()) - return x86::X86Internal::emitEpilog(as(), frame); + if (environment().isFamilyX86()) { + x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + return emitHelper.emitEpilog(frame); + } #endif #ifdef ASMJIT_BUILD_ARM - if (archInfo().isArmFamily()) - return arm::ArmInternal::emitEpilog(as(), frame); + if (environment().isArchAArch64()) { + a64::EmitHelper emitHelper(this); + return emitHelper.emitEpilog(frame); + } #endif return DebugUtils::errored(kErrorInvalidArch); @@ -171,13 +285,17 @@ ASMJIT_FAVOR_SIZE Error BaseEmitter::emitArgsAssignment(const FuncFrame& frame, return DebugUtils::errored(kErrorNotInitialized); #ifdef ASMJIT_BUILD_X86 - if (archInfo().isX86Family()) - return x86::X86Internal::emitArgsAssignment(as(), frame, args); + if (environment().isFamilyX86()) { + x86::EmitHelper emitHelper(this, frame.isAvxEnabled()); + return emitHelper.emitArgsAssignment(frame, args); + } #endif #ifdef ASMJIT_BUILD_ARM - if (archInfo().isArmFamily()) - return arm::ArmInternal::emitArgsAssignment(as(), frame, args); + if (environment().isArchAArch64()) { + a64::EmitHelper emitHelper(this); + return emitHelper.emitArgsAssignment(frame, args); + } #endif return DebugUtils::errored(kErrorInvalidArch); @@ -192,17 +310,11 @@ Error BaseEmitter::commentf(const char* fmt, ...) { return DebugUtils::errored(kErrorNotInitialized); #ifndef ASMJIT_NO_LOGGING - StringTmp<1024> sb; - va_list ap; va_start(ap, fmt); - Error err = sb.appendVFormat(fmt, ap); + Error err = commentv(fmt, ap); va_end(ap); - - if (ASMJIT_UNLIKELY(err)) - return err; - - return comment(sb.data(), sb.size()); + return err; #else DebugUtils::unused(fmt); return kErrorOk; @@ -215,8 +327,8 @@ Error BaseEmitter::commentv(const char* fmt, va_list ap) { #ifndef ASMJIT_NO_LOGGING StringTmp<1024> sb; - Error err = sb.appendVFormat(fmt, ap); + if (ASMJIT_UNLIKELY(err)) return err; @@ -233,40 +345,50 @@ Error BaseEmitter::commentv(const char* fmt, va_list ap) { Error BaseEmitter::onAttach(CodeHolder* code) noexcept { _code = code; - _codeInfo = code->codeInfo(); - _emitterOptions = code->emitterOptions(); + _environment = code->environment(); + + const ArchTraits& archTraits = ArchTraits::byArch(code->arch()); + uint32_t nativeRegType = Environment::is32Bit(code->arch()) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64; + _gpRegInfo.setSignature(archTraits._regInfo[nativeRegType].signature()); - onUpdateGlobalInstOptions(); + onSettingsUpdated(); return kErrorOk; } Error BaseEmitter::onDetach(CodeHolder* code) noexcept { DebugUtils::unused(code); - _flags = 0; - _emitterOptions = 0; - _errorHandler = nullptr; + _clearEmitterFlags(~kEmitterPreservedFlags); + _forcedInstOptions = BaseInst::kOptionReserved; + _privateData = 0; - _codeInfo.reset(); + if (!hasOwnLogger()) + _logger = nullptr; + + if (!hasOwnErrorHandler()) + _errorHandler = nullptr; + + _environment.reset(); _gpRegInfo.reset(); - _privateData = 0; _instOptions = 0; - _globalInstOptions = BaseInst::kOptionReserved; _extraReg.reset(); _inlineComment = nullptr; return kErrorOk; } -void BaseEmitter::onUpdateGlobalInstOptions() noexcept { - constexpr uint32_t kCriticalEmitterOptions = - kOptionLoggingEnabled | - kOptionStrictValidation ; +void BaseEmitter::onSettingsUpdated() noexcept { + // Only called when attached to CodeHolder by CodeHolder. 
+ ASMJIT_ASSERT(_code != nullptr); + + if (!hasOwnLogger()) + _logger = _code->logger(); + + if (!hasOwnErrorHandler()) + _errorHandler = _code->errorHandler(); - _globalInstOptions &= ~BaseInst::kOptionReserved; - if ((_emitterOptions & kCriticalEmitterOptions) != 0) - _globalInstOptions |= BaseInst::kOptionReserved; + BaseEmitter_updateForcedOptions(this); } ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/emitter.h b/libs/asmjit/src/asmjit/core/emitter.h index 5855584..51651fd 100644 --- a/libs/asmjit/src/asmjit/core/emitter.h +++ b/libs/asmjit/src/asmjit/core/emitter.h @@ -24,10 +24,11 @@ #ifndef ASMJIT_CORE_EMITTER_H_INCLUDED #define ASMJIT_CORE_EMITTER_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" +#include "../core/codeholder.h" #include "../core/inst.h" #include "../core/operand.h" -#include "../core/codeholder.h" +#include "../core/type.h" ASMJIT_BEGIN_NAMESPACE @@ -52,75 +53,77 @@ class ASMJIT_VIRTAPI BaseEmitter { public: ASMJIT_BASE_CLASS(BaseEmitter) - //! See `EmitterType`. - uint8_t _type; - //! Reserved for future use. - uint8_t _reserved; - //! See \ref BaseEmitter::Flags. - uint16_t _flags; - //! Emitter options, always in sync with CodeHolder. - uint32_t _emitterOptions; - - //! CodeHolder the BaseEmitter is attached to. - CodeHolder* _code; - //! Attached `ErrorHandler`. - ErrorHandler* _errorHandler; - - //! Basic information about the code (matches CodeHolder::_codeInfo). - CodeInfo _codeInfo; - //! Native GP register signature and signature related information. - RegInfo _gpRegInfo; + //! See \ref EmitterType. + uint8_t _emitterType = 0; + //! See \ref BaseEmitter::EmitterFlags. + uint8_t _emitterFlags = 0; + //! Validation flags in case validation is used, see \ref InstAPI::ValidationFlags. + //! + //! \note Validation flags are specific to the emitter and they are setup at + //! construction time and then never changed. + uint8_t _validationFlags = 0; + //! Validation options, see \ref ValidationOptions. + uint8_t _validationOptions = 0; + + //! Encoding options, see \ref EncodingOptions. + uint32_t _encodingOptions = 0; + + //! Forced instruction options, combined with \ref _instOptions by \ref emit(). + uint32_t _forcedInstOptions = BaseInst::kOptionReserved; //! Internal private data used freely by any emitter. - uint32_t _privateData; + uint32_t _privateData = 0; + + //! CodeHolder the emitter is attached to. + CodeHolder* _code = nullptr; + //! Attached \ref Logger. + Logger* _logger = nullptr; + //! Attached \ref ErrorHandler. + ErrorHandler* _errorHandler = nullptr; + + //! Describes the target environment, matches \ref CodeHolder::environment(). + Environment _environment {}; + //! Native GP register signature and signature related information. + RegInfo _gpRegInfo {}; //! Next instruction options (affects the next instruction). - uint32_t _instOptions; - //! Global Instruction options (combined with `_instOptions` by `emit...()`). - uint32_t _globalInstOptions; + uint32_t _instOptions = 0; //! Extra register (op-mask {k} on AVX-512) (affects the next instruction). - RegOnly _extraReg; + RegOnly _extraReg {}; //! Inline comment of the next instruction (affects the next instruction). - const char* _inlineComment; + const char* _inlineComment = nullptr; //! Emitter type. enum EmitterType : uint32_t { //! Unknown or uninitialized. kTypeNone = 0, - //! Emitter inherits from `BaseAssembler`. + //! Emitter inherits from \ref BaseAssembler. kTypeAssembler = 1, - //! Emitter inherits from `BaseBuilder`. + //! 
Emitter inherits from \ref BaseBuilder. kTypeBuilder = 2, - //! Emitter inherits from `BaseCompiler`. + //! Emitter inherits from \ref BaseCompiler. kTypeCompiler = 3, + //! Count of emitter types. kTypeCount = 4 }; //! Emitter flags. - enum Flags : uint32_t { + enum EmitterFlags : uint32_t { + //! The emitter has its own \ref Logger (not propagated from \ref CodeHolder). + kFlagOwnLogger = 0x10u, + //! The emitter has its own \ref ErrorHandler (not propagated from \ref CodeHolder). + kFlagOwnErrorHandler = 0x20u, //! The emitter was finalized. - kFlagFinalized = 0x4000u, + kFlagFinalized = 0x40u, //! The emitter was destroyed. - kFlagDestroyed = 0x8000u + kFlagDestroyed = 0x80u }; - //! Emitter options. - enum Options : uint32_t { - //! Logging is enabled, `BaseEmitter::logger()` must return a valid logger. - //! This option is set automatically by the emitter if the logger is present. - //! User code should never alter this value. - //! - //! Default `false`. - kOptionLoggingEnabled = 0x00000001u, - - //! Stricly validate each instruction before it's emitted. - //! - //! Default `false`. - kOptionStrictValidation = 0x00000002u, - + //! Encoding options. + enum EncodingOptions : uint32_t { //! Emit instructions that are optimized for size, if possible. //! - //! Default `false`. + //! Default: false. //! //! X86 Specific //! ------------ @@ -130,11 +133,11 @@ class ASMJIT_VIRTAPI BaseEmitter { //! by taking advantage of implicit zero extension. For example instruction //! like `mov r64, imm` and `and r64, imm` can be translated to `mov r32, imm` //! and `and r32, imm` when the immediate constant is lesser than `2^31`. - kOptionOptimizedForSize = 0x00000004u, + kEncodingOptionOptimizeForSize = 0x00000001u, //! Emit optimized code-alignment sequences. //! - //! Default `false`. + //! Default: false. //! //! X86 Specific //! ------------ @@ -144,11 +147,11 @@ class ASMJIT_VIRTAPI BaseEmitter { //! more optimized align sequences for 2-11 bytes that may execute faster //! on certain CPUs. If this feature is enabled AsmJit will generate //! specialized sequences for alignment between 2 to 11 bytes. - kOptionOptimizedAlign = 0x00000008u, + kEncodingOptionOptimizedAlign = 0x00000002u, //! Emit jump-prediction hints. //! - //! Default `false`. + //! Default: false. //! //! X86 Specific //! ------------ @@ -163,14 +166,56 @@ class ASMJIT_VIRTAPI BaseEmitter { //! This feature is disabled by default, because the only processor that //! used to take into consideration prediction hints was P4. Newer processors //! implement heuristics for branch prediction and ignore static hints. This - //! means that this feature can be used for annotation purposes. - kOptionPredictedJumps = 0x00000010u + //! means that this feature can be only used for annotation purposes. + kEncodingOptionPredictedJumps = 0x00000010u + }; + +#ifndef ASMJIT_NO_DEPRECATED + enum EmitterOptions : uint32_t { + kOptionOptimizedForSize = kEncodingOptionOptimizeForSize, + kOptionOptimizedAlign = kEncodingOptionOptimizedAlign, + kOptionPredictedJumps = kEncodingOptionPredictedJumps + }; +#endif + + //! Validation options are used to tell emitters to perform strict validation + //! of instructions passed to \ref emit(). + //! + //! \ref BaseAssembler implementation perform by default only basic checks + //! that are necessary to identify all variations of an instruction so the + //! correct encoding can be selected. This is fine for production-ready code + //! as the assembler doesn't have to perform checks that would slow it down. + //! 
However, sometimes these checks are beneficial especially when the project + //! that uses AsmJit is in a development phase, in which mistakes happen often. + //! To make the experience of using AsmJit seamless it offers validation + //! features that can be controlled by `ValidationOptions`. + enum ValidationOptions : uint32_t { + //! Perform strict validation in \ref BaseAssembler::emit() implementations. + //! + //! This flag ensures that each instruction is checked before it's encoded + //! into a binary representation. This flag is only relevant for \ref + //! BaseAssembler implementations, but can be set in any other emitter type, + //! in that case if that emitter needs to create an assembler on its own, + //! for the purpose of \ref finalize() it would propagate this flag to such + //! assembler so all instructions passed to it are explicitly validated. + //! + //! Default: false. + kValidationOptionAssembler = 0x00000001u, + + //! Perform strict validation in \ref BaseBuilder::emit() and \ref + //! BaseCompiler::emit() implementations. + //! + //! This flag ensures that each instruction is checked before an \ref + //! InstNode representing the instruction is created by Builder or Compiler. + //! + //! Default: false. + kValidationOptionIntermediate = 0x00000002u }; //! \name Construction & Destruction //! \{ - ASMJIT_API explicit BaseEmitter(uint32_t type) noexcept; + ASMJIT_API explicit BaseEmitter(uint32_t emitterType) noexcept; ASMJIT_API virtual ~BaseEmitter() noexcept; //! \} @@ -190,28 +235,28 @@ class ASMJIT_VIRTAPI BaseEmitter { //! \{ //! Returns the type of this emitter, see `EmitterType`. - inline uint32_t emitterType() const noexcept { return _type; } + inline uint32_t emitterType() const noexcept { return _emitterType; } //! Returns emitter flags , see `Flags`. - inline uint32_t emitterFlags() const noexcept { return _flags; } + inline uint32_t emitterFlags() const noexcept { return _emitterFlags; } //! Tests whether the emitter inherits from `BaseAssembler`. - inline bool isAssembler() const noexcept { return _type == kTypeAssembler; } + inline bool isAssembler() const noexcept { return _emitterType == kTypeAssembler; } //! Tests whether the emitter inherits from `BaseBuilder`. //! //! \note Both Builder and Compiler emitters would return `true`. - inline bool isBuilder() const noexcept { return _type >= kTypeBuilder; } + inline bool isBuilder() const noexcept { return _emitterType >= kTypeBuilder; } //! Tests whether the emitter inherits from `BaseCompiler`. - inline bool isCompiler() const noexcept { return _type == kTypeCompiler; } + inline bool isCompiler() const noexcept { return _emitterType == kTypeCompiler; } //! Tests whether the emitter has the given `flag` enabled. - inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + inline bool hasEmitterFlag(uint32_t flag) const noexcept { return (_emitterFlags & flag) != 0; } //! Tests whether the emitter is finalized. - inline bool isFinalized() const noexcept { return hasFlag(kFlagFinalized); } + inline bool isFinalized() const noexcept { return hasEmitterFlag(kFlagFinalized); } //! Tests whether the emitter is destroyed (only used during destruction). 
- inline bool isDestroyed() const noexcept { return hasFlag(kFlagDestroyed); } + inline bool isDestroyed() const noexcept { return hasEmitterFlag(kFlagDestroyed); } - inline void _addFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags | flags); } - inline void _clearFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags & ~flags); } + inline void _addEmitterFlags(uint32_t flags) noexcept { _emitterFlags = uint8_t(_emitterFlags | flags); } + inline void _clearEmitterFlags(uint32_t flags) noexcept { _emitterFlags = uint8_t(_emitterFlags & ~flags); } //! \} @@ -220,90 +265,187 @@ class ASMJIT_VIRTAPI BaseEmitter { //! Returns the CodeHolder this emitter is attached to. inline CodeHolder* code() const noexcept { return _code; } - //! Returns an information about the code, see `CodeInfo`. - inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; } - //! Returns an information about the architecture, see `ArchInfo`. - inline const ArchInfo& archInfo() const noexcept { return _codeInfo.archInfo(); } + + //! Returns the target environment, see \ref Environment. + //! + //! The returned \ref Environment reference matches \ref CodeHolder::environment(). + inline const Environment& environment() const noexcept { return _environment; } //! Tests whether the target architecture is 32-bit. - inline bool is32Bit() const noexcept { return archInfo().is32Bit(); } + inline bool is32Bit() const noexcept { return environment().is32Bit(); } //! Tests whether the target architecture is 64-bit. - inline bool is64Bit() const noexcept { return archInfo().is64Bit(); } + inline bool is64Bit() const noexcept { return environment().is64Bit(); } //! Returns the target architecture type. - inline uint32_t archId() const noexcept { return archInfo().archId(); } + inline uint32_t arch() const noexcept { return environment().arch(); } //! Returns the target architecture sub-type. - inline uint32_t archSubId() const noexcept { return archInfo().archSubId(); } + inline uint32_t subArch() const noexcept { return environment().subArch(); } + //! Returns the target architecture's GP register size (4 or 8 bytes). - inline uint32_t gpSize() const noexcept { return archInfo().gpSize(); } - //! Returns the number of target GP registers. - inline uint32_t gpCount() const noexcept { return archInfo().gpCount(); } + inline uint32_t registerSize() const noexcept { return environment().registerSize(); } //! \} //! \name Initialization & Finalization //! \{ - //! Tests whether the BaseEmitter is initialized (i.e. attached to the `CodeHolder`). + //! Tests whether the emitter is initialized (i.e. attached to \ref CodeHolder). inline bool isInitialized() const noexcept { return _code != nullptr; } + //! Finalizes this emitter. + //! + //! Materializes the content of the emitter by serializing it to the attached + //! \ref CodeHolder through an architecture specific \ref BaseAssembler. This + //! function won't do anything if the emitter inherits from \ref BaseAssembler + //! as assemblers emit directly to a \ref CodeBuffer held by \ref CodeHolder. + //! However, if this is an emitter that inherits from \ref BaseBuilder or \ref + //! BaseCompiler then these emitters need the materialization phase as they + //! store their content in a representation not visible to \ref CodeHolder. ASMJIT_API virtual Error finalize(); //! \} - //! \name Emitter Options + //! \name Logging //! \{ - //! Tests whether the `option` is present in emitter options. 
- inline bool hasEmitterOption(uint32_t option) const noexcept { return (_emitterOptions & option) != 0; } - //! Returns the emitter options. - inline uint32_t emitterOptions() const noexcept { return _emitterOptions; } + //! Tests whether the emitter has a logger. + inline bool hasLogger() const noexcept { return _logger != nullptr; } - // TODO: Deprecate and remove, CodeHolder::addEmitterOptions() is the way. - inline void addEmitterOptions(uint32_t options) noexcept { - _emitterOptions |= options; - onUpdateGlobalInstOptions(); - } + //! Tests whether the emitter has its own logger. + //! + //! Own logger means that it overrides the possible logger that may be used + //! by \ref CodeHolder this emitter is attached to. + inline bool hasOwnLogger() const noexcept { return hasEmitterFlag(kFlagOwnLogger); } - inline void clearEmitterOptions(uint32_t options) noexcept { - _emitterOptions &= ~options; - onUpdateGlobalInstOptions(); - } + //! Returns the logger this emitter uses. + //! + //! The returned logger is either the emitter's own logger or it's logger + //! used by \ref CodeHolder this emitter is attached to. + inline Logger* logger() const noexcept { return _logger; } + + //! Sets or resets the logger of the emitter. + //! + //! If the `logger` argument is non-null then the logger will be considered + //! emitter's own logger, see \ref hasOwnLogger() for more details. If the + //! given `logger` is null then the emitter will automatically use logger + //! that is attached to the \ref CodeHolder this emitter is attached to. + ASMJIT_API void setLogger(Logger* logger) noexcept; - //! Returns the global instruction options. + //! Resets the logger of this emitter. //! - //! Default instruction options are merged with instruction options before the - //! instruction is encoded. These options have some bits reserved that are used - //! for error handling, logging, and strict validation. Other options are globals that - //! affect each instruction, for example if VEX3 is set globally, it will all - //! instructions, even those that don't have such option set. - inline uint32_t globalInstOptions() const noexcept { return _globalInstOptions; } + //! The emitter will bail to using a logger attached to \ref CodeHolder this + //! emitter is attached to, or no logger at all if \ref CodeHolder doesn't + //! have one. + inline void resetLogger() noexcept { return setLogger(nullptr); } //! \} //! \name Error Handling //! \{ - //! Tests whether the local error handler is attached. + //! Tests whether the emitter has an error handler attached. inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; } - //! Returns the local error handler. + + //! Tests whether the emitter has its own error handler. + //! + //! Own error handler means that it overrides the possible error handler that + //! may be used by \ref CodeHolder this emitter is attached to. + inline bool hasOwnErrorHandler() const noexcept { return hasEmitterFlag(kFlagOwnErrorHandler); } + + //! Returns the error handler this emitter uses. + //! + //! The returned error handler is either the emitter's own error handler or + //! it's error handler used by \ref CodeHolder this emitter is attached to. inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; } - //! Sets the local error handler. - inline void setErrorHandler(ErrorHandler* handler) noexcept { _errorHandler = handler; } - //! Resets the local error handler (does nothing if not attached). + + //! 
Sets or resets the error handler of the emitter. + ASMJIT_API void setErrorHandler(ErrorHandler* errorHandler) noexcept; + + //! Resets the error handler. inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); } //! Handles the given error in the following way: - //! 1. Gets either Emitter's (preferred) or CodeHolder's ErrorHandler. - //! 2. If exists, calls `ErrorHandler::handleError(error, message, this)`. - //! 3. Returns the given `err` if ErrorHandler haven't thrown. + //! 1. If the emitter has \ref ErrorHandler attached, it calls its + //! \ref ErrorHandler::handleError() member function first, and + //! then returns the error. The `handleError()` function may throw. + //! 2. if the emitter doesn't have \ref ErrorHandler, the error is + //! simply returned. ASMJIT_API Error reportError(Error err, const char* message = nullptr); //! \} + //! \name Encoding Options + //! \{ + + //! Returns encoding options, see \ref EncodingOptions. + inline uint32_t encodingOptions() const noexcept { return _encodingOptions; } + //! Tests whether the encoding `option` is set. + inline bool hasEncodingOption(uint32_t option) const noexcept { return (_encodingOptions & option) != 0; } + + //! Enables the given encoding `options`, see \ref EncodingOptions. + inline void addEncodingOptions(uint32_t options) noexcept { _encodingOptions |= options; } + //! Disables the given encoding `options`, see \ref EncodingOptions. + inline void clearEncodingOptions(uint32_t options) noexcept { _encodingOptions &= ~options; } + + //! \} + + //! \name Validation Options + //! \{ + + //! Returns the emitter's validation options, see \ref ValidationOptions. + inline uint32_t validationOptions() const noexcept { + return _validationOptions; + } + + //! Tests whether the given `option` is present in validation options. + inline bool hasValidationOption(uint32_t option) const noexcept { + return (_validationOptions & option) != 0; + } + + //! Activates the given validation `options`, see \ref ValidationOptions. + //! + //! This function is used to activate explicit validation options that will + //! be then used by all emitter implementations. There are in general two + //! possibilities: + //! + //! - Architecture specific assembler is used. In this case a + //! \ref kValidationOptionAssembler can be used to turn on explicit + //! validation that will be used before an instruction is emitted. + //! This means that internally an extra step will be performed to + //! make sure that the instruction is correct. This is needed, because + //! by default assemblers prefer speed over strictness. + //! + //! This option should be used in debug builds as it's pretty expensive. + //! + //! - Architecture specific builder or compiler is used. In this case + //! the user can turn on \ref kValidationOptionIntermediate option + //! that adds explicit validation step before the Builder or Compiler + //! creates an \ref InstNode to represent an emitted instruction. Error + //! will be returned if the instruction is ill-formed. In addition, + //! also \ref kValidationOptionAssembler can be used, which would not be + //! consumed by Builder / Compiler directly, but it would be propagated + //! to an architecture specific \ref BaseAssembler implementation it + //! creates during \ref BaseEmitter::finalize(). + ASMJIT_API void addValidationOptions(uint32_t options) noexcept; + + //! Deactivates the given validation `options`. + //! + //! See \ref addValidationOptions() and \ref ValidationOptions for more details. 
+ ASMJIT_API void clearValidationOptions(uint32_t options) noexcept; + + //! \} + //! \name Instruction Options //! \{ + //! Returns forced instruction options. + //! + //! Forced instruction options are merged with next instruction options before + //! the instruction is encoded. These options have some bits reserved that are + //! used by error handling, logging, and instruction validation purposes. Other + //! options are globals that affect each instruction. + inline uint32_t forcedInstOptions() const noexcept { return _forcedInstOptions; } + //! Returns options of the next instruction. inline uint32_t instOptions() const noexcept { return _instOptions; } //! Returns options of the next instruction. @@ -352,6 +494,11 @@ class ASMJIT_VIRTAPI BaseEmitter { //! Creates a new named label. virtual Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, uint32_t type = Label::kTypeGlobal, uint32_t parentId = Globals::kInvalidId) = 0; + //! Creates a new external label. + inline Label newExternalLabel(const char* name, size_t nameSize = SIZE_MAX) { + return newNamedLabel(name, nameSize, Label::kTypeExternal); + } + //! Returns `Label` by `name`. //! //! Returns invalid Label in case that the name is invalid or label was not found. @@ -377,100 +524,49 @@ class ASMJIT_VIRTAPI BaseEmitter { // NOTE: These `emit()` helpers are designed to address a code-bloat generated // by C++ compilers to call a function having many arguments. Each parameter to - // `_emit()` requires some code to pass it, which means that if we default to 4 - // operand parameters in `_emit()` and instId the C++ compiler would have to - // generate a virtual function call having 5 parameters, which is quite a lot. - // Since by default asm instructions have 2 to 3 operands it's better to - // introduce helpers that pass those and fill out the remaining operands. - - #define OP const Operand_& - #define NONE Globals::none - - //! Emits an instruction. - ASMJIT_NOINLINE Error emit(uint32_t instId) { return _emit(instId, NONE, NONE, NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0) { return _emit(instId, o0, NONE, NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1) { return _emit(instId, o0, o1, NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2) { return _emit(instId, o0, o1, o2, NONE); } - //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3) { return _emit(instId, o0, o1, o2, o3); } - //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4) { return _emit(instId, o0, o1, o2, o3, o4, NONE); } - //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, OP o5) { return _emit(instId, o0, o1, o2, o3, o4, o5); } - - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, int o0) { return _emit(instId, Imm(o0), NONE, NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, int o1) { return _emit(instId, o0, Imm(o1), NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, int o2) { return _emit(instId, o0, o1, Imm(o2), NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, int o3) { return _emit(instId, o0, o1, o2, Imm(o3)); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, int o4) { return _emit(instId, o0, o1, o2, o3, Imm(o4), NONE); } - //! 
\overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, int o5) { return _emit(instId, o0, o1, o2, o3, o4, Imm(o5)); } - - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, int64_t o0) { return _emit(instId, Imm(o0), NONE, NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, int64_t o1) { return _emit(instId, o0, Imm(o1), NONE, NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, int64_t o2) { return _emit(instId, o0, o1, Imm(o2), NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, int64_t o3) { return _emit(instId, o0, o1, o2, Imm(o3)); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, int64_t o4) { return _emit(instId, o0, o1, o2, o3, Imm(o4), NONE); } - //! \overload - ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, int64_t o5) { return _emit(instId, o0, o1, o2, o3, o4, Imm(o5)); } - - //! \overload - inline Error emit(uint32_t instId, unsigned int o0) { return emit(instId, int64_t(o0)); } + // `_emit()` requires some code to pass it, which means that if we default to + // 5 arguments in `_emit()` and instId the C++ compiler would have to generate + // a virtual function call having 5 parameters and additional `this` argument, + // which is quite a lot. Since by default most instructions have 2 to 3 operands + // it's better to introduce helpers that pass from 0 to 6 operands that help to + // reduce the size of emit(...) function call. + + //! Emits an instruction (internal). + ASMJIT_API Error _emitI(uint32_t instId); //! \overload - inline Error emit(uint32_t instId, OP o0, unsigned int o1) { return emit(instId, o0, int64_t(o1)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0); //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, unsigned int o2) { return emit(instId, o0, o1, int64_t(o2)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0, const Operand_& o1); //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, unsigned int o3) { return emit(instId, o0, o1, o2, int64_t(o3)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2); //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, unsigned int o4) { return emit(instId, o0, o1, o2, o3, int64_t(o4)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3); //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, unsigned int o5) { return emit(instId, o0, o1, o2, o3, o4, int64_t(o5)); } - - //! \overload - inline Error emit(uint32_t instId, uint64_t o0) { return emit(instId, int64_t(o0)); } - //! \overload - inline Error emit(uint32_t instId, OP o0, uint64_t o1) { return emit(instId, o0, int64_t(o1)); } - //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, uint64_t o2) { return emit(instId, o0, o1, int64_t(o2)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4); //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, uint64_t o3) { return emit(instId, o0, o1, o2, int64_t(o3)); } - //! \overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, uint64_t o4) { return emit(instId, o0, o1, o2, o3, int64_t(o4)); } - //! 
\overload - inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, uint64_t o5) { return emit(instId, o0, o1, o2, o3, o4, int64_t(o5)); } + ASMJIT_API Error _emitI(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5); - #undef NONE - #undef OP + //! Emits an instruction `instId` with the given `operands`. + template + ASMJIT_INLINE Error emit(uint32_t instId, Args&&... operands) { + return _emitI(instId, Support::ForwardOp::forward(operands)...); + } - inline Error emitOpArray(uint32_t instId, const Operand_* operands, size_t count) { return _emitOpArray(instId, operands, count); } + inline Error emitOpArray(uint32_t instId, const Operand_* operands, size_t opCount) { + return _emitOpArray(instId, operands, opCount); + } - inline Error emitInst(const BaseInst& inst, const Operand_* operands, size_t count) { + inline Error emitInst(const BaseInst& inst, const Operand_* operands, size_t opCount) { setInstOptions(inst.options()); setExtraReg(inst.extraReg()); - return _emitOpArray(inst.id(), operands, count); + return _emitOpArray(inst.id(), operands, opCount); } //! \cond INTERNAL - //! Emits instruction having max 4 operands. - virtual Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) = 0; - //! Emits instruction having max 6 operands. - virtual Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) = 0; + //! Emits an instruction - all 6 operands must be defined. + virtual Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* oExt) = 0; //! Emits instruction having operands stored in array. - virtual Error _emitOpArray(uint32_t instId, const Operand_* operands, size_t count); + ASMJIT_API virtual Error _emitOpArray(uint32_t instId, const Operand_* operands, size_t opCount); //! \endcond //! \} @@ -478,19 +574,19 @@ class ASMJIT_VIRTAPI BaseEmitter { //! \name Emit Utilities //! \{ - ASMJIT_API Error emitProlog(const FuncFrame& layout); - ASMJIT_API Error emitEpilog(const FuncFrame& layout); - ASMJIT_API Error emitArgsAssignment(const FuncFrame& layout, const FuncArgsAssignment& args); + ASMJIT_API Error emitProlog(const FuncFrame& frame); + ASMJIT_API Error emitEpilog(const FuncFrame& frame); + ASMJIT_API Error emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args); //! \} //! \name Align //! \{ - //! Aligns the current CodeBuffer to the `alignment` specified. + //! Aligns the current CodeBuffer position to the `alignment` specified. //! //! The sequence that is used to fill the gap between the aligned location - //! and the current location depends on the align `mode`, see `AlignMode`. + //! and the current location depends on the align `mode`, see \ref AlignMode. virtual Error align(uint32_t alignMode, uint32_t alignment) = 0; //! \} @@ -498,16 +594,38 @@ class ASMJIT_VIRTAPI BaseEmitter { //! \name Embed //! \{ - //! Embeds raw data into the CodeBuffer. - virtual Error embed(const void* data, uint32_t dataSize) = 0; + //! Embeds raw data into the \ref CodeBuffer. + virtual Error embed(const void* data, size_t dataSize) = 0; - //! Embeds an absolute label address as data (4 or 8 bytes). - virtual Error embedLabel(const Label& label) = 0; - - //! Embeds a delta (distance) between the `label` and `base` calculating it - //! as `label - base`. 
This function was designed to make it easier to embed - //! lookup tables where each index is a relative distance of two labels. - virtual Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) = 0; + //! Embeds a typed data array. + //! + //! This is the most flexible function for embedding data as it allows to: + //! - Assign a `typeId` to the data, so the emitter knows the type of + //! items stored in `data`. Binary data should use \ref Type::kIdU8. + //! - Repeat the given data `repeatCount` times, so the data can be used + //! as a fill pattern for example, or as a pattern used by SIMD instructions. + virtual Error embedDataArray(uint32_t typeId, const void* data, size_t itemCount, size_t repeatCount = 1) = 0; + + //! Embeds int8_t `value` repeated by `repeatCount`. + inline Error embedInt8(int8_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdI8, &value, 1, repeatCount); } + //! Embeds uint8_t `value` repeated by `repeatCount`. + inline Error embedUInt8(uint8_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdU8, &value, 1, repeatCount); } + //! Embeds int16_t `value` repeated by `repeatCount`. + inline Error embedInt16(int16_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdI16, &value, 1, repeatCount); } + //! Embeds uint16_t `value` repeated by `repeatCount`. + inline Error embedUInt16(uint16_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdU16, &value, 1, repeatCount); } + //! Embeds int32_t `value` repeated by `repeatCount`. + inline Error embedInt32(int32_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdI32, &value, 1, repeatCount); } + //! Embeds uint32_t `value` repeated by `repeatCount`. + inline Error embedUInt32(uint32_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdU32, &value, 1, repeatCount); } + //! Embeds int64_t `value` repeated by `repeatCount`. + inline Error embedInt64(int64_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdI64, &value, 1, repeatCount); } + //! Embeds uint64_t `value` repeated by `repeatCount`. + inline Error embedUInt64(uint64_t value, size_t repeatCount = 1) { return embedDataArray(Type::kIdU64, &value, 1, repeatCount); } + //! Embeds a floating point `value` repeated by `repeatCount`. + inline Error embedFloat(float value, size_t repeatCount = 1) { return embedDataArray(Type::kIdF32, &value, 1, repeatCount); } + //! Embeds a floating point `value` repeated by `repeatCount`. + inline Error embedDouble(double value, size_t repeatCount = 1) { return embedDataArray(Type::IdOfT::kTypeId, &value, 1, repeatCount); } //! Embeds a constant pool at the current offset by performing the following: //! 1. Aligns by using kAlignData to the minimum `pool` alignment. @@ -515,6 +633,18 @@ class ASMJIT_VIRTAPI BaseEmitter { //! 3. Emits ConstPool content. virtual Error embedConstPool(const Label& label, const ConstPool& pool) = 0; + //! Embeds an absolute `label` address as data. + //! + //! The `dataSize` is an optional argument that can be used to specify the + //! size of the address data. If it's zero (default) the address size is + //! deduced from the target architecture (either 4 or 8 bytes). + virtual Error embedLabel(const Label& label, size_t dataSize = 0) = 0; + + //! Embeds a delta (distance) between the `label` and `base` calculating it + //! as `label - base`. This function was designed to make it easier to embed + //! lookup tables where each index is a relative distance of two labels. 
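A short sketch of the embed helpers declared above, together with the variadic `emit()` wrapper from the previous hunk. The `x86::Assembler`, `x86::Inst::kIdRet`, `newLabel()` and `bind()` calls are assumptions used only to give the helpers some surrounding context.

```
#include <asmjit/x86.h>
using namespace asmjit;

// Emit a trivial function followed by a small 32-bit data table.
static Error emitWithTable(x86::Assembler& a) {
  Label table = a.newLabel();
  ASMJIT_PROPAGATE(a.emit(x86::Inst::kIdRet));  // variadic emit() helper
  ASMJIT_PROPAGATE(a.bind(table));              // the data follows the code
  // Four uint32_t entries (embedDataArray(Type::kIdU32, ...) underneath).
  ASMJIT_PROPAGATE(a.embedUInt32(0u, 4));
  return kErrorOk;
}
```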
+ virtual Error embedLabelDelta(const Label& label, const Label& base, size_t dataSize = 0) = 0; + //! \} //! \name Comment @@ -538,13 +668,48 @@ class ASMJIT_VIRTAPI BaseEmitter { //! Called after the emitter was detached from `CodeHolder`. virtual Error onDetach(CodeHolder* code) noexcept = 0; - //! Called to update `_globalInstOptions` based on `_emitterOptions`. + //! Called when \ref CodeHolder has updated an important setting, which + //! involves the following: //! - //! This function should only touch one bit `BaseInst::kOptionReserved`, which - //! is used to handle errors and special-cases in a way that minimizes branching. - ASMJIT_API void onUpdateGlobalInstOptions() noexcept; + //! - \ref Logger has been changed (\ref CodeHolder::setLogger() has been + //! called). + //! - \ref ErrorHandler has been changed (\ref CodeHolder::setErrorHandler() + //! has been called). + //! + //! This function ensures that the settings are properly propagated from + //! \ref CodeHolder to the emitter. + //! + //! \note This function is virtual and can be overridden, however, if you + //! do so, always call \ref BaseEmitter::onSettingsUpdated() within your + //! own implementation to ensure that the emitter is in a consisten state. + ASMJIT_API virtual void onSettingsUpdated() noexcept; //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use environment() instead") + inline CodeInfo codeInfo() const noexcept { + return CodeInfo(_environment, _code ? _code->baseAddress() : Globals::kNoBaseAddress); + } + + ASMJIT_DEPRECATED("Use arch() instead") + inline uint32_t archId() const noexcept { return arch(); } + + ASMJIT_DEPRECATED("Use registerSize() instead") + inline uint32_t gpSize() const noexcept { return registerSize(); } + + ASMJIT_DEPRECATED("Use encodingOptions() instead") + inline uint32_t emitterOptions() const noexcept { return encodingOptions(); } + + ASMJIT_DEPRECATED("Use addEncodingOptions() instead") + inline void addEmitterOptions(uint32_t options) noexcept { addEncodingOptions(options); } + + ASMJIT_DEPRECATED("Use clearEncodingOptions() instead") + inline void clearEmitterOptions(uint32_t options) noexcept { clearEncodingOptions(options); } + + ASMJIT_DEPRECATED("Use forcedInstOptions() instead") + inline uint32_t globalInstOptions() const noexcept { return forcedInstOptions(); } +#endif // !ASMJIT_NO_DEPRECATED }; //! \} diff --git a/libs/asmjit/src/asmjit/core/emitterutils.cpp b/libs/asmjit/src/asmjit/core/emitterutils.cpp new file mode 100644 index 0000000..1115934 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/emitterutils.cpp @@ -0,0 +1,150 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. 
Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#include "../core/assembler.h" +#include "../core/emitterutils_p.h" +#include "../core/formatter.h" +#include "../core/logger.h" +#include "../core/support.h" + +ASMJIT_BEGIN_NAMESPACE + +// ============================================================================ +// [asmjit::EmitterUtils] +// ============================================================================ + +namespace EmitterUtils { + +#ifndef ASMJIT_NO_LOGGING + +Error formatLine(String& sb, const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept { + size_t currentSize = sb.size(); + size_t commentSize = comment ? Support::strLen(comment, Globals::kMaxCommentSize) : 0; + + ASMJIT_ASSERT(binSize >= dispSize); + const size_t kNoBinSize = SIZE_MAX; + + if ((binSize != 0 && binSize != kNoBinSize) || commentSize) { + size_t align = kMaxInstLineSize; + char sep = ';'; + + for (size_t i = (binSize == kNoBinSize); i < 2; i++) { + size_t begin = sb.size(); + ASMJIT_PROPAGATE(sb.padEnd(align)); + + if (sep) { + ASMJIT_PROPAGATE(sb.append(sep)); + ASMJIT_PROPAGATE(sb.append(' ')); + } + + // Append binary data or comment. + if (i == 0) { + ASMJIT_PROPAGATE(sb.appendHex(binData, binSize - dispSize - immSize)); + ASMJIT_PROPAGATE(sb.appendChars('.', dispSize * 2)); + ASMJIT_PROPAGATE(sb.appendHex(binData + binSize - immSize, immSize)); + if (commentSize == 0) break; + } + else { + ASMJIT_PROPAGATE(sb.append(comment, commentSize)); + } + + currentSize += sb.size() - begin; + align += kMaxBinarySize; + sep = '|'; + } + } + + return sb.append('\n'); +} + +void logLabelBound(BaseAssembler* self, const Label& label) noexcept { + Logger* logger = self->logger(); + + StringTmp<512> sb; + size_t binSize = logger->hasFlag(FormatOptions::kFlagMachineCode) ? 
size_t(0) : SIZE_MAX; + + sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationLabel)); + Formatter::formatLabel(sb, logger->flags(), self, label.id()); + sb.append(':'); + EmitterUtils::formatLine(sb, nullptr, binSize, 0, 0, self->_inlineComment); + logger->log(sb.data(), sb.size()); +} + +void logInstructionEmitted( + BaseAssembler* self, + uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt, + uint32_t relSize, uint32_t immSize, uint8_t* afterCursor) { + + Logger* logger = self->logger(); + ASMJIT_ASSERT(logger != nullptr); + + StringTmp<256> sb; + uint32_t flags = logger->flags(); + + uint8_t* beforeCursor = self->bufferPtr(); + intptr_t emittedSize = (intptr_t)(afterCursor - beforeCursor); + + Operand_ opArray[Globals::kMaxOpCount]; + EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt); + + sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationCode)); + Formatter::formatInstruction(sb, flags, self, self->arch(), BaseInst(instId, options, self->extraReg()), opArray, Globals::kMaxOpCount); + + if ((flags & FormatOptions::kFlagMachineCode) != 0) + EmitterUtils::formatLine(sb, self->bufferPtr(), size_t(emittedSize), relSize, immSize, self->inlineComment()); + else + EmitterUtils::formatLine(sb, nullptr, SIZE_MAX, 0, 0, self->inlineComment()); + logger->log(sb); +} + +Error logInstructionFailed( + BaseAssembler* self, + Error err, + uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) { + + StringTmp<256> sb; + sb.append(DebugUtils::errorAsString(err)); + sb.append(": "); + + Operand_ opArray[Globals::kMaxOpCount]; + EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt); + + Formatter::formatInstruction(sb, 0, self, self->arch(), BaseInst(instId, options, self->extraReg()), opArray, Globals::kMaxOpCount); + + if (self->inlineComment()) { + sb.append(" ; "); + sb.append(self->inlineComment()); + } + + self->resetInstOptions(); + self->resetExtraReg(); + self->resetInlineComment(); + return self->reportError(err, sb.data()); +} + +#endif + +} // {EmitterUtils} + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/emitterutils_p.h b/libs/asmjit/src/asmjit/core/emitterutils_p.h new file mode 100644 index 0000000..7e222d3 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/emitterutils_p.h @@ -0,0 +1,109 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
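The logging helpers above are internal, but they are driven by whatever logger the emitter uses. A hedged sketch of wiring that up: `setLogger()` and `FormatOptions::kFlagMachineCode` appear elsewhere in this patch, while `FileLogger` and `Logger::addFlags()` are assumed from asmjit's logging API (logger.h is included by this file).

```
#include <asmjit/core.h>
#include <stdio.h>
using namespace asmjit;

// Route formatted instructions, including their encoded bytes, to stdout.
void attachStdoutLogger(BaseEmitter& emitter, FileLogger& logger) {
  logger.setFile(stdout);
  logger.addFlags(FormatOptions::kFlagMachineCode);  // also dump machine code
  emitter.setLogger(&logger);                        // becomes the emitter's own logger
}
```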
+ +#ifndef ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED +#define ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED + +#include "../core/emitter.h" +#include "../core/operand.h" + +ASMJIT_BEGIN_NAMESPACE + +class BaseAssembler; + +//! \cond INTERNAL +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [asmjit::EmitterUtils] +// ============================================================================ + +namespace EmitterUtils { + +static const Operand_ noExt[3] {}; + +enum kOpIndex { + kOp3 = 0, + kOp4 = 1, + kOp5 = 2 +}; + +static ASMJIT_INLINE uint32_t opCountFromEmitArgs(const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) noexcept { + uint32_t opCount = 0; + + if (opExt[kOp3].isNone()) { + if (!o0.isNone()) opCount = 1; + if (!o1.isNone()) opCount = 2; + if (!o2.isNone()) opCount = 3; + } + else { + opCount = 4; + if (!opExt[kOp4].isNone()) { + opCount = 5 + uint32_t(!opExt[kOp5].isNone()); + } + } + + return opCount; +} + +static ASMJIT_INLINE void opArrayFromEmitArgs(Operand_ dst[Globals::kMaxOpCount], const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) noexcept { + dst[0].copyFrom(o0); + dst[1].copyFrom(o1); + dst[2].copyFrom(o2); + dst[3].copyFrom(opExt[kOp3]); + dst[4].copyFrom(opExt[kOp4]); + dst[5].copyFrom(opExt[kOp5]); +} + +#ifndef ASMJIT_NO_LOGGING +enum : uint32_t { + // Has to be big to be able to hold all metadata compiler can assign to a + // single instruction. + kMaxInstLineSize = 44, + kMaxBinarySize = 26 +}; + +Error formatLine(String& sb, const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept; + +void logLabelBound(BaseAssembler* self, const Label& label) noexcept; + +void logInstructionEmitted( + BaseAssembler* self, + uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt, + uint32_t relSize, uint32_t immSize, uint8_t* afterCursor); + +Error logInstructionFailed( + BaseAssembler* self, + Error err, uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt); +#endif + +} + +//! \} +//! \endcond + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_EMITTERUTILS_P_H_INCLUDED + diff --git a/libs/asmjit/src/asmjit/core/environment.cpp b/libs/asmjit/src/asmjit/core/environment.cpp new file mode 100644 index 0000000..3be2b15 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/environment.cpp @@ -0,0 +1,64 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. 
+// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#include "../core/environment.h" + +ASMJIT_BEGIN_NAMESPACE + +// X86 Target +// ---------- +// +// - 32-bit - Linux, OSX, BSD, and apparently also Haiku guarantee 16-byte +// stack alignment. Other operating systems are assumed to have +// 4-byte alignment by default for safety reasons. +// - 64-bit - stack must be aligned to 16 bytes. +// +// ARM Target +// ---------- +// +// - 32-bit - Stack must be aligned to 8 bytes. +// - 64-bit - Stack must be aligned to 16 bytes (hardware requirement). +uint32_t Environment::stackAlignment() const noexcept { + if (is64Bit()) { + // Assume 16-byte alignment on any 64-bit target. + return 16; + } + else { + // The following platforms use 16-byte alignment in 32-bit mode. + if (isPlatformLinux() || + isPlatformBSD() || + isPlatformApple() || + isPlatformHaiku()) { + return 16u; + } + + if (isFamilyARM()) + return 8; + + // Bail to 4-byte alignment if we don't know. + return 4; + } +} + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/environment.h b/libs/asmjit/src/asmjit/core/environment.h new file mode 100644 index 0000000..79e6f7c --- /dev/null +++ b/libs/asmjit/src/asmjit/core/environment.h @@ -0,0 +1,612 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_ENVIRONMENT_H_INCLUDED +#define ASMJIT_CORE_ENVIRONMENT_H_INCLUDED + +#include "../core/globals.h" + +#if defined(__APPLE__) + #include +#endif + +ASMJIT_BEGIN_NAMESPACE + +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [asmjit::Environment] +// ============================================================================ + +//! Represents an environment, which is usually related to a \ref Target. +//! +//! Environment has usually an 'arch-subarch-vendor-os-abi' format, which is +//! sometimes called "Triple" (historically it used to be 3 only parts) or +//! "Tuple", which is a convention used by Debian Linux. +//! +//! AsmJit doesn't support all possible combinations or architectures and ABIs, +//! however, it models the environment similarly to other compilers for future +//! extensibility. +class Environment { +public: + //! Architecture type, see \ref Arch. + uint8_t _arch; + //! Sub-architecture type, see \ref SubArch. + uint8_t _subArch; + //! Vendor type, see \ref Vendor. + uint8_t _vendor; + //! Platform type, see \ref Platform. 
+ uint8_t _platform; + //! ABI type, see \ref Abi. + uint8_t _abi; + //! Object format, see \ref Format. + uint8_t _format; + //! Reserved for future use, must be zero. + uint16_t _reserved; + + //! Architecture. + enum Arch : uint32_t { + //! Unknown or uninitialized architecture. + kArchUnknown = 0, + + //! Mask used by 32-bit architectures (odd are 32-bit, even are 64-bit). + kArch32BitMask = 0x01, + //! Mask used by big-endian architectures. + kArchBigEndianMask = 0x80u, + + //! 32-bit X86 architecture. + kArchX86 = 1, + //! 64-bit X86 architecture also known as X86_64 and AMD64. + kArchX64 = 2, + + //! 32-bit RISC-V architecture. + kArchRISCV32 = 3, + //! 64-bit RISC-V architecture. + kArchRISCV64 = 4, + + //! 32-bit ARM architecture (little endian). + kArchARM = 5, + //! 32-bit ARM architecture (big endian). + kArchARM_BE = kArchARM | kArchBigEndianMask, + //! 64-bit ARM architecture in (little endian). + kArchAArch64 = 6, + //! 64-bit ARM architecture in (big endian). + kArchAArch64_BE = kArchAArch64 | kArchBigEndianMask, + //! 32-bit ARM in Thumb mode (little endian). + kArchThumb = 7, + //! 32-bit ARM in Thumb mode (big endian). + kArchThumb_BE = kArchThumb | kArchBigEndianMask, + + // 8 is not used, even numbers are 64-bit architectures. + + //! 32-bit MIPS architecture in (little endian). + kArchMIPS32_LE = 9, + //! 32-bit MIPS architecture in (big endian). + kArchMIPS32_BE = kArchMIPS32_LE | kArchBigEndianMask, + //! 64-bit MIPS architecture in (little endian). + kArchMIPS64_LE = 10, + //! 64-bit MIPS architecture in (big endian). + kArchMIPS64_BE = kArchMIPS64_LE | kArchBigEndianMask, + + //! Count of architectures. + kArchCount = 11 + }; + + //! Sub-architecture. + enum SubArch : uint32_t { + //! Unknown or uninitialized architecture sub-type. + kSubArchUnknown = 0, + + //! Count of sub-architectures. + kSubArchCount + }; + + //! Vendor. + //! + //! \note AsmJit doesn't use vendor information at the moment. It's provided + //! for future use, if required. + enum Vendor : uint32_t { + //! Unknown or uninitialized vendor. + kVendorUnknown = 0, + + //! Count of vendor identifiers. + kVendorCount + }; + + //! Platform / OS. + enum Platform : uint32_t { + //! Unknown or uninitialized platform. + kPlatformUnknown = 0, + + //! Windows OS. + kPlatformWindows, + + //! Other platform, most likely POSIX based. + kPlatformOther, + + //! Linux OS. + kPlatformLinux, + //! GNU/Hurd OS. + kPlatformHurd, + + //! FreeBSD OS. + kPlatformFreeBSD, + //! OpenBSD OS. + kPlatformOpenBSD, + //! NetBSD OS. + kPlatformNetBSD, + //! DragonFly BSD OS. + kPlatformDragonFlyBSD, + + //! Haiku OS. + kPlatformHaiku, + + //! Apple OSX. + kPlatformOSX, + //! Apple iOS. + kPlatformIOS, + //! Apple TVOS. + kPlatformTVOS, + //! Apple WatchOS. + kPlatformWatchOS, + + //! Emscripten platform. + kPlatformEmscripten, + + //! Count of platform identifiers. + kPlatformCount + }; + + //! ABI. + enum Abi : uint32_t { + //! Unknown or uninitialied environment. + kAbiUnknown = 0, + //! Microsoft ABI. + kAbiMSVC, + //! GNU ABI. + kAbiGNU, + //! Android Environment / ABI. + kAbiAndroid, + //! Cygwin ABI. + kAbiCygwin, + + //! Count of known ABI types. + kAbiCount + }; + + //! Object format. + //! + //! \note AsmJit doesn't really use anything except \ref kFormatUnknown and + //! \ref kFormatJIT at the moment. Object file formats are provided for + //! future extensibility and a possibility to generate object files at some + //! point. + enum Format : uint32_t { + //! Unknown or uninitialized object format. 
+ kFormatUnknown = 0, + + //! JIT code generation object, most likely \ref JitRuntime or a custom + //! \ref Target implementation. + kFormatJIT, + + //! Executable and linkable format (ELF). + kFormatELF, + //! Common object file format. + kFormatCOFF, + //! Extended COFF object format. + kFormatXCOFF, + //! Mach object file format. + kFormatMachO, + + //! Count of object format types. + kFormatCount + }; + + //! \name Environment Detection + //! \{ + +#ifdef _DOXYGEN + //! Architecture detected at compile-time (architecture of the host). + static constexpr Arch kArchHost = DETECTED_AT_COMPILE_TIME; + //! Sub-architecture detected at compile-time (sub-architecture of the host). + static constexpr SubArch kSubArchHost = DETECTED_AT_COMPILE_TIME; + //! Vendor detected at compile-time (vendor of the host). + static constexpr Vendor kVendorHost = DETECTED_AT_COMPILE_TIME; + //! Platform detected at compile-time (platform of the host). + static constexpr Platform kPlatformHost = DETECTED_AT_COMPILE_TIME; + //! ABI detected at compile-time (ABI of the host). + static constexpr Abi kAbiHost = DETECTED_AT_COMPILE_TIME; +#else + static constexpr Arch kArchHost = + ASMJIT_ARCH_X86 == 32 ? kArchX86 : + ASMJIT_ARCH_X86 == 64 ? kArchX64 : + + ASMJIT_ARCH_ARM == 32 && ASMJIT_ARCH_LE ? kArchARM : + ASMJIT_ARCH_ARM == 32 && ASMJIT_ARCH_BE ? kArchARM_BE : + ASMJIT_ARCH_ARM == 64 && ASMJIT_ARCH_LE ? kArchAArch64 : + ASMJIT_ARCH_ARM == 64 && ASMJIT_ARCH_BE ? kArchAArch64_BE : + + ASMJIT_ARCH_MIPS == 32 && ASMJIT_ARCH_LE ? kArchMIPS32_LE : + ASMJIT_ARCH_MIPS == 32 && ASMJIT_ARCH_BE ? kArchMIPS32_BE : + ASMJIT_ARCH_MIPS == 64 && ASMJIT_ARCH_LE ? kArchMIPS64_LE : + ASMJIT_ARCH_MIPS == 64 && ASMJIT_ARCH_BE ? kArchMIPS64_BE : + + kArchUnknown; + + static constexpr SubArch kSubArchHost = + kSubArchUnknown; + + static constexpr Vendor kVendorHost = + kVendorUnknown; + + static constexpr Platform kPlatformHost = +#if defined(__EMSCRIPTEN__) + kPlatformEmscripten +#elif defined(_WIN32) + kPlatformWindows +#elif defined(__linux__) + kPlatformLinux +#elif defined(__gnu_hurd__) + kPlatformHurd +#elif defined(__FreeBSD__) + kPlatformFreeBSD +#elif defined(__OpenBSD__) + kPlatformOpenBSD +#elif defined(__NetBSD__) + kPlatformNetBSD +#elif defined(__DragonFly__) + kPlatformDragonFlyBSD +#elif defined(__HAIKU__) + kPlatformHaiku +#elif defined(__APPLE__) && TARGET_OS_OSX + kPlatformOSX +#elif defined(__APPLE__) && TARGET_OS_TV + kPlatformTVOS +#elif defined(__APPLE__) && TARGET_OS_WATCH + kPlatformWatchOS +#elif defined(__APPLE__) && TARGET_OS_IPHONE + kPlatformIOS +#else + kPlatformOther +#endif + ; + + static constexpr Abi kAbiHost = +#if defined(_MSC_VER) + kAbiMSVC +#elif defined(__CYGWIN__) + kAbiCygwin +#elif defined(__MINGW32__) || defined(__GLIBC__) + kAbiGNU +#elif defined(__ANDROID__) + kAbiAndroid +#else + kAbiUnknown +#endif + ; + +#endif + + //! \} + + //! \name Construction / Destruction + //! 
\{ + + inline Environment() noexcept : + _arch(uint8_t(kArchUnknown)), + _subArch(uint8_t(kSubArchUnknown)), + _vendor(uint8_t(kVendorUnknown)), + _platform(uint8_t(kPlatformUnknown)), + _abi(uint8_t(kAbiUnknown)), + _format(uint8_t(kFormatUnknown)), + _reserved(0) {} + + inline Environment(const Environment& other) noexcept = default; + + inline explicit Environment(uint32_t arch, + uint32_t subArch = kSubArchUnknown, + uint32_t vendor = kVendorUnknown, + uint32_t platform = kPlatformUnknown, + uint32_t abi = kAbiUnknown, + uint32_t format = kFormatUnknown) noexcept { + init(arch, subArch, vendor, platform, abi, format); + } + + //! \} + + //! \name Overloaded Operators + //! \{ + + inline Environment& operator=(const Environment& other) noexcept = default; + + inline bool operator==(const Environment& other) const noexcept { return equals(other); } + inline bool operator!=(const Environment& other) const noexcept { return !equals(other); } + + //! \} + + //! \name Accessors + //! \{ + + //! Tests whether the environment is not set up. + //! + //! Returns true if all members are zero, and thus unknown. + inline bool empty() const noexcept { + // Unfortunately compilers won't optimize fields are checked one by one... + return _packed() == 0; + } + + //! Tests whether the environment is intialized, which means it must have + //! a valid architecture. + inline bool isInitialized() const noexcept { + return _arch != kArchUnknown; + } + + inline uint64_t _packed() const noexcept { + uint64_t x; + memcpy(&x, this, 8); + return x; + } + + //! Resets all members of the environment to zero / unknown. + inline void reset() noexcept { + _arch = uint8_t(kArchUnknown); + _subArch = uint8_t(kSubArchUnknown); + _vendor = uint8_t(kVendorUnknown); + _platform = uint8_t(kPlatformUnknown); + _abi = uint8_t(kAbiUnknown); + _format = uint8_t(kFormatUnknown); + _reserved = 0; + } + + inline bool equals(const Environment& other) const noexcept { + return _packed() == other._packed(); + } + + //! Returns the architecture, see \ref Arch. + inline uint32_t arch() const noexcept { return _arch; } + //! Returns the sub-architecture, see \ref SubArch. + inline uint32_t subArch() const noexcept { return _subArch; } + //! Returns vendor, see \ref Vendor. + inline uint32_t vendor() const noexcept { return _vendor; } + //! Returns target's platform or operating system, see \ref Platform. + inline uint32_t platform() const noexcept { return _platform; } + //! Returns target's ABI, see \ref Abi. + inline uint32_t abi() const noexcept { return _abi; } + //! Returns target's object format, see \ref Format. 
+ inline uint32_t format() const noexcept { return _format; } + + inline void init(uint32_t arch, + uint32_t subArch = kSubArchUnknown, + uint32_t vendor = kVendorUnknown, + uint32_t platform = kPlatformUnknown, + uint32_t abi = kAbiUnknown, + uint32_t format = kFormatUnknown) noexcept { + _arch = uint8_t(arch); + _subArch = uint8_t(subArch); + _vendor = uint8_t(vendor); + _platform = uint8_t(platform); + _abi = uint8_t(abi); + _format = uint8_t(format); + _reserved = 0; + } + + inline bool isArchX86() const noexcept { return _arch == kArchX86; } + inline bool isArchX64() const noexcept { return _arch == kArchX64; } + inline bool isArchRISCV32() const noexcept { return _arch == kArchRISCV32; } + inline bool isArchRISCV64() const noexcept { return _arch == kArchRISCV64; } + inline bool isArchARM() const noexcept { return (_arch & ~kArchBigEndianMask) == kArchARM; } + inline bool isArchThumb() const noexcept { return (_arch & ~kArchBigEndianMask) == kArchThumb; } + inline bool isArchAArch64() const noexcept { return (_arch & ~kArchBigEndianMask) == kArchAArch64; } + inline bool isArchMIPS32() const noexcept { return (_arch & ~kArchBigEndianMask) == kArchMIPS32_LE; } + inline bool isArchMIPS64() const noexcept { return (_arch & ~kArchBigEndianMask) == kArchMIPS64_LE; } + + //! Tests whether the architecture is 32-bit. + inline bool is32Bit() const noexcept { return is32Bit(_arch); } + //! Tests whether the architecture is 64-bit. + inline bool is64Bit() const noexcept { return is64Bit(_arch); } + + //! Tests whether the architecture is little endian. + inline bool isLittleEndian() const noexcept { return isLittleEndian(_arch); } + //! Tests whether the architecture is big endian. + inline bool isBigEndian() const noexcept { return isBigEndian(_arch); } + + //! Tests whether this architecture is of X86 family. + inline bool isFamilyX86() const noexcept { return isFamilyX86(_arch); } + //! Tests whether this architecture family is RISC-V (both 32-bit and 64-bit). + inline bool isFamilyRISCV() const noexcept { return isFamilyRISCV(_arch); } + //! Tests whether this architecture family is ARM, Thumb, or AArch64. + inline bool isFamilyARM() const noexcept { return isFamilyARM(_arch); } + //! Tests whether this architecture family is MISP or MIPS64. + inline bool isFamilyMIPS() const noexcept { return isFamilyMIPS(_arch); } + + //! Tests whether the environment platform is Windows. + inline bool isPlatformWindows() const noexcept { return _platform == kPlatformWindows; } + + //! Tests whether the environment platform is Linux. + inline bool isPlatformLinux() const noexcept { return _platform == kPlatformLinux; } + + //! Tests whether the environment platform is Hurd. + inline bool isPlatformHurd() const noexcept { return _platform == kPlatformHurd; } + + //! Tests whether the environment platform is Haiku. + inline bool isPlatformHaiku() const noexcept { return _platform == kPlatformHaiku; } + + //! Tests whether the environment platform is any BSD. + inline bool isPlatformBSD() const noexcept { + return _platform == kPlatformFreeBSD || + _platform == kPlatformOpenBSD || + _platform == kPlatformNetBSD || + _platform == kPlatformDragonFlyBSD; + } + + //! Tests whether the environment platform is any Apple platform (OSX, iOS, TVOS, WatchOS). + inline bool isPlatformApple() const noexcept { + return _platform == kPlatformOSX || + _platform == kPlatformIOS || + _platform == kPlatformTVOS || + _platform == kPlatformWatchOS; + } + + //! Tests whether the ABI is MSVC. 
+ inline bool isAbiMSVC() const noexcept { return _abi == kAbiMSVC; } + //! Tests whether the ABI is GNU. + inline bool isAbiGNU() const noexcept { return _abi == kAbiGNU; } + + //! Returns a calculated stack alignment for this environment. + ASMJIT_API uint32_t stackAlignment() const noexcept; + + //! Returns a native register size of this architecture. + uint32_t registerSize() const noexcept { return registerSizeFromArch(_arch); } + + //! Sets the architecture to `arch`. + inline void setArch(uint32_t arch) noexcept { _arch = uint8_t(arch); } + //! Sets the sub-architecture to `subArch`. + inline void setSubArch(uint32_t subArch) noexcept { _subArch = uint8_t(subArch); } + //! Sets the vendor to `vendor`. + inline void setVendor(uint32_t vendor) noexcept { _vendor = uint8_t(vendor); } + //! Sets the platform to `platform`. + inline void setPlatform(uint32_t platform) noexcept { _platform = uint8_t(platform); } + //! Sets the ABI to `abi`. + inline void setAbi(uint32_t abi) noexcept { _abi = uint8_t(abi); } + //! Sets the object format to `format`. + inline void setFormat(uint32_t format) noexcept { _format = uint8_t(format); } + + //! \} + + //! \name Static Utilities + //! \{ + + static inline bool isValidArch(uint32_t arch) noexcept { + return (arch & ~kArchBigEndianMask) != 0 && + (arch & ~kArchBigEndianMask) < kArchCount; + } + + //! Tests whether the given architecture `arch` is 32-bit. + static inline bool is32Bit(uint32_t arch) noexcept { + return (arch & kArch32BitMask) == kArch32BitMask; + } + + //! Tests whether the given architecture `arch` is 64-bit. + static inline bool is64Bit(uint32_t arch) noexcept { + return (arch & kArch32BitMask) == 0; + } + + //! Tests whether the given architecture `arch` is little endian. + static inline bool isLittleEndian(uint32_t arch) noexcept { + return (arch & kArchBigEndianMask) == 0; + } + + //! Tests whether the given architecture `arch` is big endian. + static inline bool isBigEndian(uint32_t arch) noexcept { + return (arch & kArchBigEndianMask) == kArchBigEndianMask; + } + + //! Tests whether the given architecture is AArch64. + static inline bool isArchAArch64(uint32_t arch) noexcept { + arch &= ~kArchBigEndianMask; + return arch == kArchAArch64; + } + + //! Tests whether the given architecture family is X86 or X64. + static inline bool isFamilyX86(uint32_t arch) noexcept { + return arch == kArchX86 || + arch == kArchX64; + } + + //! Tests whether the given architecture family is RISC-V (both 32-bit and 64-bit). + static inline bool isFamilyRISCV(uint32_t arch) noexcept { + return arch == kArchRISCV32 || + arch == kArchRISCV64; + } + + //! Tests whether the given architecture family is ARM, Thumb, or AArch64. + static inline bool isFamilyARM(uint32_t arch) noexcept { + arch &= ~kArchBigEndianMask; + return arch == kArchARM || + arch == kArchAArch64 || + arch == kArchThumb; + } + + //! Tests whether the given architecture family is MISP or MIPS64. + static inline bool isFamilyMIPS(uint32_t arch) noexcept { + arch &= ~kArchBigEndianMask; + return arch == kArchMIPS32_LE || + arch == kArchMIPS64_LE; + } + + //! Returns a native general purpose register size from the given architecture. + static uint32_t registerSizeFromArch(uint32_t arch) noexcept { + return is32Bit(arch) ? 4u : 8u; + } + + //! \} +}; + +//! Returns the host environment constructed from preprocessor macros defined +//! by the compiler. +//! +//! The returned environment should precisely match the target host architecture, +//! sub-architecture, platform, and ABI. 
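A small usage sketch of the accessors declared above; `hostEnvironment()` is defined immediately below, and the `printf()` output is purely illustrative.

```
#include <asmjit/core.h>
#include <stdio.h>
using namespace asmjit;

int main() {
  Environment env = hostEnvironment();
  printf("arch=%u 64-bit=%d stack-align=%u reg-size=%u\n",
         env.arch(), int(env.is64Bit()),
         env.stackAlignment(), env.registerSize());
  return 0;
}
```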
+static ASMJIT_INLINE Environment hostEnvironment() noexcept { + return Environment(Environment::kArchHost, + Environment::kSubArchHost, + Environment::kVendorHost, + Environment::kPlatformHost, + Environment::kAbiHost, + Environment::kFormatUnknown); +} + +static_assert(sizeof(Environment) == 8, + "Environment must occupy exactly 8 bytes."); + +//! \} + +#ifndef ASMJIT_NO_DEPRECATED +class ASMJIT_DEPRECATED_STRUCT("Use Environment instead") ArchInfo : public Environment { +public: + inline ArchInfo() noexcept : Environment() {} + + inline ArchInfo(const Environment& other) noexcept : Environment(other) {} + inline explicit ArchInfo(uint32_t arch, uint32_t subArch = kSubArchUnknown) noexcept + : Environment(arch, subArch) {} + + enum Id : uint32_t { + kIdNone = Environment::kArchUnknown, + kIdX86 = Environment::kArchX86, + kIdX64 = Environment::kArchX64, + kIdA32 = Environment::kArchARM, + kIdA64 = Environment::kArchAArch64, + kIdHost = Environment::kArchHost + }; + + enum SubType : uint32_t { + kSubIdNone = Environment::kSubArchUnknown + }; + + static inline ArchInfo host() noexcept { return ArchInfo(hostEnvironment()); } +}; +#endif // !ASMJIT_NO_DEPRECATED + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_ENVIRONMENT_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/callconv.cpp b/libs/asmjit/src/asmjit/core/errorhandler.cpp similarity index 68% rename from libs/asmjit/src/asmjit/core/callconv.cpp rename to libs/asmjit/src/asmjit/core/errorhandler.cpp index 5d915d0..8372d75 100644 --- a/libs/asmjit/src/asmjit/core/callconv.cpp +++ b/libs/asmjit/src/asmjit/core/errorhandler.cpp @@ -1,3 +1,4 @@ + // AsmJit - Machine code generation for C++ // // * Official AsmJit Home Page: https://asmjit.com @@ -22,38 +23,15 @@ // 3. This notice may not be removed or altered from any source distribution. #include "../core/api-build_p.h" -#include "../core/arch.h" -#include "../core/func.h" -#include "../core/type.h" - -#ifdef ASMJIT_BUILD_X86 - #include "../x86/x86callconv_p.h" -#endif - -#ifdef ASMJIT_BUILD_ARM - #include "../arm/armcallconv_p.h" -#endif +#include "../core/errorhandler.h" ASMJIT_BEGIN_NAMESPACE // ============================================================================ -// [asmjit::CallConv - Init / Reset] +// [asmjit::ErrorHandler] // ============================================================================ -ASMJIT_FAVOR_SIZE Error CallConv::init(uint32_t ccId) noexcept { - reset(); - -#ifdef ASMJIT_BUILD_X86 - if (CallConv::isX86Family(ccId)) - return x86::CallConvInternal::init(*this, ccId); -#endif - -#ifdef ASMJIT_BUILD_ARM - if (CallConv::isArmFamily(ccId)) - return arm::CallConvInternal::init(*this, ccId); -#endif - - return DebugUtils::errored(kErrorInvalidArgument); -} +ErrorHandler::ErrorHandler() noexcept {} +ErrorHandler::~ErrorHandler() noexcept {} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/errorhandler.h b/libs/asmjit/src/asmjit/core/errorhandler.h new file mode 100644 index 0000000..2337cd8 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/errorhandler.h @@ -0,0 +1,267 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. 
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef ASMJIT_CORE_ERRORHANDLER_H_INCLUDED
+#define ASMJIT_CORE_ERRORHANDLER_H_INCLUDED
+
+#include "../core/globals.h"
+
+ASMJIT_BEGIN_NAMESPACE
+
+//! \addtogroup asmjit_error_handling
+//! \{
+
+// ============================================================================
+// [Forward Declarations]
+// ============================================================================
+
+class BaseEmitter;
+
+// ============================================================================
+// [asmjit::ErrorHandler]
+// ============================================================================
+
+//! Error handler can be used to override the default behavior of error handling.
+//!
+//! It's available to all classes that inherit `BaseEmitter`. Override
+//! \ref ErrorHandler::handleError() to implement your own error handler.
+//!
+//! The following use-cases are supported:
+//!
+//!   - Record the error and continue code generation. This is the simplest
+//!     approach that can be used to at least log possible errors.
+//!   - Throw an exception. AsmJit doesn't use exceptions and is completely
+//!     exception-safe, but it's perfectly legal to throw an exception from
+//!     the error handler.
+//!   - Use plain old C's `setjmp()` and `longjmp()`. AsmJit always puts Assembler,
+//!     Builder and Compiler to a consistent state before calling \ref handleError(),
+//!     so `longjmp()` can be used without issues to cancel the code-generation if
+//!     an error occurred. This method can be used if exception handling in your
+//!     project is turned off and you still want some comfort. In most cases it
+//!     should be safe as AsmJit uses \ref Zone memory and the ownership of memory
+//!     it allocates always ends with the instance that allocated it. If using this
+//!     approach please never jump outside the life-time of \ref CodeHolder and
+//!     \ref BaseEmitter.
+//!
+//! \ref ErrorHandler can be attached to \ref CodeHolder or \ref BaseEmitter; an
+//! emitter's own handler takes priority over the one attached to \ref CodeHolder.
+//! The example below uses an error handler that just prints the error, but lets
+//! AsmJit continue:
+//!
+//! ```
+//! // Error Handling #1 - Logging and returning Error.
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Error handler that just prints the error and lets AsmJit ignore it.
+//! class SimpleErrorHandler : public ErrorHandler {
+//! public:
+//!   Error err;
+//!
+//!   inline SimpleErrorHandler() : err(kErrorOk) {}
+//!
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     this->err = err;
+//!     fprintf(stderr, "ERROR: %s\n", message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!   SimpleErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   // Try to emit instruction that doesn't exist.
+//!   x86::Assembler a(&code);
+//!   a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!
+//!   if (eh.err) {
+//!     // Assembler failed!
+//!     return 1;
+//!   }
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If an error happens during instruction emitting / encoding, the assembler
+//! behaves transactionally: the output buffer won't advance if encoding fails,
+//! so either a fully encoded instruction or nothing is emitted. The error handling
+//! shown above is useful, but it's still not the best way of dealing with errors
+//! in AsmJit. The following example shows how to use exception handling to handle
+//! errors in a more C++ way:
+//!
+//! ```
+//! // Error Handling #2 - Throwing an exception.
+//! #include <asmjit/x86.h>
+//! #include <exception>
+//! #include <string>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! // Error handler that throws a user-defined `AsmJitException`.
+//! class AsmJitException : public std::exception {
+//! public:
+//!   Error err;
+//!   std::string message;
+//!
+//!   AsmJitException(Error err, const char* message) noexcept
+//!     : err(err),
+//!       message(message) {}
+//!
+//!   const char* what() const noexcept override { return message.c_str(); }
+//! };
+//!
+//! class ThrowableErrorHandler : public ErrorHandler {
+//! public:
+//!   // Throw is possible, functions that use ErrorHandler are never 'noexcept'.
+//!   void handleError(Error err, const char* message, BaseEmitter* origin) override {
+//!     throw AsmJitException(err, message);
+//!   }
+//! };
+//!
+//! int main() {
+//!   JitRuntime rt;
+//!   ThrowableErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   x86::Assembler a(&code);
+//!
+//!   // Try to emit instruction that doesn't exist.
+//!   try {
+//!     a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!   }
+//!   catch (const AsmJitException& ex) {
+//!     printf("EXCEPTION THROWN: %s\n", ex.what());
+//!     return 1;
+//!   }
+//!
+//!   return 0;
+//! }
+//! ```
+//!
+//! If C++ exceptions are not what you like, or your project turns them off
+//! completely, there is still a way of reducing error handling to a minimum by
+//! using a standard setjmp/longjmp approach. AsmJit is exception-safe and
+//! cleans up everything before calling the ErrorHandler, so any approach is
+//! safe. You can simply jump from the error handler without causing any
+//! side-effects or memory leaks. The following example demonstrates how it
+//! could be done:
+//!
+//! ```
+//! // Error Handling #3 - Using setjmp/longjmp if exceptions are not allowed.
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//! #include <setjmp.h>
+//!
+//! class LongJmpErrorHandler : public asmjit::ErrorHandler {
+//! public:
+//!   inline LongJmpErrorHandler() : err(asmjit::kErrorOk) {}
+//!
+//!   void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
+//!     this->err = err;
+//!     longjmp(state, 1);
+//!   }
+//!
+//!   jmp_buf state;
+//!   asmjit::Error err;
+//! };
+//!
+//! int main(int argc, char* argv[]) {
+//!   using namespace asmjit;
+//!
+//!   JitRuntime rt;
+//!   LongJmpErrorHandler eh;
+//!
+//!   CodeHolder code;
+//!   code.init(rt.environment());
+//!   code.setErrorHandler(&eh);
+//!
+//!   x86::Assembler a(&code);
+//!
+//!   if (!setjmp(eh.state)) {
+//!     // Try to emit instruction that doesn't exist.
+//!     a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
+//!   }
+//!   else {
+//!     Error err = eh.err;
+//!     printf("ASMJIT ERROR: 0x%08X [%s]\n", err, DebugUtils::errorAsString(err));
+//!   }
+//!
+//!   return 0;
+//! }
+//!
``` +class ASMJIT_VIRTAPI ErrorHandler { +public: + ASMJIT_BASE_CLASS(ErrorHandler) + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! Creates a new `ErrorHandler` instance. + ASMJIT_API ErrorHandler() noexcept; + //! Destroys the `ErrorHandler` instance. + ASMJIT_API virtual ~ErrorHandler() noexcept; + + // -------------------------------------------------------------------------- + // [Handle Error] + // -------------------------------------------------------------------------- + + //! Error handler (must be reimplemented). + //! + //! Error handler is called after an error happened and before it's propagated + //! to the caller. There are multiple ways how the error handler can be used: + //! + //! 1. User-based error handling without throwing exception or using C's + //! `longjmp()`. This is for users that don't use exceptions and want + //! customized error handling. + //! + //! 2. Throwing an exception. AsmJit doesn't use exceptions and is completely + //! exception-safe, but you can throw exception from your error handler if + //! this way is the preferred way of handling errors in your project. + //! + //! 3. Using plain old C's `setjmp()` and `longjmp()`. Asmjit always puts + //! `BaseEmitter` to a consistent state before calling `handleError()` + //! so `longjmp()` can be used without any issues to cancel the code + //! generation if an error occurred. There is no difference between + //! exceptions and `longjmp()` from AsmJit's perspective, however, + //! never jump outside of `CodeHolder` and `BaseEmitter` scope as you + //! would leak memory. + virtual void handleError(Error err, const char* message, BaseEmitter* origin) = 0; +}; + +//! \} + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_ERRORHANDLER_H_INCLUDED + diff --git a/libs/asmjit/src/asmjit/core/features.h b/libs/asmjit/src/asmjit/core/features.h index 193841c..fd28472 100644 --- a/libs/asmjit/src/asmjit/core/features.h +++ b/libs/asmjit/src/asmjit/core/features.h @@ -36,9 +36,15 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::BaseFeatures] // ============================================================================ +//! Base class that provides information about CPU features. +//! +//! Internally each feature is repreesnted by a single bit in an embedded +//! bit-array, however, feature bits are defined by an architecture specific +//! implementations, like \ref x86::Features. class BaseFeatures { public: typedef Support::BitWord BitWord; + typedef Support::BitVectorIterator Iterator; enum : uint32_t { kMaxFeatures = 128, @@ -74,9 +80,11 @@ class BaseFeatures { //! \name Cast //! \{ + //! Casts this base class into a derived type `T`. template inline T& as() noexcept { return static_cast(*this); } + //! Casts this base class into a derived type `T` (const). template inline const T& as() const noexcept { return static_cast(*this); } @@ -85,11 +93,27 @@ class BaseFeatures { //! \name Accessors //! \{ - //! Returns all features as `BitWord` array. + inline bool empty() const noexcept { + for (uint32_t i = 0; i < kNumBitWords; i++) + if (_bits[i]) + return false; + return true; + } + + //! Returns all features as array of bitwords (see \ref Support::BitWord). inline BitWord* bits() noexcept { return _bits; } - //! Returns all features as `BitWord` array (const). + //! Returns all features as array of bitwords (const). 
inline const BitWord* bits() const noexcept { return _bits; } + //! Returns the number of BitWords returned by \ref bits(). + inline size_t bitWordCount() const noexcept { return kNumBitWords; } + + //! Returns \ref Support::BitVectorIterator, that can be used to iterate + //! all features efficiently + inline Iterator iterator() const noexcept { + return Iterator(_bits, kNumBitWords); + } + //! Tests whether the feature `featureId` is present. inline bool has(uint32_t featureId) const noexcept { ASMJIT_ASSERT(featureId < kMaxFeatures); diff --git a/libs/asmjit/src/asmjit/core/formatter.cpp b/libs/asmjit/src/asmjit/core/formatter.cpp new file mode 100644 index 0000000..89c3228 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/formatter.cpp @@ -0,0 +1,481 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
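// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of this patch]
//
// A minimal example of how the BaseFeatures additions shown above (empty(),
// bitWordCount(), iterator()) could be used to enumerate host CPU features.
// Assumptions: an x86 build of AsmJit with logging compiled in, that CpuInfo
// exposes arch() after the Environment rework, and that the hasNext()/next()
// idiom of Support::BitVectorIterator applies.
// ---------------------------------------------------------------------------

#include <asmjit/x86.h>
#include <stdio.h>

using namespace asmjit;

int main() {
  const CpuInfo& cpu = CpuInfo::host();
  const BaseFeatures& features = cpu.features();

  // The iterator yields the index of every feature bit that is set.
  BaseFeatures::Iterator it = features.iterator();
  while (it.hasNext()) {
    uint32_t featureId = uint32_t(it.next());

    String name;
    Formatter::formatFeature(name, cpu.arch(), featureId);
    printf("%s\n", name.data());
  }
  return 0;
}
// --------------------------------------------------------------------------- [end of sketch]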
+ +#include "../core/api-build_p.h" +#ifndef ASMJIT_NO_LOGGING + +#include "../core/builder.h" +#include "../core/codeholder.h" +#include "../core/compiler.h" +#include "../core/emitter.h" +#include "../core/formatter.h" +#include "../core/string.h" +#include "../core/support.h" +#include "../core/type.h" + +#ifdef ASMJIT_BUILD_X86 + #include "../x86/x86formatter_p.h" +#endif + +#ifdef ASMJIT_BUILD_ARM + #include "../arm/armformatter_p.h" +#endif + +ASMJIT_BEGIN_NAMESPACE + +#if defined(ASMJIT_NO_COMPILER) +class VirtReg; +#endif + +// ============================================================================ +// [asmjit::Formatter] +// ============================================================================ + +namespace Formatter { + +Error formatTypeId(String& sb, uint32_t typeId) noexcept { + if (typeId == Type::kIdVoid) + return sb.append("void"); + + if (!Type::isValid(typeId)) + return sb.append("unknown"); + + const char* typeName = "unknown"; + uint32_t typeSize = Type::sizeOf(typeId); + + uint32_t baseId = Type::baseOf(typeId); + switch (baseId) { + case Type::kIdIntPtr : typeName = "iptr" ; break; + case Type::kIdUIntPtr: typeName = "uptr" ; break; + case Type::kIdI8 : typeName = "i8" ; break; + case Type::kIdU8 : typeName = "u8" ; break; + case Type::kIdI16 : typeName = "i16" ; break; + case Type::kIdU16 : typeName = "u16" ; break; + case Type::kIdI32 : typeName = "i32" ; break; + case Type::kIdU32 : typeName = "u32" ; break; + case Type::kIdI64 : typeName = "i64" ; break; + case Type::kIdU64 : typeName = "u64" ; break; + case Type::kIdF32 : typeName = "f32" ; break; + case Type::kIdF64 : typeName = "f64" ; break; + case Type::kIdF80 : typeName = "f80" ; break; + case Type::kIdMask8 : typeName = "mask8" ; break; + case Type::kIdMask16 : typeName = "mask16"; break; + case Type::kIdMask32 : typeName = "mask32"; break; + case Type::kIdMask64 : typeName = "mask64"; break; + case Type::kIdMmx32 : typeName = "mmx32" ; break; + case Type::kIdMmx64 : typeName = "mmx64" ; break; + } + + uint32_t baseSize = Type::sizeOf(baseId); + if (typeSize > baseSize) { + uint32_t count = typeSize / baseSize; + return sb.appendFormat("%sx%u", typeName, unsigned(count)); + } + else { + return sb.append(typeName); + } +} + +Error formatFeature( + String& sb, + uint32_t arch, + uint32_t featureId) noexcept { + +#ifdef ASMJIT_BUILD_X86 + if (Environment::isFamilyX86(arch)) + return x86::FormatterInternal::formatFeature(sb, featureId); +#endif + +#ifdef ASMJIT_BUILD_ARM + if (Environment::isFamilyARM(arch)) + return arm::FormatterInternal::formatFeature(sb, featureId); +#endif + + return kErrorInvalidArch; +} + +Error formatLabel( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t labelId) noexcept { + + DebugUtils::unused(formatFlags); + + const LabelEntry* le = emitter->code()->labelEntry(labelId); + if (ASMJIT_UNLIKELY(!le)) + return sb.appendFormat("", labelId); + + if (le->hasName()) { + if (le->hasParent()) { + uint32_t parentId = le->parentId(); + const LabelEntry* pe = emitter->code()->labelEntry(parentId); + + if (ASMJIT_UNLIKELY(!pe)) + ASMJIT_PROPAGATE(sb.appendFormat("", labelId)); + else if (ASMJIT_UNLIKELY(!pe->hasName())) + ASMJIT_PROPAGATE(sb.appendFormat("L%u", parentId)); + else + ASMJIT_PROPAGATE(sb.append(pe->name())); + + ASMJIT_PROPAGATE(sb.append('.')); + } + return sb.append(le->name()); + } + else { + return sb.appendFormat("L%u", labelId); + } +} + +Error formatRegister( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + 
uint32_t arch, + uint32_t regType, + uint32_t regId) noexcept { + +#ifdef ASMJIT_BUILD_X86 + if (Environment::isFamilyX86(arch)) + return x86::FormatterInternal::formatRegister(sb, formatFlags, emitter, arch, regType, regId); +#endif + +#ifdef ASMJIT_BUILD_ARM + if (Environment::isFamilyARM(arch)) + return arm::FormatterInternal::formatRegister(sb, formatFlags, emitter, arch, regType, regId); +#endif + + return kErrorInvalidArch; +} + +Error formatOperand( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t arch, + const Operand_& op) noexcept { + +#ifdef ASMJIT_BUILD_X86 + if (Environment::isFamilyX86(arch)) + return x86::FormatterInternal::formatOperand(sb, formatFlags, emitter, arch, op); +#endif + +#ifdef ASMJIT_BUILD_ARM + if (Environment::isFamilyARM(arch)) + return arm::FormatterInternal::formatOperand(sb, formatFlags, emitter, arch, op); +#endif + + return kErrorInvalidArch; +} + +Error formatInstruction( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t arch, + const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept { + +#ifdef ASMJIT_BUILD_X86 + if (Environment::isFamilyX86(arch)) + return x86::FormatterInternal::formatInstruction(sb, formatFlags, emitter, arch, inst, operands, opCount); +#endif + +#ifdef ASMJIT_BUILD_ARM + if (Environment::isFamilyARM(arch)) + return arm::FormatterInternal::formatInstruction(sb, formatFlags, emitter, arch, inst, operands, opCount); +#endif + + return kErrorInvalidArch; +} + +#ifndef ASMJIT_NO_BUILDER + +#ifndef ASMJIT_NO_COMPILER +static Error formatFuncValue(String& sb, uint32_t formatFlags, const BaseEmitter* emitter, FuncValue value) noexcept { + uint32_t typeId = value.typeId(); + ASMJIT_PROPAGATE(formatTypeId(sb, typeId)); + + if (value.isAssigned()) { + ASMJIT_PROPAGATE(sb.append('@')); + + if (value.isIndirect()) + ASMJIT_PROPAGATE(sb.append('[')); + + // NOTE: It should be either reg or stack, but never both. We + // use two IFs on purpose so if the FuncValue is both it would + // show in logs. + if (value.isReg()) { + ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, emitter->arch(), value.regType(), value.regId())); + } + + if (value.isStack()) { + ASMJIT_PROPAGATE(sb.appendFormat("[%d]", int(value.stackOffset()))); + } + + if (value.isIndirect()) + ASMJIT_PROPAGATE(sb.append(']')); + } + + return kErrorOk; +} + +static Error formatFuncValuePack( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + const FuncValuePack& pack, + VirtReg* const* vRegs) noexcept { + + size_t count = pack.count(); + if (!count) + return sb.append("void"); + + if (count > 1) + sb.append('['); + + for (uint32_t valueIndex = 0; valueIndex < count; valueIndex++) { + const FuncValue& value = pack[valueIndex]; + if (!value) + break; + + if (valueIndex) + ASMJIT_PROPAGATE(sb.append(", ")); + + ASMJIT_PROPAGATE(formatFuncValue(sb, formatFlags, emitter, value)); + + if (vRegs) { + static const char nullRet[] = ""; + ASMJIT_PROPAGATE(sb.appendFormat(" %s", vRegs[valueIndex] ? 
vRegs[valueIndex]->name() : nullRet)); + } + } + + if (count > 1) + sb.append(']'); + + return kErrorOk; +} + +static Error formatFuncRets( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + const FuncDetail& fd) noexcept { + + return formatFuncValuePack(sb, formatFlags, emitter, fd.retPack(), nullptr); +} + +static Error formatFuncArgs( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + const FuncDetail& fd, + const FuncNode::ArgPack* argPacks) noexcept { + + uint32_t argCount = fd.argCount(); + if (!argCount) + return sb.append("void"); + + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + if (argIndex) + ASMJIT_PROPAGATE(sb.append(", ")); + + ASMJIT_PROPAGATE(formatFuncValuePack(sb, formatFlags, emitter, fd.argPack(argIndex), argPacks[argIndex]._data)); + } + + return kErrorOk; +} +#endif + +Error formatNode( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder, + const BaseNode* node) noexcept { + + if (node->hasPosition() && (formatFlags & FormatOptions::kFlagPositions) != 0) + ASMJIT_PROPAGATE(sb.appendFormat("<%05u> ", node->position())); + + switch (node->type()) { + case BaseNode::kNodeInst: + case BaseNode::kNodeJump: { + const InstNode* instNode = node->as(); + ASMJIT_PROPAGATE( + formatInstruction(sb, formatFlags, builder, + builder->arch(), + instNode->baseInst(), instNode->operands(), instNode->opCount())); + break; + } + + case BaseNode::kNodeSection: { + const SectionNode* sectionNode = node->as(); + if (builder->_code->isSectionValid(sectionNode->id())) { + const Section* section = builder->_code->sectionById(sectionNode->id()); + ASMJIT_PROPAGATE(sb.appendFormat(".section %s", section->name())); + } + break; + } + + case BaseNode::kNodeLabel: { + const LabelNode* labelNode = node->as(); + ASMJIT_PROPAGATE(formatLabel(sb, formatFlags, builder, labelNode->labelId())); + ASMJIT_PROPAGATE(sb.append(":")); + break; + } + + case BaseNode::kNodeAlign: { + const AlignNode* alignNode = node->as(); + ASMJIT_PROPAGATE( + sb.appendFormat("align %u (%s)", + alignNode->alignment(), + alignNode->alignMode() == kAlignCode ? 
"code" : "data")); + break; + } + + case BaseNode::kNodeEmbedData: { + const EmbedDataNode* embedNode = node->as(); + ASMJIT_PROPAGATE(sb.append("embed ")); + if (embedNode->repeatCount() != 1) + ASMJIT_PROPAGATE(sb.appendFormat("[repeat=%zu] ", size_t(embedNode->repeatCount()))); + ASMJIT_PROPAGATE(sb.appendFormat("%u bytes", embedNode->dataSize())); + break; + } + + case BaseNode::kNodeEmbedLabel: { + const EmbedLabelNode* embedNode = node->as(); + ASMJIT_PROPAGATE(sb.append(".label ")); + ASMJIT_PROPAGATE(formatLabel(sb, formatFlags, builder, embedNode->labelId())); + break; + } + + case BaseNode::kNodeEmbedLabelDelta: { + const EmbedLabelDeltaNode* embedNode = node->as(); + ASMJIT_PROPAGATE(sb.append(".label (")); + ASMJIT_PROPAGATE(formatLabel(sb, formatFlags, builder, embedNode->labelId())); + ASMJIT_PROPAGATE(sb.append(" - ")); + ASMJIT_PROPAGATE(formatLabel(sb, formatFlags, builder, embedNode->baseLabelId())); + ASMJIT_PROPAGATE(sb.append(")")); + break; + } + + case BaseNode::kNodeComment: { + const CommentNode* commentNode = node->as(); + ASMJIT_PROPAGATE(sb.appendFormat("; %s", commentNode->inlineComment())); + break; + } + + case BaseNode::kNodeSentinel: { + const SentinelNode* sentinelNode = node->as(); + const char* sentinelName = nullptr; + + switch (sentinelNode->sentinelType()) { + case SentinelNode::kSentinelFuncEnd: + sentinelName = "[FuncEnd]"; + break; + + default: + sentinelName = "[Sentinel]"; + break; + } + + ASMJIT_PROPAGATE(sb.append(sentinelName)); + break; + } + +#ifndef ASMJIT_NO_COMPILER + case BaseNode::kNodeFunc: { + const FuncNode* funcNode = node->as(); + + ASMJIT_PROPAGATE(formatLabel(sb, formatFlags, builder, funcNode->labelId())); + ASMJIT_PROPAGATE(sb.append(": ")); + + ASMJIT_PROPAGATE(formatFuncRets(sb, formatFlags, builder, funcNode->detail())); + ASMJIT_PROPAGATE(sb.append(" Func(")); + ASMJIT_PROPAGATE(formatFuncArgs(sb, formatFlags, builder, funcNode->detail(), funcNode->argPacks())); + ASMJIT_PROPAGATE(sb.append(")")); + break; + } + + case BaseNode::kNodeFuncRet: { + const FuncRetNode* retNode = node->as(); + ASMJIT_PROPAGATE(sb.append("[FuncRet]")); + + for (uint32_t i = 0; i < 2; i++) { + const Operand_& op = retNode->_opArray[i]; + if (!op.isNone()) { + ASMJIT_PROPAGATE(sb.append(i == 0 ? 
" " : ", ")); + ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, builder, builder->arch(), op)); + } + } + break; + } + + case BaseNode::kNodeInvoke: { + const InvokeNode* invokeNode = node->as(); + ASMJIT_PROPAGATE( + formatInstruction(sb, formatFlags, builder, + builder->arch(), + invokeNode->baseInst(), invokeNode->operands(), invokeNode->opCount())); + break; + } +#endif + + default: { + ASMJIT_PROPAGATE(sb.appendFormat("[UserNode:%u]", node->type())); + break; + } + } + + return kErrorOk; +} + + +Error formatNodeList( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder) noexcept { + + return formatNodeList(sb, formatFlags, builder, builder->firstNode(), nullptr); +} + +Error formatNodeList( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder, + const BaseNode* begin, + const BaseNode* end) noexcept { + + const BaseNode* node = begin; + while (node != end) { + ASMJIT_PROPAGATE(formatNode(sb, formatFlags, builder, node)); + ASMJIT_PROPAGATE(sb.append('\n')); + node = node->next(); + } + return kErrorOk; +} +#endif + +} // {Formatter} + +ASMJIT_END_NAMESPACE + +#endif diff --git a/libs/asmjit/src/asmjit/core/formatter.h b/libs/asmjit/src/asmjit/core/formatter.h new file mode 100644 index 0000000..14934ba --- /dev/null +++ b/libs/asmjit/src/asmjit/core/formatter.h @@ -0,0 +1,256 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_FORMATTER_H_INCLUDED +#define ASMJIT_CORE_FORMATTER_H_INCLUDED + +#include "../core/inst.h" +#include "../core/string.h" + +#ifndef ASMJIT_NO_LOGGING + +ASMJIT_BEGIN_NAMESPACE + +//! \addtogroup asmjit_logging +//! \{ + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +class BaseEmitter; +struct Operand_; + +#ifndef ASMJIT_NO_BUILDER +class BaseBuilder; +class BaseNode; +#endif + +#ifndef ASMJIT_NO_COMPILER +class BaseCompiler; +#endif + +// ============================================================================ +// [asmjit::FormatOptions] +// ============================================================================ + +//! Formatting options used by \ref Logger and \ref Formatter. +class FormatOptions { +public: + //! Format flags, see \ref Flags. + uint32_t _flags; + //! Indentation by type, see \ref IndentationType. + uint8_t _indentation[4]; + + //! Flags can enable a logging feature. + enum Flags : uint32_t { + //! No flags. 
+ kNoFlags = 0u, + + //! Show also binary form of each logged instruction (Assembler). + kFlagMachineCode = 0x00000001u, + //! Show a text explanation of some immediate values. + kFlagExplainImms = 0x00000002u, + //! Use hexadecimal notation of immediate values. + kFlagHexImms = 0x00000004u, + //! Use hexadecimal notation of address offsets. + kFlagHexOffsets = 0x00000008u, + //! Show casts between virtual register types (Compiler). + kFlagRegCasts = 0x00000010u, + //! Show positions associated with nodes (Compiler). + kFlagPositions = 0x00000020u, + //! Annotate nodes that are lowered by passes. + kFlagAnnotations = 0x00000040u, + + // TODO: These must go, keep this only for formatting. + //! Show an additional output from passes. + kFlagDebugPasses = 0x00000080u, + //! Show an additional output from RA. + kFlagDebugRA = 0x00000100u + }; + + //! Describes indentation type of code, label, or comment in logger output. + enum IndentationType : uint32_t { + //! Indentation used for instructions and directives. + kIndentationCode = 0u, + //! Indentation used for labels and function nodes. + kIndentationLabel = 1u, + //! Indentation used for comments (not inline comments). + kIndentationComment = 2u, + //! \cond INTERNAL + //! Reserved for future use. + kIndentationReserved = 3u + //! \endcond + }; + + //! \name Construction & Destruction + //! \{ + + //! Creates a default-initialized FormatOptions. + constexpr FormatOptions() noexcept + : _flags(0), + _indentation { 0, 0, 0, 0 } {} + + constexpr FormatOptions(const FormatOptions& other) noexcept = default; + inline FormatOptions& operator=(const FormatOptions& other) noexcept = default; + + //! Resets FormatOptions to its default initialized state. + inline void reset() noexcept { + _flags = 0; + _indentation[0] = 0; + _indentation[1] = 0; + _indentation[2] = 0; + _indentation[3] = 0; + } + + //! \} + + //! \name Accessors + //! \{ + + //! Returns format flags. + constexpr uint32_t flags() const noexcept { return _flags; } + //! Tests whether the given `flag` is set in format flags. + constexpr bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + //! Resets all format flags to `flags`. + inline void setFlags(uint32_t flags) noexcept { _flags = flags; } + //! Adds `flags` to format flags. + inline void addFlags(uint32_t flags) noexcept { _flags |= flags; } + //! Removes `flags` from format flags. + inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; } + + //! Returns indentation for the given `type`, see \ref IndentationType. + constexpr uint8_t indentation(uint32_t type) const noexcept { return _indentation[type]; } + //! Sets indentation for the given `type`, see \ref IndentationType. + inline void setIndentation(uint32_t type, uint32_t n) noexcept { _indentation[type] = uint8_t(n); } + //! Resets indentation for the given `type` to zero. + inline void resetIndentation(uint32_t type) noexcept { _indentation[type] = uint8_t(0); } + + //! \} +}; + +// ============================================================================ +// [asmjit::Formatter] +// ============================================================================ + +//! Provides formatting functionality to format operands, instructions, and nodes. +namespace Formatter { + +//! Appends a formatted `typeId` to the output string `sb`. +ASMJIT_API Error formatTypeId( + String& sb, + uint32_t typeId) noexcept; + +//! Appends a formatted `featureId` to the output string `sb`. +//! +//! See \ref BaseFeatures. 
+ASMJIT_API Error formatFeature( + String& sb, + uint32_t arch, + uint32_t featureId) noexcept; + +//! Appends a formatted register to the output string `sb`. +//! +//! \note Emitter is optional, but it's required to format virtual registers, +//! which won't be formatted properly if the `emitter` is not provided. +ASMJIT_API Error formatRegister( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t arch, + uint32_t regType, + uint32_t regId) noexcept; + +//! Appends a formatted label to the output string `sb`. +//! +//! \note Emitter is optional, but it's required to format named labels +//! properly, otherwise the formatted as it is an anonymous label. +ASMJIT_API Error formatLabel( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t labelId) noexcept; + +//! Appends a formatted operand to the output string `sb`. +//! +//! \note Emitter is optional, but it's required to format named labels and +//! virtual registers. See \ref formatRegister() and \ref formatLabel() for +//! more details. +ASMJIT_API Error formatOperand( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t arch, + const Operand_& op) noexcept; + +//! Appends a formatted instruction to the output string `sb`. +//! +//! \note Emitter is optional, but it's required to format named labels and +//! virtual registers. See \ref formatRegister() and \ref formatLabel() for +//! more details. +ASMJIT_API Error formatInstruction( + String& sb, + uint32_t formatFlags, + const BaseEmitter* emitter, + uint32_t arch, + const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept; + +#ifndef ASMJIT_NO_BUILDER +//! Appends a formatted node to the output string `sb`. +//! +//! The `node` must belong to the provided `builder`. +ASMJIT_API Error formatNode( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder, + const BaseNode* node) noexcept; + +//! Appends formatted nodes to the output string `sb`. +//! +//! All nodes that are part of the given `builder` will be appended. +ASMJIT_API Error formatNodeList( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder) noexcept; + +//! Appends formatted nodes to the output string `sb`. +//! +//! This function works the same as \ref formatNode(), but appends more nodes +//! to the output string, separating each node with a newline '\n' character. +ASMJIT_API Error formatNodeList( + String& sb, + uint32_t formatFlags, + const BaseBuilder* builder, + const BaseNode* begin, + const BaseNode* end) noexcept; +#endif + +} // {Formatter} + +//! \} + +ASMJIT_END_NAMESPACE + +#endif + +#endif // ASMJIT_CORE_FORMATTER_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/func.cpp b/libs/asmjit/src/asmjit/core/func.cpp index 79eab2e..bb131a0 100644 --- a/libs/asmjit/src/asmjit/core/func.cpp +++ b/libs/asmjit/src/asmjit/core/func.cpp @@ -22,61 +22,79 @@ // 3. This notice may not be removed or altered from any source distribution. 
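// ---------------------------------------------------------------------------
// [Editor's illustrative sketch - not part of this patch]
//
// A minimal example of driving the new Formatter::formatNodeList() entry
// point declared in formatter.h above. Assumptions: a JIT-capable host and a
// build where neither ASMJIT_NO_BUILDER nor ASMJIT_NO_LOGGING is defined.
// ---------------------------------------------------------------------------

#include <asmjit/x86.h>
#include <stdio.h>

using namespace asmjit;

int main() {
  JitRuntime rt;
  CodeHolder code;
  code.init(rt.environment());

  x86::Builder cb(&code);
  cb.mov(x86::eax, 42);  // A couple of nodes to format.
  cb.ret();

  String sb;
  Formatter::formatNodeList(sb, 0, &cb);  // One formatted node per line.
  printf("%s", sb.data());

  return 0;
}
// --------------------------------------------------------------------------- [end of sketch]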
#include "../core/api-build_p.h" -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/func.h" +#include "../core/operand.h" #include "../core/type.h" +#include "../core/funcargscontext_p.h" #ifdef ASMJIT_BUILD_X86 - #include "../x86/x86internal_p.h" - #include "../x86/x86operand.h" + #include "../x86/x86func_p.h" #endif #ifdef ASMJIT_BUILD_ARM - #include "../arm/arminternal_p.h" - #include "../arm/armoperand.h" + #include "../arm/armfunc_p.h" #endif ASMJIT_BEGIN_NAMESPACE +// ============================================================================ +// [asmjit::CallConv - Init / Reset] +// ============================================================================ + +ASMJIT_FAVOR_SIZE Error CallConv::init(uint32_t ccId, const Environment& environment) noexcept { + reset(); + +#ifdef ASMJIT_BUILD_X86 + if (environment.isFamilyX86()) + return x86::FuncInternal::initCallConv(*this, ccId, environment); +#endif + +#ifdef ASMJIT_BUILD_ARM + if (environment.isFamilyARM()) + return arm::FuncInternal::initCallConv(*this, ccId, environment); +#endif + + return DebugUtils::errored(kErrorInvalidArgument); +} + // ============================================================================ // [asmjit::FuncDetail - Init / Reset] // ============================================================================ -ASMJIT_FAVOR_SIZE Error FuncDetail::init(const FuncSignature& sign) { - uint32_t ccId = sign.callConv(); - CallConv& cc = _callConv; +ASMJIT_FAVOR_SIZE Error FuncDetail::init(const FuncSignature& signature, const Environment& environment) noexcept { + uint32_t ccId = signature.callConv(); + uint32_t argCount = signature.argCount(); - uint32_t argCount = sign.argCount(); if (ASMJIT_UNLIKELY(argCount > Globals::kMaxFuncArgs)) return DebugUtils::errored(kErrorInvalidArgument); - ASMJIT_PROPAGATE(cc.init(ccId)); + CallConv& cc = _callConv; + ASMJIT_PROPAGATE(cc.init(ccId, environment)); - uint32_t gpSize = (cc.archId() == ArchInfo::kIdX86) ? 
4 : 8; - uint32_t deabstractDelta = Type::deabstractDeltaOfSize(gpSize); + uint32_t registerSize = Environment::registerSizeFromArch(cc.arch()); + uint32_t deabstractDelta = Type::deabstractDeltaOfSize(registerSize); - const uint8_t* args = sign.args(); - for (uint32_t i = 0; i < argCount; i++) { - FuncValue& arg = _args[i]; - arg.initTypeId(Type::deabstract(args[i], deabstractDelta)); + const uint8_t* signatureArgs = signature.args(); + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + FuncValuePack& argPack = _args[argIndex]; + argPack[0].initTypeId(Type::deabstract(signatureArgs[argIndex], deabstractDelta)); } _argCount = uint8_t(argCount); - _vaIndex = uint8_t(sign.vaIndex()); + _vaIndex = uint8_t(signature.vaIndex()); - uint32_t ret = sign.ret(); - if (ret != Type::kIdVoid) { + uint32_t ret = signature.ret(); + if (ret != Type::kIdVoid) _rets[0].initTypeId(Type::deabstract(ret, deabstractDelta)); - _retCount = 1; - } #ifdef ASMJIT_BUILD_X86 - if (CallConv::isX86Family(ccId)) - return x86::X86Internal::initFuncDetail(*this, sign, gpSize); + if (environment.isFamilyX86()) + return x86::FuncInternal::initFuncDetail(*this, signature, registerSize); #endif #ifdef ASMJIT_BUILD_ARM - if (CallConv::isArmFamily(ccId)) - return arm::ArmInternal::initFuncDetail(*this, sign, gpSize); + if (environment.isFamilyARM()) + return arm::FuncInternal::initFuncDetail(*this, signature, registerSize); #endif // We should never bubble here as if `cc.init()` succeeded then there has to @@ -85,37 +103,186 @@ ASMJIT_FAVOR_SIZE Error FuncDetail::init(const FuncSignature& sign) { } // ============================================================================ -// [asmjit::FuncFrame - Init / Reset / Finalize] +// [asmjit::FuncFrame - Init / Finalize] // ============================================================================ ASMJIT_FAVOR_SIZE Error FuncFrame::init(const FuncDetail& func) noexcept { - uint32_t ccId = func.callConv().id(); + uint32_t arch = func.callConv().arch(); + if (!Environment::isValidArch(arch)) + return DebugUtils::errored(kErrorInvalidArch); -#ifdef ASMJIT_BUILD_X86 - if (CallConv::isX86Family(ccId)) - return x86::X86Internal::initFuncFrame(*this, func); -#endif + const ArchTraits& archTraits = ArchTraits::byArch(arch); -#ifdef ASMJIT_BUILD_ARM - if (CallConv::isArmFamily(ccId)) - return arm::ArmInternal::initFuncFrame(*this, func); -#endif + // Initializing FuncFrame means making a copy of some properties of `func`. + // Properties like `_localStackSize` will be set by the user before the frame + // is finalized. + reset(); - return DebugUtils::errored(kErrorInvalidArgument); + _arch = uint8_t(arch); + _spRegId = uint8_t(archTraits.spRegId()); + _saRegId = uint8_t(BaseReg::kIdBad); + + uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment(); + uint32_t minDynamicAlignment = Support::max(naturalStackAlignment, 16); + + if (minDynamicAlignment == naturalStackAlignment) + minDynamicAlignment <<= 1; + + _naturalStackAlignment = uint8_t(naturalStackAlignment); + _minDynamicAlignment = uint8_t(minDynamicAlignment); + _redZoneSize = uint8_t(func.redZoneSize()); + _spillZoneSize = uint8_t(func.spillZoneSize()); + _finalStackAlignment = uint8_t(_naturalStackAlignment); + + if (func.hasFlag(CallConv::kFlagCalleePopsStack)) { + _calleeStackCleanup = uint16_t(func.argStackSize()); + } + + // Initial masks of dirty and preserved registers. 
+ for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { + _dirtyRegs[group] = func.usedRegs(group); + _preservedRegs[group] = func.preservedRegs(group); + } + + // Exclude stack pointer - this register is never included in saved GP regs. + _preservedRegs[BaseReg::kGroupGp] &= ~Support::bitMask(archTraits.spRegId()); + + // The size and alignment of save/restore area of registers for each significant register group. + memcpy(_saveRestoreRegSize, func.callConv()._saveRestoreRegSize, sizeof(_saveRestoreRegSize)); + memcpy(_saveRestoreAlignment, func.callConv()._saveRestoreAlignment, sizeof(_saveRestoreAlignment)); + + return kErrorOk; } ASMJIT_FAVOR_SIZE Error FuncFrame::finalize() noexcept { -#ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId())) - return x86::X86Internal::finalizeFuncFrame(*this); -#endif + if (!Environment::isValidArch(arch())) + return DebugUtils::errored(kErrorInvalidArch); -#ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId())) - return arm::ArmInternal::finalizeFuncFrame(*this); -#endif + const ArchTraits& archTraits = ArchTraits::byArch(arch()); - return DebugUtils::errored(kErrorInvalidArgument); + uint32_t registerSize = _saveRestoreRegSize[BaseReg::kGroupGp]; + uint32_t vectorSize = _saveRestoreRegSize[BaseReg::kGroupVec]; + uint32_t returnAddressSize = archTraits.hasLinkReg() ? 0u : registerSize; + + // The final stack alignment must be updated accordingly to call and local stack alignments. + uint32_t stackAlignment = _finalStackAlignment; + ASMJIT_ASSERT(stackAlignment == Support::max(_naturalStackAlignment, + _callStackAlignment, + _localStackAlignment)); + + bool hasFP = hasPreservedFP(); + bool hasDA = hasDynamicAlignment(); + + uint32_t kSp = archTraits.spRegId(); + uint32_t kFp = archTraits.fpRegId(); + uint32_t kLr = archTraits.linkRegId(); + + // Make frame pointer dirty if the function uses it. + if (hasFP) { + _dirtyRegs[BaseReg::kGroupGp] |= Support::bitMask(kFp); + + // Currently required by ARM, if this works differently across architectures + // we would have to generalize most likely in CallConv. + if (kLr != BaseReg::kIdBad) + _dirtyRegs[BaseReg::kGroupGp] |= Support::bitMask(kLr); + } + + // These two are identical if the function doesn't align its stack dynamically. + uint32_t saRegId = _saRegId; + if (saRegId == BaseReg::kIdBad) + saRegId = kSp; + + // Fix stack arguments base-register from SP to FP in case it was not picked + // before and the function performs dynamic stack alignment. + if (hasDA && saRegId == kSp) + saRegId = kFp; + + // Mark as dirty any register but SP if used as SA pointer. + if (saRegId != kSp) + _dirtyRegs[BaseReg::kGroupGp] |= Support::bitMask(saRegId); + + _spRegId = uint8_t(kSp); + _saRegId = uint8_t(saRegId); + + // Setup stack size used to save preserved registers. + uint32_t saveRestoreSizes[2] {}; + for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) + saveRestoreSizes[size_t(!archTraits.hasPushPop(group))] + += Support::alignUp(Support::popcnt(savedRegs(group)) * saveRestoreRegSize(group), saveRestoreAlignment(group)); + + _pushPopSaveSize = uint16_t(saveRestoreSizes[0]); + _extraRegSaveSize = uint16_t(saveRestoreSizes[1]); + + uint32_t v = 0; // The beginning of the stack frame relative to SP after prolog. + v += callStackSize(); // Count 'callStackSize' <- This is used to call functions. + v = Support::alignUp(v, stackAlignment); // Align to function's stack alignment. + + _localStackOffset = v; // Store 'localStackOffset' <- Function's local stack starts here. 
+ v += localStackSize(); // Count 'localStackSize' <- Function's local stack ends here. + + // If the function's stack must be aligned, calculate the alignment necessary + // to store vector registers, and set `FuncFrame::kAttrAlignedVecSR` to inform + // PEI that it can use instructions that perform aligned stores/loads. + if (stackAlignment >= vectorSize && _extraRegSaveSize) { + addAttributes(FuncFrame::kAttrAlignedVecSR); + v = Support::alignUp(v, vectorSize); // Align 'extraRegSaveOffset'. + } + + _extraRegSaveOffset = v; // Store 'extraRegSaveOffset' <- Non-GP save/restore starts here. + v += _extraRegSaveSize; // Count 'extraRegSaveSize' <- Non-GP save/restore ends here. + + // Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset. + if (hasDA && !hasFP) { + _daOffset = v; // Store 'daOffset' <- DA pointer would be stored here. + v += registerSize; // Count 'daOffset'. + } + else { + _daOffset = FuncFrame::kTagInvalidOffset; + } + + // Link Register + // ------------- + // + // The stack is aligned after the function call as the return address is + // stored in a link register. Some architectures may require to always + // have aligned stack after PUSH/POP operation, which is represented by + // ArchTraits::stackAlignmentConstraint(). + // + // No Link Register (X86/X64) + // -------------------------- + // + // The return address should be stored after GP save/restore regs. It has + // the same size as `registerSize` (basically the native register/pointer + // size). We don't adjust it now as `v` now contains the exact size that the + // function requires to adjust (call frame + stack frame, vec stack size). + // The stack (if we consider this size) is misaligned now, as it's always + // aligned before the function call - when `call()` is executed it pushes + // the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes + // (depending on the architecture). So count number of bytes needed to align + // it up to the function's CallFrame (the beginning). + if (v || hasFuncCalls() || !returnAddressSize) + v += Support::alignUpDiff(v + pushPopSaveSize() + returnAddressSize, stackAlignment); + + _pushPopSaveOffset = v; // Store 'pushPopSaveOffset' <- Function's push/pop save/restore starts here. + _stackAdjustment = v; // Store 'stackAdjustment' <- SA used by 'add SP, SA' and 'sub SP, SA'. + v += _pushPopSaveSize; // Count 'pushPopSaveSize' <- Function's push/pop save/restore ends here. + _finalStackSize = v; // Store 'finalStackSize' <- Final stack used by the function. + + if (!archTraits.hasLinkReg()) + v += registerSize; // Count 'ReturnAddress' <- As CALL pushes onto stack. + + // If the function performs dynamic stack alignment then the stack-adjustment must be aligned. + if (hasDA) + _stackAdjustment = Support::alignUp(_stackAdjustment, stackAlignment); + + // Calculate where the function arguments start relative to SP. + _saOffsetFromSP = hasDA ? FuncFrame::kTagInvalidOffset : v; + + // Calculate where the function arguments start relative to FP or user-provided register. + _saOffsetFromSA = hasFP ? returnAddressSize + registerSize // Return address + frame pointer. + : returnAddressSize + _pushPopSaveSize; // Return address + all push/pop regs. 
+ + return kErrorOk; } // ============================================================================ @@ -123,22 +290,21 @@ ASMJIT_FAVOR_SIZE Error FuncFrame::finalize() noexcept { // ============================================================================ ASMJIT_FAVOR_SIZE Error FuncArgsAssignment::updateFuncFrame(FuncFrame& frame) const noexcept { + uint32_t arch = frame.arch(); const FuncDetail* func = funcDetail(); - if (!func) return DebugUtils::errored(kErrorInvalidState); - - uint32_t ccId = func->callConv().id(); -#ifdef ASMJIT_BUILD_X86 - if (CallConv::isX86Family(ccId)) - return x86::X86Internal::argsToFuncFrame(*this, frame); -#endif + if (!func) + return DebugUtils::errored(kErrorInvalidState); -#ifdef ASMJIT_BUILD_ARM - if (CallConv::isArmFamily(ccId)) - return arm::ArmInternal::argsToFuncFrame(*this, frame); -#endif + RAConstraints constraints; + ASMJIT_PROPAGATE(constraints.init(arch)); - return DebugUtils::errored(kErrorInvalidArch); + FuncArgsContext ctx; + ASMJIT_PROPAGATE(ctx.initWorkData(frame, *this, &constraints)); + ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame)); + ASMJIT_PROPAGATE(ctx.markScratchRegs(frame)); + ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame)); + return kErrorOk; } ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/func.h b/libs/asmjit/src/asmjit/core/func.h index 36ebf9b..6cfd044 100644 --- a/libs/asmjit/src/asmjit/core/func.h +++ b/libs/asmjit/src/asmjit/core/func.h @@ -24,40 +24,374 @@ #ifndef ASMJIT_CORE_FUNC_H_INCLUDED #define ASMJIT_CORE_FUNC_H_INCLUDED -#include "../core/arch.h" -#include "../core/callconv.h" +#include "../core/archtraits.h" +#include "../core/environment.h" #include "../core/operand.h" #include "../core/type.h" #include "../core/support.h" ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_func +//! \addtogroup asmjit_function //! \{ // ============================================================================ -// [asmjit::FuncArgIndex] +// [asmjit::CallConv] // ============================================================================ -//! Function argument index (lo/hi). -enum FuncArgIndex : uint32_t { - //! Maximum number of function arguments supported by AsmJit. - kFuncArgCount = Globals::kMaxFuncArgs, - //! Extended maximum number of arguments (used internally). - kFuncArgCountLoHi = kFuncArgCount * 2, +//! Function calling convention. +//! +//! Function calling convention is a scheme that defines how function parameters +//! are passed and how function returns its result. AsmJit defines a variety of +//! architecture and OS specific calling conventions and also provides a compile +//! time detection to make the code-generation easier. +struct CallConv { + //! Calling convention id, see \ref Id. + uint8_t _id; + //! Architecture identifier, see \ref Environment::Arch. + uint8_t _arch; + //! Register assignment strategy, see \ref Strategy. + uint8_t _strategy; + + //! Red zone size (AMD64 == 128 bytes). + uint8_t _redZoneSize; + //! Spill zone size (WIN-X64 == 32 bytes). + uint8_t _spillZoneSize; + //! Natural stack alignment as defined by OS/ABI. + uint8_t _naturalStackAlignment; + + //! Flags. + uint16_t _flags; + + //! Size to save/restore per register group. + uint8_t _saveRestoreRegSize[BaseReg::kGroupVirt]; + //! Alignment of save/restore groups. + uint8_t _saveRestoreAlignment[BaseReg::kGroupVirt]; + + //! Mask of all passed registers, per group. + uint32_t _passedRegs[BaseReg::kGroupVirt]; + //! Mask of all preserved registers, per group. + uint32_t _preservedRegs[BaseReg::kGroupVirt]; - //! 
Index to the LO part of function argument (default).
+  //! Internal limits of AsmJit's CallConv.
+  enum Limits : uint32_t {
+    //! Maximum number of register arguments per register group.
+    //!
+    //! \note This is not really AsmJit's limitation, it's just the number
+    //! that makes sense considering all common calling conventions. Usually
+    //! even conventions that use registers to pass function arguments are
+    //! limited to 8 or fewer arguments passed via registers per group.
+    kMaxRegArgsPerGroup = 16
+  };
+
+  //! Passed registers' order.
+  union RegOrder {
+    //! Passed registers, ordered.
+    uint8_t id[kMaxRegArgsPerGroup];
+    //! Packed IDs in `uint32_t` array.
+    uint32_t packed[(kMaxRegArgsPerGroup + 3) / 4];
+  };
+
+  //! Passed registers' order, per register group.
+  RegOrder _passedOrder[BaseReg::kGroupVirt];
+
+  //! Calling convention id.
+  //!
+  //! Calling conventions can be divided into the following groups:
+  //!
+  //!   - Universal - calling conventions are applicable to any target. They
+  //!     will be converted to a target dependent calling convention at runtime
+  //!     by \ref init(). The purpose of these conventions is to make using
+  //!     functions less target dependent and closer to how they are declared
+  //!     in C and C++.
   //!
-  //! This value is typically omitted and added only if there is HI argument
-  //! accessed.
-  kFuncArgLo = 0,
+  //!   - Target specific - calling conventions that are used by a particular
+  //!     architecture and ABI. For example Windows 64-bit calling convention
+  //!     and AMD64 SystemV calling convention.
+  enum Id : uint32_t {
+    //! None or invalid (can't be used).
+    kIdNone = 0,
+
+    // ------------------------------------------------------------------------
+    // [Universal Calling Conventions]
+    // ------------------------------------------------------------------------
+
+    //! Standard function call or explicit `__cdecl` where it can be specified.
+    //!
+    //! This is a universal calling convention, which is used to initialize
+    //! specific calling conventions based on architecture, platform, and its ABI.
+    kIdCDecl = 1,
+
+    //! `__stdcall` on targets that support this calling convention (X86).
+    //!
+    //! \note This calling convention is only supported on 32-bit X86. If used
+    //! on an environment that doesn't support this calling convention it will
+    //! be replaced by \ref kIdCDecl.
+    kIdStdCall = 2,
+
+    //! `__fastcall` on targets that support this calling convention (X86).
+    //!
+    //! \note This calling convention is only supported on 32-bit X86. If used
+    //! on an environment that doesn't support this calling convention it will
+    //! be replaced by \ref kIdCDecl.
+    kIdFastCall = 3,
+
+    //! `__vectorcall` on targets that support this calling convention (X86/X64).
+    //!
+    //! \note This calling convention is only supported on 32-bit and 64-bit
+    //! X86 architecture on Windows platform. If used on an environment that
+    //! doesn't support this calling convention it will be replaced by \ref kIdCDecl.
+    kIdVectorCall = 4,
+
+    //! `__thiscall` on targets that support this calling convention (X86).
+    //!
+    //! \note This calling convention is only supported on 32-bit X86 Windows
+    //! platform. If used on an environment that doesn't support this calling
+    //! convention it will be replaced by \ref kIdCDecl.
+    kIdThisCall = 5,
+
+    //! `__attribute__((regparm(1)))` convention (GCC and Clang).
+    kIdRegParm1 = 6,
+    //! `__attribute__((regparm(2)))` convention (GCC and Clang).
+    kIdRegParm2 = 7,
+    //! `__attribute__((regparm(3)))` convention (GCC and Clang).
+ kIdRegParm3 = 8, + + //! Soft-float calling convention (ARM). + //! + //! Floating point arguments are passed via general purpose registers. + kIdSoftFloat = 9, + + //! Hard-float calling convention (ARM). + //! + //! Floating point arguments are passed via SIMD registers. + kIdHardFloat = 10, + + //! AsmJit specific calling convention designed for calling functions + //! inside a multimedia code that don't use many registers internally, + //! but are long enough to be called and not inlined. These functions are + //! usually used to calculate trigonometric functions, logarithms, etc... + kIdLightCall2 = 16, + kIdLightCall3 = 17, + kIdLightCall4 = 18, + + // ------------------------------------------------------------------------ + // [ABI-Specific Calling Conventions] + // ------------------------------------------------------------------------ + + //! X64 System-V calling convention. + kIdX64SystemV = 32, + //! X64 Windows calling convention. + kIdX64Windows = 33, + + // ------------------------------------------------------------------------ + // [Host] + // ------------------------------------------------------------------------ + + //! Host calling convention detected at compile-time. + kIdHost = +#if ASMJIT_ARCH_ARM == 32 && defined(__SOFTFP__) + kIdSoftFloat +#elif ASMJIT_ARCH_ARM == 32 && !defined(__SOFTFP__) + kIdHardFloat +#else + kIdCDecl +#endif + +#ifndef ASMJIT_NO_DEPRECATE + , kIdHostCDecl = kIdCDecl + , kIdHostStdCall = kIdStdCall + , kIdHostFastCall = kIdFastCall + , kIdHostLightCall2 = kIdLightCall2 + , kIdHostLightCall3 = kIdLightCall3 + , kIdHostLightCall4 = kIdLightCall4 +#endif // !ASMJIT_NO_DEPRECATE + }; - //! Index to the HI part of function argument. + //! Strategy used to assign registers to function arguments. //! - //! HI part of function argument depends on target architecture. On x86 it's - //! typically used to transfer 64-bit integers (they form a pair of 32-bit - //! integers). - kFuncArgHi = kFuncArgCount + //! This is AsmJit specific. It basically describes how AsmJit should convert + //! the function arguments defined by `FuncSignature` into register IDs and + //! stack offsets. The default strategy `kStrategyDefault` assigns registers + //! and then stack whereas `kStrategyWin64` strategy does register shadowing + //! as defined by WIN64 calling convention - it applies to 64-bit calling + //! conventions only. + enum Strategy : uint32_t { + //! Default register assignment strategy. + kStrategyDefault = 0, + //! Windows 64-bit ABI register assignment strategy. + kStrategyX64Windows = 1, + //! Windows 64-bit __vectorcall register assignment strategy. + kStrategyX64VectorCall = 2, + + //! Number of assignment strategies. + kStrategyCount = 3 + }; + + //! Calling convention flags. + enum Flags : uint32_t { + //! Callee is responsible for cleaning up the stack. + kFlagCalleePopsStack = 0x0001u, + //! Pass vector arguments indirectly (as a pointer). + kFlagIndirectVecArgs = 0x0002u, + //! Pass F32 and F64 arguments via VEC128 register. + kFlagPassFloatsByVec = 0x0004u, + //! Pass MMX and vector arguments via stack if the function has variable arguments. + kFlagPassVecByStackIfVA = 0x0008u, + //! MMX registers are passed and returned via GP registers. + kFlagPassMmxByGp = 0x0010u, + //! MMX registers are passed and returned via XMM registers. + kFlagPassMmxByXmm = 0x0020u, + //! Calling convention can be used with variable arguments. + kFlagVarArgCompatible = 0x0080u + }; + + //! \name Construction & Destruction + //! \{ + + //! 
Initializes this calling convention to the given `ccId` based on the + //! `environment`. + //! + //! See \ref Id and \ref Environment for more details. + ASMJIT_API Error init(uint32_t ccId, const Environment& environment) noexcept; + + //! Resets this CallConv struct into a defined state. + //! + //! It's recommended to reset the \ref CallConv struct in case you would + //! like create a custom calling convention as it prevents from using an + //! uninitialized data (CallConv doesn't have a constructor that would + //! initialize it, it's just a struct). + inline void reset() noexcept { + memset(this, 0, sizeof(*this)); + memset(_passedOrder, 0xFF, sizeof(_passedOrder)); + } + + //! \} + + //! \name Accessors + //! \{ + + //! Returns the calling convention id, see `Id`. + inline uint32_t id() const noexcept { return _id; } + //! Sets the calling convention id, see `Id`. + inline void setId(uint32_t id) noexcept { _id = uint8_t(id); } + + //! Returns the calling function architecture id. + inline uint32_t arch() const noexcept { return _arch; } + //! Sets the calling function architecture id. + inline void setArch(uint32_t arch) noexcept { _arch = uint8_t(arch); } + + //! Returns the strategy used to assign registers to arguments, see `Strategy`. + inline uint32_t strategy() const noexcept { return _strategy; } + //! Sets the strategy used to assign registers to arguments, see `Strategy`. + inline void setStrategy(uint32_t strategy) noexcept { _strategy = uint8_t(strategy); } + + //! Tests whether the calling convention has the given `flag` set. + inline bool hasFlag(uint32_t flag) const noexcept { return (uint32_t(_flags) & flag) != 0; } + //! Returns the calling convention flags, see `Flags`. + inline uint32_t flags() const noexcept { return _flags; } + //! Adds the calling convention flags, see `Flags`. + inline void setFlags(uint32_t flag) noexcept { _flags = uint16_t(flag); }; + //! Adds the calling convention flags, see `Flags`. + inline void addFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags | flags); }; + + //! Tests whether this calling convention specifies 'RedZone'. + inline bool hasRedZone() const noexcept { return _redZoneSize != 0; } + //! Tests whether this calling convention specifies 'SpillZone'. + inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; } + + //! Returns size of 'RedZone'. + inline uint32_t redZoneSize() const noexcept { return _redZoneSize; } + //! Returns size of 'SpillZone'. + inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; } + + //! Sets size of 'RedZone'. + inline void setRedZoneSize(uint32_t size) noexcept { _redZoneSize = uint8_t(size); } + //! Sets size of 'SpillZone'. + inline void setSpillZoneSize(uint32_t size) noexcept { _spillZoneSize = uint8_t(size); } + + //! Returns a natural stack alignment. + inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; } + //! Sets a natural stack alignment. + //! + //! This function can be used to override the default stack alignment in case + //! that you know that it's alignment is different. For example it allows to + //! implement custom calling conventions that guarantee higher stack alignment. + inline void setNaturalStackAlignment(uint32_t value) noexcept { _naturalStackAlignment = uint8_t(value); } + + //! Returns the size of a register (or its part) to be saved and restored of the given `group`. + inline uint32_t saveRestoreRegSize(uint32_t group) const noexcept { return _saveRestoreRegSize[group]; } + //! 
Sets the size of a vector register (or its part) to be saved and restored. + inline void setSaveRestoreRegSize(uint32_t group, uint32_t size) noexcept { _saveRestoreRegSize[group] = uint8_t(size); } + + //! Returns the alignment of a save-restore area of the given `group`. + inline uint32_t saveRestoreAlignment(uint32_t group) const noexcept { return _saveRestoreAlignment[group]; } + //! Sets the alignment of a save-restore area of the given `group`. + inline void setSaveRestoreAlignment(uint32_t group, uint32_t alignment) noexcept { _saveRestoreAlignment[group] = uint8_t(alignment); } + + //! Returns the order of passed registers of the given `group`, see \ref BaseReg::RegGroup. + inline const uint8_t* passedOrder(uint32_t group) const noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + return _passedOrder[group].id; + } + + //! Returns the mask of passed registers of the given `group`, see \ref BaseReg::RegGroup. + inline uint32_t passedRegs(uint32_t group) const noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + return _passedRegs[group]; + } + + inline void _setPassedPacked(uint32_t group, uint32_t p0, uint32_t p1, uint32_t p2, uint32_t p3) noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + + _passedOrder[group].packed[0] = p0; + _passedOrder[group].packed[1] = p1; + _passedOrder[group].packed[2] = p2; + _passedOrder[group].packed[3] = p3; + } + + //! Resets the order and mask of passed registers. + inline void setPassedToNone(uint32_t group) noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + + _setPassedPacked(group, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); + _passedRegs[group] = 0u; + } + + //! Sets the order and mask of passed registers. + inline void setPassedOrder(uint32_t group, uint32_t a0, uint32_t a1 = 0xFF, uint32_t a2 = 0xFF, uint32_t a3 = 0xFF, uint32_t a4 = 0xFF, uint32_t a5 = 0xFF, uint32_t a6 = 0xFF, uint32_t a7 = 0xFF) noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + + // NOTE: This should always be called with all arguments known at compile time, + // so even if it looks scary it should be translated into few instructions. + _setPassedPacked(group, Support::bytepack32_4x8(a0, a1, a2, a3), + Support::bytepack32_4x8(a4, a5, a6, a7), + 0xFFFFFFFFu, + 0xFFFFFFFFu); + + _passedRegs[group] = (a0 != 0xFF ? 1u << a0 : 0u) | + (a1 != 0xFF ? 1u << a1 : 0u) | + (a2 != 0xFF ? 1u << a2 : 0u) | + (a3 != 0xFF ? 1u << a3 : 0u) | + (a4 != 0xFF ? 1u << a4 : 0u) | + (a5 != 0xFF ? 1u << a5 : 0u) | + (a6 != 0xFF ? 1u << a6 : 0u) | + (a7 != 0xFF ? 1u << a7 : 0u) ; + } + + //! Returns preserved register mask of the given `group`, see \ref BaseReg::RegGroup. + inline uint32_t preservedRegs(uint32_t group) const noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + return _preservedRegs[group]; + } + + //! Sets preserved register mask of the given `group`, see \ref BaseReg::RegGroup. + inline void setPreservedRegs(uint32_t group, uint32_t regs) noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + _preservedRegs[group] = regs; + } + + //! \} }; // ============================================================================ @@ -160,7 +494,7 @@ class FuncSignatureT : public FuncSignature { //! Function signature builder. class FuncSignatureBuilder : public FuncSignature { public: - uint8_t _builderArgList[kFuncArgCount]; + uint8_t _builderArgList[Globals::kMaxFuncArgs]; //! \name Initializtion & Reset //! \{ @@ -191,7 +525,7 @@ class FuncSignatureBuilder : public FuncSignature { //! 
Appends an argument of `type` to the function prototype. inline void addArg(uint32_t type) noexcept { - ASMJIT_ASSERT(_argCount < kFuncArgCount); + ASMJIT_ASSERT(_argCount < Globals::kMaxFuncArgs); _builderArgList[_argCount++] = uint8_t(type); } //! Appends an argument of type based on `T` to the function prototype. @@ -205,8 +539,8 @@ class FuncSignatureBuilder : public FuncSignature { // [asmjit::FuncValue] // ============================================================================ -//! Argument or return value as defined by `FuncSignature`, but with register -//! or stack address (and other metadata) assigned to it. +//! Argument or return value (or its part) as defined by `FuncSignature`, but +//! with register or stack address (and other metadata) assigned. struct FuncValue { uint32_t _data; @@ -275,6 +609,8 @@ struct FuncValue { //! \name Accessors //! \{ + inline explicit operator bool() const noexcept { return _data != 0; } + inline void _replaceValue(uint32_t mask, uint32_t value) noexcept { _data = (_data & ~mask) | value; } //! Tests whether the `FuncValue` has a flag `flag` set. @@ -323,6 +659,72 @@ struct FuncValue { //! \} }; +// ============================================================================ +// [asmjit::FuncValuePack] +// ============================================================================ + +//! Contains multiple `FuncValue` instances in an array so functions that use +//! multiple registers for arguments or return values can represent all inputs +//! and outputs. +struct FuncValuePack { +public: + //! Values data. + FuncValue _values[Globals::kMaxValuePack]; + + inline void reset() noexcept { + for (size_t i = 0; i < Globals::kMaxValuePack; i++) + _values[i].reset(); + } + + //! Calculates how many values are in the pack, checking for non-values + //! from the end. 
+ inline uint32_t count() const noexcept { + uint32_t n = Globals::kMaxValuePack; + while (n && !_values[n - 1]) + n--; + return n; + } + + inline FuncValue* values() noexcept { return _values; } + inline const FuncValue* values() const noexcept { return _values; } + + inline void resetValue(size_t index) noexcept { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + _values[index].reset(); + } + + inline bool hasValue(size_t index) noexcept { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + return _values[index].isInitialized(); + } + + inline void assignReg(size_t index, const BaseReg& reg, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + ASMJIT_ASSERT(reg.isPhysReg()); + _values[index].initReg(reg.type(), reg.id(), typeId); + } + + inline void assignReg(size_t index, uint32_t regType, uint32_t regId, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + _values[index].initReg(regType, regId, typeId); + } + + inline void assignStack(size_t index, int32_t offset, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + _values[index].initStack(offset, typeId); + } + + inline FuncValue& operator[](size_t index) { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + return _values[index]; + } + + inline const FuncValue& operator[](size_t index) const { + ASMJIT_ASSERT(index < Globals::kMaxValuePack); + return _values[index]; + } +}; + // ============================================================================ // [asmjit::FuncDetail] // ============================================================================ @@ -338,20 +740,18 @@ class FuncDetail { CallConv _callConv; //! Number of function arguments. uint8_t _argCount; - //! Number of function return values. - uint8_t _retCount; //! Variable arguments index of `kNoVarArgs`. uint8_t _vaIndex; //! Reserved for future use. - uint8_t _reserved; + uint16_t _reserved; //! Registers that contains arguments. uint32_t _usedRegs[BaseReg::kGroupVirt]; //! Size of arguments passed by stack. uint32_t _argStackSize; - //! Function return values. - FuncValue _rets[2]; + //! Function return value(s). + FuncValuePack _rets; //! Function arguments. - FuncValue _args[kFuncArgCountLoHi]; + FuncValuePack _args[Globals::kMaxFuncArgs]; enum : uint8_t { //! Doesn't have variable number of arguments (`...`). @@ -365,7 +765,7 @@ class FuncDetail { inline FuncDetail(const FuncDetail& other) noexcept = default; //! Initializes this `FuncDetail` to the given signature. - ASMJIT_API Error init(const FuncSignature& sign); + ASMJIT_API Error init(const FuncSignature& signature, const Environment& environment) noexcept; inline void reset() noexcept { memset(this, 0, sizeof(*this)); } //! \} @@ -381,52 +781,61 @@ class FuncDetail { //! Checks whether a CallConv `flag` is set, see `CallConv::Flags`. inline bool hasFlag(uint32_t ccFlag) const noexcept { return _callConv.hasFlag(ccFlag); } - //! Returns count of function return values. - inline uint32_t retCount() const noexcept { return _retCount; } + //! Tests whether the function has a return value. + inline bool hasRet() const noexcept { return bool(_rets[0]); } //! Returns the number of function arguments. inline uint32_t argCount() const noexcept { return _argCount; } - //! Tests whether the function has a return value. - inline bool hasRet() const noexcept { return _retCount != 0; } - //! Returns function return value associated with the given `index`. 
- inline FuncValue& ret(uint32_t index = 0) noexcept { - ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_rets)); - return _rets[index]; + //! Returns function return values. + inline FuncValuePack& retPack() noexcept { return _rets; } + //! Returns function return values. + inline const FuncValuePack& retPack() const noexcept { return _rets; } + + //! Returns a function return value associated with the given `valueIndex`. + inline FuncValue& ret(size_t valueIndex = 0) noexcept { return _rets[valueIndex]; } + //! Returns a function return value associated with the given `valueIndex` (const). + inline const FuncValue& ret(size_t valueIndex = 0) const noexcept { return _rets[valueIndex]; } + + //! Returns function argument packs array. + inline FuncValuePack* argPacks() noexcept { return _args; } + //! Returns function argument packs array (const). + inline const FuncValuePack* argPacks() const noexcept { return _args; } + + //! Returns function argument pack at the given `argIndex`. + inline FuncValuePack& argPack(size_t argIndex) noexcept { + ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs); + return _args[argIndex]; } - //! Returns function return value associated with the given `index` (const). - inline const FuncValue& ret(uint32_t index = 0) const noexcept { - ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_rets)); - return _rets[index]; - } - - //! Returns function arguments array. - inline FuncValue* args() noexcept { return _args; } - //! Returns function arguments array (const). - inline const FuncValue* args() const noexcept { return _args; } - inline bool hasArg(uint32_t index) const noexcept { - ASMJIT_ASSERT(index < kFuncArgCountLoHi); - return _args[index].isInitialized(); + //! Returns function argument pack at the given `argIndex` (const). + inline const FuncValuePack& argPack(size_t argIndex) const noexcept { + ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs); + return _args[argIndex]; } - //! Returns function argument at the given `index`. - inline FuncValue& arg(uint32_t index) noexcept { - ASMJIT_ASSERT(index < kFuncArgCountLoHi); - return _args[index]; + //! Returns an argument at `valueIndex` from the argument pack at the given `argIndex`. + inline FuncValue& arg(size_t argIndex, size_t valueIndex = 0) noexcept { + ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs); + return _args[argIndex][valueIndex]; } - //! Returnsfunction argument at the given index `index` (const). - inline const FuncValue& arg(uint32_t index) const noexcept { - ASMJIT_ASSERT(index < kFuncArgCountLoHi); - return _args[index]; + //! Returns an argument at `valueIndex` from the argument pack at the given `argIndex` (const). + inline const FuncValue& arg(size_t argIndex, size_t valueIndex = 0) const noexcept { + ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs); + return _args[argIndex][valueIndex]; } - inline void resetArg(uint32_t index) noexcept { - ASMJIT_ASSERT(index < kFuncArgCountLoHi); - _args[index].reset(); + //! Resets an argument at the given `argIndex`. + //! + //! If the argument is a parameter pack (has multiple values) all values are reset. + inline void resetArg(size_t argIndex) noexcept { + ASMJIT_ASSERT(argIndex < Globals::kMaxFuncArgs); + _args[argIndex].reset(); } + //! Tests whether the function has variable arguments. inline bool hasVarArgs() const noexcept { return _vaIndex != kNoVarArgs; } + //! Returns an index of a first variable argument. inline uint32_t vaIndex() const noexcept { return _vaIndex; } //! Tests whether the function passes one or more argument by stack. 
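The reworked FuncDetail API above takes an Environment in init() and exposes arguments and return values as FuncValuePack. A minimal sketch of how the new accessors could be queried follows; it is illustrative only, and FuncSignatureT, CallConv::kIdHost and Environment::host() are assumed from the public asmjit headers rather than shown in this patch:

```
#include <asmjit/core.h>
#include <cstdio>

using namespace asmjit;

// Illustrative sketch: map `int fn(int, double)` to the host calling
// convention and print where each value ends up (register or stack).
static void inspectSignature() {
  FuncSignatureT<int, int, double> signature(CallConv::kIdHost);

  FuncDetail func;
  if (func.init(signature, Environment::host()) != kErrorOk)
    return;

  // Return values and arguments are FuncValuePack now; value index 0 is the
  // common case, higher indexes are only used when a value is split.
  if (func.hasRet() && func.ret(0).isReg())
    printf("ret   : reg=%u\n", func.ret(0).regId());

  for (uint32_t argIndex = 0; argIndex < func.argCount(); argIndex++) {
    const FuncValue& value = func.arg(argIndex, 0);
    if (value.isReg())
      printf("arg #%u: reg=%u\n", argIndex, value.regId());
    else if (value.isStack())
      printf("arg #%u: stackOffset=%d\n", argIndex, value.stackOffset());
  }
}
```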
@@ -434,18 +843,25 @@ class FuncDetail { //! Returns stack size needed for function arguments passed on the stack. inline uint32_t argStackSize() const noexcept { return _argStackSize; } + //! Returns red zone size. inline uint32_t redZoneSize() const noexcept { return _callConv.redZoneSize(); } + //! Returns spill zone size. inline uint32_t spillZoneSize() const noexcept { return _callConv.spillZoneSize(); } + //! Returns natural stack alignment. inline uint32_t naturalStackAlignment() const noexcept { return _callConv.naturalStackAlignment(); } + //! Returns a mask of all passed registers of the given register `group`. inline uint32_t passedRegs(uint32_t group) const noexcept { return _callConv.passedRegs(group); } + //! Returns a mask of all preserved registers of the given register `group`. inline uint32_t preservedRegs(uint32_t group) const noexcept { return _callConv.preservedRegs(group); } + //! Returns a mask of all used registers of the given register `group`. inline uint32_t usedRegs(uint32_t group) const noexcept { ASMJIT_ASSERT(group < BaseReg::kGroupVirt); return _usedRegs[group]; } + //! Adds `regs` to the mask of used registers of the given register `group`. inline void addUsedRegs(uint32_t group, uint32_t regs) noexcept { ASMJIT_ASSERT(group < BaseReg::kGroupVirt); _usedRegs[group] |= regs; @@ -472,12 +888,13 @@ class FuncDetail { //! frame. The function frame in most cases won't use all of the properties //! illustrated (for example Spill Zone and Red Zone are never used together). //! +//! ``` //! +-----------------------------+ //! | Arguments Passed by Stack | //! +-----------------------------+ //! | Spill Zone | //! +-----------------------------+ <- Stack offset (args) starts from here. -//! | Return Address if Pushed | +//! | Return Address, if Pushed | //! +-----------------------------+ <- Stack pointer (SP) upon entry. //! | Save/Restore Stack. | //! +-----------------------------+-----------------------------+ @@ -487,32 +904,42 @@ class FuncDetail { //! +-----------------------------+-----------------------------+ <- SP after prolog. //! | Red Zone | //! +-----------------------------+ +//! ``` class FuncFrame { public: enum Tag : uint32_t { - kTagInvalidOffset = 0xFFFFFFFFu //!< Tag used to inform that some offset is invalid. + //! Tag used to inform that some offset is invalid. + kTagInvalidOffset = 0xFFFFFFFFu }; //! Attributes are designed in a way that all are initially false, and user //! or FuncFrame finalizer adds them when necessary. enum Attributes : uint32_t { - kAttrHasVarArgs = 0x00000001u, //!< Function has variable number of arguments. - kAttrHasPreservedFP = 0x00000010u, //!< Preserve frame pointer (don't omit FP). - kAttrHasFuncCalls = 0x00000020u, //!< Function calls other functions (is not leaf). - - kAttrX86AvxEnabled = 0x00010000u, //!< Use AVX instead of SSE for all operations (X86). - kAttrX86AvxCleanup = 0x00020000u, //!< Emit VZEROUPPER instruction in epilog (X86). - kAttrX86MmxCleanup = 0x00040000u, //!< Emit EMMS instruction in epilog (X86). - - kAttrAlignedVecSR = 0x40000000u, //!< Function has aligned save/restore of vector registers. - kAttrIsFinalized = 0x80000000u //!< FuncFrame is finalized and can be used by PEI. + //! Function has variable number of arguments. + kAttrHasVarArgs = 0x00000001u, + //! Preserve frame pointer (don't omit FP). + kAttrHasPreservedFP = 0x00000010u, + //! Function calls other functions (is not leaf). + kAttrHasFuncCalls = 0x00000020u, + + //! Use AVX instead of SSE for all operations (X86). 
+ kAttrX86AvxEnabled = 0x00010000u, + //! Emit VZEROUPPER instruction in epilog (X86). + kAttrX86AvxCleanup = 0x00020000u, + //! Emit EMMS instruction in epilog (X86). + kAttrX86MmxCleanup = 0x00040000u, + + //! Function has aligned save/restore of vector registers. + kAttrAlignedVecSR = 0x40000000u, + //! FuncFrame is finalized and can be used by PEI. + kAttrIsFinalized = 0x80000000u }; //! Function attributes. uint32_t _attributes; - //! Architecture ID. - uint8_t _archId; + //! Architecture, see \ref Environment::Arch. + uint8_t _arch; //! SP register ID (to access call stack and local stack). uint8_t _spRegId; //! SA register ID (to access stack arguments). @@ -560,15 +987,19 @@ class FuncFrame { uint32_t _dirtyRegs[BaseReg::kGroupVirt]; //! Registers that must be preserved (copied from CallConv). uint32_t _preservedRegs[BaseReg::kGroupVirt]; - - //! Final stack size required to save GP regs. - uint16_t _gpSaveSize; - //! Final Stack size required to save other than GP regs. - uint16_t _nonGpSaveSize; - //! Final offset where saved GP regs are stored. - uint32_t _gpSaveOffset; - //! Final offset where saved other than GP regs are stored. - uint32_t _nonGpSaveOffset; + //! Size to save/restore per register group. + uint8_t _saveRestoreRegSize[BaseReg::kGroupVirt]; + //! Alignment of save/restore area per register group. + uint8_t _saveRestoreAlignment[BaseReg::kGroupVirt]; + + //! Stack size required to save registers with push/pop. + uint16_t _pushPopSaveSize; + //! Stack size required to save extra registers that cannot use push/pop. + uint16_t _extraRegSaveSize; + //! Offset where registers saved/restored via push/pop are stored. + uint32_t _pushPopSaveOffset; + //! Offset where extra registers that cannot use push/pop are stored. + uint32_t _extraRegSaveOffset; //! \name Construction & Destruction //! \{ @@ -591,7 +1022,7 @@ class FuncFrame { //! \{ //! Returns the target architecture of the function frame. - inline uint32_t archId() const noexcept { return _archId; } + inline uint32_t arch() const noexcept { return _arch; } //! Returns function frame attributes, see `Attributes`. inline uint32_t attributes() const noexcept { return _attributes; } @@ -784,10 +1215,8 @@ class FuncFrame { } inline void setAllDirty() noexcept { - _dirtyRegs[0] = 0xFFFFFFFFu; - _dirtyRegs[1] = 0xFFFFFFFFu; - _dirtyRegs[2] = 0xFFFFFFFFu; - _dirtyRegs[3] = 0xFFFFFFFFu; + for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_dirtyRegs); i++) + _dirtyRegs[i] = 0xFFFFFFFFu; } inline void setAllDirty(uint32_t group) noexcept { @@ -814,20 +1243,35 @@ class FuncFrame { return _preservedRegs[group]; } + inline uint32_t saveRestoreRegSize(uint32_t group) const noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + return _saveRestoreRegSize[group]; + } + + inline uint32_t saveRestoreAlignment(uint32_t group) const noexcept { + ASMJIT_ASSERT(group < BaseReg::kGroupVirt); + return _saveRestoreAlignment[group]; + } + inline bool hasSARegId() const noexcept { return _saRegId != BaseReg::kIdBad; } inline uint32_t saRegId() const noexcept { return _saRegId; } inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); } inline void resetSARegId() { setSARegId(BaseReg::kIdBad); } - //! Returns stack size required to save GP registers. - inline uint32_t gpSaveSize() const noexcept { return _gpSaveSize; } - //! Returns stack size required to save other than GP registers (MM, XMM|YMM|ZMM, K, VFP, etc...). - inline uint32_t nonGpSaveSize() const noexcept { return _nonGpSaveSize; } + //! 
Returns stack size required to save/restore registers via push/pop. + inline uint32_t pushPopSaveSize() const noexcept { return _pushPopSaveSize; } + //! Returns an offset to the stack where registers are saved via push/pop. + inline uint32_t pushPopSaveOffset() const noexcept { return _pushPopSaveOffset; } - //! Returns an offset to the stack where general purpose registers are saved. - inline uint32_t gpSaveOffset() const noexcept { return _gpSaveOffset; } - //! Returns an offset to the stack where other than GP registers are saved. - inline uint32_t nonGpSaveOffset() const noexcept { return _nonGpSaveOffset; } + //! Returns stack size required to save/restore extra registers that don't + //! use push/pop. + //! + //! \note On X86 this covers all registers except GP registers, on other + //! architectures it can always be zero (for example AArch64 saves all + //! registers via push/pop-like instructions, so this would be zero). + inline uint32_t extraRegSaveSize() const noexcept { return _extraRegSaveSize; } + //! Returns an offset to the stack where extra registers are saved. + inline uint32_t extraRegSaveOffset() const noexcept { return _extraRegSaveOffset; } //! Tests whether the function contains stack adjustment. inline bool hasStackAdjustment() const noexcept { return _stackAdjustment != 0; } @@ -863,7 +1307,7 @@ class FuncArgsAssignment { //! Reserved for future use. uint8_t _reserved[3]; //! Mapping of each function argument. - FuncValue _args[kFuncArgCountLoHi]; + FuncValuePack _argPacks[Globals::kMaxFuncArgs]; //! \name Construction & Destruction //! \{ @@ -878,7 +1322,7 @@ class FuncArgsAssignment { _funcDetail = fd; _saRegId = uint8_t(BaseReg::kIdBad); memset(_reserved, 0, sizeof(_reserved)); - memset(_args, 0, sizeof(_args)); + memset(_argPacks, 0, sizeof(_argPacks)); } //! 
\} @@ -894,46 +1338,62 @@ class FuncArgsAssignment { inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); } inline void resetSARegId() { _saRegId = uint8_t(BaseReg::kIdBad); } - inline FuncValue& arg(uint32_t index) noexcept { - ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_args)); - return _args[index]; + inline FuncValue& arg(size_t argIndex, size_t valueIndex) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + return _argPacks[argIndex][valueIndex]; } - inline const FuncValue& arg(uint32_t index) const noexcept { - ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_args)); - return _args[index]; + inline const FuncValue& arg(size_t argIndex, size_t valueIndex) const noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + return _argPacks[argIndex][valueIndex]; + } + + inline bool isAssigned(size_t argIndex, size_t valueIndex) const noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + return _argPacks[argIndex][valueIndex].isAssigned(); + } + + inline void assignReg(size_t argIndex, const BaseReg& reg, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + ASMJIT_ASSERT(reg.isPhysReg()); + _argPacks[argIndex][0].initReg(reg.type(), reg.id(), typeId); + } + + inline void assignReg(size_t argIndex, uint32_t regType, uint32_t regId, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + _argPacks[argIndex][0].initReg(regType, regId, typeId); } - inline bool isAssigned(uint32_t argIndex) const noexcept { - ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args)); - return _args[argIndex].isAssigned(); + inline void assignStack(size_t argIndex, int32_t offset, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + _argPacks[argIndex][0].initStack(offset, typeId); } - inline void assignReg(uint32_t argIndex, const BaseReg& reg, uint32_t typeId = Type::kIdVoid) noexcept { - ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args)); + inline void assignRegInPack(size_t argIndex, size_t valueIndex, const BaseReg& reg, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); ASMJIT_ASSERT(reg.isPhysReg()); - _args[argIndex].initReg(reg.type(), reg.id(), typeId); + _argPacks[argIndex][valueIndex].initReg(reg.type(), reg.id(), typeId); } - inline void assignReg(uint32_t argIndex, uint32_t regType, uint32_t regId, uint32_t typeId = Type::kIdVoid) noexcept { - ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args)); - _args[argIndex].initReg(regType, regId, typeId); + inline void assignRegInPack(size_t argIndex, size_t valueIndex, uint32_t regType, uint32_t regId, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + _argPacks[argIndex][valueIndex].initReg(regType, regId, typeId); } - inline void assignStack(uint32_t argIndex, int32_t offset, uint32_t typeId = Type::kIdVoid) { - ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args)); - _args[argIndex].initStack(offset, typeId); + inline void assignStackInPack(size_t argIndex, size_t valueIndex, int32_t offset, uint32_t typeId = Type::kIdVoid) noexcept { + ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_argPacks)); + _argPacks[argIndex][valueIndex].initStack(offset, typeId); } // NOTE: All `assignAll()` methods are shortcuts to assign all arguments at // once, however, since registers are passed all at once these initializers // don't provide any way to pass TypeId and/or to keep any 
argument between // the arguments passed unassigned. - inline void _assignAllInternal(uint32_t argIndex, const BaseReg& reg) noexcept { + inline void _assignAllInternal(size_t argIndex, const BaseReg& reg) noexcept { assignReg(argIndex, reg); } template - inline void _assignAllInternal(uint32_t argIndex, const BaseReg& reg, Args&&... args) noexcept { + inline void _assignAllInternal(size_t argIndex, const BaseReg& reg, Args&&... args) noexcept { assignReg(argIndex, reg); _assignAllInternal(argIndex + 1, std::forward(args)...); } diff --git a/libs/asmjit/src/asmjit/core/funcargscontext.cpp b/libs/asmjit/src/asmjit/core/funcargscontext.cpp new file mode 100644 index 0000000..331e205 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/funcargscontext.cpp @@ -0,0 +1,315 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#include "../core/funcargscontext_p.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \cond INTERNAL +//! \addtogroup asmjit_core +//! \{ + +FuncArgsContext::FuncArgsContext() noexcept { + for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) + _workData[group].reset(); +} + +ASMJIT_FAVOR_SIZE Error FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept { + // The code has to be updated if this changes. + ASMJIT_ASSERT(BaseReg::kGroupVirt == 4); + + uint32_t i; + + uint32_t arch = frame.arch(); + const FuncDetail& func = *args.funcDetail(); + + _archTraits = &ArchTraits::byArch(arch); + _constraints = constraints; + _arch = uint8_t(arch); + + // Initialize `_archRegs`. + for (i = 0; i < BaseReg::kGroupVirt; i++) + _workData[i]._archRegs = _constraints->availableRegs(i); + + if (frame.hasPreservedFP()) + _workData[BaseReg::kGroupGp]._archRegs &= ~Support::bitMask(archTraits().fpRegId()); + + // Extract information from all function arguments/assignments and build Var[] array. 
+ uint32_t varId = 0; + for (uint32_t argIndex = 0; argIndex < Globals::kMaxFuncArgs; argIndex++) { + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + const FuncValue& dst_ = args.arg(argIndex, valueIndex); + if (!dst_.isAssigned()) + continue; + + const FuncValue& src_ = func.arg(argIndex, valueIndex); + if (ASMJIT_UNLIKELY(!src_.isAssigned())) + return DebugUtils::errored(kErrorInvalidState); + + Var& var = _vars[varId]; + var.init(src_, dst_); + + FuncValue& src = var.cur; + FuncValue& dst = var.out; + + uint32_t dstGroup = 0xFFFFFFFFu; + uint32_t dstId = BaseReg::kIdBad; + WorkData* dstWd = nullptr; + + // Not supported. + if (src.isIndirect()) + return DebugUtils::errored(kErrorInvalidAssignment); + + if (dst.isReg()) { + uint32_t dstType = dst.regType(); + if (ASMJIT_UNLIKELY(!archTraits().hasRegType(dstType))) + return DebugUtils::errored(kErrorInvalidRegType); + + // Copy TypeId from source if the destination doesn't have it. The RA + // used by BaseCompiler would never leave TypeId undefined, but users + // of FuncAPI can just assign phys regs without specifying the type. + if (!dst.hasTypeId()) + dst.setTypeId(archTraits().regTypeToTypeId(dst.regType())); + + dstGroup = archTraits().regTypeToGroup(dstType); + if (ASMJIT_UNLIKELY(dstGroup >= BaseReg::kGroupVirt)) + return DebugUtils::errored(kErrorInvalidRegGroup); + + dstWd = &_workData[dstGroup]; + dstId = dst.regId(); + if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId))) + return DebugUtils::errored(kErrorInvalidPhysId); + + if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId))) + return DebugUtils::errored(kErrorOverlappedRegs); + + dstWd->_dstRegs |= Support::bitMask(dstId); + dstWd->_dstShuf |= Support::bitMask(dstId); + dstWd->_usedRegs |= Support::bitMask(dstId); + } + else { + if (!dst.hasTypeId()) + dst.setTypeId(src.typeId()); + + RegInfo regInfo = getSuitableRegForMemToMemMove(arch, dst.typeId(), src.typeId()); + if (ASMJIT_UNLIKELY(!regInfo.isValid())) + return DebugUtils::errored(kErrorInvalidState); + _stackDstMask = uint8_t(_stackDstMask | Support::bitMask(regInfo.group())); + } + + if (src.isReg()) { + uint32_t srcId = src.regId(); + uint32_t srcGroup = archTraits().regTypeToGroup(src.regType()); + + if (dstGroup == srcGroup) { + dstWd->assign(varId, srcId); + + // The best case, register is allocated where it is expected to be. + if (dstId == srcId) + var.markDone(); + } + else { + if (ASMJIT_UNLIKELY(srcGroup >= BaseReg::kGroupVirt)) + return DebugUtils::errored(kErrorInvalidState); + + WorkData& srcData = _workData[srcGroup]; + srcData.assign(varId, srcId); + } + } + else { + if (dstWd) + dstWd->_numStackArgs++; + _hasStackSrc = true; + } + + varId++; + } + } + + // Initialize WorkData::workRegs. + for (i = 0; i < BaseReg::kGroupVirt; i++) { + _workData[i]._workRegs = (_workData[i].archRegs() & (frame.dirtyRegs(i) | ~frame.preservedRegs(i))) | _workData[i].dstRegs() | _workData[i].assignedRegs(); + } + + // Create a variable that represents `SARegId` if necessary. + bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP(); + + WorkData& gpRegs = _workData[BaseReg::kGroupGp]; + uint32_t saCurRegId = frame.saRegId(); + uint32_t saOutRegId = args.saRegId(); + + if (saCurRegId != BaseReg::kIdBad) { + // Check if the provided `SARegId` doesn't collide with input registers. 
+ if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId))) + return DebugUtils::errored(kErrorOverlappedRegs); + } + + if (saOutRegId != BaseReg::kIdBad) { + // Check if the provided `SARegId` doesn't collide with argument assignments. + if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId))) + return DebugUtils::errored(kErrorOverlappedRegs); + saRegRequired = true; + } + + if (saRegRequired) { + uint32_t ptrTypeId = Environment::is32Bit(arch) ? Type::kIdU32 : Type::kIdU64; + uint32_t ptrRegType = Environment::is32Bit(arch) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64; + + _saVarId = uint8_t(varId); + _hasPreservedFP = frame.hasPreservedFP(); + + Var& var = _vars[varId]; + var.reset(); + + if (saCurRegId == BaseReg::kIdBad) { + if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) { + saCurRegId = saOutRegId; + } + else { + uint32_t availableRegs = gpRegs.availableRegs(); + if (!availableRegs) + availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs(); + + if (ASMJIT_UNLIKELY(!availableRegs)) + return DebugUtils::errored(kErrorNoMorePhysRegs); + + saCurRegId = Support::ctz(availableRegs); + } + } + + var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId); + gpRegs.assign(varId, saCurRegId); + gpRegs._workRegs |= Support::bitMask(saCurRegId); + + if (saOutRegId != BaseReg::kIdBad) { + var.out.initReg(ptrRegType, saOutRegId, ptrTypeId); + gpRegs._dstRegs |= Support::bitMask(saOutRegId); + gpRegs._workRegs |= Support::bitMask(saOutRegId); + } + else { + var.markDone(); + } + + varId++; + } + + _varCount = varId; + + // Detect register swaps. + for (varId = 0; varId < _varCount; varId++) { + Var& var = _vars[varId]; + if (var.cur.isReg() && var.out.isReg()) { + uint32_t srcId = var.cur.regId(); + uint32_t dstId = var.out.regId(); + + uint32_t group = archTraits().regTypeToGroup(var.cur.regType()); + if (group != archTraits().regTypeToGroup(var.out.regType())) + continue; + + WorkData& wd = _workData[group]; + if (wd.isAssigned(dstId)) { + Var& other = _vars[wd._physToVarId[dstId]]; + if (archTraits().regTypeToGroup(other.out.regType()) == group && other.out.regId() == srcId) { + wd._numSwaps++; + _regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group)); + } + } + } + } + + return kErrorOk; +} + +ASMJIT_FAVOR_SIZE Error FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept { + for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++) { + WorkData& wd = _workData[i]; + uint32_t regs = wd.usedRegs() | wd._dstShuf; + + wd._workRegs |= regs; + frame.addDirtyRegs(i, regs); + } + + return kErrorOk; +} + +ASMJIT_FAVOR_SIZE Error FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept { + uint32_t groupMask = 0; + + // Handle stack to stack moves. + groupMask |= _stackDstMask; + + // Handle register swaps. + groupMask |= _regSwapsMask & ~Support::bitMask(BaseReg::kGroupGp); + + if (!groupMask) + return kErrorOk; + + // Selects one dirty register per affected group that can be used as a scratch register. + for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { + if (Support::bitTest(groupMask, group)) { + WorkData& wd = _workData[group]; + + // Initially, pick some clobbered or dirty register. + uint32_t workRegs = wd.workRegs(); + uint32_t regs = workRegs & ~(wd.usedRegs() | wd._dstShuf); + + // If that didn't work out pick some register which is not in 'used'. + if (!regs) + regs = workRegs & ~wd.usedRegs(); + + // If that didn't work out pick any other register that is allocable. 
+ // This last resort case will, however, result in marking one more + // register dirty. + if (!regs) + regs = wd.archRegs() & ~workRegs; + + // If that didn't work out we will have to use XORs instead of MOVs. + if (!regs) + continue; + + uint32_t regMask = Support::blsi(regs); + wd._workRegs |= regMask; + frame.addDirtyRegs(group, regMask); + } + } + + return kErrorOk; +} + +ASMJIT_FAVOR_SIZE Error FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept { + if (_saVarId != kVarIdNone) { + const Var& var = _vars[_saVarId]; + frame.setSARegId(var.cur.regId()); + } + else if (frame.hasPreservedFP()) { + frame.setSARegId(archTraits().fpRegId()); + } + + return kErrorOk; +} + +//! \} +//! \endcond + +ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/funcargscontext_p.h b/libs/asmjit/src/asmjit/core/funcargscontext_p.h new file mode 100644 index 0000000..6c4ea6a --- /dev/null +++ b/libs/asmjit/src/asmjit/core/funcargscontext_p.h @@ -0,0 +1,224 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED +#define ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED + +#include "../core/archtraits.h" +#include "../core/environment.h" +#include "../core/func.h" +#include "../core/operand.h" +#include "../core/radefs_p.h" +#include "../core/support.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \cond INTERNAL +//! \addtogroup asmjit_core +//! \{ + +// ============================================================================ +// [TODO: Place somewhere else] +// ============================================================================ + +static inline RegInfo getSuitableRegForMemToMemMove(uint32_t arch, uint32_t dstTypeId, uint32_t srcTypeId) noexcept { + const ArchTraits& archTraits = ArchTraits::byArch(arch); + + uint32_t dstSize = Type::sizeOf(dstTypeId); + uint32_t srcSize = Type::sizeOf(srcTypeId); + uint32_t maxSize = Support::max(dstSize, srcSize); + uint32_t regSize = Environment::registerSizeFromArch(arch); + + uint32_t signature = 0; + if (maxSize <= regSize || (Type::isInt(dstTypeId) && Type::isInt(srcTypeId))) + signature = maxSize <= 4 ? 
archTraits.regTypeToSignature(BaseReg::kTypeGp32) + : archTraits.regTypeToSignature(BaseReg::kTypeGp64); + else if (maxSize <= 8 && archTraits.hasRegType(BaseReg::kTypeVec64)) + signature = archTraits.regTypeToSignature(BaseReg::kTypeVec64); + else if (maxSize <= 16 && archTraits.hasRegType(BaseReg::kTypeVec128)) + signature = archTraits.regTypeToSignature(BaseReg::kTypeVec128); + else if (maxSize <= 32 && archTraits.hasRegType(BaseReg::kTypeVec256)) + signature = archTraits.regTypeToSignature(BaseReg::kTypeVec256); + else if (maxSize <= 64 && archTraits.hasRegType(BaseReg::kTypeVec512)) + signature = archTraits.regTypeToSignature(BaseReg::kTypeVec512); + + return RegInfo { signature }; +} + +// ============================================================================ +// [asmjit::FuncArgsContext] +// ============================================================================ + +class FuncArgsContext { +public: + enum VarId : uint32_t { + kVarIdNone = 0xFF + }; + + //! Contains information about a single argument or SA register that may need shuffling. + struct Var { + FuncValue cur; + FuncValue out; + + inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept { + cur = cur_; + out = out_; + } + + //! Reset the value to its unassigned state. + inline void reset() noexcept { + cur.reset(); + out.reset(); + } + + inline bool isDone() const noexcept { return cur.isDone(); } + inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); } + }; + + struct WorkData { + //! All allocable registers provided by the architecture. + uint32_t _archRegs; + //! All registers that can be used by the shuffler. + uint32_t _workRegs; + //! Registers used by the shuffler (all). + uint32_t _usedRegs; + //! Assigned registers. + uint32_t _assignedRegs; + //! Destination registers assigned to arguments or SA. + uint32_t _dstRegs; + //! Destination registers that require shuffling. + uint32_t _dstShuf; + //! Number of register swaps. + uint8_t _numSwaps; + //! Number of stack loads. + uint8_t _numStackArgs; + //! Reserved (only used as padding). + uint8_t _reserved[6]; + //! Physical ID to variable ID mapping. 
+ uint8_t _physToVarId[32]; + + inline void reset() noexcept { + _archRegs = 0; + _workRegs = 0; + _usedRegs = 0; + _assignedRegs = 0; + _dstRegs = 0; + _dstShuf = 0; + _numSwaps = 0; + _numStackArgs = 0; + memset(_reserved, 0, sizeof(_reserved)); + memset(_physToVarId, kVarIdNone, 32); + } + + inline bool isAssigned(uint32_t regId) const noexcept { + ASMJIT_ASSERT(regId < 32); + return Support::bitTest(_assignedRegs, regId); + } + + inline void assign(uint32_t varId, uint32_t regId) noexcept { + ASMJIT_ASSERT(!isAssigned(regId)); + ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone); + + _physToVarId[regId] = uint8_t(varId); + _assignedRegs ^= Support::bitMask(regId); + } + + inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept { + ASMJIT_ASSERT( isAssigned(oldId)); + ASMJIT_ASSERT(!isAssigned(newId)); + ASMJIT_ASSERT(_physToVarId[oldId] == varId); + ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone); + + _physToVarId[oldId] = uint8_t(kVarIdNone); + _physToVarId[newId] = uint8_t(varId); + _assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId); + } + + inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept { + ASMJIT_ASSERT(isAssigned(aRegId)); + ASMJIT_ASSERT(isAssigned(bRegId)); + ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId); + ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId); + + _physToVarId[aRegId] = uint8_t(bVarId); + _physToVarId[bRegId] = uint8_t(aVarId); + } + + inline void unassign(uint32_t varId, uint32_t regId) noexcept { + ASMJIT_ASSERT(isAssigned(regId)); + ASMJIT_ASSERT(_physToVarId[regId] == varId); + + DebugUtils::unused(varId); + _physToVarId[regId] = uint8_t(kVarIdNone); + _assignedRegs ^= Support::bitMask(regId); + } + + inline uint32_t archRegs() const noexcept { return _archRegs; } + inline uint32_t workRegs() const noexcept { return _workRegs; } + inline uint32_t usedRegs() const noexcept { return _usedRegs; } + inline uint32_t assignedRegs() const noexcept { return _assignedRegs; } + inline uint32_t dstRegs() const noexcept { return _dstRegs; } + inline uint32_t availableRegs() const noexcept { return _workRegs & ~_assignedRegs; } + }; + + //! Architecture traits. + const ArchTraits* _archTraits = nullptr; + const RAConstraints* _constraints = nullptr; + //! Architecture identifier. + uint8_t _arch = 0; + //! Has arguments passed via stack (SRC). + bool _hasStackSrc = false; + //! Has preserved frame-pointer (FP). + bool _hasPreservedFP = false; + //! Has arguments assigned to stack (DST). + uint8_t _stackDstMask = 0; + //! Register swap groups (bit-mask). 
+ uint8_t _regSwapsMask = 0; + uint8_t _saVarId = kVarIdNone; + uint32_t _varCount = 0; + WorkData _workData[BaseReg::kGroupVirt]; + Var _vars[Globals::kMaxFuncArgs * Globals::kMaxValuePack + 1]; + + FuncArgsContext() noexcept; + + inline const ArchTraits& archTraits() const noexcept { return *_archTraits; } + inline uint32_t arch() const noexcept { return _arch; } + + inline uint32_t varCount() const noexcept { return _varCount; } + inline size_t indexOf(const Var* var) const noexcept { return (size_t)(var - _vars); } + + inline Var& var(size_t varId) noexcept { return _vars[varId]; } + inline const Var& var(size_t varId) const noexcept { return _vars[varId]; } + + Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args, const RAConstraints* constraints) noexcept; + Error markScratchRegs(FuncFrame& frame) noexcept; + Error markDstRegsDirty(FuncFrame& frame) noexcept; + Error markStackArgsReg(FuncFrame& frame) noexcept; +}; + +//! \} +//! \endcond + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_FUNCARGSCONTEXT_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/globals.cpp b/libs/asmjit/src/asmjit/core/globals.cpp index 426fce8..c7d1f72 100644 --- a/libs/asmjit/src/asmjit/core/globals.cpp +++ b/libs/asmjit/src/asmjit/core/globals.cpp @@ -33,75 +33,89 @@ ASMJIT_BEGIN_NAMESPACE ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept { #ifndef ASMJIT_NO_TEXT - static const char errorMessages[] = + // @EnumStringBegin{"enum": "ErrorCode", "output": "sError", "strip": "kError"}@ + static const char sErrorString[] = "Ok\0" - "Out of memory\0" - "Invalid argument\0" - "Invalid state\0" - "Invalid architecture\0" - "Not initialized\0" - "Already initialized\0" - "Feature not enabled\0" - "Too many handles or file descriptors\0" - "Too large (code or memory request)\0" - "No code generated\0" - "Invalid directive\0" - "Invalid label\0" - "Too many labels\0" - "Label already bound\0" - "Label already defined\0" - "Label name too long\0" - "Invalid label name\0" - "Invalid parent label\0" - "Non-local label can't have parent\0" - "Invalid section\0" - "Too many sections\0" - "Invalid section name\0" - "Too many relocations\0" - "Invalid relocation entry\0" - "Relocation offset out of range\0" - "Invalid assignment\0" - "Invalid instruction\0" - "Invalid register type\0" - "Invalid register group\0" - "Invalid register physical id\0" - "Invalid register virtual id\0" - "Invalid prefix combination\0" - "Invalid lock prefix\0" - "Invalid xacquire prefix\0" - "Invalid xrelease prefix\0" - "Invalid rep prefix\0" - "Invalid rex prefix\0" - "Invalid {...} register \0" - "Invalid use of {k}\0" - "Invalid use of {k}{z}\0" - "Invalid broadcast {1tox}\0" - "Invalid {er} or {sae} option\0" - "Invalid address\0" - "Invalid address index\0" - "Invalid address scale\0" - "Invalid use of 64-bit address or offset\0" - "Invalid use of 64-bit address or offset that requires 32-bit zero-extension\0" - "Invalid displacement\0" - "Invalid segment\0" - "Invalid immediate value\0" - "Invalid operand size\0" - "Ambiguous operand size\0" - "Operand size mismatch\0" - "Invalid option\0" - "Option already defined\0" - "Invalid type-info\0" - "Invalid use of a low 8-bit GPB register\0" - "Invalid use of a 64-bit GPQ register in 32-bit mode\0" - "Invalid use of an 80-bit float\0" - "Not consecutive registers\0" - "No more physical registers\0" - "Overlapped registers\0" - "Overlapping register and arguments base-address register\0" - "Unbound label cannot be evaluated by expression\0" - 
"Arithmetic overflow during expression evaluation\0" - "Unknown error\0"; - return Support::findPackedString(errorMessages, Support::min(err, kErrorCount)); + "OutOfMemory\0" + "InvalidArgument\0" + "InvalidState\0" + "InvalidArch\0" + "NotInitialized\0" + "AlreadyInitialized\0" + "FeatureNotEnabled\0" + "TooManyHandles\0" + "TooLarge\0" + "NoCodeGenerated\0" + "InvalidDirective\0" + "InvalidLabel\0" + "TooManyLabels\0" + "LabelAlreadyBound\0" + "LabelAlreadyDefined\0" + "LabelNameTooLong\0" + "InvalidLabelName\0" + "InvalidParentLabel\0" + "NonLocalLabelCannotHaveParent\0" + "InvalidSection\0" + "TooManySections\0" + "InvalidSectionName\0" + "TooManyRelocations\0" + "InvalidRelocEntry\0" + "RelocOffsetOutOfRange\0" + "InvalidAssignment\0" + "InvalidInstruction\0" + "InvalidRegType\0" + "InvalidRegGroup\0" + "InvalidPhysId\0" + "InvalidVirtId\0" + "InvalidElementIndex\0" + "InvalidPrefixCombination\0" + "InvalidLockPrefix\0" + "InvalidXAcquirePrefix\0" + "InvalidXReleasePrefix\0" + "InvalidRepPrefix\0" + "InvalidRexPrefix\0" + "InvalidExtraReg\0" + "InvalidKMaskUse\0" + "InvalidKZeroUse\0" + "InvalidBroadcast\0" + "InvalidEROrSAE\0" + "InvalidAddress\0" + "InvalidAddressIndex\0" + "InvalidAddressScale\0" + "InvalidAddress64Bit\0" + "InvalidAddress64BitZeroExtension\0" + "InvalidDisplacement\0" + "InvalidSegment\0" + "InvalidImmediate\0" + "InvalidOperandSize\0" + "AmbiguousOperandSize\0" + "OperandSizeMismatch\0" + "InvalidOption\0" + "OptionAlreadyDefined\0" + "InvalidTypeId\0" + "InvalidUseOfGpbHi\0" + "InvalidUseOfGpq\0" + "InvalidUseOfF80\0" + "NotConsecutiveRegs\0" + "IllegalVirtReg\0" + "TooManyVirtRegs\0" + "NoMorePhysRegs\0" + "OverlappedRegs\0" + "OverlappingStackRegWithRegArg\0" + "ExpressionLabelNotBound\0" + "ExpressionOverflow\0" + "\0"; + + static const uint16_t sErrorIndex[] = { + 0, 3, 15, 31, 44, 56, 71, 90, 108, 123, 132, 148, 165, 178, 192, 210, 230, + 247, 264, 283, 313, 328, 344, 363, 382, 400, 422, 440, 459, 474, 490, 504, + 518, 538, 563, 581, 603, 625, 642, 659, 675, 691, 707, 724, 739, 754, 774, + 794, 814, 847, 867, 882, 899, 918, 939, 959, 973, 994, 1008, 1026, 1042, + 1058, 1077, 1092, 1108, 1123, 1138, 1168, 1192, 1211 + }; + // @EnumStringEnd@ + + return sErrorString + sErrorIndex[Support::min(err, kErrorCount)]; #else DebugUtils::unused(err); static const char noMessage[] = ""; diff --git a/libs/asmjit/src/asmjit/core/globals.h b/libs/asmjit/src/asmjit/core/globals.h index 6373b7e..c1535e1 100644 --- a/libs/asmjit/src/asmjit/core/globals.h +++ b/libs/asmjit/src/asmjit/core/globals.h @@ -33,7 +33,7 @@ ASMJIT_BEGIN_NAMESPACE // ============================================================================ //! \cond INTERNAL -//! \addtogroup Support +//! \addtogroup asmjit_utilities //! \{ namespace Support { //! Cast designed to cast between function and void* pointers. @@ -80,17 +80,17 @@ namespace Globals { // ============================================================================ //! Host memory allocator overhead. -constexpr uint32_t kAllocOverhead = uint32_t(sizeof(intptr_t) * 4); +static constexpr uint32_t kAllocOverhead = uint32_t(sizeof(intptr_t) * 4); //! Host memory allocator alignment. -constexpr uint32_t kAllocAlignment = 8; +static constexpr uint32_t kAllocAlignment = 8; //! Aggressive growing strategy threshold. -constexpr uint32_t kGrowThreshold = 1024 * 1024 * 16; +static constexpr uint32_t kGrowThreshold = 1024 * 1024 * 16; -//! Maximum height of RB-Tree is: +//! Maximum depth of RB-Tree is: //! -//! `2 * log2(n + 1)`. +//! 
`2 * log2(n + 1)` //! //! Size of RB node is at least two pointers (without data), //! so a theoretical architecture limit would be: @@ -99,37 +99,41 @@ constexpr uint32_t kGrowThreshold = 1024 * 1024 * 16; //! //! Which yields 30 on 32-bit arch and 61 on 64-bit arch. //! The final value was adjusted by +1 for safety reasons. -constexpr uint32_t kMaxTreeHeight = (ASMJIT_ARCH_BITS == 32 ? 30 : 61) + 1; +static constexpr uint32_t kMaxTreeHeight = (ASMJIT_ARCH_BITS == 32 ? 30 : 61) + 1; //! Maximum number of operands per a single instruction. -constexpr uint32_t kMaxOpCount = 6; +static constexpr uint32_t kMaxOpCount = 6; -// TODO: Use this one. -constexpr uint32_t kMaxFuncArgs = 16; +//! Maximum arguments of a function supported by the Compiler / Function API. +static constexpr uint32_t kMaxFuncArgs = 16; + +//! The number of values that can be assigned to a single function argument or +//! return value. +static constexpr uint32_t kMaxValuePack = 4; //! Maximum number of physical registers AsmJit can use per register group. -constexpr uint32_t kMaxPhysRegs = 32; +static constexpr uint32_t kMaxPhysRegs = 32; //! Maximum alignment. -constexpr uint32_t kMaxAlignment = 64; +static constexpr uint32_t kMaxAlignment = 64; //! Maximum label or symbol size in bytes. -constexpr uint32_t kMaxLabelNameSize = 2048; +static constexpr uint32_t kMaxLabelNameSize = 2048; //! Maximum section name size. -constexpr uint32_t kMaxSectionNameSize = 35; +static constexpr uint32_t kMaxSectionNameSize = 35; //! Maximum size of comment. -constexpr uint32_t kMaxCommentSize = 1024; +static constexpr uint32_t kMaxCommentSize = 1024; //! Invalid identifier. -constexpr uint32_t kInvalidId = 0xFFFFFFFFu; +static constexpr uint32_t kInvalidId = 0xFFFFFFFFu; //! Returned by `indexOf()` and similar when working with containers that use 32-bit index/size. -constexpr uint32_t kNotFound = 0xFFFFFFFFu; +static constexpr uint32_t kNotFound = 0xFFFFFFFFu; //! Invalid base address. -constexpr uint64_t kNoBaseAddress = ~uint64_t(0); +static constexpr uint64_t kNoBaseAddress = ~uint64_t(0); // ============================================================================ // [asmjit::Globals::ResetPolicy] @@ -168,15 +172,46 @@ static const constexpr NoInit_ NoInit {}; } // {Globals} +// ============================================================================ +// [asmjit::ByteOrder] +// ============================================================================ + +//! Byte order. +namespace ByteOrder { + enum : uint32_t { + kLE = 0, + kBE = 1, + kNative = ASMJIT_ARCH_LE ? kLE : kBE, + kSwapped = ASMJIT_ARCH_LE ? kBE : kLE + }; +} + +// ============================================================================ +// [asmjit::ptr_as_func / func_as_ptr] +// ============================================================================ + +template +static inline Func ptr_as_func(void* func) noexcept { return Support::ptr_cast_impl(func); } + +template +static inline void* func_as_ptr(Func func) noexcept { return Support::ptr_cast_impl(func); } + +//! \} + // ============================================================================ // [asmjit::Error] // ============================================================================ +//! \addtogroup asmjit_error_handling +//! \{ + //! AsmJit error type (uint32_t). typedef uint32_t Error; //! AsmJit error codes. enum ErrorCode : uint32_t { + // @EnumValuesBegin{"enum": "ErrorCode"}@ + //! No error (success). kErrorOk = 0, @@ -211,16 +246,16 @@ enum ErrorCode : uint32_t { //! No code generated. 
//! - //! Returned by runtime if the `CodeHolder` contains no code. + //! Returned by runtime if the \ref CodeHolder contains no code. kErrorNoCodeGenerated, //! Invalid directive. kErrorInvalidDirective, //! Attempt to use uninitialized label. kErrorInvalidLabel, - //! Label index overflow - a single `Assembler` instance can hold almost - //! 2^32 (4 billion) labels. If there is an attempt to create more labels - //! then this error is returned. + //! Label index overflow - a single \ref BaseAssembler instance can hold + //! almost 2^32 (4 billion) labels. If there is an attempt to create more + //! labels then this error is returned. kErrorTooManyLabels, //! Label is already bound. kErrorLabelAlreadyBound, @@ -230,10 +265,10 @@ enum ErrorCode : uint32_t { kErrorLabelNameTooLong, //! Label must always be local if it's anonymous (without a name). kErrorInvalidLabelName, - //! Parent id passed to `CodeHolder::newNamedLabelId()` was invalid. + //! Parent id passed to \ref CodeHolder::newNamedLabelEntry() was invalid. kErrorInvalidParentLabel, //! Parent id specified for a non-local (global) label. - kErrorNonLocalLabelCantHaveParent, + kErrorNonLocalLabelCannotHaveParent, //! Invalid section. kErrorInvalidSection, @@ -257,31 +292,33 @@ enum ErrorCode : uint32_t { kErrorInvalidRegType, //! Invalid register group. kErrorInvalidRegGroup, - //! Invalid register's physical id. + //! Invalid physical register id. kErrorInvalidPhysId, - //! Invalid register's virtual id. + //! Invalid virtual register id. kErrorInvalidVirtId, - //! Invalid prefix combination. + //! Invalid element index (ARM). + kErrorInvalidElementIndex, + //! Invalid prefix combination (X86|X64). kErrorInvalidPrefixCombination, - //! Invalid LOCK prefix. + //! Invalid LOCK prefix (X86|X64). kErrorInvalidLockPrefix, - //! Invalid XACQUIRE prefix. + //! Invalid XACQUIRE prefix (X86|X64). kErrorInvalidXAcquirePrefix, - //! Invalid XRELEASE prefix. + //! Invalid XRELEASE prefix (X86|X64). kErrorInvalidXReleasePrefix, - //! Invalid REP prefix. + //! Invalid REP prefix (X86|X64). kErrorInvalidRepPrefix, - //! Invalid REX prefix. + //! Invalid REX prefix (X86|X64). kErrorInvalidRexPrefix, - //! Invalid {...} register. + //! Invalid {...} register (X86|X64). kErrorInvalidExtraReg, - //! Invalid {k} use (not supported by the instruction). + //! Invalid {k} use (not supported by the instruction) (X86|X64). kErrorInvalidKMaskUse, - //! Invalid {k}{z} use (not supported by the instruction). + //! Invalid {k}{z} use (not supported by the instruction) (X86|X64). kErrorInvalidKZeroUse, - //! Invalid broadcast - Currently only related to invalid use of AVX-512 {1tox}. + //! Invalid broadcast - Currently only related to invalid use of AVX-512 {1tox} (X86|X64). kErrorInvalidBroadcast, - //! Invalid 'embedded-rounding' {er} or 'suppress-all-exceptions' {sae} (AVX-512). + //! Invalid 'embedded-rounding' {er} or 'suppress-all-exceptions' {sae} (AVX-512) (X86|X64). kErrorInvalidEROrSAE, //! Invalid address used (not encodable). kErrorInvalidAddress, @@ -319,11 +356,17 @@ enum ErrorCode : uint32_t { kErrorInvalidUseOfGpbHi, //! Invalid use of a 64-bit GPQ register in 32-bit mode. kErrorInvalidUseOfGpq, - //! Invalid use of an 80-bit float (Type::kIdF80). + //! Invalid use of an 80-bit float (\ref Type::kIdF80). kErrorInvalidUseOfF80, - //! Some registers in the instruction muse be consecutive (some ARM and AVX512 neural-net instructions). + //! Some registers in the instruction must be consecutive (some ARM and AVX512 + //! neural-net instructions). 
kErrorNotConsecutiveRegs, + //! Illegal virtual register - reported by instruction validation. + kErrorIllegalVirtReg, + //! AsmJit cannot create more virtual registers. + kErrorTooManyVirtRegs, + + //! AsmJit requires a physical register, but none is available. kErrorNoMorePhysRegs, //! A variable has been assigned more than once to a function argument (BaseCompiler). @@ -336,33 +379,12 @@ enum ErrorCode : uint32_t { //! Arithmetic overflow during expression evaluation. kErrorExpressionOverflow, + // @EnumValuesEnd@ + //! Count of AsmJit error codes. kErrorCount }; -// ============================================================================ -// [asmjit::ByteOrder] -// ============================================================================ - -//! Byte order. -namespace ByteOrder { - enum : uint32_t { - kLE = 0, - kBE = 1, - kNative = ASMJIT_ARCH_LE ? kLE : kBE, - kSwapped = ASMJIT_ARCH_LE ? kBE : kLE - }; -} - -// ============================================================================ -// [asmjit::ptr_as_func / func_as_ptr] -// ============================================================================ - -template<typename Func> -static inline Func ptr_as_func(void* func) noexcept { return Support::ptr_cast_impl<Func, void*>(func); } -template<typename Func> -static inline void* func_as_ptr(Func func) noexcept { return Support::ptr_cast_impl<void*, Func>(func); } - // ============================================================================ // [asmjit::DebugUtils] // ============================================================================ @@ -370,9 +392,11 @@ static inline void* func_as_ptr(Func func) noexcept { return Support::ptr_cast_i //! Debugging utilities. namespace DebugUtils { +//! \cond INTERNAL //! Used to silence warnings about unused arguments or variables. template<typename... Args> static ASMJIT_INLINE void unused(Args&&...) noexcept {} +//! \endcond //! Returns the error `err` passed. //! @@ -392,23 +416,35 @@ ASMJIT_API void debugOutput(const char* str) noexcept; //! \param line Line in the source file. //! \param msg Message to display. //! -//! If you have problems with assertions put a breakpoint at assertionFailed() -//! function (asmjit/core/globals.cpp) and check the call stack to locate the -//! failing code. +//! If you have problems with assertion failures, a breakpoint can be put +//! at the \ref assertionFailed() function (asmjit/core/globals.cpp). A call stack +//! will be available when such an assertion failure is triggered. AsmJit always +//! returns errors on failures; assertions are a last resort and usually mean +//! unrecoverable state due to out of range array access or totally invalid +//! arguments like nullptr where a valid pointer should be provided, etc... ASMJIT_API void ASMJIT_NORETURN assertionFailed(const char* file, int line, const char* msg) noexcept; +} // {DebugUtils} + +//! \def ASMJIT_ASSERT(...) +//! +//! AsmJit's own assert macro used in AsmJit code-base. #if defined(ASMJIT_BUILD_DEBUG) -#define ASMJIT_ASSERT(EXP) \ +#define ASMJIT_ASSERT(...) \ do { \ - if (ASMJIT_LIKELY(EXP)) \ + if (ASMJIT_LIKELY(__VA_ARGS__)) \ break; \ - ::asmjit::DebugUtils::assertionFailed(__FILE__, __LINE__, #EXP); \ + ::asmjit::DebugUtils::assertionFailed(__FILE__, __LINE__, #__VA_ARGS__); \ } while (0) #else -#define ASMJIT_ASSERT(EXP) ((void)0) +#define ASMJIT_ASSERT(...) ((void)0) #endif -//! Used by AsmJit to propagate a possible `Error` produced by `...` to the caller. +//! \def ASMJIT_PROPAGATE(...) +//! +//! Propagates a possible `Error` produced by `...` to the caller by returning +//! the error immediately. 
Used by AsmJit internally, but kept public for users +//! that want to use the same technique to propagate errors to the caller. #define ASMJIT_PROPAGATE(...) \ do { \ ::asmjit::Error _err = __VA_ARGS__; \ @@ -416,8 +452,6 @@ ASMJIT_API void ASMJIT_NORETURN assertionFailed(const char* file, int line, cons return _err; \ } while (0) -} // {DebugUtils} - //! \} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/inst.cpp b/libs/asmjit/src/asmjit/core/inst.cpp index d89c29f..a79fe83 100644 --- a/libs/asmjit/src/asmjit/core/inst.cpp +++ b/libs/asmjit/src/asmjit/core/inst.cpp @@ -24,7 +24,7 @@ #include "../core/api-build_p.h" #ifdef ASMJIT_BUILD_X86 -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/inst.h" #ifdef ASMJIT_BUILD_X86 @@ -32,7 +32,7 @@ #endif #ifdef ASMJIT_BUILD_ARM - #include "../arm/arminstapi_p.h" + #include "../arm/a64instapi_p.h" #endif ASMJIT_BEGIN_NAMESPACE @@ -42,29 +42,29 @@ ASMJIT_BEGIN_NAMESPACE // ============================================================================ #ifndef ASMJIT_NO_TEXT -Error InstAPI::instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept { +Error InstAPI::instIdToString(uint32_t arch, uint32_t instId, String& output) noexcept { #ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::InstInternal::instIdToString(archId, instId, output); + if (Environment::isFamilyX86(arch)) + return x86::InstInternal::instIdToString(arch, instId, output); #endif #ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::InstInternal::instIdToString(archId, instId, output); + if (Environment::isArchAArch64(arch)) + return a64::InstInternal::instIdToString(arch, instId, output); #endif return DebugUtils::errored(kErrorInvalidArch); } -uint32_t InstAPI::stringToInstId(uint32_t archId, const char* s, size_t len) noexcept { +uint32_t InstAPI::stringToInstId(uint32_t arch, const char* s, size_t len) noexcept { #ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::InstInternal::stringToInstId(archId, s, len); + if (Environment::isFamilyX86(arch)) + return x86::InstInternal::stringToInstId(arch, s, len); #endif #ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::InstInternal::stringToInstId(archId, s, len); + if (Environment::isArchAArch64(arch)) + return a64::InstInternal::stringToInstId(arch, s, len); #endif return 0; @@ -76,15 +76,15 @@ uint32_t InstAPI::stringToInstId(uint32_t archId, const char* s, size_t len) noe // ============================================================================ #ifndef ASMJIT_NO_VALIDATION -Error InstAPI::validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept { +Error InstAPI::validate(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, uint32_t validationFlags) noexcept { #ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::InstInternal::validate(archId, inst, operands, opCount); + if (Environment::isFamilyX86(arch)) + return x86::InstInternal::validate(arch, inst, operands, opCount, validationFlags); #endif #ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::InstInternal::validate(archId, inst, operands, opCount); + if (Environment::isArchAArch64(arch)) + return a64::InstInternal::validate(arch, inst, operands, opCount, validationFlags); #endif return DebugUtils::errored(kErrorInvalidArch); @@ -96,18 +96,18 @@ Error InstAPI::validate(uint32_t archId, const BaseInst& inst, const 
Operand_* o // ============================================================================ #ifndef ASMJIT_NO_INTROSPECTION -Error InstAPI::queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept { - if (ASMJIT_UNLIKELY(opCount > 6)) +Error InstAPI::queryRWInfo(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept { + if (ASMJIT_UNLIKELY(opCount > Globals::kMaxOpCount)) return DebugUtils::errored(kErrorInvalidArgument); #ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::InstInternal::queryRWInfo(archId, inst, operands, opCount, out); + if (Environment::isFamilyX86(arch)) + return x86::InstInternal::queryRWInfo(arch, inst, operands, opCount, out); #endif #ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::InstInternal::queryRWInfo(archId, inst, operands, opCount, out); + if (Environment::isArchAArch64(arch)) + return a64::InstInternal::queryRWInfo(arch, inst, operands, opCount, out); #endif return DebugUtils::errored(kErrorInvalidArch); @@ -119,15 +119,15 @@ Error InstAPI::queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_ // ============================================================================ #ifndef ASMJIT_NO_INTROSPECTION -Error InstAPI::queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept { +Error InstAPI::queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept { #ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::InstInternal::queryFeatures(archId, inst, operands, opCount, out); + if (Environment::isFamilyX86(arch)) + return x86::InstInternal::queryFeatures(arch, inst, operands, opCount, out); #endif #ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::InstInternal::queryFeatures(archId, inst, operands, opCount, out); + if (Environment::isArchAArch64(arch)) + return a64::InstInternal::queryFeatures(arch, inst, operands, opCount, out); #endif return DebugUtils::errored(kErrorInvalidArch); diff --git a/libs/asmjit/src/asmjit/core/inst.h b/libs/asmjit/src/asmjit/core/inst.h index 9167182..79619ae 100644 --- a/libs/asmjit/src/asmjit/core/inst.h +++ b/libs/asmjit/src/asmjit/core/inst.h @@ -31,45 +31,173 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_core +//! \addtogroup asmjit_instruction_db //! \{ // ============================================================================ -// [asmjit::InstInfo] +// [asmjit::BaseInst] // ============================================================================ -// TODO: Finalize instruction info and make more x86::InstDB methods/structs private. +//! Instruction id, options, and extraReg in a single structure. This structure +//! exists mainly to simplify analysis and validation API that requires `BaseInst` +//! and `Operand[]` array. +class BaseInst { +public: + //! Instruction id, see \ref BaseInst::Id or {arch-specific}::Inst::Id. + uint32_t _id; + //! Instruction options, see \ref BaseInst::Options or {arch-specific}::Inst::Options. + uint32_t _options; + //! Extra register used by instruction (either REP register or AVX-512 selector). + RegOnly _extraReg; -/* + enum Id : uint32_t { + //! Invalid or uninitialized instruction id. + kIdNone = 0x00000000u, + //! Abstract instruction (BaseBuilder and BaseCompiler). + kIdAbstract = 0x80000000u + }; + + enum Options : uint32_t { + //! 
Used internally by emitters for handling errors and rare cases. + kOptionReserved = 0x00000001u, + + //! Prevents following a jump during compilation (BaseCompiler). + kOptionUnfollow = 0x00000010u, -struct InstInfo { - //! Architecture agnostic attributes. - enum Attributes : uint32_t { + //! Overwrite the destination operand(s) (BaseCompiler). + //! + //! Hint that is important for register liveness analysis. It tells the + //! compiler that the destination operand will be overwritten now or by + //! adjacent instructions. BaseCompiler knows when a register is completely + //! overwritten by a single instruction, for example you don't have to + //! mark "movaps" or "pxor x, x", however, if a pair of instructions is + //! used and the first of them doesn't completely overwrite the content + //! of the destination, BaseCompiler fails to mark that register as dead. + //! + //! X86 Specific + //! ------------ + //! + //! - All instructions that always overwrite at least the size of the + //! register the virtual-register uses, for example "mov", "movq", + //! "movaps" don't need the overwrite option to be used - conversion, + //! shuffle, and other miscellaneous instructions included. + //! + //! - All instructions that clear the destination register if all operands + //! are the same, for example "xor x, x", "pcmpeqb x, x", etc... + //! + //! - Consecutive instructions that partially overwrite the variable until + //! there is no old content require `BaseCompiler::overwrite()` to be used. + //! Some examples (not always the best use cases, though): + //! + //! - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa + //! - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa + //! - `mov al, ?` followed by `and ax, 0xFF` + //! - `mov al, ?` followed by `mov ah, al` + //! - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1` + //! + //! - If an allocated variable is used temporarily for scalar operations. For + //! example if you allocate a full vector like `x86::Compiler::newXmm()` + //! and then use that vector for scalar operations you should use + //! `overwrite()` directive: + //! + //! - `sqrtss x, y` - only LO element of `x` is changed, if you don't + //! use HI elements, use `compiler.overwrite().sqrtss(x, y)`. + kOptionOverwrite = 0x00000020u, + //! Emit short-form of the instruction. + kOptionShortForm = 0x00000040u, + //! Emit long-form of the instruction. + kOptionLongForm = 0x00000080u, + //! Conditional jump is likely to be taken. + kOptionTaken = 0x00000100u, + //! Conditional jump is unlikely to be taken. + kOptionNotTaken = 0x00000200u }; - //! Instruction attributes. - uint32_t _attributes; + //! Control type. + enum ControlType : uint32_t { + //! No control type (doesn't jump). + kControlNone = 0u, + //! Unconditional jump. + kControlJump = 1u, + //! Conditional jump (branch). + kControlBranch = 2u, + //! Function call. + kControlCall = 3u, + //! Function return. + kControlReturn = 4u + }; - inline void reset() noexcept { memset(this, 0, sizeof(*this)); } + //! \name Construction & Destruction + //! \{ - inline uint32_t attributes() const noexcept { return _attributes; } - inline bool hasAttribute(uint32_t attr) const noexcept { return (_attributes & attr) != 0; } -}; + //! Creates a new BaseInst instance with `id` and `options` set. + //! + //! Default values of `id` and `options` are zero, which means a 'none' instruction. + //! Such an instruction is guaranteed to never exist for any architecture supported + //! by AsmJit. 
+ inline explicit BaseInst(uint32_t id = 0, uint32_t options = 0) noexcept + : _id(id), + _options(options), + _extraReg() {} + + inline BaseInst(uint32_t id, uint32_t options, const RegOnly& extraReg) noexcept + : _id(id), + _options(options), + _extraReg(extraReg) {} + + inline BaseInst(uint32_t id, uint32_t options, const BaseReg& extraReg) noexcept + : _id(id), + _options(options), + _extraReg { extraReg.signature(), extraReg.id() } {} + + //! \} -//! Gets attributes of the given instruction. -ASMJIT_API Error queryCommonInfo(uint32_t archId, uint32_t instId, InstInfo& out) noexcept; + //! \name Instruction ID + //! \{ + + //! Returns the instruction id. + inline uint32_t id() const noexcept { return _id; } + //! Sets the instruction id to the given `id`. + inline void setId(uint32_t id) noexcept { _id = id; } + //! Resets the instruction id to zero, see \ref kIdNone. + inline void resetId() noexcept { _id = 0; } + + //! \} -*/ + //! \name Instruction Options + //! \{ + + inline uint32_t options() const noexcept { return _options; } + inline bool hasOption(uint32_t option) const noexcept { return (_options & option) != 0; } + inline void setOptions(uint32_t options) noexcept { _options = options; } + inline void addOptions(uint32_t options) noexcept { _options |= options; } + inline void clearOptions(uint32_t options) noexcept { _options &= ~options; } + inline void resetOptions() noexcept { _options = 0; } + + //! \} + + //! \name Extra Register + //! \{ + + inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); } + inline RegOnly& extraReg() noexcept { return _extraReg; } + inline const RegOnly& extraReg() const noexcept { return _extraReg; } + inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); } + inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); } + inline void resetExtraReg() noexcept { _extraReg.reset(); } + + //! \} +}; // ============================================================================ -// [asmjit::InstRWInfo / OpRWInfo] +// [asmjit::OpRWInfo] // ============================================================================ -//! Read/Write information related to a single operand, used by `InstRWInfo`. +//! Read/Write information related to a single operand, used by \ref InstRWInfo. struct OpRWInfo { - //! Read/Write flags, see `OpRWInfo::Flags`. + //! Read/Write flags, see \ref OpRWInfo::Flags. uint32_t _opFlags; //! Physical register index, if required. uint8_t _physId; @@ -87,18 +215,12 @@ struct OpRWInfo { //! Flags describe how the operand is accessed and some additional information. enum Flags : uint32_t { //! Operand is read. - //! - //! \note This flag must be `0x00000001`. kRead = 0x00000001u, //! Operand is written. - //! - //! \note This flag must be `0x00000002`. kWrite = 0x00000002u, //! Operand is both read and written. - //! - //! \note This combination of flags must be `0x00000003`. kRW = 0x00000003u, //! Register operand can be replaced by a memory operand. @@ -107,9 +229,9 @@ struct OpRWInfo { //! The `extendByteMask()` represents a zero extension. kZExt = 0x00000010u, - //! Register operand must use `physId()`. + //! Register operand must use \ref physId(). kRegPhysId = 0x00000100u, - //! Base register of a memory operand must use `physId()`. + //! Base register of a memory operand must use \ref physId(). kMemPhysId = 0x00000200u, //! This memory operand is only used to encode registers and doesn't access memory. 
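The BaseInst class added above bundles an instruction id, options, and an optional extra register so that the introspection and validation functions can take a single `BaseInst` plus an `Operand_[]` array. A minimal sketch of how it might be driven through the updated `InstAPI::queryRWInfo()` signature follows; `Environment::kArchX64`, `x86::Inst::kIdAdd`, and the `x86::eax`/`x86::ebx` operands are assumed from AsmJit's public headers and are not part of this patch:

#include <asmjit/x86.h>
#include <cstdio>

using namespace asmjit;

// Queries read/write info of "add eax, ebx"; `out` is now a pointer and
// `opCount` a size_t, matching the signatures introduced by this patch.
static Error queryAddRWInfo() {
  BaseInst inst(x86::Inst::kIdAdd);             // Instruction id, default options.
  Operand operands[] = { x86::eax, x86::ebx };  // Destination, source.

  InstRWInfo rw;
  ASMJIT_PROPAGATE(InstAPI::queryRWInfo(Environment::kArchX64, inst, operands, 2, &rw));

  const OpRWInfo& dst = rw.operand(0);
  std::printf("eax: read=%u write=%u\n", unsigned(dst.isRead()), unsigned(dst.isWrite()));
  return kErrorOk;
}
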
@@ -140,6 +262,8 @@ struct OpRWInfo { kMemBasePostModify = 0x00020000u }; + // Don't remove these asserts. Read/Write flags are used extensively + // by Compiler and they must always be compatible with constants below. static_assert(kRead == 0x1, "OpRWInfo::kRead flag must be 0x1"); static_assert(kWrite == 0x2, "OpRWInfo::kWrite flag must be 0x2"); static_assert(kRegMem == 0x4, "OpRWInfo::kRegMem flag must be 0x4"); @@ -147,7 +271,11 @@ struct OpRWInfo { //! \name Reset //! \{ + //! Resets this operand information to all zeros. inline void reset() noexcept { memset(this, 0, sizeof(*this)); } + + //! Resets this operand info (resets all members) and set common information + //! to the given `opFlags`, `regSize`, and possibly `physId`. inline void reset(uint32_t opFlags, uint32_t regSize, uint32_t physId = BaseReg::kIdBad) noexcept { _opFlags = opFlags; _physId = uint8_t(physId); @@ -169,35 +297,100 @@ struct OpRWInfo { //! \name Operand Flags //! \{ + //! Returns operand flags, see \ref Flags. inline uint32_t opFlags() const noexcept { return _opFlags; } + //! Tests whether operand flags contain the given `flag`. inline bool hasOpFlag(uint32_t flag) const noexcept { return (_opFlags & flag) != 0; } + //! Adds the given `flags` to operand flags. inline void addOpFlags(uint32_t flags) noexcept { _opFlags |= flags; } + //! Removes the given `flags` from operand flags. inline void clearOpFlags(uint32_t flags) noexcept { _opFlags &= ~flags; } + //! Tests whether this operand is read from. inline bool isRead() const noexcept { return hasOpFlag(kRead); } + //! Tests whether this operand is written to. inline bool isWrite() const noexcept { return hasOpFlag(kWrite); } + //! Tests whether this operand is both read and write. inline bool isReadWrite() const noexcept { return (_opFlags & kRW) == kRW; } + //! Tests whether this operand is read only. inline bool isReadOnly() const noexcept { return (_opFlags & kRW) == kRead; } + //! Tests whether this operand is write only. inline bool isWriteOnly() const noexcept { return (_opFlags & kRW) == kWrite; } + + //! Tests whether this operand is Reg/Mem + //! + //! Reg/Mem operands can use either register or memory. inline bool isRm() const noexcept { return hasOpFlag(kRegMem); } + + //! Tests whether the operand will be zero extended. inline bool isZExt() const noexcept { return hasOpFlag(kZExt); } //! \} + //! \name Memory Flags + //! \{ + + //! Tests whether this is a fake memory operand, which is only used, because + //! of encoding. Fake memory operands do not access any memory, they are only + //! used to encode registers. + inline bool isMemFake() const noexcept { return hasOpFlag(kMemFake); } + + //! Tests whether the instruction's memory BASE register is used. + inline bool isMemBaseUsed() const noexcept { return (_opFlags & kMemBaseRW) != 0; } + //! Tests whether the instruction reads from its BASE registers. + inline bool isMemBaseRead() const noexcept { return hasOpFlag(kMemBaseRead); } + //! Tests whether the instruction writes to its BASE registers. + inline bool isMemBaseWrite() const noexcept { return hasOpFlag(kMemBaseWrite); } + //! Tests whether the instruction reads and writes from/to its BASE registers. + inline bool isMemBaseReadWrite() const noexcept { return (_opFlags & kMemBaseRW) == kMemBaseRW; } + //! Tests whether the instruction only reads from its BASE registers. + inline bool isMemBaseReadOnly() const noexcept { return (_opFlags & kMemBaseRW) == kMemBaseRead; } + //! 
Tests whether the instruction only writes to its BASE registers. + inline bool isMemBaseWriteOnly() const noexcept { return (_opFlags & kMemBaseRW) == kMemBaseWrite; } + + //! Tests whether the instruction modifies the BASE register before it uses + //! it to calculate the target address. + inline bool isMemBasePreModify() const noexcept { return hasOpFlag(kMemBasePreModify); } + //! Tests whether the instruction modifies the BASE register after it uses + //! it to calculate the target address. + inline bool isMemBasePostModify() const noexcept { return hasOpFlag(kMemBasePostModify); } + + //! Tests whether the instruction's memory INDEX register is used. + inline bool isMemIndexUsed() const noexcept { return (_opFlags & kMemIndexRW) != 0; } + //! Tests whether the instruction reads the INDEX registers. + inline bool isMemIndexRead() const noexcept { return hasOpFlag(kMemIndexRead); } + //! Tests whether the instruction writes to its INDEX registers. + inline bool isMemIndexWrite() const noexcept { return hasOpFlag(kMemIndexWrite); } + //! Tests whether the instruction reads and writes from/to its INDEX registers. + inline bool isMemIndexReadWrite() const noexcept { return (_opFlags & kMemIndexRW) == kMemIndexRW; } + //! Tests whether the instruction only reads from its INDEX registers. + inline bool isMemIndexReadOnly() const noexcept { return (_opFlags & kMemIndexRW) == kMemIndexRead; } + //! Tests whether the instruction only writes to its INDEX registers. + inline bool isMemIndexWriteOnly() const noexcept { return (_opFlags & kMemIndexRW) == kMemIndexWrite; } + + //! \} + //! \name Physical Register ID //! \{ + //! Returns a physical id of the register that is fixed for this operand. + //! + //! Returns \ref BaseReg::kIdBad if any register can be used. inline uint32_t physId() const noexcept { return _physId; } + //! Tests whether \ref physId() would return a valid physical register id. inline bool hasPhysId() const noexcept { return _physId != BaseReg::kIdBad; } + //! Sets physical register id, which would be fixed for this operand. inline void setPhysId(uint32_t physId) noexcept { _physId = uint8_t(physId); } //! \} - //! \name Reg/Mem + //! \name Reg/Mem Information //! \{ + //! Returns Reg/Mem size of the operand. inline uint32_t rmSize() const noexcept { return _rmSize; } + //! Sets Reg/Mem size of the operand. inline void setRmSize(uint32_t rmSize) noexcept { _rmSize = uint8_t(rmSize); } //! \} @@ -205,46 +398,76 @@ struct OpRWInfo { //! \name Read & Write Masks //! \{ + //! Returns read mask. inline uint64_t readByteMask() const noexcept { return _readByteMask; } + //! Returns write mask. inline uint64_t writeByteMask() const noexcept { return _writeByteMask; } + //! Returns extend mask. inline uint64_t extendByteMask() const noexcept { return _extendByteMask; } + //! Sets read mask. inline void setReadByteMask(uint64_t mask) noexcept { _readByteMask = mask; } + //! Sets write mask. inline void setWriteByteMask(uint64_t mask) noexcept { _writeByteMask = mask; } + //! Sets externd mask. inline void setExtendByteMask(uint64_t mask) noexcept { _extendByteMask = mask; } //! \} }; +// ============================================================================ +// [asmjit::InstRWInfo] +// ============================================================================ + //! Read/Write information of an instruction. struct InstRWInfo { - //! Instruction flags. + //! Instruction flags (there are no flags at the moment, this field is reserved). uint32_t _instFlags; - //! Mask of flags read. 
+ //! Mask of CPU flags read. uint32_t _readFlags; - //! Mask of flags written. + //! Mask of CPU flags written. uint32_t _writeFlags; //! Count of operands. uint8_t _opCount; //! CPU feature required for replacing register operand with memory operand. uint8_t _rmFeature; //! Reserved for future use. - uint8_t _reserved[19]; + uint8_t _reserved[18]; //! Read/Write onfo of extra register (rep{} or kz{}). OpRWInfo _extraReg; //! Read/Write info of instruction operands. OpRWInfo _operands[Globals::kMaxOpCount]; + //! \name Commons + //! \{ + + //! Resets this RW information to all zeros. inline void reset() noexcept { memset(this, 0, sizeof(*this)); } + //! \} + + //! \name Instruction Flags + //! + //! \{ + inline uint32_t instFlags() const noexcept { return _instFlags; } inline bool hasInstFlag(uint32_t flag) const noexcept { return (_instFlags & flag) != 0; } - inline uint32_t opCount() const noexcept { return _opCount; } + //! } + //! \name CPU Flags Read/Write Information + //! \{ + + //! Returns read flags of the instruction. inline uint32_t readFlags() const noexcept { return _readFlags; } + //! Returns write flags of the instruction. inline uint32_t writeFlags() const noexcept { return _writeFlags; } + //! \} + + //! \name Reg/Mem Information + //! \{ + //! Returns the CPU feature required to replace a register operand with memory //! operand. If the returned feature is zero (none) then this instruction //! either doesn't provide memory operand combination or there is no extra @@ -259,166 +482,25 @@ struct InstRWInfo { //! 'reg/mem/imm' combination. inline uint32_t rmFeature() const noexcept { return _rmFeature; } + //! \} + + //! \name Operand Read/Write Information + //! \{ + + //! Returns RW information of extra register operand (extraReg). inline const OpRWInfo& extraReg() const noexcept { return _extraReg; } + + //! Returns RW information of all instruction's operands. inline const OpRWInfo* operands() const noexcept { return _operands; } + //! Returns RW information of the operand at the given `index`. inline const OpRWInfo& operand(size_t index) const noexcept { ASMJIT_ASSERT(index < Globals::kMaxOpCount); return _operands[index]; } -}; - -// ============================================================================ -// [asmjit::BaseInst] -// ============================================================================ - -//! Instruction id, options, and extraReg in a single structure. This structure -//! exists mainly to simplify analysis and validation API that requires `BaseInst` -//! and `Operand[]` array. -class BaseInst { -public: - //! Instruction id. - uint32_t _id; - //! Instruction options. - uint32_t _options; - //! Extra register used by instruction (either REP register or AVX-512 selector). - RegOnly _extraReg; - - enum Id : uint32_t { - //! Invalid or uninitialized instruction id. - kIdNone = 0x00000000u, - //! Abstract instruction (BaseBuilder and BaseCompiler). - kIdAbstract = 0x80000000u - }; - - enum Options : uint32_t { - //! Used internally by emitters for handling errors and rare cases. - kOptionReserved = 0x00000001u, - - //! Used only by Assembler to mark that `_op4` and `_op5` are used (internal). - //! - //! TODO: This should be removed in the future. - kOptionOp4Op5Used = 0x00000002u, - - //! Prevents following a jump during compilation (BaseCompiler). - //! - //! TODO: This should be renamed to kOptionNoReturn. - kOptionUnfollow = 0x00000010u, - - //! Overwrite the destination operand(s) (BaseCompiler). - //! - //! 
Hint that is important for register liveness analysis. It tells the - //! compiler that the destination operand will be overwritten now or by - //! adjacent instructions. BaseCompiler knows when a register is completely - //! overwritten by a single instruction, for example you don't have to - //! mark "movaps" or "pxor x, x", however, if a pair of instructions is - //! used and the first of them doesn't completely overwrite the content - //! of the destination, BaseCompiler fails to mark that register as dead. - //! - //! X86 Specific - //! ------------ - //! - //! - All instructions that always overwrite at least the size of the - //! register the virtual-register uses , for example "mov", "movq", - //! "movaps" don't need the overwrite option to be used - conversion, - //! shuffle, and other miscellaneous instructions included. - //! - //! - All instructions that clear the destination register if all operands - //! are the same, for example "xor x, x", "pcmpeqb x x", etc... - //! - //! - Consecutive instructions that partially overwrite the variable until - //! there is no old content require `BaseCompiler::overwrite()` to be used. - //! Some examples (not always the best use cases thought): - //! - //! - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa - //! - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa - //! - `mov al, ?` followed by `and ax, 0xFF` - //! - `mov al, ?` followed by `mov ah, al` - //! - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1` - //! - //! - If allocated variable is used temporarily for scalar operations. For - //! example if you allocate a full vector like `x86::Compiler::newXmm()` - //! and then use that vector for scalar operations you should use - //! `overwrite()` directive: - //! - //! - `sqrtss x, y` - only LO element of `x` is changed, if you don't - //! use HI elements, use `compiler.overwrite().sqrtss(x, y)`. - kOptionOverwrite = 0x00000020u, - - //! Emit short-form of the instruction. - kOptionShortForm = 0x00000040u, - //! Emit long-form of the instruction. - kOptionLongForm = 0x00000080u, - - //! Conditional jump is likely to be taken. - kOptionTaken = 0x00000100u, - //! Conditional jump is unlikely to be taken. - kOptionNotTaken = 0x00000200u - }; - - //! Control type. - enum ControlType : uint32_t { - //! No control type (doesn't jump). - kControlNone = 0u, - //! Unconditional jump. - kControlJump = 1u, - //! Conditional jump (branch). - kControlBranch = 2u, - //! Function call. - kControlCall = 3u, - //! Function return. - kControlReturn = 4u - }; - - //! \name Construction & Destruction - //! \{ - inline explicit BaseInst(uint32_t id = 0, uint32_t options = 0) noexcept - : _id(id), - _options(options), - _extraReg() {} - - inline BaseInst(uint32_t id, uint32_t options, const RegOnly& extraReg) noexcept - : _id(id), - _options(options), - _extraReg(extraReg) {} - - inline BaseInst(uint32_t id, uint32_t options, const BaseReg& extraReg) noexcept - : _id(id), - _options(options), - _extraReg { extraReg.signature(), extraReg.id() } {} - - //! \} - - //! \name Instruction ID - //! \{ - - inline uint32_t id() const noexcept { return _id; } - inline void setId(uint32_t id) noexcept { _id = id; } - inline void resetId() noexcept { _id = 0; } - - //! \} - - //! \name Instruction Options - //! 
\{ - - inline uint32_t options() const noexcept { return _options; } - inline void setOptions(uint32_t options) noexcept { _options = options; } - inline void addOptions(uint32_t options) noexcept { _options |= options; } - inline void clearOptions(uint32_t options) noexcept { _options &= ~options; } - inline void resetOptions() noexcept { _options = 0; } - - //! \} - - //! \name Extra Register - //! \{ - - inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); } - inline RegOnly& extraReg() noexcept { return _extraReg; } - inline const RegOnly& extraReg() const noexcept { return _extraReg; } - inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); } - inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); } - inline void resetExtraReg() noexcept { _extraReg.reset(); } + //! Returns the number of operands this instruction has. + inline uint32_t opCount() const noexcept { return _opCount; } //! \} }; @@ -430,6 +512,12 @@ class BaseInst { //! Instruction API. namespace InstAPI { +//! Validation flags that can be used with \ref InstAPI::validate(). +enum ValidationFlags : uint32_t { + //! Allow virtual registers in the instruction. + kValidationFlagVirtRegs = 0x01u +}; + #ifndef ASMJIT_NO_TEXT //! Appends the name of the instruction specified by `instId` and `instOptions` //! into the `output` string. @@ -437,27 +525,29 @@ namespace InstAPI { //! \note Instruction options would only affect instruction prefix & suffix, //! other options would be ignored. If `instOptions` is zero then only raw //! instruction name (without any additional text) will be appended. -ASMJIT_API Error instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept; +ASMJIT_API Error instIdToString(uint32_t arch, uint32_t instId, String& output) noexcept; //! Parses an instruction name in the given string `s`. Length is specified //! by `len` argument, which can be `SIZE_MAX` if `s` is known to be null //! terminated. //! -//! The output is stored in `instId`. -ASMJIT_API uint32_t stringToInstId(uint32_t archId, const char* s, size_t len) noexcept; +//! Returns the parsed instruction id or \ref BaseInst::kIdNone if no such +//! instruction exists. +ASMJIT_API uint32_t stringToInstId(uint32_t arch, const char* s, size_t len) noexcept; #endif // !ASMJIT_NO_TEXT #ifndef ASMJIT_NO_VALIDATION -//! Validates the given instruction. -ASMJIT_API Error validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept; +//! Validates the given instruction considering the validation `flags`, see +//! \ref ValidationFlags. +ASMJIT_API Error validate(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, uint32_t validationFlags = 0) noexcept; #endif // !ASMJIT_NO_VALIDATION #ifndef ASMJIT_NO_INTROSPECTION //! Gets Read/Write information of the given instruction. -ASMJIT_API Error queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept; +ASMJIT_API Error queryRWInfo(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept; //! Gets CPU features required by the given instruction. 
-ASMJIT_API Error queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept; +ASMJIT_API Error queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept; #endif // !ASMJIT_NO_INTROSPECTION } // {InstAPI} diff --git a/libs/asmjit/src/asmjit/core/jitallocator.cpp b/libs/asmjit/src/asmjit/core/jitallocator.cpp index a8ca0c3..6a1f8fd 100644 --- a/libs/asmjit/src/asmjit/core/jitallocator.cpp +++ b/libs/asmjit/src/asmjit/core/jitallocator.cpp @@ -24,9 +24,9 @@ #include "../core/api-build_p.h" #ifndef ASMJIT_NO_JIT -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/jitallocator.h" -#include "../core/osutils.h" +#include "../core/osutils_p.h" #include "../core/support.h" #include "../core/virtmem.h" #include "../core/zone.h" @@ -938,7 +938,7 @@ class Random { // Helper class to verify that JitAllocator doesn't return addresses that overlap. class JitAllocatorWrapper { public: - explicit inline JitAllocatorWrapper(const JitAllocator::CreateParams* params) noexcept + inline explicit JitAllocatorWrapper(const JitAllocator::CreateParams* params) noexcept : _zone(1024 * 1024), _heap(&_zone), _allocator(params) {} diff --git a/libs/asmjit/src/asmjit/core/jitallocator.h b/libs/asmjit/src/asmjit/core/jitallocator.h index 111716e..9cd0a1f 100644 --- a/libs/asmjit/src/asmjit/core/jitallocator.h +++ b/libs/asmjit/src/asmjit/core/jitallocator.h @@ -32,7 +32,7 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_jit +//! \addtogroup asmjit_virtual_memory //! \{ // ============================================================================ @@ -132,9 +132,6 @@ class JitAllocator { //! JitAllocator allocator(¶ms); //! ``` struct CreateParams { - // Reset the content of `CreateParams`. - inline void reset() noexcept { memset(this, 0, sizeof(*this)); } - //! Allocator options, see \ref JitAllocator::Options. //! //! No options are used by default. @@ -161,6 +158,9 @@ class JitAllocator { //! //! Only used if \ref kOptionCustomFillPattern is set. uint32_t fillPattern; + + // Reset the content of `CreateParams`. + inline void reset() noexcept { memset(this, 0, sizeof(*this)); } }; //! Creates a `JitAllocator` instance. @@ -221,6 +221,15 @@ class JitAllocator { //! Statistics about `JitAllocator`. struct Statistics { + //! Number of blocks `JitAllocator` maintains. + size_t _blockCount; + //! How many bytes are currently used / allocated. + size_t _usedSize; + //! How many bytes are currently reserved by the allocator. + size_t _reservedSize; + //! Allocation overhead (in bytes) required to maintain all blocks. + size_t _overheadSize; + inline void reset() noexcept { _blockCount = 0; _usedSize = 0; @@ -251,15 +260,6 @@ class JitAllocator { inline double overheadSizeAsPercent() const noexcept { return (double(overheadSize()) / (double(reservedSize()) + 1e-16)) * 100.0; } - - //! Number of blocks `JitAllocator` maintains. - size_t _blockCount; - //! How many bytes are currently used / allocated. - size_t _usedSize; - //! How many bytes are currently reserved by the allocator. - size_t _reservedSize; - //! Allocation overhead (in bytes) required to maintain all blocks. - size_t _overheadSize; }; //! Returns JIT allocator statistics. 
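The Statistics structure above only had its data members moved ahead of its member functions; the accessors themselves are unchanged by this patch. A short sketch of how the allocator statistics might be read through a JitRuntime (assumed usage based on AsmJit's public API, not taken from the hunks above):

#include <asmjit/core.h>
#include <cstdio>

using namespace asmjit;

// Prints how much memory the JIT allocator currently uses and reserves.
static void printJitAllocatorStats() {
  JitRuntime rt;  // Picks up the host Environment, as set up in the jitruntime.cpp hunk below.
  JitAllocator::Statistics stats = rt.allocator()->statistics();

  std::printf("used=%zu reserved=%zu overhead=%.2f%%\n",
              stats.usedSize(),
              stats.reservedSize(),
              stats.overheadSizeAsPercent());
}
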
diff --git a/libs/asmjit/src/asmjit/core/jitruntime.cpp b/libs/asmjit/src/asmjit/core/jitruntime.cpp index 625cc3d..381979e 100644 --- a/libs/asmjit/src/asmjit/core/jitruntime.cpp +++ b/libs/asmjit/src/asmjit/core/jitruntime.cpp @@ -35,42 +35,19 @@ ASMJIT_BEGIN_NAMESPACE // Only useful on non-x86 architectures. static inline void JitRuntime_flushInstructionCache(const void* p, size_t size) noexcept { -#if defined(_WIN32) && !ASMJIT_ARCH_X86 +#if ASMJIT_ARCH_X86 + DebugUtils::unused(p, size); +#else +# if defined(_WIN32) // Windows has a built-in support in `kernel32.dll`. ::FlushInstructionCache(::GetCurrentProcess(), p, size); -#else +# elif defined(__GNUC__) + void* start = const_cast(p); + void* end = static_cast(start) + size; + __builtin___clear_cache(start, end); +# else DebugUtils::unused(p, size); -#endif -} - -// X86 Target -// ---------- -// -// - 32-bit - Linux, OSX, BSD, and apparently also Haiku guarantee 16-byte -// stack alignment. Other operating systems are assumed to have -// 4-byte alignment by default for safety reasons. -// - 64-bit - stack must be aligned to 16 bytes. -// -// ARM Target -// ---------- -// -// - 32-bit - Stack must be aligned to 8 bytes. -// - 64-bit - Stack must be aligned to 16 bytes (hardware requirement). -static inline uint32_t JitRuntime_detectNaturalStackAlignment() noexcept { -#if ASMJIT_ARCH_BITS == 64 || \ - defined(__APPLE__ ) || \ - defined(__DragonFly__) || \ - defined(__HAIKU__ ) || \ - defined(__FreeBSD__ ) || \ - defined(__NetBSD__ ) || \ - defined(__OpenBSD__ ) || \ - defined(__bsdi__ ) || \ - defined(__linux__ ) - return 16; -#elif ASMJIT_ARCH_ARM - return 8; -#else - return uint32_t(sizeof(uintptr_t)); +# endif #endif } @@ -80,15 +57,10 @@ static inline uint32_t JitRuntime_detectNaturalStackAlignment() noexcept { JitRuntime::JitRuntime(const JitAllocator::CreateParams* params) noexcept : _allocator(params) { - - // Setup target properties. - _targetType = kTargetJit; - _codeInfo._archInfo = CpuInfo::host().archInfo(); - _codeInfo._stackAlignment = uint8_t(JitRuntime_detectNaturalStackAlignment()); - _codeInfo._cdeclCallConv = CallConv::kIdHostCDecl; - _codeInfo._stdCallConv = CallConv::kIdHostStdCall; - _codeInfo._fastCallConv = CallConv::kIdHostFastCall; + _environment = hostEnvironment(); + _environment.setFormat(Environment::kFormatJIT); } + JitRuntime::~JitRuntime() noexcept {} // ============================================================================ diff --git a/libs/asmjit/src/asmjit/core/jitruntime.h b/libs/asmjit/src/asmjit/core/jitruntime.h index 97f26e7..91880e6 100644 --- a/libs/asmjit/src/asmjit/core/jitruntime.h +++ b/libs/asmjit/src/asmjit/core/jitruntime.h @@ -35,7 +35,7 @@ ASMJIT_BEGIN_NAMESPACE class CodeHolder; -//! \addtogroup asmjit_jit +//! \addtogroup asmjit_virtual_memory //! \{ // ============================================================================ diff --git a/libs/asmjit/src/asmjit/core/logger.cpp b/libs/asmjit/src/asmjit/core/logger.cpp new file mode 100644 index 0000000..22e0b9a --- /dev/null +++ b/libs/asmjit/src/asmjit/core/logger.cpp @@ -0,0 +1,124 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. 
+// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#ifndef ASMJIT_NO_LOGGING + +#include "../core/logger.h" +#include "../core/string.h" +#include "../core/support.h" + +ASMJIT_BEGIN_NAMESPACE + +// ============================================================================ +// [asmjit::Logger - Construction / Destruction] +// ============================================================================ + +Logger::Logger() noexcept + : _options() {} +Logger::~Logger() noexcept {} + +// ============================================================================ +// [asmjit::Logger - Logging] +// ============================================================================ + +Error Logger::logf(const char* fmt, ...) noexcept { + Error err; + va_list ap; + + va_start(ap, fmt); + err = logv(fmt, ap); + va_end(ap); + + return err; +} + +Error Logger::logv(const char* fmt, va_list ap) noexcept { + StringTmp<2048> sb; + ASMJIT_PROPAGATE(sb.appendVFormat(fmt, ap)); + return log(sb); +} + +Error Logger::logBinary(const void* data, size_t size) noexcept { + static const char prefix[] = "db "; + + StringTmp<256> sb; + sb.append(prefix, ASMJIT_ARRAY_SIZE(prefix) - 1); + + size_t i = size; + const uint8_t* s = static_cast(data); + + while (i) { + uint32_t n = uint32_t(Support::min(i, 16)); + sb.truncate(ASMJIT_ARRAY_SIZE(prefix) - 1); + sb.appendHex(s, n); + sb.append('\n'); + ASMJIT_PROPAGATE(log(sb)); + s += n; + i -= n; + } + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::FileLogger - Construction / Destruction] +// ============================================================================ + +FileLogger::FileLogger(FILE* file) noexcept + : _file(file) {} +FileLogger::~FileLogger() noexcept {} + +// ============================================================================ +// [asmjit::FileLogger - Logging] +// ============================================================================ + +Error FileLogger::_log(const char* data, size_t size) noexcept { + if (!_file) + return kErrorOk; + + if (size == SIZE_MAX) + size = strlen(data); + + fwrite(data, 1, size, _file); + return kErrorOk; +} + +// ============================================================================ +// [asmjit::StringLogger - Construction / Destruction] +// ============================================================================ + +StringLogger::StringLogger() noexcept {} +StringLogger::~StringLogger() noexcept {} + +// ============================================================================ +// [asmjit::StringLogger - Logging] +// ============================================================================ + +Error StringLogger::_log(const char* data, size_t size) noexcept { + return _content.append(data, size); +} + +ASMJIT_END_NAMESPACE + +#endif diff --git 
a/libs/asmjit/src/asmjit/core/logging.h b/libs/asmjit/src/asmjit/core/logger.h similarity index 54% rename from libs/asmjit/src/asmjit/core/logging.h rename to libs/asmjit/src/asmjit/core/logger.h index 468e3a1..2840869 100644 --- a/libs/asmjit/src/asmjit/core/logging.h +++ b/libs/asmjit/src/asmjit/core/logger.h @@ -26,119 +26,28 @@ #include "../core/inst.h" #include "../core/string.h" - -ASMJIT_BEGIN_NAMESPACE - -//! \addtogroup asmjit_core -//! \{ +#include "../core/formatter.h" #ifndef ASMJIT_NO_LOGGING -// ============================================================================ -// [Forward Declarations] -// ============================================================================ - -class BaseEmitter; -class BaseReg; -class Logger; -struct Operand_; - -#ifndef ASMJIT_NO_BUILDER -class BaseBuilder; -class BaseNode; -#endif - -// ============================================================================ -// [asmjit::FormatOptions] -// ============================================================================ - -class FormatOptions { -public: - uint32_t _flags; - uint8_t _indentation[4]; - - enum Flags : uint32_t { - //! Show also binary form of each logged instruction (assembler). - kFlagMachineCode = 0x00000001u, - //! Show a text explanation of some immediate values. - kFlagExplainImms = 0x00000002u, - //! Use hexadecimal notation of immediate values. - kFlagHexImms = 0x00000004u, - //! Use hexadecimal notation of address offsets. - kFlagHexOffsets = 0x00000008u, - //! Show casts between virtual register types (compiler). - kFlagRegCasts = 0x00000010u, - //! Show positions associated with nodes (compiler). - kFlagPositions = 0x00000020u, - //! Annotate nodes that are lowered by passes. - kFlagAnnotations = 0x00000040u, - - // TODO: These must go, keep this only for formatting. - //! Show an additional output from passes. - kFlagDebugPasses = 0x00000080u, - //! Show an additional output from RA. - kFlagDebugRA = 0x00000100u - }; - - enum IndentationType : uint32_t { - //! Indentation used for instructions and directives. - kIndentationCode = 0u, - //! Indentation used for labels and function nodes. - kIndentationLabel = 1u, - //! Indentation used for comments (not inline comments). - kIndentationComment = 2u, - kIndentationReserved = 3u - }; - - //! \name Construction & Destruction - //! \{ - - constexpr FormatOptions() noexcept - : _flags(0), - _indentation { 0, 0, 0, 0 } {} - - constexpr FormatOptions(const FormatOptions& other) noexcept = default; - inline FormatOptions& operator=(const FormatOptions& other) noexcept = default; - - inline void reset() noexcept { - _flags = 0; - _indentation[0] = 0; - _indentation[1] = 0; - _indentation[2] = 0; - _indentation[3] = 0; - } - - //! \} - - //! \name Accessors - //! \{ - - constexpr uint32_t flags() const noexcept { return _flags; } - constexpr bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } - inline void setFlags(uint32_t flags) noexcept { _flags = flags; } - inline void addFlags(uint32_t flags) noexcept { _flags |= flags; } - inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; } - - constexpr uint8_t indentation(uint32_t type) const noexcept { return _indentation[type]; } - inline void setIndentation(uint32_t type, uint32_t n) noexcept { _indentation[type] = uint8_t(n); } - inline void resetIndentation(uint32_t type) noexcept { _indentation[type] = uint8_t(0); } +ASMJIT_BEGIN_NAMESPACE - //! \} -}; +//! \addtogroup asmjit_logging +//! 
\{ // ============================================================================ // [asmjit::Logger] // ============================================================================ -//! Abstract logging interface and helpers. +//! Logging interface. //! -//! This class can be inherited and reimplemented to fit into your logging -//! subsystem. When reimplementing use `Logger::_log()` method to log into -//! a custom stream. +//! This class can be inherited and reimplemented to fit into your own logging +//! needs. When reimplementing a logger, use the \ref Logger::_log() method to +//! customize the output. //! //! There are two `Logger` implementations offered by AsmJit: -//! - `FileLogger` - allows to log into `FILE*`. -//! - `StringLogger` - logs into a `String`. +//! - \ref FileLogger - logs into a `FILE*`. +//! - \ref StringLogger - concatenates all logs into a \ref String. class ASMJIT_VIRTAPI Logger { public: ASMJIT_BASE_CLASS(Logger) @@ -160,17 +69,28 @@ class ASMJIT_VIRTAPI Logger { //! \name Format Options //! \{ + //! Returns \ref FormatOptions of this logger. inline FormatOptions& options() noexcept { return _options; } + //! \overload inline const FormatOptions& options() const noexcept { return _options; } + //! Returns formatting flags, see \ref FormatOptions::Flags. inline uint32_t flags() const noexcept { return _options.flags(); } + //! Tests whether the logger has the given `flag` enabled. inline bool hasFlag(uint32_t flag) const noexcept { return _options.hasFlag(flag); } + //! Sets formatting flags to `flags`, see \ref FormatOptions::Flags. inline void setFlags(uint32_t flags) noexcept { _options.setFlags(flags); } + //! Enables the given formatting `flags`, see \ref FormatOptions::Flags. inline void addFlags(uint32_t flags) noexcept { _options.addFlags(flags); } + //! Disables the given formatting `flags`, see \ref FormatOptions::Flags. inline void clearFlags(uint32_t flags) noexcept { _options.clearFlags(flags); } + //! Returns indentation of `type`, see \ref FormatOptions::IndentationType. inline uint32_t indentation(uint32_t type) const noexcept { return _options.indentation(type); } + //! Sets indentation of the given indentation `type` to `n` spaces, see \ref + //! FormatOptions::IndentationType. inline void setIndentation(uint32_t type, uint32_t n) noexcept { _options.setIndentation(type, n); } + //! Resets indentation of the given indentation `type` to 0 spaces. inline void resetIndentation(uint32_t type) noexcept { _options.resetIndentation(type); } //! \} @@ -179,6 +99,11 @@ class ASMJIT_VIRTAPI Logger { //! \{ //! Logs `str` - must be reimplemented. + //! + //! The function can accept either a null terminated string if `size` is + //! `SIZE_MAX` or a non-null terminated string of the given `size`. The + //! function cannot assume that the data is null terminated and must handle + //! non-null terminated inputs. virtual Error _log(const char* data, size_t size) noexcept = 0; //! Logs string `str`, which is either null terminated or having size `size`. @@ -186,15 +111,15 @@ class ASMJIT_VIRTAPI Logger { //! Logs content of a string `str`. inline Error log(const String& str) noexcept { return _log(str.data(), str.size()); } - //! Formats the message by using `snprintf()` and then sends the result - //! to `log()`. + //! Formats the message by using `snprintf()` and then passes the formatted + //! string to \ref _log(). ASMJIT_API Error logf(const char* fmt, ...) noexcept; - //! Formats the message by using `vsnprintf()` and then sends the result - //! 
to `log()`. + //! Formats the message by using `vsnprintf()` and then passes the formatted + //! string to \ref _log(). ASMJIT_API Error logv(const char* fmt, va_list ap) noexcept; - //! Logs binary data. + //! Logs binary `data` of the given `size`. ASMJIT_API Error logBinary(const void* data, size_t size) noexcept; //! \} @@ -267,6 +192,13 @@ class ASMJIT_VIRTAPI StringLogger : public Logger { //! \name Logger Data Accessors //! \{ + //! Returns the content of the logger as \ref String. + //! + //! It can be moved, if desired. + inline String& content() noexcept { return _content; } + //! \overload + inline const String& content() const noexcept { return _content; } + //! Returns aggregated logger data as `char*` pointer. //! //! The pointer is owned by `StringLogger`, it can't be modified or freed. @@ -287,69 +219,10 @@ class ASMJIT_VIRTAPI StringLogger : public Logger { ASMJIT_API Error _log(const char* data, size_t size = SIZE_MAX) noexcept override; }; -// ============================================================================ -// [asmjit::Logging] -// ============================================================================ - -struct Logging { - ASMJIT_API static Error formatRegister( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - uint32_t regType, - uint32_t regId) noexcept; - - ASMJIT_API static Error formatLabel( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t labelId) noexcept; - - ASMJIT_API static Error formatOperand( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const Operand_& op) noexcept; - - ASMJIT_API static Error formatInstruction( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept; - - ASMJIT_API static Error formatTypeId( - String& sb, - uint32_t typeId) noexcept; - -#ifndef ASMJIT_NO_BUILDER - ASMJIT_API static Error formatNode( - String& sb, - uint32_t flags, - const BaseBuilder* cb, - const BaseNode* node_) noexcept; -#endif - - // Only used by AsmJit internals, not available to users. -#ifdef ASMJIT_EXPORTS - enum { - // Has to be big to be able to hold all metadata compiler can assign to a - // single instruction. - kMaxInstLineSize = 44, - kMaxBinarySize = 26 - }; - - static Error formatLine( - String& sb, - const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept; -#endif -}; -#endif - //! \} ASMJIT_END_NAMESPACE +#endif + #endif // ASMJIT_CORE_LOGGER_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/logging.cpp b/libs/asmjit/src/asmjit/core/logging.cpp deleted file mode 100644 index 7e10af2..0000000 --- a/libs/asmjit/src/asmjit/core/logging.cpp +++ /dev/null @@ -1,535 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. 
If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#include "../core/api-build_p.h" -#ifndef ASMJIT_NO_LOGGING - -#include "../core/builder.h" -#include "../core/codeholder.h" -#include "../core/compiler.h" -#include "../core/emitter.h" -#include "../core/logging.h" -#include "../core/string.h" -#include "../core/support.h" -#include "../core/type.h" - -#ifdef ASMJIT_BUILD_X86 - #include "../x86/x86logging_p.h" -#endif - -#ifdef ASMJIT_BUILD_ARM - #include "../arm/armlogging_p.h" -#endif - -ASMJIT_BEGIN_NAMESPACE - -#if defined(ASMJIT_NO_COMPILER) -class VirtReg; -#endif - -// ============================================================================ -// [asmjit::Logger - Construction / Destruction] -// ============================================================================ - -Logger::Logger() noexcept - : _options() {} -Logger::~Logger() noexcept {} - -// ============================================================================ -// [asmjit::Logger - Logging] -// ============================================================================ - -Error Logger::logf(const char* fmt, ...) noexcept { - Error err; - va_list ap; - - va_start(ap, fmt); - err = logv(fmt, ap); - va_end(ap); - - return err; -} - -Error Logger::logv(const char* fmt, va_list ap) noexcept { - StringTmp<2048> sb; - ASMJIT_PROPAGATE(sb.appendVFormat(fmt, ap)); - return log(sb); -} - -Error Logger::logBinary(const void* data, size_t size) noexcept { - static const char prefix[] = "db "; - - StringTmp<256> sb; - sb.appendString(prefix, ASMJIT_ARRAY_SIZE(prefix) - 1); - - size_t i = size; - const uint8_t* s = static_cast(data); - - while (i) { - uint32_t n = uint32_t(Support::min(i, 16)); - sb.truncate(ASMJIT_ARRAY_SIZE(prefix) - 1); - sb.appendHex(s, n); - sb.appendChar('\n'); - ASMJIT_PROPAGATE(log(sb)); - s += n; - i -= n; - } - - return kErrorOk; -} - -// ============================================================================ -// [asmjit::FileLogger - Construction / Destruction] -// ============================================================================ - -FileLogger::FileLogger(FILE* file) noexcept - : _file(nullptr) { setFile(file); } -FileLogger::~FileLogger() noexcept {} - -// ============================================================================ -// [asmjit::FileLogger - Logging] -// ============================================================================ - -Error FileLogger::_log(const char* data, size_t size) noexcept { - if (!_file) - return kErrorOk; - - if (size == SIZE_MAX) - size = strlen(data); - - fwrite(data, 1, size, _file); - return kErrorOk; -} - -// ============================================================================ -// [asmjit::StringLogger - Construction / Destruction] -// ============================================================================ - -StringLogger::StringLogger() noexcept {} -StringLogger::~StringLogger() noexcept {} - -// ============================================================================ -// [asmjit::StringLogger - Logging] -// ============================================================================ - -Error StringLogger::_log(const char* data, size_t size) noexcept { - return _content.appendString(data, size); -} - -// 
============================================================================ -// [asmjit::Logging] -// ============================================================================ - -Error Logging::formatLabel( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t labelId) noexcept { - - DebugUtils::unused(flags); - - const LabelEntry* le = emitter->code()->labelEntry(labelId); - if (ASMJIT_UNLIKELY(!le)) - return sb.appendFormat("InvalidLabel[Id=%u]", labelId); - - if (le->hasName()) { - if (le->hasParent()) { - uint32_t parentId = le->parentId(); - const LabelEntry* pe = emitter->code()->labelEntry(parentId); - - if (ASMJIT_UNLIKELY(!pe)) - ASMJIT_PROPAGATE(sb.appendFormat("InvalidLabel[Id=%u]", labelId)); - else if (ASMJIT_UNLIKELY(!pe->hasName())) - ASMJIT_PROPAGATE(sb.appendFormat("L%u", parentId)); - else - ASMJIT_PROPAGATE(sb.appendString(pe->name())); - - ASMJIT_PROPAGATE(sb.appendChar('.')); - } - return sb.appendString(le->name()); - } - else { - return sb.appendFormat("L%u", labelId); - } -} - -Error Logging::formatRegister( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - uint32_t regType, - uint32_t regId) noexcept { - -#ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::LoggingInternal::formatRegister(sb, flags, emitter, archId, regType, regId); -#endif - -#ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::LoggingInternal::formatRegister(sb, flags, emitter, archId, regType, regId); -#endif - - return kErrorInvalidArch; -} - -Error Logging::formatOperand( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const Operand_& op) noexcept { - -#ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::LoggingInternal::formatOperand(sb, flags, emitter, archId, op); -#endif - -#ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::LoggingInternal::formatOperand(sb, flags, emitter, archId, op); -#endif - - return kErrorInvalidArch; -} - -Error Logging::formatInstruction( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept { - -#ifdef ASMJIT_BUILD_X86 - if (ArchInfo::isX86Family(archId)) - return x86::LoggingInternal::formatInstruction(sb, flags, emitter, archId, inst, operands, opCount); -#endif - -#ifdef ASMJIT_BUILD_ARM - if (ArchInfo::isArmFamily(archId)) - return arm::LoggingInternal::formatInstruction(sb, flags, emitter, archId, inst, operands, opCount); -#endif - - return kErrorInvalidArch; -} - -Error Logging::formatTypeId(String& sb, uint32_t typeId) noexcept { - if (typeId == Type::kIdVoid) - return sb.appendString("void"); - - if (!Type::isValid(typeId)) - return sb.appendString("unknown"); - - const char* typeName = "unknown"; - uint32_t typeSize = Type::sizeOf(typeId); - - uint32_t baseId = Type::baseOf(typeId); - switch (baseId) { - case Type::kIdIntPtr : typeName = "iptr" ; break; - case Type::kIdUIntPtr: typeName = "uptr" ; break; - case Type::kIdI8 : typeName = "i8" ; break; - case Type::kIdU8 : typeName = "u8" ; break; - case Type::kIdI16 : typeName = "i16" ; break; - case Type::kIdU16 : typeName = "u16" ; break; - case Type::kIdI32 : typeName = "i32" ; break; - case Type::kIdU32 : typeName = "u32" ; break; - case Type::kIdI64 : typeName = "i64" ; break; - case Type::kIdU64 : typeName = "u64" ; break; - case Type::kIdF32 : typeName = "f32" ; break; - case Type::kIdF64 : typeName = "f64" ; break; - 
case Type::kIdF80 : typeName = "f80" ; break; - case Type::kIdMask8 : typeName = "mask8" ; break; - case Type::kIdMask16 : typeName = "mask16"; break; - case Type::kIdMask32 : typeName = "mask32"; break; - case Type::kIdMask64 : typeName = "mask64"; break; - case Type::kIdMmx32 : typeName = "mmx32" ; break; - case Type::kIdMmx64 : typeName = "mmx64" ; break; - } - - uint32_t baseSize = Type::sizeOf(baseId); - if (typeSize > baseSize) { - uint32_t count = typeSize / baseSize; - return sb.appendFormat("%sx%u", typeName, unsigned(count)); - } - else { - return sb.appendString(typeName); - } - -} - -#ifndef ASMJIT_NO_BUILDER -static Error formatFuncValue(String& sb, uint32_t flags, const BaseEmitter* emitter, FuncValue value) noexcept { - uint32_t typeId = value.typeId(); - ASMJIT_PROPAGATE(Logging::formatTypeId(sb, typeId)); - - if (value.isReg()) { - ASMJIT_PROPAGATE(sb.appendChar('@')); - ASMJIT_PROPAGATE(Logging::formatRegister(sb, flags, emitter, emitter->archId(), value.regType(), value.regId())); - } - - if (value.isStack()) { - ASMJIT_PROPAGATE(sb.appendFormat("@[%d]", int(value.stackOffset()))); - } - - return kErrorOk; -} - -static Error formatFuncRets( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - const FuncDetail& fd, - VirtReg* const* vRegs) noexcept { - - if (!fd.hasRet()) - return sb.appendString("void"); - - for (uint32_t i = 0; i < fd.retCount(); i++) { - if (i) ASMJIT_PROPAGATE(sb.appendString(", ")); - ASMJIT_PROPAGATE(formatFuncValue(sb, flags, emitter, fd.ret(i))); - -#ifndef ASMJIT_NO_COMPILER - if (vRegs) { - static const char nullRet[] = ""; - ASMJIT_PROPAGATE(sb.appendFormat(" %s", vRegs[i] ? vRegs[i]->name() : nullRet)); - } -#else - DebugUtils::unused(vRegs); -#endif - } - - return kErrorOk; -} - -static Error formatFuncArgs( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - const FuncDetail& fd, - VirtReg* const* vRegs) noexcept { - - uint32_t count = fd.argCount(); - if (!count) - return sb.appendString("void"); - - for (uint32_t i = 0; i < count; i++) { - if (i) ASMJIT_PROPAGATE(sb.appendString(", ")); - ASMJIT_PROPAGATE(formatFuncValue(sb, flags, emitter, fd.arg(i))); - -#ifndef ASMJIT_NO_COMPILER - if (vRegs) { - static const char nullArg[] = ""; - ASMJIT_PROPAGATE(sb.appendFormat(" %s", vRegs[i] ? 
vRegs[i]->name() : nullArg)); - } -#else - DebugUtils::unused(vRegs); -#endif - } - - return kErrorOk; -} - -Error Logging::formatNode( - String& sb, - uint32_t flags, - const BaseBuilder* cb, - const BaseNode* node_) noexcept { - - if (node_->hasPosition() && (flags & FormatOptions::kFlagPositions) != 0) - ASMJIT_PROPAGATE(sb.appendFormat("<%05u> ", node_->position())); - - switch (node_->type()) { - case BaseNode::kNodeInst: - case BaseNode::kNodeJump: { - const InstNode* node = node_->as(); - ASMJIT_PROPAGATE( - Logging::formatInstruction(sb, flags, cb, - cb->archId(), - node->baseInst(), node->operands(), node->opCount())); - break; - } - - case BaseNode::kNodeSection: { - const SectionNode* node = node_->as(); - if (cb->_code->isSectionValid(node->id())) { - const Section* section = cb->_code->sectionById(node->id()); - ASMJIT_PROPAGATE(sb.appendFormat(".section %s", section->name())); - } - break; - } - - case BaseNode::kNodeLabel: { - const LabelNode* node = node_->as(); - ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id())); - ASMJIT_PROPAGATE(sb.appendString(":")); - break; - } - - case BaseNode::kNodeAlign: { - const AlignNode* node = node_->as(); - ASMJIT_PROPAGATE( - sb.appendFormat(".align %u (%s)", - node->alignment(), - node->alignMode() == kAlignCode ? "code" : "data")); - break; - } - - case BaseNode::kNodeEmbedData: { - const EmbedDataNode* node = node_->as(); - ASMJIT_PROPAGATE(sb.appendFormat(".embed (%u bytes)", node->size())); - break; - } - - case BaseNode::kNodeEmbedLabel: { - const EmbedLabelNode* node = node_->as(); - ASMJIT_PROPAGATE(sb.appendString(".label ")); - ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id())); - break; - } - - case BaseNode::kNodeEmbedLabelDelta: { - const EmbedLabelDeltaNode* node = node_->as(); - ASMJIT_PROPAGATE(sb.appendString(".label (")); - ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id())); - ASMJIT_PROPAGATE(sb.appendString(" - ")); - ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->baseId())); - ASMJIT_PROPAGATE(sb.appendString(")")); - break; - } - - case BaseNode::kNodeComment: { - const CommentNode* node = node_->as(); - ASMJIT_PROPAGATE(sb.appendFormat("; %s", node->inlineComment())); - break; - } - - case BaseNode::kNodeSentinel: { - const SentinelNode* node = node_->as(); - const char* sentinelName = nullptr; - - switch (node->sentinelType()) { - case SentinelNode::kSentinelFuncEnd: - sentinelName = "[FuncEnd]"; - break; - - default: - sentinelName = "[Sentinel]"; - break; - } - - ASMJIT_PROPAGATE(sb.appendString(sentinelName)); - break; - } - -#ifndef ASMJIT_NO_COMPILER - case BaseNode::kNodeFunc: { - const FuncNode* node = node_->as(); - - ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id())); - ASMJIT_PROPAGATE(sb.appendString(": ")); - - ASMJIT_PROPAGATE(formatFuncRets(sb, flags, cb, node->detail(), nullptr)); - ASMJIT_PROPAGATE(sb.appendString(" Func(")); - ASMJIT_PROPAGATE(formatFuncArgs(sb, flags, cb, node->detail(), node->args())); - ASMJIT_PROPAGATE(sb.appendString(")")); - break; - } - - case BaseNode::kNodeFuncRet: { - const FuncRetNode* node = node_->as(); - ASMJIT_PROPAGATE(sb.appendString("[FuncRet]")); - - for (uint32_t i = 0; i < 2; i++) { - const Operand_& op = node->_opArray[i]; - if (!op.isNone()) { - ASMJIT_PROPAGATE(sb.appendString(i == 0 ? 
" " : ", ")); - ASMJIT_PROPAGATE(formatOperand(sb, flags, cb, cb->archId(), op)); - } - } - break; - } - - case BaseNode::kNodeFuncCall: { - const FuncCallNode* node = node_->as(); - ASMJIT_PROPAGATE( - Logging::formatInstruction(sb, flags, cb, - cb->archId(), - node->baseInst(), node->operands(), node->opCount())); - break; - } -#endif - - default: { - ASMJIT_PROPAGATE(sb.appendFormat("[User:%u]", node_->type())); - break; - } - } - - return kErrorOk; -} -#endif - -Error Logging::formatLine(String& sb, const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept { - size_t currentSize = sb.size(); - size_t commentSize = comment ? Support::strLen(comment, Globals::kMaxCommentSize) : 0; - - ASMJIT_ASSERT(binSize >= dispSize); - const size_t kNoBinSize = std::numeric_limits::max(); - - if ((binSize != 0 && binSize != kNoBinSize) || commentSize) { - size_t align = kMaxInstLineSize; - char sep = ';'; - - for (size_t i = (binSize == kNoBinSize); i < 2; i++) { - size_t begin = sb.size(); - ASMJIT_PROPAGATE(sb.padEnd(align)); - - if (sep) { - ASMJIT_PROPAGATE(sb.appendChar(sep)); - ASMJIT_PROPAGATE(sb.appendChar(' ')); - } - - // Append binary data or comment. - if (i == 0) { - ASMJIT_PROPAGATE(sb.appendHex(binData, binSize - dispSize - immSize)); - ASMJIT_PROPAGATE(sb.appendChars('.', dispSize * 2)); - ASMJIT_PROPAGATE(sb.appendHex(binData + binSize - immSize, immSize)); - if (commentSize == 0) break; - } - else { - ASMJIT_PROPAGATE(sb.appendString(comment, commentSize)); - } - - currentSize += sb.size() - begin; - align += kMaxBinarySize; - sep = '|'; - } - } - - return sb.appendChar('\n'); -} - -ASMJIT_END_NAMESPACE - -#endif diff --git a/libs/asmjit/src/asmjit/core/misc_p.h b/libs/asmjit/src/asmjit/core/misc_p.h index 916ca9d..225ba6a 100644 --- a/libs/asmjit/src/asmjit/core/misc_p.h +++ b/libs/asmjit/src/asmjit/core/misc_p.h @@ -29,12 +29,14 @@ ASMJIT_BEGIN_NAMESPACE //! \cond INTERNAL -//! \addtogroup asmjit_support +//! \addtogroup asmjit_utilities //! 
\{ -#define ASMJIT_LOOKUP_TABLE_8(T, I) T((I)), T((I+1)), T((I+2)), T((I+3)), T((I+4)), T((I+5)), T((I+6)), T((I+7)) +#define ASMJIT_LOOKUP_TABLE_4(T, I) T((I)), T((I+1)), T((I+2)), T((I+3)) +#define ASMJIT_LOOKUP_TABLE_8(T, I) ASMJIT_LOOKUP_TABLE_4(T, I), ASMJIT_LOOKUP_TABLE_4(T, I + 4) #define ASMJIT_LOOKUP_TABLE_16(T, I) ASMJIT_LOOKUP_TABLE_8(T, I), ASMJIT_LOOKUP_TABLE_8(T, I + 8) #define ASMJIT_LOOKUP_TABLE_32(T, I) ASMJIT_LOOKUP_TABLE_16(T, I), ASMJIT_LOOKUP_TABLE_16(T, I + 16) +#define ASMJIT_LOOKUP_TABLE_40(T, I) ASMJIT_LOOKUP_TABLE_16(T, I), ASMJIT_LOOKUP_TABLE_16(T, I + 16), ASMJIT_LOOKUP_TABLE_8(T, I + 32) #define ASMJIT_LOOKUP_TABLE_64(T, I) ASMJIT_LOOKUP_TABLE_32(T, I), ASMJIT_LOOKUP_TABLE_32(T, I + 32) #define ASMJIT_LOOKUP_TABLE_128(T, I) ASMJIT_LOOKUP_TABLE_64(T, I), ASMJIT_LOOKUP_TABLE_64(T, I + 64) #define ASMJIT_LOOKUP_TABLE_256(T, I) ASMJIT_LOOKUP_TABLE_128(T, I), ASMJIT_LOOKUP_TABLE_128(T, I + 128) diff --git a/libs/asmjit/src/asmjit/core/operand.cpp b/libs/asmjit/src/asmjit/core/operand.cpp index 9d11f3f..cd5931f 100644 --- a/libs/asmjit/src/asmjit/core/operand.cpp +++ b/libs/asmjit/src/asmjit/core/operand.cpp @@ -68,7 +68,7 @@ UNIT(operand) { uint32_t rSig = Operand::kOpReg | (1 << Operand::kSignatureRegTypeShift ) | (2 << Operand::kSignatureRegGroupShift) | (8 << Operand::kSignatureSizeShift ) ; - BaseReg r1(rSig, 5); + BaseReg r1 = BaseReg::fromSignatureAndId(rSig, 5); EXPECT(r1.isValid() == true); EXPECT(r1.isReg() == true); @@ -126,10 +126,17 @@ UNIT(operand) { INFO("Checking basic functionality of Imm"); Imm immValue(-42); - EXPECT(Imm(-1).i64() == int64_t(-1)); - EXPECT(imm(-1).i64() == int64_t(-1)); - EXPECT(immValue.i64() == int64_t(-42)); - EXPECT(imm(0xFFFFFFFF).i64() == int64_t(0xFFFFFFFF)); + EXPECT(immValue.type() == Imm::kTypeInteger); + EXPECT(Imm(-1).value() == -1); + EXPECT(imm(-1).value() == -1); + EXPECT(immValue.value() == -42); + EXPECT(imm(0xFFFFFFFF).value() == int64_t(0xFFFFFFFF)); + + Imm immDouble(0.4); + EXPECT(immDouble.type() == Imm::kTypeDouble); + EXPECT(immDouble.valueAs() == 0.4); + EXPECT(immDouble == imm(0.4)); + } #endif diff --git a/libs/asmjit/src/asmjit/core/operand.h b/libs/asmjit/src/asmjit/core/operand.h index eee1af4..05e4c0f 100644 --- a/libs/asmjit/src/asmjit/core/operand.h +++ b/libs/asmjit/src/asmjit/core/operand.h @@ -24,6 +24,7 @@ #ifndef ASMJIT_CORE_OPERAND_H_INCLUDED #define ASMJIT_CORE_OPERAND_H_INCLUDED +#include "../core/archcommons.h" #include "../core/support.h" ASMJIT_BEGIN_NAMESPACE @@ -47,7 +48,7 @@ struct RegTraits { \ static constexpr uint32_t kSize = SIZE; \ \ static constexpr uint32_t kSignature = \ - (Operand::kOpReg << Operand::kSignatureOpShift ) | \ + (Operand::kOpReg << Operand::kSignatureOpTypeShift ) | \ (kType << Operand::kSignatureRegTypeShift ) | \ (kGroup << Operand::kSignatureRegGroupShift) | \ (kSize << Operand::kSignatureSizeShift ) ; \ @@ -60,7 +61,7 @@ struct RegTraits { \ public: \ /*! Default constructor that only setups basics. */ \ constexpr REG() noexcept \ - : BASE(kSignature, kIdBad) {} \ + : BASE(SignatureAndId(kSignature, kIdBad)) {} \ \ /*! Makes a copy of the `other` register operand. */ \ constexpr REG(const REG& other) noexcept \ @@ -71,8 +72,8 @@ public: \ : BASE(other, rId) {} \ \ /*! Creates a register based on `signature` and `rId`. */ \ - constexpr REG(uint32_t signature, uint32_t rId) noexcept \ - : BASE(signature, rId) {} \ + constexpr explicit REG(const SignatureAndId& sid) noexcept \ + : BASE(sid) {} \ \ /*! 
Creates a completely uninitialized REG register operand (garbage). */ \ inline explicit REG(Globals::NoInit_) noexcept \ @@ -80,7 +81,12 @@ public: \ \ /*! Creates a new register from register type and id. */ \ static inline REG fromTypeAndId(uint32_t rType, uint32_t rId) noexcept { \ - return REG(signatureOf(rType), rId); \ + return REG(SignatureAndId(signatureOf(rType), rId)); \ + } \ + \ + /*! Creates a new register from register signature and id. */ \ + static inline REG fromSignatureAndId(uint32_t rSgn, uint32_t rId) noexcept {\ + return REG(SignatureAndId(rSgn, rId)); \ } \ \ /*! Clones the register operand. */ \ @@ -101,9 +107,9 @@ public: \ \ /*! Creates a register operand having its id set to `rId`. */ \ constexpr explicit REG(uint32_t rId) noexcept \ - : BASE(kSignature, rId) {} + : BASE(SignatureAndId(kSignature, rId)) {} -//! \addtogroup asmjit_core +//! \addtogroup asmjit_assembler //! \{ // ============================================================================ @@ -162,12 +168,22 @@ struct Operand_ { }; static_assert(kOpMem == kOpReg + 1, "asmjit::Operand requires `kOpMem` to be `kOpReg+1`."); + //! Label tag. + enum LabelTag { + //! Label tag is used as a sub-type, forming a unique signature across all + //! operand types as 0x1 is never associated with any register type. This + //! means that a memory operand's BASE register can be constructed from + //! virtually any operand (register vs. label) by just assigning its type + //! (register type or label-tag) and operand id. + kLabelTag = 0x1 + }; + // \cond INTERNAL enum SignatureBits : uint32_t { // Operand type (3 least significant bits). // |........|........|........|.....XXX| - kSignatureOpShift = 0, - kSignatureOpMask = 0x07u << kSignatureOpShift, + kSignatureOpTypeShift = 0, + kSignatureOpTypeMask = 0x07u << kSignatureOpTypeShift, // Register type (5 bits). // |........|........|........|XXXXX...| @@ -194,16 +210,21 @@ struct Operand_ { kSignatureMemBaseIndexShift = 3, kSignatureMemBaseIndexMask = 0x3FFu << kSignatureMemBaseIndexShift, - // Memory address type (2 bits). - // |........|........|.XX.....|........| - kSignatureMemAddrTypeShift = 13, - kSignatureMemAddrTypeMask = 0x03u << kSignatureMemAddrTypeShift, - - // This memory operand represents a home-slot or stack (BaseCompiler). - // |........|........|X.......|........| - kSignatureMemRegHomeShift = 15, + // This memory operand represents a home-slot or stack (Compiler) (1 bit). + // |........|........|..X.....|........| + kSignatureMemRegHomeShift = 13, kSignatureMemRegHomeFlag = 0x01u << kSignatureMemRegHomeShift, + // Immediate type (1 bit). + // |........|........|........|....X...| + kSignatureImmTypeShift = 4, + kSignatureImmTypeMask = 0x01u << kSignatureImmTypeShift, + + // Predicate used by either registers or immediate values (4 bits). + // |........|XXXX....|........|........| + kSignaturePredicateShift = 20, + kSignaturePredicateMask = 0x0Fu << kSignaturePredicateShift, + // Operand size (8 most significant bits). // |XXXXXXXX|........|........|........| kSignatureSizeShift = 24, @@ -211,7 +232,6 @@ struct Operand_ { }; //! \endcond - //! \cond INTERNAL //! Constants useful for VirtId <-> Index translation. enum VirtIdConstants : uint32_t { //! Minimum valid packed-id. @@ -225,14 +245,12 @@ struct Operand_ { //! Tests whether the given `id` is a valid virtual register id. Since AsmJit //! supports both physical and virtual registers it must be able to distinguish //! between these two. 
The idea is that physical registers are always limited - //! in size, so virtual identifiers start from `kVirtIdMin` and end at - //! `kVirtIdMax`. + //! in size, so virtual identifiers start from `kVirtIdMin` and end at `kVirtIdMax`. static ASMJIT_INLINE bool isVirtId(uint32_t id) noexcept { return id - kVirtIdMin < uint32_t(kVirtIdCount); } //! Converts a real-id into a packed-id that can be stored in Operand. static ASMJIT_INLINE uint32_t indexToVirtId(uint32_t id) noexcept { return id + kVirtIdMin; } //! Converts a packed-id back to real-id. static ASMJIT_INLINE uint32_t virtIdToIndex(uint32_t id) noexcept { return id - kVirtIdMin; } - //! \endcond //! \name Construction & Destruction //! \{ @@ -245,10 +263,10 @@ struct Operand_ { _data[0] = 0; _data[1] = 0; } + //! \endcond - //! Initializes the operand from `other` (used by operator overloads). + //! Initializes the operand from `other` operand (used by operator overloads). inline void copyFrom(const Operand_& other) noexcept { memcpy(this, &other, sizeof(Operand_)); } - //! \endcond //! Resets the `Operand` to none. //! @@ -290,8 +308,10 @@ struct Operand_ { //! \name Operator Overloads //! \{ - constexpr bool operator==(const Operand_& other) const noexcept { return isEqual(other); } - constexpr bool operator!=(const Operand_& other) const noexcept { return !isEqual(other); } + //! Tests whether this operand is the same as `other`. + constexpr bool operator==(const Operand_& other) const noexcept { return equals(other); } + //! Tests whether this operand is not the same as `other`. + constexpr bool operator!=(const Operand_& other) const noexcept { return !equals(other); } //! \} @@ -311,9 +331,9 @@ struct Operand_ { //! \name Accessors //! \{ - //! Tests whether the operand matches the given signature `sign`. + //! Tests whether the operand's signature matches the given signature `sign`. constexpr bool hasSignature(uint32_t signature) const noexcept { return _signature == signature; } - //! Tests whether the operand matches the signature of the `other` operand. + //! Tests whether the operand's signature matches the signature of the `other` operand. constexpr bool hasSignature(const Operand_& other) const noexcept { return _signature == other.signature(); } //! Returns operand signature as unsigned 32-bit integer. @@ -334,6 +354,11 @@ struct Operand_ { return (_signature & mask) != 0; } + template + constexpr bool _hasSignaturePart(uint32_t signature) const noexcept { + return (_signature & mask) == signature; + } + template constexpr uint32_t _getSignaturePart() const noexcept { return (_signature >> Support::constCtz(mask)) & (mask >> Support::constCtz(mask)); @@ -347,7 +372,7 @@ struct Operand_ { //! \endcond //! Returns the type of the operand, see `OpType`. - constexpr uint32_t opType() const noexcept { return _getSignaturePart(); } + constexpr uint32_t opType() const noexcept { return _getSignaturePart(); } //! Tests whether the operand is none (`kOpNone`). constexpr bool isNone() const noexcept { return _signature == 0; } //! Tests whether the operand is a register (`kOpReg`). @@ -395,18 +420,26 @@ struct Operand_ { //! initialized. constexpr uint32_t id() const noexcept { return _baseId; } - //! Tests whether the operand is 100% equal to `other`. - constexpr bool isEqual(const Operand_& other) const noexcept { + //! Tests whether the operand is 100% equal to `other` operand. + //! + //! \note This basically performs a binary comparison, if aby bit is + //! different the operands are not equal. 
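// Illustrative sketch of the bit-field decoding performed by the
// _getSignaturePart<mask>() helper above on the 32-bit operand signature
// (constCtz here stands in for asmjit's Support::constCtz; mask must be non-zero):
#include <cstdint>

static inline uint32_t constCtz(uint32_t x) noexcept {
  // Count trailing zero bits.
  uint32_t n = 0;
  while (!(x & 1u)) { x >>= 1; n++; }
  return n;
}

static inline uint32_t getSignaturePart(uint32_t signature, uint32_t mask) noexcept {
  // Shift the selected field down to bit 0 and strip everything else.
  return (signature >> constCtz(mask)) & (mask >> constCtz(mask));
}

// Example: with kSignatureOpTypeMask == 0x07u, getSignaturePart(sig, 0x07u)
// yields the operand type (kOpNone, kOpReg, kOpMem, kOpImm, or kOpLabel).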
+ constexpr bool equals(const Operand_& other) const noexcept { return (_signature == other._signature) & (_baseId == other._baseId ) & (_data[0] == other._data[0] ) & (_data[1] == other._data[1] ) ; } +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use equals() instead") + constexpr bool isEqual(const Operand_& other) const noexcept { return equals(other); } +#endif //!ASMJIT_NO_DEPRECATED + //! Tests whether the operand is a register matching `rType`. constexpr bool isReg(uint32_t rType) const noexcept { - return (_signature & (kSignatureOpMask | kSignatureRegTypeMask)) == - ((kOpReg << kSignatureOpShift) | (rType << kSignatureRegTypeShift)); + return (_signature & (kSignatureOpTypeMask | kSignatureRegTypeMask)) == + ((kOpReg << kSignatureOpTypeShift) | (rType << kSignatureRegTypeShift)); } //! Tests whether the operand is register and of `rType` and `rId`. @@ -471,11 +504,6 @@ class Operand : public Operand_ { static_assert(sizeof(Operand) == 16, "asmjit::Operand must be exactly 16 bytes long"); -namespace Globals { - //! A default-constructed operand of `Operand_::kOpNone` type. - static constexpr const Operand none; -} - // ============================================================================ // [asmjit::Label] // ============================================================================ @@ -515,18 +543,10 @@ class Label : public Operand { kTypeLocal = 1, //! Global label (never has parentId). kTypeGlobal = 2, + //! External label (references an external symbol). + kTypeExternal = 3, //! Number of label types. - kTypeCount = 3 - }; - - // TODO: Find a better place, find a better name. - enum { - //! Label tag is used as a sub-type, forming a unique signature across all - //! operand types as 0x1 is never associated with any register (reg-type). - //! This means that a memory operand's BASE register can be constructed - //! from virtually any operand (register vs. label) by just assigning its - //! type (reg type or label-tag) and operand id. - kLabelTag = 0x1 + kTypeCount = 4 }; //! \name Construction & Destruction @@ -536,7 +556,7 @@ class Label : public Operand { constexpr Label() noexcept : Operand(Globals::Init, kOpLabel, Globals::kInvalidId, 0, 0) {} - //! Creates a cloned label operand of `other` . + //! Creates a cloned label operand of `other`. constexpr Label(const Label& other) noexcept : Operand(other) {} @@ -596,8 +616,8 @@ struct BaseRegTraits { //! No size by default. static constexpr uint32_t kSize = 0; - //! Empty signature by default. - static constexpr uint32_t kSignature = Operand::kOpReg; + //! Empty signature by default (not even having operand type set to register). + static constexpr uint32_t kSignature = 0; }; //! \endcond @@ -610,7 +630,7 @@ struct BaseRegTraits { //! This information is compatible with operand's signature (32-bit integer) //! and `RegInfo` just provides easy way to access it. 
struct RegInfo { - inline void reset() noexcept { _signature = 0; } + inline void reset(uint32_t signature = 0) noexcept { _signature = signature; } inline void setSignature(uint32_t signature) noexcept { _signature = signature; } template @@ -620,7 +640,7 @@ struct RegInfo { constexpr bool isValid() const noexcept { return _signature != 0; } constexpr uint32_t signature() const noexcept { return _signature; } - constexpr uint32_t opType() const noexcept { return _getSignaturePart(); } + constexpr uint32_t opType() const noexcept { return _getSignaturePart(); } constexpr uint32_t group() const noexcept { return _getSignaturePart(); } constexpr uint32_t type() const noexcept { return _getSignaturePart(); } constexpr uint32_t size() const noexcept { return _getSignaturePart(); } @@ -628,9 +648,15 @@ struct RegInfo { uint32_t _signature; }; -//! Physical/Virtual register operand. +//! Physical or virtual register operand. class BaseReg : public Operand { public: + static constexpr uint32_t kBaseSignature = + kSignatureOpTypeMask | + kSignatureRegTypeMask | + kSignatureRegGroupMask | + kSignatureSizeMask ; + //! Architecture neutral register types. //! //! These must be reused by any platform that contains that types. All GP @@ -638,7 +664,7 @@ class BaseReg : public Operand { //! of a memory operand. enum RegType : uint32_t { //! No register - unused, invalid, multiple meanings. - kTypeNone = 0, + kTypeNone = 0, // (1 is used as a LabelTag) @@ -652,27 +678,31 @@ class BaseReg : public Operand { kTypeGp32 = 5, //! 64-bit general purpose register (X86|ARM). kTypeGp64 = 6, + //! 8-bit view of a vector register (ARM). + kTypeVec8 = 7, + //! 16-bit view of a vector register (ARM). + kTypeVec16 = 8, //! 32-bit view of a vector register (ARM). - kTypeVec32 = 7, + kTypeVec32 = 9, //! 64-bit view of a vector register (ARM). - kTypeVec64 = 8, + kTypeVec64 = 10, //! 128-bit view of a vector register (X86|ARM). - kTypeVec128 = 9, + kTypeVec128 = 11, //! 256-bit view of a vector register (X86). - kTypeVec256 = 10, + kTypeVec256 = 12, //! 512-bit view of a vector register (X86). - kTypeVec512 = 11, + kTypeVec512 = 13, //! 1024-bit view of a vector register (future). - kTypeVec1024 = 12, + kTypeVec1024 = 14, //! Other0 register, should match `kOther0` group. - kTypeOther0 = 13, + kTypeOther0 = 15, //! Other1 register, should match `kOther1` group. - kTypeOther1 = 14, + kTypeOther1 = 16, //! Universal id of IP/PC register (if separate). - kTypeIP = 15, - //! Start of platform dependent register types (must be honored). - kTypeCustom = 16, - //! Maximum possible register id of all architectures. + kTypeIP = 17, + //! Start of platform dependent register types. + kTypeCustom = 18, + //! Maximum possible register type value. kTypeMax = 31 }; @@ -686,9 +716,9 @@ class BaseReg : public Operand { kGroupOther0 = 2, //! Group that is architecture dependent. kGroupOther1 = 3, - //! Count of register groups used by virtual registers. + //! Count of register groups used by physical and virtual registers. kGroupVirt = 4, - //! Count of register groups used by physical registers. + //! Count of register groups used by physical registers only. kGroupCount = 16 }; @@ -697,6 +727,22 @@ class BaseReg : public Operand { kIdBad = 0xFFu }; + //! A helper used by constructors. 
+ struct SignatureAndId { + uint32_t _signature; + uint32_t _id; + + inline SignatureAndId() noexcept = default; + constexpr SignatureAndId(const SignatureAndId& other) noexcept = default; + + constexpr explicit SignatureAndId(uint32_t signature, uint32_t id) noexcept + : _signature(signature), + _id(id) {} + + constexpr uint32_t signature() const noexcept { return _signature; } + constexpr uint32_t id() const noexcept { return _id; } + }; + static constexpr uint32_t kSignature = kOpReg; //! \name Construction & Destruction @@ -715,12 +761,17 @@ class BaseReg : public Operand { : Operand(Globals::Init, other._signature, rId, 0, 0) {} //! Creates a register initialized to `signature` and `rId`. - constexpr BaseReg(uint32_t signature, uint32_t rId) noexcept - : Operand(Globals::Init, signature, rId, 0, 0) {} + constexpr explicit BaseReg(const SignatureAndId& sid) noexcept + : Operand(Globals::Init, sid._signature, sid._id, 0, 0) {} inline explicit BaseReg(Globals::NoInit_) noexcept : Operand(Globals::NoInit) {} + /*! Creates a new register from register signature `rSgn` and id. */ + static inline BaseReg fromSignatureAndId(uint32_t rSgn, uint32_t rId) noexcept { + return BaseReg(SignatureAndId(rSgn, rId)); + } + //! \} //! \name Overloaded Operators @@ -733,18 +784,32 @@ class BaseReg : public Operand { //! \name Accessors //! \{ + //! Returns base signature of the register associated with each register type. + //! + //! Base signature only contains the operand type, register type, register + //! group, and register size. It doesn't contain element type, predicate, or + //! other architecture-specific data. Base signature is a signature that is + //! provided by architecture-specific `RegTraits`, like \ref x86::RegTraits. + constexpr uint32_t baseSignature() const noexcept { + return _signature & (kBaseSignature); + } + + //! Tests whether the operand's base signature matches the given signature `sign`. + constexpr bool hasBaseSignature(uint32_t signature) const noexcept { return baseSignature() == signature; } + //! Tests whether the operand's base signature matches the base signature of the `other` operand. + constexpr bool hasBaseSignature(const BaseReg& other) const noexcept { return baseSignature() == other.baseSignature(); } + //! Tests whether this register is the same as `other`. //! //! This is just an optimization. Registers by default only use the first - //! 8 bytes of the Operand, so this method takes advantage of this knowledge + //! 8 bytes of Operand data, so this method takes advantage of this knowledge //! and only compares these 8 bytes. If both operands were created correctly - //! then `isEqual()` and `isSame()` should give the same answer, however, if - //! some one of the two operand contains a garbage or other metadata in the - //! upper 8 bytes then `isSame()` may return `true` in cases where `isEqual()` + //! both \ref equals() and \ref isSame() should give the same answer, however, + //! if any of these two contains garbage or other metadata in the upper 8 + //! bytes then \ref isSame() may return `true` in cases in which \ref equals() //! returns false. constexpr bool isSame(const BaseReg& other) const noexcept { - return (_signature == other._signature) & - (_baseId == other._baseId ) ; + return (_signature == other._signature) & (_baseId == other._baseId); } //! Tests whether the register is valid (either virtual or physical). @@ -777,6 +842,21 @@ class BaseReg : public Operand { //! Returns the register group. 
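// A minimal sketch of the constructor change introduced above: the public
// (signature, id) register constructor is replaced by the explicit
// SignatureAndId wrapper, with fromSignatureAndId() as the convenience
// factory. This mirrors the operand.cpp unit-test change earlier in this patch:
uint32_t rSig = Operand::kOpReg
              | (1 << Operand::kSignatureRegTypeShift )
              | (2 << Operand::kSignatureRegGroupShift)
              | (8 << Operand::kSignatureSizeShift    );

// BaseReg r1(rSig, 5);                               // old two-argument form, removed
BaseReg r1 = BaseReg::fromSignatureAndId(rSig, 5);    // new factory
BaseReg r2(BaseReg::SignatureAndId(rSig, 5));         // equivalent explicit form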
constexpr uint32_t group() const noexcept { return _getSignaturePart(); } + //! Returns operation predicate of the register (ARM/AArch64). + //! + //! The meaning depends on architecture, for example on ARM hardware this + //! describes \ref arm::Predicate::ShiftOp of the register. + constexpr uint32_t predicate() const noexcept { return _getSignaturePart(); } + + //! Sets operation predicate of the register to `predicate` (ARM/AArch64). + //! + //! The meaning depends on architecture, for example on ARM hardware this + //! describes \ref arm::Predicate::ShiftOp of the register. + inline void setPredicate(uint32_t predicate) noexcept { _setSignaturePart(predicate); } + + //! Resets shift operation type of the register to the default value (ARM/AArch64). + inline void resetPredicate() noexcept { _setSignaturePart(0); } + //! Clones the register operand. constexpr BaseReg clone() const noexcept { return BaseReg(*this); } @@ -790,7 +870,7 @@ class BaseReg : public Operand { //! //! \note Improper use of `cloneAs()` can lead to hard-to-debug errors. template - constexpr RegT cloneAs(const RegT& other) const noexcept { return RegT(other.signature(), id()); } + constexpr RegT cloneAs(const RegT& other) const noexcept { return RegT(SignatureAndId(other.signature(), id())); } //! Sets the register id to `rId`. inline void setId(uint32_t rId) noexcept { _baseId = rId; } @@ -810,22 +890,25 @@ class BaseReg : public Operand { //! \name Static Functions //! \{ + //! Tests whether the `op` operand is a general purpose register. static inline bool isGp(const Operand_& op) noexcept { // Check operand type and register group. Not interested in register type and size. - const uint32_t kSgn = (kOpReg << kSignatureOpShift ) | + const uint32_t kSgn = (kOpReg << kSignatureOpTypeShift ) | (kGroupGp << kSignatureRegGroupShift) ; - return (op.signature() & (kSignatureOpMask | kSignatureRegGroupMask)) == kSgn; + return (op.signature() & (kSignatureOpTypeMask | kSignatureRegGroupMask)) == kSgn; } - //! Tests whether the `op` operand is either a low or high 8-bit GPB register. + //! Tests whether the `op` operand is a vector register. static inline bool isVec(const Operand_& op) noexcept { // Check operand type and register group. Not interested in register type and size. - const uint32_t kSgn = (kOpReg << kSignatureOpShift ) | + const uint32_t kSgn = (kOpReg << kSignatureOpTypeShift ) | (kGroupVec << kSignatureRegGroupShift) ; - return (op.signature() & (kSignatureOpMask | kSignatureRegGroupMask)) == kSgn; + return (op.signature() & (kSignatureOpTypeMask | kSignatureRegGroupMask)) == kSgn; } + //! Tests whether the `op` is a general purpose register of the given `rId`. static inline bool isGp(const Operand_& op, uint32_t rId) noexcept { return isGp(op) & (op.id() == rId); } + //! Tests whether the `op` is a vector register of the given `rId`. static inline bool isVec(const Operand_& op, uint32_t rId) noexcept { return isVec(op) & (op.id() == rId); } //! \} @@ -909,7 +992,7 @@ struct RegOnly { //! Converts this ExtraReg to a real `RegT` operand. template - constexpr RegT toReg() const noexcept { return RegT(_signature, _id); } + constexpr RegT toReg() const noexcept { return RegT(BaseReg::SignatureAndId(_signature, _id)); } //! \} }; @@ -923,42 +1006,28 @@ struct RegOnly { //! \note It's tricky to pack all possible cases that define a memory operand //! into just 16 bytes. The `BaseMem` splits data into the following parts: //! -//! BASE - Base register or label - requires 36 bits total. 4 bits are used to -//! 
encode the type of the BASE operand (label vs. register type) and -//! the remaining 32 bits define the BASE id, which can be a physical or -//! virtual register index. If BASE type is zero, which is never used as -//! a register-type and label doesn't use it as well then BASE field -//! contains a high DWORD of a possible 64-bit absolute address, which is -//! possible on X64. +//! - BASE - Base register or label - requires 36 bits total. 4 bits are used +//! to encode the type of the BASE operand (label vs. register type) and the +//! remaining 32 bits define the BASE id, which can be a physical or virtual +//! register index. If BASE type is zero, which is never used as a register +//! type and label doesn't use it as well then BASE field contains a high +//! DWORD of a possible 64-bit absolute address, which is possible on X64. //! -//! INDEX - Index register (or theoretically Label, which doesn't make sense). -//! Encoding is similar to BASE - it also requires 36 bits and splits -//! the encoding to INDEX type (4 bits defining the register type) and -//! id (32-bits). +//! - INDEX - Index register (or theoretically Label, which doesn't make sense). +//! Encoding is similar to BASE - it also requires 36 bits and splits the +//! encoding to INDEX type (4 bits defining the register type) and id (32-bits). //! -//! OFFSET - A relative offset of the address. Basically if BASE is specified -//! the relative displacement adjusts BASE and an optional INDEX. if -//! BASE is not specified then the OFFSET should be considered as ABSOLUTE -//! address (at least on X86). In that case its low 32 bits are stored in -//! DISPLACEMENT field and the remaining high 32 bits are stored in BASE. +//! - OFFSET - A relative offset of the address. Basically if BASE is specified +//! the relative displacement adjusts BASE and an optional INDEX. if BASE is +//! not specified then the OFFSET should be considered as ABSOLUTE address (at +//! least on X86). In that case its low 32 bits are stored in DISPLACEMENT +//! field and the remaining high 32 bits are stored in BASE. //! -//! OTHER - There is rest 8 bits that can be used for whatever purpose. The -//! x86::Mem operand uses these bits to store segment override prefix and -//! index shift (scale). +//! - OTHER - There is rest 8 bits that can be used for whatever purpose. For +//! example \ref x86::Mem operand uses these bits to store segment override +//! prefix and index shift (or scale). class BaseMem : public Operand { public: - enum AddrType : uint32_t { - kAddrTypeDefault = 0, - kAddrTypeAbs = 1, - kAddrTypeRel = 2 - }; - - // Shortcuts. - enum SignatureMem : uint32_t { - kSignatureMemAbs = kAddrTypeAbs << kSignatureMemAddrTypeShift, - kSignatureMemRel = kAddrTypeRel << kSignatureMemAddrTypeShift - }; - //! \cond INTERNAL //! Used internally to construct `BaseMem` operand from decomposed data. struct Decomposed { @@ -983,6 +1052,18 @@ class BaseMem : public Operand { constexpr BaseMem(const BaseMem& other) noexcept : Operand(other) {} + //! Creates a `BaseMem` operand from `baseReg` and `offset`. + //! + //! \note This is an architecture independent constructor that can be used to + //! create an architecture independent memory operand to be used in portable + //! code that can handle multiple architectures. + constexpr explicit BaseMem(const BaseReg& baseReg, int32_t offset = 0) noexcept + : Operand(Globals::Init, + kOpMem | (baseReg.type() << kSignatureMemBaseTypeShift), + baseReg.id(), + 0, + uint32_t(offset)) {} + //! \cond INTERNAL //! 
Creates a `BaseMem` operand from 4 integers as used by `Operand_` struct. @@ -1028,25 +1109,25 @@ class BaseMem : public Operand { //! Clones the memory operand. constexpr BaseMem clone() const noexcept { return BaseMem(*this); } - constexpr uint32_t addrType() const noexcept { return _getSignaturePart(); } - inline void setAddrType(uint32_t addrType) noexcept { _setSignaturePart(addrType); } - inline void resetAddrType() noexcept { _setSignaturePart(0); } - - constexpr bool isAbs() const noexcept { return addrType() == kAddrTypeAbs; } - inline void setAbs() noexcept { setAddrType(kAddrTypeAbs); } - - constexpr bool isRel() const noexcept { return addrType() == kAddrTypeRel; } - inline void setRel() noexcept { setAddrType(kAddrTypeRel); } + //! Creates a new copy of this memory operand adjusted by `off`. + inline BaseMem cloneAdjusted(int64_t off) const noexcept { + BaseMem result(*this); + result.addOffset(off); + return result; + } + //! Tests whether this memory operand is a register home (only used by \ref asmjit_compiler) constexpr bool isRegHome() const noexcept { return _hasSignaturePart(); } + //! Mark this memory operand as register home (only used by \ref asmjit_compiler). inline void setRegHome() noexcept { _signature |= kSignatureMemRegHomeFlag; } + //! Marks this operand to not be a register home (only used by \ref asmjit_compiler). inline void clearRegHome() noexcept { _signature &= ~kSignatureMemRegHomeFlag; } //! Tests whether the memory operand has a BASE register or label specified. constexpr bool hasBase() const noexcept { return (_signature & kSignatureMemBaseTypeMask) != 0; } //! Tests whether the memory operand has an INDEX register specified. constexpr bool hasIndex() const noexcept { return (_signature & kSignatureMemIndexTypeMask) != 0; } - //! Tests whether the memory operand has BASE and INDEX register. + //! Tests whether the memory operand has BASE or INDEX register. constexpr bool hasBaseOrIndex() const noexcept { return (_signature & kSignatureMemBaseIndexMask) != 0; } //! Tests whether the memory operand has BASE and INDEX register. constexpr bool hasBaseAndIndex() const noexcept { return (_signature & kSignatureMemBaseTypeMask) != 0 && (_signature & kSignatureMemIndexTypeMask) != 0; } @@ -1093,6 +1174,7 @@ class BaseMem : public Operand { //! Sets the index register to type and id of the given `index` operand. inline void setIndex(const BaseReg& index) noexcept { return _setIndex(index.type(), index.id()); } + //! \cond INTERNAL inline void _setBase(uint32_t rType, uint32_t rId) noexcept { _setSignaturePart(rType); _baseId = rId; @@ -1102,6 +1184,7 @@ class BaseMem : public Operand { _setSignaturePart(rType); _data[kDataMemIndexId] = rId; } + //! \endcond //! Resets the memory operand's BASE register or label. inline void resetBase() noexcept { _setBase(0, 0); } @@ -1159,7 +1242,7 @@ class BaseMem : public Operand { //! 64-bit offset. Use it only if you know that there is a BASE register //! and the offset is only 32 bits anyway. - //! Adjusts the offset by a 64-bit `offset`. + //! Adjusts the memory operand offset by a `offset`. inline void addOffset(int64_t offset) noexcept { if (isOffset64Bit()) { int64_t result = offset + int64_t(uint64_t(_data[kDataMemOffsetLo]) | (uint64_t(_baseId) << 32)); @@ -1199,20 +1282,48 @@ class BaseMem : public Operand { //! with any type, not just the default 64-bit int. class Imm : public Operand { public: + //! Type of the immediate. + enum Type : uint32_t { + //! Immediate is integer. + kTypeInteger = 0, + //! 
Immediate is a floating point stored as double-precision. + kTypeDouble = 1 + }; + //! \name Construction & Destruction //! \{ //! Creates a new immediate value (initial value is 0). - constexpr Imm() noexcept + inline constexpr Imm() noexcept : Operand(Globals::Init, kOpImm, 0, 0, 0) {} //! Creates a new immediate value from `other`. - constexpr Imm(const Imm& other) noexcept + inline constexpr Imm(const Imm& other) noexcept : Operand(other) {} - //! Creates a new signed immediate value, assigning the value to `val`. - constexpr explicit Imm(int64_t val) noexcept - : Operand(Globals::Init, kOpImm, 0, Support::unpackU32At0(val), Support::unpackU32At1(val)) {} + //! Creates a new immediate value from ARM/AArch64 specific `shift`. + inline constexpr Imm(const arm::Shift& shift) noexcept + : Operand(Globals::Init, kOpImm | (shift.op() << kSignaturePredicateShift), + 0, + Support::unpackU32At0(shift.value()), + Support::unpackU32At1(shift.value())) {} + + //! Creates a new signed immediate value, assigning the value to `val` and + //! an architecture-specific predicate to `predicate`. + //! + //! \note Predicate is currently only used by ARM architectures. + template + inline constexpr Imm(const T& val, const uint32_t predicate = 0) noexcept + : Operand(Globals::Init, kOpImm | (predicate << kSignaturePredicateShift), + 0, + Support::unpackU32At0(int64_t(val)), + Support::unpackU32At1(int64_t(val))) {} + + inline Imm(const float& val, const uint32_t predicate = 0) noexcept + : Operand(Globals::Init, kOpImm | (predicate << kSignaturePredicateShift), 0, 0, 0) { setValue(val); } + + inline Imm(const double& val, const uint32_t predicate = 0) noexcept + : Operand(Globals::Init, kOpImm | (predicate << kSignaturePredicateShift), 0, 0, 0) { setValue(val); } inline explicit Imm(Globals::NoInit_) noexcept : Operand(Globals::NoInit) {} @@ -1230,77 +1341,79 @@ class Imm : public Operand { //! \name Accessors //! \{ - //! Returns immediate value as 8-bit signed integer, possibly cropped. - constexpr int8_t i8() const noexcept { return int8_t(_data[kDataImmValueLo] & 0xFFu); } - //! Returns immediate value as 8-bit unsigned integer, possibly cropped. - constexpr uint8_t u8() const noexcept { return uint8_t(_data[kDataImmValueLo] & 0xFFu); } - //! Returns immediate value as 16-bit signed integer, possibly cropped. - constexpr int16_t i16() const noexcept { return int16_t(_data[kDataImmValueLo] & 0xFFFFu);} - //! Returns immediate value as 16-bit unsigned integer, possibly cropped. - constexpr uint16_t u16() const noexcept { return uint16_t(_data[kDataImmValueLo] & 0xFFFFu);} - //! Returns immediate value as 32-bit signed integer, possibly cropped. - constexpr int32_t i32() const noexcept { return int32_t(_data[kDataImmValueLo]); } - //! Returns low 32-bit signed integer. - constexpr int32_t i32Lo() const noexcept { return int32_t(_data[kDataImmValueLo]); } - //! Returns high 32-bit signed integer. - constexpr int32_t i32Hi() const noexcept { return int32_t(_data[kDataImmValueHi]); } - //! Returns immediate value as 32-bit unsigned integer, possibly cropped. - constexpr uint32_t u32() const noexcept { return _data[kDataImmValueLo]; } - //! Returns low 32-bit signed integer. - constexpr uint32_t u32Lo() const noexcept { return _data[kDataImmValueLo]; } - //! Returns high 32-bit signed integer. - constexpr uint32_t u32Hi() const noexcept { return _data[kDataImmValueHi]; } - //! Returns immediate value as 64-bit signed integer. 
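// A short sketch of the new Imm type tagging, mirroring the Imm unit-test
// changes earlier in this patch:
Imm a(-42);     // integral value, a.type() == Imm::kTypeInteger
Imm b(0.4);     // floating point, stored as double, b.type() == Imm::kTypeDouble

int64_t rawA = a.value();            // -42
double  valB = b.valueAs<double>();  // 0.4
// imm(0.4) constructs an equivalent double immediate, so b == imm(0.4).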
- constexpr int64_t i64() const noexcept { return int64_t((uint64_t(_data[kDataImmValueHi]) << 32) | _data[kDataImmValueLo]); } - //! Returns immediate value as 64-bit unsigned integer. - constexpr uint64_t u64() const noexcept { return uint64_t(i64()); } - //! Returns immediate value as `intptr_t`, possibly cropped if size of `intptr_t` is 32 bits. - constexpr intptr_t iptr() const noexcept { return (sizeof(intptr_t) == sizeof(int64_t)) ? intptr_t(i64()) : intptr_t(i32()); } - //! Returns immediate value as `uintptr_t`, possibly cropped if size of `uintptr_t` is 32 bits. - constexpr uintptr_t uptr() const noexcept { return (sizeof(uintptr_t) == sizeof(uint64_t)) ? uintptr_t(u64()) : uintptr_t(u32()); } + //! Returns immediate type, see \ref Type. + constexpr uint32_t type() const noexcept { return _getSignaturePart(); } + //! Sets the immediate type to `type`, see \ref Type. + inline void setType(uint32_t type) noexcept { _setSignaturePart(type); } + //! Resets immediate type to `kTypeInteger`. + inline void resetType() noexcept { setType(kTypeInteger); } + + //! Returns operation predicate of the immediate. + //! + //! The meaning depends on architecture, for example on ARM hardware this + //! describes \ref arm::Predicate::ShiftOp of the immediate. + constexpr uint32_t predicate() const noexcept { return _getSignaturePart(); } + + //! Sets operation predicate of the immediate to `predicate`. + //! + //! The meaning depends on architecture, for example on ARM hardware this + //! describes \ref arm::Predicate::ShiftOp of the immediate. + inline void setPredicate(uint32_t predicate) noexcept { _setSignaturePart(predicate); } + + //! Resets the shift operation type of the immediate to the default value (no operation). + inline void resetPredicate() noexcept { _setSignaturePart(0); } + + //! Returns the immediate value as `int64_t`, which is the internal format Imm uses. + constexpr int64_t value() const noexcept { + return int64_t((uint64_t(_data[kDataImmValueHi]) << 32) | _data[kDataImmValueLo]); + } + + //! Tests whether this immediate value is integer of any size. + constexpr uint32_t isInteger() const noexcept { return type() == kTypeInteger; } + //! Tests whether this immediate value is a double precision floating point value. + constexpr uint32_t isDouble() const noexcept { return type() == kTypeDouble; } //! Tests whether the immediate can be casted to 8-bit signed integer. - constexpr bool isInt8() const noexcept { return Support::isInt8(i64()); } + constexpr bool isInt8() const noexcept { return type() == kTypeInteger && Support::isInt8(value()); } //! Tests whether the immediate can be casted to 8-bit unsigned integer. - constexpr bool isUInt8() const noexcept { return Support::isUInt8(i64()); } + constexpr bool isUInt8() const noexcept { return type() == kTypeInteger && Support::isUInt8(value()); } //! Tests whether the immediate can be casted to 16-bit signed integer. - constexpr bool isInt16() const noexcept { return Support::isInt16(i64()); } + constexpr bool isInt16() const noexcept { return type() == kTypeInteger && Support::isInt16(value()); } //! Tests whether the immediate can be casted to 16-bit unsigned integer. - constexpr bool isUInt16() const noexcept { return Support::isUInt16(i64()); } + constexpr bool isUInt16() const noexcept { return type() == kTypeInteger && Support::isUInt16(value()); } //! Tests whether the immediate can be casted to 32-bit signed integer. 
- constexpr bool isInt32() const noexcept { return Support::isInt32(i64()); } + constexpr bool isInt32() const noexcept { return type() == kTypeInteger && Support::isInt32(value()); } //! Tests whether the immediate can be casted to 32-bit unsigned integer. - constexpr bool isUInt32() const noexcept { return _data[kDataImmValueHi] == 0; } - - //! Sets immediate value to 8-bit signed integer `val`. - inline void setI8(int8_t val) noexcept { setI64(val); } - //! Sets immediate value to 8-bit unsigned integer `val`. - inline void setU8(uint8_t val) noexcept { setU64(val); } - //! Sets immediate value to 16-bit signed integer `val`. - inline void setI16(int16_t val) noexcept { setI64(val); } - //! Sets immediate value to 16-bit unsigned integer `val`. - inline void setU16(uint16_t val) noexcept { setU64(val); } - //! Sets immediate value to 32-bit signed integer `val`. - inline void setI32(int32_t val) noexcept { setI64(val); } - //! Sets immediate value to 32-bit unsigned integer `val`. - inline void setU32(uint32_t val) noexcept { setU64(val); } - //! Sets immediate value to 64-bit signed integer `val`. - inline void setI64(int64_t val) noexcept { + constexpr bool isUInt32() const noexcept { return type() == kTypeInteger && _data[kDataImmValueHi] == 0; } + + //! Returns the immediate value casted to `T`. + //! + //! The value is masked before it's casted to `T` so the returned value is + //! simply the representation of `T` considering the original value's lowest + //! bits. + template + inline T valueAs() const noexcept { return Support::immediateToT(value()); } + + //! Returns low 32-bit signed integer. + constexpr int32_t int32Lo() const noexcept { return int32_t(_data[kDataImmValueLo]); } + //! Returns high 32-bit signed integer. + constexpr int32_t int32Hi() const noexcept { return int32_t(_data[kDataImmValueHi]); } + //! Returns low 32-bit signed integer. + constexpr uint32_t uint32Lo() const noexcept { return _data[kDataImmValueLo]; } + //! Returns high 32-bit signed integer. + constexpr uint32_t uint32Hi() const noexcept { return _data[kDataImmValueHi]; } + + //! Sets immediate value to `val`, the value is casted to a signed 64-bit integer. + template + inline void setValue(const T& val) noexcept { + _setValueInternal(Support::immediateFromT(val), std::is_floating_point::value ? kTypeDouble : kTypeInteger); + } + + inline void _setValueInternal(int64_t val, uint32_t type) noexcept { + setType(type); _data[kDataImmValueHi] = uint32_t(uint64_t(val) >> 32); _data[kDataImmValueLo] = uint32_t(uint64_t(val) & 0xFFFFFFFFu); } - //! Sets immediate value to 64-bit unsigned integer `val`. - inline void setU64(uint64_t val) noexcept { setI64(int64_t(val)); } - //! Sets immediate value to intptr_t `val`. - inline void setIPtr(intptr_t val) noexcept { setI64(val); } - //! Sets immediate value to uintptr_t `val`. - inline void setUPtr(uintptr_t val) noexcept { setU64(val); } - - //! Sets immediate value to `val`. - template - inline void setValue(T val) noexcept { setI64(int64_t(Support::asNormalized(val))); } - - inline void setDouble(double d) noexcept { setU64(Support::bitCast(d)); } //! \} @@ -1310,15 +1423,59 @@ class Imm : public Operand { //! Clones the immediate operand. 
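// A rough sketch of the setValue()/valueAs() round trip introduced above,
// assuming the Support::immediateFromT / immediateToT helpers added by this
// patch behave as documented (mask first, then cast):
Imm v;
v.setValue(3.5);                      // floating point -> type() becomes kTypeDouble
v.setValue(int16_t(-7));              // integral       -> type() becomes kTypeInteger
int64_t raw  = v.value();             // always the internal 64-bit representation (-7)
uint8_t low8 = v.valueAs<uint8_t>();  // low 8 bits of the value, i.e. 0xF9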
constexpr Imm clone() const noexcept { return Imm(*this); } - inline void signExtend8Bits() noexcept { setI64(int64_t(i8())); } - inline void signExtend16Bits() noexcept { setI64(int64_t(i16())); } - inline void signExtend32Bits() noexcept { setI64(int64_t(i32())); } + inline void signExtend8Bits() noexcept { setValue(int64_t(valueAs())); } + inline void signExtend16Bits() noexcept { setValue(int64_t(valueAs())); } + inline void signExtend32Bits() noexcept { setValue(int64_t(valueAs())); } - inline void zeroExtend8Bits() noexcept { setU64(u8()); } - inline void zeroExtend16Bits() noexcept { setU64(u16()); } + inline void zeroExtend8Bits() noexcept { setValue(valueAs()); } + inline void zeroExtend16Bits() noexcept { setValue(valueAs()); } inline void zeroExtend32Bits() noexcept { _data[kDataImmValueHi] = 0u; } //! \} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use valueAs() instead") + inline int8_t i8() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline uint8_t u8() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline int16_t i16() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline uint16_t u16() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline int32_t i32() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline uint32_t u32() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use value() instead") + inline int64_t i64() const noexcept { return value(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline uint64_t u64() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline intptr_t iptr() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use valueAs() instead") + inline uintptr_t uptr() const noexcept { return valueAs(); } + + ASMJIT_DEPRECATED("Use int32Lo() instead") + inline int32_t i32Lo() const noexcept { return int32Lo(); } + + ASMJIT_DEPRECATED("Use uint32Lo() instead") + inline uint32_t u32Lo() const noexcept { return uint32Lo(); } + + ASMJIT_DEPRECATED("Use int32Hi() instead") + inline int32_t i32Hi() const noexcept { return int32Hi(); } + + ASMJIT_DEPRECATED("Use uint32Hi() instead") + inline uint32_t u32Hi() const noexcept { return uint32Hi(); } +#endif // !ASMJIT_NO_DEPRECATED }; //! Creates a new immediate operand. @@ -1326,12 +1483,48 @@ class Imm : public Operand { //! Using `imm(x)` is much nicer than using `Imm(x)` as this is a template //! which can accept any integer including pointers and function pointers. template -static constexpr Imm imm(T val) noexcept { - return Imm(std::is_signed::value ? int64_t(val) : int64_t(uint64_t(val))); -} +static constexpr Imm imm(const T& val) noexcept { return Imm(val); } //! \} +// ============================================================================ +// [asmjit::Globals::none] +// ============================================================================ + +namespace Globals { + //! \ingroup asmjit_assembler + //! + //! A default-constructed operand of `Operand_::kOpNone` type. + static constexpr const Operand none; +} + +// ============================================================================ +// [asmjit::Support::ForwardOp] +// ============================================================================ + +//! 
\cond INTERNAL +namespace Support { + +template +struct ForwardOpImpl { + static ASMJIT_INLINE const T& forward(const T& value) noexcept { return value; } +}; + +template +struct ForwardOpImpl { + static ASMJIT_INLINE Imm forward(const T& value) noexcept { return Imm(value); } +}; + +//! Either forwards operand T or returns a new operand for T if T is a type +//! convertible to operand. At the moment this is only used to convert integers +//! to \ref Imm operands. +template +struct ForwardOp : public ForwardOpImpl::type>::value> {}; + +} + +//! \endcond + ASMJIT_END_NAMESPACE #endif // ASMJIT_CORE_OPERAND_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/osutils.h b/libs/asmjit/src/asmjit/core/osutils.h index b9a2df4..a469129 100644 --- a/libs/asmjit/src/asmjit/core/osutils.h +++ b/libs/asmjit/src/asmjit/core/osutils.h @@ -28,7 +28,7 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_support +//! \addtogroup asmjit_utilities //! \{ // ============================================================================ @@ -50,7 +50,7 @@ namespace OSUtils { //! Lock. //! //! Lock is internal, it cannot be used outside of AsmJit, however, its internal -//! layout is exposed as it's used by some other public classes. +//! layout is exposed as it's used by some other classes, which are public. class Lock { public: ASMJIT_NONCOPYABLE(Lock) @@ -78,60 +78,8 @@ class Lock { inline void lock() noexcept; inline void unlock() noexcept; }; - -#ifdef ASMJIT_EXPORTS -#if defined(_WIN32) - -// Win32 implementation. -static_assert(sizeof(Lock::Handle) == sizeof(CRITICAL_SECTION), "asmjit::Lock::Handle layout must match CRITICAL_SECTION"); -static_assert(alignof(Lock::Handle) == alignof(CRITICAL_SECTION), "asmjit::Lock::Handle alignment must match CRITICAL_SECTION"); - -inline Lock::Lock() noexcept { InitializeCriticalSection(reinterpret_cast(&_handle)); } -inline Lock::~Lock() noexcept { DeleteCriticalSection(reinterpret_cast(&_handle)); } -inline void Lock::lock() noexcept { EnterCriticalSection(reinterpret_cast(&_handle)); } -inline void Lock::unlock() noexcept { LeaveCriticalSection(reinterpret_cast(&_handle)); } - -#elif !defined(__EMSCRIPTEN__) - -// PThread implementation. -inline Lock::Lock() noexcept { pthread_mutex_init(&_handle, nullptr); } -inline Lock::~Lock() noexcept { pthread_mutex_destroy(&_handle); } -inline void Lock::lock() noexcept { pthread_mutex_lock(&_handle); } -inline void Lock::unlock() noexcept { pthread_mutex_unlock(&_handle); } - -#else - -// Dummy implementation - Emscripten or other unsupported platform. -inline Lock::Lock() noexcept {} -inline Lock::~Lock() noexcept {} -inline void Lock::lock() noexcept {} -inline void Lock::unlock() noexcept {} - -#endif -#endif - //! \endcond -// ============================================================================ -// [asmjit::LockGuard] -// ============================================================================ - -#ifdef ASMJIT_EXPORTS -//! \cond INTERNAL - -//! Scoped lock. -struct LockGuard { - ASMJIT_NONCOPYABLE(LockGuard) - - Lock& _target; - - inline LockGuard(Lock& target) noexcept : _target(target) { _target.lock(); } - inline ~LockGuard() noexcept { _target.unlock(); } -}; - -//! \endcond -#endif - //! 
\} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/osutils_p.h b/libs/asmjit/src/asmjit/core/osutils_p.h new file mode 100644 index 0000000..31db308 --- /dev/null +++ b/libs/asmjit/src/asmjit/core/osutils_p.h @@ -0,0 +1,94 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_CORE_OSUTILS_P_H_INCLUDED +#define ASMJIT_CORE_OSUTILS_P_H_INCLUDED + +#include "../core/osutils.h" + +ASMJIT_BEGIN_NAMESPACE + +//! \cond INTERNAL +//! \addtogroup asmjit_utilities +//! \{ + +// ============================================================================ +// [asmjit::Lock] +// ============================================================================ + +#if defined(_WIN32) + +// Windows implementation. +static_assert(sizeof(Lock::Handle) == sizeof(CRITICAL_SECTION), "asmjit::Lock::Handle layout must match CRITICAL_SECTION"); +static_assert(alignof(Lock::Handle) == alignof(CRITICAL_SECTION), "asmjit::Lock::Handle alignment must match CRITICAL_SECTION"); + +inline Lock::Lock() noexcept { InitializeCriticalSection(reinterpret_cast(&_handle)); } +inline Lock::~Lock() noexcept { DeleteCriticalSection(reinterpret_cast(&_handle)); } +inline void Lock::lock() noexcept { EnterCriticalSection(reinterpret_cast(&_handle)); } +inline void Lock::unlock() noexcept { LeaveCriticalSection(reinterpret_cast(&_handle)); } + +#elif !defined(__EMSCRIPTEN__) + +// PThread implementation. +#ifdef PTHREAD_MUTEX_INITIALIZER +inline Lock::Lock() noexcept : _handle(PTHREAD_MUTEX_INITIALIZER) {} +#else +inline Lock::Lock() noexcept { pthread_mutex_init(&_handle, nullptr); } +#endif +inline Lock::~Lock() noexcept { pthread_mutex_destroy(&_handle); } +inline void Lock::lock() noexcept { pthread_mutex_lock(&_handle); } +inline void Lock::unlock() noexcept { pthread_mutex_unlock(&_handle); } + +#else + +// Dummy implementation - Emscripten or other unsupported platform. +inline Lock::Lock() noexcept {} +inline Lock::~Lock() noexcept {} +inline void Lock::lock() noexcept {} +inline void Lock::unlock() noexcept {} + +#endif + +// ============================================================================ +// [asmjit::LockGuard] +// ============================================================================ + +//! Scoped lock. +class LockGuard { +public: + ASMJIT_NONCOPYABLE(LockGuard) + + Lock& _target; + + inline LockGuard(Lock& target) noexcept + : _target(target) { _target.lock(); } + inline ~LockGuard() noexcept { _target.unlock(); } +}; + +//! \} +//! 
\endcond + +ASMJIT_END_NAMESPACE + +#endif // ASMJIT_CORE_OSUTILS_P_H_INCLUDED + diff --git a/libs/asmjit/src/asmjit/core/raassignment_p.h b/libs/asmjit/src/asmjit/core/raassignment_p.h index 2618afd..bcdf1a9 100644 --- a/libs/asmjit/src/asmjit/core/raassignment_p.h +++ b/libs/asmjit/src/asmjit/core/raassignment_p.h @@ -54,6 +54,17 @@ class RAAssignment { }; struct Layout { + //! Index of architecture registers per group. + RARegIndex physIndex; + //! Count of architecture registers per group. + RARegCount physCount; + //! Count of physical registers of all groups. + uint32_t physTotal; + //! Count of work registers. + uint32_t workCount; + //! WorkRegs data (vector). + const RAWorkRegs* workRegs; + inline void reset() noexcept { physIndex.reset(); physCount.reset(); @@ -61,54 +72,52 @@ class RAAssignment { workCount = 0; workRegs = nullptr; } - - RARegIndex physIndex; //!< Index of architecture registers per group. - RARegCount physCount; //!< Count of architecture registers per group. - uint32_t physTotal; //!< Count of physical registers of all groups. - uint32_t workCount; //!< Count of work registers. - const RAWorkRegs* workRegs; //!< WorkRegs data (vector). }; struct PhysToWorkMap { - static inline size_t sizeOf(uint32_t count) noexcept { - return sizeof(PhysToWorkMap) - sizeof(uint32_t) + size_t(count) * sizeof(uint32_t); + //! Assigned registers (each bit represents one physical reg). + RARegMask assigned; + //! Dirty registers (spill slot out of sync or no spill slot). + RARegMask dirty; + //! PhysReg to WorkReg mapping. + uint32_t workIds[1 /* ... */]; + + static inline size_t sizeOf(size_t count) noexcept { + return sizeof(PhysToWorkMap) - sizeof(uint32_t) + count * sizeof(uint32_t); } - inline void reset(uint32_t count) noexcept { + inline void reset(size_t count) noexcept { assigned.reset(); dirty.reset(); - for (uint32_t i = 0; i < count; i++) + for (size_t i = 0; i < count; i++) workIds[i] = kWorkNone; } - inline void copyFrom(const PhysToWorkMap* other, uint32_t count) noexcept { + inline void copyFrom(const PhysToWorkMap* other, size_t count) noexcept { size_t size = sizeOf(count); memcpy(this, other, size); } - - RARegMask assigned; //!< Assigned registers (each bit represents one physical reg). - RARegMask dirty; //!< Dirty registers (spill slot out of sync or no spill slot). - uint32_t workIds[1 /* ... */]; //!< PhysReg to WorkReg mapping. }; struct WorkToPhysMap { - static inline size_t sizeOf(uint32_t count) noexcept { + //! WorkReg to PhysReg mapping + uint8_t physIds[1 /* ... */]; + + static inline size_t sizeOf(size_t count) noexcept { return size_t(count) * sizeof(uint8_t); } - inline void reset(uint32_t count) noexcept { - for (uint32_t i = 0; i < count; i++) + inline void reset(size_t count) noexcept { + for (size_t i = 0; i < count; i++) physIds[i] = kPhysNone; } - inline void copyFrom(const WorkToPhysMap* other, uint32_t count) noexcept { + inline void copyFrom(const WorkToPhysMap* other, size_t count) noexcept { size_t size = sizeOf(count); if (ASMJIT_LIKELY(size)) memcpy(this, other, size); } - - uint8_t physIds[1 /* ... */]; //!< WorkReg to PhysReg mapping }; //! Physical registers layout. 
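// An illustrative allocation sketch for the trailing-array maps above. These
// are internal register-allocator structures; asmjit carves them out of a
// Zone, plain malloc is used here only to show the size arithmetic:
size_t physTotal = 8;  // example physical register count
void* p = ::malloc(RAAssignment::PhysToWorkMap::sizeOf(physTotal));
auto* physToWork = static_cast<RAAssignment::PhysToWorkMap*>(p);
physToWork->reset(physTotal);  // clears assigned/dirty masks, sets all workIds[] to kWorkNone
// sizeOf() accounts for the workIds[1 /* ... */] flexible tail:
//   sizeof(PhysToWorkMap) - sizeof(uint32_t) + physTotal * sizeof(uint32_t)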
diff --git a/libs/asmjit/src/asmjit/core/rabuilders_p.h b/libs/asmjit/src/asmjit/core/rabuilders_p.h index 6f400ad..0360a78 100644 --- a/libs/asmjit/src/asmjit/core/rabuilders_p.h +++ b/libs/asmjit/src/asmjit/core/rabuilders_p.h @@ -27,6 +27,7 @@ #include "../core/api-config.h" #ifndef ASMJIT_NO_COMPILER +#include "../core/formatter.h" #include "../core/rapass_p.h" ASMJIT_BEGIN_NAMESPACE @@ -36,29 +37,28 @@ ASMJIT_BEGIN_NAMESPACE //! \{ // ============================================================================ -// [asmjit::RACFGBuilder] +// [asmjit::RACFGBuilderT] // ============================================================================ template -class RACFGBuilder { +class RACFGBuilderT { public: - RAPass* _pass; - BaseCompiler* _cc; - - RABlock* _curBlock; - RABlock* _retBlock; - FuncNode* _funcNode; - RARegsStats _blockRegStats; - uint32_t _exitLabelId; - ZoneVector _sharedAssignmentsMap; + BaseRAPass* _pass = nullptr; + BaseCompiler* _cc = nullptr; + RABlock* _curBlock = nullptr; + RABlock* _retBlock = nullptr; + FuncNode* _funcNode = nullptr; + RARegsStats _blockRegStats {}; + uint32_t _exitLabelId = Globals::kInvalidId; + ZoneVector _sharedAssignmentsMap {}; // Only used by logging, it's fine to be here to prevent more #ifdefs... - bool _hasCode; - RABlock* _lastLoggedBlock; + bool _hasCode = false; + RABlock* _lastLoggedBlock = nullptr; #ifndef ASMJIT_NO_LOGGING - Logger* _logger; - uint32_t _logFlags; + Logger* _logger = nullptr; + uint32_t _logFlags = FormatOptions::kFlagPositions; StringTmp<512> _sb; #endif @@ -66,25 +66,16 @@ class RACFGBuilder { static constexpr uint32_t kCodeIndentation = 4; // NOTE: This is a bit hacky. There are some nodes which are processed twice - // (see `onBeforeCall()` and `onBeforeRet()`) as they can insert some nodes + // (see `onBeforeInvoke()` and `onBeforeRet()`) as they can insert some nodes // around them. Since we don't have any flags to mark these we just use their // position that is [at that time] unassigned. static constexpr uint32_t kNodePositionDidOnBefore = 0xFFFFFFFFu; - inline RACFGBuilder(RAPass* pass) noexcept + inline RACFGBuilderT(BaseRAPass* pass) noexcept : _pass(pass), - _cc(pass->cc()), - _curBlock(nullptr), - _retBlock(nullptr), - _funcNode(nullptr), - _blockRegStats{}, - _exitLabelId(Globals::kInvalidId), - _hasCode(false), - _lastLoggedBlock(nullptr) { + _cc(pass->cc()) { #ifndef ASMJIT_NO_LOGGING _logger = _pass->debugLogger(); - _logFlags = FormatOptions::kFlagPositions; - if (_logger) _logFlags |= _logger->flags(); #endif @@ -122,7 +113,7 @@ class RACFGBuilder { // Instruction | Jump | Invoke | Return // ------------------------------------ - // Handle `InstNode`, `FuncCallNode`, and `FuncRetNode`. All of them + // Handle `InstNode`, `InvokeNode`, and `FuncRetNode`. All of them // share the same interface that provides operands that have read/write // semantics. if (ASMJIT_UNLIKELY(!_curBlock)) { @@ -135,18 +126,18 @@ class RACFGBuilder { _hasCode = true; - if (node->isFuncCall() || node->isFuncRet()) { + if (node->isInvoke() || node->isFuncRet()) { if (node->position() != kNodePositionDidOnBefore) { // Call and Reg are complicated as they may insert some surrounding // code around them. The simplest approach is to get the previous // node, call the `onBefore()` handlers and then check whether // anything changed and restart if so. By restart we mean that the // current `node` would go back to the first possible inserted node - // by `onBeforeCall()` or `onBeforeRet()`. 
+ // by `onBeforeInvoke()` or `onBeforeRet()`. BaseNode* prev = node->prev(); - if (node->type() == BaseNode::kNodeFuncCall) - ASMJIT_PROPAGATE(static_cast(this)->onBeforeCall(node->as())); + if (node->type() == BaseNode::kNodeInvoke) + ASMJIT_PROPAGATE(static_cast(this)->onBeforeInvoke(node->as())); else ASMJIT_PROPAGATE(static_cast(this)->onBeforeRet(node->as())); @@ -159,7 +150,7 @@ class RACFGBuilder { node->setPosition(kNodePositionDidOnBefore); node = prev->next(); - // `onBeforeCall()` and `onBeforeRet()` can only insert instructions. + // `onBeforeInvoke()` and `onBeforeRet()` can only insert instructions. ASMJIT_ASSERT(node->isInst()); } @@ -179,8 +170,8 @@ class RACFGBuilder { ib.reset(); ASMJIT_PROPAGATE(static_cast(this)->onInst(inst, controlType, ib)); - if (node->isFuncCall()) { - ASMJIT_PROPAGATE(static_cast(this)->onCall(inst->as(), ib)); + if (node->isInvoke()) { + ASMJIT_PROPAGATE(static_cast(this)->onInvoke(inst->as(), ib)); } if (node->isFuncRet()) { @@ -231,14 +222,17 @@ class RACFGBuilder { if (ASMJIT_UNLIKELY(!targetBlock)) return DebugUtils::errored(kErrorOutOfMemory); + targetBlock->makeTargetable(); ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock)); } else { // Not a label - could be jump with reg/mem operand, which // means that it can go anywhere. Such jumps must either be // annotated so the CFG can be properly constructed, otherwise - // we assume the worst case - can jump to every basic block. + // we assume the worst case - can jump to any basic block. JumpAnnotation* jumpAnnotation = nullptr; + _curBlock->addFlags(RABlock::kFlagHasJumpTable); + if (inst->type() == BaseNode::kNodeJump) jumpAnnotation = inst->as()->annotation(); @@ -255,6 +249,7 @@ class RACFGBuilder { // Prevents adding basic-block successors multiple times. if (!targetBlock->hasTimestamp(timestamp)) { targetBlock->setTimestamp(timestamp); + targetBlock->makeTargetable(); ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock)); } } @@ -330,7 +325,7 @@ class RACFGBuilder { if (!_curBlock) { // If the current code is unreachable the label makes it reachable // again. We may remove the whole block in the future if it's not - // referenced. + // referenced though. _curBlock = node->passData(); if (_curBlock) { @@ -340,13 +335,14 @@ class RACFGBuilder { break; } else { - // No block assigned, to create a new one, and assign it. + // No block assigned - create a new one and assign it. _curBlock = _pass->newBlock(node); if (ASMJIT_UNLIKELY(!_curBlock)) return DebugUtils::errored(kErrorOutOfMemory); node->setPassData(_curBlock); } + _curBlock->makeTargetable(); _hasCode = false; _blockRegStats.reset(); ASMJIT_PROPAGATE(_pass->addBlock(_curBlock)); @@ -354,10 +350,13 @@ class RACFGBuilder { else { if (node->hasPassData()) { RABlock* consecutive = node->passData(); + consecutive->makeTargetable(); + if (_curBlock == consecutive) { // The label currently processed is part of the current block. This // is only possible for multiple labels that are right next to each - // other, or are separated by non-code nodes like directives and comments. + // other or labels that are separated by non-code nodes like directives + // and comments. 
if (ASMJIT_UNLIKELY(_hasCode)) return DebugUtils::errored(kErrorInvalidState); } @@ -391,6 +390,7 @@ class RACFGBuilder { RABlock* consecutive = _pass->newBlock(node); if (ASMJIT_UNLIKELY(!consecutive)) return DebugUtils::errored(kErrorOutOfMemory); + consecutive->makeTargetable(); ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive)); ASMJIT_PROPAGATE(_pass->addBlock(consecutive)); @@ -409,7 +409,7 @@ class RACFGBuilder { logNode(node, kRootIndentation); // Unlikely: Assume that the exit label is reached only once per function. - if (ASMJIT_UNLIKELY(node->as()->id() == _exitLabelId)) { + if (ASMJIT_UNLIKELY(node->as()->labelId() == _exitLabelId)) { _curBlock->setLast(node); _curBlock->makeConstructed(_blockRegStats); ASMJIT_PROPAGATE(_pass->addExitBlock(_curBlock)); @@ -478,6 +478,8 @@ class RACFGBuilder { if (ASMJIT_UNLIKELY(!_retBlock)) return DebugUtils::errored(kErrorOutOfMemory); + + _retBlock->makeTargetable(); ASMJIT_PROPAGATE(_pass->addExitBlock(_retBlock)); if (node != func) { @@ -492,7 +494,7 @@ class RACFGBuilder { // Reset everything we may need. _blockRegStats.reset(); - _exitLabelId = func->exitNode()->id(); + _exitLabelId = func->exitNode()->labelId(); // Initially we assume there is no code in the function body. _hasCode = false; @@ -520,13 +522,13 @@ class RACFGBuilder { size_t blockCount = blocks.size(); // NOTE: Iterate from `1` as the first block is the entry block, we don't - // allow the entry to be a successor of block that ends with unknown jump. + // allow the entry to be a successor of any block. RABlock* consecutive = block->consecutive(); for (size_t i = 1; i < blockCount; i++) { - RABlock* successor = blocks[i]; - if (successor == consecutive) + RABlock* candidate = blocks[i]; + if (candidate == consecutive || !candidate->isTargetable()) continue; - block->appendSuccessor(successor); + block->appendSuccessor(candidate); } return shareAssignmentAcrossSuccessors(block); @@ -599,11 +601,11 @@ class RACFGBuilder { _sb.clear(); _sb.appendChars(' ', indentation); if (action) { - _sb.appendString(action); - _sb.appendChar(' '); + _sb.append(action); + _sb.append(' '); } - Logging::formatNode(_sb, _logFlags, cc(), node); - _sb.appendChar('\n'); + Formatter::formatNode(_sb, _logFlags, cc(), node); + _sb.append('\n'); _logger->log(_sb); } #else diff --git a/libs/asmjit/src/asmjit/core/radefs_p.h b/libs/asmjit/src/asmjit/core/radefs_p.h index c63a1a3..5395542 100644 --- a/libs/asmjit/src/asmjit/core/radefs_p.h +++ b/libs/asmjit/src/asmjit/core/radefs_p.h @@ -25,11 +25,12 @@ #define ASMJIT_CORE_RADEFS_P_H_INCLUDED #include "../core/api-config.h" -#ifndef ASMJIT_NO_COMPILER - -#include "../core/compiler.h" -#include "../core/logging.h" +#include "../core/archtraits.h" +#include "../core/compilerdefs.h" +#include "../core/logger.h" +#include "../core/operand.h" #include "../core/support.h" +#include "../core/type.h" #include "../core/zone.h" #include "../core/zonevector.h" @@ -64,13 +65,52 @@ ASMJIT_BEGIN_NAMESPACE // [Forward Declarations] // ============================================================================ -class RAPass; +class BaseRAPass; class RABlock; +class BaseNode; struct RAStackSlot; typedef ZoneVector RABlocks; typedef ZoneVector RAWorkRegs; +// ============================================================================ +// [asmjit::RAConstraints] +// ============================================================================ + +class RAConstraints { +public: + uint32_t _availableRegs[BaseReg::kGroupVirt] {}; + + inline RAConstraints() noexcept {} 
+ + ASMJIT_NOINLINE Error init(uint32_t arch) noexcept { + switch (arch) { + case Environment::kArchX86: + case Environment::kArchX64: { + uint32_t registerCount = arch == Environment::kArchX86 ? 8 : 16; + _availableRegs[BaseReg::kGroupGp] = Support::lsbMask(registerCount) & ~Support::bitMask(4u); + _availableRegs[BaseReg::kGroupVec] = Support::lsbMask(registerCount); + _availableRegs[BaseReg::kGroupOther0] = Support::lsbMask(8); + _availableRegs[BaseReg::kGroupOther1] = Support::lsbMask(8); + return kErrorOk; + } + + case Environment::kArchAArch64: { + _availableRegs[BaseReg::kGroupGp] = 0xFFFFFFFFu & ~Support::bitMask(18, 31u); + _availableRegs[BaseReg::kGroupVec] = 0xFFFFFFFFu; + _availableRegs[BaseReg::kGroupOther0] = 0; + _availableRegs[BaseReg::kGroupOther1] = 0; + return kErrorOk; + } + + default: + return DebugUtils::errored(kErrorInvalidArch); + } + } + + inline uint32_t availableRegs(uint32_t group) const noexcept { return _availableRegs[group]; } +}; + // ============================================================================ // [asmjit::RAStrategy] // ============================================================================ @@ -93,45 +133,6 @@ struct RAStrategy { inline bool isComplex() const noexcept { return _type >= kStrategyComplex; } }; -// ============================================================================ -// [asmjit::RAArchTraits] -// ============================================================================ - -//! Traits. -struct RAArchTraits { - enum Flags : uint32_t { - //! Registers can be swapped by a single instruction. - kHasSwap = 0x01u - }; - - uint8_t _flags[BaseReg::kGroupVirt]; - - //! \name Construction & Destruction - //! \{ - - inline RAArchTraits() noexcept { reset(); } - inline void reset() noexcept { memset(_flags, 0, sizeof(_flags)); } - - //! \} - - //! \name Accessors - //! \{ - - inline bool hasFlag(uint32_t group, uint32_t flag) const noexcept { return (_flags[group] & flag) != 0; } - inline bool hasSwap(uint32_t group) const noexcept { return hasFlag(group, kHasSwap); } - - inline uint8_t& operator[](uint32_t group) noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - return _flags[group]; - } - - inline const uint8_t& operator[](uint32_t group) const noexcept { - ASMJIT_ASSERT(group < BaseReg::kGroupVirt); - return _flags[group]; - } - - //! \} -}; // ============================================================================ // [asmjit::RARegCount] @@ -317,8 +318,9 @@ struct RARegMask { //! before the register allocator tries to do its job. For example to use fast //! register allocation inside a block or loop it cannot have clobbered and/or //! fixed registers, etc... -struct RARegsStats { - uint32_t _packed; +class RARegsStats { +public: + uint32_t _packed = 0; enum Index : uint32_t { kIndexUsed = 0, @@ -355,12 +357,12 @@ struct RARegsStats { //! Count of live registers, per group. class RALiveCount { public: - uint32_t n[BaseReg::kGroupVirt]; + uint32_t n[BaseReg::kGroupVirt] {}; //! \name Construction & Destruction //! \{ - inline RALiveCount() noexcept { reset(); } + inline RALiveCount() noexcept = default; inline RALiveCount(const RALiveCount& other) noexcept = default; inline void init(const RALiveCount& other) noexcept { @@ -674,19 +676,9 @@ class RALiveSpans { //! Statistics about a register liveness. class RALiveStats { public: - uint32_t _width; - float _freq; - float _priority; - - //! \name Construction & Destruction - //! 
\{ - - inline RALiveStats() - : _width(0), - _freq(0.0f), - _priority(0.0f) {} - - //! \} + uint32_t _width = 0; + float _freq = 0.0f; + float _priority = 0.0f; //! \name Accessors //! \{ @@ -928,44 +920,46 @@ class RAWorkReg { ASMJIT_NONCOPYABLE(RAWorkReg) //! RAPass specific ID used during analysis and allocation. - uint32_t _workId; - //! Copy of ID used by `VirtReg`. - uint32_t _virtId; - - //! Permanent association with `VirtReg`. - VirtReg* _virtReg; - //! Temporary association with `RATiedReg`. - RATiedReg* _tiedReg; + uint32_t _workId = 0; + //! Copy of ID used by \ref VirtReg. + uint32_t _virtId = 0; + + //! Permanent association with \ref VirtReg. + VirtReg* _virtReg = nullptr; + //! Temporary association with \ref RATiedReg. + RATiedReg* _tiedReg = nullptr; //! Stack slot associated with the register. - RAStackSlot* _stackSlot; + RAStackSlot* _stackSlot = nullptr; - //! Copy of a signature used by `VirtReg`. - RegInfo _info; + //! Copy of a signature used by \ref VirtReg. + RegInfo _info {}; //! RAPass specific flags used during analysis and allocation. - uint32_t _flags; + uint32_t _flags = 0; //! IDs of all physical registers this WorkReg has been allocated to. - uint32_t _allocatedMask; + uint32_t _allocatedMask = 0; //! IDs of all physical registers that are clobbered during the lifetime of //! this WorkReg. //! //! This mask should be updated by `RAPass::buildLiveness()`, because it's //! global and should be updated after unreachable code has been removed. - uint32_t _clobberSurvivalMask; + uint32_t _clobberSurvivalMask = 0; //! A byte-mask where each bit represents one valid byte of the register. - uint64_t _regByteMask; + uint64_t _regByteMask = 0; //! Argument index (or `kNoArgIndex` if none). - uint8_t _argIndex; + uint8_t _argIndex = kNoArgIndex; + //! Argument value index in the pack (0 by default). + uint8_t _argValueIndex = 0; //! Global home register ID (if any, assigned by RA). - uint8_t _homeRegId; + uint8_t _homeRegId = BaseReg::kIdBad; //! Global hint register ID (provided by RA or user). - uint8_t _hintRegId; + uint8_t _hintRegId = BaseReg::kIdBad; //! Live spans of the `VirtReg`. - LiveRegSpans _liveSpans; + LiveRegSpans _liveSpans {}; //! Live statistics. - RALiveStats _liveStats; + RALiveStats _liveStats {}; //! All nodes that read/write this VirtReg/WorkReg. ZoneVector _refs; @@ -984,10 +978,7 @@ class RAWorkReg { //! Stack allocation is preferred. kFlagStackPreferred = 0x00000004u, //! Marked for stack argument reassignment. - kFlagStackArgToStack = 0x00000008u, - - // TODO: Used? - kFlagDirtyStats = 0x80000000u + kFlagStackArgToStack = 0x00000008u }; enum ArgIndex : uint32_t { @@ -1001,19 +992,7 @@ class RAWorkReg { : _workId(workId), _virtId(vReg->id()), _virtReg(vReg), - _tiedReg(nullptr), - _stackSlot(nullptr), - _info(vReg->info()), - _flags(kFlagDirtyStats), - _allocatedMask(0), - _clobberSurvivalMask(0), - _regByteMask(0), - _argIndex(kNoArgIndex), - _homeRegId(BaseReg::kIdBad), - _hintRegId(BaseReg::kIdBad), - _liveSpans(), - _liveStats(), - _refs() {} + _info(vReg->info()) {} //! 
\} @@ -1063,7 +1042,12 @@ class RAWorkReg { inline bool hasArgIndex() const noexcept { return _argIndex != kNoArgIndex; } inline uint32_t argIndex() const noexcept { return _argIndex; } - inline void setArgIndex(uint32_t index) noexcept { _argIndex = uint8_t(index); } + inline uint32_t argValueIndex() const noexcept { return _argValueIndex; } + + inline void setArgIndex(uint32_t argIndex, uint32_t valueIndex) noexcept { + _argIndex = uint8_t(argIndex); + _argValueIndex = uint8_t(valueIndex); + } inline bool hasHomeRegId() const noexcept { return _homeRegId != BaseReg::kIdBad; } inline uint32_t homeRegId() const noexcept { return _homeRegId; } @@ -1090,5 +1074,4 @@ class RAWorkReg { ASMJIT_END_NAMESPACE -#endif // !ASMJIT_NO_COMPILER #endif // ASMJIT_CORE_RADEFS_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/core/ralocal.cpp b/libs/asmjit/src/asmjit/core/ralocal.cpp index 98d7d8f..35f6560 100644 --- a/libs/asmjit/src/asmjit/core/ralocal.cpp +++ b/libs/asmjit/src/asmjit/core/ralocal.cpp @@ -80,55 +80,59 @@ Error RALocalAllocator::makeInitialAssignment() noexcept { uint32_t numIter = 1; for (uint32_t iter = 0; iter < numIter; iter++) { - for (uint32_t i = 0; i < argCount; i++) { - // Unassigned argument. - VirtReg* virtReg = func->arg(i); - if (!virtReg) continue; - - // Unreferenced argument. - RAWorkReg* workReg = virtReg->workReg(); - if (!workReg) continue; - - // Overwritten argument. - uint32_t workId = workReg->workId(); - if (!liveIn.bitAt(workId)) - continue; + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + // Unassigned argument. + VirtReg* virtReg = func->argPack(argIndex)[valueIndex]; + if (!virtReg) + continue; - uint32_t group = workReg->group(); - if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone) - continue; + // Unreferenced argument. + RAWorkReg* workReg = virtReg->workReg(); + if (!workReg) + continue; - uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group); - if (iter == 0) { - // First iteration: Try to allocate to home RegId. - if (workReg->hasHomeRegId()) { - uint32_t physId = workReg->homeRegId(); - if (Support::bitTest(allocableRegs, physId)) { - _curAssignment.assign(group, workId, physId, true); - _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId()); - continue; + // Overwritten argument. + uint32_t workId = workReg->workId(); + if (!liveIn.bitAt(workId)) + continue; + + uint32_t group = workReg->group(); + if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone) + continue; + + uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group); + if (iter == 0) { + // First iteration: Try to allocate to home RegId. + if (workReg->hasHomeRegId()) { + uint32_t physId = workReg->homeRegId(); + if (Support::bitTest(allocableRegs, physId)) { + _curAssignment.assign(group, workId, physId, true); + _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId()); + continue; + } } - } - numIter = 2; - } - else { - // Second iteration: Pick any other register if the is an unassigned one or assign to stack. 
- if (allocableRegs) { - uint32_t physId = Support::ctz(allocableRegs); - _curAssignment.assign(group, workId, physId, true); - _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId()); + numIter = 2; } else { - // This register will definitely need stack, create the slot now and assign also `argIndex` - // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes. - RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg); - if (ASMJIT_UNLIKELY(!slot)) - return DebugUtils::errored(kErrorOutOfMemory); - - // This means STACK_ARG may be moved to STACK. - workReg->addFlags(RAWorkReg::kFlagStackArgToStack); - _pass->_numStackArgsToStackSlots++; + // Second iteration: Pick any other register if the is an unassigned one or assign to stack. + if (allocableRegs) { + uint32_t physId = Support::ctz(allocableRegs); + _curAssignment.assign(group, workId, physId, true); + _pass->_argsAssignment.assignRegInPack(argIndex, valueIndex, workReg->info().type(), physId, workReg->typeId()); + } + else { + // This register will definitely need stack, create the slot now and assign also `argIndex` + // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes. + RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg); + if (ASMJIT_UNLIKELY(!slot)) + return DebugUtils::errored(kErrorOutOfMemory); + + // This means STACK_ARG may be moved to STACK. + workReg->addFlags(RAWorkReg::kFlagStackArgToStack); + _pass->_numStackArgsToStackSlots++; + } } } } @@ -239,7 +243,7 @@ Error RALocalAllocator::switchToAssignment( // Reset as we will do some changes to the current assignment. runId = -1; - if (_archTraits.hasSwap(group)) { + if (_archTraits->hasSwap(group)) { ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId)); } else { @@ -370,7 +374,7 @@ Error RALocalAllocator::switchToAssignment( return kErrorOk; } -Error RALocalAllocator::spillGpScratchRegsBeforeEntry(uint32_t scratchRegs) noexcept { +Error RALocalAllocator::spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept { uint32_t group = BaseReg::kGroupGp; Support::BitWordIterator it(scratchRegs); @@ -596,7 +600,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // DECIDE whether to MOVE or SPILL. if (allocableRegs) { - uint32_t reassignedId = decideOnUnassignment(group, workId, assignedId, allocableRegs); + uint32_t reassignedId = decideOnReassignment(group, workId, assignedId, allocableRegs); if (reassignedId != RAAssignment::kPhysNone) { ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId)); allocableRegs ^= Support::bitMask(reassignedId); @@ -649,7 +653,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { // just a single instruction. However, swap is only available on few // architectures and it's definitely not available for each register // group. Calling `onSwapReg()` before checking these would be fatal. 
- if (_archTraits.hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) { + if (_archTraits->hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) { ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId)); thisTiedReg->markUseDone(); @@ -763,7 +767,7 @@ Error RALocalAllocator::allocInst(InstNode* node) noexcept { uint32_t dstId = it.next(); if (dstId == srcId) continue; - _pass->onEmitMove(workId, dstId, srcId); + _pass->emitMove(workId, dstId, srcId); } } @@ -897,7 +901,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co // Additional instructions emitted to switch from the current state to // the `target` state. This means that we have to move these instructions // into an independent code block and patch the jump location. - Operand& targetOp(node->opType(node->opCount() - 1)); + Operand& targetOp = node->op(node->opCount() - 1); if (ASMJIT_UNLIKELY(!targetOp.isLabel())) return DebugUtils::errored(kErrorInvalidState); @@ -912,7 +916,7 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co node->clearInstOptions(BaseInst::kOptionShortForm); // Finalize the switch assignment sequence. - ASMJIT_PROPAGATE(_pass->onEmitJump(savedTarget)); + ASMJIT_PROPAGATE(_pass->emitJump(savedTarget)); _cc->_setCursor(injectionPoint); _cc->bind(trampoline); } @@ -928,12 +932,12 @@ Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* co } Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, RABlock* cont) noexcept { + // TODO: Do we really need to use `cont`? + DebugUtils::unused(cont); + if (targets.empty()) return DebugUtils::errored(kErrorInvalidState); - if (targets.size() == 1) - return allocBranch(node, targets[0], cont); - // The cursor must point to the previous instruction for a possible instruction insertion. _cc->_setCursor(node->prev()); @@ -970,38 +974,39 @@ Error RALocalAllocator::allocJumpTable(InstNode* node, const RABlocks& targets, // ============================================================================ uint32_t RALocalAllocator::decideOnAssignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept { - DebugUtils::unused(group, physId); ASMJIT_ASSERT(allocableRegs != 0); + DebugUtils::unused(group, physId); RAWorkReg* workReg = workRegById(workId); - // HIGHEST PRIORITY: Home register id. + // Prefer home register id, if possible. if (workReg->hasHomeRegId()) { uint32_t homeId = workReg->homeRegId(); if (Support::bitTest(allocableRegs, homeId)) return homeId; } - // HIGH PRIORITY: Register IDs used upon block entries. + // Prefer registers used upon block entries. uint32_t previouslyAssignedRegs = workReg->allocatedMask(); if (allocableRegs & previouslyAssignedRegs) allocableRegs &= previouslyAssignedRegs; - if (Support::isPowerOf2(allocableRegs)) - return Support::ctz(allocableRegs); - - // TODO: This is not finished. return Support::ctz(allocableRegs); } -uint32_t RALocalAllocator::decideOnUnassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept { +uint32_t RALocalAllocator::decideOnReassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept { ASMJIT_ASSERT(allocableRegs != 0); + DebugUtils::unused(group, physId); - // TODO: - DebugUtils::unused(allocableRegs, group, workId, physId); + RAWorkReg* workReg = workRegById(workId); + + // Prefer allocating back to HomeId, if possible. 
+ if (workReg->hasHomeRegId()) { + if (Support::bitTest(allocableRegs, workReg->homeRegId())) + return workReg->homeRegId(); + } - // if (!_curAssignment.isPhysDirty(group, physId)) { - // } + // TODO: [Register Allocator] This could be improved. // Decided to SPILL. return RAAssignment::kPhysNone; diff --git a/libs/asmjit/src/asmjit/core/ralocal_p.h b/libs/asmjit/src/asmjit/core/ralocal_p.h index 0af595b..eebecc9 100644 --- a/libs/asmjit/src/asmjit/core/ralocal_p.h +++ b/libs/asmjit/src/asmjit/core/ralocal_p.h @@ -50,13 +50,13 @@ class RALocalAllocator { typedef RAAssignment::PhysToWorkMap PhysToWorkMap; typedef RAAssignment::WorkToPhysMap WorkToPhysMap; - //! Link to `RAPass`. - RAPass* _pass; + //! Link to `BaseRAPass`. + BaseRAPass* _pass; //! Link to `BaseCompiler`. BaseCompiler* _cc; //! Architecture traits. - RAArchTraits _archTraits; + const ArchTraits* _archTraits; //! Registers available to the allocator. RARegMask _availableRegs; //! Registers clobbered by the allocator. @@ -82,7 +82,7 @@ class RALocalAllocator { //! \name Construction & Destruction //! \{ - inline RALocalAllocator(RAPass* pass) noexcept + inline RALocalAllocator(BaseRAPass* pass) noexcept : _pass(pass), _cc(pass->cc()), _archTraits(pass->_archTraits), @@ -155,10 +155,10 @@ class RALocalAllocator { bool tryMode) noexcept; inline Error spillRegsBeforeEntry(RABlock* block) noexcept { - return spillGpScratchRegsBeforeEntry(block->entryScratchGpRegs()); + return spillScratchGpRegsBeforeEntry(block->entryScratchGpRegs()); } - Error spillGpScratchRegsBeforeEntry(uint32_t scratchRegs) noexcept; + Error spillScratchGpRegsBeforeEntry(uint32_t scratchRegs) noexcept; //! \} @@ -198,12 +198,13 @@ class RALocalAllocator { //! Decides on register assignment. uint32_t decideOnAssignment(uint32_t group, uint32_t workId, uint32_t assignedId, uint32_t allocableRegs) const noexcept; - //! Decides on whether to MOVE or SPILL the given WorkReg. + //! Decides on whether to MOVE or SPILL the given WorkReg, because it's allocated + //! in a physical register that have to be used by another WorkReg. //! //! The function must return either `RAAssignment::kPhysNone`, which means that - //! the WorkReg should be spilled, or a valid physical register ID, which means - //! that the register should be moved to that physical register instead. - uint32_t decideOnUnassignment(uint32_t group, uint32_t workId, uint32_t assignedId, uint32_t allocableRegs) const noexcept; + //! the WorkReg of `workId` should be spilled, or a valid physical register ID, + //! which means that the register should be moved to that physical register instead. + uint32_t decideOnReassignment(uint32_t group, uint32_t workId, uint32_t assignedId, uint32_t allocableRegs) const noexcept; //! Decides on best spill given a register mask `spillableRegs` uint32_t decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept; @@ -218,7 +219,7 @@ class RALocalAllocator { inline Error onMoveReg(uint32_t group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept { if (dstPhysId == srcPhysId) return kErrorOk; _curAssignment.reassign(group, workId, dstPhysId, srcPhysId); - return _pass->onEmitMove(workId, dstPhysId, srcPhysId); + return _pass->emitMove(workId, dstPhysId, srcPhysId); } //! Emits a swap between two physical registers and fixes their assignment. @@ -226,14 +227,14 @@ class RALocalAllocator { //! \note Target must support this operation otherwise this would ASSERT. 
inline Error onSwapReg(uint32_t group, uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept { _curAssignment.swap(group, aWorkId, aPhysId, bWorkId, bPhysId); - return _pass->onEmitSwap(aWorkId, aPhysId, bWorkId, bPhysId); + return _pass->emitSwap(aWorkId, aPhysId, bWorkId, bPhysId); } //! Emits a load from [VirtReg/WorkReg]'s spill slot to a physical register //! and makes it assigned and clean. inline Error onLoadReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept { _curAssignment.assign(group, workId, physId, RAAssignment::kClean); - return _pass->onEmitLoad(workId, physId); + return _pass->emitLoad(workId, physId); } //! Emits a save a physical register to a [VirtReg/WorkReg]'s spill slot, @@ -243,7 +244,7 @@ class RALocalAllocator { ASMJIT_ASSERT(_curAssignment.physToWorkId(group, physId) == workId); _curAssignment.makeClean(group, workId, physId); - return _pass->onEmitSave(workId, physId); + return _pass->emitSave(workId, physId); } //! Assigns a register, the content of it is undefined at this point. diff --git a/libs/asmjit/src/asmjit/core/rapass.cpp b/libs/asmjit/src/asmjit/core/rapass.cpp index 1174635..270027a 100644 --- a/libs/asmjit/src/asmjit/core/rapass.cpp +++ b/libs/asmjit/src/asmjit/core/rapass.cpp @@ -24,6 +24,7 @@ #include "../core/api-build_p.h" #ifndef ASMJIT_NO_COMPILER +#include "../core/formatter.h" #include "../core/ralocal_p.h" #include "../core/rapass_p.h" #include "../core/support.h" @@ -69,50 +70,17 @@ Error RABlock::prependSuccessor(RABlock* successor) noexcept { } // ============================================================================ -// [asmjit::RAPass - Construction / Destruction] +// [asmjit::BaseRAPass - Construction / Destruction] // ============================================================================ -RAPass::RAPass() noexcept - : FuncPass("RAPass"), - _allocator(), - _logger(nullptr), - _debugLogger(nullptr), - _loggerFlags(0), - _func(nullptr), - _stop(nullptr), - _extraBlock(nullptr), - _blocks(), - _exits(), - _pov(), - _instructionCount(0), - _createdBlockCount(0), - _sharedAssignments(), - _lastTimestamp(0), - _archRegsInfo(nullptr), - _archTraits(), - _physRegIndex(), - _physRegCount(), - _physRegTotal(0), - _scratchRegIndexes{}, - _availableRegs(), - _availableRegCount(), - _clobberedRegs(), - _globalMaxLiveCount(), - _globalLiveSpans {}, - _temporaryMem(), - _sp(), - _fp(), - _stackAllocator(), - _argsAssignment(), - _numStackArgsToStackSlots(0), - _maxWorkRegNameSize(0) {} -RAPass::~RAPass() noexcept {} +BaseRAPass::BaseRAPass() noexcept : FuncPass("BaseRAPass") {} +BaseRAPass::~BaseRAPass() noexcept {} // ============================================================================ -// [asmjit::RAPass - RunOnFunction] +// [asmjit::BaseRAPass - RunOnFunction] // ============================================================================ -static void RAPass_reset(RAPass* self, FuncDetail* funcDetail) noexcept { +static void RAPass_reset(BaseRAPass* self, FuncDetail* funcDetail) noexcept { ZoneAllocator* allocator = self->allocator(); self->_blocks.reset(); @@ -125,8 +93,7 @@ static void RAPass_reset(RAPass* self, FuncDetail* funcDetail) noexcept { self->_sharedAssignments.reset(); self->_lastTimestamp = 0; - self->_archRegsInfo = nullptr; - self->_archTraits.reset(); + self->_archTraits = nullptr; self->_physRegIndex.reset(); self->_physRegCount.reset(); self->_physRegTotal = 0; @@ -153,7 +120,7 @@ static void RAPass_reset(RAPass* self, FuncDetail* funcDetail) noexcept { 
self->_maxWorkRegNameSize = 0; } -static void RAPass_resetVirtRegData(RAPass* self) noexcept { +static void RAPass_resetVirtRegData(BaseRAPass* self) noexcept { // Zero everything so it cannot be used by accident. for (RAWorkReg* wReg : self->_workRegs) { VirtReg* vReg = wReg->virtReg(); @@ -161,7 +128,7 @@ static void RAPass_resetVirtRegData(RAPass* self) noexcept { } } -Error RAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept { +Error BaseRAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) { _allocator.reset(zone); #ifndef ASMJIT_NO_LOGGING @@ -194,7 +161,6 @@ Error RAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept // Must be called regardless of the allocation status. onDone(); - // TODO: I don't like this... // Reset possible connections introduced by the register allocator. RAPass_resetVirtRegData(this); @@ -223,7 +189,7 @@ Error RAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept return err; } -Error RAPass::onPerformAllSteps() noexcept { +Error BaseRAPass::onPerformAllSteps() noexcept { ASMJIT_PROPAGATE(buildCFG()); ASMJIT_PROPAGATE(buildViews()); ASMJIT_PROPAGATE(removeUnreachableBlocks()); @@ -249,10 +215,10 @@ Error RAPass::onPerformAllSteps() noexcept { } // ============================================================================ -// [asmjit::RAPass - CFG - Basic Block Management] +// [asmjit::BaseRAPass - CFG - Basic Block Management] // ============================================================================ -RABlock* RAPass::newBlock(BaseNode* initialNode) noexcept { +RABlock* BaseRAPass::newBlock(BaseNode* initialNode) noexcept { RABlock* block = zone()->newT(this); if (ASMJIT_UNLIKELY(!block)) return nullptr; @@ -264,7 +230,7 @@ RABlock* RAPass::newBlock(BaseNode* initialNode) noexcept { return block; } -RABlock* RAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept { +RABlock* BaseRAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept { if (cbLabel->hasPassData()) return cbLabel->passData(); @@ -351,7 +317,7 @@ RABlock* RAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) return block; } -Error RAPass::addBlock(RABlock* block) noexcept { +Error BaseRAPass::addBlock(RABlock* block) noexcept { ASMJIT_PROPAGATE(_blocks.willGrow(allocator())); block->_blockId = blockCount(); @@ -360,10 +326,10 @@ Error RAPass::addBlock(RABlock* block) noexcept { } // ============================================================================ -// [asmjit::RAPass - CFG - Build] +// [asmjit::BaseRAPass - CFG - Build] // ============================================================================ -Error RAPass::initSharedAssignments(const ZoneVector& sharedAssignmentsMap) noexcept { +Error BaseRAPass::initSharedAssignments(const ZoneVector& sharedAssignmentsMap) noexcept { if (sharedAssignmentsMap.empty()) return kErrorOk; @@ -382,9 +348,20 @@ Error RAPass::initSharedAssignments(const ZoneVector& sharedAssignment // the assignment itself. It will then be used instead of RABlock's own scratch // regs mask, as shared assignments have precedence. for (RABlock* block : _blocks) { + if (block->hasJumpTable()) { + const RABlocks& successors = block->successors(); + if (!successors.empty()) { + RABlock* firstSuccessor = successors[0]; + // NOTE: Shared assignments connect all possible successors so we only + // need the first to propagate exit scratch gp registers. 
+ ASMJIT_ASSERT(firstSuccessor->hasSharedAssignmentId()); + RASharedAssignment& sa = _sharedAssignments[firstSuccessor->sharedAssignmentId()]; + sa.addEntryScratchGpRegs(block->exitScratchGpRegs()); + } + } if (block->hasSharedAssignmentId()) { RASharedAssignment& sa = _sharedAssignments[block->sharedAssignmentId()]; - sa.addScratchGpRegs(block->_entryScratchGpRegs); + sa.addEntryScratchGpRegs(block->_entryScratchGpRegs); } } @@ -392,7 +369,7 @@ Error RAPass::initSharedAssignments(const ZoneVector& sharedAssignment } // ============================================================================ -// [asmjit::RAPass - CFG - Views Order] +// [asmjit::BaseRAPass - CFG - Views Order] // ============================================================================ class RABlockVisitItem { @@ -414,7 +391,7 @@ class RABlockVisitItem { uint32_t _index; }; -Error RAPass::buildViews() noexcept { +Error BaseRAPass::buildViews() noexcept { #ifndef ASMJIT_NO_LOGGING Logger* logger = debugLogger(); ASMJIT_RA_LOG_FORMAT("[RAPass::BuildViews]\n"); @@ -472,7 +449,7 @@ Error RAPass::buildViews() noexcept { if (block->hasSuccessors()) { sb.appendFormat(" #%u -> {", block->blockId()); _dumpBlockIds(sb, block->successors()); - sb.appendString("}\n"); + sb.append("}\n"); } else { sb.appendFormat(" #%u -> {Exit}\n", block->blockId()); @@ -486,7 +463,7 @@ Error RAPass::buildViews() noexcept { } // ============================================================================ -// [asmjit::RAPass - CFG - Dominators] +// [asmjit::BaseRAPass - CFG - Dominators] // ============================================================================ static ASMJIT_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept { @@ -498,7 +475,7 @@ static ASMJIT_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept } // Based on "A Simple, Fast Dominance Algorithm". -Error RAPass::buildDominators() noexcept { +Error BaseRAPass::buildDominators() noexcept { #ifndef ASMJIT_NO_LOGGING Logger* logger = debugLogger(); ASMJIT_RA_LOG_FORMAT("[RAPass::BuildDominators]\n"); @@ -545,7 +522,7 @@ Error RAPass::buildDominators() noexcept { return kErrorOk; } -bool RAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept { +bool BaseRAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept { ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks. ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `strictlyDominates()`. @@ -562,7 +539,7 @@ bool RAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexce return iDom != entryBlock; } -const RABlock* RAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept { +const RABlock* BaseRAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept { ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks. ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `properlyDominates()`. 
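The dominator hunks above (buildDominators(), _strictlyDominates(), _nearestCommonDominator()) follow the cited Cooper, Harvey, and Kennedy paper "A Simple, Fast Dominance Algorithm": immediate dominators are refined by intersecting the dominator chains of each block's already-processed predecessors, climbing the current dominator tree by post-order number. A compact sketch of the intersection step, using plain arrays indexed by block id instead of asmjit's RABlock/povOrder API (illustrative only):

#include <cstdint>
#include <vector>

// idom[b] is the current immediate-dominator candidate of block b and
// postOrder[b] its post-order number (the entry block has the largest one).
// Both fingers climb toward the entry until they meet; the meeting block
// dominates both inputs.
static uint32_t intersect(const std::vector<uint32_t>& idom,
                          const std::vector<uint32_t>& postOrder,
                          uint32_t b1, uint32_t b2) {
  while (b1 != b2) {
    while (postOrder[b1] < postOrder[b2]) b1 = idom[b1];
    while (postOrder[b2] < postOrder[b1]) b2 = idom[b2];
  }
  return b1;
}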
@@ -600,10 +577,10 @@ const RABlock* RAPass::_nearestCommonDominator(const RABlock* a, const RABlock* } // ============================================================================ -// [asmjit::RAPass - CFG - Utilities] +// [asmjit::BaseRAPass - CFG - Utilities] // ============================================================================ -Error RAPass::removeUnreachableBlocks() noexcept { +Error BaseRAPass::removeUnreachableBlocks() noexcept { uint32_t numAllBlocks = blockCount(); uint32_t numReachableBlocks = reachableBlockCount(); @@ -650,13 +627,13 @@ Error RAPass::removeUnreachableBlocks() noexcept { return kErrorOk; } -BaseNode* RAPass::findSuccessorStartingAt(BaseNode* node) noexcept { +BaseNode* BaseRAPass::findSuccessorStartingAt(BaseNode* node) noexcept { while (node && (node->isInformative() || node->hasNoEffect())) node = node->next(); return node; } -bool RAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept { +bool BaseRAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept { for (;;) { node = node->next(); if (node == target) @@ -671,10 +648,10 @@ bool RAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept { } // ============================================================================ -// [asmjit::RAPass - ?] +// [asmjit::BaseRAPass - ?] // ============================================================================ -Error RAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept { +Error BaseRAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept { // Checked by `asWorkReg()` - must be true. ASMJIT_ASSERT(vReg->_workReg == nullptr); @@ -704,7 +681,7 @@ Error RAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept { return kErrorOk; } -RAAssignment::WorkToPhysMap* RAPass::newWorkToPhysMap() noexcept { +RAAssignment::WorkToPhysMap* BaseRAPass::newWorkToPhysMap() noexcept { uint32_t count = workRegCount(); size_t size = WorkToPhysMap::sizeOf(count); @@ -723,7 +700,7 @@ RAAssignment::WorkToPhysMap* RAPass::newWorkToPhysMap() noexcept { return map; } -RAAssignment::PhysToWorkMap* RAPass::newPhysToWorkMap() noexcept { +RAAssignment::PhysToWorkMap* BaseRAPass::newPhysToWorkMap() noexcept { uint32_t count = physRegTotal(); size_t size = PhysToWorkMap::sizeOf(count); @@ -736,7 +713,7 @@ RAAssignment::PhysToWorkMap* RAPass::newPhysToWorkMap() noexcept { } // ============================================================================ -// [asmjit::RAPass - Registers - Liveness Analysis and Statistics] +// [asmjit::BaseRAPass - Registers - Liveness Analysis and Statistics] // ============================================================================ namespace LiveOps { @@ -812,7 +789,7 @@ namespace LiveOps { } } -ASMJIT_FAVOR_SPEED Error RAPass::buildLiveness() noexcept { +ASMJIT_FAVOR_SPEED Error BaseRAPass::buildLiveness() noexcept { #ifndef ASMJIT_NO_LOGGING Logger* logger = debugLogger(); StringTmp<512> sb; @@ -1095,36 +1072,41 @@ ASMJIT_FAVOR_SPEED Error RAPass::buildLiveness() noexcept { return kErrorOk; } -Error RAPass::assignArgIndexToWorkRegs() noexcept { +Error BaseRAPass::assignArgIndexToWorkRegs() noexcept { ZoneBitVector& liveIn = entryBlock()->liveIn(); uint32_t argCount = func()->argCount(); - for (uint32_t i = 0; i < argCount; i++) { - // Unassigned argument. - VirtReg* virtReg = func()->arg(i); - if (!virtReg) continue; + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + // Unassigned argument. 
+ VirtReg* virtReg = func()->argPack(argIndex)[valueIndex]; + if (!virtReg) + continue; - // Unreferenced argument. - RAWorkReg* workReg = virtReg->workReg(); - if (!workReg) continue; + // Unreferenced argument. + RAWorkReg* workReg = virtReg->workReg(); + if (!workReg) + continue; - // Overwritten argument. - uint32_t workId = workReg->workId(); - if (!liveIn.bitAt(workId)) - continue; + // Overwritten argument. + uint32_t workId = workReg->workId(); + if (!liveIn.bitAt(workId)) + continue; - workReg->setArgIndex(i); + workReg->setArgIndex(argIndex, valueIndex); + const FuncValue& arg = func()->detail().arg(argIndex, valueIndex); - const FuncValue& arg = func()->detail().arg(i); - if (arg.isReg() && _archRegsInfo->regInfo[arg.regType()].group() == workReg->group()) { - workReg->setHintRegId(arg.regId()); + if (arg.isReg() && _archTraits->regTypeToGroup(arg.regType()) == workReg->group()) { + workReg->setHintRegId(arg.regId()); + } } } return kErrorOk; } + // ============================================================================ -// [asmjit::RAPass - Allocation - Global] +// [asmjit::BaseRAPass - Allocation - Global] // ============================================================================ #ifndef ASMJIT_NO_LOGGING @@ -1133,15 +1115,15 @@ static void RAPass_dumpSpans(String& sb, uint32_t index, const LiveRegSpans& liv for (uint32_t i = 0; i < liveSpans.size(); i++) { const LiveRegSpan& liveSpan = liveSpans[i]; - if (i) sb.appendString(", "); + if (i) sb.append(", "); sb.appendFormat("[%u:%u@%u]", liveSpan.a, liveSpan.b, liveSpan.id); } - sb.appendChar('\n'); + sb.append('\n'); } #endif -Error RAPass::runGlobalAllocator() noexcept { +Error BaseRAPass::runGlobalAllocator() noexcept { ASMJIT_PROPAGATE(initGlobalLiveSpans()); for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { @@ -1151,16 +1133,19 @@ Error RAPass::runGlobalAllocator() noexcept { return kErrorOk; } -ASMJIT_FAVOR_SPEED Error RAPass::initGlobalLiveSpans() noexcept { +ASMJIT_FAVOR_SPEED Error BaseRAPass::initGlobalLiveSpans() noexcept { for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { size_t physCount = _physRegCount[group]; - LiveRegSpans* liveSpans = allocator()->allocT(physCount * sizeof(LiveRegSpans)); + LiveRegSpans* liveSpans = nullptr; - if (ASMJIT_UNLIKELY(!liveSpans)) - return DebugUtils::errored(kErrorOutOfMemory); + if (physCount) { + liveSpans = allocator()->allocT(physCount * sizeof(LiveRegSpans)); + if (ASMJIT_UNLIKELY(!liveSpans)) + return DebugUtils::errored(kErrorOutOfMemory); - for (size_t physId = 0; physId < physCount; physId++) - new(&liveSpans[physId]) LiveRegSpans(); + for (size_t physId = 0; physId < physCount; physId++) + new(&liveSpans[physId]) LiveRegSpans(); + } _globalLiveSpans[group] = liveSpans; } @@ -1168,7 +1153,7 @@ ASMJIT_FAVOR_SPEED Error RAPass::initGlobalLiveSpans() noexcept { return kErrorOk; } -ASMJIT_FAVOR_SPEED Error RAPass::binPack(uint32_t group) noexcept { +ASMJIT_FAVOR_SPEED Error BaseRAPass::binPack(uint32_t group) noexcept { if (workRegCount(group) == 0) return kErrorOk; @@ -1296,10 +1281,10 @@ ASMJIT_FAVOR_SPEED Error RAPass::binPack(uint32_t group) noexcept { sb.appendFormat(" Unassigned (%u): ", count); for (i = 0; i < numWorkRegs; i++) { RAWorkReg* workReg = workRegs[i]; - if (i) sb.appendString(", "); - sb.appendString(workReg->name()); + if (i) sb.append(", "); + sb.append(workReg->name()); } - sb.appendChar('\n'); + sb.append('\n'); logger->log(sb); }); } @@ -1308,10 +1293,10 @@ ASMJIT_FAVOR_SPEED Error RAPass::binPack(uint32_t group) 
noexcept { } // ============================================================================ -// [asmjit::RAPass - Allocation - Local] +// [asmjit::BaseRAPass - Allocation - Local] // ============================================================================ -Error RAPass::runLocalAllocator() noexcept { +Error BaseRAPass::runLocalAllocator() noexcept { RALocalAllocator lra(this); ASMJIT_PROPAGATE(lra.init()); @@ -1380,8 +1365,8 @@ Error RAPass::runLocalAllocator() noexcept { } ASMJIT_PROPAGATE(lra.allocInst(inst)); - if (inst->type() == BaseNode::kNodeFuncCall) - ASMJIT_PROPAGATE(onEmitPreCall(inst->as())); + if (inst->type() == BaseNode::kNodeInvoke) + ASMJIT_PROPAGATE(emitPreCall(inst->as())); else ASMJIT_PROPAGATE(lra.spillAfterAllocation(inst)); } @@ -1443,7 +1428,7 @@ Error RAPass::runLocalAllocator() noexcept { return kErrorOk; } -Error RAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept { +Error BaseRAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept { if (block->hasSharedAssignmentId()) { uint32_t sharedAssignmentId = block->sharedAssignmentId(); @@ -1499,7 +1484,7 @@ Error RAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, return blockEntryAssigned(as); } -Error RAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept { +Error BaseRAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignment& fromAssignment) noexcept { ASMJIT_ASSERT(_sharedAssignments[sharedAssignmentId].empty()); PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap()); @@ -1567,7 +1552,7 @@ Error RAPass::setSharedAssignment(uint32_t sharedAssignmentId, const RAAssignmen return blockEntryAssigned(as); } -Error RAPass::blockEntryAssigned(const RAAssignment& as) noexcept { +Error BaseRAPass::blockEntryAssigned(const RAAssignment& as) noexcept { // Complex allocation strategy requires to record register assignments upon // block entry (or per shared state). for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { @@ -1588,14 +1573,14 @@ Error RAPass::blockEntryAssigned(const RAAssignment& as) noexcept { } // ============================================================================ -// [asmjit::RAPass - Allocation - Utilities] +// [asmjit::BaseRAPass - Allocation - Utilities] // ============================================================================ -Error RAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept { +Error BaseRAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept { ASMJIT_ASSERT(alignment <= 64); if (_temporaryMem.isNone()) { - ASMJIT_PROPAGATE(cc()->_newStack(_temporaryMem.as(), size, alignment)); + ASMJIT_PROPAGATE(cc()->_newStack(&_temporaryMem.as(), size, alignment)); } else { ASMJIT_ASSERT(_temporaryMem.as().isRegHome()); @@ -1612,10 +1597,10 @@ Error RAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) n } // ============================================================================ -// [asmjit::RAPass - Allocation - Prolog / Epilog] +// [asmjit::BaseRAPass - Allocation - Prolog / Epilog] // ============================================================================ -Error RAPass::updateStackFrame() noexcept { +Error BaseRAPass::updateStackFrame() noexcept { // Update some StackFrame information that we updated during allocation. 
The // only information we don't have at the moment is final local stack size, // which is calculated last. @@ -1653,7 +1638,7 @@ Error RAPass::updateStackFrame() noexcept { return kErrorOk; } -Error RAPass::_markStackArgsToKeep() noexcept { +Error BaseRAPass::_markStackArgsToKeep() noexcept { FuncFrame& frame = func()->frame(); bool hasSAReg = frame.hasPreservedFP() || !frame.hasDynamicAlignment(); @@ -1684,7 +1669,7 @@ Error RAPass::_markStackArgsToKeep() noexcept { // NOTE: Update StackOffset here so when `_argsAssignment.updateFuncFrame()` // is called it will take into consideration moving to stack slots. Without // this we may miss some scratch registers later. - FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex()); + FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex()); dstArg.assignStackOffset(0); } } @@ -1692,7 +1677,7 @@ Error RAPass::_markStackArgsToKeep() noexcept { return kErrorOk; } -Error RAPass::_updateStackArgs() noexcept { +Error BaseRAPass::_updateStackArgs() noexcept { FuncFrame& frame = func()->frame(); RAWorkRegs& workRegs = _workRegs; uint32_t numWorkRegs = workRegCount(); @@ -1717,7 +1702,7 @@ Error RAPass::_updateStackArgs() noexcept { } } else { - FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex()); + FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex(), workReg->argValueIndex()); dstArg.setStackOffset(slot->offset()); } } @@ -1726,12 +1711,12 @@ Error RAPass::_updateStackArgs() noexcept { return kErrorOk; } -Error RAPass::insertPrologEpilog() noexcept { +Error BaseRAPass::insertPrologEpilog() noexcept { FuncFrame& frame = _func->frame(); cc()->_setCursor(func()); ASMJIT_PROPAGATE(cc()->emitProlog(frame)); - ASMJIT_PROPAGATE(cc()->emitArgsAssignment(frame, _argsAssignment)); + ASMJIT_PROPAGATE(_iEmitHelper->emitArgsAssignment(frame, _argsAssignment)); cc()->_setCursor(func()->exitNode()); ASMJIT_PROPAGATE(cc()->emitEpilog(frame)); @@ -1740,10 +1725,10 @@ Error RAPass::insertPrologEpilog() noexcept { } // ============================================================================ -// [asmjit::RAPass - Rewriter] +// [asmjit::BaseRAPass - Rewriter] // ============================================================================ -Error RAPass::rewrite() noexcept { +Error BaseRAPass::rewrite() noexcept { #ifndef ASMJIT_NO_LOGGING Logger* logger = debugLogger(); ASMJIT_RA_LOG_FORMAT("[RAPass::Rewrite]\n"); @@ -1752,7 +1737,7 @@ Error RAPass::rewrite() noexcept { return _rewrite(_func, _stop); } -ASMJIT_FAVOR_SPEED Error RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept { +ASMJIT_FAVOR_SPEED Error BaseRAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept { uint32_t virtCount = cc()->_vRegArray.size(); BaseNode* node = first; @@ -1799,7 +1784,7 @@ ASMJIT_FAVOR_SPEED Error RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexc RABlock* block = raInst->block(); if (!isNextTo(node, _func->exitNode())) { cc()->_setCursor(node->prev()); - ASMJIT_PROPAGATE(onEmitJump(_func->exitNode()->label())); + ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label())); } BaseNode* prev = node->prev(); @@ -1841,42 +1826,46 @@ ASMJIT_FAVOR_SPEED Error RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexc } // ============================================================================ -// [asmjit::RAPass - Logging] +// [asmjit::BaseRAPass - Logging] // ============================================================================ #ifndef ASMJIT_NO_LOGGING -static void RAPass_dumpRAInst(RAPass* pass, String& sb, const 
RAInst* raInst) noexcept { +static void RAPass_dumpRAInst(BaseRAPass* pass, String& sb, const RAInst* raInst) noexcept { const RATiedReg* tiedRegs = raInst->tiedRegs(); uint32_t tiedCount = raInst->tiedCount(); for (uint32_t i = 0; i < tiedCount; i++) { const RATiedReg& tiedReg = tiedRegs[i]; - if (i != 0) sb.appendChar(' '); + if (i != 0) + sb.append(' '); sb.appendFormat("%s{", pass->workRegById(tiedReg.workId())->name()); - sb.appendChar(tiedReg.isReadWrite() ? 'X' : - tiedReg.isRead() ? 'R' : - tiedReg.isWrite() ? 'W' : '?'); + sb.append(tiedReg.isReadWrite() ? 'X' : + tiedReg.isRead() ? 'R' : + tiedReg.isWrite() ? 'W' : '?'); if (tiedReg.hasUseId()) sb.appendFormat("|Use=%u", tiedReg.useId()); else if (tiedReg.isUse()) - sb.appendString("|Use"); + sb.append("|Use"); if (tiedReg.hasOutId()) sb.appendFormat("|Out=%u", tiedReg.outId()); else if (tiedReg.isOut()) - sb.appendString("|Out"); + sb.append("|Out"); + + if (tiedReg.isLast()) + sb.append("|Last"); - if (tiedReg.isLast()) sb.appendString("|Last"); - if (tiedReg.isKill()) sb.appendString("|Kill"); + if (tiedReg.isKill()) + sb.append("|Kill"); - sb.appendString("}"); + sb.append("}"); } } -ASMJIT_FAVOR_SIZE Error RAPass::annotateCode() noexcept { +ASMJIT_FAVOR_SIZE Error BaseRAPass::annotateCode() noexcept { uint32_t loggerFlags = _loggerFlags; StringTmp<1024> sb; @@ -1887,13 +1876,13 @@ ASMJIT_FAVOR_SIZE Error RAPass::annotateCode() noexcept { BaseNode* last = block->last(); for (;;) { sb.clear(); - Logging::formatNode(sb, loggerFlags, cc(), node); + Formatter::formatNode(sb, loggerFlags, cc(), node); if ((loggerFlags & FormatOptions::kFlagDebugRA) != 0 && node->isInst() && node->hasPassData()) { const RAInst* raInst = node->passData(); if (raInst->tiedCount() > 0) { sb.padEnd(40); - sb.appendString(" | "); + sb.append(" | "); RAPass_dumpRAInst(this, sb, raInst); } } @@ -1911,7 +1900,7 @@ ASMJIT_FAVOR_SIZE Error RAPass::annotateCode() noexcept { return kErrorOk; } -ASMJIT_FAVOR_SIZE Error RAPass::_dumpBlockIds(String& sb, const RABlocks& blocks) noexcept { +ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockIds(String& sb, const RABlocks& blocks) noexcept { for (uint32_t i = 0, size = blocks.size(); i < size; i++) { const RABlock* block = blocks[i]; if (i != 0) @@ -1922,7 +1911,7 @@ ASMJIT_FAVOR_SIZE Error RAPass::_dumpBlockIds(String& sb, const RABlocks& blocks return kErrorOk; } -ASMJIT_FAVOR_SIZE Error RAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept { +ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept { for (uint32_t liveType = 0; liveType < RABlock::kLiveCount; liveType++) { const char* bitsName = liveType == RABlock::kLiveIn ? "IN " : liveType == RABlock::kLiveOut ? 
"OUT " : @@ -1940,31 +1929,31 @@ ASMJIT_FAVOR_SIZE Error RAPass::_dumpBlockLiveness(String& sb, const RABlock* bl if (!n) sb.appendFormat(" %s [", bitsName); else - sb.appendString(", "); + sb.append(", "); - sb.appendString(wReg->name()); + sb.append(wReg->name()); n++; } } if (n) - sb.appendString("]\n"); + sb.append("]\n"); } return kErrorOk; } -ASMJIT_FAVOR_SIZE Error RAPass::_dumpLiveSpans(String& sb) noexcept { +ASMJIT_FAVOR_SIZE Error BaseRAPass::_dumpLiveSpans(String& sb) noexcept { uint32_t numWorkRegs = _workRegs.size(); uint32_t maxSize = _maxWorkRegNameSize; for (uint32_t workId = 0; workId < numWorkRegs; workId++) { RAWorkReg* workReg = _workRegs[workId]; - sb.appendString(" "); + sb.append(" "); size_t oldSize = sb.size(); - sb.appendString(workReg->name()); + sb.append(workReg->name()); sb.padEnd(oldSize + maxSize); RALiveStats& stats = workReg->liveStats(); @@ -1973,16 +1962,17 @@ ASMJIT_FAVOR_SIZE Error RAPass::_dumpLiveSpans(String& sb) noexcept { stats.width(), stats.freq(), stats.priority()); - sb.appendString(": "); + sb.append(": "); LiveRegSpans& liveSpans = workReg->liveSpans(); for (uint32_t x = 0; x < liveSpans.size(); x++) { const LiveRegSpan& liveSpan = liveSpans[x]; - if (x) sb.appendString(", "); + if (x) + sb.append(", "); sb.appendFormat("[%u:%u]", liveSpan.a, liveSpan.b); } - sb.appendChar('\n'); + sb.append('\n'); } return kErrorOk; diff --git a/libs/asmjit/src/asmjit/core/rapass_p.h b/libs/asmjit/src/asmjit/core/rapass_p.h index 5a575ad..d2fe505 100644 --- a/libs/asmjit/src/asmjit/core/rapass_p.h +++ b/libs/asmjit/src/asmjit/core/rapass_p.h @@ -27,6 +27,8 @@ #include "../core/api-config.h" #ifndef ASMJIT_NO_COMPILER +#include "../core/compiler.h" +#include "../core/emithelper_p.h" #include "../core/raassignment_p.h" #include "../core/radefs_p.h" #include "../core/rastack_p.h" @@ -42,6 +44,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::RABlock] // ============================================================================ +//! Basic block used by register allocator pass. class RABlock { public: ASMJIT_NONCOPYABLE(RABlock) @@ -53,123 +56,105 @@ class RABlock { kUnassignedId = 0xFFFFFFFFu }; + //! Basic block flags. enum Flags : uint32_t { //! Block has been constructed from nodes. kFlagIsConstructed = 0x00000001u, //! Block is reachable (set by `buildViews()`). kFlagIsReachable = 0x00000002u, + //! Block is a target (has an associated label or multiple labels). + kFlagIsTargetable = 0x00000004u, //! Block has been allocated. - kFlagIsAllocated = 0x00000004u, + kFlagIsAllocated = 0x00000008u, //! Block is a function-exit. - kFlagIsFuncExit = 0x00000008u, + kFlagIsFuncExit = 0x00000010u, //! Block has a terminator (jump, conditional jump, ret). - kFlagHasTerminator = 0x00000010u, + kFlagHasTerminator = 0x00000100u, //! Block naturally flows to the next block. - kFlagHasConsecutive = 0x00000020u, + kFlagHasConsecutive = 0x00000200u, + //! Block has a jump to a jump-table at the end. + kFlagHasJumpTable = 0x00000400u, //! Block contains fixed registers (precolored). - kFlagHasFixedRegs = 0x00000040u, + kFlagHasFixedRegs = 0x00000800u, //! Block contains function calls. - kFlagHasFuncCalls = 0x00000080u + kFlagHasFuncCalls = 0x00001000u }; //! Register allocator pass. - RAPass* _ra; + BaseRAPass* _ra; //! Block id (indexed from zero). - uint32_t _blockId; + uint32_t _blockId = kUnassignedId; //! Block flags, see `Flags`. - uint32_t _flags; + uint32_t _flags = 0; //! First `BaseNode` of this block (inclusive). - BaseNode* _first; + BaseNode* _first = nullptr; //! 
Last `BaseNode` of this block (inclusive). - BaseNode* _last; + BaseNode* _last = nullptr; //! Initial position of this block (inclusive). - uint32_t _firstPosition; + uint32_t _firstPosition = 0; //! End position of this block (exclusive). - uint32_t _endPosition; + uint32_t _endPosition = 0; //! Weight of this block (default 0, each loop adds one). - uint32_t _weight; + uint32_t _weight = 0; //! Post-order view order, used during POV construction. - uint32_t _povOrder; + uint32_t _povOrder = 0; //! Basic statistics about registers. - RARegsStats _regsStats; + RARegsStats _regsStats = RARegsStats(); //! Maximum live-count per register group. - RALiveCount _maxLiveCount; + RALiveCount _maxLiveCount = RALiveCount(); //! Timestamp (used by block visitors). - mutable uint64_t _timestamp; + mutable uint64_t _timestamp = 0; //! Immediate dominator of this block. - RABlock* _idom; + RABlock* _idom = nullptr; //! Block predecessors. - RABlocks _predecessors; + RABlocks _predecessors {}; //! Block successors. - RABlocks _successors; - - // TODO: Used? - RABlocks _doms; + RABlocks _successors {}; enum LiveType : uint32_t { - kLiveIn = 0, - kLiveOut = 1, - kLiveGen = 2, - kLiveKill = 3, - kLiveCount = 4 + kLiveIn = 0, + kLiveOut = 1, + kLiveGen = 2, + kLiveKill = 3, + kLiveCount = 4 }; //! Liveness in/out/use/kill. - ZoneBitVector _liveBits[kLiveCount]; + ZoneBitVector _liveBits[kLiveCount] {}; //! Shared assignment it or `Globals::kInvalidId` if this block doesn't //! have shared assignment. See `RASharedAssignment` for more details. - uint32_t _sharedAssignmentId; + uint32_t _sharedAssignmentId = Globals::kInvalidId; //! Scratch registers that cannot be allocated upon block entry. - uint32_t _entryScratchGpRegs; + uint32_t _entryScratchGpRegs = 0; //! Scratch registers used at exit, by a terminator instruction. - uint32_t _exitScratchGpRegs; + uint32_t _exitScratchGpRegs = 0; //! Register assignment (PhysToWork) on entry. - PhysToWorkMap* _entryPhysToWorkMap; + PhysToWorkMap* _entryPhysToWorkMap = nullptr; //! Register assignment (WorkToPhys) on entry. - WorkToPhysMap* _entryWorkToPhysMap; + WorkToPhysMap* _entryWorkToPhysMap = nullptr; //! \name Construction & Destruction //! \{ - inline RABlock(RAPass* ra) noexcept - : _ra(ra), - _blockId(kUnassignedId), - _flags(0), - _first(nullptr), - _last(nullptr), - _firstPosition(0), - _endPosition(0), - _weight(0), - _povOrder(kUnassignedId), - _regsStats(), - _maxLiveCount(), - _timestamp(0), - _idom(nullptr), - _predecessors(), - _successors(), - _doms(), - _sharedAssignmentId(Globals::kInvalidId), - _entryScratchGpRegs(0), - _exitScratchGpRegs(0), - _entryPhysToWorkMap(nullptr), - _entryWorkToPhysMap(nullptr) {} + inline RABlock(BaseRAPass* ra) noexcept + : _ra(ra) {} //! \} //! \name Accessors //! 
\{ - inline RAPass* pass() const noexcept { return _ra; } + inline BaseRAPass* pass() const noexcept { return _ra; } inline ZoneAllocator* allocator() const noexcept; inline uint32_t blockId() const noexcept { return _blockId; } @@ -182,6 +167,7 @@ class RABlock { inline bool isConstructed() const noexcept { return hasFlag(kFlagIsConstructed); } inline bool isReachable() const noexcept { return hasFlag(kFlagIsReachable); } + inline bool isTargetable() const noexcept { return hasFlag(kFlagIsTargetable); } inline bool isAllocated() const noexcept { return hasFlag(kFlagIsAllocated); } inline bool isFuncExit() const noexcept { return hasFlag(kFlagIsFuncExit); } @@ -191,12 +177,14 @@ class RABlock { } inline void makeReachable() noexcept { _flags |= kFlagIsReachable; } + inline void makeTargetable() noexcept { _flags |= kFlagIsTargetable; } inline void makeAllocated() noexcept { _flags |= kFlagIsAllocated; } inline const RARegsStats& regsStats() const noexcept { return _regsStats; } inline bool hasTerminator() const noexcept { return hasFlag(kFlagHasTerminator); } inline bool hasConsecutive() const noexcept { return hasFlag(kFlagHasConsecutive); } + inline bool hasJumpTable() const noexcept { return hasFlag(kFlagHasJumpTable); } inline bool hasPredecessors() const noexcept { return !_predecessors.empty(); } inline bool hasSuccessors() const noexcept { return !_successors.empty(); } @@ -221,6 +209,7 @@ class RABlock { inline uint32_t entryScratchGpRegs() const noexcept; inline uint32_t exitScratchGpRegs() const noexcept { return _exitScratchGpRegs; } + inline void addEntryScratchGpRegs(uint32_t regMask) noexcept { _entryScratchGpRegs |= regMask; } inline void addExitScratchGpRegs(uint32_t regMask) noexcept { _exitScratchGpRegs |= regMask; } inline bool hasSharedAssignmentId() const noexcept { return _sharedAssignmentId != Globals::kInvalidId; } @@ -470,7 +459,7 @@ class RAInstBuilder { //! \name Utilities //! \{ - ASMJIT_INLINE Error add(RAWorkReg* workReg, uint32_t flags, uint32_t allocable, uint32_t useId, uint32_t useRewriteMask, uint32_t outId, uint32_t outRewriteMask, uint32_t rmSize = 0) noexcept { + Error add(RAWorkReg* workReg, uint32_t flags, uint32_t allocable, uint32_t useId, uint32_t useRewriteMask, uint32_t outId, uint32_t outRewriteMask, uint32_t rmSize = 0) noexcept { uint32_t group = workReg->group(); RATiedReg* tiedReg = workReg->tiedReg(); @@ -510,7 +499,6 @@ class RAInstBuilder { if (ASMJIT_UNLIKELY(tiedReg->hasOutId())) return DebugUtils::errored(kErrorOverlappedRegs); tiedReg->setOutId(outId); - // TODO: ? _used[group] |= Support::bitMask(outId); } tiedReg->addRefCount(); @@ -523,7 +511,7 @@ class RAInstBuilder { } } - ASMJIT_INLINE Error addCallArg(RAWorkReg* workReg, uint32_t useId) noexcept { + Error addCallArg(RAWorkReg* workReg, uint32_t useId) noexcept { ASMJIT_ASSERT(useId != BaseReg::kIdBad); uint32_t flags = RATiedReg::kUse | RATiedReg::kRead | RATiedReg::kUseFixed; @@ -563,7 +551,7 @@ class RAInstBuilder { } } - ASMJIT_INLINE Error addCallRet(RAWorkReg* workReg, uint32_t outId) noexcept { + Error addCallRet(RAWorkReg* workReg, uint32_t outId) noexcept { ASMJIT_ASSERT(outId != BaseReg::kIdBad); uint32_t flags = RATiedReg::kOut | RATiedReg::kWrite | RATiedReg::kOutFixed; @@ -615,24 +603,19 @@ class RASharedAssignment { //! ISA limits (like jecx/loop instructions on x86) or because the registers //! are used by jump/branch instruction that uses registers to perform an //! indirect jump. - uint32_t _entryScratchGpRegs; + uint32_t _entryScratchGpRegs = 0; //! 
Union of all live-in registers. - ZoneBitVector _liveIn; + ZoneBitVector _liveIn {}; //! Register assignment (PhysToWork). - PhysToWorkMap* _physToWorkMap; + PhysToWorkMap* _physToWorkMap = nullptr; //! Register assignment (WorkToPhys). - WorkToPhysMap* _workToPhysMap; + WorkToPhysMap* _workToPhysMap = nullptr; - //! Provided for clarity, most likely never called as we initialize a vector - //! of shared assignments to zero. - inline RASharedAssignment() noexcept - : _entryScratchGpRegs(0), - _liveIn(), - _physToWorkMap(nullptr), - _workToPhysMap(nullptr) {} + //! Most likely never called as we initialize a vector of shared assignments to zero. + inline RASharedAssignment() noexcept {} inline uint32_t entryScratchGpRegs() const noexcept { return _entryScratchGpRegs; } - inline void addScratchGpRegs(uint32_t mask) noexcept { _entryScratchGpRegs |= mask; } + inline void addEntryScratchGpRegs(uint32_t mask) noexcept { _entryScratchGpRegs |= mask; } inline const ZoneBitVector& liveIn() const noexcept { return _liveIn; } @@ -650,13 +633,13 @@ class RASharedAssignment { }; // ============================================================================ -// [asmjit::RAPass] +// [asmjit::BaseRAPass] // ============================================================================ //! Register allocation pass used by `BaseCompiler`. -class RAPass : public FuncPass { +class BaseRAPass : public FuncPass { public: - ASMJIT_NONCOPYABLE(RAPass) + ASMJIT_NONCOPYABLE(BaseRAPass) typedef FuncPass Base; enum Weights : uint32_t { @@ -667,58 +650,59 @@ class RAPass : public FuncPass { typedef RAAssignment::WorkToPhysMap WorkToPhysMap; //! Allocator that uses zone passed to `runOnFunction()`. - ZoneAllocator _allocator; + ZoneAllocator _allocator {}; + //! Emit helper. + BaseEmitHelper* _iEmitHelper = nullptr; + //! Logger, disabled if null. - Logger* _logger; + Logger* _logger = nullptr; //! Debug logger, non-null only if `kOptionDebugPasses` option is set. - Logger* _debugLogger; + Logger* _debugLogger = nullptr; //! Logger flags. - uint32_t _loggerFlags; + uint32_t _loggerFlags = 0; //! Function being processed. - FuncNode* _func; + FuncNode* _func = nullptr; //! Stop node. - BaseNode* _stop; + BaseNode* _stop = nullptr; //! Node that is used to insert extra code after the function body. - BaseNode* _extraBlock; + BaseNode* _extraBlock = nullptr; //! Blocks (first block is the entry, always exists). - RABlocks _blocks; + RABlocks _blocks {}; //! Function exit blocks (usually one, but can contain more). - RABlocks _exits; + RABlocks _exits {}; //! Post order view (POV). - RABlocks _pov; + RABlocks _pov {}; //! Number of instruction nodes. - uint32_t _instructionCount; + uint32_t _instructionCount = 0; //! Number of created blocks (internal). - uint32_t _createdBlockCount; + uint32_t _createdBlockCount = 0; //! SharedState blocks. - ZoneVector _sharedAssignments; + ZoneVector _sharedAssignments {}; //! Timestamp generator (incremental). - mutable uint64_t _lastTimestamp; + mutable uint64_t _lastTimestamp = 0; - //!< Architecture registers information. - const ArchRegs* _archRegsInfo; //! Architecture traits. - RAArchTraits _archTraits; + const ArchTraits* _archTraits = nullptr; //! Index to physical registers in `RAAssignment::PhysToWorkMap`. - RARegIndex _physRegIndex; + RARegIndex _physRegIndex = RARegIndex(); //! Count of physical registers in `RAAssignment::PhysToWorkMap`. - RARegCount _physRegCount; + RARegCount _physRegCount = RARegCount(); //! Total number of physical registers. 
- uint32_t _physRegTotal; + uint32_t _physRegTotal = 0; //! Indexes of a possible scratch registers that can be selected if necessary. - uint8_t _scratchRegIndexes[2]; + uint8_t _scratchRegIndexes[2] {}; //! Registers available for allocation. - RARegMask _availableRegs; + RARegMask _availableRegs = RARegMask(); //! Count of physical registers per group. - RARegCount _availableRegCount; + RARegCount _availableRegCount = RARegCount(); //! Registers clobbered by the function. - RARegMask _clobberedRegs; + RARegMask _clobberedRegs = RARegMask(); //! Work registers (registers used by the function). RAWorkRegs _workRegs; @@ -728,47 +712,47 @@ class RAPass : public FuncPass { //! Register allocation strategy per register group. RAStrategy _strategy[BaseReg::kGroupVirt]; //! Global max live-count (from all blocks) per register group. - RALiveCount _globalMaxLiveCount; + RALiveCount _globalMaxLiveCount = RALiveCount(); //! Global live spans per register group. - LiveRegSpans* _globalLiveSpans[BaseReg::kGroupVirt]; + LiveRegSpans* _globalLiveSpans[BaseReg::kGroupVirt] {}; //! Temporary stack slot. - Operand _temporaryMem; + Operand _temporaryMem = Operand(); //! Stack pointer. - BaseReg _sp; + BaseReg _sp = BaseReg(); //! Frame pointer. - BaseReg _fp; + BaseReg _fp = BaseReg(); //! Stack manager. - RAStackAllocator _stackAllocator; + RAStackAllocator _stackAllocator {}; //! Function arguments assignment. - FuncArgsAssignment _argsAssignment; + FuncArgsAssignment _argsAssignment {}; //! Some StackArgs have to be assigned to StackSlots. - uint32_t _numStackArgsToStackSlots; + uint32_t _numStackArgsToStackSlots = 0; //! Maximum name-size computed from all WorkRegs. - uint32_t _maxWorkRegNameSize; + uint32_t _maxWorkRegNameSize = 0; //! Temporary string builder used to format comments. StringTmp<80> _tmpString; //! \name Construction & Reset //! \{ - RAPass() noexcept; - virtual ~RAPass() noexcept; + BaseRAPass() noexcept; + virtual ~BaseRAPass() noexcept; //! \} //! \name Accessors //! \{ - //! Returns `Logger` passed to `runOnFunction()`. + //! Returns \ref Logger passed to \ref runOnFunction(). inline Logger* logger() const noexcept { return _logger; } - //! Returns `Logger` passed to `runOnFunction()` or null if `kOptionDebugPasses` is not set. + //! Returns \ref Logger passed to \ref runOnFunction() or null if `kOptionDebugPasses` is not set. inline Logger* debugLogger() const noexcept { return _debugLogger; } - //! Returns `Zone` passed to `runOnFunction()`. + //! Returns \ref Zone passed to \ref runOnFunction(). inline Zone* zone() const noexcept { return _allocator.zone(); } - //! Returns `ZoneAllocator` used by the register allocator. + //! Returns \ref ZoneAllocator used by the register allocator. inline ZoneAllocator* allocator() const noexcept { return const_cast(&_allocator); } inline const ZoneVector& sharedAssignments() const { return _sharedAssignments; } @@ -800,7 +784,7 @@ class RAPass : public FuncPass { } //! Runs the register allocator for the given `func`. - Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept override; + Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) override; //! Performs all allocation steps sequentially, called by `runOnFunction()`. Error onPerformAllSteps() noexcept; @@ -810,11 +794,11 @@ class RAPass : public FuncPass { //! \name Events //! \{ - //! Called by `runOnFunction()` before the register allocation to initialize + //! Called by \ref runOnFunction() before the register allocation to initialize //! 
architecture-specific data and constraints. virtual void onInit() noexcept = 0; - //! Called by `runOnFunction()` after register allocation to clean everything + //! Called by \ref runOnFunction(` after register allocation to clean everything //! up. Called even if the register allocation failed. virtual void onDone() noexcept = 0; @@ -944,7 +928,7 @@ class RAPass : public FuncPass { //! information that is essential for further analysis and register //! allocation. //! - //! Use `RACFGBuilder` template that provides the necessary boilerplate. + //! Use `RACFGBuilderT` template that provides the necessary boilerplate. virtual Error buildCFG() noexcept = 0; //! Called after the CFG is built. @@ -1009,7 +993,7 @@ class RAPass : public FuncPass { //! \{ //! Returns a native size of the general-purpose register of the target architecture. - inline uint32_t gpSize() const noexcept { return _sp.size(); } + inline uint32_t registerSize() const noexcept { return _sp.size(); } inline uint32_t availableRegCount(uint32_t group) const noexcept { return _availableRegCount[group]; } inline RAWorkReg* workRegById(uint32_t workId) const noexcept { return _workRegs[workId]; } @@ -1049,9 +1033,11 @@ class RAPass : public FuncPass { inline RAStackSlot* getOrCreateStackSlot(RAWorkReg* workReg) noexcept { RAStackSlot* slot = workReg->stackSlot(); - if (slot) return slot; - slot = _stackAllocator.newSlot(_sp.id(), workReg->virtReg()->virtSize(), workReg->virtReg()->alignment(), 0); + if (slot) + return slot; + + slot = _stackAllocator.newSlot(_sp.id(), workReg->virtReg()->virtSize(), workReg->virtReg()->alignment(), RAStackSlot::kFlagRegHome); workReg->_stackSlot = slot; workReg->markStackUsed(); return slot; @@ -1128,7 +1114,7 @@ class RAPass : public FuncPass { //! \name Function Prolog & Epilog //! \{ - Error updateStackFrame() noexcept; + virtual Error updateStackFrame() noexcept; Error _markStackArgsToKeep() noexcept; Error _updateStackArgs() noexcept; Error insertPrologEpilog() noexcept; @@ -1159,14 +1145,14 @@ class RAPass : public FuncPass { //! \name Emit //! \{ - virtual Error onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept = 0; - virtual Error onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept = 0; + virtual Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept = 0; + virtual Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept = 0; - virtual Error onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept = 0; - virtual Error onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept = 0; + virtual Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept = 0; + virtual Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept = 0; - virtual Error onEmitJump(const Label& label) noexcept = 0; - virtual Error onEmitPreCall(FuncCallNode* call) noexcept = 0; + virtual Error emitJump(const Label& label) noexcept = 0; + virtual Error emitPreCall(InvokeNode* invokeNode) noexcept = 0; //! 
\} }; diff --git a/libs/asmjit/src/asmjit/core/rastack.cpp b/libs/asmjit/src/asmjit/core/rastack.cpp index 342b7ce..b886279 100644 --- a/libs/asmjit/src/asmjit/core/rastack.cpp +++ b/libs/asmjit/src/asmjit/core/rastack.cpp @@ -43,11 +43,9 @@ RAStackSlot* RAStackAllocator::newSlot(uint32_t baseRegId, uint32_t size, uint32 slot->_baseRegId = uint8_t(baseRegId); slot->_alignment = uint8_t(Support::max(alignment, 1)); - slot->_reserved[0] = 0; - slot->_reserved[1] = 0; + slot->_flags = uint16_t(flags); slot->_useCount = 0; slot->_size = size; - slot->_flags = flags; slot->_weight = 0; slot->_offset = 0; @@ -92,7 +90,7 @@ Error RAStackAllocator::calculateStackFrame() noexcept { uint32_t alignment = slot->alignment(); ASMJIT_ASSERT(alignment > 0); - uint32_t power = Support::ctz(alignment); + uint32_t power = Support::min(Support::ctz(alignment), 6); uint64_t weight; if (slot->isRegHome()) @@ -128,7 +126,8 @@ Error RAStackAllocator::calculateStackFrame() noexcept { ZoneVector gaps[kSizeCount - 1]; for (RAStackSlot* slot : _slots) { - if (slot->isStackArg()) continue; + if (slot->isStackArg()) + continue; uint32_t slotAlignment = slot->alignment(); uint32_t alignedOffset = Support::alignUp(offset, slotAlignment); diff --git a/libs/asmjit/src/asmjit/core/rastack_p.h b/libs/asmjit/src/asmjit/core/rastack_p.h index d45f7aa..33d4e1d 100644 --- a/libs/asmjit/src/asmjit/core/rastack_p.h +++ b/libs/asmjit/src/asmjit/core/rastack_p.h @@ -41,10 +41,14 @@ ASMJIT_BEGIN_NAMESPACE //! Stack slot. struct RAStackSlot { + //! Stack slot flags. + //! + //! TODO: kFlagStackArg is not used by the current implementation, do we need to keep it? enum Flags : uint32_t { - // TODO: kFlagRegHome is apparently not used, but isRegHome() is. - kFlagRegHome = 0x00000001u, //!< Stack slot is register home slot. - kFlagStackArg = 0x00000002u //!< Stack slot position matches argument passed via stack. + //! Stack slot is register home slot. + kFlagRegHome = 0x0001u, + //! Stack slot position matches argument passed via stack. + kFlagStackArg = 0x0002u }; enum ArgIndex : uint32_t { @@ -56,17 +60,15 @@ struct RAStackSlot { //! Minimum alignment required by the slot. uint8_t _alignment; //! Reserved for future use. - uint8_t _reserved[2]; + uint16_t _flags; //! Size of memory required by the slot. uint32_t _size; - //! Slot flags. - uint32_t _flags; //! Usage counter (one unit equals one memory access). uint32_t _useCount; - //! Weight of the slot (calculated by `calculateStackFrame()`). + //! Weight of the slot, calculated by \ref RAStackAllocator::calculateStackFrame(). uint32_t _weight; - //! Stack offset (calculated by `calculateStackFrame()`). + //! Stack offset, calculated by \ref RAStackAllocator::calculateStackFrame(). int32_t _offset; //! 
\name Accessors @@ -79,9 +81,11 @@ struct RAStackSlot { inline uint32_t alignment() const noexcept { return _alignment; } inline uint32_t flags() const noexcept { return _flags; } - inline void addFlags(uint32_t flags) noexcept { _flags |= flags; } - inline bool isRegHome() const noexcept { return (_flags & kFlagRegHome) != 0; } - inline bool isStackArg() const noexcept { return (_flags & kFlagStackArg) != 0; } + inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; } + inline void addFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags | flags); } + + inline bool isRegHome() const noexcept { return hasFlag(kFlagRegHome); } + inline bool isStackArg() const noexcept { return hasFlag(kFlagStackArg); } inline uint32_t useCount() const noexcept { return _useCount; } inline void addUseCount(uint32_t n = 1) noexcept { _useCount += n; } diff --git a/libs/asmjit/src/asmjit/core/string.cpp b/libs/asmjit/src/asmjit/core/string.cpp index 564a566..e059884 100644 --- a/libs/asmjit/src/asmjit/core/string.cpp +++ b/libs/asmjit/src/asmjit/core/string.cpp @@ -34,7 +34,7 @@ ASMJIT_BEGIN_NAMESPACE static const char String_baseN[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; constexpr size_t kMinAllocSize = 64; -constexpr size_t kMaxAllocSize = std::numeric_limits::max() - Globals::kGrowThreshold; +constexpr size_t kMaxAllocSize = SIZE_MAX - Globals::kGrowThreshold; // ============================================================================ // [asmjit::String] @@ -150,7 +150,7 @@ char* String::prepare(uint32_t op, size_t size) noexcept { } } -Error String::assignString(const char* data, size_t size) noexcept { +Error String::assign(const char* data, size_t size) noexcept { char* dst = nullptr; // Null terminated string without `size` specified. 
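// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// A minimal usage sketch of the String API rename carried by this asmjit
// update: assignString()/appendString()/appendChar() collapse into assign()/
// append() overloads (the old names survive as deprecated wrappers further
// below in this diff). The include path and the helper name are assumptions.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>

static void stringApiSketch() noexcept {
  asmjit::StringTmp<64> s;
  s.assign("workReg");             // was: s.assignString("workReg")
  s.append('{');                   // was: s.appendChar('{')
  s.appendFormat("Use=%u", 2u);    // unchanged printf-style formatting
  s.append("}");                   // was: s.appendString("}")
}
// ---------------------------------------------------------------------------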
@@ -222,7 +222,8 @@ Error String::_opString(uint32_t op, const char* str, size_t size) noexcept { return kErrorOk; char* p = prepare(op, size); - if (!p) return DebugUtils::errored(kErrorOutOfMemory); + if (!p) + return DebugUtils::errored(kErrorOutOfMemory); memcpy(p, str, size); return kErrorOk; @@ -230,7 +231,8 @@ Error String::_opString(uint32_t op, const char* str, size_t size) noexcept { Error String::_opChar(uint32_t op, char c) noexcept { char* p = prepare(op, 1); - if (!p) return DebugUtils::errored(kErrorOutOfMemory); + if (!p) + return DebugUtils::errored(kErrorOutOfMemory); *p = c; return kErrorOk; @@ -241,7 +243,8 @@ Error String::_opChars(uint32_t op, char c, size_t n) noexcept { return kErrorOk; char* p = prepare(op, n); - if (!p) return DebugUtils::errored(kErrorOutOfMemory); + if (!p) + return DebugUtils::errored(kErrorOutOfMemory); memset(p, c, n); return kErrorOk; @@ -349,7 +352,7 @@ Error String::_opHex(uint32_t op, const void* data, size_t size, char separator) return kErrorOk; if (separator) { - if (ASMJIT_UNLIKELY(size >= std::numeric_limits::max() / 3)) + if (ASMJIT_UNLIKELY(size >= SIZE_MAX / 3)) return DebugUtils::errored(kErrorOutOfMemory); dst = prepare(op, size * 3 - 1); @@ -369,7 +372,7 @@ Error String::_opHex(uint32_t op, const void* data, size_t size, char separator) } } else { - if (ASMJIT_UNLIKELY(size >= std::numeric_limits::max() / 2)) + if (ASMJIT_UNLIKELY(size >= SIZE_MAX / 2)) return DebugUtils::errored(kErrorOutOfMemory); dst = prepare(op, size * 2); @@ -404,6 +407,9 @@ Error String::_opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept { int fmtResult; size_t outputSize; + va_list apCopy; + va_copy(apCopy, ap); + if (remainingCapacity >= 128) { fmtResult = vsnprintf(data() + startAt, remainingCapacity, fmt, ap); outputSize = size_t(fmtResult); @@ -428,7 +434,7 @@ Error String::_opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept { if (ASMJIT_UNLIKELY(!p)) return DebugUtils::errored(kErrorOutOfMemory); - fmtResult = vsnprintf(p, outputSize + 1, fmt, ap); + fmtResult = vsnprintf(p, outputSize + 1, fmt, apCopy); ASMJIT_ASSERT(size_t(fmtResult) == outputSize); return kErrorOk; @@ -483,7 +489,7 @@ UNIT(core_string) { EXPECT(s.isLarge() == false); EXPECT(s.isExternal() == false); - EXPECT(s.assignChar('a') == kErrorOk); + EXPECT(s.assign('a') == kErrorOk); EXPECT(s.size() == 1); EXPECT(s.capacity() == String::kSSOCapacity); EXPECT(s.data()[0] == 'a'); @@ -502,7 +508,7 @@ UNIT(core_string) { EXPECT(s.eq("bbbb") == true); EXPECT(s.eq("bbbb", 4) == true); - EXPECT(s.assignString("abc") == kErrorOk); + EXPECT(s.assign("abc") == kErrorOk); EXPECT(s.size() == 3); EXPECT(s.capacity() == String::kSSOCapacity); EXPECT(s.data()[0] == 'a'); @@ -513,7 +519,7 @@ UNIT(core_string) { EXPECT(s.eq("abc", 3) == true); const char* large = "Large string that will not fit into SSO buffer"; - EXPECT(s.assignString(large) == kErrorOk); + EXPECT(s.assign(large) == kErrorOk); EXPECT(s.isLarge() == true); EXPECT(s.size() == strlen(large)); EXPECT(s.capacity() > String::kSSOCapacity); @@ -522,7 +528,7 @@ UNIT(core_string) { const char* additional = " (additional content)"; EXPECT(s.isLarge() == true); - EXPECT(s.appendString(additional) == kErrorOk); + EXPECT(s.append(additional) == kErrorOk); EXPECT(s.size() == strlen(large) + strlen(additional)); EXPECT(s.clear() == kErrorOk); diff --git a/libs/asmjit/src/asmjit/core/string.h b/libs/asmjit/src/asmjit/core/string.h index 22108f2..4c490d8 100644 --- a/libs/asmjit/src/asmjit/core/string.h +++ 
b/libs/asmjit/src/asmjit/core/string.h @@ -29,9 +29,33 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_support +//! \addtogroup asmjit_utilities //! \{ +// ============================================================================ +// [asmjit::FixedString] +// ============================================================================ + +//! A fixed string - only useful for strings that would never exceed `N - 1` +//! characters; always null-terminated. +template +union FixedString { + enum : uint32_t { + kNumU32 = uint32_t((N + sizeof(uint32_t) - 1) / sizeof(uint32_t)) + }; + + char str[kNumU32 * sizeof(uint32_t)]; + uint32_t u32[kNumU32]; + + //! \name Utilities + //! \{ + + inline bool eq(const char* other) const noexcept { + return strcmp(str, other) == 0; + } + + //! \} +}; // ============================================================================ // [asmjit::String] // ============================================================================ @@ -60,8 +84,10 @@ class String { //! String operation. enum Op : uint32_t { - kOpAssign = 0, - kOpAppend = 1 + //! Assignment - a new content replaces the current one. + kOpAssign = 0, + //! Append - a new content is appended to the string. + kOpAppend = 1 }; //! String format flags. @@ -114,12 +140,13 @@ class String { //! \name Construction & Destruction //! \{ + //! Creates a default-initialized string if zero length. inline String() noexcept : _small {} {} + //! Creates a string that takes ownership of the content of the `other` string. inline String(String&& other) noexcept { - for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_raw.uptr); i++) - _raw.uptr[i] = other._raw.uptr[i]; + _raw = other._raw; other._resetInternal(); } @@ -135,6 +162,12 @@ class String { //! \name Overloaded Operators //! \{ + inline String& operator=(String&& other) noexcept { + swap(other); + other.reset(); + return *this; + } + inline bool operator==(const char* other) const noexcept { return eq(other); } inline bool operator!=(const char* other) const noexcept { return !eq(other); } @@ -149,13 +182,21 @@ class String { inline bool isLarge() const noexcept { return _type >= kTypeLarge; } inline bool isExternal() const noexcept { return _type == kTypeExternal; } + //! Tests whether the string is empty. inline bool empty() const noexcept { return size() == 0; } + //! Returns the size of the string. inline size_t size() const noexcept { return isLarge() ? size_t(_large.size) : size_t(_type); } + //! Returns the capacity of the string. inline size_t capacity() const noexcept { return isLarge() ? _large.capacity : size_t(kSSOCapacity); } + //! Returns the data of the string. inline char* data() noexcept { return isLarge() ? _large.data : _small.data; } + //! \overload inline const char* data() const noexcept { return isLarge() ? _large.data : _small.data; } + inline char* start() noexcept { return data(); } + inline const char* start() const noexcept { return data(); } + inline char* end() noexcept { return data() + size(); } inline const char* end() const noexcept { return data() + size(); } @@ -164,102 +205,120 @@ class String { //! \name String Operations //! \{ - //! Clear the content of the string. + //! Swaps the content of this string with `other`. + inline void swap(String& other) noexcept { + std::swap(_raw, other._raw); + } + + //! Clears the content of the string. 
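// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// Hedged sketch of the move-assignment and swap() added to String above: the
// implementation swaps the raw storage and resets the source, so the
// destination takes ownership without copying. Helper name is hypothetical.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>
#include <utility>

static void stringMoveSketch() noexcept {
  asmjit::String a;
  a.assign("a payload long enough to live on the heap, not in the SSO buffer");
  asmjit::String b;
  b = std::move(a);   // swap + reset: `b` owns the buffer, `a` ends up empty
}
// ---------------------------------------------------------------------------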
ASMJIT_API Error clear() noexcept; ASMJIT_API char* prepare(uint32_t op, size_t size) noexcept; ASMJIT_API Error _opString(uint32_t op, const char* str, size_t size = SIZE_MAX) noexcept; - ASMJIT_API Error _opFormat(uint32_t op, const char* fmt, ...) noexcept; - ASMJIT_API Error _opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept; ASMJIT_API Error _opChar(uint32_t op, char c) noexcept; ASMJIT_API Error _opChars(uint32_t op, char c, size_t n) noexcept; ASMJIT_API Error _opNumber(uint32_t op, uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept; ASMJIT_API Error _opHex(uint32_t op, const void* data, size_t size, char separator = '\0') noexcept; + ASMJIT_API Error _opFormat(uint32_t op, const char* fmt, ...) noexcept; + ASMJIT_API Error _opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept; - //! Replace the string content to a string specified by `data` and `size`. If - //! `size` is `SIZE_MAX` then it's considered null-terminated and its length - //! will be obtained through `strlen()`. - ASMJIT_API Error assignString(const char* data, size_t size = SIZE_MAX) noexcept; - - //! Replace the current content by a formatted string `fmt`. - template - inline Error assignFormat(const char* fmt, Args&&... args) noexcept { - return _opFormat(kOpAssign, fmt, std::forward(args)...); - } + //! Replaces the current of the string with `data` of the given `size`. + //! + //! Null terminated strings can set `size` to `SIZE_MAX`. + ASMJIT_API Error assign(const char* data, size_t size = SIZE_MAX) noexcept; - //! Replace the current content by a formatted string `fmt` (va_list version). - inline Error assignVFormat(const char* fmt, va_list ap) noexcept { - return _opVFormat(kOpAssign, fmt, ap); + //! Replaces the current of the string with `other` string. + inline Error assign(const String& other) noexcept { + return assign(other.data(), other.size()); } - //! Replace the current content by a single `c` character. - inline Error assignChar(char c) noexcept { + //! Replaces the current of the string by a single `c` character. + inline Error assign(char c) noexcept { return _opChar(kOpAssign, c); } - //! Replace the current content by `c` character `n` times. + //! Replaces the current of the string by a `c` character, repeated `n` times. inline Error assignChars(char c, size_t n) noexcept { return _opChars(kOpAssign, c, n); } - //! Replace the current content by a formatted integer `i` (signed). + //! Replaces the current of the string by a formatted integer `i` (signed). inline Error assignInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { return _opNumber(kOpAssign, uint64_t(i), base, width, flags | kFormatSigned); } - //! Replace the current content by a formatted integer `i` (unsigned). + //! Replaces the current of the string by a formatted integer `i` (unsigned). inline Error assignUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { return _opNumber(kOpAssign, i, base, width, flags); } - //! Replace the current content by the given `data` converted to a HEX string. + //! Replaces the current of the string by the given `data` converted to a HEX string. inline Error assignHex(const void* data, size_t size, char separator = '\0') noexcept { return _opHex(kOpAssign, data, size, separator); } - //! Append string `str` of size `size` (or possibly null terminated). - inline Error appendString(const char* str, size_t size = SIZE_MAX) noexcept { - return _opString(kOpAppend, str, size); + //! 
Replaces the current of the string by a formatted string `fmt`. + template + inline Error assignFormat(const char* fmt, Args&&... args) noexcept { + return _opFormat(kOpAssign, fmt, std::forward(args)...); } - template - inline Error appendFormat(const char* fmt, Args&&... args) noexcept { - return _opFormat(kOpAppend, fmt, std::forward(args)...); + //! Replaces the current of the string by a formatted string `fmt` (va_list version). + inline Error assignVFormat(const char* fmt, va_list ap) noexcept { + return _opVFormat(kOpAssign, fmt, ap); } - //! Append a formatted string `fmt` (va_list version). - inline Error appendVFormat(const char* fmt, va_list ap) noexcept { - return _opVFormat(kOpAppend, fmt, ap); + //! Appends `str` having the given size `size` to the string. + //! + //! Null terminated strings can set `size` to `SIZE_MAX`. + inline Error append(const char* str, size_t size = SIZE_MAX) noexcept { + return _opString(kOpAppend, str, size); + } + + //! Appends `other` string to this string. + inline Error append(const String& other) noexcept { + return append(other.data(), other.size()); } - //! Append a single `c` character. - inline Error appendChar(char c) noexcept { + //! Appends a single `c` character. + inline Error append(char c) noexcept { return _opChar(kOpAppend, c); } - //! Append `c` character `n` times. + //! Appends `c` character repeated `n` times. inline Error appendChars(char c, size_t n) noexcept { return _opChars(kOpAppend, c, n); } - ASMJIT_API Error padEnd(size_t n, char c = ' ') noexcept; - - //! Append `i`. + //! Appends a formatted integer `i` (signed). inline Error appendInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { return _opNumber(kOpAppend, uint64_t(i), base, width, flags | kFormatSigned); } - //! Append `i`. + //! Appends a formatted integer `i` (unsigned). inline Error appendUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept { return _opNumber(kOpAppend, i, base, width, flags); } - //! Append the given `data` converted to a HEX string. + //! Appends the given `data` converted to a HEX string. inline Error appendHex(const void* data, size_t size, char separator = '\0') noexcept { return _opHex(kOpAppend, data, size, separator); } + //! Appends a formatted string `fmt` with `args`. + template + inline Error appendFormat(const char* fmt, Args&&... args) noexcept { + return _opFormat(kOpAppend, fmt, std::forward(args)...); + } + + //! Appends a formatted string `fmt` (va_list version). + inline Error appendVFormat(const char* fmt, va_list ap) noexcept { + return _opVFormat(kOpAppend, fmt, ap); + } + + ASMJIT_API Error padEnd(size_t n, char c = ' ') noexcept; + //! Truncate the string length into `newSize`. ASMJIT_API Error truncate(size_t newSize) noexcept; @@ -288,6 +347,20 @@ class String { } //! 
\} + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use assign() instead of assignString()") + inline Error assignString(const char* data, size_t size = SIZE_MAX) noexcept { return assign(data, size); } + + ASMJIT_DEPRECATED("Use assign() instead of assignChar()") + inline Error assignChar(char c) noexcept { return assign(c); } + + ASMJIT_DEPRECATED("Use append() instead of appendString()") + inline Error appendString(const char* data, size_t size = SIZE_MAX) noexcept { return append(data, size); } + + ASMJIT_DEPRECATED("Use append() instead of appendChar()") + inline Error appendChar(char c) noexcept { return append(c); } +#endif // !ASMJIT_NO_DEPRECATED }; // ============================================================================ @@ -320,31 +393,6 @@ class StringTmp : public String { //! \} }; -// ============================================================================ -// [asmjit::FixedString] -// ============================================================================ - -//! A fixed string - only useful for strings that would never exceed `N - 1` -//! characters; always null-terminated. -template -union FixedString { - enum : uint32_t { - kNumU32 = uint32_t((N + sizeof(uint32_t) - 1) / sizeof(uint32_t)) - }; - - char str[kNumU32 * sizeof(uint32_t)]; - uint32_t u32[kNumU32]; - - //! \name Utilities - //! \{ - - inline bool eq(const char* other) const noexcept { - return strcmp(str, other) == 0; - } - - //! \} -}; - //! \} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/support.cpp b/libs/asmjit/src/asmjit/core/support.cpp index 6b7e085..a99477d 100644 --- a/libs/asmjit/src/asmjit/core/support.cpp +++ b/libs/asmjit/src/asmjit/core/support.cpp @@ -100,7 +100,7 @@ static void testBitUtils() noexcept { EXPECT(Support::bitTest((1 << i), i) == true, "Support::bitTest(%X, %u) should return true", (1 << i), i); } - INFO("Support::lsbMask()"); + INFO("Support::lsbMask()"); for (i = 0; i < 32; i++) { uint32_t expectedBits = 0; for (uint32_t b = 0; b < i; b++) @@ -108,6 +108,14 @@ static void testBitUtils() noexcept { EXPECT(Support::lsbMask(i) == expectedBits); } + INFO("Support::lsbMask()"); + for (i = 0; i < 64; i++) { + uint64_t expectedBits = 0; + for (uint32_t b = 0; b < i; b++) + expectedBits |= uint64_t(1) << b; + EXPECT(Support::lsbMask(i) == expectedBits); + } + INFO("Support::popcnt()"); for (i = 0; i < 32; i++) EXPECT(Support::popcnt((uint32_t(1) << i)) == 1); for (i = 0; i < 64; i++) EXPECT(Support::popcnt((uint64_t(1) << i)) == 1); diff --git a/libs/asmjit/src/asmjit/core/support.h b/libs/asmjit/src/asmjit/core/support.h index 0f49b78..22a0fa0 100644 --- a/libs/asmjit/src/asmjit/core/support.h +++ b/libs/asmjit/src/asmjit/core/support.h @@ -32,7 +32,7 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_support +//! \addtogroup asmjit_utilities //! \{ //! Contains support classes and functions that may be used by AsmJit source @@ -69,27 +69,34 @@ namespace Internal { template<> struct AlignedInt { typedef uint64_t ASMJIT_ALIGN_TYPE(T, 4); }; template<> struct AlignedInt { typedef uint64_t T; }; - // IntBySize - Make an int-type by size (signed or unsigned) that is the + // StdInt - Make an int-type by size (signed or unsigned) that is the // same as types defined by . // Int32Or64 - Make an int-type that has at least 32 bits: [u]int[32|64]_t. - template - struct IntBySize {}; // Fail if not specialized. + template + struct StdInt {}; // Fail if not specialized. 
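// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// Quick sketch of Support::lsbMask(), whose 64-bit unit test is added in the
// support.cpp hunk above: it builds a mask of the `n` least significant bits.
// The reworked implementation later in this support.h diff also aims to avoid
// undefined shifts for full-width counts. Values below are expectations taken
// from the function's contract, not from the patch itself.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>
#include <cstdint>

static void lsbMaskSketch() noexcept {
  uint32_t m8  = asmjit::Support::lsbMask<uint32_t>(8);   // 0x000000FFu
  uint64_t m40 = asmjit::Support::lsbMask<uint64_t>(40);  // 0x000000FFFFFFFFFFu
  (void)m8; (void)m40;
}
// ---------------------------------------------------------------------------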
- template<> struct IntBySize<1, 0> { typedef uint8_t Type; }; - template<> struct IntBySize<1, 1> { typedef int8_t Type; }; - template<> struct IntBySize<2, 0> { typedef uint16_t Type; }; - template<> struct IntBySize<2, 1> { typedef int16_t Type; }; - template<> struct IntBySize<4, 0> { typedef uint32_t Type; }; - template<> struct IntBySize<4, 1> { typedef int32_t Type; }; - template<> struct IntBySize<8, 0> { typedef uint64_t Type; }; - template<> struct IntBySize<8, 1> { typedef int64_t Type; }; + template<> struct StdInt<1, 0> { typedef int8_t Type; }; + template<> struct StdInt<1, 1> { typedef uint8_t Type; }; + template<> struct StdInt<2, 0> { typedef int16_t Type; }; + template<> struct StdInt<2, 1> { typedef uint16_t Type; }; + template<> struct StdInt<4, 0> { typedef int32_t Type; }; + template<> struct StdInt<4, 1> { typedef uint32_t Type; }; + template<> struct StdInt<8, 0> { typedef int64_t Type; }; + template<> struct StdInt<8, 1> { typedef uint64_t Type; }; - template::value> - struct Int32Or64 : public IntBySize {}; + template::value> + struct Int32Or64 : public StdInt {}; } //! \endcond +// ============================================================================ +// [asmjit::Support - Basic Traits] +// ============================================================================ + +template +static constexpr bool isUnsigned() noexcept { return std::is_unsigned::value; } + // ============================================================================ // [asmjit::Support - FastUInt8] // ============================================================================ @@ -101,20 +108,32 @@ typedef unsigned int FastUInt8; #endif // ============================================================================ -// [asmjit::Support - IntBySize / Int32Or64] +// [asmjit::Support - asInt / asUInt / asNormalized] // ============================================================================ //! Casts an integer `x` to either `int32_t` or `int64_t` depending on `T`. template -static constexpr typename Internal::Int32Or64::Type asInt(T x) noexcept { return (typename Internal::Int32Or64::Type)x; } +static constexpr typename Internal::Int32Or64::Type asInt(const T& x) noexcept { + return (typename Internal::Int32Or64::Type)x; +} //! Casts an integer `x` to either `uint32_t` or `uint64_t` depending on `T`. template -static constexpr typename Internal::Int32Or64::Type asUInt(T x) noexcept { return (typename Internal::Int32Or64::Type)x; } +static constexpr typename Internal::Int32Or64::Type asUInt(const T& x) noexcept { + return (typename Internal::Int32Or64::Type)x; +} //! Casts an integer `x` to either `int32_t`, uint32_t`, `int64_t`, or `uint64_t` depending on `T`. template -static constexpr typename Internal::Int32Or64::Type asNormalized(T x) noexcept { return (typename Internal::Int32Or64::Type)x; } +static constexpr typename Internal::Int32Or64::Type asNormalized(const T& x) noexcept { + return (typename Internal::Int32Or64::Type)x; +} + +//! Casts an integer `x` to the same type as defined by ``. +template +static constexpr typename Internal::StdInt()>::Type asStdInt(const T& x) noexcept { + return (typename Internal::StdInt()>::Type)x; +} // ============================================================================ // [asmjit::Support - BitCast] @@ -142,7 +161,7 @@ static inline Dst bitCast(const Src& x) noexcept { return Internal::BitCastUnion // ============================================================================ //! 
Storage used to store a pack of bits (should by compatible with a machine word). -typedef Internal::IntBySize::Type BitWord; +typedef Internal::StdInt::Type BitWord; template static constexpr uint32_t bitSizeOf() noexcept { return uint32_t(sizeof(T) * 8u); } @@ -194,14 +213,14 @@ static constexpr T blsi(T x) noexcept { //! Generate a trailing bit-mask that has `n` least significant (trailing) bits set. template -static constexpr T lsbMask(CountT n) noexcept { +static constexpr T lsbMask(const CountT& n) noexcept { typedef typename std::make_unsigned::type U; return (sizeof(U) < sizeof(uintptr_t)) + // Prevent undefined behavior by using a larger type than T. ? T(U((uintptr_t(1) << n) - uintptr_t(1))) - // Shifting more bits than the type provides is UNDEFINED BEHAVIOR. - // In such case we trash the result by ORing it with a mask that has - // all bits set and discards the UNDEFINED RESULT of the shift. - : T(((U(1) << n) - U(1u)) | neg(U(n >= CountT(bitSizeOf())))); + // Prevent undefined behavior by performing `n & (nBits - 1)` so it's always within the range. + : shr(sar(neg(T(n)), bitSizeOf() - 1u), + neg(T(n)) & CountT(bitSizeOf() - 1u)); } //! Tests whether the given value `x` has `n`th bit set. @@ -252,63 +271,96 @@ static constexpr T fillTrailingBits(const T& x) noexcept { //! \cond namespace Internal { - static constexpr uint32_t constCtzImpl(uint32_t xAndNegX) noexcept { - return 31 - ((xAndNegX & 0x0000FFFFu) ? 16 : 0) - - ((xAndNegX & 0x00FF00FFu) ? 8 : 0) - - ((xAndNegX & 0x0F0F0F0Fu) ? 4 : 0) - - ((xAndNegX & 0x33333333u) ? 2 : 0) - - ((xAndNegX & 0x55555555u) ? 1 : 0); +namespace { + +template +struct BitScanData { T x; uint32_t n; }; + +template +struct BitScanCalc { + static constexpr BitScanData advanceLeft(const BitScanData& data, uint32_t n) noexcept { + return BitScanData { data.x << n, data.n + n }; } - static constexpr uint32_t constCtzImpl(uint64_t xAndNegX) noexcept { - return 63 - ((xAndNegX & 0x00000000FFFFFFFFu) ? 32 : 0) - - ((xAndNegX & 0x0000FFFF0000FFFFu) ? 16 : 0) - - ((xAndNegX & 0x00FF00FF00FF00FFu) ? 8 : 0) - - ((xAndNegX & 0x0F0F0F0F0F0F0F0Fu) ? 4 : 0) - - ((xAndNegX & 0x3333333333333333u) ? 2 : 0) - - ((xAndNegX & 0x5555555555555555u) ? 1 : 0); + static constexpr BitScanData advanceRight(const BitScanData& data, uint32_t n) noexcept { + return BitScanData { data.x >> n, data.n + n }; } - template - static constexpr uint32_t constCtz(T x) noexcept { - return constCtzImpl(x & neg(x)); + static constexpr BitScanData clz(const BitScanData& data) noexcept { + return BitScanCalc::clz(advanceLeft(data, data.x & (allOnes() << (bitSizeOf() - N)) ? uint32_t(0) : N)); } - static ASMJIT_INLINE uint32_t ctz(uint32_t x) noexcept { - #if defined(__GNUC__) - return uint32_t(__builtin_ctz(x)); - #elif defined(_MSC_VER) && (ASMJIT_ARCH_X86 || ASMJIT_ARCH_ARM) - unsigned long i; - _BitScanForward(&i, x); - return uint32_t(i); - #else - return constCtz(x); - #endif + static constexpr BitScanData ctz(const BitScanData& data) noexcept { + return BitScanCalc::ctz(advanceRight(data, data.x & (allOnes() >> (bitSizeOf() - N)) ? 
uint32_t(0) : N)); + } +}; + +template +struct BitScanCalc { + static constexpr BitScanData clz(const BitScanData& ctx) noexcept { + return BitScanData { 0, ctx.n - uint32_t(ctx.x >> (bitSizeOf() - 1)) }; } - static ASMJIT_INLINE uint32_t ctz(uint64_t x) noexcept { - #if defined(__GNUC__) - return uint32_t(__builtin_ctzll(x)); - #elif defined(_MSC_VER) && (ASMJIT_ARCH_X86 == 64 || ASMJIT_ARCH_ARM == 64) - unsigned long i; - _BitScanForward64(&i, x); - return uint32_t(i); - #else - return constCtz(x); - #endif + static constexpr BitScanData ctz(const BitScanData& ctx) noexcept { + return BitScanData { 0, ctx.n - uint32_t(ctx.x & 0x1) }; } +}; + +template +constexpr uint32_t clzFallback(const T& x) noexcept { + return BitScanCalc() / 2u>::clz(BitScanData{x, 1}).n; } + +template +constexpr uint32_t ctzFallback(const T& x) noexcept { + return BitScanCalc() / 2u>::ctz(BitScanData{x, 1}).n; +} + +template constexpr uint32_t constClz(const T& x) noexcept { return clzFallback(asUInt(x)); } +template constexpr uint32_t constCtz(const T& x) noexcept { return ctzFallback(asUInt(x)); } + +template inline uint32_t clzImpl(const T& x) noexcept { return constClz(x); } +template inline uint32_t ctzImpl(const T& x) noexcept { return constCtz(x); } + +#if !defined(ASMJIT_NO_INTRINSICS) +# if defined(__GNUC__) +template<> inline uint32_t clzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_clz(x)); } +template<> inline uint32_t clzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_clzll(x)); } +template<> inline uint32_t ctzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_ctz(x)); } +template<> inline uint32_t ctzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_ctzll(x)); } +# elif defined(_MSC_VER) +template<> inline uint32_t clzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanReverse(&i, x); return uint32_t(i ^ 31); } +template<> inline uint32_t ctzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanForward(&i, x); return uint32_t(i); } +# if ASMJIT_ARCH_X86 == 64 || ASMJIT_ARCH_ARM == 64 +template<> inline uint32_t clzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanReverse64(&i, x); return uint32_t(i ^ 63); } +template<> inline uint32_t ctzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanForward64(&i, x); return uint32_t(i); } +# endif +# endif +#endif + +} // {anonymous} +} // {Internal} //! \endcond +//! Count leading zeros in `x` (returns a position of a first bit set in `x`). +//! +//! \note The input MUST NOT be zero, otherwise the result is undefined. +template +static inline uint32_t clz(T x) noexcept { return Internal::clzImpl(asUInt(x)); } + +//! Count leading zeros in `x` (constant expression). +template +static constexpr inline uint32_t constClz(T x) noexcept { return Internal::constClz(asUInt(x)); } + //! Count trailing zeros in `x` (returns a position of a first bit set in `x`). //! //! \note The input MUST NOT be zero, otherwise the result is undefined. template -static inline uint32_t ctz(T x) noexcept { return Internal::ctz(asUInt(x)); } +static inline uint32_t ctz(T x) noexcept { return Internal::ctzImpl(asUInt(x)); } //! Count trailing zeros in `x` (constant expression). 
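// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// Usage sketch of the bit-scan helpers reworked above. clz()/ctz() dispatch to
// compiler intrinsics when available, while constClz()/constCtz() stay usable
// in constant expressions; input 0 is undefined for all of them, as the doc
// comments state. Helper name and the concrete values are illustrative.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>
#include <cstdint>

static void bitScanSketch() noexcept {
  uint32_t lead  = asmjit::Support::clz(uint32_t(1u));     // 31 leading zeros
  uint32_t trail = asmjit::Support::ctz(uint32_t(0x80u));  // 7 trailing zeros
  constexpr uint32_t kCompileTime = asmjit::Support::constCtz(8u);  // 3
  (void)lead; (void)trail; (void)kCompileTime;
}
// ---------------------------------------------------------------------------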
template -static constexpr uint32_t constCtz(T x) noexcept { return Internal::constCtz(asUInt(x)); } +static constexpr inline uint32_t constCtz(T x) noexcept { return Internal::constCtz(asUInt(x)); } // ============================================================================ // [asmjit::Support - PopCnt] @@ -391,6 +443,30 @@ static constexpr T max(const T& a, const T& b) noexcept { return a < b ? b : a; template static constexpr T max(const T& a, const T& b, Args&&... args) noexcept { return max(max(a, b), std::forward(args)...); } +// ============================================================================ +// [asmjit::Support - Immediate Helpers] +// ============================================================================ + +namespace Internal { + template + struct ImmConv { + static inline int64_t fromT(const T& x) noexcept { return int64_t(x); } + static inline T toT(int64_t x) noexcept { return T(uint64_t(x) & Support::allOnes::type>()); } + }; + + template + struct ImmConv { + static inline int64_t fromT(const T& x) noexcept { return int64_t(bitCast(double(x))); } + static inline T toT(int64_t x) noexcept { return T(bitCast(x)); } + }; +} + +template +static inline int64_t immediateFromT(const T& x) noexcept { return Internal::ImmConv::value>::fromT(x); } + +template +static inline T immediateToT(int64_t x) noexcept { return Internal::ImmConv::value>::toT(x); } + // ============================================================================ // [asmjit::Support - Overflow Arithmetic] // ============================================================================ @@ -398,30 +474,113 @@ static constexpr T max(const T& a, const T& b, Args&&... args) noexcept { return //! \cond namespace Internal { template - static ASMJIT_INLINE T addOverflowImpl(T x, T y, FastUInt8* of) noexcept { + ASMJIT_INLINE T addOverflowFallback(T x, T y, FastUInt8* of) noexcept { typedef typename std::make_unsigned::type U; U result = U(x) + U(y); - *of = FastUInt8(*of | FastUInt8(std::is_unsigned::value ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0)); + *of = FastUInt8(*of | FastUInt8(isUnsigned() ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0)); return T(result); } template - static ASMJIT_INLINE T subOverflowImpl(T x, T y, FastUInt8* of) noexcept { + ASMJIT_INLINE T subOverflowFallback(T x, T y, FastUInt8* of) noexcept { typedef typename std::make_unsigned::type U; U result = U(x) - U(y); - *of = FastUInt8(*of | FastUInt8(std::is_unsigned::value ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0)); + *of = FastUInt8(*of | FastUInt8(isUnsigned() ? 
result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0)); return T(result); } -} + + template + ASMJIT_INLINE T mulOverflowFallback(T x, T y, FastUInt8* of) noexcept { + typedef typename Internal::StdInt()>::Type I; + typedef typename std::make_unsigned::type U; + + U mask = allOnes(); + if (std::is_signed::value) { + U prod = U(I(x)) * U(I(y)); + *of = FastUInt8(*of | FastUInt8(I(prod) < I(std::numeric_limits::lowest()) || I(prod) > I(std::numeric_limits::max()))); + return T(I(prod & mask)); + } + else { + U prod = U(x) * U(y); + *of = FastUInt8(*of | FastUInt8((prod & ~mask) != 0)); + return T(prod & mask); + } + } + + template<> + ASMJIT_INLINE int64_t mulOverflowFallback(int64_t x, int64_t y, FastUInt8* of) noexcept { + int64_t result = int64_t(uint64_t(x) * uint64_t(y)); + *of = FastUInt8(*of | FastUInt8(x && (result / x != y))); + return result; + } + + template<> + ASMJIT_INLINE uint64_t mulOverflowFallback(uint64_t x, uint64_t y, FastUInt8* of) noexcept { + uint64_t result = x * y; + *of = FastUInt8(*of | FastUInt8(y != 0 && allOnes() / y < x)); + return result; + } + + // These can be specialized. + template ASMJIT_INLINE T addOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return addOverflowFallback(x, y, of); } + template ASMJIT_INLINE T subOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return subOverflowFallback(x, y, of); } + template ASMJIT_INLINE T mulOverflowImpl(const T& x, const T& y, FastUInt8* of) noexcept { return mulOverflowFallback(x, y, of); } + + #if defined(__GNUC__) && !defined(ASMJIT_NO_INTRINSICS) + #if defined(__clang__) || __GNUC__ >= 5 + #define ASMJIT_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, RESULT_T, BUILTIN) \ + template<> \ + ASMJIT_INLINE T FUNC(const T& x, const T& y, FastUInt8* of) noexcept { \ + RESULT_T result; \ + *of = FastUInt8(*of | (BUILTIN((RESULT_T)x, (RESULT_T)y, &result))); \ + return T(result); \ + } + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int32_t , int , __builtin_sadd_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , __builtin_uadd_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int64_t , long long , __builtin_saddll_overflow) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned long long, __builtin_uaddll_overflow) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int32_t , int , __builtin_ssub_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , __builtin_usub_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int64_t , long long , __builtin_ssubll_overflow) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned long long, __builtin_usubll_overflow) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int32_t , int , __builtin_smul_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint32_t, unsigned int , __builtin_umul_overflow ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int64_t , long long , __builtin_smulll_overflow) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint64_t, unsigned long long, __builtin_umulll_overflow) + #undef ASMJIT_ARITH_OVERFLOW_SPECIALIZE + #endif + #endif + + // There is a bug in MSVC that makes these specializations unusable, maybe in the future... 
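// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// Hedged sketch of the overflow-checked arithmetic above. The FastUInt8 flag
// is OR-accumulated across calls, so a chain of additions and multiplies can
// be validated with a single check at the end. The public addOverflow()/
// mulOverflow() wrappers are the ones defined just after this block.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>
#include <cstdint>

static bool overflowSketch() noexcept {
  asmjit::Support::FastUInt8 of = 0;
  uint32_t a = asmjit::Support::addOverflow(0xFFFFFFF0u, 0x20u, &of); // wraps, sets `of`
  uint32_t b = asmjit::Support::mulOverflow(a, 3u, &of);              // `of` stays set
  (void)b;
  return of == 0;  // false here: at least one step overflowed
}
// ---------------------------------------------------------------------------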
+ #if defined(_MSC_VER) && 0 + #define ASMJIT_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, ALT_T, BUILTIN) \ + template<> \ + ASMJIT_INLINE T FUNC(T x, T y, FastUInt8* of) noexcept { \ + ALT_T result; \ + *of = FastUInt8(*of | BUILTIN(0, (ALT_T)x, (ALT_T)y, &result)); \ + return T(result); \ + } + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , _addcarry_u32 ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , _subborrow_u32) + #if ARCH_BITS >= 64 + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned __int64 , _addcarry_u64 ) + ASMJIT_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned __int64 , _subborrow_u64) + #endif + #undef ASMJIT_ARITH_OVERFLOW_SPECIALIZE + #endif +} // {Internal} //! \endcond template -static ASMJIT_INLINE T addOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::addOverflowImpl(x, y, of)); } +static ASMJIT_INLINE T addOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::addOverflowImpl(asStdInt(x), asStdInt(y), of)); } + +template +static ASMJIT_INLINE T subOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::subOverflowImpl(asStdInt(x), asStdInt(y), of)); } template -static ASMJIT_INLINE T subOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::subOverflowImpl(x, y, of)); } +static ASMJIT_INLINE T mulOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::mulOverflowImpl(asStdInt(x), asStdInt(y), of)); } // ============================================================================ // [asmjit::Support - Alignment] @@ -429,7 +588,7 @@ static ASMJIT_INLINE T subOverflow(const T& x, const T& y, FastUInt8* of) noexce template static constexpr bool isAligned(X base, Y alignment) noexcept { - typedef typename Internal::IntBySize::Type U; + typedef typename Internal::StdInt::Type U; return ((U)base % (U)alignment) == 0; } @@ -442,27 +601,27 @@ static constexpr bool isPowerOf2(T x) noexcept { template static constexpr X alignUp(X x, Y alignment) noexcept { - typedef typename Internal::IntBySize::Type U; + typedef typename Internal::StdInt::Type U; return (X)( ((U)x + ((U)(alignment) - 1u)) & ~((U)(alignment) - 1u) ); } template static constexpr T alignUpPowerOf2(T x) noexcept { - typedef typename Internal::IntBySize::Type U; + typedef typename Internal::StdInt::Type U; return (T)(fillTrailingBits(U(x) - 1u) + 1u); } //! Returns either zero or a positive difference between `base` and `base` when //! aligned to `alignment`. template -static constexpr typename Internal::IntBySize::Type alignUpDiff(X base, Y alignment) noexcept { - typedef typename Internal::IntBySize::Type U; +static constexpr typename Internal::StdInt::Type alignUpDiff(X base, Y alignment) noexcept { + typedef typename Internal::StdInt::Type U; return alignUp(U(base), alignment) - U(base); } template static constexpr X alignDown(X x, Y alignment) noexcept { - typedef typename Internal::IntBySize::Type U; + typedef typename Internal::StdInt::Type U; return (X)( (U)x & ~((U)(alignment) - 1u) ); } @@ -475,7 +634,7 @@ static constexpr X alignDown(X x, Y alignment) noexcept { //! the number of BitWords to represent N bits, for example. 
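// ---------------------------------------------------------------------------
// [Editor's note - illustration only, not part of the upstream patch]
// Usage sketch of the alignment helpers shown above; this patch only swaps
// their internal IntBySize typedefs for StdInt, so the observable behavior is
// meant to stay the same. Helper name and the concrete values are illustrative.
// ---------------------------------------------------------------------------
#include <asmjit/core.h>
#include <cstddef>

static void alignmentSketch() noexcept {
  size_t up   = asmjit::Support::alignUp(size_t(13), 8);     // 16
  size_t down = asmjit::Support::alignDown(size_t(13), 8);   // 8
  bool   ok   = asmjit::Support::isAligned(size_t(16), 8);   // true
  size_t gap  = asmjit::Support::alignUpDiff(size_t(13), 8); // 3
  (void)up; (void)down; (void)ok; (void)gap;
}
// ---------------------------------------------------------------------------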
template static constexpr X numGranularized(X base, Y granularity) noexcept { - typedef typename Internal::IntBySize::Type U; + typedef typename Internal::StdInt::Type U; return X((U(base) + U(granularity) - 1) / U(granularity)); } @@ -499,8 +658,16 @@ static constexpr bool isInt4(T x) noexcept { typedef typename std::make_signed::type S; typedef typename std::make_unsigned::type U; - return std::is_signed::value ? isBetween(S(x), -8, 7) - : U(x) <= U(7u); + return std::is_signed::value ? isBetween(S(x), -8, 7) : U(x) <= U(7u); +} + +//! Checks whether the given integer `x` can be casted to a 7-bit signed integer. +template +static constexpr bool isInt7(T x) noexcept { + typedef typename std::make_signed::type S; + typedef typename std::make_unsigned::type U; + + return std::is_signed::value ? isBetween(S(x), -64, 63) : U(x) <= U(63u); } //! Checks whether the given integer `x` can be casted to an 8-bit signed integer. @@ -509,8 +676,27 @@ static constexpr bool isInt8(T x) noexcept { typedef typename std::make_signed::type S; typedef typename std::make_unsigned::type U; - return std::is_signed::value ? sizeof(T) <= 1 || isBetween(S(x), -128, 127) - : U(x) <= U(127u); + return std::is_signed::value ? sizeof(T) <= 1 || isBetween(S(x), -128, 127) : U(x) <= U(127u); +} + +//! Checks whether the given integer `x` can be casted to a 9-bit signed integer. +template +static constexpr bool isInt9(T x) noexcept { + typedef typename std::make_signed::type S; + typedef typename std::make_unsigned::type U; + + return std::is_signed::value ? sizeof(T) <= 1 || isBetween(S(x), -256, 255) + : sizeof(T) <= 1 || U(x) <= U(255u); +} + +//! Checks whether the given integer `x` can be casted to a 10-bit signed integer. +template +static constexpr bool isInt10(T x) noexcept { + typedef typename std::make_signed::type S; + typedef typename std::make_unsigned::type U; + + return std::is_signed::value ? sizeof(T) <= 1 || isBetween(S(x), -512, 511) + : sizeof(T) <= 1 || U(x) <= U(511u); } //! Checks whether the given integer `x` can be casted to a 16-bit signed integer. @@ -584,6 +770,16 @@ static constexpr bool isIntOrUInt32(T x) noexcept { return sizeof(T) <= 4 ? 
true : (uint32_t(uint64_t(x) >> 32) + 1u) <= 1u; } +static bool inline isEncodableOffset32(int32_t offset, uint32_t nBits) noexcept { + uint32_t nRev = 32 - nBits; + return Support::sar(Support::shl(offset, nRev), nRev) == offset; +} + +static bool inline isEncodableOffset64(int64_t offset, uint32_t nBits) noexcept { + uint32_t nRev = 64 - nBits; + return Support::sar(Support::shl(offset, nRev), nRev) == offset; +} + // ============================================================================ // [asmjit::Support - ByteSwap] // ============================================================================ @@ -620,10 +816,10 @@ static inline uint32_t byteShiftOfDWordStruct(uint32_t index) noexcept { // ============================================================================ template -static constexpr T asciiToLower(T c) noexcept { return c ^ (T(c >= T('A') && c <= T('Z')) << 5); } +static constexpr T asciiToLower(T c) noexcept { return T(c ^ T(T(c >= T('A') && c <= T('Z')) << 5)); } template -static constexpr T asciiToUpper(T c) noexcept { return c ^ (T(c >= T('a') && c <= T('z')) << 5); } +static constexpr T asciiToUpper(T c) noexcept { return T(c ^ T(T(c >= T('a') && c <= T('z')) << 5)); } static ASMJIT_INLINE size_t strLen(const char* s, size_t maxSize) noexcept { size_t i = 0; @@ -920,6 +1116,7 @@ static inline void writeU64uBE(void* p, uint64_t x) noexcept { writeU64xBE<1>(p, // [asmjit::Support - Operators] // ============================================================================ +//! \cond INTERNAL struct Set { template static inline T op(T x, T y) noexcept { DebugUtils::unused(x); return y; } }; struct SetNot { template static inline T op(T x, T y) noexcept { DebugUtils::unused(x); return ~y; } }; struct And { template static inline T op(T x, T y) noexcept { return x & y; } }; @@ -931,6 +1128,7 @@ struct Add { template static inline T op(T x, T y) noexcept { ret struct Sub { template static inline T op(T x, T y) noexcept { return x - y; } }; struct Min { template static inline T op(T x, T y) noexcept { return min(x, y); } }; struct Max { template static inline T op(T x, T y) noexcept { return max(x, y); } }; +//! 
\endcond // ============================================================================ // [asmjit::Support - BitWordIterator] @@ -1081,6 +1279,13 @@ static inline size_t bitVectorIndexOf(T* buf, size_t start, bool value) noexcept template class BitVectorIterator { public: + const T* _ptr; + size_t _idx; + size_t _end; + T _current; + + ASMJIT_INLINE BitVectorIterator(const BitVectorIterator& other) noexcept = default; + ASMJIT_INLINE BitVectorIterator(const T* data, size_t numBitWords, size_t start = 0) noexcept { init(data, numBitWords, start); } @@ -1126,11 +1331,6 @@ class BitVectorIterator { ASMJIT_ASSERT(_current != T(0)); return _idx + ctz(_current); } - - const T* _ptr; - size_t _idx; - size_t _end; - T _current; }; // ============================================================================ @@ -1142,6 +1342,12 @@ class BitVectorOpIterator { public: static constexpr uint32_t kTSizeInBits = bitSizeOf(); + const T* _aPtr; + const T* _bPtr; + size_t _idx; + size_t _end; + T _current; + ASMJIT_INLINE BitVectorOpIterator(const T* aData, const T* bData, size_t numBitWords, size_t start = 0) noexcept { init(aData, bData, numBitWords, start); } @@ -1184,12 +1390,6 @@ class BitVectorOpIterator { _current = bitWord; return n; } - - const T* _aPtr; - const T* _bPtr; - size_t _idx; - size_t _end; - T _current; }; // ============================================================================ @@ -1290,7 +1490,6 @@ namespace Internal { } //! \endcond - //! Quick sort implementation. //! //! The main reason to provide a custom qsort implementation is that we needed @@ -1301,66 +1500,6 @@ static inline void qSort(T* base, size_t size, const CompareT& cmp = CompareT()) Internal::QSortImpl::sort(base, size, cmp); } -// ============================================================================ -// [asmjit::Support - Iterators] -// ============================================================================ - -template -class Iterator { -public: - constexpr Iterator(T* p) noexcept : _p(p) {} - constexpr Iterator(const Iterator& other) noexcept = default; - - inline Iterator& operator=(const Iterator& other) noexcept = default; - - inline Iterator operator+(size_t n) const noexcept { return Iterator(_p + n); } - inline Iterator operator-(size_t n) const noexcept { return Iterator(_p - n); } - - inline Iterator& operator+=(size_t n) noexcept { _p += n; return *this; } - inline Iterator& operator-=(size_t n) noexcept { _p -= n; return *this; } - - inline Iterator& operator++() noexcept { return operator+=(1); } - inline Iterator& operator--() noexcept { return operator-=(1); } - - inline Iterator operator++(int) noexcept { T* prev = _p; operator+=(1); return Iterator(prev); } - inline Iterator operator--(int) noexcept { T* prev = _p; operator-=(1); return Iterator(prev); } - - inline bool operator==(const Iterator& other) noexcept { return _p == other._p; } - inline bool operator!=(const Iterator& other) noexcept { return _p != other._p; } - - inline T& operator*() const noexcept { return _p[0]; } - - T* _p; -}; - -template -class ReverseIterator { -public: - constexpr ReverseIterator(T* p) noexcept : _p(p) {} - constexpr ReverseIterator(const ReverseIterator& other) noexcept = default; - - inline ReverseIterator& operator=(const ReverseIterator& other) noexcept = default; - - inline ReverseIterator operator+(size_t n) const noexcept { return ReverseIterator(_p + n); } - inline ReverseIterator operator-(size_t n) const noexcept { return ReverseIterator(_p - n); } - - inline ReverseIterator& 
operator+=(size_t n) noexcept { _p -= n; return *this; } - inline ReverseIterator& operator-=(size_t n) noexcept { _p += n; return *this; } - - inline ReverseIterator& operator++() noexcept { return operator+=(1); } - inline ReverseIterator& operator--() noexcept { return operator-=(1); } - - inline ReverseIterator operator++(int) noexcept { T* prev = _p; operator+=(1); return ReverseIterator(prev); } - inline ReverseIterator operator--(int) noexcept { T* prev = _p; operator-=(1); return ReverseIterator(prev); } - - inline bool operator==(const ReverseIterator& other) noexcept { return _p == other._p; } - inline bool operator!=(const ReverseIterator& other) noexcept { return _p != other._p; } - - inline T& operator*() const noexcept { return _p[-1]; } - - T* _p; -}; - // ============================================================================ // [asmjit::Support::Temporary] // ============================================================================ diff --git a/libs/asmjit/src/asmjit/core/target.cpp b/libs/asmjit/src/asmjit/core/target.cpp index ad120b4..9ce94f3 100644 --- a/libs/asmjit/src/asmjit/core/target.cpp +++ b/libs/asmjit/src/asmjit/core/target.cpp @@ -31,8 +31,7 @@ ASMJIT_BEGIN_NAMESPACE // ============================================================================ Target::Target() noexcept - : _targetType(kTargetNone), - _codeInfo() {} + : _environment() {} Target::~Target() noexcept {} ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/target.h b/libs/asmjit/src/asmjit/core/target.h index f3da0e5..f2045c0 100644 --- a/libs/asmjit/src/asmjit/core/target.h +++ b/libs/asmjit/src/asmjit/core/target.h @@ -24,7 +24,7 @@ #ifndef ASMJIT_CORE_TARGET_H_INCLUDED #define ASMJIT_CORE_TARGET_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/func.h" ASMJIT_BEGIN_NAMESPACE @@ -36,20 +36,13 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::CodeInfo] // ============================================================================ +#ifndef ASMJIT_NO_DEPRECATED //! Basic information about a code (or target). It describes its architecture, //! code generation mode (or optimization level), and base address. -class CodeInfo { +class ASMJIT_DEPRECATED_STRUCT("Use Environment instead of CodeInfo") CodeInfo { public: - //!< Architecture information. - ArchInfo _archInfo; - //! Natural stack alignment (ARCH+OS). - uint8_t _stackAlignment; - //! Default CDECL calling convention. - uint8_t _cdeclCallConv; - //! Default STDCALL calling convention. - uint8_t _stdCallConv; - //! Default FASTCALL calling convention. - uint8_t _fastCallConv; + //!< Environment information. + Environment _environment; //! Base address. uint64_t _baseAddress; @@ -57,46 +50,35 @@ class CodeInfo { //! 
\{ inline CodeInfo() noexcept - : _archInfo(), - _stackAlignment(0), - _cdeclCallConv(CallConv::kIdNone), - _stdCallConv(CallConv::kIdNone), - _fastCallConv(CallConv::kIdNone), + : _environment(), _baseAddress(Globals::kNoBaseAddress) {} - inline explicit CodeInfo(uint32_t archId, uint32_t archMode = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept - : _archInfo(archId, archMode), - _stackAlignment(0), - _cdeclCallConv(CallConv::kIdNone), - _stdCallConv(CallConv::kIdNone), - _fastCallConv(CallConv::kIdNone), + inline explicit CodeInfo(uint32_t arch, uint32_t subArch = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept + : _environment(arch, subArch), _baseAddress(baseAddress) {} + inline explicit CodeInfo(const Environment& environment, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept + : _environment(environment), + _baseAddress(baseAddress) {} + + inline CodeInfo(const CodeInfo& other) noexcept { init(other); } inline bool isInitialized() const noexcept { - return _archInfo.archId() != ArchInfo::kIdNone; + return _environment.arch() != Environment::kArchUnknown; } inline void init(const CodeInfo& other) noexcept { *this = other; } - inline void init(uint32_t archId, uint32_t archMode = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept { - _archInfo.init(archId, archMode); - _stackAlignment = 0; - _cdeclCallConv = CallConv::kIdNone; - _stdCallConv = CallConv::kIdNone; - _fastCallConv = CallConv::kIdNone; + inline void init(uint32_t arch, uint32_t subArch = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept { + _environment.init(arch, subArch); _baseAddress = baseAddress; } inline void reset() noexcept { - _archInfo.reset(); - _stackAlignment = 0; - _cdeclCallConv = CallConv::kIdNone; - _stdCallConv = CallConv::kIdNone; - _fastCallConv = CallConv::kIdNone; + _environment.reset(); _baseAddress = Globals::kNoBaseAddress; } @@ -115,39 +97,28 @@ class CodeInfo { //! \name Accessors //! \{ - //! Returns the target architecture information, see `ArchInfo`. - inline const ArchInfo& archInfo() const noexcept { return _archInfo; } + //! Returns the target environment information, see \ref Environment. + inline const Environment& environment() const noexcept { return _environment; } - //! Returns the target architecture id, see `ArchInfo::Id`. - inline uint32_t archId() const noexcept { return _archInfo.archId(); } - //! Returns the target architecture sub-type, see `ArchInfo::SubId`. - inline uint32_t archSubId() const noexcept { return _archInfo.archSubId(); } + //! Returns the target architecture, see \ref Environment::Arch. + inline uint32_t arch() const noexcept { return _environment.arch(); } + //! Returns the target sub-architecture, see \ref Environment::SubArch. + inline uint32_t subArch() const noexcept { return _environment.subArch(); } //! Returns the native size of the target's architecture GP register. - inline uint32_t gpSize() const noexcept { return _archInfo.gpSize(); } - //! Returns the number of GP registers of the target's architecture. - inline uint32_t gpCount() const noexcept { return _archInfo.gpCount(); } - - //! Returns a natural stack alignment that must be honored (or 0 if not known). - inline uint32_t stackAlignment() const noexcept { return _stackAlignment; } - //! Sets a natural stack alignment that must be honored. 
- inline void setStackAlignment(uint32_t sa) noexcept { _stackAlignment = uint8_t(sa); } - - inline uint32_t cdeclCallConv() const noexcept { return _cdeclCallConv; } - inline void setCdeclCallConv(uint32_t cc) noexcept { _cdeclCallConv = uint8_t(cc); } - - inline uint32_t stdCallConv() const noexcept { return _stdCallConv; } - inline void setStdCallConv(uint32_t cc) noexcept { _stdCallConv = uint8_t(cc); } - - inline uint32_t fastCallConv() const noexcept { return _fastCallConv; } - inline void setFastCallConv(uint32_t cc) noexcept { _fastCallConv = uint8_t(cc); } + inline uint32_t gpSize() const noexcept { return _environment.registerSize(); } + //! Tests whether this CodeInfo has a base address set. inline bool hasBaseAddress() const noexcept { return _baseAddress != Globals::kNoBaseAddress; } + //! Returns the base address or \ref Globals::kNoBaseAddress if it's not set. inline uint64_t baseAddress() const noexcept { return _baseAddress; } + //! Sets base address to `p`. inline void setBaseAddress(uint64_t p) noexcept { _baseAddress = p; } + //! Resets base address (implicitly sets it to \ref Globals::kNoBaseAddress). inline void resetBaseAddress() noexcept { _baseAddress = Globals::kNoBaseAddress; } //! \} }; +#endif // !ASMJIT_NO_DEPRECATED // ============================================================================ // [asmjit::Target] @@ -159,19 +130,8 @@ class ASMJIT_VIRTAPI Target { ASMJIT_BASE_CLASS(Target) ASMJIT_NONCOPYABLE(Target) - //! Tartget type, see `TargetType`. - uint8_t _targetType; - //! Reserved for future use. - uint8_t _reserved[7]; - //! Basic information about the Runtime's code. - CodeInfo _codeInfo; - - enum TargetType : uint32_t { - //! Uninitialized target or unknown target type. - kTargetNone = 0, - //! JIT target type, see `JitRuntime`. - kTargetJit = 1 - }; + //! Target environment information. + Environment _environment; //! \name Construction & Destruction //! \{ @@ -190,15 +150,20 @@ class ASMJIT_VIRTAPI Target { //! //! CodeInfo can be used to setup a CodeHolder in case you plan to generate a //! code compatible and executable by this Runtime. - inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; } + inline const Environment& environment() const noexcept { return _environment; } + + //! Returns the target architecture, see \ref Environment::Arch. + inline uint32_t arch() const noexcept { return _environment.arch(); } + //! Returns the target sub-architecture, see \ref Environment::SubArch. + inline uint32_t subArch() const noexcept { return _environment.subArch(); } - //! Returns the target architecture id, see `ArchInfo::Id`. - inline uint32_t archId() const noexcept { return _codeInfo.archId(); } - //! Returns the target architecture sub-id, see `ArchInfo::SubId`. - inline uint32_t archSubId() const noexcept { return _codeInfo.archSubId(); } +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use environment() instead") + inline CodeInfo codeInfo() const noexcept { return CodeInfo(_environment); } - //! Returns the target type, see `TargetType`. - inline uint32_t targetType() const noexcept { return _targetType; } + ASMJIT_DEPRECATED("Use environment().format() instead") + inline uint32_t targetType() const noexcept { return _environment.format(); } +#endif // !ASMJIT_NO_DEPRECATED //! 
\} }; diff --git a/libs/asmjit/src/asmjit/core/type.cpp b/libs/asmjit/src/asmjit/core/type.cpp index 67662db..a2bebf4 100644 --- a/libs/asmjit/src/asmjit/core/type.cpp +++ b/libs/asmjit/src/asmjit/core/type.cpp @@ -31,14 +31,62 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::Type] // ============================================================================ -const Type::TypeData Type::_typeData = { - #define VALUE(X) Type::BaseOfTypeId::kTypeId +namespace Type { + +template +struct BaseOfTypeId { + static constexpr uint32_t kTypeId = + isBase (TYPE_ID) ? TYPE_ID : + isMask8 (TYPE_ID) ? kIdU8 : + isMask16(TYPE_ID) ? kIdU16 : + isMask32(TYPE_ID) ? kIdU32 : + isMask64(TYPE_ID) ? kIdU64 : + isMmx32 (TYPE_ID) ? kIdI32 : + isMmx64 (TYPE_ID) ? kIdI64 : + isVec32 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec32Start : + isVec64 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec64Start : + isVec128(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec128Start : + isVec256(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec256Start : + isVec512(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec512Start : 0; +}; + +template +struct SizeOfTypeId { + static constexpr uint32_t kTypeSize = + isInt8 (TYPE_ID) ? 1 : + isUInt8 (TYPE_ID) ? 1 : + isInt16 (TYPE_ID) ? 2 : + isUInt16 (TYPE_ID) ? 2 : + isInt32 (TYPE_ID) ? 4 : + isUInt32 (TYPE_ID) ? 4 : + isInt64 (TYPE_ID) ? 8 : + isUInt64 (TYPE_ID) ? 8 : + isFloat32(TYPE_ID) ? 4 : + isFloat64(TYPE_ID) ? 8 : + isFloat80(TYPE_ID) ? 10 : + isMask8 (TYPE_ID) ? 1 : + isMask16 (TYPE_ID) ? 2 : + isMask32 (TYPE_ID) ? 4 : + isMask64 (TYPE_ID) ? 8 : + isMmx32 (TYPE_ID) ? 4 : + isMmx64 (TYPE_ID) ? 8 : + isVec32 (TYPE_ID) ? 4 : + isVec64 (TYPE_ID) ? 8 : + isVec128 (TYPE_ID) ? 16 : + isVec256 (TYPE_ID) ? 32 : + isVec512 (TYPE_ID) ? 64 : 0; +}; + +const TypeData _typeData = { + #define VALUE(x) BaseOfTypeId::kTypeId { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) }, #undef VALUE - #define VALUE(X) Type::SizeOfTypeId::kTypeSize + #define VALUE(x) SizeOfTypeId::kTypeSize { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) } #undef VALUE }; +} // {Type} + ASMJIT_END_NAMESPACE diff --git a/libs/asmjit/src/asmjit/core/type.h b/libs/asmjit/src/asmjit/core/type.h index 59a17e9..ef03ecb 100644 --- a/libs/asmjit/src/asmjit/core/type.h +++ b/libs/asmjit/src/asmjit/core/type.h @@ -35,7 +35,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::Type] // ============================================================================ -//! Provides minimum type-system that is used by \ref asmjit_func and \ref asmjit_compiler. +//! Provides a minimalist type-system that is used by Asmjit library. namespace Type { //! TypeId. @@ -46,7 +46,7 @@ namespace Type { //! width used) and it's also used by APIs that allow to describe and work with //! function signatures. enum Id : uint32_t { - kIdVoid = 0, + kIdVoid = 0, //!< Void type. _kIdBaseStart = 32, _kIdBaseEnd = 44, @@ -54,38 +54,38 @@ enum Id : uint32_t { _kIdIntStart = 32, _kIdIntEnd = 41, - kIdIntPtr = 32, - kIdUIntPtr = 33, + kIdIntPtr = 32, //!< Abstract signed integer type that has a native size. + kIdUIntPtr = 33, //!< Abstract unsigned integer type that has a native size. - kIdI8 = 34, - kIdU8 = 35, - kIdI16 = 36, - kIdU16 = 37, - kIdI32 = 38, - kIdU32 = 39, - kIdI64 = 40, - kIdU64 = 41, + kIdI8 = 34, //!< 8-bit signed integer type. + kIdU8 = 35, //!< 8-bit unsigned integer type. + kIdI16 = 36, //!< 16-bit signed integer type. + kIdU16 = 37, //!< 16-bit unsigned integer type. + kIdI32 = 38, //!< 32-bit signed integer type. + kIdU32 = 39, //!< 32-bit unsigned integer type. + kIdI64 = 40, //!< 64-bit signed integer type. 
+ kIdU64 = 41, //!< 64-bit unsigned integer type. _kIdFloatStart = 42, _kIdFloatEnd = 44, - kIdF32 = 42, - kIdF64 = 43, - kIdF80 = 44, + kIdF32 = 42, //!< 32-bit floating point type. + kIdF64 = 43, //!< 64-bit floating point type. + kIdF80 = 44, //!< 80-bit floating point type. _kIdMaskStart = 45, _kIdMaskEnd = 48, - kIdMask8 = 45, - kIdMask16 = 46, - kIdMask32 = 47, - kIdMask64 = 48, + kIdMask8 = 45, //!< 8-bit opmask register (K). + kIdMask16 = 46, //!< 16-bit opmask register (K). + kIdMask32 = 47, //!< 32-bit opmask register (K). + kIdMask64 = 48, //!< 64-bit opmask register (K). _kIdMmxStart = 49, _kIdMmxEnd = 50, - kIdMmx32 = 49, - kIdMmx64 = 50, + kIdMmx32 = 49, //!< 64-bit MMX register only used for 32 bits. + kIdMmx64 = 50, //!< 64-bit MMX register. _kIdVec32Start = 51, _kIdVec32End = 60, @@ -206,72 +206,74 @@ static constexpr bool isVec128(uint32_t typeId) noexcept { return typeId >= _kId static constexpr bool isVec256(uint32_t typeId) noexcept { return typeId >= _kIdVec256Start && typeId <= _kIdVec256End; } static constexpr bool isVec512(uint32_t typeId) noexcept { return typeId >= _kIdVec512Start && typeId <= _kIdVec512End; } -//! IdOfT<> template allows to get a TypeId of a C++ `T` type. -template struct IdOfT { /* Fail if not specialized. */ }; - //! \cond -template struct IdOfT { - enum : uint32_t { kTypeId = kIdUIntPtr }; +enum TypeCategory : uint32_t { + kTypeCategoryUnknown = 0, + kTypeCategoryEnum = 1, + kTypeCategoryIntegral = 2, + kTypeCategoryFloatingPoint = 3, + kTypeCategoryFunction = 4 }; -template struct IdOfT { - enum : uint32_t { kTypeId = kIdUIntPtr }; -}; +template +struct IdOfT_ByCategory {}; // Fails if not specialized. template -struct IdOfIntT { - static constexpr uint32_t kTypeId = - sizeof(T) == 1 ? (std::is_signed::value ? kIdI8 : kIdU8 ) : - sizeof(T) == 2 ? (std::is_signed::value ? kIdI16 : kIdU16) : - sizeof(T) == 4 ? (std::is_signed::value ? kIdI32 : kIdU32) : - sizeof(T) == 8 ? (std::is_signed::value ? kIdI64 : kIdU64) : kIdVoid; +struct IdOfT_ByCategory { + enum : uint32_t { + kTypeId = (sizeof(T) == 1 && std::is_signed::value) ? kIdI8 : + (sizeof(T) == 1 && !std::is_signed::value) ? kIdU8 : + (sizeof(T) == 2 && std::is_signed::value) ? kIdI16 : + (sizeof(T) == 2 && !std::is_signed::value) ? kIdU16 : + (sizeof(T) == 4 && std::is_signed::value) ? kIdI32 : + (sizeof(T) == 4 && !std::is_signed::value) ? kIdU32 : + (sizeof(T) == 8 && std::is_signed::value) ? kIdI64 : + (sizeof(T) == 8 && !std::is_signed::value) ? kIdU64 : kIdVoid + }; }; -template -struct BaseOfTypeId { - static constexpr uint32_t kTypeId = - isBase (TYPE_ID) ? TYPE_ID : - isMask8 (TYPE_ID) ? kIdU8 : - isMask16(TYPE_ID) ? kIdU16 : - isMask32(TYPE_ID) ? kIdU32 : - isMask64(TYPE_ID) ? kIdU64 : - isMmx32 (TYPE_ID) ? kIdI32 : - isMmx64 (TYPE_ID) ? kIdI64 : - isVec32 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec32Start : - isVec64 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec64Start : - isVec128(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec128Start : - isVec256(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec256Start : - isVec512(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec512Start : 0; +template +struct IdOfT_ByCategory { + enum : uint32_t { + kTypeId = (sizeof(T) == 4 ) ? kIdF32 : + (sizeof(T) == 8 ) ? kIdF64 : + (sizeof(T) >= 10) ? kIdF80 : kIdVoid + }; }; -template -struct SizeOfTypeId { - static constexpr uint32_t kTypeSize = - isInt8 (TYPE_ID) ? 1 : - isUInt8 (TYPE_ID) ? 1 : - isInt16 (TYPE_ID) ? 2 : - isUInt16 (TYPE_ID) ? 2 : - isInt32 (TYPE_ID) ? 4 : - isUInt32 (TYPE_ID) ? 4 : - isInt64 (TYPE_ID) ? 8 : - isUInt64 (TYPE_ID) ? 
8 : - isFloat32(TYPE_ID) ? 4 : - isFloat64(TYPE_ID) ? 8 : - isFloat80(TYPE_ID) ? 10 : - isMask8 (TYPE_ID) ? 1 : - isMask16 (TYPE_ID) ? 2 : - isMask32 (TYPE_ID) ? 4 : - isMask64 (TYPE_ID) ? 8 : - isMmx32 (TYPE_ID) ? 4 : - isMmx64 (TYPE_ID) ? 8 : - isVec32 (TYPE_ID) ? 4 : - isVec64 (TYPE_ID) ? 8 : - isVec128 (TYPE_ID) ? 16 : - isVec256 (TYPE_ID) ? 32 : - isVec512 (TYPE_ID) ? 64 : 0; +template +struct IdOfT_ByCategory + : public IdOfT_ByCategory::type, kTypeCategoryIntegral> {}; + +template +struct IdOfT_ByCategory { + enum: uint32_t { kTypeId = kIdUIntPtr }; }; //! \endcond +//! IdOfT<> template allows to get a TypeId from a C++ type `T`. +template +struct IdOfT +#ifdef _DOXYGEN + //! TypeId of C++ type `T`. + static constexpr uint32_t kTypeId = _TypeIdDeducedAtCompileTime_; +#else + : public IdOfT_ByCategory::value ? kTypeCategoryEnum : + std::is_integral::value ? kTypeCategoryIntegral : + std::is_floating_point::value ? kTypeCategoryFloatingPoint : + std::is_function::value ? kTypeCategoryFunction : kTypeCategoryUnknown> +#endif +{}; + +//! \cond +template +struct IdOfT { enum : uint32_t { kTypeId = kIdUIntPtr }; }; + +template +struct IdOfT { enum : uint32_t { kTypeId = kIdUIntPtr }; }; +//! \endcond + static inline uint32_t baseOf(uint32_t typeId) noexcept { ASMJIT_ASSERT(typeId <= kIdMax); return _typeData.baseOf[typeId]; @@ -283,14 +285,14 @@ static inline uint32_t sizeOf(uint32_t typeId) noexcept { } //! Returns offset needed to convert a `kIntPtr` and `kUIntPtr` TypeId -//! into a type that matches `gpSize` (general-purpose register size). +//! into a type that matches `registerSize` (general-purpose register size). //! If you find such TypeId it's then only about adding the offset to it. //! //! For example: //! //! ``` -//! uint32_t gpSize = '4' or '8'; -//! uint32_t deabstractDelta = Type::deabstractDeltaOfSize(gpSize); +//! uint32_t registerSize = '4' or '8'; +//! uint32_t deabstractDelta = Type::deabstractDeltaOfSize(registerSize); //! //! uint32_t typeId = 'some type-id'; //! @@ -300,8 +302,8 @@ static inline uint32_t sizeOf(uint32_t typeId) noexcept { //! // The same, but by using Type::deabstract() function. //! typeId = Type::deabstract(typeId, deabstractDelta); //! ``` -static constexpr uint32_t deabstractDeltaOfSize(uint32_t gpSize) noexcept { - return gpSize >= 8 ? kIdI64 - kIdIntPtr : kIdI32 - kIdIntPtr; +static constexpr uint32_t deabstractDeltaOfSize(uint32_t registerSize) noexcept { + return registerSize >= 8 ? 
kIdI64 - kIdIntPtr : kIdI32 - kIdIntPtr; } static constexpr uint32_t deabstract(uint32_t typeId, uint32_t deabstractDelta) noexcept { @@ -350,45 +352,20 @@ namespace Type { \ }; \ } -ASMJIT_DEFINE_TYPE_ID(bool , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(char , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(signed char , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(unsigned char , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(short , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(unsigned short , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(int , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(unsigned int , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(long , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(unsigned long , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(long long , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(unsigned long long, IdOfIntT::kTypeId); - -#if ASMJIT_CXX_HAS_NATIVE_WCHAR_T -ASMJIT_DEFINE_TYPE_ID(wchar_t , IdOfIntT::kTypeId); -#endif - -#if ASMJIT_CXX_HAS_UNICODE_LITERALS -ASMJIT_DEFINE_TYPE_ID(char16_t , IdOfIntT::kTypeId); -ASMJIT_DEFINE_TYPE_ID(char32_t , IdOfIntT::kTypeId); -#endif - -ASMJIT_DEFINE_TYPE_ID(void , kIdVoid); -ASMJIT_DEFINE_TYPE_ID(float , kIdF32); -ASMJIT_DEFINE_TYPE_ID(double , kIdF64); - -ASMJIT_DEFINE_TYPE_ID(Bool , kIdU8); -ASMJIT_DEFINE_TYPE_ID(I8 , kIdI8); -ASMJIT_DEFINE_TYPE_ID(U8 , kIdU8); -ASMJIT_DEFINE_TYPE_ID(I16 , kIdI16); -ASMJIT_DEFINE_TYPE_ID(U16 , kIdU16); -ASMJIT_DEFINE_TYPE_ID(I32 , kIdI32); -ASMJIT_DEFINE_TYPE_ID(U32 , kIdU32); -ASMJIT_DEFINE_TYPE_ID(I64 , kIdI64); -ASMJIT_DEFINE_TYPE_ID(U64 , kIdU64); -ASMJIT_DEFINE_TYPE_ID(IPtr , kIdIntPtr); -ASMJIT_DEFINE_TYPE_ID(UPtr , kIdUIntPtr); -ASMJIT_DEFINE_TYPE_ID(F32 , kIdF32); -ASMJIT_DEFINE_TYPE_ID(F64 , kIdF64); +ASMJIT_DEFINE_TYPE_ID(void, kIdVoid); +ASMJIT_DEFINE_TYPE_ID(Bool, kIdU8); +ASMJIT_DEFINE_TYPE_ID(I8 , kIdI8); +ASMJIT_DEFINE_TYPE_ID(U8 , kIdU8); +ASMJIT_DEFINE_TYPE_ID(I16 , kIdI16); +ASMJIT_DEFINE_TYPE_ID(U16 , kIdU16); +ASMJIT_DEFINE_TYPE_ID(I32 , kIdI32); +ASMJIT_DEFINE_TYPE_ID(U32 , kIdU32); +ASMJIT_DEFINE_TYPE_ID(I64 , kIdI64); +ASMJIT_DEFINE_TYPE_ID(U64 , kIdU64); +ASMJIT_DEFINE_TYPE_ID(IPtr, kIdIntPtr); +ASMJIT_DEFINE_TYPE_ID(UPtr, kIdUIntPtr); +ASMJIT_DEFINE_TYPE_ID(F32 , kIdF32); +ASMJIT_DEFINE_TYPE_ID(F64 , kIdF64); //! \endcond //! \} diff --git a/libs/asmjit/src/asmjit/core/virtmem.cpp b/libs/asmjit/src/asmjit/core/virtmem.cpp index 97f7ceb..0606748 100644 --- a/libs/asmjit/src/asmjit/core/virtmem.cpp +++ b/libs/asmjit/src/asmjit/core/virtmem.cpp @@ -317,9 +317,9 @@ static ASMJIT_INLINE bool VirtMem_isHardened() noexcept { // version 10.14+ (Mojave) and IOS. static ASMJIT_INLINE bool VirtMem_hasMapJitSupport() noexcept { #if TARGET_OS_OSX - static volatile uint32_t globalVersion; + static volatile int globalVersion; - uint32_t ver = globalVersion; + int ver = globalVersion; if (!ver) { struct utsname osname; uname(&osname); @@ -333,19 +333,19 @@ static ASMJIT_INLINE bool VirtMem_hasMapJitSupport() noexcept { #endif } -static ASMJIT_INLINE uint32_t VirtMem_appleSpecificMMapFlags(uint32_t flags) { +static ASMJIT_INLINE int VirtMem_appleSpecificMMapFlags(uint32_t flags) { // Always use MAP_JIT flag if user asked for it (could be used for testing // on non-hardened processes) and detect whether it must be used when the // process is actually hardened (in that case it doesn't make sense to rely // on user `flags`). bool useMapJit = ((flags & VirtMem::kMMapEnableMapJit) != 0) || VirtMem_isHardened(); if (useMapJit) - return VirtMem_hasMapJitSupport() ? 
MAP_JIT : 0u; + return VirtMem_hasMapJitSupport() ? int(MAP_JIT) : 0; else return 0; } #else -static ASMJIT_INLINE uint32_t VirtMem_appleSpecificMMapFlags(uint32_t flags) { +static ASMJIT_INLINE int VirtMem_appleSpecificMMapFlags(uint32_t flags) { DebugUtils::unused(flags); return 0; } @@ -406,7 +406,7 @@ static Error VirtMem_openAnonymousMemory(int* fd, bool preferTmpOverDevShm) noex bits = ((bits >> 14) ^ (bits << 6)) + uint64_t(++internalCounter) * 10619863; if (!ASMJIT_VM_SHM_DETECT || preferTmpOverDevShm) { - uniqueName.assignString(VirtMem_getTmpDir()); + uniqueName.assign(VirtMem_getTmpDir()); uniqueName.appendFormat(kShmFormat, (unsigned long long)bits); *fd = open(uniqueName.data(), O_RDWR | O_CREAT | O_EXCL, 0); if (ASMJIT_LIKELY(*fd >= 0)) { diff --git a/libs/asmjit/src/asmjit/core/virtmem.h b/libs/asmjit/src/asmjit/core/virtmem.h index a37005d..8d3ee01 100644 --- a/libs/asmjit/src/asmjit/core/virtmem.h +++ b/libs/asmjit/src/asmjit/core/virtmem.h @@ -31,7 +31,7 @@ ASMJIT_BEGIN_NAMESPACE -//! \addtogroup asmjit_jit +//! \addtogroup asmjit_virtual_memory //! \{ // ============================================================================ diff --git a/libs/asmjit/src/asmjit/core/zone.cpp b/libs/asmjit/src/asmjit/core/zone.cpp index 16de89b..61f7cec 100644 --- a/libs/asmjit/src/asmjit/core/zone.cpp +++ b/libs/asmjit/src/asmjit/core/zone.cpp @@ -144,7 +144,7 @@ void* Zone::_alloc(size_t size, size_t alignment) noexcept { size_t newSize = Support::max(blockSize(), size); // Prevent arithmetic overflow. - if (ASMJIT_UNLIKELY(newSize > std::numeric_limits::max() - kBlockSize - blockAlignmentOverhead)) + if (ASMJIT_UNLIKELY(newSize > SIZE_MAX - kBlockSize - blockAlignmentOverhead)) return nullptr; // Allocate new block - we add alignment overhead to `newSize`, which becomes the @@ -200,7 +200,7 @@ void* Zone::dup(const void* data, size_t size, bool nullTerminate) noexcept { if (ASMJIT_UNLIKELY(!data || !size)) return nullptr; - ASMJIT_ASSERT(size != std::numeric_limits::max()); + ASMJIT_ASSERT(size != SIZE_MAX); uint8_t* m = allocT(size + nullTerminate); if (ASMJIT_UNLIKELY(!m)) return nullptr; @@ -318,7 +318,7 @@ void* ZoneAllocator::_alloc(size_t size, size_t& allocatedSize) noexcept { size_t kBlockOverhead = sizeof(DynamicBlock) + sizeof(DynamicBlock*) + kBlockAlignment; // Handle a possible overflow. - if (ASMJIT_UNLIKELY(kBlockOverhead >= std::numeric_limits::max() - size)) + if (ASMJIT_UNLIKELY(kBlockOverhead >= SIZE_MAX - size)) return nullptr; void* p = ::malloc(size + kBlockOverhead); diff --git a/libs/asmjit/src/asmjit/core/zone.h b/libs/asmjit/src/asmjit/core/zone.h index c426f78..52e9f12 100644 --- a/libs/asmjit/src/asmjit/core/zone.h +++ b/libs/asmjit/src/asmjit/core/zone.h @@ -383,15 +383,22 @@ class Zone { // [b2d::ZoneTmp] // ============================================================================ +//! \ref Zone with `N` bytes of a static storage, used for the initial block. +//! +//! Temporary zones are used in cases where it's known that some memory will be +//! required, but in many cases it won't exceed N bytes, so the whole operation +//! can be performed without a dynamic memory allocation. template class ZoneTmp : public Zone { public: ASMJIT_NONCOPYABLE(ZoneTmp) + //! Temporary storage, embedded after \ref Zone. struct Storage { char data[N]; } _storage; + //! Creates a temporary zone. Dynamic block size is specified by `blockSize`. 
ASMJIT_INLINE explicit ZoneTmp(size_t blockSize, size_t blockAlignment = 1) noexcept : Zone(blockSize, blockAlignment, Support::Temporary(_storage.data, N)) {} }; diff --git a/libs/asmjit/src/asmjit/core/zonehash.h b/libs/asmjit/src/asmjit/core/zonehash.h index bdc1da7..0f1f21f 100644 --- a/libs/asmjit/src/asmjit/core/zonehash.h +++ b/libs/asmjit/src/asmjit/core/zonehash.h @@ -35,7 +35,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::ZoneHashNode] // ============================================================================ -//! Node used by `ZoneHash<>` template. +//! Node used by \ref ZoneHash template. //! //! You must provide function `bool eq(const Key& key)` in order to make //! `ZoneHash::get()` working. @@ -60,6 +60,7 @@ class ZoneHashNode { // [asmjit::ZoneHashBase] // ============================================================================ +//! Base class used by \ref ZoneHash template class ZoneHashBase { public: ASMJIT_NONCOPYABLE(ZoneHashBase) diff --git a/libs/asmjit/src/asmjit/core/zonelist.h b/libs/asmjit/src/asmjit/core/zonelist.h index 9d300b0..d7fb1dd 100644 --- a/libs/asmjit/src/asmjit/core/zonelist.h +++ b/libs/asmjit/src/asmjit/core/zonelist.h @@ -35,6 +35,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::ZoneListNode] // ============================================================================ +//! Node used by \ref ZoneList template. template class ZoneListNode { public: @@ -69,25 +70,26 @@ class ZoneListNode { // [asmjit::ZoneList] // ============================================================================ +//! Zone allocated list container that uses nodes of `NodeT` type. template class ZoneList { public: ASMJIT_NONCOPYABLE(ZoneList) - NodeT* _bounds[Globals::kLinkCount]; + NodeT* _nodes[Globals::kLinkCount]; //! \name Construction & Destruction //! \{ inline ZoneList() noexcept - : _bounds { nullptr, nullptr } {} + : _nodes { nullptr, nullptr } {} inline ZoneList(ZoneList&& other) noexcept - : _bounds { other._bounds[0], other._bounds[1] } {} + : _nodes { other._nodes[0], other._nodes[1] } {} inline void reset() noexcept { - _bounds[0] = nullptr; - _bounds[1] = nullptr; + _nodes[0] = nullptr; + _nodes[1] = nullptr; } //! \} @@ -95,9 +97,9 @@ class ZoneList { //! \name Accessors //! \{ - inline bool empty() const noexcept { return _bounds[0] == nullptr; } - inline NodeT* first() const noexcept { return _bounds[Globals::kLinkFirst]; } - inline NodeT* last() const noexcept { return _bounds[Globals::kLinkLast]; } + inline bool empty() const noexcept { return _nodes[0] == nullptr; } + inline NodeT* first() const noexcept { return _nodes[Globals::kLinkFirst]; } + inline NodeT* last() const noexcept { return _nodes[Globals::kLinkLast]; } //! \} @@ -105,23 +107,23 @@ class ZoneList { //! \{ inline void swap(ZoneList& other) noexcept { - std::swap(_bounds[0], other._bounds[0]); - std::swap(_bounds[1], other._bounds[1]); + std::swap(_nodes[0], other._nodes[0]); + std::swap(_nodes[1], other._nodes[1]); } - // Can be used to both prepend and append. + // Can be used to both append and prepend. inline void _addNode(NodeT* node, size_t dir) noexcept { - NodeT* prev = _bounds[dir]; + NodeT* prev = _nodes[dir]; node->_listNodes[!dir] = prev; - _bounds[dir] = node; + _nodes[dir] = node; if (prev) prev->_listNodes[dir] = node; else - _bounds[!dir] = node; + _nodes[!dir] = node; } - // Can be used to both prepend and append. + // Can be used to both append and prepend. 
inline void _insertNode(NodeT* ref, NodeT* node, size_t dir) noexcept { ASMJIT_ASSERT(ref != nullptr); @@ -132,7 +134,7 @@ class ZoneList { if (next) next->_listNodes[!dir] = node; else - _bounds[dir] = node; + _nodes[dir] = node; node->_listNodes[!dir] = prev; node->_listNodes[ dir] = next; @@ -148,8 +150,8 @@ class ZoneList { NodeT* prev = node->prev(); NodeT* next = node->next(); - if (prev) { prev->_listNodes[1] = next; node->_listNodes[0] = nullptr; } else { _bounds[0] = next; } - if (next) { next->_listNodes[0] = prev; node->_listNodes[1] = nullptr; } else { _bounds[1] = prev; } + if (prev) { prev->_listNodes[1] = next; node->_listNodes[0] = nullptr; } else { _nodes[0] = next; } + if (next) { next->_listNodes[0] = prev; node->_listNodes[1] = nullptr; } else { _nodes[1] = prev; } node->_listNodes[0] = nullptr; node->_listNodes[1] = nullptr; @@ -158,36 +160,36 @@ class ZoneList { } inline NodeT* popFirst() noexcept { - NodeT* node = _bounds[0]; + NodeT* node = _nodes[0]; ASMJIT_ASSERT(node != nullptr); NodeT* next = node->next(); - _bounds[0] = next; + _nodes[0] = next; if (next) { next->_listNodes[0] = nullptr; node->_listNodes[1] = nullptr; } else { - _bounds[1] = nullptr; + _nodes[1] = nullptr; } return node; } inline NodeT* pop() noexcept { - NodeT* node = _bounds[1]; + NodeT* node = _nodes[1]; ASMJIT_ASSERT(node != nullptr); NodeT* prev = node->prev(); - _bounds[1] = prev; + _nodes[1] = prev; if (prev) { prev->_listNodes[1] = nullptr; node->_listNodes[0] = nullptr; } else { - _bounds[0] = nullptr; + _nodes[0] = nullptr; } return node; diff --git a/libs/asmjit/src/asmjit/core/zonestack.h b/libs/asmjit/src/asmjit/core/zonestack.h index 3c7b5ce..2de6cdc 100644 --- a/libs/asmjit/src/asmjit/core/zonestack.h +++ b/libs/asmjit/src/asmjit/core/zonestack.h @@ -35,7 +35,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::ZoneStackBase] // ============================================================================ -//! Base class used by `ZoneStack`. +//! Base class used by \ref ZoneStack. class ZoneStackBase { public: ASMJIT_NONCOPYABLE(ZoneStackBase) diff --git a/libs/asmjit/src/asmjit/core/zonestring.h b/libs/asmjit/src/asmjit/core/zonestring.h index dfb06b4..cb25b29 100644 --- a/libs/asmjit/src/asmjit/core/zonestring.h +++ b/libs/asmjit/src/asmjit/core/zonestring.h @@ -36,6 +36,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::ZoneStringBase] // ============================================================================ +//! A helper class used by \ref ZoneString implementation. struct ZoneStringBase { union { struct { @@ -77,11 +78,12 @@ struct ZoneStringBase { // [asmjit::ZoneString] // ============================================================================ -//! Small string is a template that helps to create strings that can be either -//! statically allocated if they are small, or externally allocated in case -//! their size exceeds the limit. The `N` represents the size of the whole -//! `ZoneString` structure, based on that size the maximum size of the internal -//! buffer is determined. +//! A string template that can be zone allocated. +//! +//! Helps with creating strings that can be either statically allocated if they +//! are small, or externally allocated in case their size exceeds the limit. +//! The `N` represents the size of the whole `ZoneString` structure, based on +//! that size the maximum size of the internal buffer is determined. template class ZoneString { public: @@ -105,12 +107,22 @@ class ZoneString { //! \name Accessors //! 
\{ - inline const char* data() const noexcept { return _base._size <= kMaxEmbeddedSize ? _base._embedded : _base._external; } + //! Tests whether the string is empty. inline bool empty() const noexcept { return _base._size == 0; } + + //! Returns the string data. + inline const char* data() const noexcept { return _base._size <= kMaxEmbeddedSize ? _base._embedded : _base._external; } + //! Returns the string size. inline uint32_t size() const noexcept { return _base._size; } + //! Tests whether the string is embedded (e.g. no dynamically allocated). inline bool isEmbedded() const noexcept { return _base._size <= kMaxEmbeddedSize; } + //! Copies a new `data` of the given `size` to the string. + //! + //! If the `size` exceeds the internal buffer the given `zone` will be + //! used to duplicate the data, otherwise the internal buffer will be + //! used as a storage. inline Error setData(Zone* zone, const char* data, size_t size) noexcept { return _base.setData(zone, kMaxEmbeddedSize, data, size); } diff --git a/libs/asmjit/src/asmjit/core/zonetree.h b/libs/asmjit/src/asmjit/core/zonetree.h index 6cb88ed..1877919 100644 --- a/libs/asmjit/src/asmjit/core/zonetree.h +++ b/libs/asmjit/src/asmjit/core/zonetree.h @@ -99,7 +99,7 @@ class ZoneTreeNode { //! \endcond }; -//! RB-Tree typed to `NodeT`. +//! RB-Tree node casted to `NodeT`. template class ZoneTreeNodeT : public ZoneTreeNode { public: diff --git a/libs/asmjit/src/asmjit/core/zonevector.cpp b/libs/asmjit/src/asmjit/core/zonevector.cpp index 7ab53bf..81d5d50 100644 --- a/libs/asmjit/src/asmjit/core/zonevector.cpp +++ b/libs/asmjit/src/asmjit/core/zonevector.cpp @@ -312,6 +312,8 @@ static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) { EXPECT(vec.size() == uint32_t(kMax)); EXPECT(vec.indexOf(T(kMax - 1)) == uint32_t(kMax - 1)); + EXPECT(vec.rbegin()[0] == kMax - 1); + vec.release(allocator); } diff --git a/libs/asmjit/src/asmjit/core/zonevector.h b/libs/asmjit/src/asmjit/core/zonevector.h index 7ee04ce..e95422a 100644 --- a/libs/asmjit/src/asmjit/core/zonevector.h +++ b/libs/asmjit/src/asmjit/core/zonevector.h @@ -36,9 +36,7 @@ ASMJIT_BEGIN_NAMESPACE // [asmjit::ZoneVectorBase] // ============================================================================ -//! \cond INTERNAL - -//! Base class implementing core `ZoneVector<>` functionality. +//! Base class used by \ref ZoneVector template. class ZoneVectorBase { public: ASMJIT_NONCOPYABLE(ZoneVectorBase) @@ -48,21 +46,18 @@ class ZoneVectorBase { typedef ptrdiff_t difference_type; //! Vector data (untyped). - void* _data; + void* _data = nullptr; //! Size of the vector. - size_type _size; + size_type _size = 0; //! Capacity of the vector. - size_type _capacity; + size_type _capacity = 0; protected: //! \name Construction & Destruction //! \{ //! Creates a new instance of `ZoneVectorBase`. - inline ZoneVectorBase() noexcept - : _data(nullptr), - _size(0), - _capacity(0) {} + inline ZoneVectorBase() noexcept {} inline ZoneVectorBase(ZoneVectorBase&& other) noexcept : _data(other._data), @@ -93,6 +88,7 @@ class ZoneVectorBase { } //! \} + //! \endcond public: //! \name Accessors @@ -133,8 +129,6 @@ class ZoneVectorBase { //! \} }; -//! 
\endcond - // ============================================================================ // [asmjit::ZoneVector] // ============================================================================ @@ -158,10 +152,10 @@ class ZoneVector : public ZoneVectorBase { typedef T& reference; typedef const T& const_reference; - typedef Support::Iterator iterator; - typedef Support::Iterator const_iterator; - typedef Support::ReverseIterator reverse_iterator; - typedef Support::ReverseIterator const_reverse_iterator; + typedef T* iterator; + typedef const T* const_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; //! \name Construction & Destruction //! \{ @@ -180,7 +174,7 @@ class ZoneVector : public ZoneVectorBase { inline const T* data() const noexcept { return static_cast(_data); } //! Returns item at the given index `i` (const). - inline const T& at(uint32_t i) const noexcept { + inline const T& at(size_t i) const noexcept { ASMJIT_ASSERT(i < _size); return data()[i]; } @@ -201,17 +195,17 @@ class ZoneVector : public ZoneVectorBase { inline iterator end() noexcept { return iterator(data() + _size); }; inline const_iterator end() const noexcept { return const_iterator(data() + _size); }; - inline reverse_iterator rbegin() noexcept { return reverse_iterator(data()); }; - inline const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(data()); }; + inline reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }; + inline const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }; - inline reverse_iterator rend() noexcept { return reverse_iterator(data() + _size); }; - inline const_reverse_iterator rend() const noexcept { return const_reverse_iterator(data() + _size); }; + inline reverse_iterator rend() noexcept { return reverse_iterator(begin()); }; + inline const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }; inline const_iterator cbegin() const noexcept { return const_iterator(data()); }; inline const_iterator cend() const noexcept { return const_iterator(data() + _size); }; - inline const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(data()); }; - inline const_reverse_iterator crend() const noexcept { return const_reverse_iterator(data() + _size); }; + inline const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(cend()); }; + inline const_reverse_iterator crend() const noexcept { return const_reverse_iterator(cbegin()); }; //! \} @@ -234,7 +228,7 @@ class ZoneVector : public ZoneVectorBase { } //! Inserts an `item` at the specified `index`. - inline Error insert(ZoneAllocator* allocator, uint32_t index, const T& item) noexcept { + inline Error insert(ZoneAllocator* allocator, size_t index, const T& item) noexcept { ASMJIT_ASSERT(index <= _size); if (ASMJIT_UNLIKELY(_size == _capacity)) @@ -259,6 +253,7 @@ class ZoneVector : public ZoneVectorBase { return kErrorOk; } + //! Appends `other` vector at the end of this vector. inline Error concat(ZoneAllocator* allocator, const ZoneVector& other) noexcept { uint32_t size = other._size; if (_capacity - _size < size) @@ -300,6 +295,16 @@ class ZoneVector : public ZoneVectorBase { _size++; } + //! Inserts an `item` at the specified `index` (unsafe case). 
+ inline void insertUnsafe(size_t index, const T& item) noexcept { + ASMJIT_ASSERT(_size < _capacity); + ASMJIT_ASSERT(index <= _size); + + T* dst = static_cast(_data) + index; + ::memmove(dst + 1, dst, size_t(_size - index) * sizeof(T)); + memcpy(dst, &item, sizeof(T)); + _size++; + } //! Concatenates all items of `other` at the end of the vector. inline void concatUnsafe(const ZoneVector& other) noexcept { uint32_t size = other._size; @@ -328,16 +333,17 @@ class ZoneVector : public ZoneVectorBase { } //! Removes item at index `i`. - inline void removeAt(uint32_t i) noexcept { + inline void removeAt(size_t i) noexcept { ASMJIT_ASSERT(i < _size); T* data = static_cast(_data) + i; - uint32_t size = --_size - i; + size_t size = --_size - i; if (size) ::memmove(data, data + 1, size_t(size) * sizeof(T)); } + //! Pops the last element from the vector and returns it. inline T pop() noexcept { ASMJIT_ASSERT(_size > 0); @@ -351,21 +357,33 @@ class ZoneVector : public ZoneVectorBase { } //! Returns item at index `i`. - inline T& operator[](uint32_t i) noexcept { + inline T& operator[](size_t i) noexcept { ASMJIT_ASSERT(i < _size); return data()[i]; } //! Returns item at index `i`. - inline const T& operator[](uint32_t i) const noexcept { + inline const T& operator[](size_t i) const noexcept { ASMJIT_ASSERT(i < _size); return data()[i]; } + //! Returns a reference to the first element of the vector. + //! + //! \note The vector must have at least one element. Attempting to use + //! `first()` on empty vector will trigger an assertion failure in debug + //! builds. inline T& first() noexcept { return operator[](0); } + //! \overload inline const T& first() const noexcept { return operator[](0); } + //! Returns a reference to the last element of the vector. + //! + //! \note The vector must have at least one element. Attempting to use + //! `last()` on empty vector will trigger an assertion failure in debug + //! builds. inline T& last() noexcept { return operator[](_size - 1); } + //! \overload inline const T& last() const noexcept { return operator[](_size - 1); } //! \} @@ -408,17 +426,18 @@ class ZoneVector : public ZoneVectorBase { // [asmjit::ZoneBitVector] // ============================================================================ +//! Zone-allocated bit vector. class ZoneBitVector { public: typedef Support::BitWord BitWord; static constexpr uint32_t kBitWordSizeInBits = Support::kBitWordSizeInBits; //! Bits. - BitWord* _data; + BitWord* _data = nullptr; //! Size of the bit-vector (in bits). - uint32_t _size; + uint32_t _size = 0; //! Capacity of the bit-vector (in bits). - uint32_t _capacity; + uint32_t _capacity = 0; ASMJIT_NONCOPYABLE(ZoneBitVector) @@ -451,10 +470,7 @@ class ZoneBitVector { //! \name Construction & Destruction //! \{ - inline ZoneBitVector() noexcept - : _data(nullptr), - _size(0), - _capacity(0) {} + inline ZoneBitVector() noexcept {} inline ZoneBitVector(ZoneBitVector&& other) noexcept : _data(other._data), @@ -689,7 +705,6 @@ class ZoneBitVector { }; //! \} - }; //! \} diff --git a/libs/asmjit/src/asmjit/x86.h b/libs/asmjit/src/asmjit/x86.h index 161b3be..69d47b4 100644 --- a/libs/asmjit/src/asmjit/x86.h +++ b/libs/asmjit/src/asmjit/x86.h @@ -24,9 +24,84 @@ #ifndef ASMJIT_X86_H_INCLUDED #define ASMJIT_X86_H_INCLUDED -//! \defgroup asmjit_x86 X86 +//! \addtogroup asmjit_x86 //! -//! \brief X86/X64 Backend. +//! ### Namespace +//! +//! - \ref x86 - x86 namespace provides support for X86/X64 code generation. +//! +//! ### Emitters +//! +//! 
- \ref x86::Assembler - X86/X64 assembler (must read, provides examples). +//! - \ref x86::Builder - X86/X64 builder. +//! - \ref x86::Compiler - X86/X64 compiler. +//! - \ref x86::Emitter - X86/X64 emitter (abstract). +//! +//! ### Supported Instructions +//! +//! - Emitters: +//! - \ref x86::EmitterExplicitT - Provides all instructions that use +//! explicit operands, provides also utility functions. The member +//! functions provided are part of all X86 emitters. +//! - \ref x86::EmitterImplicitT - Provides all instructions that use +//! implicit operands, these cannot be used with \ref x86::Compiler. +//! +//! - Instruction representation: +//! - \ref x86::Inst::Id - instruction identifiers. +//! - \ref x86::Inst::Options - instruction options. +//! +//! ### Register Operands +//! +//! - \ref x86::Reg - Base class for any X86 register. +//! - \ref x86::Gp - General purpose register: +//! - \ref x86::GpbLo - 8-bit low register. +//! - \ref x86::GpbHi - 8-bit high register. +//! - \ref x86::Gpw - 16-bit register. +//! - \ref x86::Gpd - 32-bit register. +//! - \ref x86::Gpq - 64-bit register (X64 only). +//! - \ref x86::Vec - Vector (SIMD) register: +//! - \ref x86::Xmm - 128-bit SIMD register (SSE+). +//! - \ref x86::Ymm - 256-bit SIMD register (AVX+). +//! - \ref x86::Zmm - 512-bit SIMD register (AVX512+). +//! - \ref x86::Mm - 64-bit MMX register. +//! - \ref x86::St - 80-bit FPU register. +//! - \ref x86::KReg - opmask registers (AVX512+). +//! - \ref x86::SReg - segment register. +//! - \ref x86::CReg - control register. +//! - \ref x86::DReg - debug register. +//! - \ref x86::Bnd - bound register (discontinued). +//! - \ref x86::Rip - relative instruction pointer. +//! +//! ### Memory Operands +//! +//! - \ref x86::Mem - X86/X64 memory operand that provides support for all +//! X86 and X64 addressing features including absolute addresses, index +//! scales, and segment override prefixes. +//! +//! ### Other +//! +//! - \ref x86::Features - X86/X64 CPU features on top of \ref BaseFeatures. +//! +//! ### Status and Control Words +//! +//! - \ref asmjit::x86::FpuWord::Status - FPU status word. +//! - \ref asmjit::x86::FpuWord::Control - FPU control word. +//! +//! ### Predicates +//! +//! - \ref x86::Predicate - namespace that provides X86/X64 predicates. +//! - \ref x86::Predicate::Cmp - `CMP[PD|PS|SD|SS]` predicate (SSE+). +//! - \ref x86::Predicate::PCmpStr - `[V]PCMP[I|E]STR[I|M]` predicate (SSE4.1+). +//! - \ref x86::Predicate::Round - `ROUND[PD|PS|SD|SS]` predicate (SSE+). +//! - \ref x86::Predicate::VCmp - `VCMP[PD|PS|SD|SS]` predicate (AVX+). +//! - \ref x86::Predicate::VFixupImm - `VFIXUPIMM[PD|PS|SD|SS]` predicate (AVX512+). +//! - \ref x86::Predicate::VFPClass - `VFPCLASS[PD|PS|SD|SS]` predicate (AVX512+). +//! - \ref x86::Predicate::VGetMant - `VGETMANT[PD|PS|SD|SS]` predicate (AVX512+). +//! - \ref x86::Predicate::VPCmp - `VPCMP[U][B|W|D|Q]` predicate (AVX512+). +//! - \ref x86::Predicate::VPCom - `VPCOM[U][B|W|D|Q]` predicate (XOP). +//! - \ref x86::Predicate::VRange - `VRANGE[PD|PS|SD|SS]` predicate (AVX512+). +//! - \ref x86::Predicate::VReduce - `REDUCE[PD|PS|SD|SS]` predicate (AVX512+). +//! - \ref x86::TLog - namespace that provides `VPTERNLOG[D|Q]` predicate / operations. 
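As a quick orientation for the emitters listed above, a minimal usage sketch (not part of this patch) showing the post-CodeInfo flow, where a CodeHolder is initialized from the runtime's Environment; it assumes the standard asmjit entry points `JitRuntime`, `CodeHolder`, and `x86::Assembler`:

```
#include <asmjit/x86.h>

int main() {
  using namespace asmjit;

  JitRuntime rt;                   // A Target that exposes an Environment.
  CodeHolder code;
  code.init(rt.environment());     // Environment replaces the deprecated CodeInfo.

  x86::Assembler a(&code);         // Emits machine code directly into `code`.
  a.mov(x86::eax, 1);
  a.ret();

  int (*fn)(void) = nullptr;
  if (rt.add(&fn, &code) != kErrorOk)  // Relocate and copy into executable memory.
    return 1;
  return fn();                     // Runs the generated function, returns 1.
}
```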
#include "./core.h" diff --git a/libs/asmjit/src/asmjit/x86/x86archtraits_p.h b/libs/asmjit/src/asmjit/x86/x86archtraits_p.h new file mode 100644 index 0000000..095919c --- /dev/null +++ b/libs/asmjit/src/asmjit/x86/x86archtraits_p.h @@ -0,0 +1,150 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED +#define ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED + +#include "../core/archtraits.h" +#include "../core/misc_p.h" +#include "../x86/x86operand.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(x86) + +//! \cond INTERNAL +//! \addtogroup asmjit_x86 +//! \{ + +// ============================================================================ +// [asmjit::x86::x86ArchTraits +// ============================================================================ + +static const constexpr ArchTraits x86ArchTraits = { + Gp::kIdSp, // SP. + Gp::kIdBp, // FP. + 0xFF, // LR. + 0xFF, // PC. + { 0, 0, 0 }, // Reserved. + 1, // HW stack alignment. + 0x7FFFFFFFu, // Min stack offset. + 0x7FFFFFFFu, // Max stack offset. + + // ISA features [Gp, Vec, Other0, Other1]. + { + ArchTraits::kIsaFeatureSwap | ArchTraits::kIsaFeaturePushPop, + 0, + 0, + 0 + }, + + // RegInfo. + #define V(index) { x86::RegTraits::kSignature } + { ASMJIT_LOOKUP_TABLE_32(V, 0) }, + #undef V + + // RegTypeToTypeId. + #define V(index) x86::RegTraits::kTypeId + { ASMJIT_LOOKUP_TABLE_32(V, 0) }, + #undef V + + // TypeIdToRegType. + #define V(index) (index + Type::_kIdBaseStart == Type::kIdI8 ? Reg::kTypeGpbLo : \ + index + Type::_kIdBaseStart == Type::kIdU8 ? Reg::kTypeGpbLo : \ + index + Type::_kIdBaseStart == Type::kIdI16 ? Reg::kTypeGpw : \ + index + Type::_kIdBaseStart == Type::kIdU16 ? Reg::kTypeGpw : \ + index + Type::_kIdBaseStart == Type::kIdI32 ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdU32 ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdIntPtr ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdUIntPtr ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdF32 ? Reg::kTypeXmm : \ + index + Type::_kIdBaseStart == Type::kIdF64 ? Reg::kTypeXmm : \ + index + Type::_kIdBaseStart == Type::kIdMask8 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask16 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask32 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask64 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMmx32 ? Reg::kTypeMm : \ + index + Type::_kIdBaseStart == Type::kIdMmx64 ? 
Reg::kTypeMm : Reg::kTypeNone) + { ASMJIT_LOOKUP_TABLE_32(V, 0) } + #undef V +}; + +// ============================================================================ +// [asmjit::x86::x64ArchTraits +// ============================================================================ + +static const constexpr ArchTraits x64ArchTraits = { + Gp::kIdSp, // SP. + Gp::kIdBp, // FP. + 0xFF, // LR. + 0xFF, // PC. + { 0, 0, 0 }, // Reserved. + 1, // HW stack alignment. + 0x7FFFFFFFu, // Min stack offset. + 0x7FFFFFFFu, // Max stack offset. + + // ISA features [Gp, Vec, Other0, Other1]. + { + ArchTraits::kIsaFeatureSwap | ArchTraits::kIsaFeaturePushPop, + 0, + 0, + 0 + }, + + // RegInfo. + #define V(index) { x86::RegTraits::kSignature } + { ASMJIT_LOOKUP_TABLE_32(V, 0) }, + #undef V + + // RegTypeToTypeId. + #define V(index) x86::RegTraits::kTypeId + { ASMJIT_LOOKUP_TABLE_32(V, 0) }, + #undef V + + // TypeIdToRegType. + #define V(index) (index + Type::_kIdBaseStart == Type::kIdI8 ? Reg::kTypeGpbLo : \ + index + Type::_kIdBaseStart == Type::kIdU8 ? Reg::kTypeGpbLo : \ + index + Type::_kIdBaseStart == Type::kIdI16 ? Reg::kTypeGpw : \ + index + Type::_kIdBaseStart == Type::kIdU16 ? Reg::kTypeGpw : \ + index + Type::_kIdBaseStart == Type::kIdI32 ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdU32 ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdI64 ? Reg::kTypeGpq : \ + index + Type::_kIdBaseStart == Type::kIdU64 ? Reg::kTypeGpq : \ + index + Type::_kIdBaseStart == Type::kIdIntPtr ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdUIntPtr ? Reg::kTypeGpd : \ + index + Type::_kIdBaseStart == Type::kIdF32 ? Reg::kTypeXmm : \ + index + Type::_kIdBaseStart == Type::kIdF64 ? Reg::kTypeXmm : \ + index + Type::_kIdBaseStart == Type::kIdMask8 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask16 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask32 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMask64 ? Reg::kTypeKReg : \ + index + Type::_kIdBaseStart == Type::kIdMmx32 ? Reg::kTypeMm : \ + index + Type::_kIdBaseStart == Type::kIdMmx64 ? Reg::kTypeMm : Reg::kTypeNone) + { ASMJIT_LOOKUP_TABLE_32(V, 0) } + #undef V +}; + +//! \} +//! \endcond + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/x86/x86assembler.cpp b/libs/asmjit/src/asmjit/x86/x86assembler.cpp index d509926..78bc748 100644 --- a/libs/asmjit/src/asmjit/x86/x86assembler.cpp +++ b/libs/asmjit/src/asmjit/x86/x86assembler.cpp @@ -24,14 +24,17 @@ #include "../core/api-build_p.h" #ifdef ASMJIT_BUILD_X86 -#include "../core/codebufferwriter_p.h" +#include "../core/assembler.h" +#include "../core/codewriter_p.h" #include "../core/cpuinfo.h" -#include "../core/logging.h" +#include "../core/emitterutils_p.h" +#include "../core/formatter.h" +#include "../core/logger.h" #include "../core/misc_p.h" #include "../core/support.h" #include "../x86/x86assembler.h" #include "../x86/x86instdb_p.h" -#include "../x86/x86logging_p.h" +#include "../x86/x86formatter_p.h" #include "../x86/x86opcode_p.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) @@ -224,7 +227,7 @@ struct X86MemInfo_T { // - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where // REX.B and REX.X are possibly masked out, but REX.R and REX.W are // kept as is. 
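The lookup tables in these hunks (the ArchTraits register maps above and the x86MemInfo table that follows) are filled at compile time by expanding a per-index `V(index)`/`VALUE(x)` expression through the `ASMJIT_LOOKUP_TABLE_*` macros. A rough equivalent of the same technique in plain C++14, shown only to illustrate the idea (the mapping function and names here are hypothetical, not the patch's real tables):

```
#include <array>
#include <cstdint>
#include <utility>

// Hypothetical per-index mapping; the real tables derive their entries from
// RegTraits<> signatures and Type id ranges.
constexpr uint8_t mapIndexSketch(size_t i) {
  return i == 0 ? 0 : i <= 2 ? 1 : i <= 4 ? 2 : 4;
}

// Expand mapIndexSketch(0..N-1) into a constant table, analogous to what
// ASMJIT_LOOKUP_TABLE_32 does with the V(index) macro.
template<size_t... I>
constexpr std::array<uint8_t, sizeof...(I)> makeTableSketch(std::index_sequence<I...>) {
  return { { mapIndexSketch(I)... } };
}

constexpr auto kTableSketch = makeTableSketch(std::make_index_sequence<32>{});
static_assert(kTableSketch[3] == 2, "index 3 maps to 2 in this sketch");
```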
-#define VALUE(X) X86MemInfo_T::kValue +#define VALUE(x) X86MemInfo_T::kValue static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) }; #undef VALUE @@ -237,23 +240,23 @@ static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) }; // decide between VEX3 vs XOP. // ____ ___ // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP] -#define VALUE(X) ((X & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13) +#define VALUE(x) ((x & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13) static const uint32_t x86VEXPrefix[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) }; #undef VALUE // Table that contains LL opcode field addressed by a register size / 16. It's // used to propagate L.256 or L.512 when YMM or ZMM registers are used, // respectively. -#define VALUE(X) (X & (64 >> 4)) ? Opcode::kLL_2 : \ - (X & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0 +#define VALUE(x) (x & (64 >> 4)) ? Opcode::kLL_2 : \ + (x & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0 static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) }; #undef VALUE // Table that contains LL opcode field addressed by a register size / 16. It's // used to propagate L.256 or L.512 when YMM or ZMM registers are used, // respectively. -#define VALUE(X) X == Reg::kTypeZmm ? Opcode::kLL_2 : \ - X == Reg::kTypeYmm ? Opcode::kLL_1 : Opcode::kLL_0 +#define VALUE(x) x == Reg::kTypeZmm ? Opcode::kLL_2 : \ + x == Reg::kTypeYmm ? Opcode::kLL_1 : Opcode::kLL_0 static const uint32_t x86LLByRegType[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) }; #undef VALUE @@ -275,7 +278,7 @@ struct X86CDisp8SHL_T { }; }; -#define VALUE(X) X86CDisp8SHL_T::kValue +#define VALUE(x) X86CDisp8SHL_T::kValue static const uint32_t x86CDisp8SHL[] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }; #undef VALUE @@ -307,7 +310,7 @@ struct X86Mod16BaseIndexTable_T { }; }; -#define VALUE(X) X86Mod16BaseIndexTable_T::kValue +#define VALUE(x) X86Mod16BaseIndexTable_T::kValue static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_LOOKUP_TABLE_64(VALUE, 0) }; #undef VALUE @@ -372,10 +375,10 @@ static ASMJIT_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) noexc // [asmjit::X86BufferWriter] // ============================================================================ -class X86BufferWriter : public CodeBufferWriter { +class X86BufferWriter : public CodeWriter { public: ASMJIT_INLINE explicit X86BufferWriter(Assembler* a) noexcept - : CodeBufferWriter(a) {} + : CodeWriter(a) {} ASMJIT_INLINE void emitPP(uint32_t opcode) noexcept { uint32_t ppIndex = (opcode >> Opcode::kPP_Shift) & @@ -506,9 +509,9 @@ static ASMJIT_INLINE uint32_t x86GetMovAbsAddrType(Assembler* self, X86BufferWri uint32_t addrType = rmRel.addrType(); int64_t addrValue = rmRel.offset(); - if (addrType == BaseMem::kAddrTypeDefault && !(options & Inst::kOptionModMR)) { + if (addrType == Mem::kAddrTypeDefault && !(options & Inst::kOptionModMR)) { if (self->is64Bit()) { - uint64_t baseAddress = self->codeInfo().baseAddress(); + uint64_t baseAddress = self->code()->baseAddress(); if (baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) { uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel); uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData)); @@ -516,15 +519,15 @@ static ASMJIT_INLINE uint32_t x86GetMovAbsAddrType(Assembler* self, X86BufferWri uint64_t rel64 = uint64_t(addrValue) - rip64; if (!Support::isInt32(int64_t(rel64))) - addrType = BaseMem::kAddrTypeAbs; + addrType = Mem::kAddrTypeAbs; } else { if 
(!Support::isInt32(addrValue)) - addrType = BaseMem::kAddrTypeAbs; + addrType = Mem::kAddrTypeAbs; } } else { - addrType = BaseMem::kAddrTypeAbs; + addrType = Mem::kAddrTypeAbs; } } @@ -545,18 +548,18 @@ Assembler::~Assembler() noexcept {} // [asmjit::x86::Assembler - Emit (Low-Level)] // ============================================================================ -ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) { +ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) { constexpr uint32_t kVSHR_W = Opcode::kW_Shift - 23; constexpr uint32_t kVSHR_PP = Opcode::kPP_Shift - 16; constexpr uint32_t kVSHR_PP_EW = Opcode::kPP_Shift - 16; constexpr uint32_t kRequiresSpecialHandling = - Inst::kOptionReserved | // Logging/Validation/Error. - Inst::kOptionRep | // REP/REPE prefix. - Inst::kOptionRepne | // REPNE prefix. - Inst::kOptionLock | // LOCK prefix. - Inst::kOptionXAcquire | // XACQUIRE prefix. - Inst::kOptionXRelease ; // XRELEASE prefix. + uint32_t(Inst::kOptionReserved) | // Logging/Validation/Error. + uint32_t(Inst::kOptionRep ) | // REP/REPE prefix. + uint32_t(Inst::kOptionRepne ) | // REPNE prefix. + uint32_t(Inst::kOptionLock ) | // LOCK prefix. + uint32_t(Inst::kOptionXAcquire) | // XACQUIRE prefix. + uint32_t(Inst::kOptionXRelease) ; // XRELEASE prefix. Error err; @@ -595,12 +598,12 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // instruction) are handled by the next branch. options = uint32_t(instId == 0); options |= uint32_t((size_t)(_bufferEnd - writer.cursor()) < 16); - options |= uint32_t(instOptions() | globalInstOptions()); + options |= uint32_t(instOptions() | forcedInstOptions()); // Handle failure and rare cases first. if (ASMJIT_UNLIKELY(options & kRequiresSpecialHandling)) { if (ASMJIT_UNLIKELY(!_code)) - return DebugUtils::errored(kErrorNotInitialized); + return reportError(DebugUtils::errored(kErrorNotInitialized)); // Unknown instruction. if (ASMJIT_UNLIKELY(instId == 0)) @@ -613,25 +616,13 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c #ifndef ASMJIT_NO_VALIDATION // Strict validation. - if (hasEmitterOption(kOptionStrictValidation)) { + if (hasValidationOption(kValidationOptionAssembler)) { Operand_ opArray[Globals::kMaxOpCount]; + EmitterUtils::opArrayFromEmitArgs(opArray, o0, o1, o2, opExt); - opArray[0].copyFrom(o0); - opArray[1].copyFrom(o1); - opArray[2].copyFrom(o2); - opArray[3].copyFrom(o3); - - if (options & Inst::kOptionOp4Op5Used) { - opArray[4].copyFrom(_op4); - opArray[5].copyFrom(_op5); - } - else { - opArray[4].reset(); - opArray[5].reset(); - } - - err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount); - if (ASMJIT_UNLIKELY(err)) goto Failed; + err = InstAPI::validate(arch(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount); + if (ASMJIT_UNLIKELY(err)) + goto Failed; } #endif @@ -671,13 +662,21 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // This sequence seems to be the fastest. 
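// The validation path above now keys off hasValidationOption(kValidationOptionAssembler)
// and packs operands via EmitterUtils::opArrayFromEmitArgs() before calling
// InstAPI::validate(). A minimal usage sketch of turning that check on follows;
// it relies on the BaseEmitter::addValidationOptions() setter that the updated
// x86assembler.h documentation further below also demonstrates.

#include <asmjit/x86.h>
#include <stdio.h>

void enableStrictValidation(asmjit::x86::Assembler& a) {
  using namespace asmjit;

  // Run InstAPI::validate() before every instruction is encoded.
  a.addValidationOptions(BaseEmitter::kValidationOptionAssembler);

  // An ill-formed instruction now fails with a descriptive error instead of
  // being encoded blindly (EAX and AL have incompatible sizes for MOV).
  Error err = a.mov(x86::eax, x86::al);
  if (err)
    printf("Status: %s\n", DebugUtils::errorAsString(err));
}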
opcode = InstDB::_mainOpcodeTable[instInfo->_mainOpcodeIndex]; - opReg = opcode.extractO(); + opReg = opcode.extractModO(); + rbReg = 0; opcode |= instInfo->_mainOpcodeValue; // -------------------------------------------------------------------------- // [Encoding Scope] // -------------------------------------------------------------------------- + // How it works? Each case here represents a unique encoding of a group of + // instructions, which is handled separately. The handlers check instruction + // signature, possibly register types, etc, and process this information by + // writing some bits to opcode, opReg/rbReg, immValue/immSize, etc, and then + // at the end of the sequence it uses goto to jump into a lower level handler, + // that actually encodes the instruction. + switch (instInfo->_encoding) { case InstDB::kEncodingNone: goto EmitDone; @@ -689,16 +688,17 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingX86Op: goto EmitX86Op; - case InstDB::kEncodingX86Op_O_I8: + case InstDB::kEncodingX86Op_Mod11RM: + rbReg = opcode.extractModRM(); + goto EmitX86R; + + case InstDB::kEncodingX86Op_Mod11RM_I8: if (ASMJIT_UNLIKELY(isign3 != ENC_OPS1(Imm))) goto InvalidInstruction; - immValue = o0.as().u8(); + rbReg = opcode.extractModRM(); + immValue = o0.as().valueAs(); immSize = 1; - ASMJIT_FALLTHROUGH; - - case InstDB::kEncodingX86Op_O: - rbReg = 0; goto EmitX86R; case InstDB::kEncodingX86Op_xAddr: @@ -738,14 +738,14 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingX86I_xAX: // Implicit form. if (isign3 == ENC_OPS1(Imm)) { - immValue = o0.as().u8(); + immValue = o0.as().valueAs(); immSize = 1; goto EmitX86Op; } // Explicit form. if (isign3 == ENC_OPS2(Reg, Imm) && o0.id() == Gp::kIdAx) { - immValue = o1.as().u8(); + immValue = o1.as().valueAs(); immSize = 1; goto EmitX86Op; } @@ -846,21 +846,61 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (isign3 == ENC_OPS1(None)) goto EmitX86Op; - // Multi-byte NOP instruction "0F 1F /0". + // Single operand NOP instruction "0F 1F /0". opcode = Opcode::k000F00 | 0x1F; opReg = 0; if (isign3 == ENC_OPS1(Reg)) { - opcode.add66hBySize(o0.size()); + opcode.addPrefixBySize(o0.size()); rbReg = o0.id(); goto EmitX86R; } if (isign3 == ENC_OPS1(Mem)) { - opcode.add66hBySize(o0.size()); + opcode.addPrefixBySize(o0.size()); rmRel = &o0; goto EmitX86M; } + + // Two operand NOP instruction "0F 1F /r". + opReg = o1.id(); + opcode.addPrefixBySize(o1.size()); + + if (isign3 == ENC_OPS2(Reg, Reg)) { + rbReg = o0.id(); + goto EmitX86R; + } + + if (isign3 == ENC_OPS2(Mem, Reg)) { + rmRel = &o0; + goto EmitX86M; + } + break; + + case InstDB::kEncodingX86R_FromM: + if (isign3 == ENC_OPS1(Mem)) { + rmRel = &o0; + rbReg = o0.id(); + goto EmitX86RFromM; + } + break; + + case InstDB::kEncodingX86R32_EDX_EAX: + // Explicit form: R32, EDX, EAX. + if (isign3 == ENC_OPS3(Reg, Reg, Reg)) { + if (!Reg::isGpd(o1, Gp::kIdDx) || !Reg::isGpd(o2, Gp::kIdAx)) + goto InvalidInstruction; + rbReg = o0.id(); + goto EmitX86R; + } + + // Implicit form: R32. 
+ if (isign3 == ENC_OPS1(Reg)) { + if (!Reg::isGpd(o0)) + goto InvalidInstruction; + rbReg = o0.id(); + goto EmitX86R; + } break; case InstDB::kEncodingX86R_Native: @@ -996,7 +1036,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c uint32_t size = o0.size(); rbReg = o0.id(); - immValue = o1.as().i64(); + immValue = o1.as().value(); if (size == 1) { FIXUP_GPB(o0, rbReg); @@ -1022,8 +1062,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c else goto InvalidImmediate; } - else if (canTransformTo32Bit && hasEmitterOption(kOptionOptimizedForSize)) { - // This is a code-size optimization. + else if (canTransformTo32Bit && hasEncodingOption(kEncodingOptionOptimizeForSize)) { size = 4; } @@ -1053,7 +1092,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (ASMJIT_UNLIKELY(memSize == 0)) goto AmbiguousOperandSize; - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = FastUInt8(Support::min(memSize, 4)); // Sign extend so isInt8 returns the right result. @@ -1098,12 +1137,12 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } // The remaining instructions use the secondary opcode/r. - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = 1; opcode = x86AltOpcodeOf(instInfo); opcode.addPrefixBySize(o0.size()); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); if (isign3 == ENC_OPS2(Reg, Imm)) { rbReg = o0.id(); @@ -1175,8 +1214,11 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } case InstDB::kEncodingX86Cmpxchg8b_16b: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; + const Operand_& o4 = opExt[EmitterUtils::kOp4]; + if (isign3 == ENC_OPS3(Mem, Reg, Reg)) { - if (o3.isReg() && _op4.isReg()) { + if (o3.isReg() && o4.isReg()) { rmRel = &o0; goto EmitX86M; } @@ -1224,8 +1266,8 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingX86Enter: if (isign3 == ENC_OPS2(Imm, Imm)) { - uint32_t iw = o0.as().u16(); - uint32_t ib = o1.as().u8(); + uint32_t iw = o0.as().valueAs(); + uint32_t ib = o1.as().valueAs(); immValue = iw | (ib << 16); immSize = 3; @@ -1239,7 +1281,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = 0x6B; opcode.addPrefixBySize(o0.size()); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) { @@ -1257,7 +1299,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = 0x6B; opcode.addPrefixBySize(o0.size()); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; // Sign extend so isInt8 returns the right result. @@ -1309,7 +1351,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = 0x6B; opcode.addPrefixBySize(o0.size()); - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = 1; // Sign extend so isInt8 returns the right result. 
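// Operands beyond o2 now reach _emit() through the opExt[] array
// (EmitterUtils::kOp3..kOp5) instead of the former _op4/_op5 members and the
// kOptionOp4Op5Used flag, as the kEncodingX86Cmpxchg8b_16b case above shows.
// User code is unaffected; instructions with four or more explicit operands
// still go through the usual typed or generic emit calls, for example:

#include <asmjit/x86.h>

void fourOperandForms(asmjit::x86::Assembler& a) {
  using namespace asmjit;

  // Typed API - VPBLENDVB takes four explicit operands (VexRvmr encoding);
  // the fourth operand is handed to the encoder via opExt[EmitterUtils::kOp3].
  a.vpblendvb(x86::xmm0, x86::xmm1, x86::xmm2, x86::xmm3);

  // Generic API - the same instruction through emit() with an instruction id.
  a.emit(x86::Inst::kIdVpblendvb, x86::xmm0, x86::xmm1, x86::xmm2, x86::xmm3);
}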
@@ -1333,7 +1375,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx)) goto InvalidInstruction; - immValue = o1.as().u8(); + immValue = o1.as().valueAs(); immSize = 1; opcode = x86AltOpcodeOf(instInfo) + (o0.size() != 1); @@ -1398,18 +1440,16 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingX86Int: if (isign3 == ENC_OPS1(Imm)) { - immValue = o0.as().i64(); + immValue = o0.as().value(); immSize = 1; goto EmitX86Op; } break; case InstDB::kEncodingX86Jcc: - if (_emitterOptions & kOptionPredictedJumps) { - if (options & Inst::kOptionTaken) - writer.emit8(0x3E); - if (options & Inst::kOptionNotTaken) - writer.emit8(0x2E); + if ((options & (Inst::kOptionTaken | Inst::kOptionNotTaken)) && hasEncodingOption(kEncodingOptionPredictedJumps)) { + uint8_t prefix = (options & Inst::kOptionTaken) ? uint8_t(0x3E) : uint8_t(0x2E); + writer.emit8(prefix); } rmRel = &o0; @@ -1597,7 +1637,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD. if (opReg == Gp::kIdAx && !rmRel->as().hasBaseOrIndex()) { immValue = rmRel->as().offset(); - if (x86GetMovAbsAddrType(this, writer, o0.size(), options, rmRel->as()) == BaseMem::kAddrTypeAbs) { + if (x86GetMovAbsAddrType(this, writer, o0.size(), options, rmRel->as()) == Mem::kAddrTypeAbs) { opcode += 0xA0; goto EmitX86OpMovAbs; } @@ -1630,7 +1670,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD. if (opReg == Gp::kIdAx && !rmRel->as().hasBaseOrIndex()) { immValue = rmRel->as().offset(); - if (x86GetMovAbsAddrType(this, writer, o1.size(), options, rmRel->as()) == BaseMem::kAddrTypeAbs) { + if (x86GetMovAbsAddrType(this, writer, o1.size(), options, rmRel->as()) == Mem::kAddrTypeAbs) { opcode += 0xA2; goto EmitX86OpMovAbs; } @@ -1649,16 +1689,16 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c FIXUP_GPB(o0, opReg); opcode = 0xB0; - immValue = o1.as().u8(); + immValue = o1.as().valueAs(); goto EmitX86OpReg; } else { // 64-bit immediate in 64-bit mode is allowed. - immValue = o1.as().i64(); + immValue = o1.as().value(); // Optimize the instruction size by using a 32-bit immediate if possible. if (immSize == 8 && !(options & Inst::kOptionLongForm)) { - if (Support::isUInt32(immValue) && hasEmitterOption(kOptionOptimizedForSize)) { + if (Support::isUInt32(immValue) && hasEncodingOption(kEncodingOptionOptimizeForSize)) { // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ. 
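// Size optimization and branch-prediction prefixes are now opt-in encoding
// options queried through hasEncodingOption() rather than the old emitter
// options. A rough sketch of enabling them from user code; the
// addEncodingOptions() and addInstOptions() setters are assumed to exist on
// BaseEmitter alongside the option constants referenced in these hunks.

#include <asmjit/x86.h>

void tuneEncoding(asmjit::x86::Assembler& a, const asmjit::Label& target) {
  using namespace asmjit;

  // Prefer shorter encodings where possible and allow 2E/3E hint prefixes.
  a.addEncodingOptions(BaseEmitter::kEncodingOptionOptimizeForSize |
                       BaseEmitter::kEncodingOptionPredictedJumps);

  // Mark the next conditional jump as likely taken; with the option enabled
  // the assembler emits the 0x3E prefix in front of this JNZ.
  a.addInstOptions(Inst::kOptionTaken);
  a.jnz(target);
}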
immSize = 4; } @@ -1690,7 +1730,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opReg = 0; rmRel = &o0; - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = FastUInt8(Support::min(memSize, 4)); goto EmitX86M; } @@ -1753,7 +1793,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = x86AltOpcodeOf(instInfo) + (o1.size() != 1); opcode.add66hBySize(o1.size()); - immValue = o0.as().u8(); + immValue = o0.as().valueAs(); immSize = 1; goto EmitX86Op; } @@ -1800,7 +1840,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } if (isign3 == ENC_OPS1(Imm)) { - immValue = o0.as().i64(); + immValue = o0.as().value(); immSize = 4; if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm)) @@ -1840,7 +1880,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (ASMJIT_UNLIKELY(o0.size() == 0)) goto AmbiguousOperandSize; - if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != gpSize())) + if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != registerSize())) goto InvalidInstruction; opcode.add66hBySize(o0.size()); @@ -1857,7 +1897,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } if (isign3 == ENC_OPS1(Imm)) { - immValue = o0.as().i64(); + immValue = o0.as().value(); if (immValue == 0 && !(options & Inst::kOptionLongForm)) { // 'ret' without immediate, change C2 to C3. opcode.add(1); @@ -1887,7 +1927,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } if (isign3 == ENC_OPS2(Reg, Imm)) { - immValue = o1.as().i64() & 0xFF; + immValue = o1.as().value() & 0xFF; immSize = 0; if (immValue == 1 && !(options & Inst::kOptionLongForm)) @@ -1915,7 +1955,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c goto AmbiguousOperandSize; rmRel = &o0; - immValue = o1.as().i64() & 0xFF; + immValue = o1.as().value() & 0xFF; immSize = 0; if (immValue == 1 && !(options & Inst::kOptionLongForm)) @@ -1947,7 +1987,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opReg = o1.id(); rbReg = o0.id(); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; goto EmitX86R; } @@ -1957,7 +1997,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opReg = o1.id(); rmRel = &o0; - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; goto EmitX86M; } @@ -2069,7 +2109,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // The following instructions use the secondary opcode. 
opcode = x86AltOpcodeOf(instInfo); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); if (isign3 == ENC_OPS2(Reg, Imm)) { opcode.addArithBySize(o0.size()); @@ -2077,11 +2117,11 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (o0.size() == 1) { FIXUP_GPB(o0, rbReg); - immValue = o1.as().u8(); + immValue = o1.as().valueAs(); immSize = 1; } else { - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = FastUInt8(Support::min(o0.size(), 4)); } @@ -2102,7 +2142,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode.addArithBySize(o0.size()); rmRel = &o0; - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = FastUInt8(Support::min(o0.size(), 4)); goto EmitX86M; } @@ -2270,7 +2310,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (o0.size() == 10 && commonInfo->hasFlag(InstDB::kFlagFpuM80)) { opcode = x86AltOpcodeOf(instInfo); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); goto EmitX86M; } } @@ -2299,7 +2339,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) { opcode = x86AltOpcodeOf(instInfo) & ~uint32_t(Opcode::kCDSHL_Mask); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); goto EmitX86M; } } @@ -2345,7 +2385,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { opcode.add66hIf(Reg::isXmm(o1)); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; opReg = o0.id(); @@ -2358,7 +2398,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = x86AltOpcodeOf(instInfo); opcode.add66hIf(Reg::isXmm(o1)); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; opReg = o1.id(); @@ -2371,7 +2411,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { opcode.add66hIf(Reg::isXmm(o1)); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; opReg = o1.id(); @@ -2382,7 +2422,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (isign3 == ENC_OPS3(Mem, Reg, Imm)) { opcode.add66hIf(Reg::isXmm(o1)); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; opReg = o1.id(); @@ -2618,10 +2658,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // The following instruction uses the secondary opcode. opcode = x86AltOpcodeOf(instInfo); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); if (isign3 == ENC_OPS2(Reg, Imm)) { - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = 1; rbReg = o0.id(); @@ -2648,12 +2688,12 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // The following instruction uses the secondary opcode. 
opcode = x86AltOpcodeOf(instInfo); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); if (isign3 == ENC_OPS2(Reg, Imm)) { opcode.add66hIf(Reg::isXmm(o0)); - immValue = o1.as().i64(); + immValue = o1.as().value(); immSize = 1; rbReg = o0.id(); @@ -2662,7 +2702,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c break; case InstDB::kEncodingExtRmi: - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -2679,7 +2719,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c break; case InstDB::kEncodingExtRmi_P: - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -2714,17 +2754,19 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = x86AltOpcodeOf(instInfo); if (isign3 == ENC_OPS3(Reg, Imm, Imm)) { - immValue = (o1.as().u32() ) + - (o2.as().u32() << 8) ; + immValue = (uint32_t(o1.as().valueAs()) ) + + (uint32_t(o2.as().valueAs()) << 8) ; immSize = 2; - rbReg = opcode.extractO(); + rbReg = opcode.extractModO(); goto EmitX86R; } break; case InstDB::kEncodingExtInsertq: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); + opReg = o0.id(); rbReg = o1.id(); @@ -2735,8 +2777,8 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode = x86AltOpcodeOf(instInfo); if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) { - immValue = (o2.as().u32() ) + - (o3.as().u32() << 8) ; + immValue = (uint32_t(o2.as().valueAs()) ) + + (uint32_t(o3.as().valueAs()) << 8) ; immSize = 2; goto EmitX86R; } @@ -2774,6 +2816,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingVexOp: goto EmitVexEvexOp; + case InstDB::kEncodingVexOpMod: + rbReg = 0; + goto EmitVexEvexR; + case InstDB::kEncodingVexKmov: if (isign3 == ENC_OPS2(Reg, Reg)) { opReg = o0.id(); @@ -2869,7 +2915,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ASMJIT_FALLTHROUGH; case InstDB::kEncodingVexMri: - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -2934,24 +2980,22 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c break; case InstDB::kEncodingVexRm_T1_4X: { - if (!(options & Inst::kOptionOp4Op5Used)) - goto InvalidInstruction; - - if (Reg::isZmm(o0 ) && Reg::isZmm(o1) && - Reg::isZmm(o2 ) && Reg::isZmm(o3) && - Reg::isZmm(_op4) && _op5.isMem()) { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; + const Operand_& o4 = opExt[EmitterUtils::kOp4]; + const Operand_& o5 = opExt[EmitterUtils::kOp5]; - // Registers [o1, o2, o3, _op4] must start aligned and must be consecutive. + if (Reg::isZmm(o0) && Reg::isZmm(o1) && Reg::isZmm(o2) && Reg::isZmm(o3) && Reg::isZmm(o4) && o5.isMem()) { + // Registers [o1, o2, o3, o4] must start aligned and must be consecutive. 
uint32_t i1 = o1.id(); uint32_t i2 = o2.id(); uint32_t i3 = o3.id(); - uint32_t i4 = _op4.id(); + uint32_t i4 = o4.id(); if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3)) goto NotConsecutiveRegs; opReg = o0.id(); - rmRel = &_op5; + rmRel = &o5; goto EmitVexEvexM; } break; @@ -2967,7 +3011,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingVexRmi: CaseVexRmi: - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -2999,25 +3043,58 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } break; - case InstDB::kEncodingVexRvm_ZDX_Wx: + case InstDB::kEncodingVexRvm_ZDX_Wx: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; if (ASMJIT_UNLIKELY(!o3.isNone() && !Reg::isGp(o3, Gp::kIdDx))) goto InvalidInstruction; ASMJIT_FALLTHROUGH; + } - case InstDB::kEncodingVexRvm_Wx: + case InstDB::kEncodingVexRvm_Wx: { opcode.addWIf(Reg::isGpq(o0) | (o2.size() == 8)); goto CaseVexRvm; + } - case InstDB::kEncodingVexRvm_Lx: + case InstDB::kEncodingVexRvm_Lx: { opcode |= x86OpcodeLBySize(o0.size() | o1.size()); goto CaseVexRvm; + } + + case InstDB::kEncodingVexRvm_Lx_2xK: { + if (isign3 == ENC_OPS3(Reg, Reg, Reg)) { + // Two registers are encoded as a single register. + // - First K register must be even. + // - Second K register must be first+1. + if ((o0.id() & 1) != 0 || o0.id() + 1 != o1.id()) + goto InvalidPhysId; + + const Operand_& o3 = opExt[EmitterUtils::kOp3]; + + opcode |= x86OpcodeLBySize(o2.size()); + opReg = x86PackRegAndVvvvv(o0.id(), o2.id()); + + if (o3.isReg()) { + rbReg = o3.id(); + goto EmitVexEvexR; + } + + if (o3.isMem()) { + rmRel = &o3; + goto EmitVexEvexM; + } + } + break; + } - case InstDB::kEncodingVexRvmr_Lx: + case InstDB::kEncodingVexRvmr_Lx: { opcode |= x86OpcodeLBySize(o0.size() | o1.size()); ASMJIT_FALLTHROUGH; + } case InstDB::kEncodingVexRvmr: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); + immValue = o3.id() << 4; immSize = 1; @@ -3040,8 +3117,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ASMJIT_FALLTHROUGH; case InstDB::kEncodingVexRvmi: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); - immValue = o3.as().i64(); + + immValue = o3.as().value(); immSize = 1; if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { @@ -3100,8 +3179,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingVexRmvi: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); - immValue = o3.as().i64(); + + immValue = o3.as().value(); immSize = 1; if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) { @@ -3249,7 +3330,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opcode &= Opcode::kLL_Mask; opcode |= x86AltOpcodeOf(instInfo); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -3295,7 +3376,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // The following instructions use the secondary opcode. 
opcode = x86AltOpcodeOf(instInfo); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -3388,9 +3469,9 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // The following instruction uses the secondary opcode. opcode &= Opcode::kLL_Mask; opcode |= x86AltOpcodeOf(instInfo); - opReg = opcode.extractO(); + opReg = opcode.extractModO(); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; if (isign3 == ENC_OPS3(Reg, Reg, Imm)) { @@ -3434,7 +3515,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ASMJIT_FALLTHROUGH; case InstDB::kEncodingVexVmi: - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 1; CaseVexVmi_AfterImm: @@ -3453,7 +3534,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c case InstDB::kEncodingVexVmi4_Wx: opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8); - immValue = o2.as().i64(); + immValue = o2.as().value(); immSize = 4; goto CaseVexVmi_AfterImm; @@ -3462,6 +3543,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ASMJIT_FALLTHROUGH; case InstDB::kEncodingVexRvrmRvmr: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { @@ -3495,13 +3577,16 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } case InstDB::kEncodingVexRvrmiRvmri_Lx: { - if (!(options & Inst::kOptionOp4Op5Used) || !_op4.isImm()) + const Operand_& o3 = opExt[EmitterUtils::kOp3]; + const Operand_& o4 = opExt[EmitterUtils::kOp4]; + + if (ASMJIT_UNLIKELY(!o4.isImm())) goto InvalidInstruction; const uint32_t isign4 = isign3 + (o3.opType() << 9); opcode |= x86OpcodeLBySize(o0.size() | o1.size() | o2.size() | o3.size()); - immValue = _op4.as().u8() & 0x0F; + immValue = o4.as().valueAs() & 0x0F; immSize = 1; if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { @@ -3560,6 +3645,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ASMJIT_FALLTHROUGH; case InstDB::kEncodingFma4: { + const Operand_& o3 = opExt[EmitterUtils::kOp3]; const uint32_t isign4 = isign3 + (o3.opType() << 9); if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) { @@ -3591,6 +3677,49 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } break; } + + // ------------------------------------------------------------------------ + // [AMX] + // ------------------------------------------------------------------------ + + case InstDB::kEncodingAmxCfg: + if (isign3 == ENC_OPS1(Mem)) { + rmRel = &o0; + goto EmitVexEvexM; + } + break; + + case InstDB::kEncodingAmxR: + if (isign3 == ENC_OPS1(Reg)) { + opReg = o0.id(); + rbReg = 0; + goto EmitVexEvexR; + } + break; + + case InstDB::kEncodingAmxRm: + if (isign3 == ENC_OPS2(Reg, Mem)) { + opReg = o0.id(); + rmRel = &o1; + goto EmitVexEvexM; + } + break; + + case InstDB::kEncodingAmxMr: + if (isign3 == ENC_OPS2(Mem, Reg)) { + opReg = o1.id(); + rmRel = &o0; + goto EmitVexEvexM; + } + break; + + case InstDB::kEncodingAmxRmv: + if (isign3 == ENC_OPS3(Reg, Reg, Reg)) { + opReg = x86PackRegAndVvvvv(o0.id(), o2.id()); + rbReg = o1.id(); + goto EmitVexEvexR; + } + break; } goto InvalidInstruction; @@ -3600,7 +3729,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // -------------------------------------------------------------------------- EmitX86OpMovAbs: - immSize = 
FastUInt8(gpSize()); + immSize = FastUInt8(registerSize()); writer.emitSegmentOverride(rmRel->as().segmentId()); EmitX86Op: @@ -3621,6 +3750,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c writer.emitImmediate(uint64_t(immValue), immSize); goto EmitDone; + // -------------------------------------------------------------------------- + // [Emit - X86 - Opcode + Reg] + // -------------------------------------------------------------------------- + EmitX86OpReg: // Emit mandatory instruction prefix. writer.emitPP(opcode.v); @@ -3642,8 +3775,11 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c writer.emitImmediate(uint64_t(immValue), immSize); goto EmitDone; + // -------------------------------------------------------------------------- + // [Emit - X86 - Opcode with implicit operand] + // -------------------------------------------------------------------------- + EmitX86OpImplicitMem: - // NOTE: Don't change the emit order here, it's compatible with KeyStone/LLVM. rmInfo = x86MemInfo[rmRel->as().baseAndIndexTypes()]; if (ASMJIT_UNLIKELY(rmRel->as().hasOffset() || (rmInfo & kX86MemInfo_Index))) goto InvalidInstruction; @@ -3660,19 +3796,63 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c writer.emit8If(rex | kX86ByteRex, rex != 0); } + // Emit override prefixes. writer.emitSegmentOverride(rmRel->as().segmentId()); writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0); // Emit instruction opcodes. writer.emitMMAndOpcode(opcode.v); + + // Emit immediate value. writer.emitImmediate(uint64_t(immValue), immSize); goto EmitDone; + // -------------------------------------------------------------------------- + // [Emit - X86 - Opcode /r - register] + // -------------------------------------------------------------------------- + EmitX86R: // Mandatory instruction prefix. writer.emitPP(opcode.v); - // Rex prefix (64-bit only). + // Emit REX prefix (64-bit only). + { + uint32_t rex = opcode.extractRex(options) | + ((opReg & 0x08) >> 1) | // REX.R (0x04). + ((rbReg & 0x08) >> 3) ; // REX.B (0x01). + + if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex))) + goto InvalidRexPrefix; + rex &= ~kX86ByteInvalidRex & 0xFF; + writer.emit8If(rex | kX86ByteRex, rex != 0); + + opReg &= 0x07; + rbReg &= 0x07; + } + + // Emit instruction opcodes. + writer.emitMMAndOpcode(opcode.v); + + // Emit ModR. + writer.emit8(x86EncodeMod(3, opReg, rbReg)); + + // Emit immediate value. + writer.emitImmediate(uint64_t(immValue), immSize); + goto EmitDone; + + // -------------------------------------------------------------------------- + // [Emit - X86 - Opcode /r - memory base] + // -------------------------------------------------------------------------- + +EmitX86RFromM: + rmInfo = x86MemInfo[rmRel->as().baseAndIndexTypes()]; + if (ASMJIT_UNLIKELY(rmRel->as().hasOffset() || (rmInfo & kX86MemInfo_Index))) + goto InvalidInstruction; + + // Emit mandatory instruction prefix. + writer.emitPP(opcode.v); + + // Emit REX prefix (64-bit only). { uint32_t rex = opcode.extractRex(options) | ((opReg & 0x08) >> 1) | // REX.R (0x04). @@ -3687,32 +3867,43 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c rbReg &= 0x07; } - // Instruction opcodes. + // Emit override prefixes. + writer.emitSegmentOverride(rmRel->as().segmentId()); + writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0); + + // Emit instruction opcodes. writer.emitMMAndOpcode(opcode.v); - // ModR. + + // Emit ModR/M. 
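// The new kEncodingAmx* cases added earlier in this switch wire up Intel AMX
// tile instructions. The sketch below is only illustrative: it assumes the
// mnemonic-named emitter methods (ldtilecfg, tileloadd, tdpbssd, tilestored,
// tilerelease) and the x86::tmm registers that those encodings imply.

#include <asmjit/x86.h>

void amxSketch(asmjit::x86::Assembler& a,
               asmjit::x86::Gp cfg, asmjit::x86::Gp buf, asmjit::x86::Gp stride) {
  using namespace asmjit;

  a.ldtilecfg(x86::ptr(cfg));                             // kEncodingAmxCfg - load tile configuration.
  a.tileloadd(x86::tmm0, x86::ptr(buf, stride));          // kEncodingAmxRm  - load a tile (SIB addressing).
  a.tileloadd(x86::tmm1, x86::ptr(buf, stride, 0, 64));
  a.tdpbssd(x86::tmm2, x86::tmm0, x86::tmm1);             // kEncodingAmxRmv - int8 dot-product accumulate.
  a.tilestored(x86::ptr(buf, stride, 0, 128), x86::tmm2); // kEncodingAmxMr  - store the result tile.
  a.tilerelease();                                        // Return tile state to init.
}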
writer.emit8(x86EncodeMod(3, opReg, rbReg)); + + // Emit immediate value. writer.emitImmediate(uint64_t(immValue), immSize); goto EmitDone; + // -------------------------------------------------------------------------- + // [Emit - X86 - Opcode /r - memory operand] + // -------------------------------------------------------------------------- + EmitX86M: // `rmRel` operand must be memory. ASMJIT_ASSERT(rmRel != nullptr); ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem); ASMJIT_ASSERT((opcode & Opcode::kCDSHL_Mask) == 0); + // Emit override prefixes. rmInfo = x86MemInfo[rmRel->as().baseAndIndexTypes()]; writer.emitSegmentOverride(rmRel->as().segmentId()); memOpAOMark = writer.cursor(); writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0); - // Mandatory instruction prefix. + // Emit mandatory instruction prefix. writer.emitPP(opcode.v); + // Emit REX prefix (64-bit only). rbReg = rmRel->as().baseId(); rxReg = rmRel->as().indexId(); - - // REX prefix (64-bit only). { uint32_t rex; @@ -3731,8 +3922,9 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c opReg &= 0x07; } - // Instruction opcodes. + // Emit instruction opcodes. writer.emitMMAndOpcode(opcode.v); + // ... Fall through ... // -------------------------------------------------------------------------- @@ -3747,25 +3939,28 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c relOffset = rmRel->as().offsetLo32(); uint32_t mod = x86EncodeMod(0, opReg, rbReg); - if (rbReg == Gp::kIdSp) { - // [XSP|R12]. - if (relOffset == 0) { + bool forceSIB = commonInfo->isTsibOp(); + + if (rbReg == Gp::kIdSp || forceSIB) { + // TSIB or [XSP|R12]. + mod = (mod & 0xF8u) | 0x04u; + if (rbReg != Gp::kIdBp && relOffset == 0) { writer.emit8(mod); - writer.emit8(x86EncodeSib(0, 4, 4)); + writer.emit8(x86EncodeSib(0, 4, rbReg)); } - // [XSP|R12 + DISP8|DISP32]. + // TSIB or [XSP|R12 + DISP8|DISP32]. else { uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift; int32_t cdOffset = relOffset >> cdShift; if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) { writer.emit8(mod + 0x40); // <- MOD(1, opReg, rbReg). - writer.emit8(x86EncodeSib(0, 4, 4)); + writer.emit8(x86EncodeSib(0, 4, rbReg)); writer.emit8(cdOffset & 0xFF); } else { writer.emit8(mod + 0x80); // <- MOD(2, opReg, rbReg). - writer.emit8(x86EncodeSib(0, 4, 4)); + writer.emit8(x86EncodeSib(0, 4, rbReg)); writer.emit32uLE(uint32_t(relOffset)); } } @@ -3796,7 +3991,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (is32Bit()) { // Explicit relative addressing doesn't work in 32-bit mode. - if (ASMJIT_UNLIKELY(addrType == BaseMem::kAddrTypeRel)) + if (ASMJIT_UNLIKELY(addrType == Mem::kAddrTypeRel)) goto InvalidAddress; writer.emit8(x86EncodeMod(0, opReg, 5)); @@ -3805,16 +4000,16 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c else { bool isOffsetI32 = rmRel->as().offsetHi32() == (relOffset >> 31); bool isOffsetU32 = rmRel->as().offsetHi32() == 0; - uint64_t baseAddress = codeInfo().baseAddress(); + uint64_t baseAddress = code()->baseAddress(); // If relative addressing was not explicitly set then we can try to guess. // By guessing we check some properties of the memory operand and try to // base the decision on the segment prefix and the address type. 
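// In practice the guessing described above means: without a base address the
// encoder prefers absolute addressing for 32-bit offsets, and with a known
// base address it can emit RIP-relative forms instead. A short sketch of
// supplying that base address, assuming CodeHolder::init() still accepts an
// optional base address next to the Environment:

#include <asmjit/x86.h>

void initWithBaseAddress(asmjit::CodeHolder& code, const asmjit::JitRuntime& rt) {
  using namespace asmjit;

  // Hypothetical load address of the generated code; once it is known, memory
  // operands given as absolute offsets can be encoded as REL32 displacements.
  uint64_t baseAddress = 0x140000000u;
  code.init(rt.environment(), baseAddress);
}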
- if (addrType == BaseMem::kAddrTypeDefault) { + if (addrType == Mem::kAddrTypeDefault) { if (baseAddress == Globals::kNoBaseAddress) { // Prefer absolute addressing mode if the offset is 32-bit. - addrType = isOffsetI32 || isOffsetU32 ? BaseMem::kAddrTypeAbs - : BaseMem::kAddrTypeRel; + addrType = isOffsetI32 || isOffsetU32 ? Mem::kAddrTypeAbs + : Mem::kAddrTypeRel; } else { // Prefer absolute addressing mode if FS|GS segment override is present. @@ -3822,30 +4017,30 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // Prefer absolute addressing mode if this is LEA with 32-bit immediate. bool isLea32 = (instId == Inst::kIdLea) && (isOffsetI32 || isOffsetU32); - addrType = hasFsGs || isLea32 ? BaseMem::kAddrTypeAbs - : BaseMem::kAddrTypeRel; + addrType = hasFsGs || isLea32 ? Mem::kAddrTypeAbs + : Mem::kAddrTypeRel; } } - if (addrType == BaseMem::kAddrTypeRel) { + if (addrType == Mem::kAddrTypeRel) { uint32_t kModRel32Size = 5; uint64_t virtualOffset = uint64_t(writer.offsetFrom(_bufferData)) + immSize + kModRel32Size; - if (baseAddress == Globals::kNoBaseAddress) { + if (baseAddress == Globals::kNoBaseAddress || _section->id() != 0) { // Create a new RelocEntry as we cannot calculate the offset right now. - err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 4); + err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel); if (ASMJIT_UNLIKELY(err)) goto Failed; writer.emit8(x86EncodeMod(0, opReg, 5)); - writer.emit32uLE(0); re->_sourceSectionId = _section->id(); re->_sourceOffset = offset(); - re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4); - re->_trailingSize = uint8_t(immSize); + re->_format.resetToDataValue(4); + re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize); re->_payload = uint64_t(rmRel->as().offset()); + writer.emit32uLE(0); writer.emitImmediate(uint64_t(immValue), immSize); goto EmitDone; } @@ -3936,14 +4131,14 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c if (ASMJIT_UNLIKELY(!label)) goto InvalidLabel; - err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4); + err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs); if (ASMJIT_UNLIKELY(err)) goto Failed; re->_sourceSectionId = _section->id(); re->_sourceOffset = offset(); - re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr)); - re->_trailingSize = uint8_t(immSize); + re->_format.resetToDataValue(4); + re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize); re->_payload = uint64_t(int64_t(relOffset)); if (label->isBound()) { @@ -3961,16 +4156,16 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } else { // [RIP->ABS]. 
- err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4); + err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs); if (ASMJIT_UNLIKELY(err)) goto Failed; re->_sourceSectionId = _section->id(); re->_targetSectionId = _section->id(); + re->_format.resetToDataValue(4); + re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize); re->_sourceOffset = offset(); - re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr)); - re->_trailingSize = uint8_t(immSize); - re->_payload = re->_sourceOffset + re->_leadingSize + 4 + re->_trailingSize + uint64_t(int64_t(relOffset)); + re->_payload = re->_sourceOffset + re->_format.regionSize() + uint64_t(int64_t(relOffset)); writer.emit32uLE(0); } @@ -4137,7 +4332,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c goto EmitDone; // -------------------------------------------------------------------------- - // [Emit - VEX / EVEX] + // [Emit - VEX|EVEX] // -------------------------------------------------------------------------- EmitVexEvexOp: @@ -4174,6 +4369,10 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } } + // -------------------------------------------------------------------------- + // [Emit - VEX|EVEX - /r (Register)] + // -------------------------------------------------------------------------- + EmitVexEvexR: { // Construct `x` - a complete EVEX|VEX prefix. @@ -4241,7 +4440,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c rbReg &= 0x7; writer.emit8(x86EncodeMod(3, opReg, rbReg)); - writer.emitImmByteOrDWord(immValue, immSize); + writer.emitImmByteOrDWord(uint64_t(immValue), immSize); goto EmitDone; } @@ -4262,7 +4461,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c rbReg &= 0x7; writer.emit8(x86EncodeMod(3, opReg, rbReg)); - writer.emitImmByteOrDWord(immValue, immSize); + writer.emitImmByteOrDWord(uint64_t(immValue), immSize); goto EmitDone; } else { @@ -4276,11 +4475,15 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c rbReg &= 0x7; writer.emit8(x86EncodeMod(3, opReg, rbReg)); - writer.emitImmByteOrDWord(immValue, immSize); + writer.emitImmByteOrDWord(uint64_t(immValue), immSize); goto EmitDone; } } + // -------------------------------------------------------------------------- + // [Emit - VEX|EVEX - /r (Memory)] + // -------------------------------------------------------------------------- + EmitVexEvexM: ASMJIT_ASSERT(rmRel != nullptr); ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem); @@ -4459,15 +4662,15 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } if (rmRel->isImm()) { - uint64_t baseAddress = codeInfo().baseAddress(); - uint64_t jumpAddress = rmRel->as().u64(); + uint64_t baseAddress = code()->baseAddress(); + uint64_t jumpAddress = rmRel->as().valueAs(); // If the base-address is known calculate a relative displacement and // check if it fits in 32 bits (which is always true in 32-bit mode). // Emit relative displacement as it was a bound label if all checks are ok. 
if (baseAddress != Globals::kNoBaseAddress) { uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size; - if (archId() == ArchInfo::kIdX86 || Support::isInt32(int64_t(rel64))) { + if (Environment::is32Bit(arch()) || Support::isInt32(int64_t(rel64))) { rel32 = uint32_t(rel64 & 0xFFFFFFFFu); goto EmitJmpCallRel; } @@ -4479,7 +4682,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c } } - err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 0); + err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel); if (ASMJIT_UNLIKELY(err)) goto Failed; @@ -4492,7 +4695,7 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // REX prefix does nothing if not patched, but allows to patch the // instruction to use MOD/M and to point to a memory where the final // 64-bit address is stored. - if (archId() != ArchInfo::kIdX86 && x86IsJmpOrCall(instId)) { + if (Environment::is64Bit(arch()) && x86IsJmpOrCall(instId)) { if (!rex) writer.emit8(kX86ByteRex); @@ -4506,19 +4709,15 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0); // Emit 0F prefix. writer.emit8(opcode.v); // Emit opcode. writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD. + re->_format.resetToDataValue(4); + re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize); writer.emit32uLE(0); // Emit DISP32. - - re->_valueSize = 4; - re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4); - re->_trailingSize = uint8_t(immSize); } else { writer.emit8(opCode8); // Emit opcode. + re->_format.resetToDataValue(4); + re->_format.setLeadingAndTrailingSize(writer.offsetFrom(_bufferPtr), immSize); writer.emit8(0); // Emit DISP8 (zero). - - re->_valueSize = 1; - re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 1); - re->_trailingSize = uint8_t(immSize); } goto EmitDone; } @@ -4559,19 +4758,18 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // Chain with label. size_t offset = size_t(writer.offsetFrom(_bufferData)); - LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset); + OffsetFormat of; + of.resetToDataValue(relSize); + LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset, of); if (ASMJIT_UNLIKELY(!link)) goto OutOfMemory; if (re) link->relocId = re->id(); - // Emit label size as dummy data. - if (relSize == 1) - writer.emit8(0x01); - else // if (relSize == 4) - writer.emit32uLE(0x04040404); + // Emit dummy zeros, must be patched later when the reference becomes known. 
+ writer.emitZeros(relSize); } writer.emitImmediate(uint64_t(immValue), immSize); @@ -4582,27 +4780,23 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c EmitDone: if (ASMJIT_UNLIKELY(options & Inst::kOptionReserved)) { #ifndef ASMJIT_NO_LOGGING - if (hasEmitterOption(kOptionLoggingEnabled)) - _emitLog(instId, options, o0, o1, o2, o3, relSize, immSize, writer.cursor()); + if (_logger) + EmitterUtils::logInstructionEmitted(this, instId, options, o0, o1, o2, opExt, relSize, immSize, writer.cursor()); #endif } - resetInstOptions(); resetExtraReg(); + resetInstOptions(); resetInlineComment(); writer.done(this); return kErrorOk; // -------------------------------------------------------------------------- - // [Error Cases] + // [Error Handler] // -------------------------------------------------------------------------- - #define ERROR_HANDLER(ERROR) \ - ERROR: \ - err = DebugUtils::errored(kError##ERROR); \ - goto Failed; - +#define ERROR_HANDLER(ERR) ERR: err = DebugUtils::errored(kError##ERR); goto Failed; ERROR_HANDLER(OutOfMemory) ERROR_HANDLER(InvalidLabel) ERROR_HANDLER(InvalidInstruction) @@ -4616,16 +4810,23 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c ERROR_HANDLER(InvalidAddressIndex) ERROR_HANDLER(InvalidAddress64Bit) ERROR_HANDLER(InvalidDisplacement) + ERROR_HANDLER(InvalidPhysId) ERROR_HANDLER(InvalidSegment) ERROR_HANDLER(InvalidImmediate) ERROR_HANDLER(OperandSizeMismatch) ERROR_HANDLER(AmbiguousOperandSize) ERROR_HANDLER(NotConsecutiveRegs) - - #undef ERROR_HANDLER +#undef ERROR_HANDLER Failed: - return _emitFailed(err, instId, options, o0, o1, o2, o3); +#ifndef ASMJIT_NO_LOGGING + return EmitterUtils::logInstructionFailed(this, err, instId, options, o0, o1, o2, opExt); +#else + resetExtraReg(); + resetInstOptions(); + resetInlineComment(); + return reportError(err); +#endif } // ============================================================================ @@ -4633,6 +4834,9 @@ ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, c // ============================================================================ Error Assembler::align(uint32_t alignMode, uint32_t alignment) { + if (ASMJIT_UNLIKELY(!_code)) + return reportError(DebugUtils::errored(kErrorNotInitialized)); + if (ASMJIT_UNLIKELY(alignMode >= kAlignCount)) return reportError(DebugUtils::errored(kErrorInvalidArgument)); @@ -4644,13 +4848,13 @@ Error Assembler::align(uint32_t alignMode, uint32_t alignment) { uint32_t i = uint32_t(Support::alignUpDiff(offset(), alignment)); if (i > 0) { - CodeBufferWriter writer(this); + CodeWriter writer(this); ASMJIT_PROPAGATE(writer.ensureSpace(this, i)); uint8_t pattern = 0x00; switch (alignMode) { case kAlignCode: { - if (hasEmitterOption(kOptionOptimizedAlign)) { + if (hasEncodingOption(kEncodingOptionOptimizedAlign)) { // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP). 
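// The EmitDone / Failed paths above now use the attached Logger directly and
// report failures through EmitterUtils, so diagnostics follow whatever the
// CodeHolder has attached. A short sketch of that wiring using the library's
// FileLogger and ErrorHandler interfaces:

#include <asmjit/x86.h>
#include <stdio.h>

class PrintingErrorHandler : public asmjit::ErrorHandler {
public:
  void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
    (void)origin;
    printf("AsmJit error %u: %s\n", err, message);
  }
};

void attachDiagnostics(asmjit::CodeHolder& code) {
  static asmjit::FileLogger logger(stdout);
  static PrintingErrorHandler handler;

  code.setLogger(&logger);          // Every emitted instruction gets logged.
  code.setErrorHandler(&handler);   // Encoding failures are reported here.
}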
enum { kMaxNopSize = 9 }; @@ -4699,12 +4903,11 @@ Error Assembler::align(uint32_t alignMode, uint32_t alignment) { } #ifndef ASMJIT_NO_LOGGING - if (hasEmitterOption(kOptionLoggingEnabled)) { - Logger* logger = _code->logger(); + if (_logger) { StringTmp<128> sb; - sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationCode)); + sb.appendChars(' ', _logger->indentation(FormatOptions::kIndentationCode)); sb.appendFormat("align %u\n", alignment); - logger->log(sb); + _logger->log(sb); } #endif @@ -4716,22 +4919,20 @@ Error Assembler::align(uint32_t alignMode, uint32_t alignment) { // ============================================================================ Error Assembler::onAttach(CodeHolder* code) noexcept { - uint32_t archId = code->archId(); - if (!ArchInfo::isX86Family(archId)) + uint32_t arch = code->arch(); + if (!Environment::isFamilyX86(arch)) return DebugUtils::errored(kErrorInvalidArch); ASMJIT_PROPAGATE(Base::onAttach(code)); - if (archId == ArchInfo::kIdX86) { + if (Environment::is32Bit(arch)) { // 32 bit architecture - X86. - _gpRegInfo.setSignature(Gpd::kSignature); - _globalInstOptions |= Inst::_kOptionInvalidRex; + _forcedInstOptions |= Inst::_kOptionInvalidRex; _setAddressOverrideMask(kX86MemInfo_67H_X86); } else { // 64 bit architecture - X64. - _gpRegInfo.setSignature(Gpq::kSignature); - _globalInstOptions &= ~Inst::_kOptionInvalidRex; + _forcedInstOptions &= ~Inst::_kOptionInvalidRex; _setAddressOverrideMask(kX86MemInfo_67H_X64); } @@ -4739,6 +4940,9 @@ Error Assembler::onAttach(CodeHolder* code) noexcept { } Error Assembler::onDetach(CodeHolder* code) noexcept { + _forcedInstOptions &= ~Inst::_kOptionInvalidRex; + _setAddressOverrideMask(0); + return Base::onDetach(code); } diff --git a/libs/asmjit/src/asmjit/x86/x86assembler.h b/libs/asmjit/src/asmjit/x86/x86assembler.h index 3e3027b..8cd1014 100644 --- a/libs/asmjit/src/asmjit/x86/x86assembler.h +++ b/libs/asmjit/src/asmjit/x86/x86assembler.h @@ -37,9 +37,652 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // [asmjit::Assembler] // ============================================================================ -//! Assembler (X86). +//! X86/X64 assembler implementation. //! -//! Emits X86 machine-code into buffers managed by `CodeHolder`. +//! x86::Assembler is a code emitter that emits machine code directly into the +//! \ref CodeBuffer. The assembler is capable of targeting both 32-bit and 64-bit +//! instruction sets, the instruction set can be configured through \ref CodeHolder. +//! +//! ### Basics +//! +//! The following example shows a basic use of `x86::Assembler`, how to generate +//! a function that works in both 32-bit and 64-bit modes, and how to connect +//! \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`. +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef int (*SumFunc)(const int* arr, size_t count); +//! +//! int main() { +//! JitRuntime rt; // Create a runtime specialized for JIT. +//! CodeHolder code; // Create a CodeHolder. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Assembler a(&code); // Create and attach x86::Assembler to code. +//! +//! // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions: +//! // 32-BIT - passed all arguments by stack. +//! // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9. +//! // UNIX64 - passes first 6 arguments by RDI, RSI, RCX, RDX, R8, and R9. +//! x86::Gp arr, cnt; +//! 
x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register. +//! +//! if (ASMJIT_ARCH_BITS == 64) { +//! #if defined(_WIN32) +//! arr = x86::rcx; // First argument (array ptr). +//! cnt = x86::rdx; // Second argument (number of elements) +//! #else +//! arr = x86::rdi; // First argument (array ptr). +//! cnt = x86::rsi; // Second argument (number of elements) +//! #endif +//! } +//! else { +//! arr = x86::edx; // Use EDX to hold the array pointer. +//! cnt = x86::ecx; // Use ECX to hold the counter. +//! // Fetch first and second arguments from [ESP + 4] and [ESP + 8]. +//! a.mov(arr, x86::ptr(x86::esp, 4)); +//! a.mov(cnt, x86::ptr(x86::esp, 8)); +//! } +//! +//! Label Loop = a.newLabel(); // To construct the loop, we need some labels. +//! Label Exit = a.newLabel(); +//! +//! a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov'). +//! a.test(cnt, cnt); // Border case: +//! a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now. +//! +//! a.bind(Loop); // Start of a loop iteration. +//! a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'. +//! a.add(arr, 4); // Increment 'arr' pointer. +//! a.dec(cnt); // Decrease 'cnt'. +//! a.jnz(Loop); // If not zero jump to 'Loop'. +//! +//! a.bind(Exit); // Exit to handle the border case. +//! a.ret(); // Return from function ('sum' == 'eax'). +//! // ----> x86::Assembler is no longer needed from here and can be destroyed <---- +//! +//! SumFunc fn; +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! static const int array[6] = { 4, 8, 15, 16, 23, 42 }; +//! +//! int result = fn(array, 6); // Execute the generated code. +//! printf("%d\n", result); // Print sum of array (108). +//! +//! rt.release(fn); // Explicitly remove the function from the runtime +//! return 0; // Everything successful... +//! } +//! ``` +//! +//! The example should be self-explanatory. It shows how to work with labels, +//! how to use operands, and how to emit instructions that can use different +//! registers based on runtime selection. It implements 32-bit CDECL, WIN64, +//! and SysV64 caling conventions and will work on most X86/X64 environments. +//! +//! Although functions prologs / epilogs can be implemented manually, AsmJit +//! provides utilities that can be used to create function prologs and epilogs +//! automatically, see \ref asmjit_function for more details. +//! +//! ### Instruction Validation +//! +//! Assembler prefers speed over strictness by default. The implementation checks +//! the type of operands and fails if the signature of types is invalid, however, +//! it does only basic checks regarding registers and their groups used in +//! instructions. It's possible to pass operands that don't form any valid +//! signature to the implementation and succeed. This is usually not a problem +//! as Assembler provides typed API so operand types are normally checked by C++ +//! compiler at compile time, however, Assembler is fully dynamic and its \ref +//! emit() function can be called with any instruction id, options, and operands. +//! Moreover, it's also possible to form instructions that will be accepted by +//! the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ +//! compiler won't see a problem as both EAX and AL are \ref Gp registers. +//! +//! To help with common mistakes AsmJit allows to activate instruction validation. 
+//! This feature instruments the Assembler to call \ref InstAPI::validate() before +//! it attempts to encode any instruction. +//! +//! The example below illustrates how validation can be turned on: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! int main(int argc, char* argv[]) { +//! JitRuntime rt; // Create a runtime specialized for JIT. +//! CodeHolder code; // Create a CodeHolder. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Assembler a(&code); // Create and attach x86::Assembler to code. +//! +//! // Enable strict validation. +//! a.addValidationOptions(BaseEmitter::kValidationOptionAssembler); +//! +//! // Try to encode invalid or ill-formed instructions. +//! Error err; +//! +//! // Invalid instruction. +//! err = a.mov(x86::eax, x86::al); +//! printf("Status: %s\n", DebugUtils::errorAsString(err)); +//! +//! // Invalid instruction. +//! err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0); +//! printf("Status: %s\n", DebugUtils::errorAsString(err)); +//! +//! // Ambiguous operand size - the pointer requires size. +//! err = a.inc(x86::ptr(x86::rax), 1); +//! printf("Status: %s\n", DebugUtils::errorAsString(err)); +//! +//! return 0; +//! } +//! ``` +//! +//! ### Native Registers +//! +//! All emitters provide functions to construct machine-size registers depending +//! on the target. This feature is for users that want to write code targeting +//! both 32-bit and 64-bit architectures at the same time. In AsmJit terminology +//! such registers have prefix `z`, so for example on X86 architecture the +//! following native registers are provided: +//! +//! - `zax` - mapped to either `eax` or `rax` +//! - `zbx` - mapped to either `ebx` or `rbx` +//! - `zcx` - mapped to either `ecx` or `rcx` +//! - `zdx` - mapped to either `edx` or `rdx` +//! - `zsp` - mapped to either `esp` or `rsp` +//! - `zbp` - mapped to either `ebp` or `rbp` +//! - `zsi` - mapped to either `esi` or `rsi` +//! - `zdi` - mapped to either `edi` or `rdi` +//! +//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and +//! \ref x86::Compiler. The example below illustrates how to use this feature: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! typedef int (*Func)(void); +//! +//! int main(int argc, char* argv[]) { +//! JitRuntime rt; // Create a runtime specialized for JIT. +//! CodeHolder code; // Create a CodeHolder. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Assembler a(&code); // Create and attach x86::Assembler to code. +//! +//! // Let's get these registers from x86::Assembler. +//! x86::Gp zbp = a.zbp(); +//! x86::Gp zsp = a.zsp(); +//! +//! int stackSize = 32; +//! +//! // Function prolog. +//! a.push(zbp); +//! a.mov(zbp, zsp); +//! a.sub(zsp, stackSize); +//! +//! // ... emit some code (this just sets return value to zero) ... +//! a.xor_(x86::eax, x86::eax); +//! +//! // Function epilog and return. +//! a.mov(zsp, zbp); +//! a.pop(zbp); +//! a.ret(); +//! +//! // To make the example complete let's call it. +//! Func fn; +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! +//! int result = fn(); // Execute the generated code. +//! printf("%d\n", result); // Print the resulting "0". +//! +//! rt.release(fn); // Remove the function from the runtime. +//! return 0; +//! } +//! ``` +//! +//! 
The example just returns `0`, but the function generated contains a standard +//! prolog and epilog sequence and the function itself reserves 32 bytes of local +//! stack. The advantage is clear - a single code-base can handle multiple targets +//! easily. If you want to create a register of native size dynamically by +//! specifying its id it's also possible: +//! +//! ``` +//! void example(x86::Assembler& a) { +//! x86::Gp zax = a.gpz(x86::Gp::kIdAx); +//! x86::Gp zbx = a.gpz(x86::Gp::kIdBx); +//! x86::Gp zcx = a.gpz(x86::Gp::kIdCx); +//! x86::Gp zdx = a.gpz(x86::Gp::kIdDx); +//! +//! // You can also change register's id easily. +//! x86::Gp zsp = zax; +//! zsp.setId(4); // or x86::Gp::kIdSp. +//! } +//! ``` +//! +//! ### Data Embedding +//! +//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific +//! conventions that are often used by assemblers to embed data next to the code. +//! The following functions can be used to embed data: +//! +//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming). +//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming). +//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming). +//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming). +//! +//! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming). +//! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming). +//! - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming). +//! - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming). +//! - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming). +//! - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming). +//! - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming). +//! - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming). +//! - \ref BaseAssembler::embedFloat() - embeds float (portable naming). +//! - \ref BaseAssembler::embedDouble() - embeds double (portable naming). +//! +//! The following example illustrates how embed works: +//! +//! ``` +//! #include +//! using namespace asmjit; +//! +//! void embedData(x86::Assembler& a) { +//! a.db(0xFF); // Embeds 0xFF byte. +//! a.dw(0xFF00); // Embeds 0xFF00 word (little-endian). +//! a.dd(0xFF000000); // Embeds 0xFF000000 dword (little-endian). +//! a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian). +//! } +//! ``` +//! +//! Sometimes it's required to read the data that is embedded after code, for +//! example. This can be done through \ref Label as shown below: +//! +//! ``` +//! #include +//! using namespace asmjit; +//! +//! void embedData(x86::Assembler& a, const Label& L_Data) { +//! x86::Gp addr = a.zax(); // EAX or RAX. +//! x86::Gp val = x86::edi; // Where to store some value... +//! +//! // Approach 1 - Load the address to register through LEA. This approach +//! // is flexible as the address can be then manipulated, for +//! // example if you have a data array, which would need index. +//! a.lea(addr, L_Data); // Loads the address of the label to EAX or RAX. +//! a.mov(val, dword_ptr(addr)); +//! +//! // Approach 2 - Load the data directly by using L_Data in address. It's +//! // worth noting that this doesn't work with indexes in X64 +//! // mode. It will use absolute address in 32-bit mode and +//! // relative address (RIP) in 64-bit mode. +//! a.mov(val, dword_ptr(L_Data)); +//! } +//! ``` +//! +//! ### Label Embedding +//! +//! It's also possible to embed labels. 
In general AsmJit provides the following +//! options: +//! +//! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. +//! This is target dependent and would embed either 32-bit or 64-bit data +//! that embeds absolute label address. This kind of embedding cannot be +//! used in a position independent code. +//! +//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two +//! labels. The size of the difference can be specified so it's possible to +//! embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient +//! for most purposes. +//! +//! The following example demonstrates how to embed labels and their differences: +//! +//! ``` +//! #include +//! using namespace asmjit; +//! +//! void embedLabel(x86::Assembler& a, const Label& L_Data) { +//! // [1] Embed L_Data - the size of the data will be dependent on the target. +//! a.embedLabel(L_Data); +//! +//! // [2] Embed a 32-bit difference of two labels. +//! Label L_Here = a.newLabel(); +//! a.bind(L_Here); +//! // Embeds int32_t(L_Data - L_Here). +//! a.embedLabelDelta(L_Data, L_Here, 4); +//! } +//! ``` +//! +//! ### Using FuncFrame and FuncDetail with x86::Assembler +//! +//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be +//! used together with \ref x86::Assembler to generate a function that will use +//! platform dependent calling conventions automatically depending on the target: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b); +//! +//! int main(int argc, char* argv[]) { +//! JitRuntime rt; // Create JIT Runtime. +//! CodeHolder code; // Create a CodeHolder. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Assembler a(&code); // Create and attach x86::Assembler to code. +//! +//! // Decide which registers will be mapped to function arguments. Try changing +//! // registers of dst, src_a, and src_b and see what happens in function's +//! // prolog and epilog. +//! x86::Gp dst = a.zax(); +//! x86::Gp src_a = a.zcx(); +//! x86::Gp src_b = a.zdx(); +//! +//! X86::Xmm vec0 = x86::xmm0; +//! X86::Xmm vec1 = x86::xmm1; +//! +//! // Create/initialize FuncDetail and FuncFrame. +//! FuncDetail func; +//! func.init(FuncSignatureT(CallConv::kIdHost)); +//! +//! FuncFrame frame; +//! frame.init(func); +//! +//! // Make XMM0 and XMM1 dirty - kGroupVec describes XMM|YMM|ZMM registers. +//! frame.setDirtyRegs(x86::Reg::kGroupVec, IntUtils::mask(0, 1)); +//! +//! // Alternatively, if you don't want to use register masks you can pass BaseReg +//! // to addDirtyRegs(). The following code would add both xmm0 and xmm1. +//! frame.addDirtyRegs(x86::xmm0, x86::xmm1); +//! +//! FuncArgsAssignment args(&func); // Create arguments assignment context. +//! args.assignAll(dst, src_a, src_b);// Assign our registers to arguments. +//! args.updateFrameInfo(frame); // Reflect our args in FuncFrame. +//! frame.finalize(); // Finalize the FuncFrame (updates it). +//! +//! a.emitProlog(frame); // Emit function prolog. +//! a.emitArgsAssignment(frame, args);// Assign arguments to registers. +//! a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0. +//! a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1. +//! a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0. +//! a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst]. +//! a.emitEpilog(frame); // Emit function epilog and return. +//! +//! 
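+//! // At this point the whole function has been encoded into `code`, so the
+//! // Assembler is no longer needed; rt.add() below consumes the CodeHolder.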
+//! SumIntsFunc fn;
+//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
+//! if (err) return 1; // Handle a possible error case.
+//!
+//! // Execute the generated function.
+//! int inA[4] = { 4, 3, 2, 1 };
+//! int inB[4] = { 1, 5, 2, 8 };
+//! int out[4];
+//! fn(out, inA, inB);
+//!
+//! // Prints {5 8 4 9}
+//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
+//!
+//! rt.release(fn);
+//! return 0;
+//! }
+//! ```
+//!
+//! ### Using x86::Assembler as Code-Patcher
+//!
+//! This is an advanced topic that is sometimes unavoidable. AsmJit by default
+//! appends machine code it generates into a \ref CodeBuffer, however, it also
+//! allows to set the offset in \ref CodeBuffer explicitly and to overwrite its
+//! content. This technique is extremely dangerous as X86 instructions have
+//! variable length (see below), so you should in general only patch code to
+//! change an instruction's immediate values or some other details not known
+//! at the time the instruction was emitted. A typical scenario that requires
+//! code-patching is when you start emitting a function and you don't know how
+//! much stack you want to reserve for it.
+//!
+//! Before we go further it's important to introduce instruction options, because
+//! they can help with code-patching (and not only patching, but that will be
+//! explained in the AVX-512 section):
+//!
+//! - Many general-purpose instructions (especially arithmetic ones) on X86
+//! have multiple encodings - in AsmJit this is usually called 'short form'
+//! and 'long form'.
+//! - AsmJit always tries to use 'short form' as it makes the resulting
+//! machine-code smaller, which is always good - this decision is used
+//! by the majority of assemblers out there.
+//! - AsmJit allows to override the default decision by using `short_()`
+//! and `long_()` instruction options to force short or long form,
+//! respectively. The most useful is `long_()` as it basically forces
+//! AsmJit to always emit the longest form. The `short_()` is not that
+//! useful as it's automatic (except jumps to non-bound labels). Note that
+//! the underscore after each function name avoids collision with built-in
+//! C++ types.
+//!
+//! To illustrate what short form and long form mean in binary let's assume
+//! we want to emit the "add esp, 16" instruction, which has two possible binary
+//! encodings:
+//!
+//! - `83C410` - This is a short form aka `short add esp, 16` - You can see
+//! opcode byte (0x83), MOD/RM byte (0xC4) and an 8-bit immediate value
+//! representing `16`.
+//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can
+//! see a different opcode byte (0x81), the same MOD/RM byte (0xC4) and a
+//! 32-bit immediate in little-endian representing `16`.
+//!
+//! It should be obvious that patching an existing instruction into an instruction
+//! having a different size may create various problems. So it's recommended to be
+//! careful and to only patch instructions into instructions having the same size.
+//! The example below demonstrates how instruction options can be used to guarantee
+//! the size of an instruction by forcing the assembler to use long-form encoding:
+//!
+//! ```
+//! #include <asmjit/x86.h>
+//! #include <stdio.h>
+//!
+//! using namespace asmjit;
+//!
+//! typedef int (*Func)(void);
+//!
+//! int main(int argc, char* argv[]) {
+//! JitRuntime rt; // Create a runtime specialized for JIT.
+//! CodeHolder code; // Create a CodeHolder.
+//!
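+//! // This example patches an already emitted instruction: we first record the
+//! // buffer offset of the instruction to rewrite and come back to it once the
+//! // required stack size is known.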
+//! code.init(rt.environment()); // Initialize code to match the JIT environment.
+//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
+//!
+//! // Let's get these registers from x86::Assembler.
+//! x86::Gp zbp = a.zbp();
+//! x86::Gp zsp = a.zsp();
+//!
+//! // Function prolog.
+//! a.push(zbp);
+//! a.mov(zbp, zsp);
+//!
+//! // This is where we are gonna patch the code later, so let's get the offset
+//! // (the current location) from the beginning of the code-buffer.
+//! size_t patchOffset = a.offset();
+//! // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
+//! a.long_().sub(zsp, 0);
+//!
+//! // ... emit some code (this just sets return value to zero) ...
+//! a.xor_(x86::eax, x86::eax);
+//!
+//! // Function epilog and return.
+//! a.mov(zsp, zbp);
+//! a.pop(zbp);
+//! a.ret();
+//!
+//! // Now we know how much stack space we want to reserve. I have chosen 128
+//! // bytes on purpose as it's encodable only in the long form that we have used.
+//!
+//! int stackSize = 128; // Number of bytes to reserve on the stack.
+//! a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
+//! a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
+//!
+//! // Now the code is ready to be called.
+//! Func fn;
+//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
+//! if (err) return 1; // Handle a possible error returned by AsmJit.
+//!
+//! int result = fn(); // Execute the generated code.
+//! printf("%d\n", result); // Print the resulting "0".
+//!
+//! rt.release(fn); // Remove the function from the runtime.
+//! return 0;
+//! }
+//! ```
+//!
+//! If you run the example it will just work, because both instructions have
+//! the same size. As an experiment you can try removing the `long_()` form to
+//! see what happens when wrong code is generated.
+//!
+//! ### Code Patching and REX Prefix
+//!
+//! In 64-bit mode there is one more thing to worry about when patching code:
+//! REX prefix. It's a single byte prefix designed to address registers with
+//! ids from 8 to 15 and to override the default width of operation from 32
+//! to 64 bits. AsmJit, like other assemblers, only emits REX prefix when it's
+//! necessary. If the patched code only changes the immediate value as shown
+//! in the previous example then there is nothing to worry about as it doesn't
+//! change the logic behind emitting REX prefix, however, if the patched code
+//! changes register id or overrides the operation width then it's important
+//! to take care of REX prefix as well.
+//!
+//! AsmJit contains another instruction option that controls (forces) REX
+//! prefix - `rex()`. If you use it the instruction emitted will always use
+//! REX prefix even when it's encodable without it. The following list contains
+//! some instructions and their binary representations to illustrate when it's
+//! emitted:
+//!
+//! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
+//! - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
+//! - `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
+//! - `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
+//! - `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
+//!
+//! ### More Prefixes
+//!
+//! X86 architecture is known for its prefixes.
AsmJit supports all prefixes +//! that can affect how the instruction is encoded: +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! void prefixesExample(x86::Assembler& a) { +//! // Lock prefix for implementing atomics: +//! // lock add dword ptr [dst], 1 +//! a.lock().add(x86::dword_ptr(dst), 1); +//! +//! // Similarly, XAcquire/XRelease prefixes are also available: +//! // xacquire add dword ptr [dst], 1 +//! a.xacquire().add(x86::dword_ptr(dst), 1); +//! +//! // Rep prefix (see also repe/repz and repne/repnz): +//! // rep movs byte ptr [dst], byte ptr [src] +//! a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src)); +//! +//! // Forcing REX prefix in 64-bit mode. +//! // rex mov eax, 1 +//! a.rex().mov(x86::eax, 1); +//! +//! // AVX instruction without forced prefix uses the shortest encoding: +//! // vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2] +//! a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2); +//! +//! // Forcing VEX3 prefix (AVX): +//! // vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2] +//! a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2); +//! +//! // Forcing EVEX prefix (AVX512): +//! // evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2] +//! a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2); +//! +//! // Some instructions accept prefixes not originally intended to: +//! // rep ret +//! a.rep().ret(); +//! } +//! ``` +//! +//! It's important to understand that prefixes are part of instruction options. +//! When a member function that involves adding a prefix is called the prefix +//! is combined with existing instruction options, which will affect the next +//! instruction generated. +//! +//! ### Generating AVX512 code. +//! +//! x86::Assembler can generate AVX512+ code including the use of opmask +//! registers. Opmask can be specified through \ref x86::Assembler::k() +//! function, which stores it as an extra register, which will be used +//! by the next instruction. AsmJit uses such concept for manipulating +//! instruction options as well. +//! +//! The following AVX512 features are supported: +//! +//! - Opmask selector {k} and zeroing {z}. +//! - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option. +//! - AVX512 broadcasts {1toN}. +//! +//! The following example demonstrates how AVX512 features can be used: +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! void generateAVX512Code(x86::Assembler& a) { +//! using namespace x86; +//! +//! // Opmask Selectors +//! // ---------------- +//! // +//! // - Opmask / zeroing is part of the instruction options / extraReg. +//! // - k(reg) is like {kreg} in Intel syntax. +//! // - z() is like {z} in Intel syntax. +//! +//! // vaddpd zmm {k1} {z}, zmm1, zmm2 +//! a.k(k1).z().vaddpd(zmm0, zmm1, zmm2); +//! +//! // Memory Broadcasts +//! // ----------------- +//! // +//! // - Broadcast data is part of memory operand. +//! // - Use x86::Mem::_1toN(), which returns a new x86::Mem operand. +//! +//! // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8} +//! a.k(k1).z().vaddpd(zmm0, zmm1, x86::mem(rcx)._1to8()); +//! +//! // Embedded Rounding & Suppress-All-Exceptoins +//! // ------------------------------------------- +//! // +//! // - Rounding mode and {sae} are part of instruction options. +//! // - Use sae() to enable exception suppression. +//! // - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding. +//! // - Embedded rounding implicitly sets {sae} as well, that's why the API +//! // also has sae() suffix, to make it clear. +//! +//! // vcmppd k1, zmm1, zmm2, 0x00 {sae} +//! 
a.sae().vcmppd(k1, zmm1, zmm2, 0); +//! +//! // vaddpd zmm0, zmm1, zmm2 {rz} +//! a.rz_sae().vaddpd(zmm0, zmm1, zmm2); +//! } +//! ``` class ASMJIT_VIRTAPI Assembler : public BaseAssembler, public EmitterImplicitT { @@ -69,12 +712,10 @@ class ASMJIT_VIRTAPI Assembler //! \} //! \endcond - //! \cond INTERNAL //! \name Emit //! \{ - using BaseEmitter::_emit; - ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) override; + ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override; //! \} //! \endcond diff --git a/libs/asmjit/src/asmjit/x86/x86builder.cpp b/libs/asmjit/src/asmjit/x86/x86builder.cpp index 4e65c7b..2227fa2 100644 --- a/libs/asmjit/src/asmjit/x86/x86builder.cpp +++ b/libs/asmjit/src/asmjit/x86/x86builder.cpp @@ -46,7 +46,9 @@ Builder::~Builder() noexcept {} Error Builder::finalize() { ASMJIT_PROPAGATE(runPasses()); Assembler a(_code); - return serialize(&a); + a.addEncodingOptions(encodingOptions()); + a.addValidationOptions(validationOptions()); + return serializeTo(&a); } // ============================================================================ @@ -54,14 +56,11 @@ Error Builder::finalize() { // ============================================================================ Error Builder::onAttach(CodeHolder* code) noexcept { - uint32_t archId = code->archId(); - if (!ArchInfo::isX86Family(archId)) + uint32_t arch = code->arch(); + if (!Environment::isFamilyX86(arch)) return DebugUtils::errored(kErrorInvalidArch); - ASMJIT_PROPAGATE(Base::onAttach(code)); - - _gpRegInfo.setSignature(archId == ArchInfo::kIdX86 ? uint32_t(Gpd::kSignature) : uint32_t(Gpq::kSignature)); - return kErrorOk; + return Base::onAttach(code); } ASMJIT_END_SUB_NAMESPACE diff --git a/libs/asmjit/src/asmjit/x86/x86builder.h b/libs/asmjit/src/asmjit/x86/x86builder.h index 66e2dfc..256bc9e 100644 --- a/libs/asmjit/src/asmjit/x86/x86builder.h +++ b/libs/asmjit/src/asmjit/x86/x86builder.h @@ -40,7 +40,315 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // [asmjit::x86::Builder] // ============================================================================ -//! Architecture-dependent asm-builder (X86). +//! X86/X64 builder implementation. +//! +//! The code representation used by \ref BaseBuilder is compatible with everything +//! AsmJit provides. Each instruction is stored as \ref InstNode, which contains +//! instruction id, options, and operands. Each instruction emitted will create +//! a new \ref InstNode instance and add it to the current cursor in the double-linked +//! list of nodes. Since the instruction stream used by \ref BaseBuilder can be +//! manipulated, we can rewrite the SumInts example from \ref asmjit_assembler +//! into the following: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b); +//! +//! // Small helper function to print the current content of `cb`. +//! static void dumpCode(BaseBuilder& builder, const char* phase) { +//! String sb; +//! builder.dump(sb); +//! printf("%s:\n%s\n", phase, sb.data()); +//! } +//! +//! int main() { +//! JitRuntime rt; // Create JIT Runtime. +//! CodeHolder code; // Create a CodeHolder. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Builder cb(&code); // Create and attach x86::Builder to `code`. +//! +//! // Decide which registers will be mapped to function arguments. 
Try changing +//! // registers of `dst`, `srcA`, and `srcB` and see what happens in function's +//! // prolog and epilog. +//! x86::Gp dst = cb.zax(); +//! x86::Gp srcA = cb.zcx(); +//! x86::Gp srcB = cb.zdx(); +//! +//! X86::Xmm vec0 = x86::xmm0; +//! X86::Xmm vec1 = x86::xmm1; +//! +//! // Create and initialize `FuncDetail`. +//! FuncDetail func; +//! func.init(FuncSignatureT(CallConv::kIdHost)); +//! +//! // Remember prolog insertion point. +//! BaseNode* prologInsertionPoint = cb.cursor(); +//! +//! // Emit function body: +//! cb.movdqu(vec0, x86::ptr(srcA)); // Load 4 ints from [srcA] to XMM0. +//! cb.movdqu(vec1, x86::ptr(srcB)); // Load 4 ints from [srcB] to XMM1. +//! cb.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0. +//! cb.movdqu(x86::ptr(dst), vec0); // Store the result to [dst]. +//! +//! // Remember epilog insertion point. +//! BaseNode* epilogInsertionPoint = cb.cursor(); +//! +//! // Let's see what we have now. +//! dumpCode(cb, "Raw Function"); +//! +//! // Now, after we emitted the function body, we can insert the prolog, arguments +//! // allocation, and epilog. This is not possible with using pure x86::Assembler. +//! FuncFrame frame; +//! frame.init(func); +//! +//! // Make XMM0 and XMM1 dirty; `kGroupVec` describes XMM|YMM|ZMM registers. +//! frame.setDirtyRegs(x86::Reg::kGroupVec, IntUtils::mask(0, 1)); +//! +//! FuncArgsAssignment args(&func); // Create arguments assignment context. +//! args.assignAll(dst, srcA, srcB); // Assign our registers to arguments. +//! args.updateFrame(frame); // Reflect our args in FuncFrame. +//! frame.finalize(); // Finalize the FuncFrame (updates it). +//! +//! // Insert function prolog and allocate arguments to registers. +//! cb.setCursor(prologInsertionPoint); +//! cb.emitProlog(frame); +//! cb.emitArgsAssignment(frame, args); +//! +//! // Insert function epilog. +//! cb.setCursor(epilogInsertionPoint); +//! cb.emitEpilog(frame); +//! +//! // Let's see how the function's prolog and epilog looks. +//! dumpCode(cb, "Prolog & Epilog"); +//! +//! // IMPORTANT: Builder requires finalize() to be called to serialize its +//! // code to the Assembler (it automatically creates one if not attached). +//! cb.finalize(); +//! +//! SumIntsFunc fn; +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error case. +//! +//! // Execute the generated function. +//! int inA[4] = { 4, 3, 2, 1 }; +//! int inB[4] = { 1, 5, 2, 8 }; +//! int out[4]; +//! fn(out, inA, inB); +//! +//! // Prints {5 8 4 9} +//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]); +//! +//! rt.release(fn); // Explicitly remove the function from the runtime. +//! return 0; +//! } +//! ``` +//! +//! When the example is executed it should output the following (this one using +//! AMD64-SystemV ABI): +//! +//! ``` +//! Raw Function: +//! movdqu xmm0, [rcx] +//! movdqu xmm1, [rdx] +//! paddd xmm0, xmm1 +//! movdqu [rax], xmm0 +//! +//! Prolog & Epilog: +//! mov rax, rdi +//! mov rcx, rsi +//! movdqu xmm0, [rcx] +//! movdqu xmm1, [rdx] +//! paddd xmm0, xmm1 +//! movdqu [rax], xmm0 +//! ret +//! +//! {5 8 4 9} +//! ``` +//! +//! The number of use-cases of \ref BaseBuilder is not limited and highly depends +//! on your creativity and experience. The previous example can be easily improved +//! to collect all dirty registers inside the function programmatically and to pass +//! them to \ref FuncFrame::setDirtyRegs(). +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! 
// NOTE: This function doesn't cover all possible constructs. It ignores +//! // instructions that write to implicit registers that are not part of the +//! // operand list. It also counts read-only registers. Real implementation +//! // would be a bit more complicated, but still relatively easy to implement. +//! static void collectDirtyRegs(const BaseNode* first, +//! const BaseNode* last, +//! uint32_t regMask[BaseReg::kGroupVirt]) { +//! const BaseNode* node = first; +//! while (node) { +//! if (node->actsAsInst()) { +//! const InstNode* inst = node->as(); +//! const Operand* opArray = inst->operands(); +//! +//! for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) { +//! const Operand& op = opArray[i]; +//! if (op.isReg()) { +//! const x86::Reg& reg = op.as(); +//! if (reg.group() < BaseReg::kGroupVirt) { +//! regMask[reg.group()] |= 1u << reg.id(); +//! } +//! } +//! } +//! } +//! +//! if (node == last) +//! break; +//! node = node->next(); +//! } +//! } +//! +//! static void setDirtyRegsOfFuncFrame(const x86::Builder& builder, FuncFrame& frame) { +//! uint32_t regMask[BaseReg::kGroupVirt] {}; +//! collectDirtyRegs(builder.firstNode(), builder.lastNode(), regMask); +//! +//! // X86/X64 ABIs only require to save GP/XMM registers: +//! frame.setDirtyRegs(x86::Reg::kGroupGp , regMask[x86::Reg::kGroupGp ]); +//! frame.setDirtyRegs(x86::Reg::kGroupVec, regMask[x86::Reg::kGroupVec]); +//! } +//! ``` +//! +//! ### Casting Between Various Emitters +//! +//! Even when \ref BaseAssembler and \ref BaseBuilder provide the same interface +//! as defined by \ref BaseEmitter their platform dependent variants like \ref +//! x86::Assembler and \ref x86::Builder cannot be interchanged or casted +//! to each other by using a C++ `static_cast<>`. The main reason is the +//! inheritance graph of these classes is different and cast-incompatible, as +//! illustrated below: +//! +//! ``` +//! +--------------+ +=========================+ +//! +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+ +//! | +--------------+ | +=========================+ | +//! | (abstract) | (mixin) | +//! | +--------------+ +~~~~~~~~~~~~~~+ | | +//! +-->| BaseAssembler|---->|x86::Assembler|<--+ | +//! | +--------------+ +~~~~~~~~~~~~~~+ | | +//! | (abstract) (final) | | +//! +===============+ | +--------------+ +~~~~~~~~~~~~~~+ | | +//! # BaseEmitter #--+-->| BaseBuilder |--+->| x86::Builder |<--+ | +//! +===============+ +--------------+ | +~~~~~~~~~~~~~~+ | +//! (abstract) (abstract) | (final) | +//! +---------------------+ | +//! | | +//! | +--------------+ +~~~~~~~~~~~~~~+ +=========================+ | +//! +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+ +//! +--------------+ +~~~~~~~~~~~~~~+ +=========================+ +//! (abstract) (final) (mixin) +//! ``` +//! +//! The graph basically shows that it's not possible to cast between \ref +//! x86::Assembler and \ref x86::Builder. However, since both share the +//! base interface (\ref BaseEmitter) it's possible to cast them to a class +//! that cannot be instantiated, but defines the same interface - the class +//! is called \ref x86::Emitter and was introduced to make it possible to +//! write a function that can emit to both \ref x86::Assembler and \ref +//! x86::Builder. Note that \ref x86::Emitter cannot be created, it's abstract +//! and has private constructors and destructors; it was only designed to be +//! casted to and used as an interface. +//! +//! 
Each architecture-specific emitter implements a member function called +//! `as()`, which casts the instance to the architecture +//! specific emitter as illustrated below: +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! static void emitSomething(x86::Emitter* e) { +//! e->mov(x86::eax, x86::ebx); +//! } +//! +//! static void assemble(CodeHolder& code, bool useAsm) { +//! if (useAsm) { +//! x86::Assembler assembler(&code); +//! emitSomething(assembler.as()); +//! } +//! else { +//! x86::Builder builder(&code); +//! emitSomething(builder.as()); +//! +//! // NOTE: Builder requires `finalize()` to be called to serialize its +//! // content to Assembler (it automatically creates one if not attached). +//! builder.finalize(); +//! } +//! } +//! ``` +//! +//! The example above shows how to create a function that can emit code to +//! either \ref x86::Assembler or \ref x86::Builder through \ref x86::Emitter, +//! which provides emitter-neutral functionality. \ref x86::Emitter, however, +//! doesn't provide any emitter-specific functionality like `setCursor()`. +//! +//! ### Code Injection and Manipulation +//! +//! \ref BaseBuilder emitter stores its nodes in a double-linked list, which +//! makes it easy to manipulate that list during the code generation or +//! afterwards. Each node is always emitted next to the current cursor and the +//! cursor is advanced to that newly emitted node. The cursor can be retrieved +//! and changed by \ref BaseBuilder::cursor() and \ref BaseBuilder::setCursor(), +//! respectively. +//! +//! The example below demonstrates how to remember a node and inject something +//! next to it. +//! +//! ``` +//! static void example(x86::Builder& builder) { +//! // Emit something, after it returns the cursor would point at the last +//! // emitted node. +//! builder.mov(x86::rax, x86::rdx); // [1] +//! +//! // We can retrieve the node. +//! BaseNode* node = builder.cursor(); +//! +//! // Change the instruction we just emitted, just for fun... +//! if (node->isInst()) { +//! InstNode* inst = node->as(); +//! // Changes the operands at index [1] to RCX. +//! inst->setOp(1, x86::rcx); +//! } +//! +//! // ------------------------- Generate Some Code ------------------------- +//! builder.add(x86::rax, x86::rdx); // [2] +//! builder.shr(x86::rax, 3); // [3] +//! // ---------------------------------------------------------------------- +//! +//! // Now, we know where our node is, and we can simply change the cursor +//! // and start emitting something after it. The setCursor() function +//! // returns the previous cursor, and it's always a good practice to remember +//! // it, because you never know if you are not already injecting the code +//! // somewhere else... +//! BaseNode* oldCursor = builder.setCursor(node); +//! +//! builder.mul(x86::rax, 8); // [4] +//! +//! // Restore the cursor +//! builder.setCursor(oldCursor); +//! } +//! ``` +//! +//! The function above would actually emit the following: +//! +//! ``` +//! mov rax, rcx ; [1] Patched at the beginning. +//! mul rax, 8 ; [4] Injected. +//! add rax, rdx ; [2] Followed [1] initially. +//! shr rax, 3 ; [3] Follows [2]. +//! 
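+//! ; (The final order simply reflects where the Builder cursor pointed when
+//! ; each node was emitted or injected.)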
``` class ASMJIT_VIRTAPI Builder : public BaseBuilder, public EmitterImplicitT { diff --git a/libs/asmjit/src/asmjit/x86/x86callconv.cpp b/libs/asmjit/src/asmjit/x86/x86callconv.cpp deleted file mode 100644 index 7ec4c55..0000000 --- a/libs/asmjit/src/asmjit/x86/x86callconv.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#include "../core/api-build_p.h" -#ifdef ASMJIT_BUILD_X86 - -#include "../x86/x86callconv_p.h" -#include "../x86/x86operand.h" - -ASMJIT_BEGIN_SUB_NAMESPACE(x86) - -// ============================================================================ -// [asmjit::x86::CallConvInternal - Init] -// ============================================================================ - -static inline void CallConv_initX86Common(CallConv& cc) noexcept { - cc.setNaturalStackAlignment(4); - cc.setArchType(ArchInfo::kIdX86); - cc.setPreservedRegs(Reg::kGroupGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi)); -} - -ASMJIT_FAVOR_SIZE Error CallConvInternal::init(CallConv& cc, uint32_t ccId) noexcept { - constexpr uint32_t kGroupGp = Reg::kGroupGp; - constexpr uint32_t kGroupVec = Reg::kGroupVec; - constexpr uint32_t kGroupMm = Reg::kGroupMm; - constexpr uint32_t kGroupKReg = Reg::kGroupKReg; - - constexpr uint32_t kZax = Gp::kIdAx; - constexpr uint32_t kZbx = Gp::kIdBx; - constexpr uint32_t kZcx = Gp::kIdCx; - constexpr uint32_t kZdx = Gp::kIdDx; - constexpr uint32_t kZsp = Gp::kIdSp; - constexpr uint32_t kZbp = Gp::kIdBp; - constexpr uint32_t kZsi = Gp::kIdSi; - constexpr uint32_t kZdi = Gp::kIdDi; - - switch (ccId) { - case CallConv::kIdX86StdCall: - cc.setFlags(CallConv::kFlagCalleePopsStack); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86MsThisCall: - cc.setFlags(CallConv::kFlagCalleePopsStack); - cc.setPassedOrder(kGroupGp, kZcx); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86MsFastCall: - case CallConv::kIdX86GccFastCall: - cc.setFlags(CallConv::kFlagCalleePopsStack); - cc.setPassedOrder(kGroupGp, kZcx, kZdx); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86GccRegParm1: - cc.setPassedOrder(kGroupGp, kZax); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86GccRegParm2: - cc.setPassedOrder(kGroupGp, kZax, kZdx); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86GccRegParm3: - cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx); - CallConv_initX86Common(cc); - break; - - case CallConv::kIdX86CDecl: - CallConv_initX86Common(cc); - break; - 
- case CallConv::kIdX86Win64: - cc.setArchType(ArchInfo::kIdX64); - cc.setStrategy(CallConv::kStrategyWin64); - cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs); - cc.setNaturalStackAlignment(16); - cc.setSpillZoneSize(32); - cc.setPassedOrder(kGroupGp, kZcx, kZdx, 8, 9); - cc.setPassedOrder(kGroupVec, 0, 1, 2, 3); - cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15)); - cc.setPreservedRegs(kGroupVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); - break; - - case CallConv::kIdX86SysV64: - cc.setArchType(ArchInfo::kIdX64); - cc.setFlags(CallConv::kFlagPassFloatsByVec); - cc.setNaturalStackAlignment(16); - cc.setRedZoneSize(128); - cc.setPassedOrder(kGroupGp, kZdi, kZsi, kZdx, kZcx, 8, 9); - cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); - cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15)); - break; - - case CallConv::kIdX86LightCall2: - case CallConv::kIdX86LightCall3: - case CallConv::kIdX86LightCall4: { - uint32_t n = (ccId - CallConv::kIdX86LightCall2) + 2; - - cc.setArchType(ArchInfo::kIdX86); - cc.setFlags(CallConv::kFlagPassFloatsByVec); - cc.setNaturalStackAlignment(16); - cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi); - cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7); - cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); - cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7); - - cc.setPreservedRegs(kGroupGp , Support::lsbMask(8)); - cc.setPreservedRegs(kGroupVec , Support::lsbMask(8) & ~Support::lsbMask(n)); - break; - } - - case CallConv::kIdX64LightCall2: - case CallConv::kIdX64LightCall3: - case CallConv::kIdX64LightCall4: { - uint32_t n = (ccId - CallConv::kIdX64LightCall2) + 2; - - cc.setArchType(ArchInfo::kIdX64); - cc.setFlags(CallConv::kFlagPassFloatsByVec); - cc.setNaturalStackAlignment(16); - cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi); - cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7); - cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); - cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7); - - cc.setPreservedRegs(kGroupGp , Support::lsbMask(16)); - cc.setPreservedRegs(kGroupVec ,~Support::lsbMask(n)); - break; - } - - default: - return DebugUtils::errored(kErrorInvalidArgument); - } - - cc.setId(ccId); - return kErrorOk; -} - -ASMJIT_END_SUB_NAMESPACE - -#endif // ASMJIT_BUILD_X86 diff --git a/libs/asmjit/src/asmjit/x86/x86compiler.cpp b/libs/asmjit/src/asmjit/x86/x86compiler.cpp index 910ed5e..566437f 100644 --- a/libs/asmjit/src/asmjit/x86/x86compiler.cpp +++ b/libs/asmjit/src/asmjit/x86/x86compiler.cpp @@ -47,7 +47,9 @@ Compiler::~Compiler() noexcept {} Error Compiler::finalize() { ASMJIT_PROPAGATE(runPasses()); Assembler a(_code); - return serialize(&a); + a.addEncodingOptions(encodingOptions()); + a.addValidationOptions(validationOptions()); + return serializeTo(&a); } // ============================================================================ @@ -55,14 +57,13 @@ Error Compiler::finalize() { // ============================================================================ Error Compiler::onAttach(CodeHolder* code) noexcept { - uint32_t archId = code->archId(); - if (!ArchInfo::isX86Family(archId)) + uint32_t arch = code->arch(); + if (!Environment::isFamilyX86(arch)) return DebugUtils::errored(kErrorInvalidArch); ASMJIT_PROPAGATE(Base::onAttach(code)); - _gpRegInfo.setSignature(archId == ArchInfo::kIdX86 ? 
uint32_t(Gpd::kSignature) : uint32_t(Gpq::kSignature)); - Error err = addPassT(); + if (ASMJIT_UNLIKELY(err)) { onDetach(code); return err; diff --git a/libs/asmjit/src/asmjit/x86/x86compiler.h b/libs/asmjit/src/asmjit/x86/x86compiler.h index cc7035b..4c64b3b 100644 --- a/libs/asmjit/src/asmjit/x86/x86compiler.h +++ b/libs/asmjit/src/asmjit/x86/x86compiler.h @@ -41,7 +41,413 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // [asmjit::x86::Compiler] // ============================================================================ -//! Architecture-dependent asm-compiler (X86). +//! X86/X64 compiler implementation. +//! +//! ### Compiler Basics +//! +//! The first \ref x86::Compiler example shows how to generate a function that +//! simply returns an integer value. It's an analogy to the first Assembler example: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef int (*Func)(void); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! CodeHolder code; // Holds code and relocation information. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code. +//! +//! cc.addFunc(FuncSignatureT());// Begin a function of `int fn(void)` signature. +//! +//! x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register. +//! cc.mov(vReg, 1); // Move one to our virtual register `vReg`. +//! cc.ret(vReg); // Return `vReg` from the function. +//! +//! cc.endFunc(); // End of the function body. +//! cc.finalize(); // Translate and assemble the whole 'cc' content. +//! // ----> x86::Compiler is no longer needed from here and can be destroyed <---- +//! +//! Func fn; +//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! int result = fn(); // Execute the generated code. +//! printf("%d\n", result); // Print the resulting "1". +//! +//! rt.release(fn); // Explicitly remove the function from the runtime. +//! return 0; +//! } +//! ``` +//! +//! The \ref BaseCompiler::addFunc() and \ref BaseCompiler::endFunc() functions +//! are used to define the function and its end. Both must be called per function, +//! but the body doesn't have to be generated in sequence. An example of generating +//! two functions will be shown later. The next example shows more complicated code +//! that contain a loop and generates a simple memory copy function that uses +//! `uint32_t` items: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! CodeHolder code; // Holds code and relocation information. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code. +//! +//! cc.addFunc( // Begin the function of the following signature: +//! FuncSignatureT()); // 3rd argument - size_t (machine reg-size). +//! +//! Label L_Loop = cc.newLabel(); // Start of the loop. +//! Label L_Exit = cc.newLabel(); // Used to exit early. +//! +//! 
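+//! // The registers below are virtual registers; x86::Compiler maps them to
+//! // physical registers (and inserts spills if needed) when it runs its
+//! // register allocation pass during finalize().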
x86::Gp dst = cc.newIntPtr("dst");// Create `dst` register (destination pointer). +//! x86::Gp src = cc.newIntPtr("src");// Create `src` register (source pointer). +//! x86::Gp i = cc.newUIntPtr("i"); // Create `i` register (loop counter). +//! +//! cc.setArg(0, dst); // Assign `dst` argument. +//! cc.setArg(1, src); // Assign `src` argument. +//! cc.setArg(2, i); // Assign `i` argument. +//! +//! cc.test(i, i); // Early exit if length is zero. +//! cc.jz(L_Exit); +//! +//! cc.bind(L_Loop); // Bind the beginning of the loop here. +//! +//! x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes). +//! cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address. +//! cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address. +//! +//! cc.add(src, 4); // Increment `src`. +//! cc.add(dst, 4); // Increment `dst`. +//! +//! cc.dec(i); // Loop until `i` is non-zero. +//! cc.jnz(L_Loop); +//! +//! cc.bind(L_Exit); // Label used by early exit. +//! cc.endFunc(); // End of the function body. +//! +//! cc.finalize(); // Translate and assemble the whole 'cc' content. +//! // ----> x86::Compiler is no longer needed from here and can be destroyed <---- +//! +//! // Add the generated code to the runtime. +//! MemCpy32 memcpy32; +//! Error err = rt.add(&memcpy32, &code); +//! +//! // Handle a possible error returned by AsmJit. +//! if (err) +//! return 1; +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! // Test the generated code. +//! uint32_t input[6] = { 1, 2, 3, 5, 8, 13 }; +//! uint32_t output[6]; +//! memcpy32(output, input, 6); +//! +//! for (uint32_t i = 0; i < 6; i++) +//! printf("%d\n", output[i]); +//! +//! rt.release(memcpy32); +//! return 0; +//! } +//! ``` +//! +//! ### Recursive Functions +//! +//! It's possible to create more functions by using the same \ref x86::Compiler +//! instance and make links between them. In such case it's important to keep +//! the pointer to \ref FuncNode. +//! +//! The example below creates a simple Fibonacci function that calls itself recursively: +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef uint32_t (*Fibonacci)(uint32_t x); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! CodeHolder code; // Holds code and relocation information. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code. +//! +//! FuncNode* func = cc.addFunc( // Begin of the Fibonacci function, addFunc() +//! FuncSignatureT()); // Returns a pointer to the FuncNode node. +//! +//! Label L_Exit = cc.newLabel() // Exit label. +//! x86::Gp x = cc.newU32(); // Function x argument. +//! x86::Gp y = cc.newU32(); // Temporary. +//! +//! cc.setArg(0, x); +//! +//! cc.cmp(x, 3); // Return x if less than 3. +//! cc.jb(L_Exit); +//! +//! cc.mov(y, x); // Make copy of the original x. +//! cc.dec(x); // Decrease x. +//! +//! InvokeNode* invokeNode; // Function invocation: +//! cc.invoke(&invokeNode, // - InvokeNode (output). +//! func->label(), // - Function address or Label. +//! FuncSignatureT()); // - Function signature. +//! +//! invokeNode->setArg(0, x); // Assign x as the first argument. +//! invokeNode->setRet(0, x); // Assign x as a return value as well. +//! +//! cc.add(x, y); // Combine the return value with y. +//! +//! cc.bind(L_Exit); +//! cc.ret(x); // Return x. +//! 
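+//! // Both the early-exit path (x < 3) and the recursive path return through `x`.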
cc.endFunc(); // End of the function body. +//! +//! cc.finalize(); // Translate and assemble the whole 'cc' content. +//! // ----> x86::Compiler is no longer needed from here and can be destroyed <---- +//! +//! Fibonacci fib; +//! Error err = rt.add(&fib, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! // Test the generated code. +//! printf("Fib(%u) -> %u\n", 8, fib(8)); +//! +//! rt.release(fib); +//! return 0; +//! } +//! ``` +//! +//! ### Stack Management +//! +//! Function's stack-frame is managed automatically, which is used by the register allocator to spill virtual registers. It also provides an interface to allocate user-defined block of the stack, which can be used as a temporary storage by the generated function. In the following example a stack of 256 bytes size is allocated, filled by bytes starting from 0 to 255 and then iterated again to sum all the values. +//! +//! ``` +//! #include +//! #include +//! +//! using namespace asmjit; +//! +//! // Signature of the generated function. +//! typedef int (*Func)(void); +//! +//! int main() { +//! JitRuntime rt; // Runtime specialized for JIT code execution. +//! CodeHolder code; // Holds code and relocation information. +//! +//! code.init(rt.environment()); // Initialize code to match the JIT environment. +//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code. +//! +//! cc.addFunc(FuncSignatureT());// Create a function that returns int. +//! +//! x86::Gp p = cc.newIntPtr("p"); +//! x86::Gp i = cc.newIntPtr("i"); +//! +//! // Allocate 256 bytes on the stack aligned to 4 bytes. +//! x86::Mem stack = cc.newStack(256, 4); +//! +//! x86::Mem stackIdx(stack); // Copy of stack with i added. +//! stackIdx.setIndex(i); // stackIdx <- stack[i]. +//! stackIdx.setSize(1); // stackIdx <- byte ptr stack[i]. +//! +//! // Load a stack address to `p`. This step is purely optional and shows +//! // that `lea` is useful to load a memory operands address (even absolute) +//! // to a general purpose register. +//! cc.lea(p, stack); +//! +//! // Clear i (xor is a C++ keyword, hence 'xor_' is used instead). +//! cc.xor_(i, i); +//! +//! Label L1 = cc.newLabel(); +//! Label L2 = cc.newLabel(); +//! +//! cc.bind(L1); // First loop, fill the stack. +//! cc.mov(stackIdx, i.r8()); // stack[i] = uint8_t(i). +//! +//! cc.inc(i); // i++; +//! cc.cmp(i, 256); // if (i < 256) +//! cc.jb(L1); // goto L1; +//! +//! // Second loop, sum all bytes stored in `stack`. +//! x86::Gp sum = cc.newI32("sum"); +//! x86::Gp val = cc.newI32("val"); +//! +//! cc.xor_(i, i); +//! cc.xor_(sum, sum); +//! +//! cc.bind(L2); +//! +//! cc.movzx(val, stackIdx); // val = uint32_t(stack[i]); +//! cc.add(sum, val); // sum += val; +//! +//! cc.inc(i); // i++; +//! cc.cmp(i, 256); // if (i < 256) +//! cc.jb(L2); // goto L2; +//! +//! cc.ret(sum); // Return the `sum` of all values. +//! cc.endFunc(); // End of the function body. +//! +//! cc.finalize(); // Translate and assemble the whole 'cc' content. +//! // ----> x86::Compiler is no longer needed from here and can be destroyed <---- +//! +//! Func func; +//! Error err = rt.add(&func, &code); // Add the generated code to the runtime. +//! if (err) return 1; // Handle a possible error returned by AsmJit. +//! // ----> CodeHolder is no longer needed from here and can be destroyed <---- +//! +//! printf("Func() -> %d\n", func()); // Test the generated code. 
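+//! // Expected output: "Func() -> 32640" (the sum 0 + 1 + ... + 255).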
+//! +//! rt.release(func); +//! return 0; +//! } +//! ``` +//! +//! ### Constant Pool +//! +//! Compiler provides two constant pools for a general purpose code generation: +//! +//! - Local constant pool - Part of \ref FuncNode, can be only used by a +//! single function and added after the function epilog sequence (after +//! `ret` instruction). +//! +//! - Global constant pool - Part of \ref BaseCompiler, flushed at the end +//! of the generated code by \ref BaseEmitter::finalize(). +//! +//! The example below illustrates how a built-in constant pool can be used: +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! static void exampleUseOfConstPool(x86::Compiler& cc) { +//! cc.addFunc(FuncSignatureT()); +//! +//! x86::Gp v0 = cc.newGpd("v0"); +//! x86::Gp v1 = cc.newGpd("v1"); +//! +//! x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200); +//! x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeLocal, 33); +//! +//! cc.mov(v0, c0); +//! cc.mov(v1, c1); +//! cc.add(v0, v1); +//! +//! cc.ret(v0); +//! cc.endFunc(); +//! } +//! ``` +//! +//! ### Jump Tables +//! +//! x86::Compiler supports `jmp` instruction with reg/mem operand, which is a +//! commonly used pattern to implement indirect jumps within a function, for +//! example to implement `switch()` statement in a programming languages. By +//! default AsmJit assumes that every basic block can be a possible jump +//! target as it's unable to deduce targets from instruction's operands. This +//! is a very pessimistic default that should be avoided if possible as it's +//! costly and very unfriendly to liveness analysis and register allocation. +//! +//! Instead of relying on such pessimistic default behavior, let's use \ref +//! JumpAnnotation to annotate a jump where all targets are known: +//! +//! ``` +//! #include +//! +//! using namespace asmjit; +//! +//! static void exampleUseOfIndirectJump(x86::Compiler& cc) { +//! cc.addFunc(FuncSignatureT(CallConv::kIdHost)); +//! +//! // Function arguments +//! x86::Xmm a = cc.newXmmSs("a"); +//! x86::Xmm b = cc.newXmmSs("b"); +//! x86::Gp op = cc.newUInt32("op"); +//! +//! x86::Gp target = cc.newIntPtr("target"); +//! x86::Gp offset = cc.newIntPtr("offset"); +//! +//! Label L_Table = cc.newLabel(); +//! Label L_Add = cc.newLabel(); +//! Label L_Sub = cc.newLabel(); +//! Label L_Mul = cc.newLabel(); +//! Label L_Div = cc.newLabel(); +//! Label L_End = cc.newLabel(); +//! +//! cc.setArg(0, a); +//! cc.setArg(1, b); +//! cc.setArg(2, op); +//! +//! // Jump annotation is a building block that allows to annotate all +//! // possible targets where `jmp()` can jump. It then drives the CFG +//! // contruction and liveness analysis, which impacts register allocation. +//! JumpAnnotation* annotation = cc.newJumpAnnotation(); +//! annotation->addLabel(L_Add); +//! annotation->addLabel(L_Sub); +//! annotation->addLabel(L_Mul); +//! annotation->addLabel(L_Div); +//! +//! // Most likely not the common indirect jump approach, but it +//! // doesn't really matter how final address is calculated. The +//! // most important path using JumpAnnotation with `jmp()`. +//! cc.lea(offset, x86::ptr(L_Table)); +//! if (cc.is64Bit()) +//! cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); +//! else +//! cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); +//! cc.add(target, offset); +//! cc.jmp(target, annotation); +//! +//! // Acts like a switch() statement in C. +//! cc.bind(L_Add); +//! cc.addss(a, b); +//! cc.jmp(L_End); +//! +//! cc.bind(L_Sub); +//! 
cc.subss(a, b); +//! cc.jmp(L_End); +//! +//! cc.bind(L_Mul); +//! cc.mulss(a, b); +//! cc.jmp(L_End); +//! +//! cc.bind(L_Div); +//! cc.divss(a, b); +//! +//! cc.bind(L_End); +//! cc.ret(a); +//! +//! cc.endFunc(); +//! +//! // Relative int32_t offsets of `L_XXX - L_Table`. +//! cc.bind(L_Table); +//! cc.embedLabelDelta(L_Add, L_Table, 4); +//! cc.embedLabelDelta(L_Sub, L_Table, 4); +//! cc.embedLabelDelta(L_Mul, L_Table, 4); +//! cc.embedLabelDelta(L_Div, L_Table, 4); +//! } +//! ``` class ASMJIT_VIRTAPI Compiler : public BaseCompiler, public EmitterExplicitT { @@ -62,18 +468,18 @@ class ASMJIT_VIRTAPI Compiler #ifndef ASMJIT_NO_LOGGING # define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \ - _newRegFmt(OUT, PARAM, FORMAT, ARGS) + _newRegFmt(&OUT, PARAM, FORMAT, ARGS) #else # define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \ DebugUtils::unused(FORMAT); \ DebugUtils::unused(std::forward(args)...); \ - _newReg(OUT, PARAM) + _newReg(&OUT, PARAM) #endif #define ASMJIT_NEW_REG_CUSTOM(FUNC, REG) \ inline REG FUNC(uint32_t typeId) { \ REG reg(Globals::NoInit); \ - _newReg(reg, typeId); \ + _newReg(®, typeId); \ return reg; \ } \ \ @@ -87,7 +493,7 @@ class ASMJIT_VIRTAPI Compiler #define ASMJIT_NEW_REG_TYPED(FUNC, REG, TYPE_ID) \ inline REG FUNC() { \ REG reg(Globals::NoInit); \ - _newReg(reg, TYPE_ID); \ + _newReg(®, TYPE_ID); \ return reg; \ } \ \ @@ -170,7 +576,7 @@ class ASMJIT_VIRTAPI Compiler //! Creates a new memory chunk allocated on the current function's stack. inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) { Mem m(Globals::NoInit); - _newStack(m, size, alignment, name); + _newStack(&m, size, alignment, name); return m; } @@ -182,7 +588,7 @@ class ASMJIT_VIRTAPI Compiler //! Put data to a constant-pool and get a memory reference to it. inline Mem newConst(uint32_t scope, const void* data, size_t size) { Mem m(Globals::NoInit); - _newConst(m, scope, data, size); + _newConst(&m, scope, data, size); return m; } @@ -213,12 +619,16 @@ class ASMJIT_VIRTAPI Compiler //! Put a DP-FP `val` to a constant-pool. inline Mem newDoubleConst(uint32_t scope, double val) noexcept { return newConst(scope, &val, 8); } - //! Put a MMX `val` to a constant-pool. +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("newMmConst() uses a deprecated Data64, use newConst() with your own data instead") inline Mem newMmConst(uint32_t scope, const Data64& val) noexcept { return newConst(scope, &val, 8); } - //! Put a XMM `val` to a constant-pool. + + ASMJIT_DEPRECATED("newXmmConst() uses a deprecated Data128, use newConst() with your own data instead") inline Mem newXmmConst(uint32_t scope, const Data128& val) noexcept { return newConst(scope, &val, 16); } - //! Put a YMM `val` to a constant-pool. + + ASMJIT_DEPRECATED("newYmmConst() uses a deprecated Data256, use newConst() with your own data instead") inline Mem newYmmConst(uint32_t scope, const Data256& val) noexcept { return newConst(scope, &val, 32); } +#endif // !ASMJIT_NO_DEPRECATED //! \} @@ -235,16 +645,37 @@ class ASMJIT_VIRTAPI Compiler //! \name Function Call & Ret Intrinsics //! \{ - //! Call a function. - inline FuncCallNode* call(const Gp& target, const FuncSignature& sign) { return addCall(Inst::kIdCall, target, sign); } + //! Invoke a function call without `target` type enforcement. + inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) { + return _addInvokeNode(out, Inst::kIdCall, target, signature); + } + + //! 
Invoke a function call of the given `target` and `signature` and store + //! the added node to `out`. + //! + //! Creates a new \ref InvokeNode, initializes all the necessary members to + //! match the given function `signature`, adds the node to the compiler, and + //! stores its pointer to `out`. The operation is atomic, if anything fails + //! nullptr is stored in `out` and error code is returned. + inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); } //! \overload - inline FuncCallNode* call(const Mem& target, const FuncSignature& sign) { return addCall(Inst::kIdCall, target, sign); } + inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); } //! \overload - inline FuncCallNode* call(const Label& target, const FuncSignature& sign) { return addCall(Inst::kIdCall, target, sign); } + inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); } //! \overload - inline FuncCallNode* call(const Imm& target, const FuncSignature& sign) { return addCall(Inst::kIdCall, target, sign); } + inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); } //! \overload - inline FuncCallNode* call(uint64_t target, const FuncSignature& sign) { return addCall(Inst::kIdCall, Imm(int64_t(target)), sign); } + inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); } + +#ifndef _DOXYGEN + template + ASMJIT_DEPRECATED("Use invoke() instead of call()") + inline InvokeNode* call(const Target& target, const FuncSignature& signature) { + InvokeNode* invokeNode; + invoke(&invokeNode, target, signature); + return invokeNode; + } +#endif //! Return. inline FuncRetNode* ret() { return addRet(Operand(), Operand()); } @@ -260,7 +691,9 @@ class ASMJIT_VIRTAPI Compiler using EmitterExplicitT::jmp; + //! Adds a jump to the given `target` with the provided jump `annotation`. inline Error jmp(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); } + //! \overload inline Error jmp(const BaseMem& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); } //! \} diff --git a/libs/asmjit/src/asmjit/x86/x86emithelper.cpp b/libs/asmjit/src/asmjit/x86/x86emithelper.cpp new file mode 100644 index 0000000..1b21402 --- /dev/null +++ b/libs/asmjit/src/asmjit/x86/x86emithelper.cpp @@ -0,0 +1,603 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. 
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include "../core/api-build_p.h"
+#ifdef ASMJIT_BUILD_X86
+
+#include "../core/formatter.h"
+#include "../core/funcargscontext_p.h"
+#include "../core/string.h"
+#include "../core/support.h"
+#include "../core/type.h"
+#include "../core/radefs_p.h"
+#include "../x86/x86emithelper_p.h"
+#include "../x86/x86emitter.h"
+
+ASMJIT_BEGIN_SUB_NAMESPACE(x86)
+
+// ============================================================================
+// [asmjit::x86::Internal - Helpers]
+// ============================================================================
+
+static ASMJIT_INLINE uint32_t getXmmMovInst(const FuncFrame& frame) {
+  bool avx = frame.isAvxEnabled();
+  bool aligned = frame.hasAlignedVecSR();
+
+  return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
+                 : (avx ? Inst::kIdVmovups : Inst::kIdMovups);
+}
+
+//! Converts `size` to a 'kmov?' instruction.
+static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
+  switch (size) {
+    case 1: return Inst::kIdKmovb;
+    case 2: return Inst::kIdKmovw;
+    case 4: return Inst::kIdKmovd;
+    case 8: return Inst::kIdKmovq;
+    default: return Inst::kIdNone;
+  }
+}
+
+// ============================================================================
+// [asmjit::X86Internal - Emit Helpers]
+// ============================================================================
+
+ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
+  const Operand_& dst_,
+  const Operand_& src_, uint32_t typeId, const char* comment) {
+
+  // Invalid or abstract TypeIds are not allowed.
+  ASMJIT_ASSERT(Type::isValid(typeId) && !Type::isAbstract(typeId));
+
+  Operand dst(dst_);
+  Operand src(src_);
+
+  uint32_t instId = Inst::kIdNone;
+  uint32_t memFlags = 0;
+  uint32_t overrideMemSize = 0;
+
+  enum MemFlags : uint32_t {
+    kDstMem = 0x1,
+    kSrcMem = 0x2
+  };
+
+  // Detect memory operands and patch them to have the same size as the register.
+  // BaseCompiler always sets memory size of allocs and spills, so it shouldn't
+  // be really necessary, however, after this function was separated from Compiler
+  // it's better to make sure that the size is always specified, as we can use
+  // 'movzx' and 'movsx' that rely on it.
+  if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
+  if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
+
+  switch (typeId) {
+    case Type::kIdI8:
+    case Type::kIdU8:
+    case Type::kIdI16:
+    case Type::kIdU16:
+      // Special case - 'movzx' load.
+      if (memFlags & kSrcMem) {
+        instId = Inst::kIdMovzx;
+        dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
+      }
+      else if (!memFlags) {
+        // Change both destination and source registers to GPD (safer, no dependencies).
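+        // (Full 32-bit registers avoid 8/16-bit partial-register writes, and on x86_64
+        //  a 32-bit write zero-extends, so there is no dependency on the old value.)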
+ dst.setSignature(Reg::signatureOfT()); + src.setSignature(Reg::signatureOfT()); + } + ASMJIT_FALLTHROUGH; + + case Type::kIdI32: + case Type::kIdU32: + case Type::kIdI64: + case Type::kIdU64: + instId = Inst::kIdMov; + break; + + case Type::kIdMmx32: + instId = Inst::kIdMovd; + if (memFlags) break; + ASMJIT_FALLTHROUGH; + + case Type::kIdMmx64 : instId = Inst::kIdMovq ; break; + case Type::kIdMask8 : instId = Inst::kIdKmovb; break; + case Type::kIdMask16: instId = Inst::kIdKmovw; break; + case Type::kIdMask32: instId = Inst::kIdKmovd; break; + case Type::kIdMask64: instId = Inst::kIdKmovq; break; + + default: { + uint32_t elementTypeId = Type::baseOf(typeId); + if (Type::isVec32(typeId) && memFlags) { + overrideMemSize = 4; + if (elementTypeId == Type::kIdF32) + instId = _avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss; + else + instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd; + break; + } + + if (Type::isVec64(typeId) && memFlags) { + overrideMemSize = 8; + if (elementTypeId == Type::kIdF64) + instId = _avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd; + else + instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; + break; + } + + if (elementTypeId == Type::kIdF32) + instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps; + else if (elementTypeId == Type::kIdF64) + instId = _avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd; + else if (typeId <= Type::_kIdVec256End) + instId = _avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa; + else if (elementTypeId <= Type::kIdU32) + instId = Inst::kIdVmovdqa32; + else + instId = Inst::kIdVmovdqa64; + break; + } + } + + if (!instId) + return DebugUtils::errored(kErrorInvalidState); + + if (overrideMemSize) { + if (dst.isMem()) dst.as().setSize(overrideMemSize); + if (src.isMem()) src.as().setSize(overrideMemSize); + } + + _emitter->setInlineComment(comment); + return _emitter->emit(instId, dst, src); +} + +ASMJIT_FAVOR_SIZE Error EmitHelper::emitArgMove( + const BaseReg& dst_, uint32_t dstTypeId, + const Operand_& src_, uint32_t srcTypeId, const char* comment) { + + // Deduce optional `dstTypeId`, which may be `Type::kIdVoid` in some cases. + if (!dstTypeId) { + const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch()); + dstTypeId = archTraits.regTypeToTypeId(dst_.type()); + } + + // Invalid or abstract TypeIds are not allowed. + ASMJIT_ASSERT(Type::isValid(dstTypeId) && !Type::isAbstract(dstTypeId)); + ASMJIT_ASSERT(Type::isValid(srcTypeId) && !Type::isAbstract(srcTypeId)); + + Reg dst(dst_.as()); + Operand src(src_); + + uint32_t dstSize = Type::sizeOf(dstTypeId); + uint32_t srcSize = Type::sizeOf(srcTypeId); + + uint32_t instId = Inst::kIdNone; + + // Not a real loop, just 'break' is nicer than 'goto'. + for (;;) { + if (Type::isInt(dstTypeId)) { + if (Type::isInt(srcTypeId)) { + instId = Inst::kIdMovsx; + uint32_t typeOp = (dstTypeId << 8) | srcTypeId; + + // Sign extend by using 'movsx'. + if (typeOp == ((Type::kIdI16 << 8) | Type::kIdI8 ) || + typeOp == ((Type::kIdI32 << 8) | Type::kIdI8 ) || + typeOp == ((Type::kIdI32 << 8) | Type::kIdI16) || + typeOp == ((Type::kIdI64 << 8) | Type::kIdI8 ) || + typeOp == ((Type::kIdI64 << 8) | Type::kIdI16)) + break; + + // Sign extend by using 'movsxd'. + instId = Inst::kIdMovsxd; + if (typeOp == ((Type::kIdI64 << 8) | Type::kIdI32)) + break; + } + + if (Type::isInt(srcTypeId) || src_.isMem()) { + // Zero extend by using 'movzx' or 'mov'. 
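+ // ('movzx' is only needed for 8/16-bit sources; a plain 32-bit 'mov' already zero-extends into the full 64-bit register on x86_64.)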
+ if (dstSize <= 4 && srcSize < 4) { + instId = Inst::kIdMovzx; + dst.setSignature(Reg::signatureOfT()); + } + else { + // We should have caught all possibilities where `srcSize` is less + // than 4, so we don't have to worry about 'movzx' anymore. Minimum + // size is enough to determine if we want 32-bit or 64-bit move. + instId = Inst::kIdMov; + srcSize = Support::min(srcSize, dstSize); + + dst.setSignature(srcSize == 4 ? Reg::signatureOfT() + : Reg::signatureOfT()); + if (src.isReg()) + src.setSignature(dst.signature()); + } + break; + } + + // NOTE: The previous branch caught all memory sources, from here it's + // always register to register conversion, so catch the remaining cases. + srcSize = Support::min(srcSize, dstSize); + + if (Type::isMmx(srcTypeId)) { + // 64-bit move. + instId = Inst::kIdMovq; + if (srcSize == 8) + break; + + // 32-bit move. + instId = Inst::kIdMovd; + dst.setSignature(Reg::signatureOfT()); + break; + } + + if (Type::isMask(srcTypeId)) { + instId = kmovInstFromSize(srcSize); + dst.setSignature(srcSize <= 4 ? Reg::signatureOfT() + : Reg::signatureOfT()); + break; + } + + if (Type::isVec(srcTypeId)) { + // 64-bit move. + instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; + if (srcSize == 8) + break; + + // 32-bit move. + instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd; + dst.setSignature(Reg::signatureOfT()); + break; + } + } + + if (Type::isMmx(dstTypeId)) { + instId = Inst::kIdMovq; + srcSize = Support::min(srcSize, dstSize); + + if (Type::isInt(srcTypeId) || src.isMem()) { + // 64-bit move. + if (srcSize == 8) + break; + + // 32-bit move. + instId = Inst::kIdMovd; + if (src.isReg()) + src.setSignature(Reg::signatureOfT()); + break; + } + + if (Type::isMmx(srcTypeId)) + break; + + // This will hurt if AVX is enabled. + instId = Inst::kIdMovdq2q; + if (Type::isVec(srcTypeId)) + break; + } + + if (Type::isMask(dstTypeId)) { + srcSize = Support::min(srcSize, dstSize); + + if (Type::isInt(srcTypeId) || Type::isMask(srcTypeId) || src.isMem()) { + instId = kmovInstFromSize(srcSize); + if (Reg::isGp(src) && srcSize <= 4) + src.setSignature(Reg::signatureOfT()); + break; + } + } + + if (Type::isVec(dstTypeId)) { + // By default set destination to XMM, will be set to YMM|ZMM if needed. + dst.setSignature(Reg::signatureOfT()); + + // This will hurt if AVX is enabled. + if (Reg::isMm(src)) { + // 64-bit move. + instId = Inst::kIdMovq2dq; + break; + } + + // Argument conversion. + uint32_t dstElement = Type::baseOf(dstTypeId); + uint32_t srcElement = Type::baseOf(srcTypeId); + + if (dstElement == Type::kIdF32 && srcElement == Type::kIdF64) { + srcSize = Support::min(dstSize * 2, srcSize); + dstSize = srcSize / 2; + + if (srcSize <= 8) + instId = _avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd; + else + instId = _avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd; + + if (dstSize == 32) + dst.setSignature(Reg::signatureOfT()); + if (src.isReg()) + src.setSignature(Reg::signatureOfVecBySize(srcSize)); + break; + } + + if (dstElement == Type::kIdF64 && srcElement == Type::kIdF32) { + srcSize = Support::min(dstSize, srcSize * 2) / 2; + dstSize = srcSize * 2; + + if (srcSize <= 4) + instId = _avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss; + else + instId = _avxEnabled ? 
Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps; + + dst.setSignature(Reg::signatureOfVecBySize(dstSize)); + if (src.isReg() && srcSize >= 32) + src.setSignature(Reg::signatureOfT()); + break; + } + + srcSize = Support::min(srcSize, dstSize); + if (Reg::isGp(src) || src.isMem()) { + // 32-bit move. + if (srcSize <= 4) { + instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd; + if (src.isReg()) + src.setSignature(Reg::signatureOfT()); + break; + } + + // 64-bit move. + if (srcSize == 8) { + instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; + break; + } + } + + if (Reg::isVec(src) || src.isMem()) { + instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps; + + if (src.isMem() && srcSize < _emitter->environment().stackAlignment()) + instId = _avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups; + + uint32_t signature = Reg::signatureOfVecBySize(srcSize); + dst.setSignature(signature); + if (src.isReg()) + src.setSignature(signature); + break; + } + } + + return DebugUtils::errored(kErrorInvalidState); + } + + if (src.isMem()) + src.as().setSize(srcSize); + + _emitter->setInlineComment(comment); + return _emitter->emit(instId, dst, src); +} + +Error EmitHelper::emitRegSwap( + const BaseReg& a, + const BaseReg& b, const char* comment) { + + if (a.isGp() && b.isGp()) { + _emitter->setInlineComment(comment); + return _emitter->emit(Inst::kIdXchg, a, b); + } + else + return DebugUtils::errored(kErrorInvalidState); +} + +// ============================================================================ +// [asmjit::X86Internal - Emit Prolog & Epilog] +// ============================================================================ + +static ASMJIT_INLINE void X86Internal_setupSaveRestoreInfo(uint32_t group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept { + switch (group) { + case Reg::kGroupVec: + xReg = xmm(0); + xInst = getXmmMovInst(frame); + xSize = xReg.size(); + break; + case Reg::kGroupMm: + xReg = mm(0); + xInst = Inst::kIdMovq; + xSize = xReg.size(); + break; + case Reg::kGroupKReg: + xReg = k(0); + xInst = Inst::kIdKmovq; + xSize = xReg.size(); + break; + } +} + +ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) { + Emitter* emitter = _emitter->as(); + uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp); + + Gp zsp = emitter->zsp(); // ESP|RSP register. + Gp zbp = emitter->zbp(); // EBP|RBP register. + Gp gpReg = zsp; // General purpose register (temporary). + Gp saReg = zsp; // Stack-arguments base pointer. + + // Emit: 'push zbp' + // 'mov zbp, zsp'. + if (frame.hasPreservedFP()) { + gpSaved &= ~Support::bitMask(Gp::kIdBp); + ASMJIT_PROPAGATE(emitter->push(zbp)); + ASMJIT_PROPAGATE(emitter->mov(zbp, zsp)); + } + + // Emit: 'push gp' sequence. + { + Support::BitWordIterator it(gpSaved); + while (it.hasNext()) { + gpReg.setId(it.next()); + ASMJIT_PROPAGATE(emitter->push(gpReg)); + } + } + + // Emit: 'mov saReg, zsp'. + uint32_t saRegId = frame.saRegId(); + if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) { + saReg.setId(saRegId); + if (frame.hasPreservedFP()) { + if (saRegId != Gp::kIdBp) + ASMJIT_PROPAGATE(emitter->mov(saReg, zbp)); + } + else { + ASMJIT_PROPAGATE(emitter->mov(saReg, zsp)); + } + } + + // Emit: 'and zsp, StackAlignment'. + if (frame.hasDynamicAlignment()) { + ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment()))); + } + + // Emit: 'sub zsp, StackAdjustment'. 
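+ // (StackAdjustment is the stack area computed by FuncFrame; the vector/mask register saves emitted below store into this reserved region.)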
+ if (frame.hasStackAdjustment()) { + ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment())); + } + + // Emit: 'mov [zsp + DAOffset], saReg'. + if (frame.hasDynamicAlignment() && frame.hasDAOffset()) { + Mem saMem = ptr(zsp, int32_t(frame.daOffset())); + ASMJIT_PROPAGATE(emitter->mov(saMem, saReg)); + } + + // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'. + { + Reg xReg; + Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset())); + + uint32_t xInst; + uint32_t xSize; + + for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) { + Support::BitWordIterator it(frame.savedRegs(group)); + if (it.hasNext()) { + X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize); + do { + xReg.setId(it.next()); + ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg)); + xBase.addOffsetLo32(int32_t(xSize)); + } while (it.hasNext()); + } + } + } + + return kErrorOk; +} + +ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) { + Emitter* emitter = _emitter->as(); + + uint32_t i; + uint32_t regId; + + uint32_t registerSize = emitter->registerSize(); + uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp); + + Gp zsp = emitter->zsp(); // ESP|RSP register. + Gp zbp = emitter->zbp(); // EBP|RBP register. + Gp gpReg = emitter->zsp(); // General purpose register (temporary). + + // Don't emit 'pop zbp' in the pop sequence, this case is handled separately. + if (frame.hasPreservedFP()) + gpSaved &= ~Support::bitMask(Gp::kIdBp); + + // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'. + { + Reg xReg; + Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset())); + + uint32_t xInst; + uint32_t xSize; + + for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) { + Support::BitWordIterator it(frame.savedRegs(group)); + if (it.hasNext()) { + X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize); + do { + xReg.setId(it.next()); + ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase)); + xBase.addOffsetLo32(int32_t(xSize)); + } while (it.hasNext()); + } + } + } + + // Emit 'emms' and/or 'vzeroupper'. + if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms()); + if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper()); + + if (frame.hasPreservedFP()) { + // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]' + int32_t count = int32_t(frame.pushPopSaveSize() - registerSize); + if (!count) + ASMJIT_PROPAGATE(emitter->mov(zsp, zbp)); + else + ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count))); + } + else { + if (frame.hasDynamicAlignment() && frame.hasDAOffset()) { + // Emit 'mov zsp, [zsp + DsaSlot]'. + Mem saMem = ptr(zsp, int32_t(frame.daOffset())); + ASMJIT_PROPAGATE(emitter->mov(zsp, saMem)); + } + else if (frame.hasStackAdjustment()) { + // Emit 'add zsp, StackAdjustment'. + ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment()))); + } + } + + // Emit 'pop gp' sequence. + if (gpSaved) { + i = gpSaved; + regId = 16; + + do { + regId--; + if (i & 0x8000) { + gpReg.setId(regId); + ASMJIT_PROPAGATE(emitter->pop(gpReg)); + } + i <<= 1; + } while (regId != 0); + } + + // Emit 'pop zbp'. + if (frame.hasPreservedFP()) + ASMJIT_PROPAGATE(emitter->pop(zbp)); + + // Emit 'ret' or 'ret x'. 
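+ // ('ret imm16' additionally pops imm16 bytes of arguments, as required by callee-cleanup conventions such as stdcall.)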
+ if (frame.hasCalleeStackCleanup()) + ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup()))); + else + ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet)); + + return kErrorOk; +} + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_BUILD_X86 diff --git a/libs/asmjit/src/asmjit/x86/x86emithelper_p.h b/libs/asmjit/src/asmjit/x86/x86emithelper_p.h new file mode 100644 index 0000000..0fb8abc --- /dev/null +++ b/libs/asmjit/src/asmjit/x86/x86emithelper_p.h @@ -0,0 +1,78 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#ifndef ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED +#define ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED + +#include "../core/api-config.h" + +#include "../core/emithelper_p.h" +#include "../core/func.h" +#include "../x86/x86emitter.h" +#include "../x86/x86operand.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(x86) + +//! \cond INTERNAL +//! \addtogroup asmjit_x86 +//! \{ + +// ============================================================================ +// [asmjit::x86::EmitHelper] +// ============================================================================ + +static ASMJIT_INLINE uint32_t vecTypeIdToRegType(uint32_t typeId) noexcept { + return typeId <= Type::_kIdVec128End ? Reg::kTypeXmm : + typeId <= Type::_kIdVec256End ? Reg::kTypeYmm : Reg::kTypeZmm; +} + +class EmitHelper : public BaseEmitHelper { +public: + bool _avxEnabled; + + inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false) noexcept + : BaseEmitHelper(emitter), + _avxEnabled(avxEnabled) {} + + Error emitRegMove( + const Operand_& dst_, + const Operand_& src_, uint32_t typeId, const char* comment = nullptr) override; + + Error emitArgMove( + const BaseReg& dst_, uint32_t dstTypeId, + const Operand_& src_, uint32_t srcTypeId, const char* comment = nullptr) override; + + Error emitRegSwap( + const BaseReg& a, + const BaseReg& b, const char* comment = nullptr) override; + + Error emitProlog(const FuncFrame& frame); + Error emitEpilog(const FuncFrame& frame); +}; + +//! \} +//! 
\endcond + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/x86/x86emitter.h b/libs/asmjit/src/asmjit/x86/x86emitter.h index cfd96a1..0334573 100644 --- a/libs/asmjit/src/asmjit/x86/x86emitter.h +++ b/libs/asmjit/src/asmjit/x86/x86emitter.h @@ -32,134 +32,112 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) #define ASMJIT_INST_0x(NAME, ID) \ - inline Error NAME() { return _emitter()->emit(Inst::kId##ID); } + inline Error NAME() { return _emitter()->_emitI(Inst::kId##ID); } #define ASMJIT_INST_1x(NAME, ID, T0) \ - inline Error NAME(const T0& o0) { return _emitter()->emit(Inst::kId##ID, o0); } + inline Error NAME(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID, o0); } #define ASMJIT_INST_1i(NAME, ID, T0) \ - inline Error NAME(const T0& o0) { return _emitter()->emit(Inst::kId##ID, o0); } \ - inline Error NAME(int o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \ - inline Error NAME(unsigned int o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \ - inline Error NAME(int64_t o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \ - inline Error NAME(uint64_t o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } + inline Error NAME(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID, o0); } #define ASMJIT_INST_1c(NAME, ID, CONV, T0) \ - inline Error NAME(uint32_t cc, const T0& o0) { return _emitter()->emit(CONV(cc), o0); } \ - inline Error NAME##a(const T0& o0) { return _emitter()->emit(Inst::kId##ID##a, o0); } \ - inline Error NAME##ae(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ae, o0); } \ - inline Error NAME##b(const T0& o0) { return _emitter()->emit(Inst::kId##ID##b, o0); } \ - inline Error NAME##be(const T0& o0) { return _emitter()->emit(Inst::kId##ID##be, o0); } \ - inline Error NAME##c(const T0& o0) { return _emitter()->emit(Inst::kId##ID##c, o0); } \ - inline Error NAME##e(const T0& o0) { return _emitter()->emit(Inst::kId##ID##e, o0); } \ - inline Error NAME##g(const T0& o0) { return _emitter()->emit(Inst::kId##ID##g, o0); } \ - inline Error NAME##ge(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ge, o0); } \ - inline Error NAME##l(const T0& o0) { return _emitter()->emit(Inst::kId##ID##l, o0); } \ - inline Error NAME##le(const T0& o0) { return _emitter()->emit(Inst::kId##ID##le, o0); } \ - inline Error NAME##na(const T0& o0) { return _emitter()->emit(Inst::kId##ID##na, o0); } \ - inline Error NAME##nae(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nae, o0); } \ - inline Error NAME##nb(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nb, o0); } \ - inline Error NAME##nbe(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nbe, o0); } \ - inline Error NAME##nc(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nc, o0); } \ - inline Error NAME##ne(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ne, o0); } \ - inline Error NAME##ng(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ng, o0); } \ - inline Error NAME##nge(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nge, o0); } \ - inline Error NAME##nl(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nl, o0); } \ - inline Error NAME##nle(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nle, o0); } \ - inline Error NAME##no(const T0& o0) { return _emitter()->emit(Inst::kId##ID##no, o0); } \ - inline Error NAME##np(const T0& o0) { return _emitter()->emit(Inst::kId##ID##np, o0); } \ - inline Error NAME##ns(const T0& o0) { return 
_emitter()->emit(Inst::kId##ID##ns, o0); } \ - inline Error NAME##nz(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nz, o0); } \ - inline Error NAME##o(const T0& o0) { return _emitter()->emit(Inst::kId##ID##o, o0); } \ - inline Error NAME##p(const T0& o0) { return _emitter()->emit(Inst::kId##ID##p, o0); } \ - inline Error NAME##pe(const T0& o0) { return _emitter()->emit(Inst::kId##ID##pe, o0); } \ - inline Error NAME##po(const T0& o0) { return _emitter()->emit(Inst::kId##ID##po, o0); } \ - inline Error NAME##s(const T0& o0) { return _emitter()->emit(Inst::kId##ID##s, o0); } \ - inline Error NAME##z(const T0& o0) { return _emitter()->emit(Inst::kId##ID##z, o0); } + inline Error NAME(uint32_t cc, const T0& o0) { return _emitter()->_emitI(CONV(cc), o0); } \ + inline Error NAME##a(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##a, o0); } \ + inline Error NAME##ae(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ae, o0); } \ + inline Error NAME##b(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##b, o0); } \ + inline Error NAME##be(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##be, o0); } \ + inline Error NAME##c(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##c, o0); } \ + inline Error NAME##e(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##e, o0); } \ + inline Error NAME##g(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##g, o0); } \ + inline Error NAME##ge(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ge, o0); } \ + inline Error NAME##l(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##l, o0); } \ + inline Error NAME##le(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##le, o0); } \ + inline Error NAME##na(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##na, o0); } \ + inline Error NAME##nae(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nae, o0); } \ + inline Error NAME##nb(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nb, o0); } \ + inline Error NAME##nbe(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nbe, o0); } \ + inline Error NAME##nc(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nc, o0); } \ + inline Error NAME##ne(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ne, o0); } \ + inline Error NAME##ng(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ng, o0); } \ + inline Error NAME##nge(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nge, o0); } \ + inline Error NAME##nl(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nl, o0); } \ + inline Error NAME##nle(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nle, o0); } \ + inline Error NAME##no(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##no, o0); } \ + inline Error NAME##np(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##np, o0); } \ + inline Error NAME##ns(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##ns, o0); } \ + inline Error NAME##nz(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##nz, o0); } \ + inline Error NAME##o(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##o, o0); } \ + inline Error NAME##p(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##p, o0); } \ + inline Error NAME##pe(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##pe, o0); } \ + inline Error NAME##po(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##po, o0); } \ + inline Error NAME##s(const T0& o0) { return _emitter()->_emitI(Inst::kId##ID##s, o0); } \ + inline Error NAME##z(const T0& o0) { return 
_emitter()->_emitI(Inst::kId##ID##z, o0); } #define ASMJIT_INST_2x(NAME, ID, T0, T1) \ - inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID, o0, o1); } + inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID, o0, o1); } #define ASMJIT_INST_2i(NAME, ID, T0, T1) \ - inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID, o0, o1); } \ - inline Error NAME(const T0& o0, int o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \ - inline Error NAME(const T0& o0, unsigned int o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \ - inline Error NAME(const T0& o0, int64_t o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \ - inline Error NAME(const T0& o0, uint64_t o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } + inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID, o0, o1); } #define ASMJIT_INST_2c(NAME, ID, CONV, T0, T1) \ - inline Error NAME(uint32_t cc, const T0& o0, const T1& o1) { return _emitter()->emit(CONV(cc), o0, o1); } \ - inline Error NAME##a(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##a, o0, o1); } \ - inline Error NAME##ae(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ae, o0, o1); } \ - inline Error NAME##b(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##b, o0, o1); } \ - inline Error NAME##be(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##be, o0, o1); } \ - inline Error NAME##c(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##c, o0, o1); } \ - inline Error NAME##e(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##e, o0, o1); } \ - inline Error NAME##g(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##g, o0, o1); } \ - inline Error NAME##ge(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ge, o0, o1); } \ - inline Error NAME##l(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##l, o0, o1); } \ - inline Error NAME##le(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##le, o0, o1); } \ - inline Error NAME##na(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##na, o0, o1); } \ - inline Error NAME##nae(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nae, o0, o1); } \ - inline Error NAME##nb(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nb, o0, o1); } \ - inline Error NAME##nbe(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nbe, o0, o1); } \ - inline Error NAME##nc(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nc, o0, o1); } \ - inline Error NAME##ne(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ne, o0, o1); } \ - inline Error NAME##ng(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ng, o0, o1); } \ - inline Error NAME##nge(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nge, o0, o1); } \ - inline Error NAME##nl(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nl, o0, o1); } \ - inline Error NAME##nle(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nle, o0, o1); } \ - inline Error NAME##no(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##no, o0, o1); } \ - inline Error NAME##np(const T0& o0, const T1& o1) { return 
_emitter()->emit(Inst::kId##ID##np, o0, o1); } \ - inline Error NAME##ns(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ns, o0, o1); } \ - inline Error NAME##nz(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nz, o0, o1); } \ - inline Error NAME##o(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##o, o0, o1); } \ - inline Error NAME##p(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##p, o0, o1); } \ - inline Error NAME##pe(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##pe, o0, o1); } \ - inline Error NAME##po(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##po, o0, o1); } \ - inline Error NAME##s(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##s, o0, o1); } \ - inline Error NAME##z(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##z, o0, o1); } + inline Error NAME(uint32_t cc, const T0& o0, const T1& o1) { return _emitter()->_emitI(CONV(cc), o0, o1); } \ + inline Error NAME##a(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##a, o0, o1); } \ + inline Error NAME##ae(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ae, o0, o1); } \ + inline Error NAME##b(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##b, o0, o1); } \ + inline Error NAME##be(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##be, o0, o1); } \ + inline Error NAME##c(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##c, o0, o1); } \ + inline Error NAME##e(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##e, o0, o1); } \ + inline Error NAME##g(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##g, o0, o1); } \ + inline Error NAME##ge(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ge, o0, o1); } \ + inline Error NAME##l(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##l, o0, o1); } \ + inline Error NAME##le(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##le, o0, o1); } \ + inline Error NAME##na(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##na, o0, o1); } \ + inline Error NAME##nae(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nae, o0, o1); } \ + inline Error NAME##nb(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nb, o0, o1); } \ + inline Error NAME##nbe(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nbe, o0, o1); } \ + inline Error NAME##nc(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nc, o0, o1); } \ + inline Error NAME##ne(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ne, o0, o1); } \ + inline Error NAME##ng(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##ng, o0, o1); } \ + inline Error NAME##nge(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nge, o0, o1); } \ + inline Error NAME##nl(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nl, o0, o1); } \ + inline Error NAME##nle(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nle, o0, o1); } \ + inline Error NAME##no(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##no, o0, o1); } \ + inline Error NAME##np(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##np, o0, o1); } \ + inline Error NAME##ns(const T0& o0, const T1& o1) { return 
_emitter()->_emitI(Inst::kId##ID##ns, o0, o1); } \ + inline Error NAME##nz(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##nz, o0, o1); } \ + inline Error NAME##o(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##o, o0, o1); } \ + inline Error NAME##p(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##p, o0, o1); } \ + inline Error NAME##pe(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##pe, o0, o1); } \ + inline Error NAME##po(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##po, o0, o1); } \ + inline Error NAME##s(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##s, o0, o1); } \ + inline Error NAME##z(const T0& o0, const T1& o1) { return _emitter()->_emitI(Inst::kId##ID##z, o0, o1); } #define ASMJIT_INST_3x(NAME, ID, T0, T1, T2) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2); } #define ASMJIT_INST_3i(NAME, ID, T0, T1, T2) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); } \ - inline Error NAME(const T0& o0, const T1& o1, int o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \ - inline Error NAME(const T0& o0, const T1& o1, unsigned int o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \ - inline Error NAME(const T0& o0, const T1& o1, int64_t o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \ - inline Error NAME(const T0& o0, const T1& o1, uint64_t o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2); } #define ASMJIT_INST_3ii(NAME, ID, T0, T1, T2) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); } \ - inline Error NAME(const T0& o0, int o1, int o2) { return _emitter()->emit(Inst::kId##ID, o0, Imm(o1), Support::asInt(o2)); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2); } #define ASMJIT_INST_4x(NAME, ID, T0, T1, T2, T3) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3); } #define ASMJIT_INST_4i(NAME, ID, T0, T1, T2, T3) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, unsigned int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, int64_t o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, uint64_t o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return 
_emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3); } #define ASMJIT_INST_4ii(NAME, ID, T0, T1, T2, T3) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); } \ - inline Error NAME(const T0& o0, const T1& o1, int o2, int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, Imm(o2), Support::asInt(o3)); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3); } #define ASMJIT_INST_5x(NAME, ID, T0, T1, T2, T3, T4) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4); } #define ASMJIT_INST_5i(NAME, ID, T0, T1, T2, T3, T4) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, int o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, unsigned int o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, int64_t o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, uint64_t o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4); } #define ASMJIT_INST_6x(NAME, ID, T0, T1, T2, T3, T4, T5) \ - inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4, const T5& o5) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4, o5); } + inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4, const T5& o5) { return _emitter()->_emitI(Inst::kId##ID, o0, o1, o2, o3, o4, o5); } //! \addtogroup asmjit_x86 //! \{ @@ -168,30 +146,32 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // [asmjit::x86::EmitterExplicitT] // ============================================================================ +//! Emitter (X86 - explicit). template struct EmitterExplicitT { //! \cond + // These typedefs are used to describe implicit operands passed explicitly. - typedef Gp AL; - typedef Gp AH; - typedef Gp CL; - typedef Gp AX; - typedef Gp DX; - - typedef Gp EAX; - typedef Gp EBX; - typedef Gp ECX; - typedef Gp EDX; - - typedef Gp RAX; - typedef Gp RBX; - typedef Gp RCX; - typedef Gp RDX; - - typedef Gp ZAX; - typedef Gp ZBX; - typedef Gp ZCX; - typedef Gp ZDX; + typedef Gp Gp_AL; + typedef Gp Gp_AH; + typedef Gp Gp_CL; + typedef Gp Gp_AX; + typedef Gp Gp_DX; + + typedef Gp Gp_EAX; + typedef Gp Gp_EBX; + typedef Gp Gp_ECX; + typedef Gp Gp_EDX; + + typedef Gp Gp_RAX; + typedef Gp Gp_RBX; + typedef Gp Gp_RCX; + typedef Gp Gp_RDX; + + typedef Gp Gp_ZAX; + typedef Gp Gp_ZBX; + typedef Gp Gp_ZCX; + typedef Gp Gp_ZDX; typedef Mem DS_ZAX; // ds:[zax] typedef Mem DS_ZDI; // ds:[zdi] @@ -212,16 +192,16 @@ struct EmitterExplicitT { //! \{ //! 
Returns either GPD or GPQ register of the given `id` depending on the emitter's architecture. - inline Gp gpz(uint32_t id) const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), id); } + inline Gp gpz(uint32_t id) const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), id); } - inline Gp zax() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdAx); } - inline Gp zcx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdCx); } - inline Gp zdx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdDx); } - inline Gp zbx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdBx); } - inline Gp zsp() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdSp); } - inline Gp zbp() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdBp); } - inline Gp zsi() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdSi); } - inline Gp zdi() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdDi); } + inline Gp zax() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdAx); } + inline Gp zcx() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdCx); } + inline Gp zdx() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdDx); } + inline Gp zbx() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdBx); } + inline Gp zsp() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdSp); } + inline Gp zbp() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdBp); } + inline Gp zsi() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdSi); } + inline Gp zdi() const noexcept { return Gp::fromSignatureAndId(_emitter()->_gpRegInfo.signature(), Gp::kIdDi); } //! \} @@ -244,58 +224,58 @@ struct EmitterExplicitT { //! Creates an `intptr_t` memory operand depending on the current architecture. inline Mem intptr_ptr(const Gp& base, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, index, shift, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, index, shift, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(const Label& base, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(const Label& base, const Gp& index, uint32_t shift, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, index, shift, offset, nativeGpSize); } //! 
\overload inline Mem intptr_ptr(const Label& base, const Vec& index, uint32_t shift, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, index, shift, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(const Rip& rip, int32_t offset = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(rip, offset, nativeGpSize); } //! \overload inline Mem intptr_ptr(uint64_t base) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, nativeGpSize); } //! \overload inline Mem intptr_ptr(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); + uint32_t nativeGpSize = _emitter()->registerSize(); return Mem(base, index, shift, nativeGpSize); } //! \overload inline Mem intptr_ptr_abs(uint64_t base) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); - return Mem(base, nativeGpSize, BaseMem::kSignatureMemAbs); + uint32_t nativeGpSize = _emitter()->registerSize(); + return Mem(base, nativeGpSize, Mem::kSignatureMemAbs); } //! \overload inline Mem intptr_ptr_abs(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept { - uint32_t nativeGpSize = _emitter()->gpSize(); - return Mem(base, index, shift, nativeGpSize, BaseMem::kSignatureMemAbs); + uint32_t nativeGpSize = _emitter()->registerSize(); + return Mem(base, index, shift, nativeGpSize, Mem::kSignatureMemAbs); } //! \} @@ -303,46 +283,55 @@ struct EmitterExplicitT { //! \name Embed //! \{ - //! Adds 8-bit integer data to the CodeBuffer. - inline Error db(uint8_t x) { return _emitter()->embed(&x, 1); } - //! Adds 16-bit integer data to the CodeBuffer. - inline Error dw(uint16_t x) { return _emitter()->embed(&x, 2); } - //! Adds 32-bit integer data to the CodeBuffer. - inline Error dd(uint32_t x) { return _emitter()->embed(&x, 4); } - //! Adds 64-bit integer data to the CodeBuffer. - inline Error dq(uint64_t x) { return _emitter()->embed(&x, 8); } - - //! Adds 8-bit integer data to the CodeBuffer. + //! Embeds 8-bit integer data. + inline Error db(uint8_t x, size_t repeatCount = 1) { return _emitter()->embedUInt8(x, repeatCount); } + //! Embeds 16-bit integer data. + inline Error dw(uint16_t x, size_t repeatCount = 1) { return _emitter()->embedUInt16(x, repeatCount); } + //! Embeds 32-bit integer data. + inline Error dd(uint32_t x, size_t repeatCount = 1) { return _emitter()->embedUInt32(x, repeatCount); } + //! Embeds 64-bit integer data. + inline Error dq(uint64_t x, size_t repeatCount = 1) { return _emitter()->embedUInt64(x, repeatCount); } + +#ifndef ASMJIT_NO_DEPRECATED + ASMJIT_DEPRECATED("Use embedInt8() instead of dint8()") inline Error dint8(int8_t x) { return _emitter()->embed(&x, sizeof(int8_t)); } - //! Adds 8-bit integer data to the CodeBuffer. + + ASMJIT_DEPRECATED("Use embedUInt8() instead of duint8()") inline Error duint8(uint8_t x) { return _emitter()->embed(&x, sizeof(uint8_t)); } - //! Adds 16-bit integer data to the CodeBuffer. + ASMJIT_DEPRECATED("Use embedInt16() instead of dint16()") inline Error dint16(int16_t x) { return _emitter()->embed(&x, sizeof(int16_t)); } - //! Adds 16-bit integer data to the CodeBuffer. + + ASMJIT_DEPRECATED("Use embedUInt16() instead of duint16()") inline Error duint16(uint16_t x) { return _emitter()->embed(&x, sizeof(uint16_t)); } - //! 
Adds 32-bit integer data to the CodeBuffer. + ASMJIT_DEPRECATED("Use embedInt32() instead of dint32()") inline Error dint32(int32_t x) { return _emitter()->embed(&x, sizeof(int32_t)); } - //! Adds 32-bit integer data to the CodeBuffer. + + ASMJIT_DEPRECATED("Use embedUInt32() instead of duint32()") inline Error duint32(uint32_t x) { return _emitter()->embed(&x, sizeof(uint32_t)); } - //! Adds 64-bit integer data to the CodeBuffer. + ASMJIT_DEPRECATED("Use embedInt64() instead of dint64()") inline Error dint64(int64_t x) { return _emitter()->embed(&x, sizeof(int64_t)); } - //! Adds 64-bit integer data to the CodeBuffer. + + ASMJIT_DEPRECATED("Use embedUInt64() instead of duint64()") inline Error duint64(uint64_t x) { return _emitter()->embed(&x, sizeof(uint64_t)); } - //! Adds float data to the CodeBuffer. + ASMJIT_DEPRECATED("Use embedFloat() instead of float()") inline Error dfloat(float x) { return _emitter()->embed(&x, sizeof(float)); } - //! Adds double data to the CodeBuffer. + + ASMJIT_DEPRECATED("Use embedDouble() instead of ddouble()") inline Error ddouble(double x) { return _emitter()->embed(&x, sizeof(double)); } - //! Adds MMX data to the CodeBuffer. - inline Error dmm(const Data64& x) { return _emitter()->embed(&x, sizeof(Data64)); } - //! Adds XMM data to the CodeBuffer. - inline Error dxmm(const Data128& x) { return _emitter()->embed(&x, sizeof(Data128)); } - //! Adds YMM data to the CodeBuffer. - inline Error dymm(const Data256& x) { return _emitter()->embed(&x, sizeof(Data256)); } + ASMJIT_DEPRECATED("Use embed[U]IntN() or embed[Float|Double]() instead of dmm()") + inline Error dmm(const Data64& x) { return _emitter()->embed(&x, 8); } + + ASMJIT_DEPRECATED("Use embed[U]IntN() or embed[Float|Double]() instead of dxmm()") + inline Error dxmm(const Data128& x) { return _emitter()->embed(&x, 16); } + + ASMJIT_DEPRECATED("Use embed[U]IntN() or embed[Float|Double]() instead of dymm()") + inline Error dymm(const Data256& x) { return _emitter()->embed(&x, 32); } +#endif // !ASMJIT_NO_DEPRECATED //! Adds data in a given structure instance to the CodeBuffer. template @@ -487,7 +476,7 @@ struct EmitterExplicitT { //! \} - //! \name Base Instructions & GP Extensions + //! \name Core Instructions //! 
\{ ASMJIT_INST_2x(adc, Adc, Gp, Gp) // ANY @@ -505,8 +494,6 @@ struct EmitterExplicitT { ASMJIT_INST_2i(and_, And, Gp, Imm) // ANY ASMJIT_INST_2x(and_, And, Mem, Gp) // ANY ASMJIT_INST_2i(and_, And, Mem, Imm) // ANY - ASMJIT_INST_2x(arpl, Arpl, Gp, Gp) // X86 - ASMJIT_INST_2x(arpl, Arpl, Mem, Gp) // X86 ASMJIT_INST_2x(bound, Bound, Gp, Mem) // X86 ASMJIT_INST_2x(bsf, Bsf, Gp, Gp) // ANY ASMJIT_INST_2x(bsf, Bsf, Gp, Mem) // ANY @@ -529,21 +516,16 @@ struct EmitterExplicitT { ASMJIT_INST_2i(bts, Bts, Gp, Imm) // ANY ASMJIT_INST_2x(bts, Bts, Mem, Gp) // ANY ASMJIT_INST_2i(bts, Bts, Mem, Imm) // ANY - ASMJIT_INST_1x(cbw, Cbw, AX) // ANY [EXPLICIT] AX <- Sign Extend AL - ASMJIT_INST_2x(cdq, Cdq, EDX, EAX) // ANY [EXPLICIT] EDX:EAX <- Sign Extend EAX - ASMJIT_INST_1x(cdqe, Cdqe, EAX) // X64 [EXPLICIT] RAX <- Sign Extend EAX - ASMJIT_INST_2x(cqo, Cqo, RDX, RAX) // X64 [EXPLICIT] RDX:RAX <- Sign Extend RAX - ASMJIT_INST_2x(cwd, Cwd, DX, AX) // ANY [EXPLICIT] DX:AX <- Sign Extend AX - ASMJIT_INST_1x(cwde, Cwde, EAX) // ANY [EXPLICIT] EAX <- Sign Extend AX + ASMJIT_INST_1x(cbw, Cbw, Gp_AX) // ANY [EXPLICIT] AX <- Sign Extend AL + ASMJIT_INST_2x(cdq, Cdq, Gp_EDX, Gp_EAX) // ANY [EXPLICIT] EDX:EAX <- Sign Extend EAX + ASMJIT_INST_1x(cdqe, Cdqe, Gp_EAX) // X64 [EXPLICIT] RAX <- Sign Extend EAX + ASMJIT_INST_2x(cqo, Cqo, Gp_RDX, Gp_RAX) // X64 [EXPLICIT] RDX:RAX <- Sign Extend RAX + ASMJIT_INST_2x(cwd, Cwd, Gp_DX, Gp_AX) // ANY [EXPLICIT] DX:AX <- Sign Extend AX + ASMJIT_INST_1x(cwde, Cwde, Gp_EAX) // ANY [EXPLICIT] EAX <- Sign Extend AX ASMJIT_INST_1x(call, Call, Gp) // ANY ASMJIT_INST_1x(call, Call, Mem) // ANY ASMJIT_INST_1x(call, Call, Label) // ANY ASMJIT_INST_1i(call, Call, Imm) // ANY - ASMJIT_INST_0x(clc, Clc) // ANY - ASMJIT_INST_0x(cld, Cld) // ANY - ASMJIT_INST_0x(cli, Cli) // ANY - ASMJIT_INST_0x(clts, Clts) // ANY - ASMJIT_INST_0x(cmc, Cmc) // ANY ASMJIT_INST_2c(cmov, Cmov, Condition::toCmovcc, Gp, Gp) // CMOV ASMJIT_INST_2c(cmov, Cmov, Condition::toCmovcc, Gp, Mem) // CMOV ASMJIT_INST_2x(cmp, Cmp, Gp, Gp) // ANY @@ -551,89 +533,52 @@ struct EmitterExplicitT { ASMJIT_INST_2i(cmp, Cmp, Gp, Imm) // ANY ASMJIT_INST_2x(cmp, Cmp, Mem, Gp) // ANY ASMJIT_INST_2i(cmp, Cmp, Mem, Imm) // ANY - ASMJIT_INST_2x(cmps, Cmps, DS_ZSI, ES_ZDI) // ANY [EXPLICIT] - ASMJIT_INST_3x(cmpxchg, Cmpxchg, Gp, Gp, ZAX) // I486 [EXPLICIT] - ASMJIT_INST_3x(cmpxchg, Cmpxchg, Mem, Gp, ZAX) // I486 [EXPLICIT] - ASMJIT_INST_5x(cmpxchg16b, Cmpxchg16b, Mem, RDX, RAX, RCX, RBX); // CMPXCHG16B[EXPLICIT] m == EDX:EAX ? m <- ECX:EBX - ASMJIT_INST_5x(cmpxchg8b, Cmpxchg8b, Mem, EDX, EAX, ECX, EBX); // CMPXCHG8B [EXPLICIT] m == RDX:RAX ? m <- RCX:RBX - ASMJIT_INST_4x(cpuid, Cpuid, EAX, EBX, ECX, EDX) // I486 [EXPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX] - ASMJIT_INST_1x(daa, Daa, Gp) // X86 [EXPLICIT] - ASMJIT_INST_1x(das, Das, Gp) // X86 [EXPLICIT] + ASMJIT_INST_2x(cmps, Cmps, DS_ZSI, ES_ZDI) // ANY [EXPLICIT] + ASMJIT_INST_3x(cmpxchg, Cmpxchg, Gp, Gp, Gp_ZAX) // I486 [EXPLICIT] + ASMJIT_INST_3x(cmpxchg, Cmpxchg, Mem, Gp, Gp_ZAX) // I486 [EXPLICIT] + ASMJIT_INST_5x(cmpxchg16b, Cmpxchg16b, Mem, Gp_RDX, Gp_RAX, Gp_RCX, Gp_RBX); // CMPXCHG16B [EXPLICIT] m == EDX:EAX ? m <- ECX:EBX + ASMJIT_INST_5x(cmpxchg8b, Cmpxchg8b, Mem, Gp_EDX, Gp_EAX, Gp_ECX, Gp_EBX); // CMPXCHG8B [EXPLICIT] m == RDX:RAX ? 
m <- RCX:RBX ASMJIT_INST_1x(dec, Dec, Gp) // ANY ASMJIT_INST_1x(dec, Dec, Mem) // ANY - ASMJIT_INST_2x(div, Div, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8 - ASMJIT_INST_2x(div, Div, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8 - ASMJIT_INST_3x(div, Div, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64 - ASMJIT_INST_3x(div, Div, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64 - ASMJIT_INST_0x(emms, Emms) // MMX - ASMJIT_INST_2x(enter, Enter, Imm, Imm) // ANY - ASMJIT_INST_0x(hlt, Hlt) // ANY - ASMJIT_INST_2x(idiv, Idiv, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8 - ASMJIT_INST_2x(idiv, Idiv, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8 - ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64 - ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64 - ASMJIT_INST_2x(imul, Imul, Gp, Gp) // ANY [EXPLICIT] AX <- AL * r8 | ra <- ra * rb - ASMJIT_INST_2x(imul, Imul, Gp, Mem) // ANY [EXPLICIT] AX <- AL * m8 | ra <- ra * m16|m32|m64 + ASMJIT_INST_2x(div, Div, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8 + ASMJIT_INST_2x(div, Div, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8 + ASMJIT_INST_3x(div, Div, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64 + ASMJIT_INST_3x(div, Div, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64 + ASMJIT_INST_2x(idiv, Idiv, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8 + ASMJIT_INST_2x(idiv, Idiv, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8 + ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64 + ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64 + ASMJIT_INST_2x(imul, Imul, Gp, Gp) // ANY [EXPLICIT] AX <- AL * r8 | ra <- ra * rb + ASMJIT_INST_2x(imul, Imul, Gp, Mem) // ANY [EXPLICIT] AX <- AL * m8 | ra <- ra * m16|m32|m64 ASMJIT_INST_2i(imul, Imul, Gp, Imm) // ANY ASMJIT_INST_3i(imul, Imul, Gp, Gp, Imm) // ANY ASMJIT_INST_3i(imul, Imul, Gp, Mem, Imm) // ANY - ASMJIT_INST_3x(imul, Imul, Gp, Gp, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64 - ASMJIT_INST_3x(imul, Imul, Gp, Gp, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64 - ASMJIT_INST_2i(in, In, ZAX, Imm) // ANY - ASMJIT_INST_2x(in, In, ZAX, DX) // ANY + ASMJIT_INST_3x(imul, Imul, Gp, Gp, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64 + ASMJIT_INST_3x(imul, Imul, Gp, Gp, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64 ASMJIT_INST_1x(inc, Inc, Gp) // ANY ASMJIT_INST_1x(inc, Inc, Mem) // ANY - ASMJIT_INST_2x(ins, Ins, ES_ZDI, DX) // ANY - ASMJIT_INST_1i(int_, Int, Imm) // ANY - ASMJIT_INST_0x(int3, Int3) // ANY - ASMJIT_INST_0x(into, Into) // ANY - ASMJIT_INST_0x(invd, Invd) // ANY - ASMJIT_INST_1x(invlpg, Invlpg, Mem) // ANY - ASMJIT_INST_2x(invpcid, Invpcid, Gp, Mem) // ANY ASMJIT_INST_1c(j, J, Condition::toJcc, Label) // ANY ASMJIT_INST_1c(j, J, Condition::toJcc, Imm) // ANY ASMJIT_INST_1c(j, J, Condition::toJcc, uint64_t) // ANY - ASMJIT_INST_2x(jecxz, Jecxz, Gp, Label) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. - ASMJIT_INST_2x(jecxz, Jecxz, Gp, Imm) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. - ASMJIT_INST_2x(jecxz, Jecxz, Gp, uint64_t) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. + ASMJIT_INST_2x(jecxz, Jecxz, Gp, Label) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. 
+ ASMJIT_INST_2x(jecxz, Jecxz, Gp, Imm) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. + ASMJIT_INST_2x(jecxz, Jecxz, Gp, uint64_t) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero. ASMJIT_INST_1x(jmp, Jmp, Gp) // ANY ASMJIT_INST_1x(jmp, Jmp, Mem) // ANY ASMJIT_INST_1x(jmp, Jmp, Label) // ANY ASMJIT_INST_1x(jmp, Jmp, Imm) // ANY ASMJIT_INST_1x(jmp, Jmp, uint64_t) // ANY - ASMJIT_INST_1x(lahf, Lahf, AH) // LAHFSAHF [EXPLICIT] AH <- EFL - ASMJIT_INST_2x(lar, Lar, Gp, Gp) // ANY - ASMJIT_INST_2x(lar, Lar, Gp, Mem) // ANY - ASMJIT_INST_1x(ldmxcsr, Ldmxcsr, Mem) // SSE - ASMJIT_INST_2x(lds, Lds, Gp, Mem) // X86 ASMJIT_INST_2x(lea, Lea, Gp, Mem) // ANY - ASMJIT_INST_0x(leave, Leave) // ANY - ASMJIT_INST_2x(les, Les, Gp, Mem) // X86 - ASMJIT_INST_0x(lfence, Lfence) // SSE2 - ASMJIT_INST_2x(lfs, Lfs, Gp, Mem) // ANY - ASMJIT_INST_1x(lgdt, Lgdt, Mem) // ANY - ASMJIT_INST_2x(lgs, Lgs, Gp, Mem) // ANY - ASMJIT_INST_1x(lidt, Lidt, Mem) // ANY - ASMJIT_INST_1x(lldt, Lldt, Gp) // ANY - ASMJIT_INST_1x(lldt, Lldt, Mem) // ANY - ASMJIT_INST_1x(lmsw, Lmsw, Gp) // ANY - ASMJIT_INST_1x(lmsw, Lmsw, Mem) // ANY - ASMJIT_INST_2x(lods, Lods, ZAX, DS_ZSI) // ANY [EXPLICIT] - ASMJIT_INST_2x(loop, Loop, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. - ASMJIT_INST_2x(loop, Loop, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. - ASMJIT_INST_2x(loop, Loop, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. - ASMJIT_INST_2x(loope, Loope, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. - ASMJIT_INST_2x(loope, Loope, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. - ASMJIT_INST_2x(loope, Loope, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. - ASMJIT_INST_2x(loopne, Loopne, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. - ASMJIT_INST_2x(loopne, Loopne, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. - ASMJIT_INST_2x(loopne, Loopne, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. - ASMJIT_INST_2x(lsl, Lsl, Gp, Gp) // ANY - ASMJIT_INST_2x(lsl, Lsl, Gp, Mem) // ANY - ASMJIT_INST_2x(lss, Lss, Gp, Mem) // ANY - ASMJIT_INST_1x(ltr, Ltr, Gp) // ANY - ASMJIT_INST_1x(ltr, Ltr, Mem) // ANY - ASMJIT_INST_0x(mfence, Mfence) // SSE2 + ASMJIT_INST_2x(lods, Lods, Gp_ZAX, DS_ZSI) // ANY [EXPLICIT] + ASMJIT_INST_2x(loop, Loop, Gp_ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. + ASMJIT_INST_2x(loop, Loop, Gp_ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. + ASMJIT_INST_2x(loop, Loop, Gp_ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0. + ASMJIT_INST_2x(loope, Loope, Gp_ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. + ASMJIT_INST_2x(loope, Loope, Gp_ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. + ASMJIT_INST_2x(loope, Loope, Gp_ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1. + ASMJIT_INST_2x(loopne, Loopne, Gp_ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. + ASMJIT_INST_2x(loopne, Loopne, Gp_ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. + ASMJIT_INST_2x(loopne, Loopne, Gp_ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. 
ASMJIT_INST_2x(mov, Mov, Gp, Gp) // ANY ASMJIT_INST_2x(mov, Mov, Gp, Mem) // ANY ASMJIT_INST_2i(mov, Mov, Gp, Imm) // ANY @@ -648,22 +593,24 @@ struct EmitterExplicitT { ASMJIT_INST_2x(mov, Mov, SReg, Gp) // ANY ASMJIT_INST_2x(mov, Mov, SReg, Mem) // ANY ASMJIT_INST_2x(movnti, Movnti, Mem, Gp) // SSE2 - ASMJIT_INST_2x(movs, Movs, ES_ZDI, DS_ZSI) // ANY [EXPLICIT] + ASMJIT_INST_2x(movs, Movs, ES_ZDI, DS_ZSI) // ANY [EXPLICIT] ASMJIT_INST_2x(movsx, Movsx, Gp, Gp) // ANY ASMJIT_INST_2x(movsx, Movsx, Gp, Mem) // ANY ASMJIT_INST_2x(movsxd, Movsxd, Gp, Gp) // X64 ASMJIT_INST_2x(movsxd, Movsxd, Gp, Mem) // X64 ASMJIT_INST_2x(movzx, Movzx, Gp, Gp) // ANY ASMJIT_INST_2x(movzx, Movzx, Gp, Mem) // ANY - ASMJIT_INST_2x(mul, Mul, AX, Gp) // ANY [EXPLICIT] AX <- AL * r8 - ASMJIT_INST_2x(mul, Mul, AX, Mem) // ANY [EXPLICIT] AX <- AL * m8 - ASMJIT_INST_3x(mul, Mul, ZDX, ZAX, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64 - ASMJIT_INST_3x(mul, Mul, ZDX, ZAX, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64 + ASMJIT_INST_2x(mul, Mul, Gp_AX, Gp) // ANY [EXPLICIT] AX <- AL * r8 + ASMJIT_INST_2x(mul, Mul, Gp_AX, Mem) // ANY [EXPLICIT] AX <- AL * m8 + ASMJIT_INST_3x(mul, Mul, Gp_ZDX, Gp_ZAX, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64 + ASMJIT_INST_3x(mul, Mul, Gp_ZDX, Gp_ZAX, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64 ASMJIT_INST_1x(neg, Neg, Gp) // ANY ASMJIT_INST_1x(neg, Neg, Mem) // ANY ASMJIT_INST_0x(nop, Nop) // ANY ASMJIT_INST_1x(nop, Nop, Gp) // ANY ASMJIT_INST_1x(nop, Nop, Mem) // ANY + ASMJIT_INST_2x(nop, Nop, Gp, Gp) // ANY + ASMJIT_INST_2x(nop, Nop, Mem, Gp) // ANY ASMJIT_INST_1x(not_, Not, Gp) // ANY ASMJIT_INST_1x(not_, Not, Mem) // ANY ASMJIT_INST_2x(or_, Or, Gp, Gp) // ANY @@ -671,10 +618,6 @@ struct EmitterExplicitT { ASMJIT_INST_2i(or_, Or, Gp, Imm) // ANY ASMJIT_INST_2x(or_, Or, Mem, Gp) // ANY ASMJIT_INST_2i(or_, Or, Mem, Imm) // ANY - ASMJIT_INST_2x(out, Out, Imm, ZAX) // ANY - ASMJIT_INST_2i(out, Out, DX, ZAX) // ANY - ASMJIT_INST_2i(outs, Outs, DX, DS_ZSI) // ANY - ASMJIT_INST_0x(pause, Pause) // SSE2 ASMJIT_INST_1x(pop, Pop, Gp) // ANY ASMJIT_INST_1x(pop, Pop, Mem) // ANY ASMJIT_INST_1x(pop, Pop, SReg); // ANY @@ -683,13 +626,6 @@ struct EmitterExplicitT { ASMJIT_INST_0x(popf, Popf) // ANY ASMJIT_INST_0x(popfd, Popfd) // X86 ASMJIT_INST_0x(popfq, Popfq) // X64 - ASMJIT_INST_1x(prefetch, Prefetch, Mem) // 3DNOW - ASMJIT_INST_1x(prefetchnta, Prefetchnta, Mem) // SSE - ASMJIT_INST_1x(prefetcht0, Prefetcht0, Mem) // SSE - ASMJIT_INST_1x(prefetcht1, Prefetcht1, Mem) // SSE - ASMJIT_INST_1x(prefetcht2, Prefetcht2, Mem) // SSE - ASMJIT_INST_1x(prefetchw, Prefetchw, Mem) // PREFETCHW - ASMJIT_INST_1x(prefetchwt1, Prefetchwt1, Mem) // PREFETCHW1 ASMJIT_INST_1x(push, Push, Gp) // ANY ASMJIT_INST_1x(push, Push, Mem) // ANY ASMJIT_INST_1x(push, Push, SReg) // ANY @@ -699,90 +635,69 @@ struct EmitterExplicitT { ASMJIT_INST_0x(pushf, Pushf) // ANY ASMJIT_INST_0x(pushfd, Pushfd) // X86 ASMJIT_INST_0x(pushfq, Pushfq) // X64 - ASMJIT_INST_2x(rcl, Rcl, Gp, CL) // ANY - ASMJIT_INST_2x(rcl, Rcl, Mem, CL) // ANY + ASMJIT_INST_2x(rcl, Rcl, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(rcl, Rcl, Mem, Gp_CL) // ANY ASMJIT_INST_2i(rcl, Rcl, Gp, Imm) // ANY ASMJIT_INST_2i(rcl, Rcl, Mem, Imm) // ANY - ASMJIT_INST_2x(rcr, Rcr, Gp, CL) // ANY - ASMJIT_INST_2x(rcr, Rcr, Mem, CL) // ANY + ASMJIT_INST_2x(rcr, Rcr, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(rcr, Rcr, Mem, Gp_CL) // ANY ASMJIT_INST_2i(rcr, Rcr, Gp, Imm) // ANY ASMJIT_INST_2i(rcr, Rcr, Mem, Imm) // ANY - ASMJIT_INST_3x(rdmsr, Rdmsr, EDX, EAX, ECX) // 
MSR [EXPLICIT] RDX:EAX <- MSR[ECX] - ASMJIT_INST_3x(rdpmc, Rdpmc, EDX, EAX, ECX) // ANY [EXPLICIT] RDX:EAX <- PMC[ECX] - ASMJIT_INST_2x(rdtsc, Rdtsc, EDX, EAX) // RDTSC [EXPLICIT] EDX:EAX <- Counter - ASMJIT_INST_3x(rdtscp, Rdtscp, EDX, EAX, ECX) // RDTSCP [EXPLICIT] EDX:EAX:EXC <- Counter - ASMJIT_INST_2x(rol, Rol, Gp, CL) // ANY - ASMJIT_INST_2x(rol, Rol, Mem, CL) // ANY + ASMJIT_INST_2x(rol, Rol, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(rol, Rol, Mem, Gp_CL) // ANY ASMJIT_INST_2i(rol, Rol, Gp, Imm) // ANY ASMJIT_INST_2i(rol, Rol, Mem, Imm) // ANY - ASMJIT_INST_2x(ror, Ror, Gp, CL) // ANY - ASMJIT_INST_2x(ror, Ror, Mem, CL) // ANY + ASMJIT_INST_2x(ror, Ror, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(ror, Ror, Mem, Gp_CL) // ANY ASMJIT_INST_2i(ror, Ror, Gp, Imm) // ANY ASMJIT_INST_2i(ror, Ror, Mem, Imm) // ANY - ASMJIT_INST_0x(rsm, Rsm) // X86 ASMJIT_INST_2x(sbb, Sbb, Gp, Gp) // ANY ASMJIT_INST_2x(sbb, Sbb, Gp, Mem) // ANY ASMJIT_INST_2i(sbb, Sbb, Gp, Imm) // ANY ASMJIT_INST_2x(sbb, Sbb, Mem, Gp) // ANY ASMJIT_INST_2i(sbb, Sbb, Mem, Imm) // ANY - ASMJIT_INST_1x(sahf, Sahf, AH) // LAHFSAHF [EXPLICIT] EFL <- AH - ASMJIT_INST_2x(sal, Sal, Gp, CL) // ANY - ASMJIT_INST_2x(sal, Sal, Mem, CL) // ANY + ASMJIT_INST_2x(sal, Sal, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(sal, Sal, Mem, Gp_CL) // ANY ASMJIT_INST_2i(sal, Sal, Gp, Imm) // ANY ASMJIT_INST_2i(sal, Sal, Mem, Imm) // ANY - ASMJIT_INST_2x(sar, Sar, Gp, CL) // ANY - ASMJIT_INST_2x(sar, Sar, Mem, CL) // ANY + ASMJIT_INST_2x(sar, Sar, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(sar, Sar, Mem, Gp_CL) // ANY ASMJIT_INST_2i(sar, Sar, Gp, Imm) // ANY ASMJIT_INST_2i(sar, Sar, Mem, Imm) // ANY - ASMJIT_INST_2x(scas, Scas, ZAX, ES_ZDI) // ANY [EXPLICIT] + ASMJIT_INST_2x(scas, Scas, Gp_ZAX, ES_ZDI) // ANY [EXPLICIT] ASMJIT_INST_1c(set, Set, Condition::toSetcc, Gp) // ANY ASMJIT_INST_1c(set, Set, Condition::toSetcc, Mem) // ANY - ASMJIT_INST_0x(sfence, Sfence) // SSE - ASMJIT_INST_1x(sgdt, Sgdt, Mem) // ANY - ASMJIT_INST_2x(shl, Shl, Gp, CL) // ANY - ASMJIT_INST_2x(shl, Shl, Mem, CL) // ANY + ASMJIT_INST_2x(shl, Shl, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(shl, Shl, Mem, Gp_CL) // ANY ASMJIT_INST_2i(shl, Shl, Gp, Imm) // ANY ASMJIT_INST_2i(shl, Shl, Mem, Imm) // ANY - ASMJIT_INST_2x(shr, Shr, Gp, CL) // ANY - ASMJIT_INST_2x(shr, Shr, Mem, CL) // ANY + ASMJIT_INST_2x(shr, Shr, Gp, Gp_CL) // ANY + ASMJIT_INST_2x(shr, Shr, Mem, Gp_CL) // ANY ASMJIT_INST_2i(shr, Shr, Gp, Imm) // ANY ASMJIT_INST_2i(shr, Shr, Mem, Imm) // ANY - ASMJIT_INST_3x(shld, Shld, Gp, Gp, CL) // ANY - ASMJIT_INST_3x(shld, Shld, Mem, Gp, CL) // ANY + ASMJIT_INST_3x(shld, Shld, Gp, Gp, Gp_CL) // ANY + ASMJIT_INST_3x(shld, Shld, Mem, Gp, Gp_CL) // ANY ASMJIT_INST_3i(shld, Shld, Gp, Gp, Imm) // ANY ASMJIT_INST_3i(shld, Shld, Mem, Gp, Imm) // ANY - ASMJIT_INST_3x(shrd, Shrd, Gp, Gp, CL) // ANY - ASMJIT_INST_3x(shrd, Shrd, Mem, Gp, CL) // ANY + ASMJIT_INST_3x(shrd, Shrd, Gp, Gp, Gp_CL) // ANY + ASMJIT_INST_3x(shrd, Shrd, Mem, Gp, Gp_CL) // ANY ASMJIT_INST_3i(shrd, Shrd, Gp, Gp, Imm) // ANY ASMJIT_INST_3i(shrd, Shrd, Mem, Gp, Imm) // ANY - ASMJIT_INST_1x(sidt, Sidt, Mem) // ANY - ASMJIT_INST_1x(sldt, Sldt, Gp) // ANY - ASMJIT_INST_1x(sldt, Sldt, Mem) // ANY - ASMJIT_INST_1x(smsw, Smsw, Gp) // ANY - ASMJIT_INST_1x(smsw, Smsw, Mem) // ANY - ASMJIT_INST_0x(stc, Stc) // ANY - ASMJIT_INST_0x(std, Std) // ANY - ASMJIT_INST_0x(sti, Sti) // ANY - ASMJIT_INST_1x(stmxcsr, Stmxcsr, Mem) // SSE - ASMJIT_INST_2x(stos, Stos, ES_ZDI, ZAX) // ANY [EXPLICIT] - ASMJIT_INST_1x(str, Str, Gp) // ANY - ASMJIT_INST_1x(str, Str, Mem) // ANY + 
ASMJIT_INST_2x(stos, Stos, ES_ZDI, Gp_ZAX) // ANY [EXPLICIT] ASMJIT_INST_2x(sub, Sub, Gp, Gp) // ANY ASMJIT_INST_2x(sub, Sub, Gp, Mem) // ANY ASMJIT_INST_2i(sub, Sub, Gp, Imm) // ANY ASMJIT_INST_2x(sub, Sub, Mem, Gp) // ANY ASMJIT_INST_2i(sub, Sub, Mem, Imm) // ANY - ASMJIT_INST_0x(swapgs, Swapgs) // X64 ASMJIT_INST_2x(test, Test, Gp, Gp) // ANY ASMJIT_INST_2i(test, Test, Gp, Imm) // ANY ASMJIT_INST_2x(test, Test, Mem, Gp) // ANY ASMJIT_INST_2i(test, Test, Mem, Imm) // ANY + ASMJIT_INST_1x(ud0, Ud0, Reg) // ANY + ASMJIT_INST_1x(ud0, Ud0, Mem) // ANY + ASMJIT_INST_1x(ud1, Ud1, Reg) // ANY + ASMJIT_INST_1x(ud1, Ud1, Mem) // ANY ASMJIT_INST_0x(ud2, Ud2) // ANY - ASMJIT_INST_1x(verr, Verr, Gp) // ANY - ASMJIT_INST_1x(verr, Verr, Mem) // ANY - ASMJIT_INST_1x(verw, Verw, Gp) // ANY - ASMJIT_INST_1x(verw, Verw, Mem) // ANY - ASMJIT_INST_3x(wrmsr, Wrmsr, EDX, EAX, ECX) // MSR [EXPLICIT] RDX:EAX -> MSR[ECX] ASMJIT_INST_2x(xadd, Xadd, Gp, Gp) // ANY ASMJIT_INST_2x(xadd, Xadd, Mem, Gp) // ANY ASMJIT_INST_2x(xchg, Xchg, Gp, Gp) // ANY @@ -796,6 +711,60 @@ struct EmitterExplicitT { //! \} + //! \name Deprecated 32-bit Instructions + //! \{ + + ASMJIT_INST_1x(aaa, Aaa, Gp) // X86 [EXPLICIT] + ASMJIT_INST_2i(aad, Aad, Gp, Imm) // X86 [EXPLICIT] + ASMJIT_INST_2i(aam, Aam, Gp, Imm) // X86 [EXPLICIT] + ASMJIT_INST_1x(aas, Aas, Gp) // X86 [EXPLICIT] + ASMJIT_INST_1x(daa, Daa, Gp) // X86 [EXPLICIT] + ASMJIT_INST_1x(das, Das, Gp) // X86 [EXPLICIT] + + //! \} + + //! \name ENTER/LEAVE Instructions + //! \{ + + ASMJIT_INST_2x(enter, Enter, Imm, Imm) // ANY + ASMJIT_INST_0x(leave, Leave) // ANY + + //! \} + + //! \name IN/OUT Instructions + //! \{ + + // NOTE: For some reason Doxygen is messed up here and thinks we are in cond. + //! \endcond + + ASMJIT_INST_2i(in, In, Gp_ZAX, Imm) // ANY + ASMJIT_INST_2x(in, In, Gp_ZAX, Gp_DX) // ANY + ASMJIT_INST_2x(ins, Ins, ES_ZDI, Gp_DX) // ANY + ASMJIT_INST_2x(out, Out, Imm, Gp_ZAX) // ANY + ASMJIT_INST_2i(out, Out, Gp_DX, Gp_ZAX) // ANY + ASMJIT_INST_2i(outs, Outs, Gp_DX, DS_ZSI) // ANY + + //! \} + + //! \name Clear/Set CF/DF Instructions + //! \{ + + ASMJIT_INST_0x(clc, Clc) // ANY + ASMJIT_INST_0x(cld, Cld) // ANY + ASMJIT_INST_0x(cmc, Cmc) // ANY + ASMJIT_INST_0x(stc, Stc) // ANY + ASMJIT_INST_0x(std, Std) // ANY + + //! \} + + //! \name LAHF/SAHF Instructions + //! \{ + + ASMJIT_INST_1x(lahf, Lahf, Gp_AH) // LAHFSAHF [EXPLICIT] AH <- EFL + ASMJIT_INST_1x(sahf, Sahf, Gp_AH) // LAHFSAHF [EXPLICIT] EFL <- AH + + //! \} + //! \name ADX Instructions //! \{ @@ -806,6 +775,16 @@ struct EmitterExplicitT { //! \} + //! \name LZCNT/POPCNT Instructions + //! \{ + + ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Gp) // LZCNT + ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Mem) // LZCNT + ASMJIT_INST_2x(popcnt, Popcnt, Gp, Gp) // POPCNT + ASMJIT_INST_2x(popcnt, Popcnt, Gp, Mem) // POPCNT + + //! \} + //! \name BMI Instructions //! \{ @@ -829,8 +808,8 @@ struct EmitterExplicitT { ASMJIT_INST_3x(bzhi, Bzhi, Gp, Gp, Gp) // BMI2 ASMJIT_INST_3x(bzhi, Bzhi, Gp, Mem, Gp) // BMI2 - ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Gp, ZDX) // BMI2 [EXPLICIT] - ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Mem, ZDX) // BMI2 [EXPLICIT] + ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Gp, Gp_ZDX) // BMI2 [EXPLICIT] + ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Mem, Gp_ZDX) // BMI2 [EXPLICIT] ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Gp) // BMI2 ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Mem) // BMI2 ASMJIT_INST_3x(pext, Pext, Gp, Gp, Gp) // BMI2 @@ -846,31 +825,166 @@ struct EmitterExplicitT { //! \} - //! \name CL Instructions + //! \name TBM Instructions + //! 
\{ + + ASMJIT_INST_2x(blcfill, Blcfill, Gp, Gp) // TBM + ASMJIT_INST_2x(blcfill, Blcfill, Gp, Mem) // TBM + ASMJIT_INST_2x(blci, Blci, Gp, Gp) // TBM + ASMJIT_INST_2x(blci, Blci, Gp, Mem) // TBM + ASMJIT_INST_2x(blcic, Blcic, Gp, Gp) // TBM + ASMJIT_INST_2x(blcic, Blcic, Gp, Mem) // TBM + ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Gp) // TBM + ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Mem) // TBM + ASMJIT_INST_2x(blcs, Blcs, Gp, Gp) // TBM + ASMJIT_INST_2x(blcs, Blcs, Gp, Mem) // TBM + ASMJIT_INST_2x(blsfill, Blsfill, Gp, Gp) // TBM + ASMJIT_INST_2x(blsfill, Blsfill, Gp, Mem) // TBM + ASMJIT_INST_2x(blsic, Blsic, Gp, Gp) // TBM + ASMJIT_INST_2x(blsic, Blsic, Gp, Mem) // TBM + ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Gp) // TBM + ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Mem) // TBM + ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Gp) // TBM + ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Mem) // TBM + + //! \} + + //! \name CRC32 Instructions (SSE4.2) + //! \{ + + ASMJIT_INST_2x(crc32, Crc32, Gp, Gp) // SSE4_2 + ASMJIT_INST_2x(crc32, Crc32, Gp, Mem) // SSE4_2 + + //! \} + + //! \name MOVBE Instructions + //! \{ + + ASMJIT_INST_2x(movbe, Movbe, Gp, Mem) // MOVBE + ASMJIT_INST_2x(movbe, Movbe, Mem, Gp) // MOVBE + + //! \} + + //! \name MOVDIRI & MOVDIR64B Instructions + //! \{ + + ASMJIT_INST_2x(movdiri, Movdiri, Mem, Gp) // MOVDIRI + ASMJIT_INST_2x(movdir64b, Movdir64b, Mem, Mem) // MOVDIR64B + + //! \} + + //! \name MXCSR Instructions (SSE) + //! \{ + + ASMJIT_INST_1x(ldmxcsr, Ldmxcsr, Mem) // SSE + ASMJIT_INST_1x(stmxcsr, Stmxcsr, Mem) // SSE + + //! \} + + //! \name FENCE Instructions (SSE and SSE2) + //! \{ + + ASMJIT_INST_0x(lfence, Lfence) // SSE2 + ASMJIT_INST_0x(mfence, Mfence) // SSE2 + ASMJIT_INST_0x(sfence, Sfence) // SSE + + //! \} + + //! \name PREFETCH Instructions + //! \{ + + ASMJIT_INST_1x(prefetch, Prefetch, Mem) // 3DNOW + ASMJIT_INST_1x(prefetchnta, Prefetchnta, Mem) // SSE + ASMJIT_INST_1x(prefetcht0, Prefetcht0, Mem) // SSE + ASMJIT_INST_1x(prefetcht1, Prefetcht1, Mem) // SSE + ASMJIT_INST_1x(prefetcht2, Prefetcht2, Mem) // SSE + ASMJIT_INST_1x(prefetchw, Prefetchw, Mem) // PREFETCHW + ASMJIT_INST_1x(prefetchwt1, Prefetchwt1, Mem) // PREFETCHW1 + + //! \} + + //! \name CPUID Instruction + //! \{ + + ASMJIT_INST_4x(cpuid, Cpuid, Gp_EAX, Gp_EBX, Gp_ECX, Gp_EDX) // I486 [EXPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX] + + //! \} + + //! \name CacheLine Instructions //! \{ ASMJIT_INST_1x(cldemote, Cldemote, Mem) // CLDEMOTE ASMJIT_INST_1x(clflush, Clflush, Mem) // CLFLUSH ASMJIT_INST_1x(clflushopt, Clflushopt, Mem) // CLFLUSH_OPT ASMJIT_INST_1x(clwb, Clwb, Mem) // CLWB - ASMJIT_INST_1x(clzero, Clzero, DS_ZAX) // CLZERO [EXPLICIT] - ASMJIT_INST_0x(wbnoinvd, Wbnoinvd) // WBNOINVD + ASMJIT_INST_1x(clzero, Clzero, DS_ZAX) // CLZERO [EXPLICIT] //! \} - //! \name CRC32 Instructions + //! \name SERIALIZE Instruction //! \{ - ASMJIT_INST_2x(crc32, Crc32, Gp, Gp) // SSE4_2 - ASMJIT_INST_2x(crc32, Crc32, Gp, Mem) // SSE4_2 + ASMJIT_INST_0x(serialize, Serialize) // SERIALIZE //! \} - //! \name ENQCMD Instructions + //! \name RDPID Instruction //! \{ - ASMJIT_INST_2x(enqcmd, Enqcmd, Mem, Mem) // ENQCMD - ASMJIT_INST_2x(enqcmds, Enqcmds, Mem, Mem) // ENQCMD + ASMJIT_INST_1x(rdpid, Rdpid, Gp) // RDPID + + //! \} + + //! \name RDPRU/RDPKRU Instructions + //! \{ + + ASMJIT_INST_3x(rdpru, Rdpru, Gp_EDX, Gp_EAX, Gp_ECX) // RDPRU [EXPLICIT] EDX:EAX <- PRU[ECX] + ASMJIT_INST_3x(rdpkru, Rdpkru, Gp_EDX, Gp_EAX, Gp_ECX) // RDPKRU [EXPLICIT] EDX:EAX <- PKRU[ECX] + + //! \} + + //! \name RDTSC/RDTSCP Instructions + //! 
\{ + + ASMJIT_INST_2x(rdtsc, Rdtsc, Gp_EDX, Gp_EAX) // RDTSC [EXPLICIT] EDX:EAX <- Counter + ASMJIT_INST_3x(rdtscp, Rdtscp, Gp_EDX, Gp_EAX, Gp_ECX) // RDTSCP [EXPLICIT] EDX:EAX:EXC <- Counter + + //! \} + + //! \name Other User-Mode Instructions + //! \{ + + ASMJIT_INST_2x(arpl, Arpl, Gp, Gp) // X86 + ASMJIT_INST_2x(arpl, Arpl, Mem, Gp) // X86 + ASMJIT_INST_0x(cli, Cli) // ANY + ASMJIT_INST_0x(getsec, Getsec) // SMX + ASMJIT_INST_1i(int_, Int, Imm) // ANY + ASMJIT_INST_0x(int3, Int3) // ANY + ASMJIT_INST_0x(into, Into) // ANY + ASMJIT_INST_2x(lar, Lar, Gp, Gp) // ANY + ASMJIT_INST_2x(lar, Lar, Gp, Mem) // ANY + ASMJIT_INST_2x(lds, Lds, Gp, Mem) // X86 + ASMJIT_INST_2x(les, Les, Gp, Mem) // X86 + ASMJIT_INST_2x(lfs, Lfs, Gp, Mem) // ANY + ASMJIT_INST_2x(lgs, Lgs, Gp, Mem) // ANY + ASMJIT_INST_2x(lsl, Lsl, Gp, Gp) // ANY + ASMJIT_INST_2x(lsl, Lsl, Gp, Mem) // ANY + ASMJIT_INST_2x(lss, Lss, Gp, Mem) // ANY + ASMJIT_INST_0x(pause, Pause) // SSE2 + ASMJIT_INST_0x(rsm, Rsm) // X86 + ASMJIT_INST_1x(sgdt, Sgdt, Mem) // ANY + ASMJIT_INST_1x(sidt, Sidt, Mem) // ANY + ASMJIT_INST_1x(sldt, Sldt, Gp) // ANY + ASMJIT_INST_1x(sldt, Sldt, Mem) // ANY + ASMJIT_INST_1x(smsw, Smsw, Gp) // ANY + ASMJIT_INST_1x(smsw, Smsw, Mem) // ANY + ASMJIT_INST_0x(sti, Sti) // ANY + ASMJIT_INST_1x(str, Str, Gp) // ANY + ASMJIT_INST_1x(str, Str, Mem) // ANY + ASMJIT_INST_1x(verr, Verr, Gp) // ANY + ASMJIT_INST_1x(verr, Verr, Mem) // ANY + ASMJIT_INST_1x(verw, Verw, Gp) // ANY + ASMJIT_INST_1x(verw, Verw, Mem) // ANY //! \} @@ -884,15 +998,86 @@ struct EmitterExplicitT { //! \} - //! \name FXSR & XSAVE Instructions + //! \name FXSR Instructions //! \{ ASMJIT_INST_1x(fxrstor, Fxrstor, Mem) // FXSR ASMJIT_INST_1x(fxrstor64, Fxrstor64, Mem) // FXSR ASMJIT_INST_1x(fxsave, Fxsave, Mem) // FXSR ASMJIT_INST_1x(fxsave64, Fxsave64, Mem) // FXSR - ASMJIT_INST_3x(xgetbv, Xgetbv, EDX, EAX, ECX) // XSAVE [EXPLICIT] EDX:EAX <- XCR[ECX] - ASMJIT_INST_3x(xsetbv, Xsetbv, EDX, EAX, ECX) // XSAVE [EXPLICIT] XCR[ECX] <- EDX:EAX + + //! \} + + //! \name XSAVE Instructions + //! \{ + + ASMJIT_INST_3x(xgetbv, Xgetbv, Gp_EDX, Gp_EAX, Gp_ECX) // XSAVE [EXPLICIT] EDX:EAX <- XCR[ECX] + + //! \} + + //! \name MPX Extensions + //! \{ + + ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Gp) // MPX + ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Gp) // MPX + ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Gp) // MPX + ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndldx, Bndldx, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndmk, Bndmk, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Bnd) // MPX + ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Mem) // MPX + ASMJIT_INST_2x(bndmov, Bndmov, Mem, Bnd) // MPX + ASMJIT_INST_2x(bndstx, Bndstx, Mem, Bnd) // MPX + + //! \} + + //! \name MONITORX Instructions + //! \{ + + ASMJIT_INST_3x(monitorx, Monitorx, Mem, Gp, Gp) + ASMJIT_INST_3x(mwaitx, Mwaitx, Gp, Gp, Gp) + + //! \} + + //! \name MCOMMIT Instruction + //! \{ + + ASMJIT_INST_0x(mcommit, Mcommit) // MCOMMIT + + //! \} + + //! \name PTWRITE Instruction + //! \{ + + ASMJIT_INST_1x(ptwrite, Ptwrite, Gp) // PTWRITE + ASMJIT_INST_1x(ptwrite, Ptwrite, Mem) // PTWRITE + + //! \} + + //! \name ENQCMD Instructions + //! \{ + + ASMJIT_INST_2x(enqcmd, Enqcmd, Mem, Mem) // ENQCMD + ASMJIT_INST_2x(enqcmds, Enqcmds, Mem, Mem) // ENQCMD + + //! \} + + //! \name WAITPKG Instructions + //! 
\{ + + ASMJIT_INST_3x(tpause, Tpause, Gp, Gp, Gp) + ASMJIT_INST_1x(umonitor, Umonitor, Mem) + ASMJIT_INST_3x(umwait, Umwait, Gp, Gp, Gp) + + //! \} + + //! \name RDRAND & RDSEED Instructions + //! \{ + + ASMJIT_INST_1x(rdrand, Rdrand, Gp) // RDRAND + ASMJIT_INST_1x(rdseed, Rdseed, Gp) // RDSEED //! \} @@ -908,77 +1093,89 @@ struct EmitterExplicitT { //! \} - //! \name LZCNT Instructions + //! \name RTM & TSX Instructions //! \{ - ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Gp) // LZCNT - ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Mem) // LZCNT + ASMJIT_INST_0x(xabort, Xabort) // RTM + ASMJIT_INST_1x(xbegin, Xbegin, Label) // RTM + ASMJIT_INST_1x(xbegin, Xbegin, Imm) // RTM + ASMJIT_INST_1x(xbegin, Xbegin, uint64_t) // RTM + ASMJIT_INST_0x(xend, Xend) // RTM + ASMJIT_INST_0x(xtest, Xtest) // TSX //! \} - //! \name MOVBE Instructions + //! \name TSXLDTRK Instructions //! \{ - ASMJIT_INST_2x(movbe, Movbe, Gp, Mem) // MOVBE - ASMJIT_INST_2x(movbe, Movbe, Mem, Gp) // MOVBE + ASMJIT_INST_0x(xresldtrk, Xresldtrk) + ASMJIT_INST_0x(xsusldtrk, Xsusldtrk) //! \} - //! \name MOVDIRI & MOVDIR64B Instructions + //! \name CET-IBT Instructions //! \{ - ASMJIT_INST_2x(movdiri, Movdiri, Mem, Gp) // MOVDIRI - ASMJIT_INST_2x(movdir64b, Movdir64b, Mem, Mem) // MOVDIR64B + ASMJIT_INST_0x(endbr32, Endbr32) + ASMJIT_INST_0x(endbr64, Endbr64) //! \} - //! \name MPX Extensions + //! \name CET-SS Instructions //! \{ - ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Gp) // MPX - ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Gp) // MPX - ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Gp) // MPX - ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndldx, Bndldx, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndmk, Bndmk, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Bnd) // MPX - ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Mem) // MPX - ASMJIT_INST_2x(bndmov, Bndmov, Mem, Bnd) // MPX - ASMJIT_INST_2x(bndstx, Bndstx, Mem, Bnd) // MPX - - //! \} + ASMJIT_INST_1x(clrssbsy, Clrssbsy, Mem) + ASMJIT_INST_0x(setssbsy, Setssbsy) - //! \name POPCNT Instructions - //! \{ + ASMJIT_INST_1x(rstorssp, Rstorssp, Mem) + ASMJIT_INST_0x(saveprevssp, Saveprevssp) - ASMJIT_INST_2x(popcnt, Popcnt, Gp, Gp) // POPCNT - ASMJIT_INST_2x(popcnt, Popcnt, Gp, Mem) // POPCNT + ASMJIT_INST_1x(incsspd, Incsspd, Gp) + ASMJIT_INST_1x(incsspq, Incsspq, Gp) + ASMJIT_INST_1x(rdsspd, Rdsspd, Gp) + ASMJIT_INST_1x(rdsspq, Rdsspq, Gp) + ASMJIT_INST_2x(wrssd, Wrssd, Mem, Gp) + ASMJIT_INST_2x(wrssq, Wrssq, Mem, Gp) + ASMJIT_INST_2x(wrussd, Wrussd, Mem, Gp) + ASMJIT_INST_2x(wrussq, Wrussq, Mem, Gp) //! \} - //! \name RDRAND & RDSEED Instructions + //! \name Core Privileged Instructions //! 
\{ - ASMJIT_INST_1x(rdrand, Rdrand, Gp) // RDRAND - ASMJIT_INST_1x(rdseed, Rdseed, Gp) // RDSEED + ASMJIT_INST_0x(clts, Clts) // ANY + ASMJIT_INST_0x(hlt, Hlt) // ANY + ASMJIT_INST_0x(invd, Invd) // ANY + ASMJIT_INST_1x(invlpg, Invlpg, Mem) // ANY + ASMJIT_INST_2x(invpcid, Invpcid, Gp, Mem) // ANY + ASMJIT_INST_1x(lgdt, Lgdt, Mem) // ANY + ASMJIT_INST_1x(lidt, Lidt, Mem) // ANY + ASMJIT_INST_1x(lldt, Lldt, Gp) // ANY + ASMJIT_INST_1x(lldt, Lldt, Mem) // ANY + ASMJIT_INST_1x(lmsw, Lmsw, Gp) // ANY + ASMJIT_INST_1x(lmsw, Lmsw, Mem) // ANY + ASMJIT_INST_1x(ltr, Ltr, Gp) // ANY + ASMJIT_INST_1x(ltr, Ltr, Mem) // ANY + ASMJIT_INST_3x(rdmsr, Rdmsr, Gp_EDX, Gp_EAX, Gp_ECX) // MSR [EXPLICIT] RDX:EAX <- MSR[ECX] + ASMJIT_INST_3x(rdpmc, Rdpmc, Gp_EDX, Gp_EAX, Gp_ECX) // ANY [EXPLICIT] RDX:EAX <- PMC[ECX] + ASMJIT_INST_0x(swapgs, Swapgs) // X64 + ASMJIT_INST_0x(wbinvd, Wbinvd) // ANY + ASMJIT_INST_0x(wbnoinvd, Wbnoinvd) // WBNOINVD + ASMJIT_INST_3x(wrmsr, Wrmsr, Gp_EDX, Gp_EAX, Gp_ECX) // MSR [EXPLICIT] RDX:EAX -> MSR[ECX] + ASMJIT_INST_3x(xsetbv, Xsetbv, Gp_EDX, Gp_EAX, Gp_ECX) // XSAVE [EXPLICIT] XCR[ECX] <- EDX:EAX //! \} - //! \name RTM & TSX Instructions + //! \name MONITOR Instructions (Privileged) //! \{ - ASMJIT_INST_0x(xabort, Xabort) // RTM - ASMJIT_INST_1x(xbegin, Xbegin, Label) // RTM - ASMJIT_INST_1x(xbegin, Xbegin, Imm) // RTM - ASMJIT_INST_1x(xbegin, Xbegin, uint64_t) // RTM - ASMJIT_INST_0x(xend, Xend) // RTM - ASMJIT_INST_0x(xtest, Xtest) // TSX + ASMJIT_INST_3x(monitor, Monitor, Mem, Gp, Gp) // MONITOR + ASMJIT_INST_2x(mwait, Mwait, Gp, Gp) // MONITOR //! \} - //! \name SMAP Instructions + //! \name SMAP Instructions (Privileged) //! \{ ASMJIT_INST_0x(clac, Clac) // SMAP @@ -986,45 +1183,25 @@ struct EmitterExplicitT { //! \} - //! \name SVM Instructions + //! \name SKINIT Instructions (Privileged) //! \{ - ASMJIT_INST_0x(clgi, Clgi) // SVM - ASMJIT_INST_2x(invlpga, Invlpga, Gp, Gp) // SVM [EXPLICIT] ASMJIT_INST_1x(skinit, Skinit, Gp) // SKINIT [EXPLICIT] ASMJIT_INST_0x(stgi, Stgi) // SKINIT - ASMJIT_INST_1x(vmload, Vmload, Gp) // SVM [EXPLICIT] - ASMJIT_INST_0x(vmmcall, Vmmcall) // SVM - ASMJIT_INST_1x(vmrun, Vmrun, Gp) // SVM [EXPLICIT] - ASMJIT_INST_1x(vmsave, Vmsave, Gp) // SVM [EXPLICIT] //! \} - //! \name TBM Instructions + //! \name SNP Instructions (Privileged) //! \{ - ASMJIT_INST_2x(blcfill, Blcfill, Gp, Gp) // TBM - ASMJIT_INST_2x(blcfill, Blcfill, Gp, Mem) // TBM - ASMJIT_INST_2x(blci, Blci, Gp, Gp) // TBM - ASMJIT_INST_2x(blci, Blci, Gp, Mem) // TBM - ASMJIT_INST_2x(blcic, Blcic, Gp, Gp) // TBM - ASMJIT_INST_2x(blcic, Blcic, Gp, Mem) // TBM - ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Gp) // TBM - ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Mem) // TBM - ASMJIT_INST_2x(blcs, Blcs, Gp, Gp) // TBM - ASMJIT_INST_2x(blcs, Blcs, Gp, Mem) // TBM - ASMJIT_INST_2x(blsfill, Blsfill, Gp, Gp) // TBM - ASMJIT_INST_2x(blsfill, Blsfill, Gp, Mem) // TBM - ASMJIT_INST_2x(blsic, Blsic, Gp, Gp) // TBM - ASMJIT_INST_2x(blsic, Blsic, Gp, Mem) // TBM - ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Gp) // TBM - ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Mem) // TBM - ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Gp) // TBM - ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Mem) // TBM + ASMJIT_INST_0x(psmash, Psmash) // SNP + ASMJIT_INST_0x(pvalidate, Pvalidate) // SNP + ASMJIT_INST_0x(rmpadjust, Rmpadjust) // SNP + ASMJIT_INST_0x(rmpupdate, Rmpupdate) // SNP //! \} - //! \name VMX Instructions + //! \name VMX Instructions (All privileged except vmfunc) //! 
\{ ASMJIT_INST_2x(invept, Invept, Gp, Mem) // VMX @@ -1042,12 +1219,15 @@ struct EmitterExplicitT { //! \} - //! \name Other GP Instructions + //! \name SVM Instructions (All privileged except vmmcall) //! \{ - ASMJIT_INST_0x(getsec, Getsec) // SMX - ASMJIT_INST_0x(pcommit, Pcommit) // PCOMMIT - ASMJIT_INST_1x(rdpid, Rdpid, Gp) // RDPID + ASMJIT_INST_0x(clgi, Clgi) // SVM + ASMJIT_INST_2x(invlpga, Invlpga, Gp, Gp) // SVM [EXPLICIT] + ASMJIT_INST_1x(vmload, Vmload, Gp) // SVM [EXPLICIT] + ASMJIT_INST_0x(vmmcall, Vmmcall) // SVM + ASMJIT_INST_1x(vmrun, Vmrun, Gp) // SVM [EXPLICIT] + ASMJIT_INST_1x(vmsave, Vmsave, Gp) // SVM [EXPLICIT] //! \} @@ -1276,12 +1456,6 @@ struct EmitterExplicitT { ASMJIT_INST_3i(extractps, Extractps, Mem, Xmm, Imm) // SSE4_1 ASMJIT_INST_2x(extrq, Extrq, Xmm, Xmm) // SSE4A ASMJIT_INST_3ii(extrq, Extrq, Xmm, Imm, Imm) // SSE4A - ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Xmm, Imm) // GFNI - ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Mem, Imm) // GFNI - ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Xmm, Imm) // GFNI - ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Mem, Imm) // GFNI - ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Xmm) // GFNI - ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Mem) // GFNI ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Xmm) // SSE3 ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Mem) // SSE3 ASMJIT_INST_2x(haddps, Haddps, Xmm, Xmm) // SSE3 @@ -1480,10 +1654,10 @@ struct EmitterExplicitT { ASMJIT_INST_3i(pblendw, Pblendw, Xmm, Mem, Imm) // SSE4_1 ASMJIT_INST_3i(pclmulqdq, Pclmulqdq, Xmm, Xmm, Imm) // PCLMULQDQ. ASMJIT_INST_3i(pclmulqdq, Pclmulqdq, Xmm, Mem, Imm) // PCLMULQDQ. - ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Xmm, Imm, ECX, EAX, EDX) // SSE4_2 [EXPLICIT] - ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Mem, Imm, ECX, EAX, EDX) // SSE4_2 [EXPLICIT] - ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm, XMM0, EAX, EDX) // SSE4_2 [EXPLICIT] - ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm, XMM0, EAX, EDX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Xmm, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Mem, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm, XMM0, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm, XMM0, Gp_EAX, Gp_EDX) // SSE4_2 [EXPLICIT] ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mm) // MMX ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mem) // MMX ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Xmm, Xmm) // SSE2 @@ -1512,8 +1686,8 @@ struct EmitterExplicitT { ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Mm, Mem) // MMX ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Xmm) // SSE2 ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Mem) // SSE2 - ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Xmm, Imm, ECX) // SSE4_2 [EXPLICIT] - ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Mem, Imm, ECX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Xmm, Imm, Gp_ECX) // SSE4_2 [EXPLICIT] + ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Mem, Imm, Gp_ECX) // SSE4_2 [EXPLICIT] ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Xmm, Imm, XMM0) // SSE4_2 [EXPLICIT] ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Mem, Imm, XMM0) // SSE4_2 [EXPLICIT] ASMJIT_INST_3i(pextrb, Pextrb, Gp, Xmm, Imm) // SSE4_1 @@ -1909,6 +2083,13 @@ struct EmitterExplicitT { ASMJIT_INST_2x(pmulhrw, Pmulhrw, Mm, Mem) // 3DNOW ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mm) // 3DNOW ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mem) // 3DNOW + + //! \} + + //! \name EMMS/FEMMS Instructions + //! 
\{ + + ASMJIT_INST_0x(emms, Emms) // MMX ASMJIT_INST_0x(femms, Femms) // 3DNOW //! \} @@ -1951,6 +2132,21 @@ struct EmitterExplicitT { //! \} + //! \name GFNI Instructions + //! \{ + + // NOTE: For some reason Doxygen is messed up here and thinks we are in cond. + //! \endcond + + ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Xmm, Imm) // GFNI + ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Mem, Imm) // GFNI + ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Xmm, Imm) // GFNI + ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Mem, Imm) // GFNI + ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Xmm) // GFNI + ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Mem) // GFNI + + //! \} + //! \name AVX, FMA, and AVX512 Instructions //! \{ @@ -2025,203 +2221,90 @@ struct EmitterExplicitT { ASMJIT_INST_6x(v4fmaddss, V4fmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem) // AVX512_4FMAPS{kz} ASMJIT_INST_6x(v4fnmaddps, V4fnmaddps, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz} ASMJIT_INST_6x(v4fnmaddss, V4fnmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem) // AVX512_4FMAPS{kz} - ASMJIT_INST_3x(vaddpd, Vaddpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vaddpd, Vaddpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vaddpd, Vaddpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vaddpd, Vaddpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vaddpd, Vaddpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vaddpd, Vaddpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vaddps, Vaddps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vaddps, Vaddps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vaddps, Vaddps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vaddps, Vaddps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vaddps, Vaddps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vaddps, Vaddps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vaddpd, Vaddpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vaddpd, Vaddpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vaddps, Vaddps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vaddps, Vaddps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} - ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vaddsubps, Vaddsubps, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaddsubps, Vaddsubps, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaddsubps, Vaddsubps, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vaddsubps, Vaddsubps, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vaesdec, Vaesdec, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaesdec, Vaesdec, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaesdec, Vaesdec, Ymm, Ymm, Ymm) // VAES AVX512_VL - ASMJIT_INST_3x(vaesdec, Vaesdec, Ymm, Ymm, Mem) // VAES AVX512_VL - ASMJIT_INST_3x(vaesdec, Vaesdec, Zmm, Zmm, Zmm) // VAES - ASMJIT_INST_3x(vaesdec, Vaesdec, Zmm, Zmm, Mem) // VAES - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Ymm, Ymm, 
Ymm) // VAES AVX512_VL - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Ymm, Ymm, Mem) // VAES AVX512_VL - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Zmm, Zmm, Zmm) // VAES - ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Zmm, Zmm, Mem) // VAES - ASMJIT_INST_3x(vaesenc, Vaesenc, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaesenc, Vaesenc, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaesenc, Vaesenc, Ymm, Ymm, Ymm) // VAES AVX512_VL - ASMJIT_INST_3x(vaesenc, Vaesenc, Ymm, Ymm, Mem) // VAES AVX512_VL - ASMJIT_INST_3x(vaesenc, Vaesenc, Zmm, Zmm, Zmm) // VAES - ASMJIT_INST_3x(vaesenc, Vaesenc, Zmm, Zmm, Mem) // VAES - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Ymm, Ymm, Ymm) // VAES AVX512_VL - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Ymm, Ymm, Mem) // VAES AVX512_VL - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Zmm, Zmm, Zmm) // VAES - ASMJIT_INST_3x(vaesenclast, Vaesenclast, Zmm, Zmm, Mem) // VAES - ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Mem) // AVX - ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(valignd, Valignd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(valignd, Valignd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(valignd, Valignd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(valignd, Valignd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(valignd, Valignd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(valignd, Valignd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(valignq, Valignq, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(valignq, Valignq, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(valignq, Valignq, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(valignq, Valignq, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(valignq, Valignq, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(valignq, Valignq, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vandnpd, Vandnpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandnpd, Vandnpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandnpd, Vandnpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandnpd, Vandnpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandnpd, Vandnpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vandnpd, Vandnpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vandnps, Vandnps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandnps, Vandnps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandnps, Vandnps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandnps, Vandnps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandnps, Vandnps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32} - ASMJIT_INST_3x(vandnps, Vandnps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32} - ASMJIT_INST_3x(vandpd, Vandpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandpd, Vandpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandpd, Vandpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandpd, Vandpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vandpd, Vandpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vandpd, Vandpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64} - 
ASMJIT_INST_3x(vandps, Vandps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandps, Vandps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandps, Vandps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandps, Vandps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vandps, Vandps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32} - ASMJIT_INST_3x(vandps, Vandps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32} - ASMJIT_INST_3x(vblendmb, Vblendmb, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmb, Vblendmb, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmb, Vblendmb, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmb, Vblendmb, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmb, Vblendmb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vblendmb, Vblendmb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vblendmd, Vblendmd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmd, Vblendmd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmd, Vblendmd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmd, Vblendmd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmd, Vblendmd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vblendmd, Vblendmd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vblendmpd, Vblendmpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vblendmps, Vblendmps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmps, Vblendmps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmps, Vblendmps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmps, Vblendmps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vblendmps, Vblendmps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vblendmps, Vblendmps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vblendmq, Vblendmq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmq, Vblendmq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmq, Vblendmq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmq, Vblendmq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vblendmq, Vblendmq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vblendmq, Vblendmq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vblendmw, Vblendmw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmw, Vblendmw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmw, Vblendmw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmw, Vblendmw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vblendmw, Vblendmw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vblendmw, Vblendmw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_4i(vblendpd, Vblendpd, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vblendpd, Vblendpd, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vblendpd, Vblendpd, Ymm, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_4i(vblendpd, Vblendpd, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4i(vblendps, Vblendps, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vblendps, Vblendps, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vblendps, Vblendps, Ymm, Ymm, Ymm, Imm) // AVX - 
ASMJIT_INST_4i(vblendps, Vblendps, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4x(vblendvpd, Vblendvpd, Xmm, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_4x(vblendvpd, Vblendvpd, Xmm, Xmm, Mem, Xmm) // AVX - ASMJIT_INST_4x(vblendvpd, Vblendvpd, Ymm, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_4x(vblendvpd, Vblendvpd, Ymm, Ymm, Mem, Ymm) // AVX - ASMJIT_INST_4x(vblendvps, Vblendvps, Xmm, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_4x(vblendvps, Vblendvps, Xmm, Xmm, Mem, Xmm) // AVX - ASMJIT_INST_4x(vblendvps, Vblendvps, Ymm, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_4x(vblendvps, Vblendvps, Ymm, Ymm, Mem, Ymm) // AVX - ASMJIT_INST_2x(vbroadcastf128, Vbroadcastf128, Ymm, Mem) // AVX - ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Ymm, Xmm) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Ymm, Mem) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Zmm, Xmm) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Ymm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastf32x8, Vbroadcastf32x8, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Ymm, Mem) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcastf64x4, Vbroadcastf64x4, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcasti128, Vbroadcasti128, Ymm, Mem) // AVX2 - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Xmm, Xmm) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Xmm, Mem) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Ymm, Xmm) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Ymm, Mem) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Zmm, Xmm) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcasti32x8, Vbroadcasti32x8, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Ymm, Xmm) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Ymm, Mem) // AVX512_DQ{kz}-VL - ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Zmm, Xmm) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Zmm, Mem) // AVX512_DQ{kz} - ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Xmm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Ymm, Mem) // AVX AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Ymm, Xmm) // AVX2 AVX512_F{kz} - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Zmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Zmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_4i(vcmppd, Vcmppd, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vcmppd, Vcmppd, Xmm, Xmm, 
Mem, Imm) // AVX - ASMJIT_INST_4i(vcmppd, Vcmppd, Ymm, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_4i(vcmppd, Vcmppd, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_4i(vcmpps, Vcmpps, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vcmpps, Vcmpps, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vcmpps, Vcmpps, Ymm, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_4i(vcmpps, Vcmpps, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32} + ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vaddsubps, Vaddsubps, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vaddsubps, Vaddsubps, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vaesdec, Vaesdec, Vec, Vec, Vec) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesdec, Vaesdec, Vec, Vec, Mem) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Vec, Vec, Vec) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Vec, Vec, Mem) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesenc, Vaesenc, Vec, Vec, Vec) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesenc, Vaesenc, Vec, Vec, Mem) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesenclast, Vaesenclast, Vec, Vec, Vec) // AVX+AESNI VAES + ASMJIT_INST_3x(vaesenclast, Vaesenclast, Vec, Vec, Mem) // AVX+AESNI VAES + ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Xmm) // AVX+AESNI + ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Mem) // AVX+AESNI + ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Xmm, Imm) // AVX+AESNI + ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Mem, Imm) // AVX+AESNI + ASMJIT_INST_4i(valignd, Valignd, Vec, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(valignd, Valignd, Vec, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(valignq, Valignq, Vec, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(valignq, Valignq, Vec, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vandnpd, Vandnpd, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vandnpd, Vandnpd, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vandnps, Vandnps, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vandnps, Vandnps, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vandpd, Vandpd, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vandpd, Vandpd, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vandps, Vandps, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vandps, Vandps, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vblendmb, Vblendmb, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vblendmb, Vblendmb, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3x(vblendmd, Vblendmd, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vblendmd, Vblendmd, Vec, Vec, 
Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vblendmpd, Vblendmpd, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vblendmpd, Vblendmpd, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vblendmps, Vblendmps, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vblendmps, Vblendmps, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vblendmq, Vblendmq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vblendmq, Vblendmq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vblendmw, Vblendmw, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vblendmw, Vblendmw, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_4i(vblendpd, Vblendpd, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vblendpd, Vblendpd, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vblendps, Vblendps, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vblendps, Vblendps, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4x(vblendvpd, Vblendvpd, Vec, Vec, Vec, Vec) // AVX + ASMJIT_INST_4x(vblendvpd, Vblendvpd, Vec, Vec, Mem, Vec) // AVX + ASMJIT_INST_4x(vblendvps, Vblendvps, Vec, Vec, Vec, Vec) // AVX + ASMJIT_INST_4x(vblendvps, Vblendvps, Vec, Vec, Mem, Vec) // AVX + ASMJIT_INST_2x(vbroadcastf128, Vbroadcastf128, Vec, Mem) // AVX + ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Vec, Vec) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vbroadcastf32x8, Vbroadcastf32x8, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcastf64x4, Vbroadcastf64x4, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vbroadcasti128, Vbroadcasti128, Vec, Mem) // AVX2 + ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Vec, Vec) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vbroadcasti32x8, Vbroadcasti32x8, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Vec, Vec) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Vec, Mem) // AVX512_DQ{kz} + ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Vec, Xmm) // AVX2 AVX512_F{kz} + ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Vec, Xmm) // AVX2 AVX512_F{kz} + ASMJIT_INST_4i(vcmppd, Vcmppd, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vcmppd, Vcmppd, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vcmpps, Vcmpps, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vcmpps, Vcmpps, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Vec, Mem, Imm) // AVX512_F{kz|b32} ASMJIT_INST_4i(vcmpsd, Vcmpsd, Xmm, Xmm, Xmm, Imm) // AVX ASMJIT_INST_4i(vcmpsd, Vcmpsd, Xmm, Xmm, Mem, Imm) // AVX ASMJIT_INST_4i(vcmpsd, Vcmpsd, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|sae} @@ -2234,121 +2317,46 @@ struct EmitterExplicitT { ASMJIT_INST_2x(vcomisd, Vcomisd, Xmm, Mem) // AVX AVX512_F{sae} ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Xmm) // AVX AVX512_F{sae} 
ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Mem) // AVX AVX512_F{sae} - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vcompressps, Vcompressps, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompressps, Vcompressps, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vcompressps, Vcompressps, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Ymm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Zmm, Ymm) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Zmm, Mem) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Xmm, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Xmm, Xmm, Mem) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Ymm, Ymm, Ymm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Ymm, Ymm, Mem) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Zmm, Zmm, Zmm) // AVX512_BF16{kz|b32} - ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Zmm, Zmm, Mem) // AVX512_BF16{kz|b32} - ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Ymm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Mem) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Ymm, Zmm) // AVX512_BF16{kz|b32} - ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Ymm, Mem) // AVX512_BF16{kz|b32} - ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Ymm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Ymm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Ymm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Ymm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Xmm, Xmm) // 
AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Zmm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Zmm, Mem) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Ymm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Ymm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Zmm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Zmm, Mem) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Xmm, Xmm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Xmm, Mem) // F16C AVX512_F{kz}-VL - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Ymm, Xmm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Ymm, Mem) // F16C AVX512_F{kz}-VL - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Zmm, Ymm) // AVX512_F{kz|sae} - ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Zmm, Mem) // AVX512_F{kz|sae} - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Zmm, Mem) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Ymm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Zmm, Ymm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Zmm, Mem) // AVX512_F{kz|er|b32} - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Xmm, Xmm, Imm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Xmm, Imm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Xmm, Ymm, Imm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Ymm, Imm) // F16C AVX512_F{kz}-VL - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Ymm, Zmm, Imm) // AVX512_F{kz|sae} - ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Zmm, Imm) // AVX512_F{kz|sae} - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Zmm, Ymm) // AVX512_DQ{kz|er|b32} - ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Zmm, Mem) // AVX512_DQ{kz|er|b32} - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Xmm, Mem) 
// AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Zmm, Mem) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Zmm, Ymm) // AVX512_DQ{kz|er|b32} - ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Zmm, Mem) // AVX512_DQ{kz|er|b32} - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Zmm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Zmm, Mem) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Ymm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Ymm, Mem) // AVX512_DQ{kz|er|b64} + ASMJIT_INST_2x(vcompresspd, Vcompresspd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vcompressps, Vcompressps, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Vec, Vec, Vec) // AVX512_BF16{kz|b32} + ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Vec, Vec, Mem) // AVX512_BF16{kz|b32} + ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Vec, Vec) // AVX512_BF16{kz|b32} + ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Vec, Mem) // AVX512_BF16{kz|b32} + ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Vec, Vec) // F16C AVX512_F{kz} + ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Vec, Mem) // F16C AVX512_F{kz} + ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Vec, Vec) // AVX AVX512_F{kz|b32} + 
ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Vec, Vec, Imm) // F16C AVX512_F{kz} + ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Vec, Imm) // F16C AVX512_F{kz} + ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Vec, Vec) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Vec, Mem) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Vec, Vec) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Vec, Mem) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Vec, Mem) // AVX512_DQ{kz|b64} ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Xmm) // AVX AVX512_F{er} ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Mem) // AVX AVX512_F{er} ASMJIT_INST_3x(vcvtsd2ss, Vcvtsd2ss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} @@ -2365,52 +2373,22 @@ struct EmitterExplicitT { ASMJIT_INST_2x(vcvtss2si, Vcvtss2si, Gp, Mem) // AVX AVX512_F{er} ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Xmm) // AVX512_F{er} ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Mem) // AVX512_F{er} - ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Ymm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Ymm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Zmm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Zmm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Ymm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Ymm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Zmm, Zmm) // AVX512_DQ{kz|sae|b64} - ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Zmm, Mem) // AVX512_DQ{kz|sae|b64} - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Zmm, Zmm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Zmm, Mem) // AVX512_F{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2qq, 
Vcvttps2qq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Zmm, Ymm) // AVX512_DQ{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Zmm, Mem) // AVX512_DQ{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Zmm, Zmm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Zmm, Mem) // AVX512_F{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Zmm, Ymm) // AVX512_DQ{kz|sae|b32} - ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Zmm, Mem) // AVX512_DQ{kz|sae|b32} + ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Vec, Vec) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Vec, Mem) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Vec, Vec) // AVX512_DQ{kz|b32} + ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Vec, Mem) // AVX512_DQ{kz|b32} ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Xmm) // AVX AVX512_F{sae} ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Mem) // AVX AVX512_F{sae} ASMJIT_INST_2x(vcvttsd2usi, Vcvttsd2usi, Gp, Xmm) // AVX512_F{sae} @@ -2419,415 +2397,204 @@ struct EmitterExplicitT { ASMJIT_INST_2x(vcvttss2si, Vcvttss2si, Gp, Mem) // AVX AVX512_F{sae} ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Xmm) // AVX512_F{sae} ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Mem) // AVX512_F{sae} - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Ymm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Zmm, Ymm) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Xmm, Mem) // 
AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Zmm, Mem) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Zmm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Zmm, Mem) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Ymm, Zmm) // AVX512_DQ{kz|er|b64} - ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Ymm, Mem) // AVX512_DQ{kz|er|b64} + ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Vec, Mem) // AVX512_DQ{kz|b64} ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Gp) // AVX512_F{er} ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Mem) // AVX512_F{er} ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Gp) // AVX512_F{er} ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Mem) // AVX512_F{er} - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Xmm, Xmm, Xmm, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Xmm, Xmm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Ymm, Ymm, Ymm, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Ymm, Ymm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Zmm, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Zmm, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3x(vdivpd, Vdivpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vdivpd, Vdivpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vdivpd, Vdivpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vdivpd, Vdivpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vdivpd, Vdivpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vdivpd, Vdivpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vdivps, Vdivps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vdivps, Vdivps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vdivps, Vdivps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vdivps, Vdivps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vdivps, Vdivps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vdivps, Vdivps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Vec, Vec, Vec, Imm) // AVX512_BW{kz} + ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Vec, Vec, Mem, Imm) // AVX512_BW{kz} + ASMJIT_INST_3x(vdivpd, Vdivpd, Vec, Vec, Vec) // AVX 
AVX512_F{kz|b64} + ASMJIT_INST_3x(vdivpd, Vdivpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vdivps, Vdivps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vdivps, Vdivps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Xmm, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Xmm, Xmm, Mem) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Ymm, Ymm, Ymm) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Ymm, Ymm, Mem) // AVX512_BF16{kz|b32}-VL - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Zmm, Zmm, Zmm) // AVX512_BF16{kz|b32} - ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Zmm, Zmm, Mem) // AVX512_BF16{kz|b32} - ASMJIT_INST_4i(vdppd, Vdppd, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vdppd, Vdppd, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vdpps, Vdpps, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vdpps, Vdpps, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vdpps, Vdpps, Ymm, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_4i(vdpps, Vdpps, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_2x(vexp2pd, Vexp2pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vexp2pd, Vexp2pd, Zmm, Mem) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vexp2ps, Vexp2ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32} - ASMJIT_INST_2x(vexp2ps, Vexp2ps, Zmm, Mem) // AVX512_ER{kz|sae|b32} - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vexpandpd, Vexpandpd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vexpandps, Vexpandps, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandps, Vexpandps, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandps, Vexpandps, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandps, Vexpandps, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vexpandps, Vexpandps, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vexpandps, Vexpandps, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vextractf128, Vextractf128, Xmm, Ymm, Imm) // AVX - ASMJIT_INST_3i(vextractf128, Vextractf128, Mem, Ymm, Imm) // AVX - ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Xmm, Ymm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Mem, Ymm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Xmm, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Mem, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Ymm, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Mem, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Xmm, Ymm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Mem, Ymm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Xmm, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Mem, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Ymm, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Mem, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextracti128, Vextracti128, Xmm, 
Ymm, Imm) // AVX2 - ASMJIT_INST_3i(vextracti128, Vextracti128, Mem, Ymm, Imm) // AVX2 - ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Xmm, Ymm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Mem, Ymm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Xmm, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Mem, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Ymm, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Mem, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Xmm, Ymm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Mem, Ymm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Xmm, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Mem, Zmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Ymm, Zmm, Imm) // AVX512_F{kz} - ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Mem, Zmm, Imm) // AVX512_F{kz} + ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Vec, Vec, Vec) // AVX512_BF16{kz|b32} + ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Vec, Vec, Mem) // AVX512_BF16{kz|b32} + ASMJIT_INST_4i(vdppd, Vdppd, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vdppd, Vdppd, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vdpps, Vdpps, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vdpps, Vdpps, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_2x(vexp2pd, Vexp2pd, Vec, Vec) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vexp2pd, Vexp2pd, Vec, Mem) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vexp2ps, Vexp2ps, Vec, Vec) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vexp2ps, Vexp2ps, Vec, Mem) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vexpandpd, Vexpandpd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vexpandpd, Vexpandpd, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vexpandps, Vexpandps, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vexpandps, Vexpandps, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_3i(vextractf128, Vextractf128, Vec, Vec, Imm) // AVX + ASMJIT_INST_3i(vextractf128, Vextractf128, Mem, Vec, Imm) // AVX + ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Mem, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Mem, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Mem, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Mem, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextracti128, Vextracti128, Vec, Vec, Imm) // AVX2 + ASMJIT_INST_3i(vextracti128, Vextracti128, Mem, Vec, Imm) // AVX2 + ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Mem, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Mem, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Mem, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Mem, Vec, Imm) // AVX512_F{kz} ASMJIT_INST_3i(vextractps, Vextractps, Gp, Xmm, Imm) // AVX 
AVX512_F ASMJIT_INST_3i(vextractps, Vextractps, Mem, Xmm, Imm) // AVX AVX512_F - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32} + ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Vec, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Vec, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Vec, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Vec, Vec, Mem, Imm) // AVX512_F{kz|b32} ASMJIT_INST_4i(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vfixupimmss, Vfixupimmss, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vfixupimmss, Vfixupimmss, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd213pd, 
Vfmadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Ymm, Ymm, Mem) // 
FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - 
ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} 
ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - 
ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Vec, Vec, Mem) // FMA 
AVX512_F{kz|b64} + ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmadd231ps, 
Vfnmadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + 
ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Vec, Vec, Vec) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Vec, Vec, Mem) // FMA AVX512_F{kz|b64} + ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Vec, Vec, Vec) // FMA AVX512_F{kz|b32} + ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Vec, Vec, Mem) // FMA AVX512_F{kz|b32} ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er} ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er} - ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Xmm, Imm) // AVX512_DQ{k|b64}-VL - ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Mem, Imm) // AVX512_DQ{k|b64} AVX512_DQ{k|b64}-VL - ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Ymm, Imm) // AVX512_DQ{k|b64}-VL - ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Zmm, Imm) // AVX512_DQ{k|b64} - ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Xmm, Imm) // AVX512_DQ{k|b32}-VL - ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Mem, Imm) // AVX512_DQ{k|b32} AVX512_DQ{k|b32}-VL - ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Ymm, Imm) // AVX512_DQ{k|b32}-VL - ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Zmm, Imm) // AVX512_DQ{k|b32} + ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Vec, Imm) // AVX512_DQ{k|b64} + ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Mem, Imm) // AVX512_DQ{k|b64} + ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Vec, Imm) // AVX512_DQ{k|b32} + ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Mem, Imm) // AVX512_DQ{k|b32} ASMJIT_INST_3i(vfpclasssd, Vfpclasssd, KReg, Xmm, Imm) // AVX512_DQ{k} ASMJIT_INST_3i(vfpclasssd, Vfpclasssd, KReg, Mem, Imm) // AVX512_DQ{k} ASMJIT_INST_3i(vfpclassss, Vfpclassss, KReg, Xmm, Imm) // AVX512_DQ{k} ASMJIT_INST_3i(vfpclassss, Vfpclassss, KReg, Mem, Imm) // AVX512_DQ{k} - 
ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Xmm, Mem, Xmm) // AVX2 - ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Ymm, Mem, Ymm) // AVX2 - ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Xmm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Ymm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Zmm, Mem) // AVX512_F{k} - ASMJIT_INST_3x(vgatherdps, Vgatherdps, Xmm, Mem, Xmm) // AVX2 - ASMJIT_INST_3x(vgatherdps, Vgatherdps, Ymm, Mem, Ymm) // AVX2 - ASMJIT_INST_2x(vgatherdps, Vgatherdps, Xmm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherdps, Vgatherdps, Ymm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherdps, Vgatherdps, Zmm, Mem) // AVX512_F{k} + ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_2x(vgatherdps, Vgatherdps, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vgatherdps, Vgatherdps, Vec, Mem, Vec) // AVX2 ASMJIT_INST_1x(vgatherpf0dpd, Vgatherpf0dpd, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vgatherpf0dps, Vgatherpf0dps, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vgatherpf0qpd, Vgatherpf0qpd, Mem) // AVX512_PF{k} @@ -2836,249 +2603,121 @@ struct EmitterExplicitT { ASMJIT_INST_1x(vgatherpf1dps, Vgatherpf1dps, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vgatherpf1qpd, Vgatherpf1qpd, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vgatherpf1qps, Vgatherpf1qps, Mem) // AVX512_PF{k} - ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Xmm, Mem, Xmm) // AVX2 - ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Ymm, Mem, Ymm) // AVX2 - ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Xmm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Ymm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Zmm, Mem) // AVX512_F{k} - ASMJIT_INST_3x(vgatherqps, Vgatherqps, Xmm, Mem, Xmm) // AVX2 - ASMJIT_INST_2x(vgatherqps, Vgatherqps, Xmm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherqps, Vgatherqps, Ymm, Mem) // AVX512_F{k}-VL - ASMJIT_INST_2x(vgatherqps, Vgatherqps, Zmm, Mem) // AVX512_F{k} - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Zmm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vgetexppd, Vgetexppd, Zmm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Zmm, Zmm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_2x(vgetexpps, Vgetexpps, Zmm, Mem) // AVX512_F{kz|sae|b32} + ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_2x(vgatherqps, Vgatherqps, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vgatherqps, Vgatherqps, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_2x(vgetexppd, Vgetexppd, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vgetexppd, Vgetexppd, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vgetexpps, Vgetexpps, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vgetexpps, Vgetexpps, Vec, Mem) // AVX512_F{kz|b32} ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Xmm) // AVX512_F{kz|sae} ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Mem) // AVX512_F{kz|sae} ASMJIT_INST_3x(vgetexpss, Vgetexpss, 
Xmm, Xmm, Xmm) // AVX512_F{kz|sae} ASMJIT_INST_3x(vgetexpss, Vgetexpss, Xmm, Xmm, Mem) // AVX512_F{kz|sae} - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3i(vgetmantps, Vgetmantps, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32} + ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vgetmantps, Vgetmantps, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vgetmantps, Vgetmantps, Vec, Mem, Imm) // AVX512_F{kz|b32} ASMJIT_INST_4i(vgetmantsd, Vgetmantsd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vgetmantsd, Vgetmantsd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vgetmantss, Vgetmantss, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vgetmantss, Vgetmantss, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Xmm,Xmm,Xmm,Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Xmm,Xmm,Mem,Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Ymm,Ymm,Ymm,Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Ymm,Ymm,Mem,Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Zmm,Zmm,Zmm,Imm) // AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Zmm,Zmm,Mem,Imm) // AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Xmm, Xmm, Xmm, Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Xmm, Xmm, Mem, Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Ymm, Ymm, Ymm, Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Ymm, Ymm, Mem, Imm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Zmm, Zmm, Zmm, Imm) // AVX512_VL{kz} GFNI - ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Zmm, Zmm, Mem, Imm) // AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Xmm, Xmm, Xmm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Xmm, Xmm, Mem) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Ymm, Ymm, Ymm) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Ymm, Ymm, Mem) // AVX AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Zmm, Zmm, Zmm) // AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Zmm, Zmm, Mem) // AVX512_VL{kz} GFNI - ASMJIT_INST_3x(vhaddpd, Vhaddpd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vhaddpd, Vhaddpd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vhaddpd, Vhaddpd, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vhaddpd, Vhaddpd, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vhaddps, 
Vhaddps, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vhaddps, Vhaddps, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vhaddps, Vhaddps, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vhaddps, Vhaddps, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vhsubpd, Vhsubpd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vhsubpd, Vhsubpd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vhsubpd, Vhsubpd, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vhsubpd, Vhsubpd, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vhsubps, Vhsubps, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vhsubps, Vhsubps, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vhsubps, Vhsubps, Ymm, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vhsubps, Vhsubps, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_4i(vinsertf128, Vinsertf128, Ymm, Ymm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vinsertf128, Vinsertf128, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Ymm, Ymm, Xmm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Zmm, Zmm, Xmm, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Zmm, Zmm, Ymm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Ymm, Ymm, Xmm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Zmm, Zmm, Xmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Zmm, Zmm, Ymm, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinserti128, Vinserti128, Ymm, Ymm, Xmm, Imm) // AVX2 - ASMJIT_INST_4i(vinserti128, Vinserti128, Ymm, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Ymm, Ymm, Xmm, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz}-VL - ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Zmm, Zmm, Xmm, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Zmm, Zmm, Ymm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Ymm, Ymm, Xmm, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz}-VL - ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Zmm, Zmm, Xmm, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz} - ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Zmm, Zmm, Ymm, Imm) // AVX512_F{kz} - ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Vec,Vec,Vec,Imm) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Vec,Vec,Mem,Imm) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Vec, Vec, Vec, Imm) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Vec, Vec, Mem, Imm) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Vec, Vec, Vec) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Vec, Vec, Mem) // AVX AVX512_VL{kz} GFNI + ASMJIT_INST_3x(vhaddpd, Vhaddpd, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vhaddpd, 
Vhaddpd, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vhaddps, Vhaddps, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vhaddps, Vhaddps, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vhsubpd, Vhsubpd, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vhsubpd, Vhsubpd, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vhsubps, Vhsubps, Vec, Vec, Vec) // AVX + ASMJIT_INST_3x(vhsubps, Vhsubps, Vec, Vec, Mem) // AVX + ASMJIT_INST_4i(vinsertf128, Vinsertf128, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vinsertf128, Vinsertf128, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Vec, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Vec, Vec, Mem, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Vec, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Vec, Vec, Mem, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Vec, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Vec, Vec, Mem, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Vec, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Vec, Vec, Mem, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinserti128, Vinserti128, Vec, Vec, Vec, Imm) // AVX2 + ASMJIT_INST_4i(vinserti128, Vinserti128, Vec, Vec, Mem, Imm) // AVX2 + ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Vec, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Vec, Vec, Mem, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Vec, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Vec, Vec, Mem, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Vec, Vec, Vec, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Vec, Vec, Mem, Imm) // AVX512_DQ{kz} + ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Vec, Vec, Vec, Imm) // AVX512_F{kz} + ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Vec, Vec, Mem, Imm) // AVX512_F{kz} ASMJIT_INST_4i(vinsertps, Vinsertps, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F ASMJIT_INST_4i(vinsertps, Vinsertps, Xmm, Xmm, Mem, Imm) // AVX AVX512_F - ASMJIT_INST_2x(vlddqu, Vlddqu, Xmm, Mem) // AVX - ASMJIT_INST_2x(vlddqu, Vlddqu, Ymm, Mem) // AVX + ASMJIT_INST_2x(vlddqu, Vlddqu, Vec, Mem) // AVX ASMJIT_INST_1x(vldmxcsr, Vldmxcsr, Mem) // AVX - ASMJIT_INST_3x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm, DS_ZDI) // AVX [EXPLICIT] - ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Ymm, Ymm) // AVX - ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Ymm, Ymm, Mem) // AVX - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3x(vmaxpd, Vmaxpd, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3x(vmaxps, Vmaxps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmaxps, Vmaxps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmaxps, Vmaxps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL 
- ASMJIT_INST_3x(vmaxps, Vmaxps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmaxps, Vmaxps, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3x(vmaxps, Vmaxps, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vminpd, Vminpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vminpd, Vminpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vminpd, Vminpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vminpd, Vminpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vminpd, Vminpd, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3x(vminpd, Vminpd, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3x(vminps, Vminps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vminps, Vminps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vminps, Vminps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vminps, Vminps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vminps, Vminps, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3x(vminps, Vminps, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovapd, Vmovapd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovapd, Vmovapd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovaps, Vmovaps, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovaps, Vmovaps, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovaps, Vmovaps, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Zmm) // AVX512_F{kz} + ASMJIT_INST_3x(vmaskmovdqu, Vmaskmovdqu, Vec, Vec, DS_ZDI) // AVX [EXPLICIT] + ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Vec, Vec) // AVX + ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Vec, Vec) // AVX + ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Vec, Vec, Mem) // AVX + ASMJIT_INST_3x(vmaxpd, Vmaxpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vmaxpd, Vmaxpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vmaxps, Vmaxps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vmaxps, Vmaxps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, 
Xmm) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vminpd, Vminpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vminpd, Vminpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vminps, Vminps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vminps, Vminps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae} + ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae} + ASMJIT_INST_2x(vmovapd, Vmovapd, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovapd, Vmovapd, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovaps, Vmovaps, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovaps, Vmovaps, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Vec) // AVX AVX512_F{kz} ASMJIT_INST_2x(vmovd, Vmovd, Gp, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovd, Vmovd, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Gp) // AVX AVX512_F ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Mem) // AVX AVX512_F - ASMJIT_INST_2x(vmovddup, Vmovddup, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovddup, Vmovddup, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovddup, Vmovddup, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovddup, Vmovddup, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovddup, Vmovddup, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovddup, Vmovddup, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Xmm, Mem) // AVX - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Xmm) // AVX - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Ymm, Mem) // AVX - ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Ymm) // AVX - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Xmm, Mem) // AVX - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Xmm) // AVX - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Ymm, Ymm) 
// AVX - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Ymm, Mem) // AVX - ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Ymm) // AVX - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Zmm) // AVX512_BW{kz} + ASMJIT_INST_2x(vmovddup, Vmovddup, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovddup, Vmovddup, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa, Vmovdqa, Vec, Vec) // AVX + ASMJIT_INST_2x(vmovdqa, Vmovdqa, Vec, Mem) // AVX + ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Vec) // AVX + ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu, Vmovdqu, Vec, Vec) // AVX + ASMJIT_INST_2x(vmovdqu, Vmovdqu, Vec, Mem) // AVX + ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Vec) // AVX + ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Vec) // 
AVX512_BW{kz} + ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Vec) // AVX512_BW{kz} ASMJIT_INST_3x(vmovhlps, Vmovhlps, Xmm, Xmm, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovhpd, Vmovhpd, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_3x(vmovhpd, Vmovhpd, Xmm, Xmm, Mem) // AVX AVX512_F @@ -3089,22 +2728,12 @@ struct EmitterExplicitT { ASMJIT_INST_3x(vmovlpd, Vmovlpd, Xmm, Xmm, Mem) // AVX AVX512_F ASMJIT_INST_2x(vmovlps, Vmovlps, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_3x(vmovlps, Vmovlps, Xmm, Xmm, Mem) // AVX AVX512_F - ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Xmm) // AVX - ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Ymm) // AVX - ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Xmm) // AVX - ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Ymm) // AVX - ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Xmm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Ymm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Zmm) // AVX512_F - ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Xmm, Mem) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Ymm, Mem) // AVX2 AVX512_F-VL - ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Zmm, Mem) // AVX512_F - ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Xmm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Ymm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Zmm) // AVX512_F - ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Xmm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Ymm) // AVX AVX512_F-VL - ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Zmm) // AVX512_F + ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Vec) // AVX + ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Vec) // AVX + ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Vec) // AVX+ AVX512_F + ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Vec, Mem) // AVX+ AVX512_F + ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Vec) // AVX AVX512_F + ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Vec) // AVX AVX512_F ASMJIT_INST_2x(vmovq, Vmovq, Gp, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovq, Vmovq, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Mem) // AVX AVX512_F @@ -3113,627 +2742,245 @@ struct EmitterExplicitT { ASMJIT_INST_2x(vmovsd, Vmovsd, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovsd, Vmovsd, Xmm, Mem) // AVX AVX512_F{kz} ASMJIT_INST_3x(vmovsd, Vmovsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz} - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovshdup, Vmovshdup, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovsldup, Vmovsldup, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovsldup, Vmovsldup, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovsldup, Vmovsldup, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovsldup, Vmovsldup, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovsldup, Vmovsldup, Zmm, Zmm) // AVX512_F{kz} - 
ASMJIT_INST_2x(vmovsldup, Vmovsldup, Zmm, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vmovshdup, Vmovshdup, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovshdup, Vmovshdup, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovsldup, Vmovsldup, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovsldup, Vmovsldup, Vec, Mem) // AVX AVX512_F{kz} ASMJIT_INST_2x(vmovss, Vmovss, Mem, Xmm) // AVX AVX512_F ASMJIT_INST_2x(vmovss, Vmovss, Xmm, Mem) // AVX AVX512_F{kz} ASMJIT_INST_3x(vmovss, Vmovss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz} - ASMJIT_INST_2x(vmovupd, Vmovupd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovupd, Vmovupd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovupd, Vmovupd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovups, Vmovups, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Mem, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Ymm, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Ymm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Mem, Ymm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vmovups, Vmovups, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vmovups, Vmovups, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vmovups, Vmovups, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Ymm, Ymm, Ymm, Imm) // AVX2 - ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Ymm, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_3x(vmulpd, Vmulpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmulpd, Vmulpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmulpd, Vmulpd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmulpd, Vmulpd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vmulpd, Vmulpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vmulpd, Vmulpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vmulps, Vmulps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmulps, Vmulps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmulps, Vmulps, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmulps, Vmulps, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vmulps, Vmulps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vmulps, Vmulps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_2x(vmovupd, Vmovupd, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovupd, Vmovupd, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovups, Vmovups, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovups, Vmovups, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_2x(vmovups, Vmovups, Mem, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Vec, Vec, Vec, Imm) // AVX+ + ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Vec, Vec, Mem, Imm) // AVX+ + ASMJIT_INST_3x(vmulpd, Vmulpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vmulpd, Vmulpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vmulps, Vmulps, Vec, 
Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vmulps, Vmulps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} - ASMJIT_INST_3x(vorpd, Vorpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vorpd, Vorpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vorpd, Vorpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vorpd, Vorpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vorpd, Vorpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vorpd, Vorpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vorps, Vorps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vorps, Vorps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vorps, Vorps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vorps, Vorps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vorps, Vorps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vorps, Vorps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_6x(vp4dpwssd, Vp4dpwssd, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz} - ASMJIT_INST_6x(vp4dpwssds, Vp4dpwssds, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz} - ASMJIT_INST_2x(vpabsb, Vpabsb, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsb, Vpabsb, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsb, Vpabsb, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsb, Vpabsb, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsb, Vpabsb, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpabsb, Vpabsb, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpabsd, Vpabsd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsd, Vpabsd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsd, Vpabsd, Ymm, Ymm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsd, Vpabsd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsd, Vpabsd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpabsd, Vpabsd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpabsq, Vpabsq, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsq, Vpabsq, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsq, Vpabsq, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsq, Vpabsq, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpabsq, Vpabsq, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpabsq, Vpabsq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpabsw, Vpabsw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsw, Vpabsw, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsw, Vpabsw, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsw, Vpabsw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpabsw, Vpabsw, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpabsw, Vpabsw, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Zmm, Zmm, Zmm) // AVX512_BW{kz|b32} - ASMJIT_INST_3x(vpackssdw, Vpackssdw, Zmm, Zmm, Mem) // AVX512_BW{kz|b32} - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Xmm, Xmm, 
Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpacksswb, Vpacksswb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz|b32}-VL - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Zmm, Zmm, Zmm) // AVX512_BW{kz|b32} - ASMJIT_INST_3x(vpackusdw, Vpackusdw, Zmm, Zmm, Mem) // AVX512_BW{kz|b32} - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpackuswb, Vpackuswb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddb, Vpaddb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddb, Vpaddb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddb, Vpaddb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddb, Vpaddb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddb, Vpaddb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddb, Vpaddb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddd, Vpaddd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpaddd, Vpaddd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpaddd, Vpaddd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpaddd, Vpaddd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpaddd, Vpaddd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpaddd, Vpaddd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpaddq, Vpaddq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpaddq, Vpaddq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpaddq, Vpaddq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpaddq, Vpaddq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpaddq, Vpaddq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpaddq, Vpaddq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddsb, Vpaddsb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddsw, Vpaddsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Xmm, Xmm, Mem) 
// AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddusb, Vpaddusb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddusw, Vpaddusw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddw, Vpaddw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddw, Vpaddw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddw, Vpaddw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddw, Vpaddw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpaddw, Vpaddw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpaddw, Vpaddw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_4i(vpalignr, Vpalignr, Xmm, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_4i(vpalignr, Vpalignr, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_4i(vpalignr, Vpalignr, Ymm, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_4i(vpalignr, Vpalignr, Ymm, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_4i(vpalignr, Vpalignr, Zmm, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_4i(vpalignr, Vpalignr, Zmm, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpand, Vpand, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpand, Vpand, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpand, Vpand, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpand, Vpand, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpandd, Vpandd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandd, Vpandd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandd, Vpandd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandd, Vpandd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandd, Vpandd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpandd, Vpandd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpandn, Vpandn, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpandn, Vpandn, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpandn, Vpandn, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpandn, Vpandn, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpandnd, Vpandnd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandnd, Vpandnd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandnd, Vpandnd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandnd, Vpandnd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpandnd, Vpandnd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpandnd, Vpandnd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpandnq, Vpandnq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandnq, Vpandnq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandnq, Vpandnq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandnq, Vpandnq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandnq, Vpandnq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpandnq, Vpandnq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpandq, Vpandq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandq, Vpandq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandq, Vpandq, Ymm, 
Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandq, Vpandq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpandq, Vpandq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpandq, Vpandq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpavgb, Vpavgb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgb, Vpavgb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgb, Vpavgb, Ymm, Ymm, Ymm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgb, Vpavgb, Ymm, Ymm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgb, Vpavgb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpavgb, Vpavgb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpavgw, Vpavgw, Xmm, Xmm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgw, Vpavgw, Xmm, Xmm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgw, Vpavgw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgw, Vpavgw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpavgw, Vpavgw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpavgw, Vpavgw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_4i(vpblendd, Vpblendd, Xmm, Xmm, Xmm, Imm) // AVX2 - ASMJIT_INST_4i(vpblendd, Vpblendd, Xmm, Xmm, Mem, Imm) // AVX2 - ASMJIT_INST_4i(vpblendd, Vpblendd, Ymm, Ymm, Ymm, Imm) // AVX2 - ASMJIT_INST_4i(vpblendd, Vpblendd, Ymm, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_4x(vpblendvb, Vpblendvb, Xmm, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_4x(vpblendvb, Vpblendvb, Xmm, Xmm, Mem, Xmm) // AVX - ASMJIT_INST_4x(vpblendvb, Vpblendvb, Ymm, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_4x(vpblendvb, Vpblendvb, Ymm, Ymm, Mem, Ymm) // AVX2 - ASMJIT_INST_4i(vpblendw, Vpblendw, Xmm, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_4i(vpblendw, Vpblendw, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_4i(vpblendw, Vpblendw, Ymm, Ymm, Ymm, Imm) // AVX2 - ASMJIT_INST_4i(vpblendw, Vpblendw, Ymm, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Gp) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Gp) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Gp) // AVX512_BW{kz} - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Xmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Gp) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Gp) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Gp) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Xmm, KReg) // AVX512_CD-VL - ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Ymm, KReg) // AVX512_CD-VL - ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Zmm, KReg) // AVX512_CD - ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Xmm, KReg) // AVX512_CD-VL - 
ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Ymm, KReg) // AVX512_CD-VL - ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Zmm, KReg) // AVX512_CD - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Gp) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Gp) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Gp) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Gp) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Gp) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Gp) // AVX512_BW{kz} - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Xmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F-VL - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Xmm, Xmm, Mem, Imm) // AVX AVX512_F-VL - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Ymm, Ymm, Ymm, Imm) // AVX512_F-VL VPCLMULQDQ - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Ymm, Ymm, Mem, Imm) // AVX512_F-VL VPCLMULQDQ - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Zmm, Zmm, Zmm, Imm) // AVX512_F VPCLMULQDQ - ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Zmm, Zmm, Mem, Imm) // AVX512_F VPCLMULQDQ - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Zmm, Zmm, Imm) // AVX512_BW{k} - ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Zmm, Mem, Imm) // AVX512_BW{k} - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Xmm, Mem, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Ymm, Mem, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b32} - ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Zmm, Mem, Imm) // AVX512_F{k|b32} - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Zmm, Mem) // 
AVX512_BW{k} - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Zmm, Zmm) // AVX512_F{k|b32} - ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Zmm, Mem) // AVX512_F{k|b32} - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Zmm, Zmm) // AVX512_F{k|b64} - ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Zmm, Mem) // AVX512_F{k|b64} - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm, ECX, EAX, EDX) // AVX [EXPLICIT] - ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm, ECX, EAX, EDX) // AVX [EXPLICIT] - ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm, XMM0, EAX, EDX)// AVX [EXPLICIT] - ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm, XMM0, EAX, EDX)// AVX [EXPLICIT] - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpgtd, 
Vpcmpgtd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Zmm, Zmm) // AVX512_F{k|b32} - ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Zmm, Mem) // AVX512_F{k|b32} - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Zmm, Zmm) // AVX512_F{k|b64} - ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Zmm, Mem) // AVX512_F{k|b64} - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm, ECX) // AVX [EXPLICIT] - ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm, ECX) // AVX [EXPLICIT] - ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm, XMM0) // AVX [EXPLICIT] - ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm, XMM0) // AVX [EXPLICIT] - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Xmm, Mem, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Ymm, Mem, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b64} - ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Zmm, Mem, Imm) // AVX512_F{k|b64} - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Zmm, Zmm, Imm) // AVX512_BW{k} - ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Zmm, Mem, Imm) // AVX512_BW{k} - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Xmm, Mem, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Ymm, Mem, Imm) // AVX512_F{k|b32}-VL - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b32} - ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Zmm, Mem, Imm) // AVX512_F{k|b32} - ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Xmm, Mem, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Ymm, Mem, Imm) // AVX512_F{k|b64}-VL - ASMJIT_INST_4i(vpcmpuq, 
Vpcmpuq, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b64} - ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Zmm, Mem, Imm) // AVX512_F{k|b64} - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Xmm, Xmm, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Xmm, Mem, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Ymm, Ymm, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Ymm, Mem, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Zmm, Zmm, Imm) // AVX512_BW{k|b64} - ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Zmm, Mem, Imm) // AVX512_BW{k|b64} - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Xmm, Xmm, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Xmm, Mem, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Ymm, Ymm, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Ymm, Mem, Imm) // AVX512_BW{k|b64}-VL - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Zmm, Zmm, Imm) // AVX512_BW{k|b64} - ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Zmm, Mem, Imm) // AVX512_BW{k|b64} - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Xmm, Xmm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Xmm, Mem) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Ymm, Ymm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Ymm, Mem) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Zmm, Zmm) // AVX512_CD{kz|b32} - ASMJIT_INST_2x(vpconflictd, Vpconflictd, Zmm, Mem) // AVX512_CD{kz|b32} - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Xmm, Xmm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Xmm, Mem) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Ymm, Ymm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Ymm, Mem) // 
AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Zmm, Zmm) // AVX512_CD{kz|b32} - ASMJIT_INST_2x(vpconflictq, Vpconflictq, Zmm, Mem) // AVX512_CD{kz|b32} - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32} - ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32} - ASMJIT_INST_4i(vperm2f128, Vperm2f128, Ymm, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_4i(vperm2f128, Vperm2f128, Ymm, Ymm, Mem, Imm) // AVX - ASMJIT_INST_4i(vperm2i128, Vperm2i128, Ymm, Ymm, Ymm, Imm) // AVX2 - ASMJIT_INST_4i(vperm2i128, Vperm2i128, Ymm, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_3x(vpermb, Vpermb, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermb, Vpermb, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermb, Vpermb, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermb, Vpermb, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermb, Vpermb, Zmm, Zmm, Zmm) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermb, Vpermb, Zmm, Zmm, Mem) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermd, Vpermd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermd, Vpermd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermd, Vpermd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermd, Vpermd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Zmm, Zmm, Zmm) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermi2b, Vpermi2b, Zmm, Zmm, Mem) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermi2d, Vpermi2d, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - 
ASMJIT_INST_3x(vpermi2d, Vpermi2d, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2d, Vpermi2d, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2d, Vpermi2d, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2d, Vpermi2d, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermi2d, Vpermi2d, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermi2q, Vpermi2q, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpermi2w, Vpermi2w, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Xmm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Ymm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermilpd, Vpermilpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpermilpd, Vpermilpd, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermilps, Vpermilps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilps, Vpermilps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilps, Vpermilps, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilps, Vpermilps, Xmm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilps, Vpermilps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilps, Vpermilps, Ymm, Ymm, Mem) // 
AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilps, Vpermilps, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermilps, Vpermilps, Ymm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermilps, Vpermilps, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermilps, Vpermilps, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpermilps, Vpermilps, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpermilps, Vpermilps, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpermpd, Vpermpd, Ymm, Ymm, Imm) // AVX2 - ASMJIT_INST_3i(vpermpd, Vpermpd, Ymm, Mem, Imm) // AVX2 - ASMJIT_INST_3x(vpermps, Vpermps, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpermps, Vpermps, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3i(vpermq, Vpermq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermq, Vpermq, Ymm, Mem, Imm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermq, Vpermq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermq, Vpermq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermq, Vpermq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermq, Vpermq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermq, Vpermq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpermq, Vpermq, Zmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Zmm, Zmm, Zmm) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermt2b, Vpermt2b, Zmm, Zmm, Mem) // AVX512_VBMI{kz} - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermt2d, Vpermt2d, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpermt2q, Vpermt2q, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - 
ASMJIT_INST_3x(vpermt2w, Vpermt2w, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermt2w, Vpermt2w, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermt2w, Vpermt2w, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermt2w, Vpermt2w, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermt2w, Vpermt2w, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpermt2w, Vpermt2w, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpermw, Vpermw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermw, Vpermw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermw, Vpermw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermw, Vpermw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpermw, Vpermw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpermw, Vpermw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpexpandb, Vpexpandb, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpexpandd, Vpexpandd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Ymm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Ymm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Zmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpexpandq, Vpexpandq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_2x(vpexpandw, Vpexpandw, Zmm, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vorpd, Vorpd, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vorpd, Vorpd, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vorps, Vorps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vorps, Vorps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_4x(vp2intersectd, Vp2intersectd, KReg, KReg, Vec, Vec) // AVX512_VP2INTERSECT{kz} + ASMJIT_INST_4x(vp2intersectd, Vp2intersectd, KReg, KReg, Vec, Mem) // AVX512_VP2INTERSECT{kz} + ASMJIT_INST_4x(vp2intersectq, Vp2intersectq, KReg, KReg, Vec, Vec) // AVX512_VP2INTERSECT{kz} + ASMJIT_INST_4x(vp2intersectq, Vp2intersectq, KReg, KReg, Vec, Mem) // AVX512_VP2INTERSECT{kz} + ASMJIT_INST_6x(vp4dpwssd, Vp4dpwssd, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz} + ASMJIT_INST_6x(vp4dpwssds, Vp4dpwssds, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz} + ASMJIT_INST_2x(vpabsb, Vpabsb, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpabsb, Vpabsb, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpabsd, Vpabsd, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpabsd, 
Vpabsd, Vec, Mem) // AVX+ AVX512_F{kz}
+ ASMJIT_INST_2x(vpabsq, Vpabsq, Vec, Vec) // AVX512_F{kz}
+ ASMJIT_INST_2x(vpabsq, Vpabsq, Vec, Mem) // AVX512_F{kz}
+ ASMJIT_INST_2x(vpabsw, Vpabsw, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_2x(vpabsw, Vpabsw, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpackssdw, Vpackssdw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz|b32}
+ ASMJIT_INST_3x(vpackssdw, Vpackssdw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz|b32}
+ ASMJIT_INST_3x(vpacksswb, Vpacksswb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpacksswb, Vpacksswb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpackusdw, Vpackusdw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz|b32}
+ ASMJIT_INST_3x(vpackusdw, Vpackusdw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz|b32}
+ ASMJIT_INST_3x(vpackuswb, Vpackuswb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpackuswb, Vpackuswb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddb, Vpaddb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddb, Vpaddb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddd, Vpaddd, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpaddd, Vpaddd, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpaddq, Vpaddq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpaddq, Vpaddq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpaddsb, Vpaddsb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddsb, Vpaddsb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddsw, Vpaddsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddsw, Vpaddsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddusb, Vpaddusb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddusb, Vpaddusb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddusw, Vpaddusw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddusw, Vpaddusw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddw, Vpaddw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpaddw, Vpaddw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_4i(vpalignr, Vpalignr, Vec, Vec, Vec, Imm) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_4i(vpalignr, Vpalignr, Vec, Vec, Mem, Imm) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpand, Vpand, Vec, Vec, Vec) // AVX+
+ ASMJIT_INST_3x(vpand, Vpand, Vec, Vec, Mem) // AVX+
+ ASMJIT_INST_3x(vpandd, Vpandd, Vec, Vec, Vec) // AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpandd, Vpandd, Vec, Vec, Mem) // AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpandn, Vpandn, Vec, Vec, Vec) // AVX+
+ ASMJIT_INST_3x(vpandn, Vpandn, Vec, Vec, Mem) // AVX+
+ ASMJIT_INST_3x(vpandnd, Vpandnd, Vec, Vec, Vec) // AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpandnd, Vpandnd, Vec, Vec, Mem) // AVX512_F{kz|b32}
+ ASMJIT_INST_3x(vpandnq, Vpandnq, Vec, Vec, Vec) // AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpandnq, Vpandnq, Vec, Vec, Mem) // AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpandq, Vpandq, Vec, Vec, Vec) // AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpandq, Vpandq, Vec, Vec, Mem) // AVX512_F{kz|b64}
+ ASMJIT_INST_3x(vpavgb, Vpavgb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpavgb, Vpavgb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpavgw, Vpavgw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_3x(vpavgw, Vpavgw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz}
+ ASMJIT_INST_4i(vpblendd, Vpblendd, Vec, Vec, Vec, Imm) // AVX2
+ ASMJIT_INST_4i(vpblendd, Vpblendd, Vec, Vec, Mem, Imm) // AVX2
+ ASMJIT_INST_4x(vpblendvb, Vpblendvb, Vec, Vec, Vec, Vec) // AVX+
+ ASMJIT_INST_4x(vpblendvb, Vpblendvb, Vec, Vec, Mem, Vec) // AVX+
+
ASMJIT_INST_4i(vpblendw, Vpblendw, Vec, Vec, Vec, Imm) // AVX+ + ASMJIT_INST_4i(vpblendw, Vpblendw, Vec, Vec, Mem, Imm) // AVX+ + ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Vec) // AVX2 AVX512_BW{kz} + ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Mem) // AVX2 AVX512_BW{kz} + ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Vec, Gp) // AVX512_BW{kz} + ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Vec) // AVX2 AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Mem) // AVX2 AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Vec, Gp) // AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Vec, KReg) // AVX512_CD + ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Vec, KReg) // AVX512_CD + ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Vec) // AVX2 AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Mem) // AVX2 AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Vec, Gp) // AVX512_F{kz} + ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Vec) // AVX2 AVX512_BW{kz} + ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Mem) // AVX2 AVX512_BW{kz} + ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Vec, Gp) // AVX512_BW{kz} + ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Vec, Vec, Vec, Imm) // AVX VPCLMULQDQ AVX512_F + ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Vec, Vec, Mem, Imm) // AVX VPCLMULQDQ AVX512_F + ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Vec, Vec, Imm) // AVX512_BW{k} + ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Vec, Mem, Imm) // AVX512_BW{k} + ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Vec, Vec, Imm) // AVX512_F{k|b32} + ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Vec, Mem, Imm) // AVX512_F{k|b32} + ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Vec, Vec) // AVX512_F{k|b32} + ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Vec, Mem) // AVX512_F{k|b32} + ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Vec, Vec) // AVX512_F{k|b64} + ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Vec, Mem) // AVX512_F{k|b64} + ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Vec, Vec, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // AVX [EXPLICIT] + ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Vec, Mem, Imm, Gp_ECX, Gp_EAX, Gp_EDX) // AVX [EXPLICIT] + ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Vec, Vec, Imm, XMM0, Gp_EAX, Gp_EDX) // AVX [EXPLICIT] + ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Vec, Mem, Imm, XMM0, Gp_EAX, Gp_EDX) // AVX [EXPLICIT] + ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Vec, Vec) // AVX512_F{k|b32} + ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, 
KReg, Vec, Mem) // AVX512_F{k|b32} + ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Vec, Vec) // AVX512_F{k|b64} + ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Vec, Mem) // AVX512_F{k|b64} + ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Vec, Vec, Imm, Gp_ECX) // AVX [EXPLICIT] + ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Vec, Mem, Imm, Gp_ECX) // AVX [EXPLICIT] + ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Vec, Vec, Imm, XMM0) // AVX [EXPLICIT] + ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Vec, Mem, Imm, XMM0) // AVX [EXPLICIT] + ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Vec, Vec, Imm) // AVX512_F{k|b64} + ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Vec, Mem, Imm) // AVX512_F{k|b64} + ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Vec, Vec, Imm) // AVX512_BW{k} + ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Vec, Mem, Imm) // AVX512_BW{k} + ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Vec, Vec, Imm) // AVX512_F{k|b32} + ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Vec, Mem, Imm) // AVX512_F{k|b32} + ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Vec, Vec, Imm) // AVX512_F{k|b64} + ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Vec, Mem, Imm) // AVX512_F{k|b64} + ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Vec, Vec, Imm) // AVX512_BW{k|b64} + ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Vec, Mem, Imm) // AVX512_BW{k|b64} + ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Vec, Vec, Imm) // AVX512_BW{k|b64} + ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Vec, Mem, Imm) // AVX512_BW{k|b64} + ASMJIT_INST_2x(vpcompressb, Vpcompressb, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpcompressd, Vpcompressd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpcompressq, Vpcompressq, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpcompressw, Vpcompressw, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpconflictd, Vpconflictd, Vec, Vec) // AVX512_CD{kz|b32} + ASMJIT_INST_2x(vpconflictd, Vpconflictd, Vec, Mem) // AVX512_CD{kz|b32} + ASMJIT_INST_2x(vpconflictq, Vpconflictq, Vec, Vec) // AVX512_CD{kz|b32} + ASMJIT_INST_2x(vpconflictq, Vpconflictq, Vec, Mem) // AVX512_CD{kz|b32} + ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Vec, Vec, Vec) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Vec, Vec, Mem) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Vec, Vec, Vec) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Vec, Vec, Mem) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Vec, Vec, Vec) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Vec, Vec, Mem) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Vec, Vec, Vec) // AVX512_VNNI{kz|b32} + ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Vec, Vec, Mem) // AVX512_VNNI{kz|b32} + ASMJIT_INST_4i(vperm2f128, Vperm2f128, Vec, Vec, Vec, Imm) // AVX + ASMJIT_INST_4i(vperm2f128, Vperm2f128, Vec, Vec, Mem, Imm) // AVX + ASMJIT_INST_4i(vperm2i128, Vperm2i128, Vec, Vec, Vec, Imm) // AVX2 + ASMJIT_INST_4i(vperm2i128, Vperm2i128, Vec, Vec, Mem, Imm) // AVX2 + ASMJIT_INST_3x(vpermb, Vpermb, Vec, 
Vec, Vec) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermb, Vpermb, Vec, Vec, Mem) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermd, Vpermd, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermd, Vpermd, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermi2b, Vpermi2b, Vec, Vec, Vec) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermi2b, Vpermi2b, Vec, Vec, Mem) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermi2d, Vpermi2d, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermi2d, Vpermi2d, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermi2q, Vpermi2q, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermi2q, Vpermi2q, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermi2w, Vpermi2w, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpermi2w, Vpermi2w, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermilpd, Vpermilpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermilpd, Vpermilpd, Vec, Vec, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermilpd, Vpermilpd, Vec, Mem, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermilps, Vpermilps, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermilps, Vpermilps, Vec, Vec, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermilps, Vpermilps, Vec, Mem, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermpd, Vpermpd, Vec, Vec, Imm) // AVX2 + ASMJIT_INST_3i(vpermpd, Vpermpd, Vec, Mem, Imm) // AVX2 + ASMJIT_INST_3x(vpermps, Vpermps, Vec, Vec, Vec) // AVX2 + ASMJIT_INST_3x(vpermps, Vpermps, Vec, Vec, Mem) // AVX2 + ASMJIT_INST_3i(vpermq, Vpermq, Vec, Vec, Imm) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3i(vpermq, Vpermq, Vec, Mem, Imm) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermq, Vpermq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermq, Vpermq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermt2b, Vpermt2b, Vec, Vec, Vec) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermt2b, Vpermt2b, Vec, Vec, Mem) // AVX512_VBMI{kz} + ASMJIT_INST_3x(vpermt2d, Vpermt2d, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermt2d, Vpermt2d, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpermt2q, Vpermt2q, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermt2q, Vpermt2q, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpermt2w, Vpermt2w, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpermt2w, Vpermt2w, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3x(vpermw, Vpermw, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpermw, Vpermw, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_2x(vpexpandb, Vpexpandb, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpexpandb, Vpexpandb, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_2x(vpexpandd, Vpexpandd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpexpandd, Vpexpandd, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_2x(vpexpandq, Vpexpandq, Vec, Vec) 
// AVX512_F{kz}
+ ASMJIT_INST_2x(vpexpandq, Vpexpandq, Vec, Mem) // AVX512_F{kz}
+ ASMJIT_INST_2x(vpexpandw, Vpexpandw, Vec, Vec) // AVX512_VBMI2{kz}
+ ASMJIT_INST_2x(vpexpandw, Vpexpandw, Vec, Mem) // AVX512_VBMI2{kz}
ASMJIT_INST_3i(vpextrb, Vpextrb, Gp, Xmm, Imm) // AVX AVX512_BW
ASMJIT_INST_3i(vpextrb, Vpextrb, Mem, Xmm, Imm) // AVX AVX512_BW
ASMJIT_INST_3i(vpextrd, Vpextrd, Gp, Xmm, Imm) // AVX AVX512_DQ
@@ -3742,51 +2989,28 @@ struct EmitterExplicitT {
ASMJIT_INST_3i(vpextrq, Vpextrq, Mem, Xmm, Imm) // AVX AVX512_DQ
ASMJIT_INST_3i(vpextrw, Vpextrw, Gp, Xmm, Imm) // AVX AVX512_BW
ASMJIT_INST_3i(vpextrw, Vpextrw, Mem, Xmm, Imm) // AVX AVX512_BW
- ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Xmm, Mem, Xmm) // AVX2
- ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Ymm, Mem, Ymm) // AVX2
- ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Xmm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Ymm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Zmm, Mem) // AVX512_F{k}
- ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Xmm, Mem, Xmm) // AVX2
- ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Ymm, Mem, Ymm) // AVX2
- ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Xmm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Ymm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Zmm, Mem) // AVX512_F{k}
- ASMJIT_INST_3x(vpgatherqd, Vpgatherqd, Xmm, Mem, Xmm) // AVX2
- ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Xmm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Ymm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Zmm, Mem) // AVX512_F{k}
- ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Xmm, Mem, Xmm) // AVX2
- ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Ymm, Mem, Ymm) // AVX2
- ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Xmm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Ymm, Mem) // AVX512_F{k}-VL
- ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Zmm, Mem) // AVX512_F{k}
- ASMJIT_INST_3x(vphaddd, Vphaddd, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphaddd, Vphaddd, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphaddd, Vphaddd, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphaddd, Vphaddd, Ymm, Ymm, Mem) // AVX2
- ASMJIT_INST_3x(vphaddsw, Vphaddsw, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphaddsw, Vphaddsw, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphaddsw, Vphaddsw, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphaddsw, Vphaddsw, Ymm, Ymm, Mem) // AVX2
- ASMJIT_INST_3x(vphaddw, Vphaddw, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphaddw, Vphaddw, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphaddw, Vphaddw, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphaddw, Vphaddw, Ymm, Ymm, Mem) // AVX2
- ASMJIT_INST_2x(vphminposuw, Vphminposuw, Xmm, Xmm) // AVX
- ASMJIT_INST_2x(vphminposuw, Vphminposuw, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphsubd, Vphsubd, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphsubd, Vphsubd, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphsubd, Vphsubd, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphsubd, Vphsubd, Ymm, Ymm, Mem) // AVX2
- ASMJIT_INST_3x(vphsubsw, Vphsubsw, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphsubsw, Vphsubsw, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphsubsw, Vphsubsw, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphsubsw, Vphsubsw, Ymm, Ymm, Mem) // AVX2
- ASMJIT_INST_3x(vphsubw, Vphsubw, Xmm, Xmm, Xmm) // AVX
- ASMJIT_INST_3x(vphsubw, Vphsubw, Xmm, Xmm, Mem) // AVX
- ASMJIT_INST_3x(vphsubw, Vphsubw, Ymm, Ymm, Ymm) // AVX2
- ASMJIT_INST_3x(vphsubw, Vphsubw, Ymm, Ymm, Mem) // AVX2
+ ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Vec, Mem) // AVX512_F{k}
+ ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Vec, Mem,
Vec) // AVX2 + ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vpgatherqd, Vpgatherqd, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Vec, Mem) // AVX512_F{k} + ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Vec, Mem, Vec) // AVX2 + ASMJIT_INST_3x(vphaddd, Vphaddd, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphaddd, Vphaddd, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vphaddsw, Vphaddsw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphaddsw, Vphaddsw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vphaddw, Vphaddw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphaddw, Vphaddw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_2x(vphminposuw, Vphminposuw, Vec, Vec) // AVX + ASMJIT_INST_2x(vphminposuw, Vphminposuw, Vec, Mem) // AVX + ASMJIT_INST_3x(vphsubd, Vphsubd, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphsubd, Vphsubd, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vphsubsw, Vphsubsw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphsubsw, Vphsubsw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vphsubw, Vphsubw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vphsubw, Vphsubw, Vec, Vec, Mem) // AVX+ ASMJIT_INST_4i(vpinsrb, Vpinsrb, Xmm, Xmm, Gp, Imm) // AVX AVX512_BW{kz} ASMJIT_INST_4i(vpinsrb, Vpinsrb, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz} ASMJIT_INST_4i(vpinsrd, Vpinsrd, Xmm, Xmm, Gp, Imm) // AVX AVX512_DQ{kz} @@ -3795,1099 +3019,411 @@ struct EmitterExplicitT { ASMJIT_INST_4i(vpinsrq, Vpinsrq, Xmm, Xmm, Mem, Imm) // AVX AVX512_DQ{kz} ASMJIT_INST_4i(vpinsrw, Vpinsrw, Xmm, Xmm, Gp, Imm) // AVX AVX512_BW{kz} ASMJIT_INST_4i(vpinsrw, Vpinsrw, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz} - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Xmm, Xmm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Xmm, Mem) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Ymm, Ymm) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Ymm, Mem) // AVX512_CD{kz|b32}-VL - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Zmm, Zmm) // AVX512_CD{kz|b32} - ASMJIT_INST_2x(vplzcntd, Vplzcntd, Zmm, Mem) // AVX512_CD{kz|b32} - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Xmm, Xmm) // AVX512_CD{kz|b64}-VL - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Xmm, Mem) // AVX512_CD{kz|b64}-VL - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Ymm, Ymm) // AVX512_CD{kz|b64}-VL - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Ymm, Mem) // AVX512_CD{kz|b64}-VL - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Zmm, Zmm) // AVX512_CD{kz|b64} - ASMJIT_INST_2x(vplzcntq, Vplzcntq, Zmm, Mem) // AVX512_CD{kz|b64} - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Xmm, Xmm, Xmm) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Xmm, Xmm, Mem) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Ymm, Ymm, Ymm) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Ymm, Ymm, Mem) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Zmm, Zmm, Zmm) // AVX512_IFMA{kz|b64} - ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Zmm, Zmm, Mem) // AVX512_IFMA{kz|b64} - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Xmm, Xmm, Xmm) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Xmm, Xmm, Mem) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Ymm, Ymm, Ymm) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Ymm, Ymm, Mem) // AVX512_IFMA{kz|b64}-VL - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Zmm, Zmm, Zmm) // AVX512_IFMA{kz|b64} - ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Zmm, Zmm, Mem) // 
AVX512_IFMA{kz|b64} - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Xmm, Xmm) // AVX2 - ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Xmm, Xmm, Mem) // AVX2 - ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Xmm, Xmm) // AVX2 - ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Xmm, Xmm, Mem) // AVX2 - ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxub, Vpmaxub, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxub, Vpmaxub, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxub, Vpmaxub, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxub, Vpmaxub, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxub, Vpmaxub, Zmm, Zmm, Zmm) // AVX512_BW{kz} 
- ASMJIT_INST_3x(vpmaxub, Vpmaxub, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmaxud, Vpmaxud, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminsb, Vpminsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsb, Vpminsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsb, Vpminsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsb, Vpminsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsb, Vpminsb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminsb, Vpminsb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminsd, Vpminsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminsd, Vpminsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminsd, Vpminsd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminsd, Vpminsd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminsd, Vpminsd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpminsd, Vpminsd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpminsq, Vpminsq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminsq, Vpminsq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminsq, Vpminsq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminsq, Vpminsq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminsq, Vpminsq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpminsq, Vpminsq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpminsw, Vpminsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsw, Vpminsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsw, Vpminsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsw, Vpminsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminsw, Vpminsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminsw, Vpminsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminub, Vpminub, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminub, Vpminub, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminub, Vpminub, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminub, Vpminub, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminub, Vpminub, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminub, Vpminub, Zmm, Zmm, Mem) // 
AVX512_BW{kz} - ASMJIT_INST_3x(vpminud, Vpminud, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminud, Vpminud, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminud, Vpminud, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminud, Vpminud, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpminud, Vpminud, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpminud, Vpminud, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpminuq, Vpminuq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminuq, Vpminuq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminuq, Vpminuq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminuq, Vpminuq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpminuq, Vpminuq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpminuq, Vpminuq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpminuw, Vpminuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminuw, Vpminuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminuw, Vpminuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminuw, Vpminuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpminuw, Vpminuw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpminuw, Vpminuw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Xmm) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Ymm) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Zmm) // AVX512_BW - ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Xmm) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Ymm) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Zmm) // AVX512_DQ - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Xmm, KReg) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Ymm, KReg) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Zmm, KReg) // AVX512_BW - ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Xmm, KReg) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Ymm, KReg) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Zmm, KReg) // AVX512_DQ - ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Xmm, KReg) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Ymm, KReg) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Zmm, KReg) // AVX512_DQ - ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Xmm, KReg) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Ymm, KReg) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Zmm, KReg) // AVX512_BW - ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Xmm) // AVX - ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Ymm) // AVX2 - ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Xmm) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Ymm) // AVX512_DQ-VL - ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Zmm) // AVX512_DQ - ASMJIT_INST_2x(vpmovqb, Vpmovqb, 
Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqb, Vpmovqb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqb, Vpmovqb, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Xmm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Ymm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbd, 
Vpmovsxbd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Zmm, Ymm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Zmm, Ymm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Zmm, Ymm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Zmm) // 
AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Ymm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Ymm) // AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Zmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Xmm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Ymm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Xmm) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Ymm) // AVX512_BW-VL - ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Zmm) // AVX512_BW - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Xmm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Ymm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Zmm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Zmm, Ymm) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Zmm, Ymm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Zmm, Mem) // AVX512_F{kz} - 
ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Zmm, Ymm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmuldq, Vpmuldq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulhw, Vpmulhw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmulld, Vpmulld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmulld, Vpmulld, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmulld, Vpmulld, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmulld, Vpmulld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpmulld, Vpmulld, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmulld, Vpmulld, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpmullq, Vpmullq, Xmm, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vpmullq, Vpmullq, Xmm, Xmm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vpmullq, Vpmullq, Ymm, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vpmullq, Vpmullq, Ymm, Ymm, Mem) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vpmullq, Vpmullq, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vpmullq, Vpmullq, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vpmullw, Vpmullw, Xmm, Xmm, 
Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmullw, Vpmullw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmullw, Vpmullw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmullw, Vpmullw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpmullw, Vpmullw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmullw, Vpmullw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Xmm, Xmm, Xmm) // AVX512_VBMI{kz|b64}-VL - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Xmm, Xmm, Mem) // AVX512_VBMI{kz|b64}-VL - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Ymm, Ymm, Ymm) // AVX512_VBMI{kz|b64}-VL - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Ymm, Ymm, Mem) // AVX512_VBMI{kz|b64}-VL - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Zmm, Zmm, Zmm) // AVX512_VBMI{kz|b64} - ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Zmm, Zmm, Mem) // AVX512_VBMI{kz|b64} - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpmuludq, Vpmuludq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Xmm, Xmm) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Xmm, Mem) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Ymm, Ymm) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Ymm, Mem) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Zmm, Zmm) // AVX512_BITALG{kz|b32} - ASMJIT_INST_2x(vpopcntb, Vpopcntb, Zmm, Mem) // AVX512_BITALG{kz|b32} - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Xmm, Xmm) // AVX512_VPOPCNTDQ{kz|b32}-VL - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Xmm, Mem) // AVX512_VPOPCNTDQ{kz|b32}-VL - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Ymm, Ymm) // AVX512_VPOPCNTDQ{kz|b32}-VL - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Ymm, Mem) // AVX512_VPOPCNTDQ{kz|b32}-VL - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Zmm, Zmm) // AVX512_VPOPCNTDQ{kz|b32} - ASMJIT_INST_2x(vpopcntd, Vpopcntd, Zmm, Mem) // AVX512_VPOPCNTDQ{kz|b32} - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Xmm, Xmm) // AVX512_VPOPCNTDQ{kz|b64}-VL - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Xmm, Mem) // AVX512_VPOPCNTDQ{kz|b64}-VL - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Ymm, Ymm) // AVX512_VPOPCNTDQ{kz|b64}-VL - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Ymm, Mem) // AVX512_VPOPCNTDQ{kz|b64}-VL - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Zmm, Zmm) // AVX512_VPOPCNTDQ{kz|b64} - ASMJIT_INST_2x(vpopcntq, Vpopcntq, Zmm, Mem) // AVX512_VPOPCNTDQ{kz|b64} - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Xmm, Xmm) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Xmm, Mem) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Ymm, Ymm) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Ymm, Mem) // AVX512_BITALG{kz|b32}-VL - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Zmm, Zmm) // AVX512_BITALG{kz|b32} - ASMJIT_INST_2x(vpopcntw, Vpopcntw, Zmm, Mem) // AVX512_BITALG{kz|b32} - ASMJIT_INST_3x(vpor, Vpor, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpor, Vpor, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpor, Vpor, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpor, Vpor, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpord, Vpord, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpord, Vpord, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - 
ASMJIT_INST_3x(vpord, Vpord, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpord, Vpord, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpord, Vpord, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpord, Vpord, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vporq, Vporq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vporq, Vporq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vporq, Vporq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vporq, Vporq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vporq, Vporq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vporq, Vporq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vprold, Vprold, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprold, Vprold, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprold, Vprold, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprold, Vprold, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprold, Vprold, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vprold, Vprold, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vprolq, Vprolq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprolq, Vprolq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprolq, Vprolq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprolq, Vprolq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprolq, Vprolq, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vprolq, Vprolq, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vprolvd, Vprolvd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprolvd, Vprolvd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprolvd, Vprolvd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprolvd, Vprolvd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprolvd, Vprolvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vprolvd, Vprolvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vprolvq, Vprolvq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprolvq, Vprolvq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprolvq, Vprolvq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprolvq, Vprolvq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprolvq, Vprolvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vprolvq, Vprolvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vprord, Vprord, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprord, Vprord, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprord, Vprord, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprord, Vprord, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vprord, Vprord, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vprord, Vprord, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vprorq, Vprorq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprorq, Vprorq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprorq, Vprorq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprorq, Vprorq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vprorq, Vprorq, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vprorq, Vprorq, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vprorvd, Vprorvd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprorvd, Vprorvd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprorvd, Vprorvd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprorvd, Vprorvd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vprorvd, Vprorvd, Zmm, 
Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vprorvd, Vprorvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vprorvq, Vprorvq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprorvq, Vprorvq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprorvq, Vprorvq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprorvq, Vprorvq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vprorvq, Vprorvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vprorvq, Vprorvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Xmm, Xmm, Xmm) // AVX AVX512_BW-VL - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Xmm, Xmm, Mem) // AVX AVX512_BW-VL - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW-VL - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW-VL - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Zmm, Zmm, Zmm) // AVX512_BW - ASMJIT_INST_3x(vpsadbw, Vpsadbw, Zmm, Zmm, Mem) // AVX512_BW - ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Zmm) // AVX512_F{k} - ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Zmm) // AVX512_F{k} - ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Ymm) // AVX512_F{k} - ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Zmm) // AVX512_F{k} - ASMJIT_INST_4i(vpshldd, Vpshldd, Xmm, Xmm, Xmm, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshldd, Vpshldd, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshldd, Vpshldd, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshldd, Vpshldd, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshldd, Vpshldd, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_4i(vpshldd, Vpshldd, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvd, Vpshldvd, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvq, Vpshldvq, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshldvw, Vpshldvw, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Xmm, Xmm, Xmm, 
Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_4i(vpshrdd, Vpshrdd, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Zmm, Zmm, Mem) // AVX512_VBMI2{kz} - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Xmm, Xmm, Xmm, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_4i(vpshrdw, Vpshrdw, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz} - ASMJIT_INST_3x(vpshufb, Vpshufb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpshufb, Vpshufb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpshufb, Vpshufb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpshufb, Vpshufb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpshufb, Vpshufb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpshufb, Vpshufb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Xmm, Xmm) // AVX512_BITALG{k}-VL - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Xmm, Mem) // AVX512_BITALG{k}-VL - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Ymm, Ymm) // AVX512_BITALG{k}-VL - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Ymm, Mem) // AVX512_BITALG{k}-VL - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Zmm, Zmm) // AVX512_BITALG{k} - ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Zmm, Mem) // AVX512_BITALG{k} - ASMJIT_INST_3i(vpshufd, Vpshufd, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpshufd, Vpshufd, Xmm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpshufd, Vpshufd, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpshufd, Vpshufd, Ymm, Mem, Imm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpshufd, Vpshufd, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpshufd, Vpshufd, 
Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpshufhw, Vpshufhw, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpshuflw, Vpshuflw, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsignb, Vpsignb, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpsignb, Vpsignb, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpsignb, Vpsignb, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpsignb, Vpsignb, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpsignd, Vpsignd, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpsignd, Vpsignd, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpsignd, Vpsignd, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpsignd, Vpsignd, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpsignw, Vpsignw, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpsignw, Vpsignw, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpsignw, Vpsignw, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpsignw, Vpsignw, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3i(vpslld, Vpslld, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpslld, Vpslld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3x(vpslld, Vpslld, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3i(vpslld, Vpslld, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpslld, Vpslld, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3x(vpslld, Vpslld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3i(vpslld, Vpslld, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpslld, Vpslld, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpslld, Vpslld, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpslld, Vpslld, Zmm, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vpslld, Vpslld, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpslld, Vpslld, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpslldq, Vpslldq, Xmm, Xmm, Imm) // AVX AVX512_BW-VL - ASMJIT_INST_3i(vpslldq, Vpslldq, Ymm, Ymm, Imm) // AVX2 AVX512_BW-VL - ASMJIT_INST_3i(vpslldq, Vpslldq, Xmm, Mem, Imm) // AVX512_BW-VL - ASMJIT_INST_3i(vpslldq, Vpslldq, Ymm, Mem, Imm) // AVX512_BW-VL - ASMJIT_INST_3i(vpslldq, Vpslldq, Zmm, Zmm, Imm) // AVX512_BW - ASMJIT_INST_3i(vpslldq, Vpslldq, Zmm, Mem, Imm) // AVX512_BW - ASMJIT_INST_3i(vpsllq, Vpsllq, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllq, Vpsllq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsllq, Vpsllq, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsllq, Vpsllq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllq, Vpsllq, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsllq, Vpsllq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsllq, Vpsllq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpsllq, Vpsllq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllq, Vpsllq, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpsllq, Vpsllq, Zmm, Zmm, Mem) // 
AVX512_F{kz} - ASMJIT_INST_3i(vpsllq, Vpsllq, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpsllq, Vpsllq, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsllvd, Vpsllvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsllvq, Vpsllvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsllvw, Vpsllvw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsllw, Vpsllw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllw, Vpsllw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllw, Vpsllw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsllw, Vpsllw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllw, Vpsllw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllw, Vpsllw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsllw, Vpsllw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsllw, Vpsllw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsllw, Vpsllw, Zmm, Zmm, Xmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsllw, Vpsllw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsllw, Vpsllw, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsllw, Vpsllw, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsrad, Vpsrad, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrad, Vpsrad, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrad, Vpsrad, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrad, Vpsrad, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrad, Vpsrad, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrad, Vpsrad, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrad, Vpsrad, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpsrad, Vpsrad, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrad, Vpsrad, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpsrad, Vpsrad, Zmm, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vpsrad, Vpsrad, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpsrad, Vpsrad, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsraq, Vpsraq, Xmm, Xmm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsraq, Vpsraq, Xmm, Xmm, Mem) // AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsraq, Vpsraq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpsraq, Vpsraq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsraq, Vpsraq, Ymm, Ymm, Xmm) // AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsraq, Vpsraq, Ymm, Ymm, Mem) // AVX512_F{kz}-VL - 
ASMJIT_INST_3i(vpsraq, Vpsraq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpsraq, Vpsraq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsraq, Vpsraq, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpsraq, Vpsraq, Zmm, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vpsraq, Vpsraq, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpsraq, Vpsraq, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsravd, Vpsravd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsravd, Vpsravd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsravd, Vpsravd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsravd, Vpsravd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsravd, Vpsravd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsravd, Vpsravd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsravq, Vpsravq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsravq, Vpsravq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsravq, Vpsravq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsravq, Vpsravq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsravq, Vpsravq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsravq, Vpsravq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsravw, Vpsravw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsravw, Vpsravw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsravw, Vpsravw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsravw, Vpsravw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsravw, Vpsravw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsravw, Vpsravw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsraw, Vpsraw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsraw, Vpsraw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsraw, Vpsraw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsraw, Vpsraw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsraw, Vpsraw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsraw, Vpsraw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsraw, Vpsraw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsraw, Vpsraw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsraw, Vpsraw, Zmm, Zmm, Xmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsraw, Vpsraw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsraw, Vpsraw, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsraw, Vpsraw, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsrld, Vpsrld, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrld, Vpsrld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrld, Vpsrld, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrld, Vpsrld, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrld, Vpsrld, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrld, Vpsrld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrld, Vpsrld, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vpsrld, Vpsrld, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrld, Vpsrld, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpsrld, Vpsrld, Zmm, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vpsrld, Vpsrld, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpsrld, Vpsrld, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_3i(vpsrldq, Vpsrldq, Xmm, Xmm, Imm) // AVX AVX512_BW-VL - ASMJIT_INST_3i(vpsrldq, Vpsrldq, Ymm, Ymm, Imm) // AVX2 AVX512_BW-VL - ASMJIT_INST_3i(vpsrldq, Vpsrldq, 
Xmm, Mem, Imm) // AVX512_BW-VL - ASMJIT_INST_3i(vpsrldq, Vpsrldq, Ymm, Mem, Imm) // AVX512_BW-VL - ASMJIT_INST_3i(vpsrldq, Vpsrldq, Zmm, Zmm, Imm) // AVX512_BW - ASMJIT_INST_3i(vpsrldq, Vpsrldq, Zmm, Mem, Imm) // AVX512_BW - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Zmm, Zmm, Xmm) // AVX512_F{kz} - ASMJIT_INST_3x(vpsrlq, Vpsrlq, Zmm, Zmm, Mem) // AVX512_F{kz} - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3i(vpsrlq, Vpsrlq, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Zmm, Zmm, Xmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsrlw, Vpsrlw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Zmm, Zmm, Imm) // AVX512_BW{kz} - ASMJIT_INST_3i(vpsrlw, Vpsrlw, Zmm, Mem, Imm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubb, Vpsubb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubb, Vpsubb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubb, Vpsubb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubb, Vpsubb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubb, Vpsubb, Zmm, Zmm, Zmm) 
// AVX512_BW{kz} - ASMJIT_INST_3x(vpsubb, Vpsubb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubd, Vpsubd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsubd, Vpsubd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsubd, Vpsubd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsubd, Vpsubd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpsubd, Vpsubd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsubd, Vpsubd, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpsubq, Vpsubq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsubq, Vpsubq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsubq, Vpsubq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsubq, Vpsubq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpsubq, Vpsubq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsubq, Vpsubq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubsb, Vpsubsb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubsw, Vpsubsw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubusb, Vpsubusb, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubusw, Vpsubusw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubw, Vpsubw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubw, Vpsubw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubw, Vpsubw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubw, Vpsubw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpsubw, Vpsubw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpsubw, Vpsubw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_4i(vpternlogd, Vpternlogd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vpternlogd, Vpternlogd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vpternlogd, Vpternlogd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vpternlogd, Vpternlogd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vpternlogd, Vpternlogd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - 
ASMJIT_INST_4i(vpternlogd, Vpternlogd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vpternlogq, Vpternlogq, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vptest, Vptest, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vptest, Vptest, Xmm, Mem) // AVX - ASMJIT_INST_2x(vptest, Vptest, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vptest, Vptest, Ymm, Mem) // AVX - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Zmm, Zmm) // AVX512_F{k|b32} - ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Zmm, Mem) // AVX512_F{k|b32} - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Zmm, Zmm) // AVX512_F{k|b64} - ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Zmm, Mem) // AVX512_F{k|b64} - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Zmm, Zmm) // AVX512_F{k|b32} - ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Zmm, Mem) // AVX512_F{k|b32} - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Xmm, Mem) // 
AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Zmm, Zmm) // AVX512_F{k|b64} - ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Zmm, Mem) // AVX512_F{k|b64} - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Xmm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Ymm, Mem) // AVX512_BW{k}-VL - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Zmm, Zmm) // AVX512_BW{k} - ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Zmm, Mem) // AVX512_BW{k} - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Zmm, Zmm, Zmm) // 
AVX512_F{kz|b32} - ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Zmm, Zmm, Zmm) // AVX512_BW{kz} - ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Zmm, Zmm, Mem) // AVX512_BW{kz} - ASMJIT_INST_3x(vpxor, Vpxor, Xmm, Xmm, Xmm) // AVX - ASMJIT_INST_3x(vpxor, Vpxor, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vpxor, Vpxor, Ymm, Ymm, Ymm) // AVX2 - ASMJIT_INST_3x(vpxor, Vpxor, Ymm, Ymm, Mem) // AVX2 - ASMJIT_INST_3x(vpxord, Vpxord, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpxord, Vpxord, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpxord, Vpxord, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpxord, Vpxord, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vpxord, Vpxord, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpxord, Vpxord, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vpxorq, Vpxorq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpxorq, Vpxorq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpxorq, Vpxorq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpxorq, Vpxorq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vpxorq, Vpxorq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vpxorq, Vpxorq, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vrangepd, Vrangepd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_4i(vrangepd, Vrangepd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_4i(vrangepd, Vrangepd, Ymm, Ymm, Ymm, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_4i(vrangepd, Vrangepd, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_4i(vrangepd, Vrangepd, Zmm, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b64} - ASMJIT_INST_4i(vrangepd, Vrangepd, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b64} - ASMJIT_INST_4i(vrangeps, Vrangeps, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_4i(vrangeps, Vrangeps, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_4i(vrangeps, Vrangeps, Ymm, Ymm, Ymm, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_4i(vrangeps, Vrangeps, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_4i(vrangeps, Vrangeps, Zmm, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b32} - ASMJIT_INST_4i(vrangeps, Vrangeps, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b32} + ASMJIT_INST_2x(vplzcntd, Vplzcntd, Vec, Vec) // AVX512_CD{kz|b32} + ASMJIT_INST_2x(vplzcntd, Vplzcntd, Vec, Mem) // AVX512_CD{kz|b32} + ASMJIT_INST_2x(vplzcntq, Vplzcntq, Vec, Vec) // AVX512_CD{kz|b64} + ASMJIT_INST_2x(vplzcntq, Vplzcntq, Vec, Mem) // AVX512_CD{kz|b64} + ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Vec, Vec, Vec) // AVX512_IFMA{kz|b64} + ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Vec, Vec, Mem) // AVX512_IFMA{kz|b64} + ASMJIT_INST_3x(vpmadd52luq, 
Vpmadd52luq, Vec, Vec, Vec) // AVX512_IFMA{kz|b64} + ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Vec, Vec, Mem) // AVX512_IFMA{kz|b64} + ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Vec, Vec) // AVX2 + ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Vec, Vec, Mem) // AVX2 + ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Vec, Vec) // AVX2 + ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Vec, Vec, Mem) // AVX2 + ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxub, Vpmaxub, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxub, Vpmaxub, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxud, Vpmaxud, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmaxud, Vpmaxud, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminsb, Vpminsb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminsb, Vpminsb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminsd, Vpminsd, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpminsd, Vpminsd, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpminsq, Vpminsq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpminsq, Vpminsq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpminsw, Vpminsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminsw, Vpminsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminub, Vpminub, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminub, Vpminub, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminud, Vpminud, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpminud, Vpminud, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpminuq, Vpminuq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpminuq, Vpminuq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpminuw, Vpminuw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpminuw, Vpminuw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Vec) // AVX512_BW + ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Vec) // AVX512_DQ + ASMJIT_INST_2x(vpmovdb, Vpmovdb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovdw, Vpmovdw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Vec, KReg) // AVX512_BW + ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Vec, KReg) // AVX512_DQ + ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Vec, KReg) // AVX512_DQ + 
ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Vec, KReg) // AVX512_BW + ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Vec) // AVX+ + ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Vec) // AVX512_DQ + ASMJIT_INST_2x(vpmovqb, Vpmovqb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovqd, Vpmovqd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovqw, Vpmovqw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovswb, Vpmovswb, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Vec) // AVX512_F{kz} + ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Vec) // AVX512_BW + ASMJIT_INST_2x(vpmovwb, Vpmovwb, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Vec) // AVX512_BW{kz} + ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Vec, Mem) // AVX+ AVX512_BW{kz} + 
ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_3x(vpmuldq, Vpmuldq, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmuldq, Vpmuldq, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulhw, Vpmulhw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulhw, Vpmulhw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmulld, Vpmulld, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmulld, Vpmulld, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpmullq, Vpmullq, Vec, Vec, Vec) // AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vpmullq, Vpmullq, Vec, Vec, Mem) // AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vpmullw, Vpmullw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmullw, Vpmullw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Vec, Vec, Vec) // AVX512_VBMI{kz|b64} + ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Vec, Vec, Mem) // AVX512_VBMI{kz|b64} + ASMJIT_INST_3x(vpmuludq, Vpmuludq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpmuludq, Vpmuludq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_2x(vpopcntb, Vpopcntb, Vec, Vec) // AVX512_BITALG{kz|b32} + ASMJIT_INST_2x(vpopcntb, Vpopcntb, Vec, Mem) // AVX512_BITALG{kz|b32} + ASMJIT_INST_2x(vpopcntd, Vpopcntd, Vec, Vec) // AVX512_VPOPCNTDQ{kz|b32} + ASMJIT_INST_2x(vpopcntd, Vpopcntd, Vec, Mem) // AVX512_VPOPCNTDQ{kz|b32} + ASMJIT_INST_2x(vpopcntq, Vpopcntq, Vec, Vec) // AVX512_VPOPCNTDQ{kz|b64} + ASMJIT_INST_2x(vpopcntq, Vpopcntq, Vec, Mem) // AVX512_VPOPCNTDQ{kz|b64} + ASMJIT_INST_2x(vpopcntw, Vpopcntw, Vec, Vec) // AVX512_BITALG{kz|b32} + ASMJIT_INST_2x(vpopcntw, Vpopcntw, Vec, Mem) // AVX512_BITALG{kz|b32} + ASMJIT_INST_3x(vpor, Vpor, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpor, Vpor, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpord, Vpord, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpord, Vpord, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vporq, Vporq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vporq, Vporq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vprold, Vprold, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vprold, Vprold, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vprolq, Vprolq, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vprolq, Vprolq, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vprolvd, Vprolvd, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vprolvd, Vprolvd, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vprolvq, Vprolvq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vprolvq, Vprolvq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vprord, Vprord, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vprord, Vprord, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vprorq, Vprorq, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vprorq, Vprorq, Vec, Mem, Imm) // AVX512_F{kz|b64} + 
ASMJIT_INST_3x(vprorvd, Vprorvd, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vprorvd, Vprorvd, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vprorvq, Vprorvq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vprorvq, Vprorvq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsadbw, Vpsadbw, Vec, Vec, Vec) // AVX+ AVX512_BW + ASMJIT_INST_3x(vpsadbw, Vpsadbw, Vec, Vec, Mem) // AVX+ AVX512_BW + ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_4i(vpshldd, Vpshldd, Vec, Vec, Vec, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_4i(vpshldd, Vpshldd, Vec, Vec, Mem, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvd, Vpshldvd, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvd, Vpshldvd, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvq, Vpshldvq, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvq, Vpshldvq, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvw, Vpshldvw, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshldvw, Vpshldvw, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_4i(vpshrdd, Vpshrdd, Vec, Vec, Vec, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_4i(vpshrdd, Vpshrdd, Vec, Vec, Mem, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Vec, Vec, Vec) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Vec, Vec, Mem) // AVX512_VBMI2{kz} + ASMJIT_INST_4i(vpshrdw, Vpshrdw, Vec, Vec, Vec, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_4i(vpshrdw, Vpshrdw, Vec, Vec, Mem, Imm) // AVX512_VBMI2{kz} + ASMJIT_INST_3x(vpshufb, Vpshufb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpshufb, Vpshufb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Vec, Vec) // AVX512_BITALG{k} + ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Vec, Mem) // AVX512_BITALG{k} + ASMJIT_INST_3i(vpshufd, Vpshufd, Vec, Vec, Imm) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3i(vpshufd, Vpshufd, Vec, Mem, Imm) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3i(vpshufhw, Vpshufhw, Vec, Vec, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpshufhw, Vpshufhw, Vec, Mem, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpshuflw, Vpshuflw, Vec, Vec, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpshuflw, Vpshuflw, Vec, Mem, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsignb, Vpsignb, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpsignb, Vpsignb, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpsignd, Vpsignd, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpsignd, Vpsignd, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpsignw, Vpsignw, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpsignw, Vpsignw, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3i(vpslld, Vpslld, Vec, Vec, Imm) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpslld, Vpslld, Vec, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_3x(vpslld, Vpslld, Vec, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_3i(vpslld, Vpslld, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vpslldq, Vpslldq, Vec, Vec, Imm) // AVX+ AVX512_BW + ASMJIT_INST_3i(vpslldq, Vpslldq, Vec, Mem, Imm) // AVX512_BW + ASMJIT_INST_3i(vpsllq, Vpsllq, Vec, Vec, 
Imm) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_3x(vpsllq, Vpsllq, Vec, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_3i(vpsllq, Vpsllq, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsllvd, Vpsllvd, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsllvd, Vpsllvd, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsllvq, Vpsllvq, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsllvq, Vpsllvq, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsllvw, Vpsllvw, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpsllvw, Vpsllvw, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3i(vpsllw, Vpsllw, Vec, Vec, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsllw, Vpsllw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpsllw, Vpsllw, Vec, Mem, Imm) // AVX512_BW{kz} + ASMJIT_INST_3i(vpsrad, Vpsrad, Vec, Vec, Imm) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_3x(vpsrad, Vpsrad, Vec, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_3i(vpsrad, Vpsrad, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Vec, Vec) // AVX512_F{kz} + ASMJIT_INST_3x(vpsraq, Vpsraq, Vec, Vec, Mem) // AVX512_F{kz} + ASMJIT_INST_3i(vpsraq, Vpsraq, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vpsraq, Vpsraq, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsravd, Vpsravd, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsravd, Vpsravd, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsravq, Vpsravq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsravq, Vpsravq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsravw, Vpsravw, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpsravw, Vpsravw, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3i(vpsraw, Vpsraw, Vec, Vec, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsraw, Vpsraw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpsraw, Vpsraw, Vec, Mem, Imm) // AVX512_BW{kz} + ASMJIT_INST_3i(vpsrld, Vpsrld, Vec, Vec, Imm) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Vec, Vec) // AVX+ AVX512_F{kz} + ASMJIT_INST_3x(vpsrld, Vpsrld, Vec, Vec, Mem) // AVX+ AVX512_F{kz} + ASMJIT_INST_3i(vpsrld, Vpsrld, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vpsrldq, Vpsrldq, Vec, Vec, Imm) // AVX+ AVX512_BW + ASMJIT_INST_3i(vpsrldq, Vpsrldq, Vec, Mem, Imm) // AVX512_BW + ASMJIT_INST_3i(vpsrlq, Vpsrlq, Vec, Vec, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Vec, Vec) // AVX AVX512_F{kz} + ASMJIT_INST_3x(vpsrlq, Vpsrlq, Vec, Vec, Mem) // AVX AVX512_F{kz} + ASMJIT_INST_3i(vpsrlq, Vpsrlq, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Vec, Vec, Vec) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Vec, Vec, Mem) // AVX2 AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Vec, Vec, Vec) // AVX512_BW{kz} + ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Vec, Vec, Mem) // AVX512_BW{kz} + ASMJIT_INST_3i(vpsrlw, Vpsrlw, Vec, Vec, Imm) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsrlw, Vpsrlw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3i(vpsrlw, Vpsrlw, Vec, 
Mem, Imm) // AVX512_BW{kz} + ASMJIT_INST_3x(vpsubb, Vpsubb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubb, Vpsubb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubd, Vpsubd, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsubd, Vpsubd, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpsubq, Vpsubq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsubq, Vpsubq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpsubsb, Vpsubsb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubsb, Vpsubsb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubsw, Vpsubsw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubsw, Vpsubsw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubusb, Vpsubusb, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubusb, Vpsubusb, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubusw, Vpsubusw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubusw, Vpsubusw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpsubw, Vpsubw, Vec, Vec, Vec) // AVX AVX512_BW{kz} + ASMJIT_INST_3x(vpsubw, Vpsubw, Vec, Vec, Mem) // AVX AVX512_BW{kz} + ASMJIT_INST_4i(vpternlogd, Vpternlogd, Vec, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vpternlogd, Vpternlogd, Vec, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vpternlogq, Vpternlogq, Vec, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vpternlogq, Vpternlogq, Vec, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vptest, Vptest, Vec, Vec) // AVX + ASMJIT_INST_2x(vptest, Vptest, Vec, Mem) // AVX + ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Vec, Vec) // AVX512_F{k|b32} + ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Vec, Mem) // AVX512_F{k|b32} + ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Vec, Vec) // AVX512_F{k|b64} + ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Vec, Mem) // AVX512_F{k|b64} + ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Vec, Vec) // AVX512_F{k|b32} + ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Vec, Mem) // AVX512_F{k|b32} + ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Vec, Vec) // AVX512_F{k|b64} + ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Vec, Mem) // AVX512_F{k|b64} + ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Vec, Vec) // AVX512_BW{k} + ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Vec, Mem) // AVX512_BW{k} + ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + 
ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b32} + ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Vec, Vec, Vec) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Vec, Vec, Mem) // AVX+ AVX512_F{kz|b64} + ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Vec, Vec, Vec) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Vec, Vec, Mem) // AVX+ AVX512_BW{kz} + ASMJIT_INST_3x(vpxor, Vpxor, Vec, Vec, Vec) // AVX+ + ASMJIT_INST_3x(vpxor, Vpxor, Vec, Vec, Mem) // AVX+ + ASMJIT_INST_3x(vpxord, Vpxord, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpxord, Vpxord, Vec, Vec, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vpxorq, Vpxorq, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vpxorq, Vpxorq, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vrangepd, Vrangepd, Vec, Vec, Vec, Imm) // AVX512_DQ{kz|b64} + ASMJIT_INST_4i(vrangepd, Vrangepd, Vec, Vec, Mem, Imm) // AVX512_DQ{kz|b64} + ASMJIT_INST_4i(vrangeps, Vrangeps, Vec, Vec, Vec, Imm) // AVX512_DQ{kz|b32} + ASMJIT_INST_4i(vrangeps, Vrangeps, Vec, Vec, Mem, Imm) // AVX512_DQ{kz|b32} ASMJIT_INST_4i(vrangesd, Vrangesd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae} ASMJIT_INST_4i(vrangesd, Vrangesd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae} ASMJIT_INST_4i(vrangess, Vrangess, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae} ASMJIT_INST_4i(vrangess, Vrangess, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae} - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Zmm, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Vec, Mem) // AVX512_F{kz|b32} ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Xmm) // AVX512_F{kz} ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Mem) // AVX512_F{kz} ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Xmm) // AVX512_F{kz} ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Zmm, Mem) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32} - ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Zmm, Mem) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Vec, Vec) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Vec, Mem) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Vec, Vec) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Vec, Mem) // AVX512_ER{kz|sae|b32} ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae} ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Mem) // 
AVX512_ER{kz|sae} ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae} ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Mem) // AVX512_ER{kz|sae} - ASMJIT_INST_2x(vrcpps, Vrcpps, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vrcpps, Vrcpps, Xmm, Mem) // AVX - ASMJIT_INST_2x(vrcpps, Vrcpps, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vrcpps, Vrcpps, Ymm, Mem) // AVX + ASMJIT_INST_2x(vrcpps, Vrcpps, Vec, Vec) // AVX + ASMJIT_INST_2x(vrcpps, Vrcpps, Vec, Mem) // AVX ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Xmm) // AVX ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3i(vreducepd, Vreducepd, Xmm, Xmm, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3i(vreducepd, Vreducepd, Xmm, Mem, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3i(vreducepd, Vreducepd, Ymm, Ymm, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3i(vreducepd, Vreducepd, Ymm, Mem, Imm) // AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3i(vreducepd, Vreducepd, Zmm, Zmm, Imm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3i(vreducepd, Vreducepd, Zmm, Mem, Imm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3i(vreduceps, Vreduceps, Xmm, Xmm, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3i(vreduceps, Vreduceps, Xmm, Mem, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3i(vreduceps, Vreduceps, Ymm, Ymm, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3i(vreduceps, Vreduceps, Ymm, Mem, Imm) // AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3i(vreduceps, Vreduceps, Zmm, Zmm, Imm) // AVX512_DQ{kz|b32} - ASMJIT_INST_3i(vreduceps, Vreduceps, Zmm, Mem, Imm) // AVX512_DQ{kz|b32} + ASMJIT_INST_3i(vreducepd, Vreducepd, Vec, Vec, Imm) // AVX512_DQ{kz|b64} + ASMJIT_INST_3i(vreducepd, Vreducepd, Vec, Mem, Imm) // AVX512_DQ{kz|b64} + ASMJIT_INST_3i(vreduceps, Vreduceps, Vec, Vec, Imm) // AVX512_DQ{kz|b32} + ASMJIT_INST_3i(vreduceps, Vreduceps, Vec, Mem, Imm) // AVX512_DQ{kz|b32} ASMJIT_INST_4i(vreducesd, Vreducesd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz} ASMJIT_INST_4i(vreducesd, Vreducesd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz} ASMJIT_INST_4i(vreducess, Vreducess, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz} ASMJIT_INST_4i(vreducess, Vreducess, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz} - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64} - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32} - ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32} + ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Vec, Mem, Imm) // AVX512_F{kz|b32} ASMJIT_INST_4i(vrndscalesd, Vrndscalesd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vrndscalesd, Vrndscalesd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} 
ASMJIT_INST_4i(vrndscaless, Vrndscaless, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae} ASMJIT_INST_4i(vrndscaless, Vrndscaless, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae} - ASMJIT_INST_3i(vroundpd, Vroundpd, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_3i(vroundpd, Vroundpd, Xmm, Mem, Imm) // AVX - ASMJIT_INST_3i(vroundpd, Vroundpd, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_3i(vroundpd, Vroundpd, Ymm, Mem, Imm) // AVX - ASMJIT_INST_3i(vroundps, Vroundps, Xmm, Xmm, Imm) // AVX - ASMJIT_INST_3i(vroundps, Vroundps, Xmm, Mem, Imm) // AVX - ASMJIT_INST_3i(vroundps, Vroundps, Ymm, Ymm, Imm) // AVX - ASMJIT_INST_3i(vroundps, Vroundps, Ymm, Mem, Imm) // AVX + ASMJIT_INST_3i(vroundpd, Vroundpd, Vec, Vec, Imm) // AVX + ASMJIT_INST_3i(vroundpd, Vroundpd, Vec, Mem, Imm) // AVX + ASMJIT_INST_3i(vroundps, Vroundps, Vec, Vec, Imm) // AVX + ASMJIT_INST_3i(vroundps, Vroundps, Vec, Mem, Imm) // AVX ASMJIT_INST_4i(vroundsd, Vroundsd, Xmm, Xmm, Xmm, Imm) // AVX ASMJIT_INST_4i(vroundsd, Vroundsd, Xmm, Xmm, Mem, Imm) // AVX ASMJIT_INST_4i(vroundss, Vroundss, Xmm, Xmm, Xmm, Imm) // AVX ASMJIT_INST_4i(vroundss, Vroundss, Xmm, Xmm, Mem, Imm) // AVX - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Zmm, Mem) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Vec, Mem) // AVX512_F{kz|b32} ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Xmm) // AVX512_F{kz} ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Mem) // AVX512_F{kz} ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Xmm) // AVX512_F{kz} ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Mem) // AVX512_F{kz} - ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Zmm, Mem) // AVX512_ER{kz|sae|b64} - ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32} - ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Zmm, Mem) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Vec, Vec) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Vec, Mem) // AVX512_ER{kz|sae|b64} + ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Vec, Vec) // AVX512_ER{kz|sae|b32} + ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Vec, Mem) // AVX512_ER{kz|sae|b32} ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae} ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Mem) // AVX512_ER{kz|sae} ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae} ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Mem) // AVX512_ER{kz|sae} - ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vrsqrtps, 
Vrsqrtps, Xmm, Mem) // AVX - ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Ymm, Mem) // AVX + ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Vec, Vec) // AVX + ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Vec, Mem) // AVX ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Xmm) // AVX ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Mem) // AVX - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vscalefpd, Vscalefpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vscalefps, Vscalefps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vscalefps, Vscalefps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vscalefps, Vscalefps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vscalefps, Vscalefps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vscalefps, Vscalefps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vscalefps, Vscalefps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vscalefpd, Vscalefpd, Vec, Vec, Vec) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vscalefpd, Vscalefpd, Vec, Vec, Mem) // AVX512_F{kz|b64} + ASMJIT_INST_3x(vscalefps, Vscalefps, Vec, Vec, Vec) // AVX512_F{kz|b32} + ASMJIT_INST_3x(vscalefps, Vscalefps, Vec, Vec, Mem) // AVX512_F{kz|b32} ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Xmm) // AVX512_F{kz|er} ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Mem) // AVX512_F{kz|er} ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Xmm) // AVX512_F{kz|er} ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Mem) // AVX512_F{kz|er} - ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Zmm) // AVX512_F{k} - ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Zmm) // AVX512_F{k} + ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Vec) // AVX512_F{k} ASMJIT_INST_1x(vscatterpf0dpd, Vscatterpf0dpd, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vscatterpf0dps, Vscatterpf0dps, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vscatterpf0qpd, Vscatterpf0qpd, Mem) // AVX512_PF{k} @@ -4896,120 +3432,57 @@ struct EmitterExplicitT { ASMJIT_INST_1x(vscatterpf1dps, Vscatterpf1dps, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vscatterpf1qpd, Vscatterpf1qpd, Mem) // AVX512_PF{k} ASMJIT_INST_1x(vscatterpf1qps, Vscatterpf1qps, Mem) // AVX512_PF{k} - ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Ymm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Zmm) // AVX512_F{k} - ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Xmm) // AVX512_F{k}-VL - ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Ymm) // AVX512_F{k} - ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32} 
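The pattern repeated throughout this hunk is the point of the header change: the per-width overloads (Xmm, Ymm, Zmm, with the -VL suffix marking forms that additionally required AVX512_VL) collapse into a single Vec signature, and the operand width passed by the caller selects the encoding. A minimal usage sketch, assuming an x86::Assembler named `a`:

    a.vsqrtps(x86::xmm0, x86::xmm1);   // 128-bit form
    a.vsqrtps(x86::ymm0, x86::ymm1);   // 256-bit form
    a.vsqrtps(x86::zmm0, x86::zmm1);   // 512-bit form, requires AVX512_F at runtime

These calls are accepted by both the old per-width overloads and the new unified one; only the number of generated overloads changes.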
- ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vshufpd, Vshufpd, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufpd, Vshufpd, Xmm, Xmm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufpd, Vshufpd, Ymm, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufpd, Vshufpd, Ymm, Ymm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_4i(vshufpd, Vshufpd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vshufpd, Vshufpd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32} - ASMJIT_INST_4i(vshufps, Vshufps, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufps, Vshufps, Xmm, Xmm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufps, Vshufps, Ymm, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufps, Vshufps, Ymm, Ymm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_4i(vshufps, Vshufps, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_4i(vshufps, Vshufps, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64} - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_2x(vsqrtps, Vsqrtps, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Vec) // AVX512_F{k} + ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Vec, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Vec, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Vec, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Vec, Vec, Mem, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Vec, Vec, Vec, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Vec, Vec, Mem, Imm) // AVX512_F{kz|b32} + ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Vec, Vec, Vec, Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Vec, Vec, Mem, 
Imm) // AVX512_F{kz|b64} + ASMJIT_INST_4i(vshufpd, Vshufpd, Vec, Vec, Vec, Imm) // AVX AVX512_F{kz|b32} + ASMJIT_INST_4i(vshufpd, Vshufpd, Vec, Vec, Mem, Imm) // AVX AVX512_F{kz|b32} + ASMJIT_INST_4i(vshufps, Vshufps, Vec, Vec, Vec, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_4i(vshufps, Vshufps, Vec, Vec, Mem, Imm) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_2x(vsqrtps, Vsqrtps, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_2x(vsqrtps, Vsqrtps, Vec, Mem) // AVX AVX512_F{kz|b32} ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_1x(vstmxcsr, Vstmxcsr, Mem) // AVX - ASMJIT_INST_3x(vsubpd, Vsubpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vsubpd, Vsubpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vsubpd, Vsubpd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vsubpd, Vsubpd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vsubpd, Vsubpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vsubpd, Vsubpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64} - ASMJIT_INST_3x(vsubps, Vsubps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vsubps, Vsubps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vsubps, Vsubps, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vsubps, Vsubps, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vsubps, Vsubps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32} - ASMJIT_INST_3x(vsubps, Vsubps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32} + ASMJIT_INST_3x(vsubpd, Vsubpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vsubpd, Vsubpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vsubps, Vsubps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vsubps, Vsubps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er} ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er} - ASMJIT_INST_2x(vtestpd, Vtestpd, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vtestpd, Vtestpd, Xmm, Mem) // AVX - ASMJIT_INST_2x(vtestpd, Vtestpd, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vtestpd, Vtestpd, Ymm, Mem) // AVX - ASMJIT_INST_2x(vtestps, Vtestps, Xmm, Xmm) // AVX - ASMJIT_INST_2x(vtestps, Vtestps, Xmm, Mem) // AVX - ASMJIT_INST_2x(vtestps, Vtestps, Ymm, Ymm) // AVX - ASMJIT_INST_2x(vtestps, Vtestps, Ymm, Mem) // AVX + ASMJIT_INST_2x(vtestpd, Vtestpd, Vec, Vec) // AVX + ASMJIT_INST_2x(vtestpd, Vtestpd, Vec, Mem) // AVX + ASMJIT_INST_2x(vtestps, Vtestps, Vec, Vec) // AVX + ASMJIT_INST_2x(vtestps, Vtestps, Vec, Mem) // AVX ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Xmm) // AVX AVX512_F{sae} ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Mem) // AVX AVX512_F{sae} ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Xmm) // AVX AVX512_F{sae} ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Mem) // AVX AVX512_F{sae} - ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - 
ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vunpckhps, Vunpckhps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64} - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vunpcklps, Vunpcklps, Zmm, Zmm, Mem) // AVX512_F{kz|b32} - ASMJIT_INST_3x(vxorpd, Vxorpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vxorpd, Vxorpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vxorpd, Vxorpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vxorpd, Vxorpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL - ASMJIT_INST_3x(vxorpd, Vxorpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vxorpd, Vxorpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64} - ASMJIT_INST_3x(vxorps, Vxorps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vxorps, Vxorps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vxorps, Vxorps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vxorps, Vxorps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL - ASMJIT_INST_3x(vxorps, Vxorps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32} - ASMJIT_INST_3x(vxorps, Vxorps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vunpckhps, Vunpckhps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vunpckhps, Vunpckhps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Vec, Vec, Vec) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Vec, Vec, Mem) // AVX AVX512_F{kz|b64} + ASMJIT_INST_3x(vunpcklps, Vunpcklps, Vec, Vec, Vec) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vunpcklps, Vunpcklps, Vec, Vec, Mem) // AVX AVX512_F{kz|b32} + ASMJIT_INST_3x(vxorpd, Vxorpd, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vxorpd, Vxorpd, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b64} + ASMJIT_INST_3x(vxorps, Vxorps, Vec, Vec, Vec) // AVX AVX512_DQ{kz|b32} + ASMJIT_INST_3x(vxorps, Vxorps, Vec, Vec, Mem) // AVX AVX512_DQ{kz|b32} ASMJIT_INST_0x(vzeroall, Vzeroall) // AVX ASMJIT_INST_0x(vzeroupper, Vzeroupper) // AVX @@ -5018,96 +3491,60 @@ 
struct EmitterExplicitT { //! \name FMA4 Instructions //! \{ - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Ymm, Mem) // FMA4 + ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddps, Vfmaddps, Vec, Vec, Vec, Mem) // FMA4 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Mem) // FMA4 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, 
Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Ymm, Mem) // FMA4 + ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfmsubps, Vfmsubps, Vec, Vec, Vec, Mem) // FMA4 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Mem) // FMA4 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Ymm, Mem) // FMA4 + ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Vec, Vec, Vec, Mem) // FMA4 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Mem) // 
FMA4 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Ymm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Xmm, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Mem, Xmm) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Xmm, Mem) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Ymm, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Mem, Ymm) // FMA4 - ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Ymm, Mem) // FMA4 + ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Vec, Vec, Vec, Mem) // FMA4 + ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Vec, Vec) // FMA4 + ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Mem, Vec) // FMA4 + ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Vec, Vec, Vec, Mem) // FMA4 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Xmm) // FMA4 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Mem, Xmm) // FMA4 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Mem) // FMA4 @@ -5120,24 +3557,17 @@ struct EmitterExplicitT { //! \name XOP Instructions (Deprecated) //! \{ - ASMJIT_INST_2x(vfrczpd, Vfrczpd, Xmm, Xmm) // XOP - ASMJIT_INST_2x(vfrczpd, Vfrczpd, Xmm, Mem) // XOP - ASMJIT_INST_2x(vfrczpd, Vfrczpd, Ymm, Ymm) // XOP - ASMJIT_INST_2x(vfrczpd, Vfrczpd, Ymm, Mem) // XOP - ASMJIT_INST_2x(vfrczps, Vfrczps, Xmm, Xmm) // XOP - ASMJIT_INST_2x(vfrczps, Vfrczps, Xmm, Mem) // XOP - ASMJIT_INST_2x(vfrczps, Vfrczps, Ymm, Ymm) // XOP - ASMJIT_INST_2x(vfrczps, Vfrczps, Ymm, Mem) // XOP + ASMJIT_INST_2x(vfrczpd, Vfrczpd, Vec, Vec) // XOP + ASMJIT_INST_2x(vfrczpd, Vfrczpd, Vec, Mem) // XOP + ASMJIT_INST_2x(vfrczps, Vfrczps, Vec, Vec) // XOP + ASMJIT_INST_2x(vfrczps, Vfrczps, Vec, Mem) // XOP ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Xmm) // XOP ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Mem) // XOP ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Xmm) // XOP ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Mem) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Xmm, Xmm) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Mem, Xmm) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Xmm, Mem) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Ymm, Ymm) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Mem, Ymm) // XOP - ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Ymm, Mem) // XOP + ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Vec, Vec) // XOP + ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Mem, Vec) // XOP + ASMJIT_INST_4x(vpcmov, Vpcmov, Vec, Vec, Vec, Mem) // XOP ASMJIT_INST_4i(vpcomb, Vpcomb, Xmm, Xmm, Xmm, Imm) // XOP ASMJIT_INST_4i(vpcomb, Vpcomb, Xmm, Xmm, Mem, Imm) // XOP ASMJIT_INST_4i(vpcomd, Vpcomd, Xmm, Xmm, Xmm, Imm) // XOP @@ -5154,18 +3584,12 @@ struct EmitterExplicitT { ASMJIT_INST_4i(vpcomuq, Vpcomuq, Xmm, Xmm, Mem, Imm) // XOP ASMJIT_INST_4i(vpcomuw, Vpcomuw, Xmm, Xmm, Xmm, Imm) // XOP ASMJIT_INST_4i(vpcomuw, Vpcomuw, Xmm, Xmm, Mem, Imm) // XOP - ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Xmm, Xmm, Imm) // XOP - 
ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Mem, Xmm, Imm) // XOP - ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Xmm, Mem, Imm) // XOP - ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Ymm, Ymm, Imm) // XOP - ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Mem, Ymm, Imm) // XOP - ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Ymm, Mem, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Xmm, Xmm, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Mem, Xmm, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Xmm, Mem, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Ymm, Ymm, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Mem, Ymm, Imm) // XOP - ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Ymm, Mem, Imm) // XOP + ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Vec, Vec, Vec, Vec, Imm) // XOP + ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Vec, Vec, Mem, Vec, Imm) // XOP + ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Vec, Vec, Vec, Mem, Imm) // XOP + ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Vec, Vec, Vec, Vec, Imm) // XOP + ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Vec, Vec, Mem, Vec, Imm) // XOP + ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Vec, Vec, Vec, Mem, Imm) // XOP ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Xmm) // XOP ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Mem) // XOP ASMJIT_INST_2x(vphaddbq, Vphaddbq, Xmm, Xmm) // XOP @@ -5269,14 +3693,38 @@ struct EmitterExplicitT { ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Xmm, Mem) // XOP //! \} + + //! \name AMX Instructions + //! \{ + + ASMJIT_INST_1x(ldtilecfg, Ldtilecfg, Mem) // AMX_TILE + ASMJIT_INST_1x(sttilecfg, Sttilecfg, Mem) // AMX_TILE + ASMJIT_INST_2x(tileloadd, Tileloadd, Tmm, Mem) // AMX_TILE + ASMJIT_INST_2x(tileloaddt1, Tileloaddt1, Tmm, Mem) // AMX_TILE + ASMJIT_INST_0x(tilerelease, Tilerelease) // AMX_TILE + ASMJIT_INST_2x(tilestored, Tilestored, Mem, Tmm) // AMX_TILE + ASMJIT_INST_1x(tilezero, Tilezero, Tmm) // AMX_TILE + + ASMJIT_INST_3x(tdpbf16ps, Tdpbf16ps, Tmm, Tmm, Tmm) // AMX_BF16 + ASMJIT_INST_3x(tdpbssd, Tdpbssd, Tmm, Tmm, Tmm) // AMX_INT8 + ASMJIT_INST_3x(tdpbsud, Tdpbsud, Tmm, Tmm, Tmm) // AMX_INT8 + ASMJIT_INST_3x(tdpbusd, Tdpbusd, Tmm, Tmm, Tmm) // AMX_INT8 + ASMJIT_INST_3x(tdpbuud, Tdpbuud, Tmm, Tmm, Tmm) // AMX_INT8 + + //! \} }; // ============================================================================ // [asmjit::x86::EmitterImplicitT] // ============================================================================ +//! Emitter (X86 - implicit). template struct EmitterImplicitT : public EmitterExplicitT { + //! \cond + using EmitterExplicitT::_emitter; + //! \endcond + //! \name Prefix Options //! \{ @@ -5294,17 +3742,13 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \} - //! \name Base Instructions & GP Extensions + //! \name Core Instructions //! \{ //! \cond - using EmitterExplicitT::_emitter; - - // TODO: xrstor and xsave don't have explicit variants yet. 
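The AMX block added a little further up is new surface area rather than a refactor, so a short emission sketch may help. It assumes an x86::Assembler named `a`, a 64-byte tile configuration already stored at [rdi], and base/stride registers set up by the caller:

    a.ldtilecfg(x86::ptr(x86::rdi));                        // load the tile configuration
    a.tileloadd(x86::tmm0, x86::ptr(x86::rsi, x86::rcx));   // AMX loads/stores take a base+index memory operand
    a.tileloadd(x86::tmm1, x86::ptr(x86::rdx, x86::rcx));
    a.tdpbssd(x86::tmm2, x86::tmm0, x86::tmm1);             // tmm2 += signed(tmm0) * signed(tmm1), AMX_INT8
    a.tilestored(x86::ptr(x86::rax, x86::rcx), x86::tmm2);
    a.tilerelease();

The instruction names and operand shapes come straight from the macro list above; the register assignments are assumptions for the sake of the example.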
using EmitterExplicitT::cbw; using EmitterExplicitT::cdq; using EmitterExplicitT::cdqe; - using EmitterExplicitT::clzero; using EmitterExplicitT::cqo; using EmitterExplicitT::cwd; using EmitterExplicitT::cwde; @@ -5312,23 +3756,14 @@ struct EmitterImplicitT : public EmitterExplicitT { using EmitterExplicitT::cmpxchg; using EmitterExplicitT::cmpxchg8b; using EmitterExplicitT::cmpxchg16b; - using EmitterExplicitT::cpuid; using EmitterExplicitT::div; using EmitterExplicitT::idiv; using EmitterExplicitT::imul; using EmitterExplicitT::jecxz; - using EmitterExplicitT::lahf; - using EmitterExplicitT::mulx; - using EmitterExplicitT::movsd; + using EmitterExplicitT::loop; + using EmitterExplicitT::loope; + using EmitterExplicitT::loopne; using EmitterExplicitT::mul; - using EmitterExplicitT::rdmsr; - using EmitterExplicitT::rdpmc; - using EmitterExplicitT::rdtsc; - using EmitterExplicitT::rdtscp; - using EmitterExplicitT::sahf; - using EmitterExplicitT::wrmsr; - using EmitterExplicitT::xgetbv; - using EmitterExplicitT::xsetbv; //! \endcond ASMJIT_INST_0x(cbw, Cbw) // ANY [IMPLICIT] AX <- Sign Extend AL @@ -5338,12 +3773,9 @@ struct EmitterImplicitT : public EmitterExplicitT { ASMJIT_INST_2x(cmpxchg, Cmpxchg, Mem, Gp) // I486 [IMPLICIT] ASMJIT_INST_1x(cmpxchg16b, Cmpxchg16b, Mem) // CMPXCHG8B [IMPLICIT] m == RDX:RAX ? m <- RCX:RBX ASMJIT_INST_1x(cmpxchg8b, Cmpxchg8b, Mem) // CMPXCHG16B[IMPLICIT] m == EDX:EAX ? m <- ECX:EBX - ASMJIT_INST_0x(cpuid, Cpuid) // I486 [IMPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX] ASMJIT_INST_0x(cqo, Cqo) // X64 [IMPLICIT] RDX:RAX <- Sign Extend RAX ASMJIT_INST_0x(cwd, Cwd) // ANY [IMPLICIT] DX:AX <- Sign Extend AX ASMJIT_INST_0x(cwde, Cwde) // ANY [IMPLICIT] EAX <- Sign Extend AX - ASMJIT_INST_0x(daa, Daa) - ASMJIT_INST_0x(das, Das) ASMJIT_INST_1x(div, Div, Gp) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- DX:AX / r16|r32|r64} ASMJIT_INST_1x(div, Div, Mem) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / m8} {xDX[Rem]:xAX[Quot] <- DX:AX / m16|m32|m64} ASMJIT_INST_1x(idiv, Idiv, Gp) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- DX:AX / r16|r32|r64} @@ -5357,7 +3789,6 @@ struct EmitterImplicitT : public EmitterExplicitT { ASMJIT_INST_1x(jecxz, Jecxz, Label) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero. ASMJIT_INST_1x(jecxz, Jecxz, Imm) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero. ASMJIT_INST_1x(jecxz, Jecxz, uint64_t) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero. - ASMJIT_INST_0x(lahf, Lahf) // LAHFSAHF [IMPLICIT] AH <- EFL ASMJIT_INST_1x(loop, Loop, Label) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0. ASMJIT_INST_1x(loop, Loop, Imm) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0. ASMJIT_INST_1x(loop, Loop, uint64_t) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0. @@ -5369,20 +3800,8 @@ struct EmitterImplicitT : public EmitterExplicitT { ASMJIT_INST_1x(loopne, Loopne, uint64_t) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0. 
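These implicit forms pair naturally with the sign-extension helpers listed in the same block; for example, a full-width signed division can be emitted as follows (a sketch, assuming an x86::Assembler named `a` and the dividend already in rax):

    a.cqo();            // RDX:RAX <- sign-extend RAX
    a.idiv(x86::rcx);   // RAX <- quotient, RDX <- remainder

Both calls map directly to ASMJIT_INST_0x/1x entries in this block.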
ASMJIT_INST_1x(mul, Mul, Gp) // ANY [IMPLICIT] {AX <- AL * r8} {xDX:xAX <- xAX * r16|r32|r64} ASMJIT_INST_1x(mul, Mul, Mem) // ANY [IMPLICIT] {AX <- AL * m8} {xDX:xAX <- xAX * m16|m32|m64} - ASMJIT_INST_0x(rdmsr, Rdmsr) // ANY [IMPLICIT] - ASMJIT_INST_0x(rdpmc, Rdpmc) // ANY [IMPLICIT] - ASMJIT_INST_0x(rdtsc, Rdtsc) // RDTSC [IMPLICIT] EDX:EAX <- CNT - ASMJIT_INST_0x(rdtscp, Rdtscp) // RDTSCP [IMPLICIT] EDX:EAX:EXC <- CNT ASMJIT_INST_0x(ret, Ret) ASMJIT_INST_1i(ret, Ret, Imm) - ASMJIT_INST_0x(sahf, Sahf) // LAHFSAHF [IMPLICIT] EFL <- AH - ASMJIT_INST_0x(syscall, Syscall) // X64 [IMPLICIT] - ASMJIT_INST_0x(sysenter, Sysenter) // X64 [IMPLICIT] - ASMJIT_INST_0x(sysexit, Sysexit) // X64 [IMPLICIT] - ASMJIT_INST_0x(sysexit64, Sysexit64) // X64 [IMPLICIT] - ASMJIT_INST_0x(sysret, Sysret) // X64 [IMPLICIT] - ASMJIT_INST_0x(sysret64, Sysret64) // X64 [IMPLICIT] - ASMJIT_INST_0x(wrmsr, Wrmsr) // ANY [IMPLICIT] ASMJIT_INST_0x(xlatb, Xlatb) // ANY [IMPLICIT] //! \} @@ -5390,15 +3809,19 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \name String Instruction Aliases //! \{ + //! \cond + using EmitterExplicitT::movsd; + //! \endcond + inline Error cmpsb() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT::ptr_zsi(0, 1), EmitterExplicitT::ptr_zdi(0, 1)); } inline Error cmpsd() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT::ptr_zsi(0, 4), EmitterExplicitT::ptr_zdi(0, 4)); } inline Error cmpsq() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT::ptr_zsi(0, 8), EmitterExplicitT::ptr_zdi(0, 8)); } inline Error cmpsw() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT::ptr_zsi(0, 2), EmitterExplicitT::ptr_zdi(0, 2)); } - inline Error lodsb() { return _emitter()->emit(Inst::kIdLods, al , EmitterExplicitT::ptr_zdi(0, 1)); } - inline Error lodsd() { return _emitter()->emit(Inst::kIdLods, eax, EmitterExplicitT::ptr_zdi(0, 4)); } - inline Error lodsq() { return _emitter()->emit(Inst::kIdLods, rax, EmitterExplicitT::ptr_zdi(0, 8)); } - inline Error lodsw() { return _emitter()->emit(Inst::kIdLods, ax , EmitterExplicitT::ptr_zdi(0, 2)); } + inline Error lodsb() { return _emitter()->emit(Inst::kIdLods, al , EmitterExplicitT::ptr_zsi(0, 1)); } + inline Error lodsd() { return _emitter()->emit(Inst::kIdLods, eax, EmitterExplicitT::ptr_zsi(0, 4)); } + inline Error lodsq() { return _emitter()->emit(Inst::kIdLods, rax, EmitterExplicitT::ptr_zsi(0, 8)); } + inline Error lodsw() { return _emitter()->emit(Inst::kIdLods, ax , EmitterExplicitT::ptr_zsi(0, 2)); } inline Error movsb() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT::ptr_zdi(0, 1), EmitterExplicitT::ptr_zsi(0, 1)); } inline Error movsd() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT::ptr_zdi(0, 4), EmitterExplicitT::ptr_zsi(0, 4)); } @@ -5417,24 +3840,109 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \} - //! \name CL Instructions + //! \name Deprecated 32-bit Instructions + //! \{ + + //! \cond + using EmitterExplicitT::aaa; + using EmitterExplicitT::aad; + using EmitterExplicitT::aam; + using EmitterExplicitT::aas; + using EmitterExplicitT::daa; + using EmitterExplicitT::das; + //! \endcond + + ASMJIT_INST_0x(aaa, Aaa) // X86 [IMPLICIT] + ASMJIT_INST_1i(aad, Aad, Imm) // X86 [IMPLICIT] + ASMJIT_INST_1i(aam, Aam, Imm) // X86 [IMPLICIT] + ASMJIT_INST_0x(aas, Aas) // X86 [IMPLICIT] + ASMJIT_INST_0x(daa, Daa) // X86 [IMPLICIT] + ASMJIT_INST_0x(das, Das) // X86 [IMPLICIT] + + //! \} + + //! \name LAHF/SAHF Instructions + //! \{ + + //! 
\cond + using EmitterExplicitT::lahf; + using EmitterExplicitT::sahf; + //! \endcond + + ASMJIT_INST_0x(lahf, Lahf) // LAHFSAHF [IMPLICIT] AH <- EFL + ASMJIT_INST_0x(sahf, Sahf) // LAHFSAHF [IMPLICIT] EFL <- AH + + //! \} + + //! \name CPUID Instruction + //! \{ + + //! \cond + using EmitterExplicitT::cpuid; + //! \endcond + + ASMJIT_INST_0x(cpuid, Cpuid) // I486 [IMPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX] + + //! \} + + //! \name CacheLine Instructions //! \{ + //! \cond + using EmitterExplicitT::clzero; + //! \endcond + ASMJIT_INST_0x(clzero, Clzero) // CLZERO [IMPLICIT] //! \} + //! \name RDPRU/RDPKRU Instructions + //! \{ + + //! \cond + using EmitterExplicitT::rdpru; + using EmitterExplicitT::rdpkru; + //! \endcond + + ASMJIT_INST_0x(rdpru, Rdpru) // RDPRU [IMPLICIT] EDX:EAX <- PRU[ECX] + ASMJIT_INST_0x(rdpkru, Rdpkru) // RDPKRU [IMPLICIT] EDX:EAX <- PKRU[ECX] + + //! \} + + //! \name RDTSC/RDTSCP Instructions + //! \{ + + //! \cond + using EmitterExplicitT::rdtsc; + using EmitterExplicitT::rdtscp; + //! \endcond + + ASMJIT_INST_0x(rdtsc, Rdtsc) // RDTSC [IMPLICIT] EDX:EAX <- CNT + ASMJIT_INST_0x(rdtscp, Rdtscp) // RDTSCP [IMPLICIT] EDX:EAX:EXC <- CNT + + //! \} + //! \name BMI2 Instructions //! \{ + //! \cond + using EmitterExplicitT::mulx; + //! \endcond + ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Gp) // BMI2 [IMPLICIT] ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Mem) // BMI2 [IMPLICIT] //! \} - //! \name FXSR & XSAVE Instructions + //! \name XSAVE Instructions //! \{ + // TODO: xrstor and xsave don't have explicit variants yet. + + //! \cond + using EmitterExplicitT::xgetbv; + //! \endcond + ASMJIT_INST_0x(xgetbv, Xgetbv) // XSAVE [IMPLICIT] EDX:EAX <- XCR[ECX] ASMJIT_INST_1x(xrstor, Xrstor, Mem) // XSAVE [IMPLICIT] ASMJIT_INST_1x(xrstor64, Xrstor64, Mem) // XSAVE+X64 [IMPLICIT] @@ -5448,6 +3956,35 @@ struct EmitterImplicitT : public EmitterExplicitT { ASMJIT_INST_1x(xsaveopt64, Xsaveopt64, Mem) // XSAVE+X64 [IMPLICIT] ASMJIT_INST_1x(xsaves, Xsaves, Mem) // XSAVE [IMPLICIT] ASMJIT_INST_1x(xsaves64, Xsaves64, Mem) // XSAVE+X64 [IMPLICIT] + + //! \} + + //! \name SYSCALL/SYSENTER Instructions + //! \{ + + ASMJIT_INST_0x(syscall, Syscall) // X64 [IMPLICIT] + ASMJIT_INST_0x(sysenter, Sysenter) // X64 [IMPLICIT] + + //! \} + + //! \name Privileged Instructions + //! \{ + + //! \cond + using EmitterExplicitT::rdmsr; + using EmitterExplicitT::rdpmc; + using EmitterExplicitT::wrmsr; + using EmitterExplicitT::xsetbv; + //! \endcond + + ASMJIT_INST_0x(pconfig, Pconfig) // PCONFIG [IMPLICIT] + ASMJIT_INST_0x(rdmsr, Rdmsr) // ANY [IMPLICIT] + ASMJIT_INST_0x(rdpmc, Rdpmc) // ANY [IMPLICIT] + ASMJIT_INST_0x(sysexit, Sysexit) // X64 [IMPLICIT] + ASMJIT_INST_0x(sysexit64, Sysexit64) // X64 [IMPLICIT] + ASMJIT_INST_0x(sysret, Sysret) // X64 [IMPLICIT] + ASMJIT_INST_0x(sysret64, Sysret64) // X64 [IMPLICIT] + ASMJIT_INST_0x(wrmsr, Wrmsr) // ANY [IMPLICIT] ASMJIT_INST_0x(xsetbv, Xsetbv) // XSAVE [IMPLICIT] XCR[ECX] <- EDX:EAX //! \} @@ -5455,6 +3992,13 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \name Monitor & MWait Instructions //! \{ + //! \cond + using EmitterExplicitT::monitor; + using EmitterExplicitT::monitorx; + using EmitterExplicitT::mwait; + using EmitterExplicitT::mwaitx; + //! \endcond + ASMJIT_INST_0x(monitor, Monitor) ASMJIT_INST_0x(monitorx, Monitorx) ASMJIT_INST_0x(mwait, Mwait) @@ -5462,6 +4006,19 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \} + //! \name WAITPKG Instructions + //! \{ + + //! 
\cond + using EmitterExplicitT::tpause; + using EmitterExplicitT::umwait; + //! \endcond + + ASMJIT_INST_1x(tpause, Tpause, Gp) + ASMJIT_INST_1x(umwait, Umwait, Gp) + + //! \} + //! \name MMX & SSE Instructions //! \{ @@ -5499,7 +4056,9 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \name SHA Instructions //! \{ + //! \cond using EmitterExplicitT::sha256rnds2; + //! \endcond ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Xmm) // SHA [IMPLICIT] ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Mem) // SHA [IMPLICIT] @@ -5509,21 +4068,23 @@ struct EmitterImplicitT : public EmitterExplicitT { //! \name AVX, FMA, and AVX512 Instructions //! \{ + //! \cond using EmitterExplicitT::vmaskmovdqu; using EmitterExplicitT::vpcmpestri; using EmitterExplicitT::vpcmpestrm; using EmitterExplicitT::vpcmpistri; using EmitterExplicitT::vpcmpistrm; + //! \endcond - ASMJIT_INST_2x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm) // AVX [IMPLICIT] - ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm) // AVX [IMPLICIT] + ASMJIT_INST_2x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm) // AVX [IMPLICIT] + ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm) // AVX [IMPLICIT] //! \} }; diff --git a/libs/asmjit/src/asmjit/x86/x86features.cpp b/libs/asmjit/src/asmjit/x86/x86features.cpp index 6ee5772..16698c8 100644 --- a/libs/asmjit/src/asmjit/x86/x86features.cpp +++ b/libs/asmjit/src/asmjit/x86/x86features.cpp @@ -106,22 +106,25 @@ static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uin } static inline void simplifyCpuBrand(char* s) noexcept { - // Used to always clear the current character to ensure that the result - // doesn't contain garbage after the new zero terminator. char* d = s; + char c = s[0]; char prev = 0; - char curr = s[0]; + + // Used to always clear the current character to ensure that the result + // doesn't contain garbage after a new null terminator is placed at the end. 
s[0] = '\0'; for (;;) { - if (curr == 0) + if (!c) break; - if (!(curr == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) - *d++ = prev = curr; + if (!(c == ' ' && (prev == '@' || s[1] == ' ' || s[1] == '@'))) { + *d++ = c; + prev = c; + } - curr = *++s; + c = *++s; s[0] = '\0'; } @@ -136,22 +139,26 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { Features& features = cpu._features.as(); cpu.reset(); - cpu._archInfo.init(ArchInfo::kIdHost); + cpu._arch = Environment::kArchHost; + cpu._subArch = Environment::kSubArchUnknown; + cpu._reserved = 0; cpu._maxLogicalProcessors = 1; features.add(Features::kI486); // -------------------------------------------------------------------------- - // [CPUID EAX=0x0] + // [CPUID EAX=0] // -------------------------------------------------------------------------- // Get vendor string/id. cpuidQuery(®s, 0x0); uint32_t maxId = regs.eax; + uint32_t maxSubLeafId_0x7 = 0; + simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx); // -------------------------------------------------------------------------- - // [CPUID EAX=0x1] + // [CPUID EAX=1] // -------------------------------------------------------------------------- if (maxId >= 0x1) { @@ -167,7 +174,7 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { modelId += (((regs.eax >> 16) & 0x0Fu) << 4); if (familyId == 0x0Fu) - familyId += (((regs.eax >> 20) & 0xFFu) << 4); + familyId += ((regs.eax >> 20) & 0xFFu); cpu._modelId = modelId; cpu._familyId = familyId; @@ -200,12 +207,12 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { if (bitTest(regs.edx, 19)) features.add(Features::kCLFLUSH); if (bitTest(regs.edx, 23)) features.add(Features::kMMX); if (bitTest(regs.edx, 24)) features.add(Features::kFXSR); - if (bitTest(regs.edx, 25)) features.add(Features::kSSE, Features::kMMX2); + if (bitTest(regs.edx, 25)) features.add(Features::kSSE); if (bitTest(regs.edx, 26)) features.add(Features::kSSE, Features::kSSE2); if (bitTest(regs.edx, 28)) features.add(Features::kMT); - // Get the content of XCR0 if supported by CPU and enabled by OS. - if ((regs.ecx & 0x0C000000u) == 0x0C000000u) { + // Get the content of XCR0 if supported by the CPU and enabled by the OS. + if (features.hasXSAVE() && features.hasOSXSAVE()) { xgetbvQuery(&xcr0, 0); } @@ -222,8 +229,23 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { } } + constexpr uint32_t kXCR0_AMX_Bits = 0x3u << 17; + bool amxEnabledByOS = (xcr0.eax & kXCR0_AMX_Bits) == kXCR0_AMX_Bits; + +#if defined(__APPLE__) + // Apple platform provides on-demand AVX512 support. When an AVX512 instruction is used + // the first time it results in #UD, which would cause the thread being promoted to use + // AVX512 support by the OS in addition to enabling the necessary bits in XCR0 register. + bool avx512EnabledByOS = true; +#else + // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS. + // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS. + constexpr uint32_t kXCR0_AVX512_Bits = (0x3u << 1) | (0x7u << 5); + bool avx512EnabledByOS = (xcr0.eax & kXCR0_AVX512_Bits) == kXCR0_AVX512_Bits; +#endif + // -------------------------------------------------------------------------- - // [CPUID EAX=0x7] + // [CPUID EAX=7 ECX=0] // -------------------------------------------------------------------------- // Detect new features if the processor supports CPUID-07. 
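The avx512EnabledByOS/amxEnabledByOS flags computed above can be reproduced outside of asmjit with the XGETBV intrinsic; the following is only an illustration of the XCR0 bit layout used here, and it assumes XSAVE and OSXSAVE were already confirmed via CPUID (plus -mxsave on GCC/Clang):

    #include <immintrin.h>

    uint64_t xcr0 = _xgetbv(0);                                          // XCR0, the XFEATURE_ENABLED_MASK
    bool avx512EnabledByOS = (xcr0 & 0xE6u) == 0xE6u;                    // bits 2:1 (XMM/YMM) and 7:5 (opmask/ZMM state)
    bool amxEnabledByOS    = (xcr0 & (0x3ull << 17)) == (0x3ull << 17);  // bits 18:17 (TILECFG/TILEDATA)

On Apple platforms the patch intentionally skips the AVX-512 check, since the OS enables that state on first use.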
@@ -231,7 +253,9 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { if (maxId >= 0x7) { cpuidQuery(®s, 0x7); - uint32_t maxSubLeafId = regs.eax; + + maybeMPX = bitTest(regs.ebx, 14); + maxSubLeafId_0x7 = regs.eax; if (bitTest(regs.ebx, 0)) features.add(Features::kFSGSBASE); if (bitTest(regs.ebx, 3)) features.add(Features::kBMI); @@ -240,20 +264,25 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { if (bitTest(regs.ebx, 8)) features.add(Features::kBMI2); if (bitTest(regs.ebx, 9)) features.add(Features::kERMS); if (bitTest(regs.ebx, 11)) features.add(Features::kRTM); - if (bitTest(regs.ebx, 14)) maybeMPX = true; if (bitTest(regs.ebx, 18)) features.add(Features::kRDSEED); if (bitTest(regs.ebx, 19)) features.add(Features::kADX); if (bitTest(regs.ebx, 20)) features.add(Features::kSMAP); - if (bitTest(regs.ebx, 22)) features.add(Features::kPCOMMIT); if (bitTest(regs.ebx, 23)) features.add(Features::kCLFLUSHOPT); if (bitTest(regs.ebx, 24)) features.add(Features::kCLWB); if (bitTest(regs.ebx, 29)) features.add(Features::kSHA); if (bitTest(regs.ecx, 0)) features.add(Features::kPREFETCHWT1); + if (bitTest(regs.ecx, 4)) features.add(Features::kOSPKE); + if (bitTest(regs.ecx, 5)) features.add(Features::kWAITPKG); + if (bitTest(regs.ecx, 8)) features.add(Features::kGFNI); + if (bitTest(regs.ecx, 9)) features.add(Features::kVAES); + if (bitTest(regs.ecx, 10)) features.add(Features::kVPCLMULQDQ); if (bitTest(regs.ecx, 22)) features.add(Features::kRDPID); if (bitTest(regs.ecx, 25)) features.add(Features::kCLDEMOTE); if (bitTest(regs.ecx, 27)) features.add(Features::kMOVDIRI); if (bitTest(regs.ecx, 28)) features.add(Features::kMOVDIR64B); if (bitTest(regs.ecx, 29)) features.add(Features::kENQCMD); + if (bitTest(regs.edx, 14)) features.add(Features::kSERIALIZE); + if (bitTest(regs.edx, 16)) features.add(Features::kTSXLDTRK); if (bitTest(regs.edx, 18)) features.add(Features::kPCONFIG); // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features. @@ -264,44 +293,47 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { if (bitTest(regs.ebx, 5) && features.hasAVX()) features.add(Features::kAVX2); + // Detect 'AMX'. + if (amxEnabledByOS) { + if (bitTest(regs.edx, 22)) features.add(Features::kAMX_BF16); + if (bitTest(regs.edx, 24)) features.add(Features::kAMX_TILE); + if (bitTest(regs.edx, 25)) features.add(Features::kAMX_INT8); + } + // Detect 'AVX_512'. - if (bitTest(regs.ebx, 16)) { - // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS. - // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS. 
- if ((xcr0.eax & 0x000000E6u) == 0x000000E6u) { - features.add(Features::kAVX512_F); - - if (bitTest(regs.ebx, 17)) features.add(Features::kAVX512_DQ); - if (bitTest(regs.ebx, 21)) features.add(Features::kAVX512_IFMA); - if (bitTest(regs.ebx, 26)) features.add(Features::kAVX512_PFI); - if (bitTest(regs.ebx, 27)) features.add(Features::kAVX512_ERI); - if (bitTest(regs.ebx, 28)) features.add(Features::kAVX512_CDI); - if (bitTest(regs.ebx, 30)) features.add(Features::kAVX512_BW); - if (bitTest(regs.ebx, 31)) features.add(Features::kAVX512_VL); - if (bitTest(regs.ecx, 1)) features.add(Features::kAVX512_VBMI); - if (bitTest(regs.ecx, 5)) features.add(Features::kWAITPKG); - if (bitTest(regs.ecx, 6)) features.add(Features::kAVX512_VBMI2); - if (bitTest(regs.ecx, 8)) features.add(Features::kGFNI); - if (bitTest(regs.ecx, 9)) features.add(Features::kVAES); - if (bitTest(regs.ecx, 10)) features.add(Features::kVPCLMULQDQ); - if (bitTest(regs.ecx, 11)) features.add(Features::kAVX512_VNNI); - if (bitTest(regs.ecx, 12)) features.add(Features::kAVX512_BITALG); - if (bitTest(regs.ecx, 14)) features.add(Features::kAVX512_VPOPCNTDQ); - if (bitTest(regs.edx, 2)) features.add(Features::kAVX512_4VNNIW); - if (bitTest(regs.edx, 3)) features.add(Features::kAVX512_4FMAPS); - if (bitTest(regs.edx, 8)) features.add(Features::kAVX512_VP2INTERSECT); - } + if (avx512EnabledByOS && bitTest(regs.ebx, 16)) { + features.add(Features::kAVX512_F); + + if (bitTest(regs.ebx, 17)) features.add(Features::kAVX512_DQ); + if (bitTest(regs.ebx, 21)) features.add(Features::kAVX512_IFMA); + if (bitTest(regs.ebx, 26)) features.add(Features::kAVX512_PFI); + if (bitTest(regs.ebx, 27)) features.add(Features::kAVX512_ERI); + if (bitTest(regs.ebx, 28)) features.add(Features::kAVX512_CDI); + if (bitTest(regs.ebx, 30)) features.add(Features::kAVX512_BW); + if (bitTest(regs.ebx, 31)) features.add(Features::kAVX512_VL); + if (bitTest(regs.ecx, 1)) features.add(Features::kAVX512_VBMI); + if (bitTest(regs.ecx, 6)) features.add(Features::kAVX512_VBMI2); + if (bitTest(regs.ecx, 11)) features.add(Features::kAVX512_VNNI); + if (bitTest(regs.ecx, 12)) features.add(Features::kAVX512_BITALG); + if (bitTest(regs.ecx, 14)) features.add(Features::kAVX512_VPOPCNTDQ); + if (bitTest(regs.edx, 2)) features.add(Features::kAVX512_4VNNIW); + if (bitTest(regs.edx, 3)) features.add(Features::kAVX512_4FMAPS); + if (bitTest(regs.edx, 8)) features.add(Features::kAVX512_VP2INTERSECT); } + } - if (maxSubLeafId >= 1 && features.hasAVX512_F()) { - cpuidQuery(®s, 0x7, 1); + // -------------------------------------------------------------------------- + // [CPUID EAX=7 ECX=1] + // -------------------------------------------------------------------------- - if (bitTest(regs.eax, 5)) features.add(Features::kAVX512_BF16); - } + if (features.hasAVX512_F() && maxSubLeafId_0x7 >= 1) { + cpuidQuery(®s, 0x7, 1); + + if (bitTest(regs.eax, 5)) features.add(Features::kAVX512_BF16); } // -------------------------------------------------------------------------- - // [CPUID EAX=0xD] + // [CPUID EAX=13 ECX=0] // -------------------------------------------------------------------------- if (maxId >= 0xD) { @@ -312,11 +344,22 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { features.add(Features::kMPX); cpuidQuery(®s, 0xD, 1); + if (bitTest(regs.eax, 0)) features.add(Features::kXSAVEOPT); if (bitTest(regs.eax, 1)) features.add(Features::kXSAVEC); if (bitTest(regs.eax, 3)) features.add(Features::kXSAVES); } + // 
-------------------------------------------------------------------------- + // [CPUID EAX=14 ECX=0] + // -------------------------------------------------------------------------- + + if (maxId >= 0xE) { + cpuidQuery(®s, 0xE, 0); + + if (bitTest(regs.ebx, 4)) features.add(Features::kPTWRITE); + } + // -------------------------------------------------------------------------- // [CPUID EAX=0x80000000...maxId] // -------------------------------------------------------------------------- @@ -325,7 +368,7 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { uint32_t i = maxId; // The highest EAX that we understand. - uint32_t kHighestProcessedEAX = 0x80000008u; + uint32_t kHighestProcessedEAX = 0x8000001Fu; // Several CPUID calls are required to get the whole branc string. It's easy // to copy one DWORD at a time instead of performing a byte copy. @@ -352,8 +395,9 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { if (bitTest(regs.edx, 21)) features.add(Features::kFXSROPT); if (bitTest(regs.edx, 22)) features.add(Features::kMMX2); if (bitTest(regs.edx, 27)) features.add(Features::kRDTSCP); + if (bitTest(regs.edx, 29)) features.add(Features::kPREFETCHW); if (bitTest(regs.edx, 30)) features.add(Features::k3DNOW2, Features::kMMX2); - if (bitTest(regs.edx, 31)) features.add(Features::k3DNOW); + if (bitTest(regs.edx, 31)) features.add(Features::kPREFETCHW); if (cpu.hasFeature(Features::kAVX)) { if (bitTest(regs.ecx, 11)) features.add(Features::kXOP); @@ -374,12 +418,22 @@ ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept { *brand++ = regs.ecx; *brand++ = regs.edx; - // Go directly to the last one. + // Go directly to the next one we are interested in. if (i == 0x80000004u) i = 0x80000008u - 1; break; case 0x80000008u: if (bitTest(regs.ebx, 0)) features.add(Features::kCLZERO); + if (bitTest(regs.ebx, 0)) features.add(Features::kRDPRU); + if (bitTest(regs.ebx, 8)) features.add(Features::kMCOMMIT); + if (bitTest(regs.ebx, 9)) features.add(Features::kWBNOINVD); + + // Go directly to the next one we are interested in. + i = 0x8000001Fu - 1; + break; + + case 0x8000001Fu: + if (bitTest(regs.eax, 4)) features.add(Features::kSNP); break; } } while (++i <= maxId); diff --git a/libs/asmjit/src/asmjit/x86/x86features.h b/libs/asmjit/src/asmjit/x86/x86features.h index d73c063..527a765 100644 --- a/libs/asmjit/src/asmjit/x86/x86features.h +++ b/libs/asmjit/src/asmjit/x86/x86features.h @@ -40,6 +40,8 @@ class Features : public BaseFeatures { public: //! CPU feature ID. enum Id : uint32_t { + // @EnumValuesBegin{"enum": "x86::Features::Id"}@ + kNone = 0, //!< No feature (never set, used internally). kMT, //!< CPU has multi-threading capabilities. @@ -50,6 +52,9 @@ class Features : public BaseFeatures { kADX, //!< CPU has ADX (multi-precision add-carry instruction extensions). kAESNI, //!< CPU has AESNI (AES encode/decode instructions). kALTMOVCR8, //!< CPU has LOCK MOV R<->CR0 (supports `MOV R<->CR8` via `LOCK MOV R<->CR0` in 32-bit mode) [AMD]. + kAMX_BF16, //!< CPU has AMX_BF16 (advanced matrix extensions - BF16 instructions). + kAMX_INT8, //!< CPU has AMX_INT8 (advanced matrix extensions - INT8 instructions). + kAMX_TILE, //!< CPU has AMX_TILE (advanced matrix extensions). kAVX, //!< CPU has AVX (advanced vector extensions). kAVX2, //!< CPU has AVX2 (advanced vector extensions 2). kAVX512_4FMAPS, //!< CPU has AVX512_FMAPS (FMA packed single). @@ -71,6 +76,8 @@ class Features : public BaseFeatures { kAVX512_VPOPCNTDQ, //!< CPU has AVX512_VPOPCNTDQ (VPOPCNT[D|Q] instructions). 
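Each new enum value also gains a hasXXX() accessor through ASMJIT_X86_FEATURE further below, so callers can gate code generation at runtime. A sketch of the intended usage; the exact accessor path follows the existing asmjit convention and is an assumption here:

    const asmjit::CpuInfo& cpu = asmjit::CpuInfo::host();
    const auto& features = cpu.features().as<asmjit::x86::Features>();

    if (features.hasAMX_TILE() && features.hasAMX_INT8()) {
      // safe to emit tileloadd/tdpbssd ...
    }
    if (features.hasAVX512_F() && features.hasAVX512_BF16()) {
      // safe to emit the BF16 conversion/dot-product forms ...
    }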
kBMI, //!< CPU has BMI (bit manipulation instructions #1). kBMI2, //!< CPU has BMI2 (bit manipulation instructions #2). + kCET_IBT, //!< CPU has CET-IBT. + kCET_SS, //!< CPU has CET-SS. kCLDEMOTE, //!< CPU has CLDEMOTE (cache line demote). kCLFLUSH, //!< CPU has CLFUSH (Cache Line flush). kCLFLUSHOPT, //!< CPU has CLFUSHOPT (Cache Line flush - optimized). @@ -96,6 +103,7 @@ class Features : public BaseFeatures { kLAHFSAHF, //!< CPU has LAHF/SAHF (LAHF/SAHF in 64-bit mode) [X86_64]. kLWP, //!< CPU has LWP (lightweight profiling) [AMD]. kLZCNT, //!< CPU has LZCNT (LZCNT instruction). + kMCOMMIT, //!< CPU has MCOMMIT (MCOMMIT instruction). kMMX, //!< CPU has MMX (MMX base instructions). kMMX2, //!< CPU has MMX2 (MMX extensions or MMX2). kMONITOR, //!< CPU has MONITOR (MONITOR/MWAIT instructions). @@ -107,23 +115,27 @@ class Features : public BaseFeatures { kMSR, //!< CPU has MSR (RDMSR/WRMSR instructions). kMSSE, //!< CPU has MSSE (misaligned SSE support). kOSXSAVE, //!< CPU has OSXSAVE (XSAVE enabled by OS). + kOSPKE, //!< CPU has OSPKE (PKE enabled by OS). kPCLMULQDQ, //!< CPU has PCLMULQDQ (packed carry-less multiplication). - kPCOMMIT, //!< CPU has PCOMMIT (PCOMMIT instruction). kPCONFIG, //!< CPU has PCONFIG (PCONFIG instruction). kPOPCNT, //!< CPU has POPCNT (POPCNT instruction). kPREFETCHW, //!< CPU has PREFETCHW. kPREFETCHWT1, //!< CPU has PREFETCHWT1. + kPTWRITE, //!< CPU has PTWRITE. kRDPID, //!< CPU has RDPID. + kRDPRU, //!< CPU has RDPRU. kRDRAND, //!< CPU has RDRAND. kRDSEED, //!< CPU has RDSEED. kRDTSC, //!< CPU has RDTSC. kRDTSCP, //!< CPU has RDTSCP. kRTM, //!< CPU has RTM. + kSERIALIZE, //!< CPU has SERIALIZE. kSHA, //!< CPU has SHA (SHA-1 and SHA-256 instructions). kSKINIT, //!< CPU has SKINIT (SKINIT/STGI instructions) [AMD]. kSMAP, //!< CPU has SMAP (supervisor-mode access prevention). kSMEP, //!< CPU has SMEP (supervisor-mode execution prevention). kSMX, //!< CPU has SMX (safer mode extensions). + kSNP, //!< CPU has SNP. kSSE, //!< CPU has SSE. kSSE2, //!< CPU has SSE2. kSSE3, //!< CPU has SSE3. @@ -134,6 +146,7 @@ class Features : public BaseFeatures { kSVM, //!< CPU has SVM (virtualization) [AMD]. kTBM, //!< CPU has TBM (trailing bit manipulation) [AMD]. kTSX, //!< CPU has TSX. + kTSXLDTRK, //!< CPU has TSXLDTRK. kVAES, //!< CPU has VAES (vector AES 256|512 bit support). kVMX, //!< CPU has VMX (virtualization) [INTEL]. kVPCLMULQDQ, //!< CPU has VPCLMULQDQ (vector PCLMULQDQ 256|512-bit support). @@ -145,6 +158,8 @@ class Features : public BaseFeatures { kXSAVEOPT, //!< CPU has XSAVEOPT. kXSAVES, //!< CPU has XSAVES. + // @EnumValuesEnd@ + kCount //!< Count of X86 CPU features. 
}; @@ -153,6 +168,7 @@ class Features : public BaseFeatures { inline Features() noexcept : BaseFeatures() {} + inline Features(const Features& other) noexcept : BaseFeatures(other) {} @@ -179,6 +195,9 @@ class Features : public BaseFeatures { ASMJIT_X86_FEATURE(ADX) ASMJIT_X86_FEATURE(AESNI) ASMJIT_X86_FEATURE(ALTMOVCR8) + ASMJIT_X86_FEATURE(AMX_BF16) + ASMJIT_X86_FEATURE(AMX_INT8) + ASMJIT_X86_FEATURE(AMX_TILE) ASMJIT_X86_FEATURE(AVX) ASMJIT_X86_FEATURE(AVX2) ASMJIT_X86_FEATURE(AVX512_4FMAPS) @@ -225,6 +244,7 @@ class Features : public BaseFeatures { ASMJIT_X86_FEATURE(LAHFSAHF) ASMJIT_X86_FEATURE(LWP) ASMJIT_X86_FEATURE(LZCNT) + ASMJIT_X86_FEATURE(MCOMMIT) ASMJIT_X86_FEATURE(MMX) ASMJIT_X86_FEATURE(MMX2) ASMJIT_X86_FEATURE(MONITOR) @@ -237,22 +257,25 @@ class Features : public BaseFeatures { ASMJIT_X86_FEATURE(MSSE) ASMJIT_X86_FEATURE(OSXSAVE) ASMJIT_X86_FEATURE(PCLMULQDQ) - ASMJIT_X86_FEATURE(PCOMMIT) ASMJIT_X86_FEATURE(PCONFIG) ASMJIT_X86_FEATURE(POPCNT) ASMJIT_X86_FEATURE(PREFETCHW) ASMJIT_X86_FEATURE(PREFETCHWT1) + ASMJIT_X86_FEATURE(PTWRITE) ASMJIT_X86_FEATURE(RDPID) + ASMJIT_X86_FEATURE(RDPRU) ASMJIT_X86_FEATURE(RDRAND) ASMJIT_X86_FEATURE(RDSEED) ASMJIT_X86_FEATURE(RDTSC) ASMJIT_X86_FEATURE(RDTSCP) ASMJIT_X86_FEATURE(RTM) + ASMJIT_X86_FEATURE(SERIALIZE) ASMJIT_X86_FEATURE(SHA) ASMJIT_X86_FEATURE(SKINIT) ASMJIT_X86_FEATURE(SMAP) ASMJIT_X86_FEATURE(SMEP) ASMJIT_X86_FEATURE(SMX) + ASMJIT_X86_FEATURE(SNP) ASMJIT_X86_FEATURE(SSE) ASMJIT_X86_FEATURE(SSE2) ASMJIT_X86_FEATURE(SSE3) @@ -263,6 +286,7 @@ class Features : public BaseFeatures { ASMJIT_X86_FEATURE(SVM) ASMJIT_X86_FEATURE(TBM) ASMJIT_X86_FEATURE(TSX) + ASMJIT_X86_FEATURE(TSXLDTRK) ASMJIT_X86_FEATURE(XSAVE) ASMJIT_X86_FEATURE(XSAVEC) ASMJIT_X86_FEATURE(XSAVEOPT) diff --git a/libs/asmjit/src/asmjit/x86/x86logging.cpp b/libs/asmjit/src/asmjit/x86/x86formatter.cpp similarity index 67% rename from libs/asmjit/src/asmjit/x86/x86logging.cpp rename to libs/asmjit/src/asmjit/x86/x86formatter.cpp index cfb91db..d7b065c 100644 --- a/libs/asmjit/src/asmjit/x86/x86logging.cpp +++ b/libs/asmjit/src/asmjit/x86/x86formatter.cpp @@ -26,8 +26,9 @@ #include "../core/misc_p.h" #include "../core/support.h" +#include "../x86/x86features.h" +#include "../x86/x86formatter_p.h" #include "../x86/x86instdb_p.h" -#include "../x86/x86logging_p.h" #include "../x86/x86operand.h" #ifndef ASMJIT_NO_COMPILER @@ -37,7 +38,7 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // ============================================================================ -// [asmjit::x86::LoggingInternal - Constants] +// [asmjit::x86::FormatterInternal - Constants] // ============================================================================ struct RegFormatInfo { @@ -77,6 +78,7 @@ struct RegFormatInfo_T { X == Reg::kTypeDReg ? 62 : X == Reg::kTypeSt ? 47 : X == Reg::kTypeBnd ? 55 : + X == Reg::kTypeTmm ? 65 : X == Reg::kTypeRip ? 39 : 0, kFormatIndex = X == Reg::kTypeGpbLo ? 1 : @@ -94,6 +96,7 @@ struct RegFormatInfo_T { X == Reg::kTypeDReg ? 80 : X == Reg::kTypeSt ? 55 : X == Reg::kTypeBnd ? 69 : + X == Reg::kTypeTmm ? 89 : X == Reg::kTypeRip ? 43 : 0, kSpecialIndex = X == Reg::kTypeGpbLo ? 96 : @@ -145,7 +148,9 @@ static const RegFormatInfo x86RegFormatInfo = { "k\0" // #53 "bnd\0" // #55 "cr\0" // #59 - "dr\0", // #62 + "dr\0" // #62 + "tmm\0" // #65 + , // Register name entries and strings. 
{ ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_NAME_ENTRY, 0) }, @@ -169,7 +174,8 @@ static const RegFormatInfo x86RegFormatInfo = { "dr%u\0" // #80 "rip\0" // #85 - "\0\0\0\0\0\0\0" // #89 + "tmm%u\0" // #89 + "\0" // #95 "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" // #96 "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "n/a\0" "n/a\0" "n/a\0" "n/a\0" // #128 @@ -196,56 +202,246 @@ static const char* x86GetAddressSizeString(uint32_t size) noexcept { } // ============================================================================ -// [asmjit::x86::LoggingInternal - Format Operand] +// [asmjit::x86::FormatterInternal - Format Feature] // ============================================================================ -ASMJIT_FAVOR_SIZE Error LoggingInternal::formatOperand( +Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept { + // @EnumStringBegin{"enum": "x86::Features::Id", "output": "sFeature", "strip": "k"}@ + static const char sFeatureString[] = + "None\0" + "MT\0" + "NX\0" + "3DNOW\0" + "3DNOW2\0" + "ADX\0" + "AESNI\0" + "ALTMOVCR8\0" + "AMX_BF16\0" + "AMX_INT8\0" + "AMX_TILE\0" + "AVX\0" + "AVX2\0" + "AVX512_4FMAPS\0" + "AVX512_4VNNIW\0" + "AVX512_BF16\0" + "AVX512_BITALG\0" + "AVX512_BW\0" + "AVX512_CDI\0" + "AVX512_DQ\0" + "AVX512_ERI\0" + "AVX512_F\0" + "AVX512_IFMA\0" + "AVX512_PFI\0" + "AVX512_VBMI\0" + "AVX512_VBMI2\0" + "AVX512_VL\0" + "AVX512_VNNI\0" + "AVX512_VP2INTERSECT\0" + "AVX512_VPOPCNTDQ\0" + "BMI\0" + "BMI2\0" + "CET_IBT\0" + "CET_SS\0" + "CLDEMOTE\0" + "CLFLUSH\0" + "CLFLUSHOPT\0" + "CLWB\0" + "CLZERO\0" + "CMOV\0" + "CMPXCHG16B\0" + "CMPXCHG8B\0" + "ENCLV\0" + "ENQCMD\0" + "ERMS\0" + "F16C\0" + "FMA\0" + "FMA4\0" + "FPU\0" + "FSGSBASE\0" + "FXSR\0" + "FXSROPT\0" + "GEODE\0" + "GFNI\0" + "HLE\0" + "I486\0" + "LAHFSAHF\0" + "LWP\0" + "LZCNT\0" + "MCOMMIT\0" + "MMX\0" + "MMX2\0" + "MONITOR\0" + "MONITORX\0" + "MOVBE\0" + "MOVDIR64B\0" + "MOVDIRI\0" + "MPX\0" + "MSR\0" + "MSSE\0" + "OSXSAVE\0" + "OSPKE\0" + "PCLMULQDQ\0" + "PCONFIG\0" + "POPCNT\0" + "PREFETCHW\0" + "PREFETCHWT1\0" + "PTWRITE\0" + "RDPID\0" + "RDPRU\0" + "RDRAND\0" + "RDSEED\0" + "RDTSC\0" + "RDTSCP\0" + "RTM\0" + "SERIALIZE\0" + "SHA\0" + "SKINIT\0" + "SMAP\0" + "SMEP\0" + "SMX\0" + "SNP\0" + "SSE\0" + "SSE2\0" + "SSE3\0" + "SSE4_1\0" + "SSE4_2\0" + "SSE4A\0" + "SSSE3\0" + "SVM\0" + "TBM\0" + "TSX\0" + "TSXLDTRK\0" + "VAES\0" + "VMX\0" + "VPCLMULQDQ\0" + "WAITPKG\0" + "WBNOINVD\0" + "XOP\0" + "XSAVE\0" + "XSAVEC\0" + "XSAVEOPT\0" + "XSAVES\0" + "\0"; + + static const uint16_t sFeatureIndex[] = { + 0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144, + 155, 165, 176, 185, 197, 208, 220, 233, 243, 255, 275, 292, 296, 301, 309, + 316, 325, 333, 344, 349, 356, 361, 372, 382, 388, 395, 400, 405, 409, 414, + 418, 427, 432, 440, 446, 451, 455, 460, 469, 473, 479, 487, 491, 496, 504, + 513, 519, 529, 537, 541, 545, 550, 558, 564, 574, 582, 589, 599, 611, 619, + 625, 631, 638, 645, 651, 658, 662, 672, 676, 683, 688, 693, 697, 701, 705, + 710, 715, 722, 729, 735, 741, 745, 749, 753, 762, 767, 771, 782, 790, 799, + 803, 809, 816, 825, 832 + }; + // @EnumStringEnd@ + + return sb.append(sFeatureString + sFeatureIndex[Support::min(featureId, x86::Features::kCount)]); +} + +// ============================================================================ +// [asmjit::x86::FormatterInternal - Format Register] +// ============================================================================ + +ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(String& sb, uint32_t 
flags, const BaseEmitter* emitter, uint32_t arch, uint32_t rType, uint32_t rId) noexcept { + DebugUtils::unused(arch); + const RegFormatInfo& info = x86RegFormatInfo; + +#ifndef ASMJIT_NO_COMPILER + if (Operand::isVirtId(rId)) { + if (emitter && emitter->emitterType() == BaseEmitter::kTypeCompiler) { + const BaseCompiler* cc = static_cast(emitter); + if (cc->isVirtIdValid(rId)) { + VirtReg* vReg = cc->virtRegById(rId); + ASMJIT_ASSERT(vReg != nullptr); + + const char* name = vReg->name(); + if (name && name[0] != '\0') + ASMJIT_PROPAGATE(sb.append(name)); + else + ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(rId)))); + + if (vReg->type() != rType && rType <= BaseReg::kTypeMax && (flags & FormatOptions::kFlagRegCasts) != 0) { + const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType]; + if (typeEntry.index) + ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + typeEntry.index)); + } + + return kErrorOk; + } + } + } +#else + DebugUtils::unused(emitter, flags); +#endif + + if (ASMJIT_LIKELY(rType <= BaseReg::kTypeMax)) { + const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[rType]; + + if (rId < nameEntry.specialCount) + return sb.append(info.nameStrings + nameEntry.specialIndex + rId * 4); + + if (rId < nameEntry.count) + return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(rId)); + + const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType]; + if (typeEntry.index) + return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, rId); + } + + return sb.appendFormat("?%u", rType, rId); +} + +// ============================================================================ +// [asmjit::x86::FormatterInternal - Format Operand] +// ============================================================================ + +ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand( String& sb, uint32_t flags, const BaseEmitter* emitter, - uint32_t archId, + uint32_t arch, const Operand_& op) noexcept { if (op.isReg()) - return formatRegister(sb, flags, emitter, archId, op.as().type(), op.as().id()); + return formatRegister(sb, flags, emitter, arch, op.as().type(), op.as().id()); if (op.isMem()) { const Mem& m = op.as(); - ASMJIT_PROPAGATE(sb.appendString(x86GetAddressSizeString(m.size()))); + ASMJIT_PROPAGATE(sb.append(x86GetAddressSizeString(m.size()))); // Segment override prefix. 
uint32_t seg = m.segmentId(); if (seg != SReg::kIdNone && seg < SReg::kIdCount) - ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + seg * 4)); + ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + size_t(seg) * 4)); - ASMJIT_PROPAGATE(sb.appendChar('[')); + ASMJIT_PROPAGATE(sb.append('[')); switch (m.addrType()) { - case BaseMem::kAddrTypeAbs: ASMJIT_PROPAGATE(sb.appendString("abs ")); break; - case BaseMem::kAddrTypeRel: ASMJIT_PROPAGATE(sb.appendString("rel ")); break; + case Mem::kAddrTypeAbs: ASMJIT_PROPAGATE(sb.append("abs ")); break; + case Mem::kAddrTypeRel: ASMJIT_PROPAGATE(sb.append("rel ")); break; } char opSign = '\0'; if (m.hasBase()) { opSign = '+'; if (m.hasBaseLabel()) { - ASMJIT_PROPAGATE(Logging::formatLabel(sb, flags, emitter, m.baseId())); + ASMJIT_PROPAGATE(Formatter::formatLabel(sb, flags, emitter, m.baseId())); } else { uint32_t modifiedFlags = flags; if (m.isRegHome()) { - ASMJIT_PROPAGATE(sb.appendString("&")); + ASMJIT_PROPAGATE(sb.append("&")); modifiedFlags &= ~FormatOptions::kFlagRegCasts; } - ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, archId, m.baseType(), m.baseId())); + ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, arch, m.baseType(), m.baseId())); } } if (m.hasIndex()) { if (opSign) - ASMJIT_PROPAGATE(sb.appendChar(opSign)); + ASMJIT_PROPAGATE(sb.append(opSign)); opSign = '+'; - ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, archId, m.indexType(), m.indexId())); + ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, arch, m.indexType(), m.indexId())); if (m.hasShift()) ASMJIT_PROPAGATE(sb.appendFormat("*%u", 1 << m.shift())); } @@ -258,26 +454,26 @@ ASMJIT_FAVOR_SIZE Error LoggingInternal::formatOperand( } if (opSign) - ASMJIT_PROPAGATE(sb.appendChar(opSign)); + ASMJIT_PROPAGATE(sb.append(opSign)); uint32_t base = 10; if ((flags & FormatOptions::kFlagHexOffsets) != 0 && off > 9) { - ASMJIT_PROPAGATE(sb.appendString("0x", 2)); + ASMJIT_PROPAGATE(sb.append("0x", 2)); base = 16; } ASMJIT_PROPAGATE(sb.appendUInt(off, base)); } - return sb.appendChar(']'); + return sb.append(']'); } if (op.isImm()) { const Imm& i = op.as(); - int64_t val = i.i64(); + int64_t val = i.value(); if ((flags & FormatOptions::kFlagHexImms) != 0 && uint64_t(val) > 9) { - ASMJIT_PROPAGATE(sb.appendString("0x", 2)); + ASMJIT_PROPAGATE(sb.append("0x", 2)); return sb.appendUInt(uint64_t(val), 16); } else { @@ -286,14 +482,14 @@ ASMJIT_FAVOR_SIZE Error LoggingInternal::formatOperand( } if (op.isLabel()) { - return Logging::formatLabel(sb, flags, emitter, op.id()); + return Formatter::formatLabel(sb, flags, emitter, op.id()); } - return sb.appendString(""); + return sb.append(""); } // ============================================================================ -// [asmjit::x86::LoggingInternal - Format Immediate (Extension)] +// [asmjit::x86::FormatterInternal - Format Immediate (Extension)] // ============================================================================ static constexpr char kImmCharStart = '{'; @@ -312,22 +508,22 @@ struct ImmBits { char text[48 - 3]; }; -ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmShuf(String& sb, uint32_t u8, uint32_t bits, uint32_t count) noexcept { +ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmShuf(String& sb, uint32_t u8, uint32_t bits, uint32_t count) noexcept { uint32_t mask = (1 << bits) - 1; for (uint32_t i = 0; i < count; i++, u8 >>= bits) { uint32_t value = u8 & mask; - ASMJIT_PROPAGATE(sb.appendChar(i == 0 ? 
kImmCharStart : kImmCharOr)); + ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr)); ASMJIT_PROPAGATE(sb.appendUInt(value)); } if (kImmCharEnd) - ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd)); + ASMJIT_PROPAGATE(sb.append(kImmCharEnd)); return kErrorOk; } -ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmBits(String& sb, uint32_t u8, const ImmBits* bits, uint32_t count) noexcept { +ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmBits(String& sb, uint32_t u8, const ImmBits* bits, uint32_t count) noexcept { uint32_t n = 0; char buf[64]; @@ -354,33 +550,33 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmBits(String& sb, uint32_ if (!str[0]) continue; - ASMJIT_PROPAGATE(sb.appendChar(++n == 1 ? kImmCharStart : kImmCharOr)); - ASMJIT_PROPAGATE(sb.appendString(str)); + ASMJIT_PROPAGATE(sb.append(++n == 1 ? kImmCharStart : kImmCharOr)); + ASMJIT_PROPAGATE(sb.append(str)); } if (n && kImmCharEnd) - ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd)); + ASMJIT_PROPAGATE(sb.append(kImmCharEnd)); return kErrorOk; } -ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmText(String& sb, uint32_t u8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept { +ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmText(String& sb, uint32_t u8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept { uint32_t mask = (1u << bits) - 1; uint32_t pos = 0; for (uint32_t i = 0; i < count; i++, u8 >>= bits, pos += advance) { uint32_t value = (u8 & mask) + pos; - ASMJIT_PROPAGATE(sb.appendChar(i == 0 ? kImmCharStart : kImmCharOr)); - ASMJIT_PROPAGATE(sb.appendString(Support::findPackedString(text, value))); + ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr)); + ASMJIT_PROPAGATE(sb.append(Support::findPackedString(text, value))); } if (kImmCharEnd) - ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd)); + ASMJIT_PROPAGATE(sb.append(kImmCharEnd)); return kErrorOk; } -ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( +ASMJIT_FAVOR_SIZE static Error FormatterInternal_explainConst( String& sb, uint32_t flags, uint32_t instId, @@ -456,51 +652,51 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( { 0x08u, 3, ImmBits::kModeLookup, "\0" "INEXACT\0" } }; - uint32_t u8 = imm.u8(); + uint32_t u8 = imm.valueAs(); switch (instId) { case Inst::kIdVblendpd: case Inst::kIdBlendpd: - return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 8); + return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8); case Inst::kIdVblendps: case Inst::kIdBlendps: - return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 4); + return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 4); case Inst::kIdVcmppd: case Inst::kIdVcmpps: case Inst::kIdVcmpsd: case Inst::kIdVcmpss: - return LoggingInternal_formatImmText(sb, u8, 5, 0, vcmpx); + return FormatterInternal_formatImmText(sb, u8, 5, 0, vcmpx); case Inst::kIdCmppd: case Inst::kIdCmpps: case Inst::kIdCmpsd: case Inst::kIdCmpss: - return LoggingInternal_formatImmText(sb, u8, 3, 0, vcmpx); + return FormatterInternal_formatImmText(sb, u8, 3, 0, vcmpx); case Inst::kIdVdbpsadbw: - return LoggingInternal_formatImmShuf(sb, u8, 2, 4); + return FormatterInternal_formatImmShuf(sb, u8, 2, 4); case Inst::kIdVdppd: case Inst::kIdVdpps: case Inst::kIdDppd: case Inst::kIdDpps: - return LoggingInternal_formatImmShuf(sb, u8, 1, 8); + return FormatterInternal_formatImmShuf(sb, u8, 1, 8); case Inst::kIdVmpsadbw: case Inst::kIdMpsadbw: - return LoggingInternal_formatImmBits(sb, u8, 
vmpsadbw, Support::min(vecSize / 8, 4)); + return FormatterInternal_formatImmBits(sb, u8, vmpsadbw, Support::min(vecSize / 8, 4)); case Inst::kIdVpblendw: case Inst::kIdPblendw: - return LoggingInternal_formatImmShuf(sb, u8, 1, 8); + return FormatterInternal_formatImmShuf(sb, u8, 1, 8); case Inst::kIdVpblendd: - return LoggingInternal_formatImmShuf(sb, u8, 1, Support::min(vecSize / 4, 8)); + return FormatterInternal_formatImmShuf(sb, u8, 1, Support::min(vecSize / 4, 8)); case Inst::kIdVpclmulqdq: case Inst::kIdPclmulqdq: - return LoggingInternal_formatImmBits(sb, u8, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq)); + return FormatterInternal_formatImmBits(sb, u8, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq)); case Inst::kIdVroundpd: case Inst::kIdVroundps: @@ -510,57 +706,57 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( case Inst::kIdRoundps: case Inst::kIdRoundsd: case Inst::kIdRoundss: - return LoggingInternal_formatImmBits(sb, u8, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx)); + return FormatterInternal_formatImmBits(sb, u8, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx)); case Inst::kIdVshufpd: case Inst::kIdShufpd: - return LoggingInternal_formatImmText(sb, u8, 1, 2, vshufpd, Support::min(vecSize / 8, 8)); + return FormatterInternal_formatImmText(sb, u8, 1, 2, vshufpd, Support::min(vecSize / 8, 8)); case Inst::kIdVshufps: case Inst::kIdShufps: - return LoggingInternal_formatImmText(sb, u8, 2, 4, vshufps, 4); + return FormatterInternal_formatImmText(sb, u8, 2, 4, vshufps, 4); case Inst::kIdVcvtps2ph: - return LoggingInternal_formatImmBits(sb, u8, vroundxx, 1); + return FormatterInternal_formatImmBits(sb, u8, vroundxx, 1); case Inst::kIdVperm2f128: case Inst::kIdVperm2i128: - return LoggingInternal_formatImmBits(sb, u8, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128)); + return FormatterInternal_formatImmBits(sb, u8, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128)); case Inst::kIdVpermilpd: - return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 8); + return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8); case Inst::kIdVpermilps: - return LoggingInternal_formatImmShuf(sb, u8, 2, 4); + return FormatterInternal_formatImmShuf(sb, u8, 2, 4); case Inst::kIdVpshufd: case Inst::kIdPshufd: - return LoggingInternal_formatImmShuf(sb, u8, 2, 4); + return FormatterInternal_formatImmShuf(sb, u8, 2, 4); case Inst::kIdVpshufhw: case Inst::kIdVpshuflw: case Inst::kIdPshufhw: case Inst::kIdPshuflw: case Inst::kIdPshufw: - return LoggingInternal_formatImmShuf(sb, u8, 2, 4); + return FormatterInternal_formatImmShuf(sb, u8, 2, 4); case Inst::kIdVfixupimmpd: case Inst::kIdVfixupimmps: case Inst::kIdVfixupimmsd: case Inst::kIdVfixupimmss: - return LoggingInternal_formatImmBits(sb, u8, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx)); + return FormatterInternal_formatImmBits(sb, u8, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx)); case Inst::kIdVfpclasspd: case Inst::kIdVfpclassps: case Inst::kIdVfpclasssd: case Inst::kIdVfpclassss: - return LoggingInternal_formatImmBits(sb, u8, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx)); + return FormatterInternal_formatImmBits(sb, u8, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx)); case Inst::kIdVgetmantpd: case Inst::kIdVgetmantps: case Inst::kIdVgetmantsd: case Inst::kIdVgetmantss: - return LoggingInternal_formatImmBits(sb, u8, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx)); + return FormatterInternal_formatImmBits(sb, u8, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx)); case Inst::kIdVpcmpb: case Inst::kIdVpcmpd: @@ -570,7 +766,7 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( 
case Inst::kIdVpcmpud: case Inst::kIdVpcmpuq: case Inst::kIdVpcmpuw: - return LoggingInternal_formatImmText(sb, u8, 3, 0, vpcmpx); + return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcmpx); case Inst::kIdVpcomb: case Inst::kIdVpcomd: @@ -580,21 +776,21 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( case Inst::kIdVpcomud: case Inst::kIdVpcomuq: case Inst::kIdVpcomuw: - return LoggingInternal_formatImmText(sb, u8, 3, 0, vpcomx); + return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcomx); case Inst::kIdVpermq: case Inst::kIdVpermpd: - return LoggingInternal_formatImmShuf(sb, u8, 2, 4); + return FormatterInternal_formatImmShuf(sb, u8, 2, 4); case Inst::kIdVpternlogd: case Inst::kIdVpternlogq: - return LoggingInternal_formatImmShuf(sb, u8, 1, 8); + return FormatterInternal_formatImmShuf(sb, u8, 1, 8); case Inst::kIdVrangepd: case Inst::kIdVrangeps: case Inst::kIdVrangesd: case Inst::kIdVrangess: - return LoggingInternal_formatImmBits(sb, u8, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx)); + return FormatterInternal_formatImmBits(sb, u8, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx)); case Inst::kIdVreducepd: case Inst::kIdVreduceps: @@ -604,7 +800,7 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( case Inst::kIdVrndscaleps: case Inst::kIdVrndscalesd: case Inst::kIdVrndscaless: - return LoggingInternal_formatImmBits(sb, u8, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx)); + return FormatterInternal_formatImmBits(sb, u8, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx)); case Inst::kIdVshuff32x4: case Inst::kIdVshuff64x2: @@ -612,7 +808,7 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( case Inst::kIdVshufi64x2: { uint32_t count = Support::max(vecSize / 16, 2u); uint32_t bits = count <= 2 ? 
1u : 2u; - return LoggingInternal_formatImmShuf(sb, u8, bits, count); + return FormatterInternal_formatImmShuf(sb, u8, bits, count); } default: @@ -621,68 +817,15 @@ ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst( } // ============================================================================ -// [asmjit::x86::LoggingInternal - Format Register] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error LoggingInternal::formatRegister(String& sb, uint32_t flags, const BaseEmitter* emitter, uint32_t archId, uint32_t rType, uint32_t rId) noexcept { - DebugUtils::unused(archId); - const RegFormatInfo& info = x86RegFormatInfo; - -#ifndef ASMJIT_NO_COMPILER - if (Operand::isVirtId(rId)) { - if (emitter && emitter->emitterType() == BaseEmitter::kTypeCompiler) { - const BaseCompiler* cc = static_cast(emitter); - if (cc->isVirtIdValid(rId)) { - VirtReg* vReg = cc->virtRegById(rId); - ASMJIT_ASSERT(vReg != nullptr); - - const char* name = vReg->name(); - if (name && name[0] != '\0') - ASMJIT_PROPAGATE(sb.appendString(name)); - else - ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(rId)))); - - if (vReg->type() != rType && rType <= BaseReg::kTypeMax && (flags & FormatOptions::kFlagRegCasts) != 0) { - const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType]; - if (typeEntry.index) - ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + typeEntry.index)); - } - - return kErrorOk; - } - } - } -#else - DebugUtils::unused(emitter, flags); -#endif - - if (ASMJIT_LIKELY(rType <= BaseReg::kTypeMax)) { - const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[rType]; - - if (rId < nameEntry.specialCount) - return sb.appendString(info.nameStrings + nameEntry.specialIndex + rId * 4); - - if (rId < nameEntry.count) - return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(rId)); - - const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType]; - if (typeEntry.index) - return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, rId); - } - - return sb.appendFormat("Reg?%u@%u", rType, rId); -} - -// ============================================================================ -// [asmjit::x86::LoggingInternal - Format Instruction] +// [asmjit::x86::FormatterInternal - Format Instruction] // ============================================================================ -ASMJIT_FAVOR_SIZE Error LoggingInternal::formatInstruction( +ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction( String& sb, uint32_t flags, const BaseEmitter* emitter, - uint32_t archId, - const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept { + uint32_t arch, + const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept { uint32_t instId = inst.id(); uint32_t options = inst.options(); @@ -690,21 +833,21 @@ ASMJIT_FAVOR_SIZE Error LoggingInternal::formatInstruction( // Format instruction options and instruction mnemonic. if (instId < Inst::_kIdCount) { // SHORT|LONG options. - if (options & Inst::kOptionShortForm) ASMJIT_PROPAGATE(sb.appendString("short ")); - if (options & Inst::kOptionLongForm) ASMJIT_PROPAGATE(sb.appendString("long ")); + if (options & Inst::kOptionShortForm) ASMJIT_PROPAGATE(sb.append("short ")); + if (options & Inst::kOptionLongForm) ASMJIT_PROPAGATE(sb.append("long ")); // LOCK|XACQUIRE|XRELEASE options. 
- if (options & Inst::kOptionXAcquire) ASMJIT_PROPAGATE(sb.appendString("xacquire ")); - if (options & Inst::kOptionXRelease) ASMJIT_PROPAGATE(sb.appendString("xrelease ")); - if (options & Inst::kOptionLock) ASMJIT_PROPAGATE(sb.appendString("lock ")); + if (options & Inst::kOptionXAcquire) ASMJIT_PROPAGATE(sb.append("xacquire ")); + if (options & Inst::kOptionXRelease) ASMJIT_PROPAGATE(sb.append("xrelease ")); + if (options & Inst::kOptionLock) ASMJIT_PROPAGATE(sb.append("lock ")); // REP|REPNE options. if (options & (Inst::kOptionRep | Inst::kOptionRepne)) { - sb.appendString((options & Inst::kOptionRep) ? "rep " : "repnz "); + sb.append((options & Inst::kOptionRep) ? "rep " : "repnz "); if (inst.hasExtraReg()) { - ASMJIT_PROPAGATE(sb.appendString("{")); - ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, archId, inst.extraReg().toReg())); - ASMJIT_PROPAGATE(sb.appendString("} ")); + ASMJIT_PROPAGATE(sb.append("{")); + ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, arch, inst.extraReg().toReg())); + ASMJIT_PROPAGATE(sb.append("} ")); } } @@ -715,23 +858,23 @@ ASMJIT_FAVOR_SIZE Error LoggingInternal::formatInstruction( Inst::kOptionOpCodeB | Inst::kOptionOpCodeW ; if (options & kRXBWMask) { - sb.appendString("rex."); - if (options & Inst::kOptionOpCodeR) sb.appendChar('r'); - if (options & Inst::kOptionOpCodeX) sb.appendChar('x'); - if (options & Inst::kOptionOpCodeB) sb.appendChar('b'); - if (options & Inst::kOptionOpCodeW) sb.appendChar('w'); - sb.appendChar(' '); + sb.append("rex."); + if (options & Inst::kOptionOpCodeR) sb.append('r'); + if (options & Inst::kOptionOpCodeX) sb.append('x'); + if (options & Inst::kOptionOpCodeB) sb.append('b'); + if (options & Inst::kOptionOpCodeW) sb.append('w'); + sb.append(' '); } else { - ASMJIT_PROPAGATE(sb.appendString("rex ")); + ASMJIT_PROPAGATE(sb.append("rex ")); } } // VEX|EVEX options. - if (options & Inst::kOptionVex3) ASMJIT_PROPAGATE(sb.appendString("vex3 ")); - if (options & Inst::kOptionEvex) ASMJIT_PROPAGATE(sb.appendString("evex ")); + if (options & Inst::kOptionVex3) ASMJIT_PROPAGATE(sb.append("vex3 ")); + if (options & Inst::kOptionEvex) ASMJIT_PROPAGATE(sb.append("evex ")); - ASMJIT_PROPAGATE(InstAPI::instIdToString(archId, instId, sb)); + ASMJIT_PROPAGATE(InstAPI::instIdToString(arch, instId, sb)); } else { ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId))); @@ -741,29 +884,29 @@ ASMJIT_FAVOR_SIZE Error LoggingInternal::formatInstruction( const Operand_& op = operands[i]; if (op.isNone()) break; - ASMJIT_PROPAGATE(sb.appendString(i == 0 ? " " : ", ")); - ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, archId, op)); + ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", ")); + ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, arch, op)); if (op.isImm() && (flags & FormatOptions::kFlagExplainImms)) { uint32_t vecSize = 16; for (uint32_t j = 0; j < opCount; j++) if (operands[j].isReg()) vecSize = Support::max(vecSize, operands[j].size()); - ASMJIT_PROPAGATE(LoggingInternal_explainConst(sb, flags, instId, vecSize, op.as())); + ASMJIT_PROPAGATE(FormatterInternal_explainConst(sb, flags, instId, vecSize, op.as())); } // Support AVX-512 masking - {k}{z}. 
if (i == 0) { if (inst.extraReg().group() == Reg::kGroupKReg) { - ASMJIT_PROPAGATE(sb.appendString(" {")); - ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, archId, inst.extraReg().type(), inst.extraReg().id())); - ASMJIT_PROPAGATE(sb.appendChar('}')); + ASMJIT_PROPAGATE(sb.append(" {")); + ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, arch, inst.extraReg().type(), inst.extraReg().id())); + ASMJIT_PROPAGATE(sb.append('}')); if (options & Inst::kOptionZMask) - ASMJIT_PROPAGATE(sb.appendString("{z}")); + ASMJIT_PROPAGATE(sb.append("{z}")); } else if (options & Inst::kOptionZMask) { - ASMJIT_PROPAGATE(sb.appendString(" {z}")); + ASMJIT_PROPAGATE(sb.append(" {z}")); } } diff --git a/libs/asmjit/src/asmjit/x86/x86logging_p.h b/libs/asmjit/src/asmjit/x86/x86formatter_p.h similarity index 64% rename from libs/asmjit/src/asmjit/x86/x86logging_p.h rename to libs/asmjit/src/asmjit/x86/x86formatter_p.h index c0d7d16..d7d58e4 100644 --- a/libs/asmjit/src/asmjit/x86/x86logging_p.h +++ b/libs/asmjit/src/asmjit/x86/x86formatter_p.h @@ -21,52 +21,60 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. -#ifndef ASMJIT_X86_X86LOGGING_P_H_INCLUDED -#define ASMJIT_X86_X86LOGGING_P_H_INCLUDED +#ifndef ASMJIT_X86_X86FORMATTER_P_H_INCLUDED +#define ASMJIT_X86_X86FORMATTER_P_H_INCLUDED #include "../core/api-config.h" #ifndef ASMJIT_NO_LOGGING -#include "../core/logging.h" +#include "../core/formatter.h" #include "../core/string.h" #include "../x86/x86globals.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) +//! \cond INTERNAL //! \addtogroup asmjit_x86 //! \{ // ============================================================================ -// [asmjit::x86::LoggingInternal] +// [asmjit::x86::FormatterInternal] // ============================================================================ -namespace LoggingInternal { - Error formatRegister( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - uint32_t regType, - uint32_t regId) noexcept; +namespace FormatterInternal { - Error formatOperand( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const Operand_& op) noexcept; +Error formatFeature( + String& sb, + uint32_t featureId) noexcept; - Error formatInstruction( - String& sb, - uint32_t flags, - const BaseEmitter* emitter, - uint32_t archId, - const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept; -}; +Error formatRegister( + String& sb, + uint32_t flags, + const BaseEmitter* emitter, + uint32_t arch, + uint32_t regType, + uint32_t regId) noexcept; + +Error formatOperand( + String& sb, + uint32_t flags, + const BaseEmitter* emitter, + uint32_t arch, + const Operand_& op) noexcept; + +Error formatInstruction( + String& sb, + uint32_t flags, + const BaseEmitter* emitter, + uint32_t arch, + const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept; + +} // {FormatterInternal} //! \} +//! 
\endcond ASMJIT_END_SUB_NAMESPACE #endif // !ASMJIT_NO_LOGGING -#endif // ASMJIT_X86_X86LOGGING_P_H_INCLUDED +#endif // ASMJIT_X86_X86FORMATTER_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/x86/x86func.cpp b/libs/asmjit/src/asmjit/x86/x86func.cpp new file mode 100644 index 0000000..ef5c4d9 --- /dev/null +++ b/libs/asmjit/src/asmjit/x86/x86func.cpp @@ -0,0 +1,531 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include "../core/api-build_p.h" +#ifdef ASMJIT_BUILD_X86 + +#include "../x86/x86func_p.h" +#include "../x86/x86emithelper_p.h" +#include "../x86/x86operand.h" + +ASMJIT_BEGIN_SUB_NAMESPACE(x86) + +// ============================================================================ +// [asmjit::x86::FuncInternal - Init] +// ============================================================================ + +namespace FuncInternal { + +static inline bool shouldThreatAsCDeclIn64BitMode(uint32_t ccId) noexcept { + return ccId == CallConv::kIdCDecl || + ccId == CallConv::kIdStdCall || + ccId == CallConv::kIdThisCall || + ccId == CallConv::kIdFastCall || + ccId == CallConv::kIdRegParm1 || + ccId == CallConv::kIdRegParm2 || + ccId == CallConv::kIdRegParm3; +} + +ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, uint32_t ccId, const Environment& environment) noexcept { + constexpr uint32_t kGroupGp = Reg::kGroupGp; + constexpr uint32_t kGroupVec = Reg::kGroupVec; + constexpr uint32_t kGroupMm = Reg::kGroupMm; + constexpr uint32_t kGroupKReg = Reg::kGroupKReg; + + constexpr uint32_t kZax = Gp::kIdAx; + constexpr uint32_t kZbx = Gp::kIdBx; + constexpr uint32_t kZcx = Gp::kIdCx; + constexpr uint32_t kZdx = Gp::kIdDx; + constexpr uint32_t kZsp = Gp::kIdSp; + constexpr uint32_t kZbp = Gp::kIdBp; + constexpr uint32_t kZsi = Gp::kIdSi; + constexpr uint32_t kZdi = Gp::kIdDi; + + bool winABI = environment.isPlatformWindows() || environment.isAbiMSVC(); + + cc.setArch(environment.arch()); + cc.setSaveRestoreRegSize(Reg::kGroupVec, 16); + cc.setSaveRestoreRegSize(Reg::kGroupMm, 8); + cc.setSaveRestoreRegSize(Reg::kGroupKReg, 8); + cc.setSaveRestoreAlignment(Reg::kGroupVec, 16); + cc.setSaveRestoreAlignment(Reg::kGroupMm, 8); + cc.setSaveRestoreAlignment(Reg::kGroupKReg, 8); + + if (environment.is32Bit()) { + bool isStandardCallConv = true; + + cc.setSaveRestoreRegSize(Reg::kGroupGp, 4); + cc.setSaveRestoreAlignment(Reg::kGroupGp, 4); + + cc.setPreservedRegs(Reg::kGroupGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi)); + cc.setNaturalStackAlignment(4); + + 
switch (ccId) { + case CallConv::kIdCDecl: + break; + + case CallConv::kIdStdCall: + cc.setFlags(CallConv::kFlagCalleePopsStack); + break; + + case CallConv::kIdFastCall: + cc.setFlags(CallConv::kFlagCalleePopsStack); + cc.setPassedOrder(kGroupGp, kZcx, kZdx); + break; + + case CallConv::kIdVectorCall: + cc.setFlags(CallConv::kFlagCalleePopsStack); + cc.setPassedOrder(kGroupGp, kZcx, kZdx); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5); + break; + + case CallConv::kIdThisCall: + // NOTE: Even MINGW (starting with GCC 4.7.0) now uses __thiscall on MS Windows, + // so we won't bail to any other calling convention if __thiscall was specified. + if (winABI) { + cc.setFlags(CallConv::kFlagCalleePopsStack); + cc.setPassedOrder(kGroupGp, kZcx); + } + else { + ccId = CallConv::kIdCDecl; + } + break; + + case CallConv::kIdRegParm1: + cc.setPassedOrder(kGroupGp, kZax); + break; + + case CallConv::kIdRegParm2: + cc.setPassedOrder(kGroupGp, kZax, kZdx); + break; + + case CallConv::kIdRegParm3: + cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx); + break; + + case CallConv::kIdLightCall2: + case CallConv::kIdLightCall3: + case CallConv::kIdLightCall4: { + uint32_t n = (ccId - CallConv::kIdLightCall2) + 2; + + cc.setFlags(CallConv::kFlagPassFloatsByVec); + cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi); + cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPreservedRegs(kGroupGp, Support::lsbMask(8)); + cc.setPreservedRegs(kGroupVec, Support::lsbMask(8) & ~Support::lsbMask(n)); + + cc.setNaturalStackAlignment(16); + isStandardCallConv = false; + break; + } + + default: + return DebugUtils::errored(kErrorInvalidArgument); + } + + if (isStandardCallConv) { + // MMX arguments is something where compiler vendors disagree. For example + // GCC and MSVC would pass first three via registers and the rest via stack, + // however Clang passes all via stack. Returning MMX registers is even more + // fun, where GCC uses MM0, but Clang uses EAX:EDX pair. I'm not sure it's + // something we should be worried about as MMX is deprecated anyway. + cc.setPassedOrder(kGroupMm, 0, 1, 2); + + // Vector arguments (XMM|YMM|ZMM) are passed via registers. However, if the + // function is variadic then they have to be passed via stack. + cc.setPassedOrder(kGroupVec, 0, 1, 2); + + // Functions with variable arguments always use stack for MM and vector + // arguments. + cc.addFlags(CallConv::kFlagPassVecByStackIfVA); + } + + if (ccId == CallConv::kIdCDecl) { + cc.addFlags(CallConv::kFlagVarArgCompatible); + } + } + else { + cc.setSaveRestoreRegSize(Reg::kGroupGp, 8); + cc.setSaveRestoreAlignment(Reg::kGroupGp, 8); + + // Preprocess the calling convention into a common id as many conventions + // are normally ignored even by C/C++ compilers and treated as `__cdecl`. + if (shouldThreatAsCDeclIn64BitMode(ccId)) + ccId = winABI ? 
CallConv::kIdX64Windows : CallConv::kIdX64SystemV; + + switch (ccId) { + case CallConv::kIdX64SystemV: { + cc.setFlags(CallConv::kFlagPassFloatsByVec | + CallConv::kFlagPassMmxByXmm | + CallConv::kFlagVarArgCompatible); + cc.setNaturalStackAlignment(16); + cc.setRedZoneSize(128); + cc.setPassedOrder(kGroupGp, kZdi, kZsi, kZdx, kZcx, 8, 9); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15)); + break; + } + + case CallConv::kIdX64Windows: { + cc.setStrategy(CallConv::kStrategyX64Windows); + cc.setFlags(CallConv::kFlagPassFloatsByVec | + CallConv::kFlagIndirectVecArgs | + CallConv::kFlagPassMmxByGp | + CallConv::kFlagVarArgCompatible); + cc.setNaturalStackAlignment(16); + // Maximum 4 arguments in registers, each adds 8 bytes to the spill zone. + cc.setSpillZoneSize(4 * 8); + cc.setPassedOrder(kGroupGp, kZcx, kZdx, 8, 9); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3); + cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15)); + cc.setPreservedRegs(kGroupVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); + break; + } + + case CallConv::kIdVectorCall: { + cc.setStrategy(CallConv::kStrategyX64VectorCall); + cc.setFlags(CallConv::kFlagPassFloatsByVec | + CallConv::kFlagPassMmxByGp ); + cc.setNaturalStackAlignment(16); + // Maximum 6 arguments in registers, each adds 8 bytes to the spill zone. + cc.setSpillZoneSize(6 * 8); + cc.setPassedOrder(kGroupGp, kZcx, kZdx, 8, 9); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5); + cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15)); + cc.setPreservedRegs(kGroupVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); + break; + } + + case CallConv::kIdLightCall2: + case CallConv::kIdLightCall3: + case CallConv::kIdLightCall4: { + uint32_t n = (ccId - CallConv::kIdLightCall2) + 2; + + cc.setFlags(CallConv::kFlagPassFloatsByVec); + cc.setNaturalStackAlignment(16); + cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi); + cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7); + cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7); + + cc.setPreservedRegs(kGroupGp, Support::lsbMask(16)); + cc.setPreservedRegs(kGroupVec, ~Support::lsbMask(n)); + break; + } + + default: + return DebugUtils::errored(kErrorInvalidArgument); + } + } + + cc.setId(ccId); + return kErrorOk; +} + +ASMJIT_FAVOR_SIZE void unpackValues(FuncDetail& func, FuncValuePack& pack) noexcept { + uint32_t typeId = pack[0].typeId(); + switch (typeId) { + case Type::kIdI64: + case Type::kIdU64: { + if (Environment::is32Bit(func.callConv().arch())) { + // Convert a 64-bit return value to two 32-bit return values. + pack[0].initTypeId(Type::kIdU32); + pack[1].initTypeId(typeId - 2); + break; + } + break; + } + } +} + +ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept { + const CallConv& cc = func.callConv(); + uint32_t arch = cc.arch(); + uint32_t stackOffset = cc._spillZoneSize; + uint32_t argCount = func.argCount(); + + // Up to two return values can be returned in GP registers. 
+ static const uint8_t gpReturnIndexes[4] = { + uint8_t(Gp::kIdAx), + uint8_t(Gp::kIdDx), + uint8_t(BaseReg::kIdBad), + uint8_t(BaseReg::kIdBad) + }; + + if (func.hasRet()) { + unpackValues(func, func._rets); + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + uint32_t typeId = func._rets[valueIndex].typeId(); + + // Terminate at the first void type (end of the pack). + if (!typeId) + break; + + switch (typeId) { + case Type::kIdI64: + case Type::kIdU64: { + if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad) + func._rets[valueIndex].initReg(Reg::kTypeGpq, gpReturnIndexes[valueIndex], typeId); + else + return DebugUtils::errored(kErrorInvalidState); + break; + } + + case Type::kIdI8: + case Type::kIdI16: + case Type::kIdI32: { + if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad) + func._rets[valueIndex].initReg(Reg::kTypeGpd, gpReturnIndexes[valueIndex], Type::kIdI32); + else + return DebugUtils::errored(kErrorInvalidState); + break; + } + + case Type::kIdU8: + case Type::kIdU16: + case Type::kIdU32: { + if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad) + func._rets[valueIndex].initReg(Reg::kTypeGpd, gpReturnIndexes[valueIndex], Type::kIdU32); + else + return DebugUtils::errored(kErrorInvalidState); + break; + } + + case Type::kIdF32: + case Type::kIdF64: { + uint32_t regType = Environment::is32Bit(arch) ? Reg::kTypeSt : Reg::kTypeXmm; + func._rets[valueIndex].initReg(regType, valueIndex, typeId); + break; + } + + case Type::kIdF80: { + // 80-bit floats are always returned by FP0. + func._rets[valueIndex].initReg(Reg::kTypeSt, valueIndex, typeId); + break; + } + + case Type::kIdMmx32: + case Type::kIdMmx64: { + // MM registers are returned through XMM (SystemV) or GPQ (Win64). + uint32_t regType = Reg::kTypeMm; + uint32_t regIndex = valueIndex; + if (Environment::is64Bit(arch)) { + regType = cc.strategy() == CallConv::kStrategyDefault ? Reg::kTypeXmm : Reg::kTypeGpq; + regIndex = cc.strategy() == CallConv::kStrategyDefault ? valueIndex : gpReturnIndexes[valueIndex]; + + if (regIndex == BaseReg::kIdBad) + return DebugUtils::errored(kErrorInvalidState); + } + + func._rets[valueIndex].initReg(regType, regIndex, typeId); + break; + } + + default: { + func._rets[valueIndex].initReg(vecTypeIdToRegType(typeId), valueIndex, typeId); + break; + } + } + } + } + + switch (cc.strategy()) { + case CallConv::kStrategyDefault: { + uint32_t gpzPos = 0; + uint32_t vecPos = 0; + + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + unpackValues(func, func._args[argIndex]); + + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + FuncValue& arg = func._args[argIndex][valueIndex]; + + // Terminate if there are no more arguments in the pack. + if (!arg) + break; + + uint32_t typeId = arg.typeId(); + + if (Type::isInt(typeId)) { + uint32_t regId = BaseReg::kIdBad; + + if (gpzPos < CallConv::kMaxRegArgsPerGroup) + regId = cc._passedOrder[Reg::kGroupGp].id[gpzPos]; + + if (regId != BaseReg::kIdBad) { + uint32_t regType = (typeId <= Type::kIdU32) ? 
Reg::kTypeGpd : Reg::kTypeGpq; + arg.assignRegData(regType, regId); + func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId)); + gpzPos++; + } + else { + uint32_t size = Support::max(Type::sizeOf(typeId), registerSize); + arg.assignStackOffset(int32_t(stackOffset)); + stackOffset += size; + } + continue; + } + + if (Type::isFloat(typeId) || Type::isVec(typeId)) { + uint32_t regId = BaseReg::kIdBad; + + if (vecPos < CallConv::kMaxRegArgsPerGroup) + regId = cc._passedOrder[Reg::kGroupVec].id[vecPos]; + + if (Type::isFloat(typeId)) { + // If this is a float, but `kFlagPassFloatsByVec` is false, we have + // to use stack instead. This should be only used by 32-bit calling + // conventions. + if (!cc.hasFlag(CallConv::kFlagPassFloatsByVec)) + regId = BaseReg::kIdBad; + } + else { + // Pass vector registers via stack if this is a variable arguments + // function. This should be only used by 32-bit calling conventions. + if (signature.hasVarArgs() && cc.hasFlag(CallConv::kFlagPassVecByStackIfVA)) + regId = BaseReg::kIdBad; + } + + if (regId != BaseReg::kIdBad) { + arg.initTypeId(typeId); + arg.assignRegData(vecTypeIdToRegType(typeId), regId); + func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId)); + vecPos++; + } + else { + uint32_t size = Type::sizeOf(typeId); + arg.assignStackOffset(int32_t(stackOffset)); + stackOffset += size; + } + continue; + } + } + } + break; + } + + case CallConv::kStrategyX64Windows: + case CallConv::kStrategyX64VectorCall: { + // Both X64 and VectorCall behave similarly - arguments are indexed + // from left to right. The position of the argument determines in + // which register the argument is allocated, so it's either GP or + // one of XMM/YMM/ZMM registers. + // + // [ X64 ] [VecCall] + // Index: #0 #1 #2 #3 #4 #5 + // + // GP : RCX RDX R8 R9 + // VEC : XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 + // + // For example function `f(int a, double b, int c, double d)` will be: + // + // (a) (b) (c) (d) + // RCX XMM1 R8 XMM3 + // + // Unused vector registers are used by HVA. + bool isVectorCall = (cc.strategy() == CallConv::kStrategyX64VectorCall); + + for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { + unpackValues(func, func._args[argIndex]); + + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + FuncValue& arg = func._args[argIndex][valueIndex]; + + // Terminate if there are no more arguments in the pack. + if (!arg) + break; + + uint32_t typeId = arg.typeId(); + uint32_t size = Type::sizeOf(typeId); + + if (Type::isInt(typeId) || Type::isMmx(typeId)) { + uint32_t regId = BaseReg::kIdBad; + + if (argIndex < CallConv::kMaxRegArgsPerGroup) + regId = cc._passedOrder[Reg::kGroupGp].id[argIndex]; + + if (regId != BaseReg::kIdBad) { + uint32_t regType = (size <= 4 && !Type::isMmx(typeId)) ? Reg::kTypeGpd : Reg::kTypeGpq; + arg.assignRegData(regType, regId); + func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId)); + } + else { + arg.assignStackOffset(int32_t(stackOffset)); + stackOffset += 8; + } + continue; + } + + if (Type::isFloat(typeId) || Type::isVec(typeId)) { + uint32_t regId = BaseReg::kIdBad; + + if (argIndex < CallConv::kMaxRegArgsPerGroup) + regId = cc._passedOrder[Reg::kGroupVec].id[argIndex]; + + if (regId != BaseReg::kIdBad) { + // X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed + // via registers, however, VectorCall was designed for that purpose. 
+ if (Type::isFloat(typeId) || isVectorCall) { + uint32_t regType = vecTypeIdToRegType(typeId); + arg.assignRegData(regType, regId); + func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId)); + continue; + } + } + + // Passed via stack if the argument is float/double or indirectly. + // The trap is - if the argument is passed indirectly, the address + // can be passed via register, if the argument's index has GP one. + if (Type::isFloat(typeId)) { + arg.assignStackOffset(int32_t(stackOffset)); + } + else { + uint32_t gpRegId = cc._passedOrder[Reg::kGroupGp].id[argIndex]; + if (gpRegId != BaseReg::kIdBad) + arg.assignRegData(Reg::kTypeGpq, gpRegId); + else + arg.assignStackOffset(int32_t(stackOffset)); + arg.addFlags(FuncValue::kFlagIsIndirect); + } + + // Always 8 bytes (float/double/pointer). + stackOffset += 8; + continue; + } + } + } + break; + } + } + + func._argStackSize = stackOffset; + return kErrorOk; +} + +} // {FuncInternal} + +ASMJIT_END_SUB_NAMESPACE + +#endif // ASMJIT_BUILD_X86 diff --git a/libs/asmjit/src/asmjit/x86/x86callconv_p.h b/libs/asmjit/src/asmjit/x86/x86func_p.h similarity index 73% rename from libs/asmjit/src/asmjit/x86/x86callconv_p.h rename to libs/asmjit/src/asmjit/x86/x86func_p.h index 5fc2113..94745ca 100644 --- a/libs/asmjit/src/asmjit/x86/x86callconv_p.h +++ b/libs/asmjit/src/asmjit/x86/x86func_p.h @@ -21,10 +21,10 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. -#ifndef ASMJIT_X86_X86CALLCONV_P_H_INCLUDED -#define ASMJIT_X86_X86CALLCONV_P_H_INCLUDED +#ifndef ASMJIT_X86_X86FUNC_P_H_INCLUDED +#define ASMJIT_X86_X86FUNC_P_H_INCLUDED -#include "../core/callconv.h" +#include "../core/func.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) @@ -33,18 +33,23 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! \{ // ============================================================================ -// [asmjit::x86::CallConvInternal] +// [asmjit::x86::FuncInternal] // ============================================================================ //! X86-specific function API (calling conventions and other utilities). -namespace CallConvInternal { - //! Initialize `CallConv` structure (X86 specific). - Error init(CallConv& cc, uint32_t ccId) noexcept; -} +namespace FuncInternal { + +//! Initialize `CallConv` structure (X86 specific). +Error initCallConv(CallConv& cc, uint32_t ccId, const Environment& environment) noexcept; + +//! Initialize `FuncDetail` (X86 specific). +Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept; + +} // {FuncInternal} //! \} //! \endcond ASMJIT_END_SUB_NAMESPACE -#endif // ASMJIT_X86_X86CALLCONV_P_H_INCLUDED +#endif // ASMJIT_X86_X86FUNC_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/x86/x86globals.h b/libs/asmjit/src/asmjit/x86/x86globals.h index dca2b95..95838fc 100644 --- a/libs/asmjit/src/asmjit/x86/x86globals.h +++ b/libs/asmjit/src/asmjit/x86/x86globals.h @@ -24,7 +24,7 @@ #ifndef ASMJIT_X86_X86GLOBALS_H_INCLUDED #define ASMJIT_X86_X86GLOBALS_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/inst.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) @@ -119,6 +119,7 @@ struct Inst : public BaseInst { kIdClflushopt, //!< Instruction 'clflushopt' {CLFLUSHOPT}. kIdClgi, //!< Instruction 'clgi' {SVM}. kIdCli, //!< Instruction 'cli'. + kIdClrssbsy, //!< Instruction 'clrssbsy' {CET_SS}. kIdClts, //!< Instruction 'clts'. kIdClwb, //!< Instruction 'clwb' {CLWB}. kIdClzero, //!< Instruction 'clzero' {CLZERO}. 
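// ----------------------------------------------------------------------------
// [Editor's note: illustrative sketch, not part of the upstream asmjit diff]
// ----------------------------------------------------------------------------
// The hunks above extend asmjit's x86 CPU feature detection (CPUID leaves
// 0x7/0xD/0xE, 0x80000008, 0x8000001F) and the x86::Features::Id enum, and the
// instruction IDs that follow are tagged with the features they require
// ({CET_SS}, {AMX_TILE}, {SNP}, ...). A minimal sketch of how those detected
// bits are consumed through the public API is shown here; CpuInfo::host() and
// the x86::Features values are assumed to match the asmjit revision vendored
// by this patch.

#include <asmjit/x86.h>
#include <cstdio>

int main() {
  const asmjit::CpuInfo& cpu = asmjit::CpuInfo::host();

  // hasFeature() takes one of the x86::Features::Id values listed above.
  std::printf("AVX2        : %s\n", cpu.hasFeature(asmjit::x86::Features::kAVX2)        ? "yes" : "no");
  std::printf("AVX512_F    : %s\n", cpu.hasFeature(asmjit::x86::Features::kAVX512_F)    ? "yes" : "no");
  std::printf("AVX512_BF16 : %s\n", cpu.hasFeature(asmjit::x86::Features::kAVX512_BF16) ? "yes" : "no");
  return 0;
}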
@@ -202,6 +203,8 @@ struct Inst : public BaseInst { kIdDppd, //!< Instruction 'dppd' {SSE4_1}. kIdDpps, //!< Instruction 'dpps' {SSE4_1}. kIdEmms, //!< Instruction 'emms' {MMX}. + kIdEndbr32, //!< Instruction 'endbr32' {CET_IBT}. + kIdEndbr64, //!< Instruction 'endbr64' {CET_IBT}. kIdEnqcmd, //!< Instruction 'enqcmd' {ENQCMD}. kIdEnqcmds, //!< Instruction 'enqcmds' {ENQCMD}. kIdEnter, //!< Instruction 'enter'. @@ -318,6 +321,8 @@ struct Inst : public BaseInst { kIdImul, //!< Instruction 'imul'. kIdIn, //!< Instruction 'in'. kIdInc, //!< Instruction 'inc'. + kIdIncsspd, //!< Instruction 'incsspd' {CET_SS}. + kIdIncsspq, //!< Instruction 'incsspq' {CET_SS} (X64). kIdIns, //!< Instruction 'ins'. kIdInsertps, //!< Instruction 'insertps' {SSE4_1}. kIdInsertq, //!< Instruction 'insertq' {SSE4A}. @@ -422,6 +427,7 @@ struct Inst : public BaseInst { kIdLddqu, //!< Instruction 'lddqu' {SSE3}. kIdLdmxcsr, //!< Instruction 'ldmxcsr' {SSE}. kIdLds, //!< Instruction 'lds' (X86). + kIdLdtilecfg, //!< Instruction 'ldtilecfg' {AMX_TILE} (X64). kIdLea, //!< Instruction 'lea'. kIdLeave, //!< Instruction 'leave'. kIdLes, //!< Instruction 'les' (X86). @@ -449,6 +455,7 @@ struct Inst : public BaseInst { kIdMaxps, //!< Instruction 'maxps' {SSE}. kIdMaxsd, //!< Instruction 'maxsd' {SSE2}. kIdMaxss, //!< Instruction 'maxss' {SSE}. + kIdMcommit, //!< Instruction 'mcommit' {MCOMMIT}. kIdMfence, //!< Instruction 'mfence' {SSE2}. kIdMinpd, //!< Instruction 'minpd' {SSE2}. kIdMinps, //!< Instruction 'minps' {SSE}. @@ -549,7 +556,7 @@ struct Inst : public BaseInst { kIdPcmpgtw, //!< Instruction 'pcmpgtw' {MMX|SSE2}. kIdPcmpistri, //!< Instruction 'pcmpistri' {SSE4_2}. kIdPcmpistrm, //!< Instruction 'pcmpistrm' {SSE4_2}. - kIdPcommit, //!< Instruction 'pcommit' {PCOMMIT}. + kIdPconfig, //!< Instruction 'pconfig' {PCONFIG}. kIdPdep, //!< Instruction 'pdep' {BMI2}. kIdPext, //!< Instruction 'pext' {BMI2}. kIdPextrb, //!< Instruction 'pextrb' {SSE4_1}. @@ -653,6 +660,7 @@ struct Inst : public BaseInst { kIdPslldq, //!< Instruction 'pslldq' {SSE2}. kIdPsllq, //!< Instruction 'psllq' {MMX|SSE2}. kIdPsllw, //!< Instruction 'psllw' {MMX|SSE2}. + kIdPsmash, //!< Instruction 'psmash' {SNP} (X64). kIdPsrad, //!< Instruction 'psrad' {MMX|SSE2}. kIdPsraw, //!< Instruction 'psraw' {MMX|SSE2}. kIdPsrld, //!< Instruction 'psrld' {MMX|SSE2}. @@ -669,6 +677,7 @@ struct Inst : public BaseInst { kIdPsubw, //!< Instruction 'psubw' {MMX|SSE2}. kIdPswapd, //!< Instruction 'pswapd' {3DNOW2}. kIdPtest, //!< Instruction 'ptest' {SSE4_1}. + kIdPtwrite, //!< Instruction 'ptwrite' {PTWRITE}. kIdPunpckhbw, //!< Instruction 'punpckhbw' {MMX|SSE2}. kIdPunpckhdq, //!< Instruction 'punpckhdq' {MMX|SSE2}. kIdPunpckhqdq, //!< Instruction 'punpckhqdq' {SSE2}. @@ -683,6 +692,7 @@ struct Inst : public BaseInst { kIdPushf, //!< Instruction 'pushf'. kIdPushfd, //!< Instruction 'pushfd' (X86). kIdPushfq, //!< Instruction 'pushfq' (X64). + kIdPvalidate, //!< Instruction 'pvalidate' {SNP}. kIdPxor, //!< Instruction 'pxor' {MMX|SSE2}. kIdRcl, //!< Instruction 'rcl'. kIdRcpps, //!< Instruction 'rcpps' {SSE}. @@ -692,12 +702,18 @@ struct Inst : public BaseInst { kIdRdgsbase, //!< Instruction 'rdgsbase' {FSGSBASE} (X64). kIdRdmsr, //!< Instruction 'rdmsr' {MSR}. kIdRdpid, //!< Instruction 'rdpid' {RDPID}. + kIdRdpkru, //!< Instruction 'rdpkru' {OSPKE}. kIdRdpmc, //!< Instruction 'rdpmc'. + kIdRdpru, //!< Instruction 'rdpru' {RDPRU}. kIdRdrand, //!< Instruction 'rdrand' {RDRAND}. kIdRdseed, //!< Instruction 'rdseed' {RDSEED}. 
+ kIdRdsspd, //!< Instruction 'rdsspd' {CET_SS}. + kIdRdsspq, //!< Instruction 'rdsspq' {CET_SS} (X64). kIdRdtsc, //!< Instruction 'rdtsc' {RDTSC}. kIdRdtscp, //!< Instruction 'rdtscp' {RDTSCP}. kIdRet, //!< Instruction 'ret'. + kIdRmpadjust, //!< Instruction 'rmpadjust' {SNP} (X64). + kIdRmpupdate, //!< Instruction 'rmpupdate' {SNP} (X64). kIdRol, //!< Instruction 'rol'. kIdRor, //!< Instruction 'ror'. kIdRorx, //!< Instruction 'rorx' {BMI2}. @@ -708,12 +724,15 @@ struct Inst : public BaseInst { kIdRsm, //!< Instruction 'rsm' (X86). kIdRsqrtps, //!< Instruction 'rsqrtps' {SSE}. kIdRsqrtss, //!< Instruction 'rsqrtss' {SSE}. + kIdRstorssp, //!< Instruction 'rstorssp' {CET_SS}. kIdSahf, //!< Instruction 'sahf' {LAHFSAHF}. kIdSal, //!< Instruction 'sal'. kIdSar, //!< Instruction 'sar'. kIdSarx, //!< Instruction 'sarx' {BMI2}. + kIdSaveprevssp, //!< Instruction 'saveprevssp' {CET_SS}. kIdSbb, //!< Instruction 'sbb'. kIdScas, //!< Instruction 'scas'. + kIdSerialize, //!< Instruction 'serialize' {SERIALIZE}. kIdSeta, //!< Instruction 'seta'. kIdSetae, //!< Instruction 'setae'. kIdSetb, //!< Instruction 'setb'. @@ -743,6 +762,7 @@ struct Inst : public BaseInst { kIdSetpe, //!< Instruction 'setpe'. kIdSetpo, //!< Instruction 'setpo'. kIdSets, //!< Instruction 'sets'. + kIdSetssbsy, //!< Instruction 'setssbsy' {CET_SS}. kIdSetz, //!< Instruction 'setz'. kIdSfence, //!< Instruction 'sfence' {MMX2}. kIdSgdt, //!< Instruction 'sgdt'. @@ -778,6 +798,7 @@ struct Inst : public BaseInst { kIdStmxcsr, //!< Instruction 'stmxcsr' {SSE}. kIdStos, //!< Instruction 'stos'. kIdStr, //!< Instruction 'str'. + kIdSttilecfg, //!< Instruction 'sttilecfg' {AMX_TILE} (X64). kIdSub, //!< Instruction 'sub'. kIdSubpd, //!< Instruction 'subpd' {SSE2}. kIdSubps, //!< Instruction 'subps' {SSE}. @@ -791,12 +812,27 @@ struct Inst : public BaseInst { kIdSysret, //!< Instruction 'sysret' (X64). kIdSysret64, //!< Instruction 'sysret64' (X64). kIdT1mskc, //!< Instruction 't1mskc' {TBM}. + kIdTdpbf16ps, //!< Instruction 'tdpbf16ps' {AMX_BF16} (X64). + kIdTdpbssd, //!< Instruction 'tdpbssd' {AMX_INT8} (X64). + kIdTdpbsud, //!< Instruction 'tdpbsud' {AMX_INT8} (X64). + kIdTdpbusd, //!< Instruction 'tdpbusd' {AMX_INT8} (X64). + kIdTdpbuud, //!< Instruction 'tdpbuud' {AMX_INT8} (X64). kIdTest, //!< Instruction 'test'. + kIdTileloadd, //!< Instruction 'tileloadd' {AMX_TILE} (X64). + kIdTileloaddt1, //!< Instruction 'tileloaddt1' {AMX_TILE} (X64). + kIdTilerelease, //!< Instruction 'tilerelease' {AMX_TILE} (X64). + kIdTilestored, //!< Instruction 'tilestored' {AMX_TILE} (X64). + kIdTilezero, //!< Instruction 'tilezero' {AMX_TILE} (X64). + kIdTpause, //!< Instruction 'tpause' {WAITPKG}. kIdTzcnt, //!< Instruction 'tzcnt' {BMI}. kIdTzmsk, //!< Instruction 'tzmsk' {TBM}. kIdUcomisd, //!< Instruction 'ucomisd' {SSE2}. kIdUcomiss, //!< Instruction 'ucomiss' {SSE}. + kIdUd0, //!< Instruction 'ud0'. + kIdUd1, //!< Instruction 'ud1'. kIdUd2, //!< Instruction 'ud2'. + kIdUmonitor, //!< Instruction 'umonitor' {WAITPKG}. + kIdUmwait, //!< Instruction 'umwait' {WAITPKG}. kIdUnpckhpd, //!< Instruction 'unpckhpd' {SSE2}. kIdUnpckhps, //!< Instruction 'unpckhps' {SSE}. kIdUnpcklpd, //!< Instruction 'unpcklpd' {SSE2}. @@ -1119,6 +1155,8 @@ struct Inst : public BaseInst { kIdVmxon, //!< Instruction 'vmxon' {VMX}. kIdVorpd, //!< Instruction 'vorpd' {AVX|AVX512_DQ+VL}. kIdVorps, //!< Instruction 'vorps' {AVX|AVX512_DQ+VL}. + kIdVp2intersectd, //!< Instruction 'vp2intersectd' {AVX512_VP2INTERSECT}. 
+ kIdVp2intersectq, //!< Instruction 'vp2intersectq' {AVX512_VP2INTERSECT}. kIdVp4dpwssd, //!< Instruction 'vp4dpwssd' {AVX512_4VNNIW}. kIdVp4dpwssds, //!< Instruction 'vp4dpwssds' {AVX512_4VNNIW}. kIdVpabsb, //!< Instruction 'vpabsb' {AVX|AVX2|AVX512_BW+VL}. @@ -1528,6 +1566,10 @@ struct Inst : public BaseInst { kIdWrfsbase, //!< Instruction 'wrfsbase' {FSGSBASE} (X64). kIdWrgsbase, //!< Instruction 'wrgsbase' {FSGSBASE} (X64). kIdWrmsr, //!< Instruction 'wrmsr' {MSR}. + kIdWrssd, //!< Instruction 'wrssd' {CET_SS}. + kIdWrssq, //!< Instruction 'wrssq' {CET_SS} (X64). + kIdWrussd, //!< Instruction 'wrussd' {CET_SS}. + kIdWrussq, //!< Instruction 'wrussq' {CET_SS} (X64). kIdXabort, //!< Instruction 'xabort' {RTM}. kIdXadd, //!< Instruction 'xadd' {I486}. kIdXbegin, //!< Instruction 'xbegin' {RTM}. @@ -1538,6 +1580,7 @@ struct Inst : public BaseInst { kIdXor, //!< Instruction 'xor'. kIdXorpd, //!< Instruction 'xorpd' {SSE2}. kIdXorps, //!< Instruction 'xorps' {SSE}. + kIdXresldtrk, //!< Instruction 'xresldtrk' {TSXLDTRK}. kIdXrstor, //!< Instruction 'xrstor' {XSAVE}. kIdXrstor64, //!< Instruction 'xrstor64' {XSAVE} (X64). kIdXrstors, //!< Instruction 'xrstors' {XSAVES}. @@ -1551,6 +1594,7 @@ struct Inst : public BaseInst { kIdXsaves, //!< Instruction 'xsaves' {XSAVES}. kIdXsaves64, //!< Instruction 'xsaves64' {XSAVES} (X64). kIdXsetbv, //!< Instruction 'xsetbv' {XSAVE}. + kIdXsusldtrk, //!< Instruction 'xsusldtrk' {TSXLDTRK}. kIdXtest, //!< Instruction 'xtest' {TSX}. _kIdCount // ${InstId:End} @@ -1652,14 +1696,14 @@ namespace Condition { kUnsignedGT = kA, //!< Unsigned `a > b`. kUnsignedGE = kAE, //!< Unsigned `a >= b`. - kZero = kZ, - kNotZero = kNZ, + kZero = kZ, //!< Zero flag. + kNotZero = kNZ, //!< Non-zero flag. - kNegative = kS, - kPositive = kNS, + kNegative = kS, //!< Sign flag. + kPositive = kNS, //!< No sign flag. - kParityEven = kP, - kParityOdd = kPO + kParityEven = kP, //!< Even parity flag. + kParityOdd = kPO //!< Odd parity flag. }; static constexpr uint8_t reverseTable[kCount] = { @@ -1679,16 +1723,16 @@ namespace Condition { static constexpr uint16_t cmovccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdCmov) }; #undef ASMJIT_INST_FROM_COND - //! Reverse a condition code (reverses the corresponding operands of a comparison). + //! Reverses a condition code (reverses the corresponding operands of a comparison). static constexpr uint32_t reverse(uint32_t cond) noexcept { return reverseTable[cond]; } - //! Negate a condition code. + //! Negates a condition code. static constexpr uint32_t negate(uint32_t cond) noexcept { return cond ^ 1u; } - //! Translate a condition code `cond` to a `jcc` instruction id. + //! Translates a condition code `cond` to a `jcc` instruction id. static constexpr uint32_t toJcc(uint32_t cond) noexcept { return jccTable[cond]; } - //! Translate a condition code `cond` to a `setcc` instruction id. + //! Translates a condition code `cond` to a `setcc` instruction id. static constexpr uint32_t toSetcc(uint32_t cond) noexcept { return setccTable[cond]; } - //! Translate a condition code `cond` to a `cmovcc` instruction id. + //! Translates a condition code `cond` to a `cmovcc` instruction id. static constexpr uint32_t toCmovcc(uint32_t cond) noexcept { return cmovccTable[cond]; } } @@ -1696,54 +1740,92 @@ namespace Condition { // [asmjit::x86::FpuWord] // ============================================================================ -//! FPU control and status word. +//! FPU control and status words. namespace FpuWord { //! FPU status word. 
enum Status : uint32_t { + //! Invalid operation. kStatusInvalid = 0x0001u, + //! Denormalized operand. kStatusDenormalized = 0x0002u, + //! Division by zero. kStatusDivByZero = 0x0004u, + //! Overflown. kStatusOverflow = 0x0008u, + //! Underflown. kStatusUnderflow = 0x0010u, + //! Precision lost. kStatusPrecision = 0x0020u, + //! Stack fault. kStatusStackFault = 0x0040u, + //! Interrupt. kStatusInterrupt = 0x0080u, + //! C0 flag. kStatusC0 = 0x0100u, + //! C1 flag. kStatusC1 = 0x0200u, + //! C2 flag. kStatusC2 = 0x0400u, + //! Top of the stack. kStatusTop = 0x3800u, + //! C3 flag. kStatusC3 = 0x4000u, + //! FPU is busy. kStatusBusy = 0x8000u }; //! FPU control word. enum Control : uint32_t { - // Bits 0-5. + // [Bits 0-5] + + //! Exception mask (0x3F). kControlEM_Mask = 0x003Fu, + //! Invalid operation exception. kControlEM_Invalid = 0x0001u, + //! Denormalized operand exception. kControlEM_Denormal = 0x0002u, + //! Division by zero exception. kControlEM_DivByZero = 0x0004u, + //! Overflow exception. kControlEM_Overflow = 0x0008u, + //! Underflow exception. kControlEM_Underflow = 0x0010u, + //! Inexact operation exception. kControlEM_Inexact = 0x0020u, - // Bits 8-9. + // [Bits 8-9] + + //! Precision control mask. kControlPC_Mask = 0x0300u, + //! Single precision (24 bits). kControlPC_Float = 0x0000u, + //! Reserved. kControlPC_Reserved = 0x0100u, + //! Double precision (53 bits). kControlPC_Double = 0x0200u, + //! Extended precision (64 bits). kControlPC_Extended = 0x0300u, - // Bits 10-11. + // [Bits 10-11] + + //! Rounding control mask. kControlRC_Mask = 0x0C00u, + //! Round to nearest even. kControlRC_Nearest = 0x0000u, + //! Round down (floor). kControlRC_Down = 0x0400u, + //! Round up (ceil). kControlRC_Up = 0x0800u, + //! Round towards zero (truncate). kControlRC_Truncate = 0x0C00u, - // Bit 12. + // [Bit 12] + + //! Infinity control. kControlIC_Mask = 0x1000u, + //! Projective (not supported on X64). kControlIC_Projective = 0x0000u, + //! Affine (default). kControlIC_Affine = 0x1000u }; } @@ -1760,26 +1842,39 @@ namespace Status { // [Architecture Neutral Flags - 0x000000FF] // ------------------------------------------------------------------------ - kCF = 0x00000001u, //!< Carry flag. - kOF = 0x00000002u, //!< Signed overflow flag. - kSF = 0x00000004u, //!< Sign flag (negative/sign, if set). - kZF = 0x00000008u, //!< Zero and/or equality flag (1 if zero/equal). + //! Carry flag. + kCF = 0x00000001u, + //! Signed overflow flag. + kOF = 0x00000002u, + //! Sign flag (negative/sign, if set). + kSF = 0x00000004u, + //! Zero and/or equality flag (1 if zero/equal). + kZF = 0x00000008u, // ------------------------------------------------------------------------ // [Architecture Specific Flags - 0xFFFFFF00] // ------------------------------------------------------------------------ - kAF = 0x00000100u, //!< Adjust flag. - kPF = 0x00000200u, //!< Parity flag. - kDF = 0x00000400u, //!< Direction flag. - kIF = 0x00000800u, //!< Interrupt enable flag. - - kAC = 0x00001000u, //!< Alignment check. - - kC0 = 0x00010000u, //!< FPU C0 status flag. - kC1 = 0x00020000u, //!< FPU C1 status flag. - kC2 = 0x00040000u, //!< FPU C2 status flag. - kC3 = 0x00080000u //!< FPU C3 status flag. + //! Adjust flag. + kAF = 0x00000100u, + //! Parity flag. + kPF = 0x00000200u, + //! Direction flag. + kDF = 0x00000400u, + //! Interrupt enable flag. + kIF = 0x00000800u, + + //! Alignment check. + kAC = 0x00001000u, + + //! FPU C0 status flag. + kC0 = 0x00010000u, + //! FPU C1 status flag. + kC1 = 0x00020000u, + //! 
FPU C2 status flag. + kC2 = 0x00040000u, + //! FPU C3 status flag. + kC3 = 0x00080000u }; } @@ -1834,12 +1929,18 @@ namespace Predicate { //! A predicate used by ROUND[PD|PS|SD|SS] instructions. enum Round : uint32_t { - kRoundNearest = 0x00u, //!< Round to nearest (even). - kRoundDown = 0x01u, //!< Round to down toward -INF (floor), - kRoundUp = 0x02u, //!< Round to up toward +INF (ceil). - kRoundTrunc = 0x03u, //!< Round toward zero (truncate). - kRoundCurrent = 0x04u, //!< Round to the current rounding mode set (ignores other RC bits). - kRoundInexact = 0x08u //!< Avoids inexact exception, if set. + //! Round to nearest (even). + kRoundNearest = 0x00u, + //! Round to down toward -INF (floor), + kRoundDown = 0x01u, + //! Round to up toward +INF (ceil). + kRoundUp = 0x02u, + //! Round toward zero (truncate). + kRoundTrunc = 0x03u, + //! Round to the current rounding mode set (ignores other RC bits). + kRoundCurrent = 0x04u, + //! Avoids inexact exception, if set. + kRoundInexact = 0x08u }; //! A predicate used by VCMP[PD|PS|SD|SS] instructions. @@ -1998,25 +2099,46 @@ namespace Predicate { //! Bitwise ternary logic between 3 operands introduced by AVX-512. namespace TLog { //! A predicate that can be used to create a common predicate for VPTERNLOG[D|Q]. + //! + //! There are 3 inputs to the instruction (\ref kA, \ref kB, \ref kC), and + //! ternary logic can define any combination that would be performed on these + //! 3 inputs to get the desired output - any combination of AND, OR, XOR, NOT. enum Operator : uint32_t { - k0 = 0x00u, //!< 0 value. - k1 = 0xFFu, //!< 1 value. - kA = 0xF0u, //!< A value. - kB = 0xCCu, //!< B value. - kC = 0xAAu, //!< C value. - kNotA = kA ^ k1, //!< `!A` expression. - kNotB = kB ^ k1, //!< `!B` expression. - kNotC = kC ^ k1, //!< `!C` expression. - - kAB = kA & kB, //!< `A & B` expression. - kAC = kA & kC, //!< `A & C` expression. - kBC = kB & kC, //!< `B & C` expression. - kNotAB = kAB ^ k1, //!< `!(A & B)` expression. - kNotAC = kAC ^ k1, //!< `!(A & C)` expression. - kNotBC = kBC ^ k1, //!< `!(B & C)` expression. - - kABC = kAB & kC, //!< `A & B & C` expression. - kNotABC = kABC ^ k1 //!< `!(A & B & C)` expression. + //! 0 value. + k0 = 0x00u, + //! 1 value. + k1 = 0xFFu, + //! A value. + kA = 0xF0u, + //! B value. + kB = 0xCCu, + //! C value. + kC = 0xAAu, + + //! `!A` expression. + kNotA = kA ^ k1, + //! `!B` expression. + kNotB = kB ^ k1, + //! `!C` expression. + kNotC = kC ^ k1, + + //! `A & B` expression. + kAB = kA & kB, + //! `A & C` expression. + kAC = kA & kC, + //! `B & C` expression. + kBC = kB & kC, + //! `!(A & B)` expression. + kNotAB = kAB ^ k1, + //! `!(A & C)` expression. + kNotAC = kAC ^ k1, + //! `!(B & C)` expression. + kNotBC = kBC ^ k1, + + //! `A & B & C` expression. + kABC = kAB & kC, + //! `!(A & B & C)` expression. + kNotABC = kABC ^ k1 }; //! Creates an immediate that can be used by VPTERNLOG[D|Q] instructions. 
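The TLog predicates above are easier to follow once it is clear that kA, kB and kC are simply the three columns of an 8-row truth table, so any bitwise expression over them yields the imm8 that VPTERNLOG[D|Q] expects. A minimal standalone sketch (the TLogSketch namespace is illustrative, not asmjit API; the mapping of A/B/C to the instruction's three operands follows the header's convention):

#include <cstdint>

namespace TLogSketch {
  constexpr uint32_t k1 = 0xFFu; // all-ones column
  constexpr uint32_t kA = 0xF0u; // truth-table column of input A
  constexpr uint32_t kB = 0xCCu; // truth-table column of input B
  constexpr uint32_t kC = 0xAAu; // truth-table column of input C

  // Immediate for the boolean function (A & B) | !C, built exactly like the
  // derived constants in the header (NOT is modeled as XOR with k1).
  constexpr uint32_t kABOrNotC = (kA & kB) | (kC ^ k1);
}

// Bit i of the immediate is the function's output for the input combination
// encoded in the bits of i, which is how the imm8 of VPTERNLOG[D|Q] is read.
static_assert(TLogSketch::kABOrNotC == 0xD5u, "(A & B) | !C evaluates to 0xD5");

int main() { return 0; }

This is also why the header can define kNotAB, kABC and the other derived predicates purely as expressions over kA/kB/kC, without any per-case tables.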
diff --git a/libs/asmjit/src/asmjit/x86/x86instapi.cpp b/libs/asmjit/src/asmjit/x86/x86instapi.cpp index 6c5e28b..fec69b2 100644 --- a/libs/asmjit/src/asmjit/x86/x86instapi.cpp +++ b/libs/asmjit/src/asmjit/x86/x86instapi.cpp @@ -58,18 +58,18 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) // ============================================================================ #ifndef ASMJIT_NO_TEXT -Error InstInternal::instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept { - DebugUtils::unused(archId); +Error InstInternal::instIdToString(uint32_t arch, uint32_t instId, String& output) noexcept { + DebugUtils::unused(arch); if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId))) return DebugUtils::errored(kErrorInvalidInstruction); const InstDB::InstInfo& info = InstDB::infoById(instId); - return output.appendString(InstDB::_nameData + info._nameDataIndex); + return output.append(InstDB::_nameData + info._nameDataIndex); } -uint32_t InstInternal::stringToInstId(uint32_t archId, const char* s, size_t len) noexcept { - DebugUtils::unused(archId); +uint32_t InstInternal::stringToInstId(uint32_t arch, const char* s, size_t len) noexcept { + DebugUtils::unused(arch); if (ASMJIT_UNLIKELY(!s)) return Inst::kIdNone; @@ -126,61 +126,64 @@ struct X86ValidationData { uint32_t allowedMemIndexRegs; }; -#define VALUE(X) \ - (X == Reg::kTypeGpbLo) ? InstDB::kOpGpbLo : \ - (X == Reg::kTypeGpbHi) ? InstDB::kOpGpbHi : \ - (X == Reg::kTypeGpw ) ? InstDB::kOpGpw : \ - (X == Reg::kTypeGpd ) ? InstDB::kOpGpd : \ - (X == Reg::kTypeGpq ) ? InstDB::kOpGpq : \ - (X == Reg::kTypeXmm ) ? InstDB::kOpXmm : \ - (X == Reg::kTypeYmm ) ? InstDB::kOpYmm : \ - (X == Reg::kTypeZmm ) ? InstDB::kOpZmm : \ - (X == Reg::kTypeMm ) ? InstDB::kOpMm : \ - (X == Reg::kTypeKReg ) ? InstDB::kOpKReg : \ - (X == Reg::kTypeSReg ) ? InstDB::kOpSReg : \ - (X == Reg::kTypeCReg ) ? InstDB::kOpCReg : \ - (X == Reg::kTypeDReg ) ? InstDB::kOpDReg : \ - (X == Reg::kTypeSt ) ? InstDB::kOpSt : \ - (X == Reg::kTypeBnd ) ? InstDB::kOpBnd : \ - (X == Reg::kTypeRip ) ? InstDB::kOpNone : InstDB::kOpNone +#define VALUE(x) \ + (x == Reg::kTypeGpbLo) ? InstDB::kOpGpbLo : \ + (x == Reg::kTypeGpbHi) ? InstDB::kOpGpbHi : \ + (x == Reg::kTypeGpw ) ? InstDB::kOpGpw : \ + (x == Reg::kTypeGpd ) ? InstDB::kOpGpd : \ + (x == Reg::kTypeGpq ) ? InstDB::kOpGpq : \ + (x == Reg::kTypeXmm ) ? InstDB::kOpXmm : \ + (x == Reg::kTypeYmm ) ? InstDB::kOpYmm : \ + (x == Reg::kTypeZmm ) ? InstDB::kOpZmm : \ + (x == Reg::kTypeMm ) ? InstDB::kOpMm : \ + (x == Reg::kTypeKReg ) ? InstDB::kOpKReg : \ + (x == Reg::kTypeSReg ) ? InstDB::kOpSReg : \ + (x == Reg::kTypeCReg ) ? InstDB::kOpCReg : \ + (x == Reg::kTypeDReg ) ? InstDB::kOpDReg : \ + (x == Reg::kTypeSt ) ? InstDB::kOpSt : \ + (x == Reg::kTypeBnd ) ? InstDB::kOpBnd : \ + (x == Reg::kTypeTmm ) ? InstDB::kOpTmm : \ + (x == Reg::kTypeRip ) ? InstDB::kOpNone : InstDB::kOpNone static const uint32_t _x86OpFlagFromRegType[Reg::kTypeMax + 1] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }; #undef VALUE -#define REG_MASK_FROM_REG_TYPE_X86(X) \ - (X == Reg::kTypeGpbLo) ? 0x0000000Fu : \ - (X == Reg::kTypeGpbHi) ? 0x0000000Fu : \ - (X == Reg::kTypeGpw ) ? 0x000000FFu : \ - (X == Reg::kTypeGpd ) ? 0x000000FFu : \ - (X == Reg::kTypeGpq ) ? 0x000000FFu : \ - (X == Reg::kTypeXmm ) ? 0x000000FFu : \ - (X == Reg::kTypeYmm ) ? 0x000000FFu : \ - (X == Reg::kTypeZmm ) ? 0x000000FFu : \ - (X == Reg::kTypeMm ) ? 0x000000FFu : \ - (X == Reg::kTypeKReg ) ? 0x000000FFu : \ - (X == Reg::kTypeSReg ) ? 0x0000007Eu : \ - (X == Reg::kTypeCReg ) ? 
0x0000FFFFu : \ - (X == Reg::kTypeDReg ) ? 0x000000FFu : \ - (X == Reg::kTypeSt ) ? 0x000000FFu : \ - (X == Reg::kTypeBnd ) ? 0x0000000Fu : \ - (X == Reg::kTypeRip ) ? 0x00000001u : 0u - -#define REG_MASK_FROM_REG_TYPE_X64(X) \ - (X == Reg::kTypeGpbLo) ? 0x0000FFFFu : \ - (X == Reg::kTypeGpbHi) ? 0x0000000Fu : \ - (X == Reg::kTypeGpw ) ? 0x0000FFFFu : \ - (X == Reg::kTypeGpd ) ? 0x0000FFFFu : \ - (X == Reg::kTypeGpq ) ? 0x0000FFFFu : \ - (X == Reg::kTypeXmm ) ? 0xFFFFFFFFu : \ - (X == Reg::kTypeYmm ) ? 0xFFFFFFFFu : \ - (X == Reg::kTypeZmm ) ? 0xFFFFFFFFu : \ - (X == Reg::kTypeMm ) ? 0x000000FFu : \ - (X == Reg::kTypeKReg ) ? 0x000000FFu : \ - (X == Reg::kTypeSReg ) ? 0x0000007Eu : \ - (X == Reg::kTypeCReg ) ? 0x0000FFFFu : \ - (X == Reg::kTypeDReg ) ? 0x0000FFFFu : \ - (X == Reg::kTypeSt ) ? 0x000000FFu : \ - (X == Reg::kTypeBnd ) ? 0x0000000Fu : \ - (X == Reg::kTypeRip ) ? 0x00000001u : 0u +#define REG_MASK_FROM_REG_TYPE_X86(x) \ + (x == Reg::kTypeGpbLo) ? 0x0000000Fu : \ + (x == Reg::kTypeGpbHi) ? 0x0000000Fu : \ + (x == Reg::kTypeGpw ) ? 0x000000FFu : \ + (x == Reg::kTypeGpd ) ? 0x000000FFu : \ + (x == Reg::kTypeGpq ) ? 0x000000FFu : \ + (x == Reg::kTypeXmm ) ? 0x000000FFu : \ + (x == Reg::kTypeYmm ) ? 0x000000FFu : \ + (x == Reg::kTypeZmm ) ? 0x000000FFu : \ + (x == Reg::kTypeMm ) ? 0x000000FFu : \ + (x == Reg::kTypeKReg ) ? 0x000000FFu : \ + (x == Reg::kTypeSReg ) ? 0x0000007Eu : \ + (x == Reg::kTypeCReg ) ? 0x0000FFFFu : \ + (x == Reg::kTypeDReg ) ? 0x000000FFu : \ + (x == Reg::kTypeSt ) ? 0x000000FFu : \ + (x == Reg::kTypeBnd ) ? 0x0000000Fu : \ + (x == Reg::kTypeTmm ) ? 0x000000FFu : \ + (x == Reg::kTypeRip ) ? 0x00000001u : 0u + +#define REG_MASK_FROM_REG_TYPE_X64(x) \ + (x == Reg::kTypeGpbLo) ? 0x0000FFFFu : \ + (x == Reg::kTypeGpbHi) ? 0x0000000Fu : \ + (x == Reg::kTypeGpw ) ? 0x0000FFFFu : \ + (x == Reg::kTypeGpd ) ? 0x0000FFFFu : \ + (x == Reg::kTypeGpq ) ? 0x0000FFFFu : \ + (x == Reg::kTypeXmm ) ? 0xFFFFFFFFu : \ + (x == Reg::kTypeYmm ) ? 0xFFFFFFFFu : \ + (x == Reg::kTypeZmm ) ? 0xFFFFFFFFu : \ + (x == Reg::kTypeMm ) ? 0x000000FFu : \ + (x == Reg::kTypeKReg ) ? 0x000000FFu : \ + (x == Reg::kTypeSReg ) ? 0x0000007Eu : \ + (x == Reg::kTypeCReg ) ? 0x0000FFFFu : \ + (x == Reg::kTypeDReg ) ? 0x0000FFFFu : \ + (x == Reg::kTypeSt ) ? 0x000000FFu : \ + (x == Reg::kTypeBnd ) ? 0x0000000Fu : \ + (x == Reg::kTypeTmm ) ? 0x000000FFu : \ + (x == Reg::kTypeRip ) ? 0x00000001u : 0u static const X86ValidationData _x86ValidationData = { { ASMJIT_LOOKUP_TABLE_32(REG_MASK_FROM_REG_TYPE_X86, 0) }, @@ -235,18 +238,18 @@ static ASMJIT_INLINE bool x86CheckOSig(const InstDB::OpSignature& op, const Inst return true; } -ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept { - // Only called when `archId` matches X86 family. - ASMJIT_ASSERT(ArchInfo::isX86Family(archId)); +ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, uint32_t validationFlags) noexcept { + // Only called when `arch` matches X86 family. + ASMJIT_ASSERT(Environment::isFamilyX86(arch)); const X86ValidationData* vd; - if (archId == ArchInfo::kIdX86) + if (arch == Environment::kArchX86) vd = &_x86ValidationData; else vd = &_x64ValidationData; uint32_t i; - uint32_t mode = InstDB::modeFromArchId(archId); + uint32_t mode = InstDB::modeFromArch(arch); // Get the instruction data. 
uint32_t instId = inst.id(); @@ -328,7 +331,6 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& // that the register is virtual and its index will be assigned later // by the register allocator. We must pass unless asked to disallow // virtual registers. - // TODO: We need an option to refuse virtual regs here. uint32_t regId = op.id(); if (regId < Operand::kVirtIdMin) { if (ASMJIT_UNLIKELY(regId >= 32)) @@ -341,6 +343,8 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& combinedRegMask |= regMask; } else { + if (!(validationFlags & InstAPI::kValidationFlagVirtRegs)) + return DebugUtils::errored(kErrorIllegalVirtReg); regMask = 0xFFFFFFFFu; } break; @@ -376,7 +380,7 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& memSize <<= m.getBroadcast(); } - if (baseType) { + if (baseType != 0 && baseType > Label::kLabelTag) { uint32_t baseId = m.baseId(); if (m.isRegHome()) { @@ -393,6 +397,9 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& // memory operand. Basically only usable for string instructions and other // instructions where memory operand is implicit and has 'seg:[reg]' form. if (baseId < Operand::kVirtIdMin) { + if (ASMJIT_UNLIKELY(baseId >= 32)) + return DebugUtils::errored(kErrorInvalidPhysId); + // Physical base id. regMask = Support::bitMask(baseId); combinedRegMask |= regMask; @@ -400,12 +407,17 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& else { // Virtual base id - fill the whole mask for implicit mem validation. // The register is not assigned yet, so we cannot predict the phys id. + if (!(validationFlags & InstAPI::kValidationFlagVirtRegs)) + return DebugUtils::errored(kErrorIllegalVirtReg); regMask = 0xFFFFFFFFu; } if (!indexType && !m.offsetLo32()) memFlags |= InstDB::kMemOpBaseOnly; } + else if (baseType == Label::kLabelTag) { + // [Label] - there is no need to validate the base as it's label. + } else { // Base is a 64-bit address. int64_t offset = m.offset(); @@ -461,8 +473,16 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& return DebugUtils::errored(kErrorInvalidAddress); uint32_t indexId = m.indexId(); - if (indexId < Operand::kVirtIdMin) + if (indexId < Operand::kVirtIdMin) { + if (ASMJIT_UNLIKELY(indexId >= 32)) + return DebugUtils::errored(kErrorInvalidPhysId); + combinedRegMask |= Support::bitMask(indexId); + } + else { + if (!(validationFlags & InstAPI::kValidationFlagVirtRegs)) + return DebugUtils::errored(kErrorIllegalVirtReg); + } // Only used for implicit memory operands having 'seg:[reg]' form, so clear it. regMask = 0; @@ -490,7 +510,7 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& } case Operand::kOpImm: { - uint64_t immValue = op.as().u64(); + uint64_t immValue = op.as().valueAs(); uint32_t immFlags = 0; if (int64_t(immValue) >= 0) { @@ -574,7 +594,9 @@ ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& } else { // Illegal use of a high 8-bit register with REX prefix. 
- if (ASMJIT_UNLIKELY((combinedOpFlags & InstDB::kOpGpbHi) != 0 && (combinedRegMask & 0xFFFFFF00u) != 0)) + bool hasREX = inst.hasOption(Inst::kOptionRex) || + ((combinedRegMask & 0xFFFFFF00u) != 0); + if (ASMJIT_UNLIKELY(hasREX && (combinedOpFlags & InstDB::kOpGpbHi) != 0)) return DebugUtils::errored(kErrorInvalidUseOfGpbHi); } @@ -761,12 +783,6 @@ static const uint64_t rwRegGroupByteMask[Reg::kGroupCount] = { 0x00000000000000FFu // RIP. }; -// TODO: Make universal. -static ASMJIT_INLINE uint32_t gpRegSizeByArchId(uint32_t archId) noexcept { - static const uint8_t table[] = { 0, 4, 8, 4, 8 }; - return table[archId]; -} - static ASMJIT_INLINE void rwZeroExtendGp(OpRWInfo& opRwInfo, const Gp& reg, uint32_t nativeGpSize) noexcept { ASMJIT_ASSERT(BaseReg::isGp(reg.as())); if (reg.size() + 4 == nativeGpSize) { @@ -793,11 +809,11 @@ static ASMJIT_INLINE void rwZeroExtendNonVec(OpRWInfo& opRwInfo, const Reg& reg) } } -Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept { +Error InstInternal::queryRWInfo(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept { using namespace Status; - // Only called when `archId` matches X86 family. - ASMJIT_ASSERT(ArchInfo::isX86Family(archId)); + // Only called when `arch` matches X86 family. + ASMJIT_ASSERT(Environment::isFamilyX86(arch)); // Get the instruction data. uint32_t instId = inst.id(); @@ -808,23 +824,25 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope const InstDB::CommonInfoTableB& tabB = InstDB::_commonInfoTableB[InstDB::_instInfoTable[instId]._commonInfoIndexB]; const InstDB::RWFlagsInfoTable& rwFlags = InstDB::_rwFlagsInfoTable[tabB._rwFlagsIndex]; - // Each RWInfo contains two indexes - // [0] - OpCount == 2 - // [1] - OpCount != 2 - // They are used this way as there are instructions that have 2 and 3 - // operand overloads that use different semantics. So instead of adding - // more special cases we just separated their data tables. - const InstDB::RWInfo& instRwInfo = InstDB::rwInfo[InstDB::rwInfoIndex[instId * 2u + uint32_t(opCount != 2)]]; + + // There are two data tables, one for `opCount == 2` and the second for + // `opCount != 2`. There are two reasons for that: + // - There are instructions that share the same name that have both 2 + // or 3 operands, which have different RW information / semantics. + // - There must be 2 tables otherwise the lookup index won't fit into + // 8 bits (there is more than 256 records of combined rwInfo A and B). + const InstDB::RWInfo& instRwInfo = opCount == 2 ? 
InstDB::rwInfoA[InstDB::rwInfoIndexA[instId]] + : InstDB::rwInfoB[InstDB::rwInfoIndexB[instId]]; const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo]; - out._instFlags = 0; - out._opCount = uint8_t(opCount); - out._rmFeature = instRmInfo.rmFeature; - out._extraReg.reset(); - out._readFlags = rwFlags.readFlags; - out._writeFlags = rwFlags.writeFlags; + out->_instFlags = 0; + out->_opCount = uint8_t(opCount); + out->_rmFeature = instRmInfo.rmFeature; + out->_extraReg.reset(); + out->_readFlags = rwFlags.readFlags; + out->_writeFlags = rwFlags.writeFlags; - uint32_t nativeGpSize = gpRegSizeByArchId(archId); + uint32_t nativeGpSize = Environment::registerSizeFromArch(arch); constexpr uint32_t R = OpRWInfo::kRead; constexpr uint32_t W = OpRWInfo::kWrite; @@ -833,13 +851,13 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope constexpr uint32_t RegPhys = OpRWInfo::kRegPhysId; constexpr uint32_t MibRead = OpRWInfo::kMemBaseRead | OpRWInfo::kMemIndexRead; - if (ASMJIT_LIKELY(instRwInfo.category == InstDB::RWInfo::kCategoryGeneric)) { + if (instRwInfo.category == InstDB::RWInfo::kCategoryGeneric) { uint32_t i; uint32_t rmOpsMask = 0; uint32_t rmMaxSize = 0; for (i = 0; i < opCount; i++) { - OpRWInfo& op = out._operands[i]; + OpRWInfo& op = out->_operands[i]; const Operand_& srcOp = operands[i]; const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]]; @@ -883,7 +901,13 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope rmOpsMask |= Support::bitMask(i); } else { - op.addOpFlags(MibRead); + const x86::Mem& memOp = srcOp.as(); + // The RW flags of BASE+INDEX are either provided by the data, which means + // that the instruction is border-case, or they are deduced from the operand. 
+ if (memOp.hasBaseReg() && !(op.opFlags() & OpRWInfo::kMemBaseRW)) + op.addOpFlags(OpRWInfo::kMemBaseRead); + if (memOp.hasIndexReg() && !(op.opFlags() & OpRWInfo::kMemIndexRW)) + op.addOpFlags(OpRWInfo::kMemIndexRead); } } @@ -893,7 +917,7 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope do { i = it.next(); - OpRWInfo& op = out._operands[i]; + OpRWInfo& op = out->_operands[i]; op.addOpFlags(RegM); switch (instRmInfo.category) { @@ -932,38 +956,38 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope const Reg& o1 = operands[1].as(); if (o0.isGp() && o1.isGp()) { - out._operands[0].reset(W | RegM, operands[0].size()); - out._operands[1].reset(R | RegM, operands[1].size()); + out->_operands[0].reset(W | RegM, operands[0].size()); + out->_operands[1].reset(R | RegM, operands[1].size()); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); return kErrorOk; } if (o0.isGp() && o1.isSReg()) { - out._operands[0].reset(W | RegM, nativeGpSize); - out._operands[0].setRmSize(2); - out._operands[1].reset(R, 2); + out->_operands[0].reset(W | RegM, nativeGpSize); + out->_operands[0].setRmSize(2); + out->_operands[1].reset(R, 2); return kErrorOk; } if (o0.isSReg() && o1.isGp()) { - out._operands[0].reset(W, 2); - out._operands[1].reset(R | RegM, 2); - out._operands[1].setRmSize(2); + out->_operands[0].reset(W, 2); + out->_operands[1].reset(R | RegM, 2); + out->_operands[1].setRmSize(2); return kErrorOk; } if (o0.isGp() && (o1.isCReg() || o1.isDReg())) { - out._operands[0].reset(W, nativeGpSize); - out._operands[1].reset(R, nativeGpSize); - out._writeFlags = kOF | kSF | kZF | kAF | kPF | kCF; + out->_operands[0].reset(W, nativeGpSize); + out->_operands[1].reset(R, nativeGpSize); + out->_writeFlags = kOF | kSF | kZF | kAF | kPF | kCF; return kErrorOk; } if ((o0.isCReg() || o0.isDReg()) && o1.isGp()) { - out._operands[0].reset(W, nativeGpSize); - out._operands[1].reset(R, nativeGpSize); - out._writeFlags = kOF | kSF | kZF | kAF | kPF | kCF; + out->_operands[0].reset(W, nativeGpSize); + out->_operands[1].reset(R, nativeGpSize); + out->_writeFlags = kOF | kSF | kZF | kAF | kPF | kCF; return kErrorOk; } } @@ -974,18 +998,18 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope if (o0.isGp()) { if (!o1.isOffset64Bit()) - out._operands[0].reset(W, o0.size()); + out->_operands[0].reset(W, o0.size()); else - out._operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx); + out->_operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx); - out._operands[1].reset(R | MibRead, o0.size()); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + out->_operands[1].reset(R | MibRead, o0.size()); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); return kErrorOk; } if (o0.isSReg()) { - out._operands[0].reset(W, 2); - out._operands[1].reset(R, 2); + out->_operands[0].reset(W, 2); + out->_operands[1].reset(R, 2); return kErrorOk; } } @@ -995,34 +1019,34 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope const Reg& o1 = operands[1].as(); if (o1.isGp()) { - out._operands[0].reset(W | MibRead, o1.size()); + out->_operands[0].reset(W | MibRead, o1.size()); if (!o0.isOffset64Bit()) - out._operands[1].reset(R, o1.size()); + out->_operands[1].reset(R, o1.size()); else - out._operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx); + out->_operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx); return 
kErrorOk; } if (o1.isSReg()) { - out._operands[0].reset(W | MibRead, 2); - out._operands[1].reset(R, 2); + out->_operands[0].reset(W | MibRead, 2); + out->_operands[1].reset(R, 2); return kErrorOk; } } if (Reg::isGp(operands[0]) && operands[1].isImm()) { const Reg& o0 = operands[0].as(); - out._operands[0].reset(W | RegM, o0.size()); - out._operands[1].reset(); + out->_operands[0].reset(W | RegM, o0.size()); + out->_operands[1].reset(); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); return kErrorOk; } if (operands[0].isMem() && operands[1].isImm()) { const Reg& o0 = operands[0].as(); - out._operands[0].reset(W | MibRead, o0.size()); - out._operands[1].reset(); + out->_operands[0].reset(W | MibRead, o0.size()); + out->_operands[1].reset(); return kErrorOk; } } @@ -1040,51 +1064,51 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope if (opCount == 2) { if (operands[0].isReg() && operands[1].isImm()) { - out._operands[0].reset(X, operands[0].size()); - out._operands[1].reset(); + out->_operands[0].reset(X, operands[0].size()); + out->_operands[1].reset(); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); return kErrorOk; } if (Reg::isGpw(operands[0]) && operands[1].size() == 1) { // imul ax, r8/m8 <- AX = AL * r8/m8 - out._operands[0].reset(X | RegPhys, 2, Gp::kIdAx); - out._operands[0].setReadByteMask(Support::lsbMask(1)); - out._operands[1].reset(R | RegM, 1); + out->_operands[0].reset(X | RegPhys, 2, Gp::kIdAx); + out->_operands[0].setReadByteMask(Support::lsbMask(1)); + out->_operands[1].reset(R | RegM, 1); } else { // imul r?, r?/m? - out._operands[0].reset(X, operands[0].size()); - out._operands[1].reset(R | RegM, operands[0].size()); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + out->_operands[0].reset(X, operands[0].size()); + out->_operands[1].reset(R | RegM, operands[0].size()); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); } if (operands[1].isMem()) - out._operands[1].addOpFlags(MibRead); + out->_operands[1].addOpFlags(MibRead); return kErrorOk; } if (opCount == 3) { if (operands[2].isImm()) { - out._operands[0].reset(W, operands[0].size()); - out._operands[1].reset(R | RegM, operands[1].size()); - out._operands[2].reset(); + out->_operands[0].reset(W, operands[0].size()); + out->_operands[1].reset(R | RegM, operands[1].size()); + out->_operands[2].reset(); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); if (operands[1].isMem()) - out._operands[1].addOpFlags(MibRead); + out->_operands[1].addOpFlags(MibRead); return kErrorOk; } else { - out._operands[0].reset(W | RegPhys, operands[0].size(), Gp::kIdDx); - out._operands[1].reset(X | RegPhys, operands[1].size(), Gp::kIdAx); - out._operands[2].reset(R | RegM, operands[2].size()); + out->_operands[0].reset(W | RegPhys, operands[0].size(), Gp::kIdDx); + out->_operands[1].reset(X | RegPhys, operands[1].size(), Gp::kIdAx); + out->_operands[2].reset(R | RegM, operands[2].size()); - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); - rwZeroExtendGp(out._operands[1], operands[1].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[1], operands[1].as(), nativeGpSize); if (operands[2].isMem()) - 
out._operands[2].addOpFlags(MibRead); + out->_operands[2].addOpFlags(MibRead); return kErrorOk; } } @@ -1097,16 +1121,16 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope // 2 or 3 operands that are use `kCategoryGeneric`. if (opCount == 2) { if (BaseReg::isVec(operands[0]) && operands[1].isMem()) { - out._operands[0].reset(W, 8); - out._operands[0].setWriteByteMask(Support::lsbMask(8) << 8); - out._operands[1].reset(R | MibRead, 8); + out->_operands[0].reset(W, 8); + out->_operands[0].setWriteByteMask(Support::lsbMask(8) << 8); + out->_operands[1].reset(R | MibRead, 8); return kErrorOk; } if (operands[0].isMem() && BaseReg::isVec(operands[1])) { - out._operands[0].reset(W | MibRead, 8); - out._operands[1].reset(R, 8); - out._operands[1].setReadByteMask(Support::lsbMask(8) << 8); + out->_operands[0].reset(W | MibRead, 8); + out->_operands[1].reset(R, 8); + out->_operands[1].setReadByteMask(Support::lsbMask(8) << 8); return kErrorOk; } } @@ -1117,18 +1141,18 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope // Special case for 'vmaskmovpd|vmaskmovps|vpmaskmovd|vpmaskmovq' instructions. if (opCount == 3) { if (BaseReg::isVec(operands[0]) && BaseReg::isVec(operands[1]) && operands[2].isMem()) { - out._operands[0].reset(W, operands[0].size()); - out._operands[1].reset(R, operands[1].size()); - out._operands[2].reset(R | MibRead, operands[1].size()); + out->_operands[0].reset(W, operands[0].size()); + out->_operands[1].reset(R, operands[1].size()); + out->_operands[2].reset(R | MibRead, operands[1].size()); - rwZeroExtendAvxVec(out._operands[0], operands[0].as()); + rwZeroExtendAvxVec(out->_operands[0], operands[0].as()); return kErrorOk; } if (operands[0].isMem() && BaseReg::isVec(operands[1]) && BaseReg::isVec(operands[2])) { - out._operands[0].reset(X | MibRead, operands[1].size()); - out._operands[1].reset(R, operands[1].size()); - out._operands[2].reset(R, operands[2].size()); + out->_operands[0].reset(X | MibRead, operands[1].size()); + out->_operands[1].reset(R, operands[1].size()); + out->_operands[2].reset(R, operands[2].size()); return kErrorOk; } } @@ -1145,11 +1169,11 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope uint32_t o0Size = operands[0].size(); uint32_t o1Size = o0Size == 16 ? 8 : o0Size; - out._operands[0].reset(W, o0Size); - out._operands[1].reset(R | RegM, o1Size); - out._operands[1]._readByteMask &= 0x00FF00FF00FF00FFu; + out->_operands[0].reset(W, o0Size); + out->_operands[1].reset(R | RegM, o1Size); + out->_operands[1]._readByteMask &= 0x00FF00FF00FF00FFu; - rwZeroExtendAvxVec(out._operands[0], operands[0].as()); + rwZeroExtendAvxVec(out->_operands[0], operands[0].as()); return kErrorOk; } @@ -1157,10 +1181,10 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope uint32_t o0Size = operands[0].size(); uint32_t o1Size = o0Size == 16 ? 8 : o0Size; - out._operands[0].reset(W, o0Size); - out._operands[1].reset(R | MibRead, o1Size); + out->_operands[0].reset(W, o0Size); + out->_operands[1].reset(R | MibRead, o1Size); - rwZeroExtendAvxVec(out._operands[0], operands[0].as()); + rwZeroExtendAvxVec(out->_operands[0], operands[0].as()); return kErrorOk; } } @@ -1172,9 +1196,9 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope // Special case for 'vmovmskpd|vmovmskps' instructions. 
if (opCount == 2) { if (BaseReg::isGp(operands[0]) && BaseReg::isVec(operands[1])) { - out._operands[0].reset(W, 1); - out._operands[0].setExtendByteMask(Support::lsbMask(nativeGpSize - 1) << 1); - out._operands[1].reset(R, operands[1].size()); + out->_operands[0].reset(W, 1); + out->_operands[0].setExtendByteMask(Support::lsbMask(nativeGpSize - 1) << 1); + out->_operands[1].reset(R, operands[1].size()); return kErrorOk; } } @@ -1208,32 +1232,32 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope if (opCount >= 3) { if (opCount > 3) return DebugUtils::errored(kErrorInvalidInstruction); - out._operands[2].reset(); + out->_operands[2].reset(); } if (operands[0].isReg() && operands[1].isReg()) { uint32_t size1 = operands[1].size(); uint32_t size0 = size1 >> shift; - out._operands[0].reset(W, size0); - out._operands[1].reset(R, size1); + out->_operands[0].reset(W, size0); + out->_operands[1].reset(R, size1); if (instRmInfo.rmOpsMask & 0x1) { - out._operands[0].addOpFlags(RegM); - out._operands[0].setRmSize(size0); + out->_operands[0].addOpFlags(RegM); + out->_operands[0].setRmSize(size0); } if (instRmInfo.rmOpsMask & 0x2) { - out._operands[1].addOpFlags(RegM); - out._operands[1].setRmSize(size1); + out->_operands[1].addOpFlags(RegM); + out->_operands[1].setRmSize(size1); } // Handle 'pmovmskb|vpmovmskb'. if (BaseReg::isGp(operands[0])) - rwZeroExtendGp(out._operands[0], operands[0].as(), nativeGpSize); + rwZeroExtendGp(out->_operands[0], operands[0].as(), nativeGpSize); if (BaseReg::isVec(operands[0])) - rwZeroExtendAvxVec(out._operands[0], operands[0].as()); + rwZeroExtendAvxVec(out->_operands[0], operands[0].as()); return kErrorOk; } @@ -1242,8 +1266,8 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope uint32_t size1 = operands[1].size() ? 
operands[1].size() : uint32_t(16); uint32_t size0 = size1 >> shift; - out._operands[0].reset(W, size0); - out._operands[1].reset(R | MibRead, size1); + out->_operands[0].reset(W, size0); + out->_operands[1].reset(R | MibRead, size1); return kErrorOk; } @@ -1251,8 +1275,8 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope uint32_t size1 = operands[1].size(); uint32_t size0 = size1 >> shift; - out._operands[0].reset(W | MibRead, size0); - out._operands[1].reset(R, size1); + out->_operands[0].reset(W | MibRead, size0); + out->_operands[1].reset(R, size1); return kErrorOk; } } @@ -1285,30 +1309,30 @@ Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Ope if (opCount >= 3) { if (opCount > 3) return DebugUtils::errored(kErrorInvalidInstruction); - out._operands[2].reset(); + out->_operands[2].reset(); } uint32_t size0 = operands[0].size(); uint32_t size1 = size0 >> shift; - out._operands[0].reset(W, size0); - out._operands[1].reset(R, size1); + out->_operands[0].reset(W, size0); + out->_operands[1].reset(R, size1); if (operands[0].isReg() && operands[1].isReg()) { if (instRmInfo.rmOpsMask & 0x1) { - out._operands[0].addOpFlags(RegM); - out._operands[0].setRmSize(size0); + out->_operands[0].addOpFlags(RegM); + out->_operands[0].setRmSize(size0); } if (instRmInfo.rmOpsMask & 0x2) { - out._operands[1].addOpFlags(RegM); - out._operands[1].setRmSize(size1); + out->_operands[1].addOpFlags(RegM); + out->_operands[1].setRmSize(size1); } return kErrorOk; } if (operands[0].isReg() && operands[1].isMem()) { - out._operands[1].addOpFlags(MibRead); + out->_operands[1].addOpFlags(MibRead); return kErrorOk; } } @@ -1334,7 +1358,7 @@ struct RegAnalysis { } }; -static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, uint32_t opCount) noexcept { +static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, size_t opCount) noexcept { uint32_t mask = 0; uint32_t highVecUsed = 0; @@ -1359,10 +1383,10 @@ static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, uint32_t o return RegAnalysis { mask, highVecUsed }; } -Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept { - // Only called when `archId` matches X86 family. - DebugUtils::unused(archId); - ASMJIT_ASSERT(ArchInfo::isX86Family(archId)); +Error InstInternal::queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept { + // Only called when `arch` matches X86 family. + DebugUtils::unused(arch); + ASMJIT_ASSERT(Environment::isFamilyX86(arch)); // Get the instruction data. uint32_t instId = inst.id(); @@ -1378,12 +1402,12 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O const uint8_t* fEnd = tableB.featuresEnd(); // Copy all features to `out`. - out.reset(); + out->reset(); do { uint32_t feature = fData[0]; if (!feature) break; - out.add(feature); + out->add(feature); } while (++fData != fEnd); // Since AsmJit aggregates instructions that share the same name we have to @@ -1392,19 +1416,19 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O RegAnalysis regAnalysis = InstInternal_regAnalysis(operands, opCount); // Handle MMX vs SSE overlap. - if (out.has(Features::kMMX) || out.has(Features::kMMX2)) { + if (out->has(Features::kMMX) || out->has(Features::kMMX2)) { // Only instructions defined by SSE and SSE2 overlap. 
Instructions // introduced by newer instruction sets like SSE3+ don't state MMX as // they require SSE3+. - if (out.has(Features::kSSE) || out.has(Features::kSSE2)) { + if (out->has(Features::kSSE) || out->has(Features::kSSE2)) { if (!regAnalysis.hasRegType(Reg::kTypeXmm)) { // The instruction doesn't use XMM register(s), thus it's MMX/MMX2 only. - out.remove(Features::kSSE); - out.remove(Features::kSSE2); + out->remove(Features::kSSE); + out->remove(Features::kSSE2); } else { - out.remove(Features::kMMX); - out.remove(Features::kMMX2); + out->remove(Features::kMMX); + out->remove(Features::kMMX2); } // Special case: PEXTRW instruction is MMX/SSE2 instruction. However, @@ -1414,34 +1438,34 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O // can extract directly to memory. This instruction is, of course, not // compatible with MMX/SSE2 and would #UD if SSE4.1 is not supported. if (instId == Inst::kIdPextrw) { - ASMJIT_ASSERT(out.has(Features::kSSE2)); - ASMJIT_ASSERT(out.has(Features::kSSE4_1)); + ASMJIT_ASSERT(out->has(Features::kSSE2)); + ASMJIT_ASSERT(out->has(Features::kSSE4_1)); if (opCount >= 1 && operands[0].isMem()) - out.remove(Features::kSSE2); + out->remove(Features::kSSE2); else - out.remove(Features::kSSE4_1); + out->remove(Features::kSSE4_1); } } } // Handle PCLMULQDQ vs VPCLMULQDQ. - if (out.has(Features::kVPCLMULQDQ)) { + if (out->has(Features::kVPCLMULQDQ)) { if (regAnalysis.hasRegType(Reg::kTypeZmm) || Support::bitTest(options, Inst::kOptionEvex)) { // AVX512_F & VPCLMULQDQ. - out.remove(Features::kAVX, Features::kPCLMULQDQ); + out->remove(Features::kAVX, Features::kPCLMULQDQ); } else if (regAnalysis.hasRegType(Reg::kTypeYmm)) { - out.remove(Features::kAVX512_F, Features::kAVX512_VL); + out->remove(Features::kAVX512_F, Features::kAVX512_VL); } else { // AVX & PCLMULQDQ. - out.remove(Features::kAVX512_F, Features::kAVX512_VL, Features::kVPCLMULQDQ); + out->remove(Features::kAVX512_F, Features::kAVX512_VL, Features::kVPCLMULQDQ); } } // Handle AVX vs AVX2 overlap. - if (out.has(Features::kAVX) && out.has(Features::kAVX2)) { + if (out->has(Features::kAVX) && out->has(Features::kAVX2)) { bool isAVX2 = true; // Special case: VBROADCASTSS and VBROADCASTSD were introduced in AVX, but // only version that uses memory as a source operand. AVX2 then added support @@ -1459,15 +1483,15 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O } if (isAVX2) - out.remove(Features::kAVX); + out->remove(Features::kAVX); else - out.remove(Features::kAVX2); + out->remove(Features::kAVX2); } // Handle AVX|AVX2|FMA|F16C vs AVX512 overlap. 
- if (out.has(Features::kAVX) || out.has(Features::kAVX2) || out.has(Features::kFMA) || out.has(Features::kF16C)) { + if (out->has(Features::kAVX) || out->has(Features::kAVX2) || out->has(Features::kFMA) || out->has(Features::kF16C)) { // Only AVX512-F|BW|DQ allow to encode AVX/AVX2/FMA/F16C instructions - if (out.has(Features::kAVX512_F) || out.has(Features::kAVX512_BW) || out.has(Features::kAVX512_DQ)) { + if (out->has(Features::kAVX512_F) || out->has(Features::kAVX512_BW) || out->has(Features::kAVX512_DQ)) { uint32_t hasEvex = options & (Inst::kOptionEvex | Inst::_kOptionAvx512Mask); uint32_t hasKMask = inst.extraReg().type() == Reg::kTypeKReg; uint32_t hasKOrZmm = regAnalysis.regTypeMask & Support::bitMask(Reg::kTypeZmm, Reg::kTypeKReg); @@ -1500,15 +1524,15 @@ Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const O } if (!(hasEvex | mustUseEvex | hasKMask | hasKOrZmm | regAnalysis.highVecUsed)) - out.remove(Features::kAVX512_F, Features::kAVX512_BW, Features::kAVX512_DQ, Features::kAVX512_VL); + out->remove(Features::kAVX512_F, Features::kAVX512_BW, Features::kAVX512_DQ, Features::kAVX512_VL); else - out.remove(Features::kAVX, Features::kAVX2, Features::kFMA, Features::kF16C); + out->remove(Features::kAVX, Features::kAVX2, Features::kFMA, Features::kF16C); } } // Clear AVX512_VL if ZMM register is used. if (regAnalysis.hasRegType(Reg::kTypeZmm)) - out.remove(Features::kAVX512_VL); + out->remove(Features::kAVX512_VL); } return kErrorOk; diff --git a/libs/asmjit/src/asmjit/x86/x86instapi_p.h b/libs/asmjit/src/asmjit/x86/x86instapi_p.h index 0389cf5..83b3f77 100644 --- a/libs/asmjit/src/asmjit/x86/x86instapi_p.h +++ b/libs/asmjit/src/asmjit/x86/x86instapi_p.h @@ -36,17 +36,17 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) namespace InstInternal { #ifndef ASMJIT_NO_TEXT -Error instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept; -uint32_t stringToInstId(uint32_t archId, const char* s, size_t len) noexcept; +Error instIdToString(uint32_t arch, uint32_t instId, String& output) noexcept; +uint32_t stringToInstId(uint32_t arch, const char* s, size_t len) noexcept; #endif // !ASMJIT_NO_TEXT #ifndef ASMJIT_NO_VALIDATION -Error validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept; +Error validate(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, uint32_t validationFlags) noexcept; #endif // !ASMJIT_NO_VALIDATION #ifndef ASMJIT_NO_INTROSPECTION -Error queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept; -Error queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept; +Error queryRWInfo(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept; +Error queryFeatures(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount, BaseFeatures* out) noexcept; #endif // !ASMJIT_NO_INTROSPECTION } // {InstInternal} diff --git a/libs/asmjit/src/asmjit/x86/x86instdb.cpp b/libs/asmjit/src/asmjit/x86/x86instdb.cpp index 2d48558..911682b 100644 --- a/libs/asmjit/src/asmjit/x86/x86instdb.cpp +++ b/libs/asmjit/src/asmjit/x86/x86instdb.cpp @@ -64,17 +64,17 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) ((PREFIX) | (OPCODE) | (O) | (L) | (W) | (EvexW) | (N) | (TT) | \ (VEX && ((PREFIX) & Opcode::kMM_Mask) != Opcode::kMM_0F ? 
int(Opcode::kMM_ForceVex3) : 0)) -#define O(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(0, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT)) -#define V(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT)) -#define E(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT) | Opcode::kMM_ForceEvex) -#define O_FPU(PREFIX, OPCODE, O) (Opcode::kFPU_##PREFIX | (0x##OPCODE & 0xFFu) | ((0x##OPCODE >> 8) << Opcode::kFPU_2B_Shift) | Opcode::kO_##O) +#define O(PREFIX, OPCODE, ModO, LL, W, EvexW, N, ModRM) (O_ENCODE(0, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kModRM_##ModRM)) +#define V(PREFIX, OPCODE, ModO, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT)) +#define E(PREFIX, OPCODE, ModO, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kModO_##ModO, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT) | Opcode::kMM_ForceEvex) +#define O_FPU(PREFIX, OPCODE, ModO) (Opcode::kFPU_##PREFIX | (0x##OPCODE & 0xFFu) | ((0x##OPCODE >> 8) << Opcode::kFPU_2B_Shift) | Opcode::kModO_##ModO) // Don't store `_nameDataIndex` if instruction names are disabled. Since some // APIs can use `_nameDataIndex` it's much safer if it's zero if it's not defined. #ifndef ASMJIT_NO_TEXT - #define NAME_DATA_INDEX(X) X + #define NAME_DATA_INDEX(Index) Index #else - #define NAME_DATA_INDEX(X) 0 + #define NAME_DATA_INDEX(Index) 0 #endif // Defines an X86 instruction. 
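To make the renamed opcode-macro parameters (PREFIX, OPCODE, ModO, LL, W, EvexW, N, ModRM/TT) easier to read against the table rows that follow, here is one row decoded by hand. The field interpretations are paraphrased from the parameter names and should be treated as an approximation rather than asmjit documentation; the byte sequence itself is the architectural encoding. For the Adcx row, O(660F38,F6,_,_,x,_,_,_ ) packs the 66 0F 38 prefix/escape bytes and the primary opcode byte F6; ModO = _ means no opcode extension in ModRM.reg, and the 'x' in the W column appears to allow both the 32-bit and REX.W 64-bit forms. The result is the familiar ADCX r32, r/m32 = 66 0F 38 F6 /r; the Adox row differs only in its prefix (F3), matching ADOX's real encoding.

#include <cstdint>
#include <cstdio>

int main() {
  // O(660F38,F6,...): PREFIX bytes 66 0F 38 followed by OPCODE byte F6.
  const uint8_t adcx[] = { 0x66, 0x0F, 0x38, 0xF6 };
  for (uint8_t b : adcx)
    std::printf("%02X ", b);
  std::printf("/r\n"); // A ModRM byte follows; its reg field selects the destination register.
  return 0;
}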
@@ -101,26 +101,26 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Aas , X86Op_xAX , O(000000,3F,_,_,_,_,_,_ ), 0 , 0 , 0 , 13 , 1 , 1 ), // #4 INST(Adc , X86Arith , O(000000,10,2,_,x,_,_,_ ), 0 , 1 , 0 , 17 , 3 , 2 ), // #5 INST(Adcx , X86Rm , O(660F38,F6,_,_,x,_,_,_ ), 0 , 2 , 0 , 21 , 4 , 3 ), // #6 - INST(Add , X86Arith , O(000000,00,0,_,x,_,_,_ ), 0 , 0 , 0 , 761 , 3 , 1 ), // #7 - INST(Addpd , ExtRm , O(660F00,58,_,_,_,_,_,_ ), 0 , 3 , 0 , 4814 , 5 , 4 ), // #8 - INST(Addps , ExtRm , O(000F00,58,_,_,_,_,_,_ ), 0 , 4 , 0 , 4826 , 5 , 5 ), // #9 - INST(Addsd , ExtRm , O(F20F00,58,_,_,_,_,_,_ ), 0 , 5 , 0 , 5048 , 6 , 4 ), // #10 - INST(Addss , ExtRm , O(F30F00,58,_,_,_,_,_,_ ), 0 , 6 , 0 , 2955 , 7 , 5 ), // #11 - INST(Addsubpd , ExtRm , O(660F00,D0,_,_,_,_,_,_ ), 0 , 3 , 0 , 4553 , 5 , 6 ), // #12 - INST(Addsubps , ExtRm , O(F20F00,D0,_,_,_,_,_,_ ), 0 , 5 , 0 , 4565 , 5 , 6 ), // #13 + INST(Add , X86Arith , O(000000,00,0,_,x,_,_,_ ), 0 , 0 , 0 , 3112 , 3 , 1 ), // #7 + INST(Addpd , ExtRm , O(660F00,58,_,_,_,_,_,_ ), 0 , 3 , 0 , 5102 , 5 , 4 ), // #8 + INST(Addps , ExtRm , O(000F00,58,_,_,_,_,_,_ ), 0 , 4 , 0 , 5114 , 5 , 5 ), // #9 + INST(Addsd , ExtRm , O(F20F00,58,_,_,_,_,_,_ ), 0 , 5 , 0 , 5336 , 6 , 4 ), // #10 + INST(Addss , ExtRm , O(F30F00,58,_,_,_,_,_,_ ), 0 , 6 , 0 , 3243 , 7 , 5 ), // #11 + INST(Addsubpd , ExtRm , O(660F00,D0,_,_,_,_,_,_ ), 0 , 3 , 0 , 4841 , 5 , 6 ), // #12 + INST(Addsubps , ExtRm , O(F20F00,D0,_,_,_,_,_,_ ), 0 , 5 , 0 , 4853 , 5 , 6 ), // #13 INST(Adox , X86Rm , O(F30F38,F6,_,_,x,_,_,_ ), 0 , 7 , 0 , 26 , 4 , 7 ), // #14 - INST(Aesdec , ExtRm , O(660F38,DE,_,_,_,_,_,_ ), 0 , 2 , 0 , 3010 , 5 , 8 ), // #15 - INST(Aesdeclast , ExtRm , O(660F38,DF,_,_,_,_,_,_ ), 0 , 2 , 0 , 3018 , 5 , 8 ), // #16 - INST(Aesenc , ExtRm , O(660F38,DC,_,_,_,_,_,_ ), 0 , 2 , 0 , 3030 , 5 , 8 ), // #17 - INST(Aesenclast , ExtRm , O(660F38,DD,_,_,_,_,_,_ ), 0 , 2 , 0 , 3038 , 5 , 8 ), // #18 - INST(Aesimc , ExtRm , O(660F38,DB,_,_,_,_,_,_ ), 0 , 2 , 0 , 3050 , 5 , 8 ), // #19 - INST(Aeskeygenassist , ExtRmi , O(660F3A,DF,_,_,_,_,_,_ ), 0 , 8 , 0 , 3058 , 8 , 8 ), // #20 - INST(And , X86Arith , O(000000,20,4,_,x,_,_,_ ), 0 , 9 , 0 , 2433 , 9 , 1 ), // #21 - INST(Andn , VexRvm_Wx , V(000F38,F2,_,0,x,_,_,_ ), 0 , 10 , 0 , 6494 , 10 , 9 ), // #22 - INST(Andnpd , ExtRm , O(660F00,55,_,_,_,_,_,_ ), 0 , 3 , 0 , 3091 , 5 , 4 ), // #23 - INST(Andnps , ExtRm , O(000F00,55,_,_,_,_,_,_ ), 0 , 4 , 0 , 3099 , 5 , 5 ), // #24 - INST(Andpd , ExtRm , O(660F00,54,_,_,_,_,_,_ ), 0 , 3 , 0 , 4067 , 11 , 4 ), // #25 - INST(Andps , ExtRm , O(000F00,54,_,_,_,_,_,_ ), 0 , 4 , 0 , 4077 , 11 , 5 ), // #26 + INST(Aesdec , ExtRm , O(660F38,DE,_,_,_,_,_,_ ), 0 , 2 , 0 , 3298 , 5 , 8 ), // #15 + INST(Aesdeclast , ExtRm , O(660F38,DF,_,_,_,_,_,_ ), 0 , 2 , 0 , 3306 , 5 , 8 ), // #16 + INST(Aesenc , ExtRm , O(660F38,DC,_,_,_,_,_,_ ), 0 , 2 , 0 , 3318 , 5 , 8 ), // #17 + INST(Aesenclast , ExtRm , O(660F38,DD,_,_,_,_,_,_ ), 0 , 2 , 0 , 3326 , 5 , 8 ), // #18 + INST(Aesimc , ExtRm , O(660F38,DB,_,_,_,_,_,_ ), 0 , 2 , 0 , 3338 , 5 , 8 ), // #19 + INST(Aeskeygenassist , ExtRmi , O(660F3A,DF,_,_,_,_,_,_ ), 0 , 8 , 0 , 3346 , 8 , 8 ), // #20 + INST(And , X86Arith , O(000000,20,4,_,x,_,_,_ ), 0 , 9 , 0 , 2510 , 9 , 1 ), // #21 + INST(Andn , VexRvm_Wx , V(000F38,F2,_,0,x,_,_,_ ), 0 , 10 , 0 , 6810 , 10 , 9 ), // #22 + INST(Andnpd , ExtRm , O(660F00,55,_,_,_,_,_,_ ), 0 , 3 , 0 , 3379 , 5 , 4 ), // #23 + INST(Andnps , ExtRm , O(000F00,55,_,_,_,_,_,_ ), 0 , 4 , 0 , 3387 , 5 , 5 ), // #24 + INST(Andpd , ExtRm , 
O(660F00,54,_,_,_,_,_,_ ), 0 , 3 , 0 , 4355 , 11 , 4 ), // #25 + INST(Andps , ExtRm , O(000F00,54,_,_,_,_,_,_ ), 0 , 4 , 0 , 4365 , 11 , 5 ), // #26 INST(Arpl , X86Mr_NoSize , O(000000,63,_,_,_,_,_,_ ), 0 , 0 , 0 , 31 , 12 , 10 ), // #27 INST(Bextr , VexRmv_Wx , V(000F38,F7,_,0,x,_,_,_ ), 0 , 10 , 0 , 36 , 13 , 9 ), // #28 INST(Blcfill , VexVm_Wx , V(XOP_M9,01,1,0,x,_,_,_ ), 0 , 11 , 0 , 42 , 14 , 11 ), // #29 @@ -128,10 +128,10 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Blcic , VexVm_Wx , V(XOP_M9,01,5,0,x,_,_,_ ), 0 , 13 , 0 , 55 , 14 , 11 ), // #31 INST(Blcmsk , VexVm_Wx , V(XOP_M9,02,1,0,x,_,_,_ ), 0 , 11 , 0 , 61 , 14 , 11 ), // #32 INST(Blcs , VexVm_Wx , V(XOP_M9,01,3,0,x,_,_,_ ), 0 , 14 , 0 , 68 , 14 , 11 ), // #33 - INST(Blendpd , ExtRmi , O(660F3A,0D,_,_,_,_,_,_ ), 0 , 8 , 0 , 3177 , 8 , 12 ), // #34 - INST(Blendps , ExtRmi , O(660F3A,0C,_,_,_,_,_,_ ), 0 , 8 , 0 , 3186 , 8 , 12 ), // #35 - INST(Blendvpd , ExtRm_XMM0 , O(660F38,15,_,_,_,_,_,_ ), 0 , 2 , 0 , 3195 , 15 , 12 ), // #36 - INST(Blendvps , ExtRm_XMM0 , O(660F38,14,_,_,_,_,_,_ ), 0 , 2 , 0 , 3205 , 15 , 12 ), // #37 + INST(Blendpd , ExtRmi , O(660F3A,0D,_,_,_,_,_,_ ), 0 , 8 , 0 , 3465 , 8 , 12 ), // #34 + INST(Blendps , ExtRmi , O(660F3A,0C,_,_,_,_,_,_ ), 0 , 8 , 0 , 3474 , 8 , 12 ), // #35 + INST(Blendvpd , ExtRm_XMM0 , O(660F38,15,_,_,_,_,_,_ ), 0 , 2 , 0 , 3483 , 15 , 12 ), // #36 + INST(Blendvps , ExtRm_XMM0 , O(660F38,14,_,_,_,_,_,_ ), 0 , 2 , 0 , 3493 , 15 , 12 ), // #37 INST(Blsfill , VexVm_Wx , V(XOP_M9,01,2,0,x,_,_,_ ), 0 , 15 , 0 , 73 , 14 , 11 ), // #38 INST(Blsi , VexVm_Wx , V(000F38,F3,3,0,x,_,_,_ ), 0 , 16 , 0 , 81 , 14 , 9 ), // #39 INST(Blsic , VexVm_Wx , V(XOP_M9,01,6,0,x,_,_,_ ), 0 , 12 , 0 , 86 , 14 , 11 ), // #40 @@ -153,7 +153,7 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Btr , X86Bt , O(000F00,B3,_,_,x,_,_,_ ), O(000F00,BA,6,_,x,_,_,_ ), 4 , 4 , 176 , 25 , 14 ), // #56 INST(Bts , X86Bt , O(000F00,AB,_,_,x,_,_,_ ), O(000F00,BA,5,_,x,_,_,_ ), 4 , 5 , 180 , 25 , 14 ), // #57 INST(Bzhi , VexRmv_Wx , V(000F38,F5,_,0,x,_,_,_ ), 0 , 10 , 0 , 184 , 13 , 15 ), // #58 - INST(Call , X86Call , O(000000,FF,2,_,_,_,_,_ ), 0 , 1 , 0 , 2848 , 26 , 1 ), // #59 + INST(Call , X86Call , O(000000,FF,2,_,_,_,_,_ ), 0 , 1 , 0 , 3009 , 26 , 1 ), // #59 INST(Cbw , X86Op_xAX , O(660000,98,_,_,_,_,_,_ ), 0 , 19 , 0 , 189 , 27 , 0 ), // #60 INST(Cdq , X86Op_xDX_xAX , O(000000,99,_,_,_,_,_,_ ), 0 , 0 , 0 , 193 , 28 , 0 ), // #61 INST(Cdqe , X86Op_xAX , O(000000,98,_,_,1,_,_,_ ), 0 , 20 , 0 , 197 , 29 , 0 ), // #62 @@ -165,1439 +165,1483 @@ const InstDB::InstInfo InstDB::_instInfoTable[] = { INST(Clflushopt , X86M_Only , O(660F00,AE,7,_,_,_,_,_ ), 0 , 23 , 0 , 232 , 31 , 21 ), // #68 INST(Clgi , X86Op , O(000F01,DD,_,_,_,_,_,_ ), 0 , 21 , 0 , 243 , 30 , 22 ), // #69 INST(Cli , X86Op , O(000000,FA,_,_,_,_,_,_ ), 0 , 0 , 0 , 248 , 30 , 23 ), // #70 - INST(Clts , X86Op , O(000F00,06,_,_,_,_,_,_ ), 0 , 4 , 0 , 252 , 30 , 0 ), // #71 - INST(Clwb , X86M_Only , O(660F00,AE,6,_,_,_,_,_ ), 0 , 24 , 0 , 257 , 31 , 24 ), // #72 - INST(Clzero , X86Op_MemZAX , O(000F01,FC,_,_,_,_,_,_ ), 0 , 21 , 0 , 262 , 32 , 25 ), // #73 - INST(Cmc , X86Op , O(000000,F5,_,_,_,_,_,_ ), 0 , 0 , 0 , 269 , 30 , 26 ), // #74 - INST(Cmova , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 273 , 22 , 27 ), // #75 - INST(Cmovae , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 279 , 22 , 28 ), // #76 - INST(Cmovb , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 618 , 22 , 28 ), // #77 - INST(Cmovbe , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 
, 4 , 0 , 625 , 22 , 27 ), // #78 - INST(Cmovc , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 286 , 22 , 28 ), // #79 - INST(Cmove , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 633 , 22 , 29 ), // #80 - INST(Cmovg , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 292 , 22 , 30 ), // #81 - INST(Cmovge , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 298 , 22 , 31 ), // #82 - INST(Cmovl , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 305 , 22 , 31 ), // #83 - INST(Cmovle , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 311 , 22 , 30 ), // #84 - INST(Cmovna , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 , 4 , 0 , 318 , 22 , 27 ), // #85 - INST(Cmovnae , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 325 , 22 , 28 ), // #86 - INST(Cmovnb , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 640 , 22 , 28 ), // #87 - INST(Cmovnbe , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 648 , 22 , 27 ), // #88 - INST(Cmovnc , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 333 , 22 , 28 ), // #89 - INST(Cmovne , X86Rm , O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 657 , 22 , 29 ), // #90 - INST(Cmovng , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 340 , 22 , 30 ), // #91 - INST(Cmovnge , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 347 , 22 , 31 ), // #92 - INST(Cmovnl , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 355 , 22 , 31 ), // #93 - INST(Cmovnle , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 362 , 22 , 30 ), // #94 - INST(Cmovno , X86Rm , O(000F00,41,_,_,x,_,_,_ ), 0 , 4 , 0 , 370 , 22 , 32 ), // #95 - INST(Cmovnp , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 377 , 22 , 33 ), // #96 - INST(Cmovns , X86Rm , O(000F00,49,_,_,x,_,_,_ ), 0 , 4 , 0 , 384 , 22 , 34 ), // #97 - INST(Cmovnz , X86Rm , O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 391 , 22 , 29 ), // #98 - INST(Cmovo , X86Rm , O(000F00,40,_,_,x,_,_,_ ), 0 , 4 , 0 , 398 , 22 , 32 ), // #99 - INST(Cmovp , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 404 , 22 , 33 ), // #100 - INST(Cmovpe , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 410 , 22 , 33 ), // #101 - INST(Cmovpo , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 417 , 22 , 33 ), // #102 - INST(Cmovs , X86Rm , O(000F00,48,_,_,x,_,_,_ ), 0 , 4 , 0 , 424 , 22 , 34 ), // #103 - INST(Cmovz , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 430 , 22 , 29 ), // #104 - INST(Cmp , X86Arith , O(000000,38,7,_,x,_,_,_ ), 0 , 25 , 0 , 436 , 33 , 1 ), // #105 - INST(Cmppd , ExtRmi , O(660F00,C2,_,_,_,_,_,_ ), 0 , 3 , 0 , 3431 , 8 , 4 ), // #106 - INST(Cmpps , ExtRmi , O(000F00,C2,_,_,_,_,_,_ ), 0 , 4 , 0 , 3438 , 8 , 5 ), // #107 - INST(Cmps , X86StrMm , O(000000,A6,_,_,_,_,_,_ ), 0 , 0 , 0 , 440 , 34 , 35 ), // #108 - INST(Cmpsd , ExtRmi , O(F20F00,C2,_,_,_,_,_,_ ), 0 , 5 , 0 , 3445 , 35 , 4 ), // #109 - INST(Cmpss , ExtRmi , O(F30F00,C2,_,_,_,_,_,_ ), 0 , 6 , 0 , 3452 , 36 , 5 ), // #110 - INST(Cmpxchg , X86Cmpxchg , O(000F00,B0,_,_,x,_,_,_ ), 0 , 4 , 0 , 445 , 37 , 36 ), // #111 - INST(Cmpxchg16b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,1,_,_,_ ), 0 , 26 , 0 , 453 , 38 , 37 ), // #112 - INST(Cmpxchg8b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,_,_,_,_ ), 0 , 27 , 0 , 464 , 39 , 38 ), // #113 - INST(Comisd , ExtRm , O(660F00,2F,_,_,_,_,_,_ ), 0 , 3 , 0 , 9930 , 6 , 39 ), // #114 - INST(Comiss , ExtRm , O(000F00,2F,_,_,_,_,_,_ ), 0 , 4 , 0 , 9939 , 7 , 40 ), // #115 - INST(Cpuid , X86Op , O(000F00,A2,_,_,_,_,_,_ ), 0 , 4 , 0 , 474 , 40 , 41 ), // #116 - INST(Cqo , X86Op_xDX_xAX , O(000000,99,_,_,1,_,_,_ ), 0 , 20 , 0 , 480 , 41 , 0 ), // #117 - INST(Crc32 , X86Crc , O(F20F38,F0,_,_,x,_,_,_ ), 0 , 28 , 0 , 484 , 42 
, 42 ), // #118 - INST(Cvtdq2pd , ExtRm , O(F30F00,E6,_,_,_,_,_,_ ), 0 , 6 , 0 , 3499 , 6 , 4 ), // #119 - INST(Cvtdq2ps , ExtRm , O(000F00,5B,_,_,_,_,_,_ ), 0 , 4 , 0 , 3509 , 5 , 4 ), // #120 - INST(Cvtpd2dq , ExtRm , O(F20F00,E6,_,_,_,_,_,_ ), 0 , 5 , 0 , 3548 , 5 , 4 ), // #121 - INST(Cvtpd2pi , ExtRm , O(660F00,2D,_,_,_,_,_,_ ), 0 , 3 , 0 , 490 , 43 , 4 ), // #122 - INST(Cvtpd2ps , ExtRm , O(660F00,5A,_,_,_,_,_,_ ), 0 , 3 , 0 , 3558 , 5 , 4 ), // #123 - INST(Cvtpi2pd , ExtRm , O(660F00,2A,_,_,_,_,_,_ ), 0 , 3 , 0 , 499 , 44 , 4 ), // #124 - INST(Cvtpi2ps , ExtRm , O(000F00,2A,_,_,_,_,_,_ ), 0 , 4 , 0 , 508 , 44 , 5 ), // #125 - INST(Cvtps2dq , ExtRm , O(660F00,5B,_,_,_,_,_,_ ), 0 , 3 , 0 , 3610 , 5 , 4 ), // #126 - INST(Cvtps2pd , ExtRm , O(000F00,5A,_,_,_,_,_,_ ), 0 , 4 , 0 , 3620 , 6 , 4 ), // #127 - INST(Cvtps2pi , ExtRm , O(000F00,2D,_,_,_,_,_,_ ), 0 , 4 , 0 , 517 , 45 , 5 ), // #128 - INST(Cvtsd2si , ExtRm_Wx , O(F20F00,2D,_,_,x,_,_,_ ), 0 , 5 , 0 , 3692 , 46 , 4 ), // #129 - INST(Cvtsd2ss , ExtRm , O(F20F00,5A,_,_,_,_,_,_ ), 0 , 5 , 0 , 3702 , 6 , 4 ), // #130 - INST(Cvtsi2sd , ExtRm_Wx , O(F20F00,2A,_,_,x,_,_,_ ), 0 , 5 , 0 , 3723 , 47 , 4 ), // #131 - INST(Cvtsi2ss , ExtRm_Wx , O(F30F00,2A,_,_,x,_,_,_ ), 0 , 6 , 0 , 3733 , 47 , 5 ), // #132 - INST(Cvtss2sd , ExtRm , O(F30F00,5A,_,_,_,_,_,_ ), 0 , 6 , 0 , 3743 , 7 , 4 ), // #133 - INST(Cvtss2si , ExtRm_Wx , O(F30F00,2D,_,_,x,_,_,_ ), 0 , 6 , 0 , 3753 , 48 , 5 ), // #134 - INST(Cvttpd2dq , ExtRm , O(660F00,E6,_,_,_,_,_,_ ), 0 , 3 , 0 , 3774 , 5 , 4 ), // #135 - INST(Cvttpd2pi , ExtRm , O(660F00,2C,_,_,_,_,_,_ ), 0 , 3 , 0 , 526 , 43 , 4 ), // #136 - INST(Cvttps2dq , ExtRm , O(F30F00,5B,_,_,_,_,_,_ ), 0 , 6 , 0 , 3820 , 5 , 4 ), // #137 - INST(Cvttps2pi , ExtRm , O(000F00,2C,_,_,_,_,_,_ ), 0 , 4 , 0 , 536 , 45 , 5 ), // #138 - INST(Cvttsd2si , ExtRm_Wx , O(F20F00,2C,_,_,x,_,_,_ ), 0 , 5 , 0 , 3866 , 46 , 4 ), // #139 - INST(Cvttss2si , ExtRm_Wx , O(F30F00,2C,_,_,x,_,_,_ ), 0 , 6 , 0 , 3889 , 48 , 5 ), // #140 - INST(Cwd , X86Op_xDX_xAX , O(660000,99,_,_,_,_,_,_ ), 0 , 19 , 0 , 546 , 49 , 0 ), // #141 - INST(Cwde , X86Op_xAX , O(000000,98,_,_,_,_,_,_ ), 0 , 0 , 0 , 550 , 50 , 0 ), // #142 - INST(Daa , X86Op , O(000000,27,_,_,_,_,_,_ ), 0 , 0 , 0 , 555 , 1 , 1 ), // #143 - INST(Das , X86Op , O(000000,2F,_,_,_,_,_,_ ), 0 , 0 , 0 , 559 , 1 , 1 ), // #144 - INST(Dec , X86IncDec , O(000000,FE,1,_,x,_,_,_ ), O(000000,48,_,_,x,_,_,_ ), 29 , 6 , 3013 , 51 , 43 ), // #145 - INST(Div , X86M_GPB_MulDiv , O(000000,F6,6,_,x,_,_,_ ), 0 , 30 , 0 , 780 , 52 , 1 ), // #146 - INST(Divpd , ExtRm , O(660F00,5E,_,_,_,_,_,_ ), 0 , 3 , 0 , 3988 , 5 , 4 ), // #147 - INST(Divps , ExtRm , O(000F00,5E,_,_,_,_,_,_ ), 0 , 4 , 0 , 3995 , 5 , 5 ), // #148 - INST(Divsd , ExtRm , O(F20F00,5E,_,_,_,_,_,_ ), 0 , 5 , 0 , 4002 , 6 , 4 ), // #149 - INST(Divss , ExtRm , O(F30F00,5E,_,_,_,_,_,_ ), 0 , 6 , 0 , 4009 , 7 , 5 ), // #150 - INST(Dppd , ExtRmi , O(660F3A,41,_,_,_,_,_,_ ), 0 , 8 , 0 , 4026 , 8 , 12 ), // #151 - INST(Dpps , ExtRmi , O(660F3A,40,_,_,_,_,_,_ ), 0 , 8 , 0 , 4032 , 8 , 12 ), // #152 - INST(Emms , X86Op , O(000F00,77,_,_,_,_,_,_ ), 0 , 4 , 0 , 748 , 53 , 44 ), // #153 - INST(Enqcmd , X86EnqcmdMovdir64b , O(F20F38,F8,_,_,_,_,_,_ ), 0 , 28 , 0 , 563 , 54 , 45 ), // #154 - INST(Enqcmds , X86EnqcmdMovdir64b , O(F30F38,F8,_,_,_,_,_,_ ), 0 , 7 , 0 , 570 , 54 , 45 ), // #155 - INST(Enter , X86Enter , O(000000,C8,_,_,_,_,_,_ ), 0 , 0 , 0 , 2856 , 55 , 0 ), // #156 - INST(Extractps , ExtExtract , O(660F3A,17,_,_,_,_,_,_ ), 0 , 8 , 0 , 4222 , 56 , 12 ), // 
#157 - INST(Extrq , ExtExtrq , O(660F00,79,_,_,_,_,_,_ ), O(660F00,78,0,_,_,_,_,_ ), 3 , 7 , 7290 , 57 , 46 ), // #158 - INST(F2xm1 , FpuOp , O_FPU(00,D9F0,_) , 0 , 31 , 0 , 578 , 30 , 0 ), // #159 - INST(Fabs , FpuOp , O_FPU(00,D9E1,_) , 0 , 31 , 0 , 584 , 30 , 0 ), // #160 - INST(Fadd , FpuArith , O_FPU(00,C0C0,0) , 0 , 32 , 0 , 2067 , 58 , 0 ), // #161 - INST(Faddp , FpuRDef , O_FPU(00,DEC0,_) , 0 , 33 , 0 , 589 , 59 , 0 ), // #162 - INST(Fbld , X86M_Only , O_FPU(00,00DF,4) , 0 , 34 , 0 , 595 , 60 , 0 ), // #163 - INST(Fbstp , X86M_Only , O_FPU(00,00DF,6) , 0 , 35 , 0 , 600 , 60 , 0 ), // #164 - INST(Fchs , FpuOp , O_FPU(00,D9E0,_) , 0 , 31 , 0 , 606 , 30 , 0 ), // #165 - INST(Fclex , FpuOp , O_FPU(9B,DBE2,_) , 0 , 36 , 0 , 611 , 30 , 0 ), // #166 - INST(Fcmovb , FpuR , O_FPU(00,DAC0,_) , 0 , 37 , 0 , 617 , 61 , 28 ), // #167 - INST(Fcmovbe , FpuR , O_FPU(00,DAD0,_) , 0 , 37 , 0 , 624 , 61 , 27 ), // #168 - INST(Fcmove , FpuR , O_FPU(00,DAC8,_) , 0 , 37 , 0 , 632 , 61 , 29 ), // #169 - INST(Fcmovnb , FpuR , O_FPU(00,DBC0,_) , 0 , 38 , 0 , 639 , 61 , 28 ), // #170 - INST(Fcmovnbe , FpuR , O_FPU(00,DBD0,_) , 0 , 38 , 0 , 647 , 61 , 27 ), // #171 - INST(Fcmovne , FpuR , O_FPU(00,DBC8,_) , 0 , 38 , 0 , 656 , 61 , 29 ), // #172 - INST(Fcmovnu , FpuR , O_FPU(00,DBD8,_) , 0 , 38 , 0 , 664 , 61 , 33 ), // #173 - INST(Fcmovu , FpuR , O_FPU(00,DAD8,_) , 0 , 37 , 0 , 672 , 61 , 33 ), // #174 - INST(Fcom , FpuCom , O_FPU(00,D0D0,2) , 0 , 39 , 0 , 679 , 62 , 0 ), // #175 - INST(Fcomi , FpuR , O_FPU(00,DBF0,_) , 0 , 38 , 0 , 684 , 61 , 47 ), // #176 - INST(Fcomip , FpuR , O_FPU(00,DFF0,_) , 0 , 40 , 0 , 690 , 61 , 47 ), // #177 - INST(Fcomp , FpuCom , O_FPU(00,D8D8,3) , 0 , 41 , 0 , 697 , 62 , 0 ), // #178 - INST(Fcompp , FpuOp , O_FPU(00,DED9,_) , 0 , 33 , 0 , 703 , 30 , 0 ), // #179 - INST(Fcos , FpuOp , O_FPU(00,D9FF,_) , 0 , 31 , 0 , 710 , 30 , 0 ), // #180 - INST(Fdecstp , FpuOp , O_FPU(00,D9F6,_) , 0 , 31 , 0 , 715 , 30 , 0 ), // #181 - INST(Fdiv , FpuArith , O_FPU(00,F0F8,6) , 0 , 42 , 0 , 723 , 58 , 0 ), // #182 - INST(Fdivp , FpuRDef , O_FPU(00,DEF8,_) , 0 , 33 , 0 , 728 , 59 , 0 ), // #183 - INST(Fdivr , FpuArith , O_FPU(00,F8F0,7) , 0 , 43 , 0 , 734 , 58 , 0 ), // #184 - INST(Fdivrp , FpuRDef , O_FPU(00,DEF0,_) , 0 , 33 , 0 , 740 , 59 , 0 ), // #185 - INST(Femms , X86Op , O(000F00,0E,_,_,_,_,_,_ ), 0 , 4 , 0 , 747 , 30 , 48 ), // #186 - INST(Ffree , FpuR , O_FPU(00,DDC0,_) , 0 , 44 , 0 , 753 , 61 , 0 ), // #187 - INST(Fiadd , FpuM , O_FPU(00,00DA,0) , 0 , 45 , 0 , 759 , 63 , 0 ), // #188 - INST(Ficom , FpuM , O_FPU(00,00DA,2) , 0 , 46 , 0 , 765 , 63 , 0 ), // #189 - INST(Ficomp , FpuM , O_FPU(00,00DA,3) , 0 , 47 , 0 , 771 , 63 , 0 ), // #190 - INST(Fidiv , FpuM , O_FPU(00,00DA,6) , 0 , 35 , 0 , 778 , 63 , 0 ), // #191 - INST(Fidivr , FpuM , O_FPU(00,00DA,7) , 0 , 48 , 0 , 784 , 63 , 0 ), // #192 - INST(Fild , FpuM , O_FPU(00,00DB,0) , O_FPU(00,00DF,5) , 45 , 8 , 791 , 64 , 0 ), // #193 - INST(Fimul , FpuM , O_FPU(00,00DA,1) , 0 , 49 , 0 , 796 , 63 , 0 ), // #194 - INST(Fincstp , FpuOp , O_FPU(00,D9F7,_) , 0 , 31 , 0 , 802 , 30 , 0 ), // #195 - INST(Finit , FpuOp , O_FPU(9B,DBE3,_) , 0 , 36 , 0 , 810 , 30 , 0 ), // #196 - INST(Fist , FpuM , O_FPU(00,00DB,2) , 0 , 46 , 0 , 816 , 63 , 0 ), // #197 - INST(Fistp , FpuM , O_FPU(00,00DB,3) , O_FPU(00,00DF,7) , 47 , 9 , 821 , 64 , 0 ), // #198 - INST(Fisttp , FpuM , O_FPU(00,00DB,1) , O_FPU(00,00DD,1) , 49 , 10 , 827 , 64 , 6 ), // #199 - INST(Fisub , FpuM , O_FPU(00,00DA,4) , 0 , 34 , 0 , 834 , 63 , 0 ), // #200 - INST(Fisubr , FpuM , 
O_FPU(00,00DA,5) , 0 , 50 , 0 , 840 , 63 , 0 ), // #201 - INST(Fld , FpuFldFst , O_FPU(00,00D9,0) , O_FPU(00,00DB,5) , 45 , 11 , 847 , 65 , 0 ), // #202 - INST(Fld1 , FpuOp , O_FPU(00,D9E8,_) , 0 , 31 , 0 , 851 , 30 , 0 ), // #203 - INST(Fldcw , X86M_Only , O_FPU(00,00D9,5) , 0 , 50 , 0 , 856 , 66 , 0 ), // #204 - INST(Fldenv , X86M_Only , O_FPU(00,00D9,4) , 0 , 34 , 0 , 862 , 31 , 0 ), // #205 - INST(Fldl2e , FpuOp , O_FPU(00,D9EA,_) , 0 , 31 , 0 , 869 , 30 , 0 ), // #206 - INST(Fldl2t , FpuOp , O_FPU(00,D9E9,_) , 0 , 31 , 0 , 876 , 30 , 0 ), // #207 - INST(Fldlg2 , FpuOp , O_FPU(00,D9EC,_) , 0 , 31 , 0 , 883 , 30 , 0 ), // #208 - INST(Fldln2 , FpuOp , O_FPU(00,D9ED,_) , 0 , 31 , 0 , 890 , 30 , 0 ), // #209 - INST(Fldpi , FpuOp , O_FPU(00,D9EB,_) , 0 , 31 , 0 , 897 , 30 , 0 ), // #210 - INST(Fldz , FpuOp , O_FPU(00,D9EE,_) , 0 , 31 , 0 , 903 , 30 , 0 ), // #211 - INST(Fmul , FpuArith , O_FPU(00,C8C8,1) , 0 , 51 , 0 , 2109 , 58 , 0 ), // #212 - INST(Fmulp , FpuRDef , O_FPU(00,DEC8,_) , 0 , 33 , 0 , 908 , 59 , 0 ), // #213 - INST(Fnclex , FpuOp , O_FPU(00,DBE2,_) , 0 , 38 , 0 , 914 , 30 , 0 ), // #214 - INST(Fninit , FpuOp , O_FPU(00,DBE3,_) , 0 , 38 , 0 , 921 , 30 , 0 ), // #215 - INST(Fnop , FpuOp , O_FPU(00,D9D0,_) , 0 , 31 , 0 , 928 , 30 , 0 ), // #216 - INST(Fnsave , X86M_Only , O_FPU(00,00DD,6) , 0 , 35 , 0 , 933 , 31 , 0 ), // #217 - INST(Fnstcw , X86M_Only , O_FPU(00,00D9,7) , 0 , 48 , 0 , 940 , 66 , 0 ), // #218 - INST(Fnstenv , X86M_Only , O_FPU(00,00D9,6) , 0 , 35 , 0 , 947 , 31 , 0 ), // #219 - INST(Fnstsw , FpuStsw , O_FPU(00,00DD,7) , O_FPU(00,DFE0,_) , 48 , 12 , 955 , 67 , 0 ), // #220 - INST(Fpatan , FpuOp , O_FPU(00,D9F3,_) , 0 , 31 , 0 , 962 , 30 , 0 ), // #221 - INST(Fprem , FpuOp , O_FPU(00,D9F8,_) , 0 , 31 , 0 , 969 , 30 , 0 ), // #222 - INST(Fprem1 , FpuOp , O_FPU(00,D9F5,_) , 0 , 31 , 0 , 975 , 30 , 0 ), // #223 - INST(Fptan , FpuOp , O_FPU(00,D9F2,_) , 0 , 31 , 0 , 982 , 30 , 0 ), // #224 - INST(Frndint , FpuOp , O_FPU(00,D9FC,_) , 0 , 31 , 0 , 988 , 30 , 0 ), // #225 - INST(Frstor , X86M_Only , O_FPU(00,00DD,4) , 0 , 34 , 0 , 996 , 31 , 0 ), // #226 - INST(Fsave , X86M_Only , O_FPU(9B,00DD,6) , 0 , 52 , 0 , 1003 , 31 , 0 ), // #227 - INST(Fscale , FpuOp , O_FPU(00,D9FD,_) , 0 , 31 , 0 , 1009 , 30 , 0 ), // #228 - INST(Fsin , FpuOp , O_FPU(00,D9FE,_) , 0 , 31 , 0 , 1016 , 30 , 0 ), // #229 - INST(Fsincos , FpuOp , O_FPU(00,D9FB,_) , 0 , 31 , 0 , 1021 , 30 , 0 ), // #230 - INST(Fsqrt , FpuOp , O_FPU(00,D9FA,_) , 0 , 31 , 0 , 1029 , 30 , 0 ), // #231 - INST(Fst , FpuFldFst , O_FPU(00,00D9,2) , 0 , 46 , 0 , 1035 , 68 , 0 ), // #232 - INST(Fstcw , X86M_Only , O_FPU(9B,00D9,7) , 0 , 53 , 0 , 1039 , 66 , 0 ), // #233 - INST(Fstenv , X86M_Only , O_FPU(9B,00D9,6) , 0 , 52 , 0 , 1045 , 31 , 0 ), // #234 - INST(Fstp , FpuFldFst , O_FPU(00,00D9,3) , O(000000,DB,7,_,_,_,_,_ ), 47 , 13 , 1052 , 65 , 0 ), // #235 - INST(Fstsw , FpuStsw , O_FPU(9B,00DD,7) , O_FPU(9B,DFE0,_) , 53 , 14 , 1057 , 67 , 0 ), // #236 - INST(Fsub , FpuArith , O_FPU(00,E0E8,4) , 0 , 54 , 0 , 2187 , 58 , 0 ), // #237 - INST(Fsubp , FpuRDef , O_FPU(00,DEE8,_) , 0 , 33 , 0 , 1063 , 59 , 0 ), // #238 - INST(Fsubr , FpuArith , O_FPU(00,E8E0,5) , 0 , 55 , 0 , 2193 , 58 , 0 ), // #239 - INST(Fsubrp , FpuRDef , O_FPU(00,DEE0,_) , 0 , 33 , 0 , 1069 , 59 , 0 ), // #240 - INST(Ftst , FpuOp , O_FPU(00,D9E4,_) , 0 , 31 , 0 , 1076 , 30 , 0 ), // #241 - INST(Fucom , FpuRDef , O_FPU(00,DDE0,_) , 0 , 44 , 0 , 1081 , 59 , 0 ), // #242 - INST(Fucomi , FpuR , O_FPU(00,DBE8,_) , 0 , 38 , 0 , 1087 , 61 , 47 ), // #243 - 
INST(Fucomip , FpuR , O_FPU(00,DFE8,_) , 0 , 40 , 0 , 1094 , 61 , 47 ), // #244 - INST(Fucomp , FpuRDef , O_FPU(00,DDE8,_) , 0 , 44 , 0 , 1102 , 59 , 0 ), // #245 - INST(Fucompp , FpuOp , O_FPU(00,DAE9,_) , 0 , 37 , 0 , 1109 , 30 , 0 ), // #246 - INST(Fwait , X86Op , O_FPU(00,009B,_) , 0 , 56 , 0 , 1117 , 30 , 0 ), // #247 - INST(Fxam , FpuOp , O_FPU(00,D9E5,_) , 0 , 31 , 0 , 1123 , 30 , 0 ), // #248 - INST(Fxch , FpuR , O_FPU(00,D9C8,_) , 0 , 31 , 0 , 1128 , 59 , 0 ), // #249 - INST(Fxrstor , X86M_Only , O(000F00,AE,1,_,_,_,_,_ ), 0 , 27 , 0 , 1133 , 31 , 49 ), // #250 - INST(Fxrstor64 , X86M_Only , O(000F00,AE,1,_,1,_,_,_ ), 0 , 26 , 0 , 1141 , 69 , 49 ), // #251 - INST(Fxsave , X86M_Only , O(000F00,AE,0,_,_,_,_,_ ), 0 , 4 , 0 , 1151 , 31 , 49 ), // #252 - INST(Fxsave64 , X86M_Only , O(000F00,AE,0,_,1,_,_,_ ), 0 , 57 , 0 , 1158 , 69 , 49 ), // #253 - INST(Fxtract , FpuOp , O_FPU(00,D9F4,_) , 0 , 31 , 0 , 1167 , 30 , 0 ), // #254 - INST(Fyl2x , FpuOp , O_FPU(00,D9F1,_) , 0 , 31 , 0 , 1175 , 30 , 0 ), // #255 - INST(Fyl2xp1 , FpuOp , O_FPU(00,D9F9,_) , 0 , 31 , 0 , 1181 , 30 , 0 ), // #256 - INST(Getsec , X86Op , O(000F00,37,_,_,_,_,_,_ ), 0 , 4 , 0 , 1189 , 30 , 50 ), // #257 - INST(Gf2p8affineinvqb , ExtRmi , O(660F3A,CF,_,_,_,_,_,_ ), 0 , 8 , 0 , 5577 , 8 , 51 ), // #258 - INST(Gf2p8affineqb , ExtRmi , O(660F3A,CE,_,_,_,_,_,_ ), 0 , 8 , 0 , 5595 , 8 , 51 ), // #259 - INST(Gf2p8mulb , ExtRm , O(660F38,CF,_,_,_,_,_,_ ), 0 , 2 , 0 , 5610 , 5 , 51 ), // #260 - INST(Haddpd , ExtRm , O(660F00,7C,_,_,_,_,_,_ ), 0 , 3 , 0 , 5621 , 5 , 6 ), // #261 - INST(Haddps , ExtRm , O(F20F00,7C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5629 , 5 , 6 ), // #262 - INST(Hlt , X86Op , O(000000,F4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1196 , 30 , 0 ), // #263 - INST(Hsubpd , ExtRm , O(660F00,7D,_,_,_,_,_,_ ), 0 , 3 , 0 , 5637 , 5 , 6 ), // #264 - INST(Hsubps , ExtRm , O(F20F00,7D,_,_,_,_,_,_ ), 0 , 5 , 0 , 5645 , 5 , 6 ), // #265 - INST(Idiv , X86M_GPB_MulDiv , O(000000,F6,7,_,x,_,_,_ ), 0 , 25 , 0 , 779 , 52 , 1 ), // #266 - INST(Imul , X86Imul , O(000000,F6,5,_,x,_,_,_ ), 0 , 58 , 0 , 797 , 70 , 1 ), // #267 - INST(In , X86In , O(000000,EC,_,_,_,_,_,_ ), O(000000,E4,_,_,_,_,_,_ ), 0 , 15 , 10076, 71 , 0 ), // #268 - INST(Inc , X86IncDec , O(000000,FE,0,_,x,_,_,_ ), O(000000,40,_,_,x,_,_,_ ), 0 , 16 , 1200 , 51 , 43 ), // #269 - INST(Ins , X86Ins , O(000000,6C,_,_,_,_,_,_ ), 0 , 0 , 0 , 1857 , 72 , 0 ), // #270 - INST(Insertps , ExtRmi , O(660F3A,21,_,_,_,_,_,_ ), 0 , 8 , 0 , 5781 , 36 , 12 ), // #271 - INST(Insertq , ExtInsertq , O(F20F00,79,_,_,_,_,_,_ ), O(F20F00,78,_,_,_,_,_,_ ), 5 , 17 , 1204 , 73 , 46 ), // #272 - INST(Int , X86Int , O(000000,CD,_,_,_,_,_,_ ), 0 , 0 , 0 , 992 , 74 , 0 ), // #273 - INST(Int3 , X86Op , O(000000,CC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1212 , 30 , 0 ), // #274 - INST(Into , X86Op , O(000000,CE,_,_,_,_,_,_ ), 0 , 0 , 0 , 1217 , 75 , 52 ), // #275 - INST(Invd , X86Op , O(000F00,08,_,_,_,_,_,_ ), 0 , 4 , 0 , 10031, 30 , 41 ), // #276 - INST(Invept , X86Rm_NoSize , O(660F38,80,_,_,_,_,_,_ ), 0 , 2 , 0 , 1222 , 76 , 53 ), // #277 - INST(Invlpg , X86M_Only , O(000F00,01,7,_,_,_,_,_ ), 0 , 22 , 0 , 1229 , 31 , 41 ), // #278 - INST(Invlpga , X86Op_xAddr , O(000F01,DF,_,_,_,_,_,_ ), 0 , 21 , 0 , 1236 , 77 , 22 ), // #279 - INST(Invpcid , X86Rm_NoSize , O(660F38,82,_,_,_,_,_,_ ), 0 , 2 , 0 , 1244 , 76 , 41 ), // #280 - INST(Invvpid , X86Rm_NoSize , O(660F38,81,_,_,_,_,_,_ ), 0 , 2 , 0 , 1252 , 76 , 53 ), // #281 - INST(Iret , X86Op , O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1260 , 78 , 1 ), // #282 - INST(Iretd , X86Op , 
O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1265 , 78 , 1 ), // #283 - INST(Iretq , X86Op , O(000000,CF,_,_,1,_,_,_ ), 0 , 20 , 0 , 1271 , 79 , 1 ), // #284 - INST(Iretw , X86Op , O(660000,CF,_,_,_,_,_,_ ), 0 , 19 , 0 , 1277 , 78 , 1 ), // #285 - INST(Ja , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1283 , 80 , 54 ), // #286 - INST(Jae , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1286 , 80 , 55 ), // #287 - INST(Jb , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1290 , 80 , 55 ), // #288 - INST(Jbe , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1293 , 80 , 54 ), // #289 - INST(Jc , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1297 , 80 , 55 ), // #290 - INST(Je , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1300 , 80 , 56 ), // #291 - INST(Jecxz , X86JecxzLoop , 0 , O(000000,E3,_,_,_,_,_,_ ), 0 , 23 , 1303 , 81 , 0 ), // #292 - INST(Jg , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1309 , 80 , 57 ), // #293 - INST(Jge , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1312 , 80 , 58 ), // #294 - INST(Jl , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1316 , 80 , 58 ), // #295 - INST(Jle , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1319 , 80 , 57 ), // #296 - INST(Jmp , X86Jmp , O(000000,FF,4,_,_,_,_,_ ), O(000000,EB,_,_,_,_,_,_ ), 9 , 28 , 1323 , 82 , 0 ), // #297 - INST(Jna , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1327 , 80 , 54 ), // #298 - INST(Jnae , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1331 , 80 , 55 ), // #299 - INST(Jnb , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1336 , 80 , 55 ), // #300 - INST(Jnbe , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1340 , 80 , 54 ), // #301 - INST(Jnc , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1345 , 80 , 55 ), // #302 - INST(Jne , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1349 , 80 , 56 ), // #303 - INST(Jng , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1353 , 80 , 57 ), // #304 - INST(Jnge , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1357 , 80 , 58 ), // #305 - INST(Jnl , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1362 , 80 , 58 ), // #306 - INST(Jnle , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1366 , 80 , 57 ), // #307 - INST(Jno , X86Jcc , O(000F00,81,_,_,_,_,_,_ ), O(000000,71,_,_,_,_,_,_ ), 4 , 30 , 1371 , 80 , 52 ), // #308 - INST(Jnp , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1375 , 80 , 59 ), // #309 - INST(Jns , X86Jcc , O(000F00,89,_,_,_,_,_,_ ), O(000000,79,_,_,_,_,_,_ ), 4 , 32 , 1379 , 80 , 60 ), // #310 - INST(Jnz , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1383 , 80 , 56 ), // #311 - INST(Jo , X86Jcc , O(000F00,80,_,_,_,_,_,_ ), O(000000,70,_,_,_,_,_,_ ), 4 , 33 , 1387 , 80 , 52 ), // #312 - INST(Jp , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1390 , 80 , 59 ), // #313 - INST(Jpe , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1393 , 80 , 59 ), // #314 - INST(Jpo , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1397 , 80 , 59 ), // #315 - 
INST(Js , X86Jcc , O(000F00,88,_,_,_,_,_,_ ), O(000000,78,_,_,_,_,_,_ ), 4 , 35 , 1401 , 80 , 60 ), // #316 - INST(Jz , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1404 , 80 , 56 ), // #317 - INST(Kaddb , VexRvm , V(660F00,4A,_,1,0,_,_,_ ), 0 , 59 , 0 , 1407 , 83 , 61 ), // #318 - INST(Kaddd , VexRvm , V(660F00,4A,_,1,1,_,_,_ ), 0 , 60 , 0 , 1413 , 83 , 62 ), // #319 - INST(Kaddq , VexRvm , V(000F00,4A,_,1,1,_,_,_ ), 0 , 61 , 0 , 1419 , 83 , 62 ), // #320 - INST(Kaddw , VexRvm , V(000F00,4A,_,1,0,_,_,_ ), 0 , 62 , 0 , 1425 , 83 , 61 ), // #321 - INST(Kandb , VexRvm , V(660F00,41,_,1,0,_,_,_ ), 0 , 59 , 0 , 1431 , 83 , 61 ), // #322 - INST(Kandd , VexRvm , V(660F00,41,_,1,1,_,_,_ ), 0 , 60 , 0 , 1437 , 83 , 62 ), // #323 - INST(Kandnb , VexRvm , V(660F00,42,_,1,0,_,_,_ ), 0 , 59 , 0 , 1443 , 83 , 61 ), // #324 - INST(Kandnd , VexRvm , V(660F00,42,_,1,1,_,_,_ ), 0 , 60 , 0 , 1450 , 83 , 62 ), // #325 - INST(Kandnq , VexRvm , V(000F00,42,_,1,1,_,_,_ ), 0 , 61 , 0 , 1457 , 83 , 62 ), // #326 - INST(Kandnw , VexRvm , V(000F00,42,_,1,0,_,_,_ ), 0 , 62 , 0 , 1464 , 83 , 63 ), // #327 - INST(Kandq , VexRvm , V(000F00,41,_,1,1,_,_,_ ), 0 , 61 , 0 , 1471 , 83 , 62 ), // #328 - INST(Kandw , VexRvm , V(000F00,41,_,1,0,_,_,_ ), 0 , 62 , 0 , 1477 , 83 , 63 ), // #329 - INST(Kmovb , VexKmov , V(660F00,90,_,0,0,_,_,_ ), V(660F00,92,_,0,0,_,_,_ ), 63 , 36 , 1483 , 84 , 61 ), // #330 - INST(Kmovd , VexKmov , V(660F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,0,_,_,_ ), 64 , 37 , 7770 , 85 , 62 ), // #331 - INST(Kmovq , VexKmov , V(000F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,1,_,_,_ ), 65 , 38 , 7781 , 86 , 62 ), // #332 - INST(Kmovw , VexKmov , V(000F00,90,_,0,0,_,_,_ ), V(000F00,92,_,0,0,_,_,_ ), 66 , 39 , 1489 , 87 , 63 ), // #333 - INST(Knotb , VexRm , V(660F00,44,_,0,0,_,_,_ ), 0 , 63 , 0 , 1495 , 88 , 61 ), // #334 - INST(Knotd , VexRm , V(660F00,44,_,0,1,_,_,_ ), 0 , 64 , 0 , 1501 , 88 , 62 ), // #335 - INST(Knotq , VexRm , V(000F00,44,_,0,1,_,_,_ ), 0 , 65 , 0 , 1507 , 88 , 62 ), // #336 - INST(Knotw , VexRm , V(000F00,44,_,0,0,_,_,_ ), 0 , 66 , 0 , 1513 , 88 , 63 ), // #337 - INST(Korb , VexRvm , V(660F00,45,_,1,0,_,_,_ ), 0 , 59 , 0 , 1519 , 83 , 61 ), // #338 - INST(Kord , VexRvm , V(660F00,45,_,1,1,_,_,_ ), 0 , 60 , 0 , 1524 , 83 , 62 ), // #339 - INST(Korq , VexRvm , V(000F00,45,_,1,1,_,_,_ ), 0 , 61 , 0 , 1529 , 83 , 62 ), // #340 - INST(Kortestb , VexRm , V(660F00,98,_,0,0,_,_,_ ), 0 , 63 , 0 , 1534 , 88 , 64 ), // #341 - INST(Kortestd , VexRm , V(660F00,98,_,0,1,_,_,_ ), 0 , 64 , 0 , 1543 , 88 , 65 ), // #342 - INST(Kortestq , VexRm , V(000F00,98,_,0,1,_,_,_ ), 0 , 65 , 0 , 1552 , 88 , 65 ), // #343 - INST(Kortestw , VexRm , V(000F00,98,_,0,0,_,_,_ ), 0 , 66 , 0 , 1561 , 88 , 66 ), // #344 - INST(Korw , VexRvm , V(000F00,45,_,1,0,_,_,_ ), 0 , 62 , 0 , 1570 , 83 , 63 ), // #345 - INST(Kshiftlb , VexRmi , V(660F3A,32,_,0,0,_,_,_ ), 0 , 67 , 0 , 1575 , 89 , 61 ), // #346 - INST(Kshiftld , VexRmi , V(660F3A,33,_,0,0,_,_,_ ), 0 , 67 , 0 , 1584 , 89 , 62 ), // #347 - INST(Kshiftlq , VexRmi , V(660F3A,33,_,0,1,_,_,_ ), 0 , 68 , 0 , 1593 , 89 , 62 ), // #348 - INST(Kshiftlw , VexRmi , V(660F3A,32,_,0,1,_,_,_ ), 0 , 68 , 0 , 1602 , 89 , 63 ), // #349 - INST(Kshiftrb , VexRmi , V(660F3A,30,_,0,0,_,_,_ ), 0 , 67 , 0 , 1611 , 89 , 61 ), // #350 - INST(Kshiftrd , VexRmi , V(660F3A,31,_,0,0,_,_,_ ), 0 , 67 , 0 , 1620 , 89 , 62 ), // #351 - INST(Kshiftrq , VexRmi , V(660F3A,31,_,0,1,_,_,_ ), 0 , 68 , 0 , 1629 , 89 , 62 ), // #352 - INST(Kshiftrw , VexRmi , V(660F3A,30,_,0,1,_,_,_ ), 0 , 68 
, 0 , 1638 , 89 , 63 ), // #353 - INST(Ktestb , VexRm , V(660F00,99,_,0,0,_,_,_ ), 0 , 63 , 0 , 1647 , 88 , 64 ), // #354 - INST(Ktestd , VexRm , V(660F00,99,_,0,1,_,_,_ ), 0 , 64 , 0 , 1654 , 88 , 65 ), // #355 - INST(Ktestq , VexRm , V(000F00,99,_,0,1,_,_,_ ), 0 , 65 , 0 , 1661 , 88 , 65 ), // #356 - INST(Ktestw , VexRm , V(000F00,99,_,0,0,_,_,_ ), 0 , 66 , 0 , 1668 , 88 , 64 ), // #357 - INST(Kunpckbw , VexRvm , V(660F00,4B,_,1,0,_,_,_ ), 0 , 59 , 0 , 1675 , 83 , 63 ), // #358 - INST(Kunpckdq , VexRvm , V(000F00,4B,_,1,1,_,_,_ ), 0 , 61 , 0 , 1684 , 83 , 62 ), // #359 - INST(Kunpckwd , VexRvm , V(000F00,4B,_,1,0,_,_,_ ), 0 , 62 , 0 , 1693 , 83 , 62 ), // #360 - INST(Kxnorb , VexRvm , V(660F00,46,_,1,0,_,_,_ ), 0 , 59 , 0 , 1702 , 83 , 61 ), // #361 - INST(Kxnord , VexRvm , V(660F00,46,_,1,1,_,_,_ ), 0 , 60 , 0 , 1709 , 83 , 62 ), // #362 - INST(Kxnorq , VexRvm , V(000F00,46,_,1,1,_,_,_ ), 0 , 61 , 0 , 1716 , 83 , 62 ), // #363 - INST(Kxnorw , VexRvm , V(000F00,46,_,1,0,_,_,_ ), 0 , 62 , 0 , 1723 , 83 , 63 ), // #364 - INST(Kxorb , VexRvm , V(660F00,47,_,1,0,_,_,_ ), 0 , 59 , 0 , 1730 , 83 , 61 ), // #365 - INST(Kxord , VexRvm , V(660F00,47,_,1,1,_,_,_ ), 0 , 60 , 0 , 1736 , 83 , 62 ), // #366 - INST(Kxorq , VexRvm , V(000F00,47,_,1,1,_,_,_ ), 0 , 61 , 0 , 1742 , 83 , 62 ), // #367 - INST(Kxorw , VexRvm , V(000F00,47,_,1,0,_,_,_ ), 0 , 62 , 0 , 1748 , 83 , 63 ), // #368 - INST(Lahf , X86Op , O(000000,9F,_,_,_,_,_,_ ), 0 , 0 , 0 , 1754 , 90 , 67 ), // #369 - INST(Lar , X86Rm , O(000F00,02,_,_,_,_,_,_ ), 0 , 4 , 0 , 1759 , 91 , 10 ), // #370 - INST(Lddqu , ExtRm , O(F20F00,F0,_,_,_,_,_,_ ), 0 , 5 , 0 , 5791 , 92 , 6 ), // #371 - INST(Ldmxcsr , X86M_Only , O(000F00,AE,2,_,_,_,_,_ ), 0 , 69 , 0 , 5798 , 93 , 5 ), // #372 - INST(Lds , X86Rm , O(000000,C5,_,_,_,_,_,_ ), 0 , 0 , 0 , 1763 , 94 , 0 ), // #373 - INST(Lea , X86Lea , O(000000,8D,_,_,x,_,_,_ ), 0 , 0 , 0 , 1767 , 95 , 0 ), // #374 - INST(Leave , X86Op , O(000000,C9,_,_,_,_,_,_ ), 0 , 0 , 0 , 1771 , 30 , 0 ), // #375 - INST(Les , X86Rm , O(000000,C4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1777 , 94 , 0 ), // #376 - INST(Lfence , X86Fence , O(000F00,AE,5,_,_,_,_,_ ), 0 , 70 , 0 , 1781 , 30 , 4 ), // #377 - INST(Lfs , X86Rm , O(000F00,B4,_,_,_,_,_,_ ), 0 , 4 , 0 , 1788 , 96 , 0 ), // #378 - INST(Lgdt , X86M_Only , O(000F00,01,2,_,_,_,_,_ ), 0 , 69 , 0 , 1792 , 31 , 0 ), // #379 - INST(Lgs , X86Rm , O(000F00,B5,_,_,_,_,_,_ ), 0 , 4 , 0 , 1797 , 96 , 0 ), // #380 - INST(Lidt , X86M_Only , O(000F00,01,3,_,_,_,_,_ ), 0 , 71 , 0 , 1801 , 31 , 0 ), // #381 - INST(Lldt , X86M_NoSize , O(000F00,00,2,_,_,_,_,_ ), 0 , 69 , 0 , 1806 , 97 , 0 ), // #382 - INST(Llwpcb , VexR_Wx , V(XOP_M9,12,0,0,x,_,_,_ ), 0 , 72 , 0 , 1811 , 98 , 68 ), // #383 - INST(Lmsw , X86M_NoSize , O(000F00,01,6,_,_,_,_,_ ), 0 , 73 , 0 , 1818 , 97 , 0 ), // #384 - INST(Lods , X86StrRm , O(000000,AC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1823 , 99 , 69 ), // #385 - INST(Loop , X86JecxzLoop , 0 , O(000000,E2,_,_,_,_,_,_ ), 0 , 40 , 1828 , 100, 0 ), // #386 - INST(Loope , X86JecxzLoop , 0 , O(000000,E1,_,_,_,_,_,_ ), 0 , 41 , 1833 , 100, 56 ), // #387 - INST(Loopne , X86JecxzLoop , 0 , O(000000,E0,_,_,_,_,_,_ ), 0 , 42 , 1839 , 100, 56 ), // #388 - INST(Lsl , X86Rm , O(000F00,03,_,_,_,_,_,_ ), 0 , 4 , 0 , 1846 , 101, 10 ), // #389 - INST(Lss , X86Rm , O(000F00,B2,_,_,_,_,_,_ ), 0 , 4 , 0 , 6289 , 96 , 0 ), // #390 - INST(Ltr , X86M_NoSize , O(000F00,00,3,_,_,_,_,_ ), 0 , 71 , 0 , 1850 , 97 , 0 ), // #391 - INST(Lwpins , VexVmi4_Wx , V(XOP_MA,12,0,0,x,_,_,_ ), 0 , 74 , 0 , 1854 , 102, 68 ), // #392 - 
INST(Lwpval , VexVmi4_Wx , V(XOP_MA,12,1,0,x,_,_,_ ), 0 , 75 , 0 , 1861 , 102, 68 ), // #393 - INST(Lzcnt , X86Rm_Raw66H , O(F30F00,BD,_,_,x,_,_,_ ), 0 , 6 , 0 , 1868 , 22 , 70 ), // #394 - INST(Maskmovdqu , ExtRm_ZDI , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 5807 , 103, 4 ), // #395 - INST(Maskmovq , ExtRm_ZDI , O(000F00,F7,_,_,_,_,_,_ ), 0 , 4 , 0 , 7778 , 104, 71 ), // #396 - INST(Maxpd , ExtRm , O(660F00,5F,_,_,_,_,_,_ ), 0 , 3 , 0 , 5841 , 5 , 4 ), // #397 - INST(Maxps , ExtRm , O(000F00,5F,_,_,_,_,_,_ ), 0 , 4 , 0 , 5848 , 5 , 5 ), // #398 - INST(Maxsd , ExtRm , O(F20F00,5F,_,_,_,_,_,_ ), 0 , 5 , 0 , 7797 , 6 , 4 ), // #399 - INST(Maxss , ExtRm , O(F30F00,5F,_,_,_,_,_,_ ), 0 , 6 , 0 , 5862 , 7 , 5 ), // #400 - INST(Mfence , X86Fence , O(000F00,AE,6,_,_,_,_,_ ), 0 , 73 , 0 , 1874 , 30 , 4 ), // #401 - INST(Minpd , ExtRm , O(660F00,5D,_,_,_,_,_,_ ), 0 , 3 , 0 , 5891 , 5 , 4 ), // #402 - INST(Minps , ExtRm , O(000F00,5D,_,_,_,_,_,_ ), 0 , 4 , 0 , 5898 , 5 , 5 ), // #403 - INST(Minsd , ExtRm , O(F20F00,5D,_,_,_,_,_,_ ), 0 , 5 , 0 , 7861 , 6 , 4 ), // #404 - INST(Minss , ExtRm , O(F30F00,5D,_,_,_,_,_,_ ), 0 , 6 , 0 , 5912 , 7 , 5 ), // #405 - INST(Monitor , X86Op , O(000F01,C8,_,_,_,_,_,_ ), 0 , 21 , 0 , 1881 , 105, 72 ), // #406 - INST(Monitorx , X86Op , O(000F01,FA,_,_,_,_,_,_ ), 0 , 21 , 0 , 1889 , 105, 73 ), // #407 - INST(Mov , X86Mov , 0 , 0 , 0 , 0 , 138 , 106, 0 ), // #408 - INST(Movapd , ExtMov , O(660F00,28,_,_,_,_,_,_ ), O(660F00,29,_,_,_,_,_,_ ), 3 , 43 , 5943 , 107, 4 ), // #409 - INST(Movaps , ExtMov , O(000F00,28,_,_,_,_,_,_ ), O(000F00,29,_,_,_,_,_,_ ), 4 , 44 , 5951 , 107, 5 ), // #410 - INST(Movbe , ExtMovbe , O(000F38,F0,_,_,x,_,_,_ ), O(000F38,F1,_,_,x,_,_,_ ), 76 , 45 , 626 , 108, 74 ), // #411 - INST(Movd , ExtMovd , O(000F00,6E,_,_,_,_,_,_ ), O(000F00,7E,_,_,_,_,_,_ ), 4 , 46 , 7771 , 109, 75 ), // #412 - INST(Movddup , ExtMov , O(F20F00,12,_,_,_,_,_,_ ), 0 , 5 , 0 , 5965 , 6 , 6 ), // #413 - INST(Movdir64b , X86EnqcmdMovdir64b , O(660F38,F8,_,_,_,_,_,_ ), 0 , 2 , 0 , 1898 , 110, 76 ), // #414 - INST(Movdiri , X86MovntiMovdiri , O(000F38,F9,_,_,_,_,_,_ ), 0 , 76 , 0 , 1908 , 111, 77 ), // #415 - INST(Movdq2q , ExtMov , O(F20F00,D6,_,_,_,_,_,_ ), 0 , 5 , 0 , 1916 , 112, 4 ), // #416 - INST(Movdqa , ExtMov , O(660F00,6F,_,_,_,_,_,_ ), O(660F00,7F,_,_,_,_,_,_ ), 3 , 47 , 5974 , 107, 4 ), // #417 - INST(Movdqu , ExtMov , O(F30F00,6F,_,_,_,_,_,_ ), O(F30F00,7F,_,_,_,_,_,_ ), 6 , 48 , 5811 , 107, 4 ), // #418 - INST(Movhlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), 0 , 4 , 0 , 6049 , 113, 5 ), // #419 - INST(Movhpd , ExtMov , O(660F00,16,_,_,_,_,_,_ ), O(660F00,17,_,_,_,_,_,_ ), 3 , 49 , 6058 , 114, 4 ), // #420 - INST(Movhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), O(000F00,17,_,_,_,_,_,_ ), 4 , 50 , 6066 , 114, 5 ), // #421 - INST(Movlhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), 0 , 4 , 0 , 6074 , 113, 5 ), // #422 - INST(Movlpd , ExtMov , O(660F00,12,_,_,_,_,_,_ ), O(660F00,13,_,_,_,_,_,_ ), 3 , 51 , 6083 , 114, 4 ), // #423 - INST(Movlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), O(000F00,13,_,_,_,_,_,_ ), 4 , 52 , 6091 , 114, 5 ), // #424 - INST(Movmskpd , ExtMov , O(660F00,50,_,_,_,_,_,_ ), 0 , 3 , 0 , 6099 , 115, 4 ), // #425 - INST(Movmskps , ExtMov , O(000F00,50,_,_,_,_,_,_ ), 0 , 4 , 0 , 6109 , 115, 5 ), // #426 - INST(Movntdq , ExtMov , 0 , O(660F00,E7,_,_,_,_,_,_ ), 0 , 53 , 6119 , 116, 4 ), // #427 - INST(Movntdqa , ExtMov , O(660F38,2A,_,_,_,_,_,_ ), 0 , 2 , 0 , 6128 , 92 , 12 ), // #428 - INST(Movnti , X86MovntiMovdiri , O(000F00,C3,_,_,x,_,_,_ ), 0 , 4 , 0 , 1924 , 111, 4 
), // #429 - INST(Movntpd , ExtMov , 0 , O(660F00,2B,_,_,_,_,_,_ ), 0 , 54 , 6138 , 116, 4 ), // #430 - INST(Movntps , ExtMov , 0 , O(000F00,2B,_,_,_,_,_,_ ), 0 , 55 , 6147 , 116, 5 ), // #431 - INST(Movntq , ExtMov , 0 , O(000F00,E7,_,_,_,_,_,_ ), 0 , 56 , 1931 , 117, 71 ), // #432 - INST(Movntsd , ExtMov , 0 , O(F20F00,2B,_,_,_,_,_,_ ), 0 , 57 , 1938 , 118, 46 ), // #433 - INST(Movntss , ExtMov , 0 , O(F30F00,2B,_,_,_,_,_,_ ), 0 , 58 , 1946 , 119, 46 ), // #434 - INST(Movq , ExtMovq , O(000F00,6E,_,_,x,_,_,_ ), O(000F00,7E,_,_,x,_,_,_ ), 4 , 59 , 7782 , 120, 75 ), // #435 - INST(Movq2dq , ExtRm , O(F30F00,D6,_,_,_,_,_,_ ), 0 , 6 , 0 , 1954 , 121, 4 ), // #436 - INST(Movs , X86StrMm , O(000000,A4,_,_,_,_,_,_ ), 0 , 0 , 0 , 425 , 122, 69 ), // #437 - INST(Movsd , ExtMov , O(F20F00,10,_,_,_,_,_,_ ), O(F20F00,11,_,_,_,_,_,_ ), 5 , 60 , 6162 , 123, 4 ), // #438 - INST(Movshdup , ExtRm , O(F30F00,16,_,_,_,_,_,_ ), 0 , 6 , 0 , 6169 , 5 , 6 ), // #439 - INST(Movsldup , ExtRm , O(F30F00,12,_,_,_,_,_,_ ), 0 , 6 , 0 , 6179 , 5 , 6 ), // #440 - INST(Movss , ExtMov , O(F30F00,10,_,_,_,_,_,_ ), O(F30F00,11,_,_,_,_,_,_ ), 6 , 61 , 6189 , 124, 5 ), // #441 - INST(Movsx , X86MovsxMovzx , O(000F00,BE,_,_,x,_,_,_ ), 0 , 4 , 0 , 1962 , 125, 0 ), // #442 - INST(Movsxd , X86Rm , O(000000,63,_,_,1,_,_,_ ), 0 , 20 , 0 , 1968 , 126, 0 ), // #443 - INST(Movupd , ExtMov , O(660F00,10,_,_,_,_,_,_ ), O(660F00,11,_,_,_,_,_,_ ), 3 , 62 , 6196 , 107, 4 ), // #444 - INST(Movups , ExtMov , O(000F00,10,_,_,_,_,_,_ ), O(000F00,11,_,_,_,_,_,_ ), 4 , 63 , 6204 , 107, 5 ), // #445 - INST(Movzx , X86MovsxMovzx , O(000F00,B6,_,_,x,_,_,_ ), 0 , 4 , 0 , 1975 , 125, 0 ), // #446 - INST(Mpsadbw , ExtRmi , O(660F3A,42,_,_,_,_,_,_ ), 0 , 8 , 0 , 6212 , 8 , 12 ), // #447 - INST(Mul , X86M_GPB_MulDiv , O(000000,F6,4,_,x,_,_,_ ), 0 , 9 , 0 , 798 , 52 , 1 ), // #448 - INST(Mulpd , ExtRm , O(660F00,59,_,_,_,_,_,_ ), 0 , 3 , 0 , 6266 , 5 , 4 ), // #449 - INST(Mulps , ExtRm , O(000F00,59,_,_,_,_,_,_ ), 0 , 4 , 0 , 6273 , 5 , 5 ), // #450 - INST(Mulsd , ExtRm , O(F20F00,59,_,_,_,_,_,_ ), 0 , 5 , 0 , 6280 , 6 , 4 ), // #451 - INST(Mulss , ExtRm , O(F30F00,59,_,_,_,_,_,_ ), 0 , 6 , 0 , 6287 , 7 , 5 ), // #452 - INST(Mulx , VexRvm_ZDX_Wx , V(F20F38,F6,_,0,x,_,_,_ ), 0 , 77 , 0 , 1981 , 127, 78 ), // #453 - INST(Mwait , X86Op , O(000F01,C9,_,_,_,_,_,_ ), 0 , 21 , 0 , 1986 , 128, 72 ), // #454 - INST(Mwaitx , X86Op , O(000F01,FB,_,_,_,_,_,_ ), 0 , 21 , 0 , 1992 , 129, 73 ), // #455 - INST(Neg , X86M_GPB , O(000000,F6,3,_,x,_,_,_ ), 0 , 78 , 0 , 1999 , 130, 1 ), // #456 - INST(Nop , X86M_Nop , O(000000,90,_,_,_,_,_,_ ), 0 , 0 , 0 , 929 , 131, 0 ), // #457 - INST(Not , X86M_GPB , O(000000,F6,2,_,x,_,_,_ ), 0 , 1 , 0 , 2003 , 130, 0 ), // #458 - INST(Or , X86Arith , O(000000,08,1,_,x,_,_,_ ), 0 , 29 , 0 , 1138 , 132, 1 ), // #459 - INST(Orpd , ExtRm , O(660F00,56,_,_,_,_,_,_ ), 0 , 3 , 0 , 9988 , 11 , 4 ), // #460 - INST(Orps , ExtRm , O(000F00,56,_,_,_,_,_,_ ), 0 , 4 , 0 , 9995 , 11 , 5 ), // #461 - INST(Out , X86Out , O(000000,EE,_,_,_,_,_,_ ), O(000000,E6,_,_,_,_,_,_ ), 0 , 64 , 2007 , 133, 0 ), // #462 - INST(Outs , X86Outs , O(000000,6E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2011 , 134, 0 ), // #463 - INST(Pabsb , ExtRm_P , O(000F38,1C,_,_,_,_,_,_ ), 0 , 76 , 0 , 6341 , 135, 79 ), // #464 - INST(Pabsd , ExtRm_P , O(000F38,1E,_,_,_,_,_,_ ), 0 , 76 , 0 , 6348 , 135, 79 ), // #465 - INST(Pabsw , ExtRm_P , O(000F38,1D,_,_,_,_,_,_ ), 0 , 76 , 0 , 6362 , 135, 79 ), // #466 - INST(Packssdw , ExtRm_P , O(000F00,6B,_,_,_,_,_,_ ), 0 , 4 , 0 , 6369 , 135, 75 ), // 
#467 - INST(Packsswb , ExtRm_P , O(000F00,63,_,_,_,_,_,_ ), 0 , 4 , 0 , 6379 , 135, 75 ), // #468 - INST(Packusdw , ExtRm , O(660F38,2B,_,_,_,_,_,_ ), 0 , 2 , 0 , 6389 , 5 , 12 ), // #469 - INST(Packuswb , ExtRm_P , O(000F00,67,_,_,_,_,_,_ ), 0 , 4 , 0 , 6399 , 135, 75 ), // #470 - INST(Paddb , ExtRm_P , O(000F00,FC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6409 , 135, 75 ), // #471 - INST(Paddd , ExtRm_P , O(000F00,FE,_,_,_,_,_,_ ), 0 , 4 , 0 , 6416 , 135, 75 ), // #472 - INST(Paddq , ExtRm_P , O(000F00,D4,_,_,_,_,_,_ ), 0 , 4 , 0 , 6423 , 135, 4 ), // #473 - INST(Paddsb , ExtRm_P , O(000F00,EC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6430 , 135, 75 ), // #474 - INST(Paddsw , ExtRm_P , O(000F00,ED,_,_,_,_,_,_ ), 0 , 4 , 0 , 6438 , 135, 75 ), // #475 - INST(Paddusb , ExtRm_P , O(000F00,DC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6446 , 135, 75 ), // #476 - INST(Paddusw , ExtRm_P , O(000F00,DD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6455 , 135, 75 ), // #477 - INST(Paddw , ExtRm_P , O(000F00,FD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6464 , 135, 75 ), // #478 - INST(Palignr , ExtRmi_P , O(000F3A,0F,_,_,_,_,_,_ ), 0 , 79 , 0 , 6471 , 136, 6 ), // #479 - INST(Pand , ExtRm_P , O(000F00,DB,_,_,_,_,_,_ ), 0 , 4 , 0 , 6480 , 137, 75 ), // #480 - INST(Pandn , ExtRm_P , O(000F00,DF,_,_,_,_,_,_ ), 0 , 4 , 0 , 6493 , 138, 75 ), // #481 - INST(Pause , X86Op , O(F30000,90,_,_,_,_,_,_ ), 0 , 80 , 0 , 2016 , 30 , 0 ), // #482 - INST(Pavgb , ExtRm_P , O(000F00,E0,_,_,_,_,_,_ ), 0 , 4 , 0 , 6523 , 135, 80 ), // #483 - INST(Pavgusb , Ext3dNow , O(000F0F,BF,_,_,_,_,_,_ ), 0 , 81 , 0 , 2022 , 139, 48 ), // #484 - INST(Pavgw , ExtRm_P , O(000F00,E3,_,_,_,_,_,_ ), 0 , 4 , 0 , 6530 , 135, 80 ), // #485 - INST(Pblendvb , ExtRm_XMM0 , O(660F38,10,_,_,_,_,_,_ ), 0 , 2 , 0 , 6546 , 15 , 12 ), // #486 - INST(Pblendw , ExtRmi , O(660F3A,0E,_,_,_,_,_,_ ), 0 , 8 , 0 , 6556 , 8 , 12 ), // #487 - INST(Pclmulqdq , ExtRmi , O(660F3A,44,_,_,_,_,_,_ ), 0 , 8 , 0 , 6649 , 8 , 81 ), // #488 - INST(Pcmpeqb , ExtRm_P , O(000F00,74,_,_,_,_,_,_ ), 0 , 4 , 0 , 6681 , 138, 75 ), // #489 - INST(Pcmpeqd , ExtRm_P , O(000F00,76,_,_,_,_,_,_ ), 0 , 4 , 0 , 6690 , 138, 75 ), // #490 - INST(Pcmpeqq , ExtRm , O(660F38,29,_,_,_,_,_,_ ), 0 , 2 , 0 , 6699 , 140, 12 ), // #491 - INST(Pcmpeqw , ExtRm_P , O(000F00,75,_,_,_,_,_,_ ), 0 , 4 , 0 , 6708 , 138, 75 ), // #492 - INST(Pcmpestri , ExtRmi , O(660F3A,61,_,_,_,_,_,_ ), 0 , 8 , 0 , 6717 , 141, 82 ), // #493 - INST(Pcmpestrm , ExtRmi , O(660F3A,60,_,_,_,_,_,_ ), 0 , 8 , 0 , 6728 , 142, 82 ), // #494 - INST(Pcmpgtb , ExtRm_P , O(000F00,64,_,_,_,_,_,_ ), 0 , 4 , 0 , 6739 , 138, 75 ), // #495 - INST(Pcmpgtd , ExtRm_P , O(000F00,66,_,_,_,_,_,_ ), 0 , 4 , 0 , 6748 , 138, 75 ), // #496 - INST(Pcmpgtq , ExtRm , O(660F38,37,_,_,_,_,_,_ ), 0 , 2 , 0 , 6757 , 140, 42 ), // #497 - INST(Pcmpgtw , ExtRm_P , O(000F00,65,_,_,_,_,_,_ ), 0 , 4 , 0 , 6766 , 138, 75 ), // #498 - INST(Pcmpistri , ExtRmi , O(660F3A,63,_,_,_,_,_,_ ), 0 , 8 , 0 , 6775 , 143, 82 ), // #499 - INST(Pcmpistrm , ExtRmi , O(660F3A,62,_,_,_,_,_,_ ), 0 , 8 , 0 , 6786 , 144, 82 ), // #500 - INST(Pcommit , X86Op_O , O(660F00,AE,7,_,_,_,_,_ ), 0 , 23 , 0 , 2030 , 30 , 83 ), // #501 - INST(Pdep , VexRvm_Wx , V(F20F38,F5,_,0,x,_,_,_ ), 0 , 77 , 0 , 2038 , 10 , 78 ), // #502 - INST(Pext , VexRvm_Wx , V(F30F38,F5,_,0,x,_,_,_ ), 0 , 82 , 0 , 2043 , 10 , 78 ), // #503 - INST(Pextrb , ExtExtract , O(000F3A,14,_,_,_,_,_,_ ), 0 , 79 , 0 , 7273 , 145, 12 ), // #504 - INST(Pextrd , ExtExtract , O(000F3A,16,_,_,_,_,_,_ ), 0 , 79 , 0 , 7281 , 56 , 12 ), // #505 - INST(Pextrq , ExtExtract , O(000F3A,16,_,_,1,_,_,_ ), 0 , 83 , 
0 , 7289 , 146, 12 ), // #506 - INST(Pextrw , ExtPextrw , O(000F00,C5,_,_,_,_,_,_ ), O(000F3A,15,_,_,_,_,_,_ ), 4 , 65 , 7297 , 147, 84 ), // #507 - INST(Pf2id , Ext3dNow , O(000F0F,1D,_,_,_,_,_,_ ), 0 , 81 , 0 , 2048 , 139, 48 ), // #508 - INST(Pf2iw , Ext3dNow , O(000F0F,1C,_,_,_,_,_,_ ), 0 , 81 , 0 , 2054 , 139, 85 ), // #509 - INST(Pfacc , Ext3dNow , O(000F0F,AE,_,_,_,_,_,_ ), 0 , 81 , 0 , 2060 , 139, 48 ), // #510 - INST(Pfadd , Ext3dNow , O(000F0F,9E,_,_,_,_,_,_ ), 0 , 81 , 0 , 2066 , 139, 48 ), // #511 - INST(Pfcmpeq , Ext3dNow , O(000F0F,B0,_,_,_,_,_,_ ), 0 , 81 , 0 , 2072 , 139, 48 ), // #512 - INST(Pfcmpge , Ext3dNow , O(000F0F,90,_,_,_,_,_,_ ), 0 , 81 , 0 , 2080 , 139, 48 ), // #513 - INST(Pfcmpgt , Ext3dNow , O(000F0F,A0,_,_,_,_,_,_ ), 0 , 81 , 0 , 2088 , 139, 48 ), // #514 - INST(Pfmax , Ext3dNow , O(000F0F,A4,_,_,_,_,_,_ ), 0 , 81 , 0 , 2096 , 139, 48 ), // #515 - INST(Pfmin , Ext3dNow , O(000F0F,94,_,_,_,_,_,_ ), 0 , 81 , 0 , 2102 , 139, 48 ), // #516 - INST(Pfmul , Ext3dNow , O(000F0F,B4,_,_,_,_,_,_ ), 0 , 81 , 0 , 2108 , 139, 48 ), // #517 - INST(Pfnacc , Ext3dNow , O(000F0F,8A,_,_,_,_,_,_ ), 0 , 81 , 0 , 2114 , 139, 85 ), // #518 - INST(Pfpnacc , Ext3dNow , O(000F0F,8E,_,_,_,_,_,_ ), 0 , 81 , 0 , 2121 , 139, 85 ), // #519 - INST(Pfrcp , Ext3dNow , O(000F0F,96,_,_,_,_,_,_ ), 0 , 81 , 0 , 2129 , 139, 48 ), // #520 - INST(Pfrcpit1 , Ext3dNow , O(000F0F,A6,_,_,_,_,_,_ ), 0 , 81 , 0 , 2135 , 139, 48 ), // #521 - INST(Pfrcpit2 , Ext3dNow , O(000F0F,B6,_,_,_,_,_,_ ), 0 , 81 , 0 , 2144 , 139, 48 ), // #522 - INST(Pfrcpv , Ext3dNow , O(000F0F,86,_,_,_,_,_,_ ), 0 , 81 , 0 , 2153 , 139, 86 ), // #523 - INST(Pfrsqit1 , Ext3dNow , O(000F0F,A7,_,_,_,_,_,_ ), 0 , 81 , 0 , 2160 , 139, 48 ), // #524 - INST(Pfrsqrt , Ext3dNow , O(000F0F,97,_,_,_,_,_,_ ), 0 , 81 , 0 , 2169 , 139, 48 ), // #525 - INST(Pfrsqrtv , Ext3dNow , O(000F0F,87,_,_,_,_,_,_ ), 0 , 81 , 0 , 2177 , 139, 86 ), // #526 - INST(Pfsub , Ext3dNow , O(000F0F,9A,_,_,_,_,_,_ ), 0 , 81 , 0 , 2186 , 139, 48 ), // #527 - INST(Pfsubr , Ext3dNow , O(000F0F,AA,_,_,_,_,_,_ ), 0 , 81 , 0 , 2192 , 139, 48 ), // #528 - INST(Phaddd , ExtRm_P , O(000F38,02,_,_,_,_,_,_ ), 0 , 76 , 0 , 7376 , 135, 79 ), // #529 - INST(Phaddsw , ExtRm_P , O(000F38,03,_,_,_,_,_,_ ), 0 , 76 , 0 , 7393 , 135, 79 ), // #530 - INST(Phaddw , ExtRm_P , O(000F38,01,_,_,_,_,_,_ ), 0 , 76 , 0 , 7462 , 135, 79 ), // #531 - INST(Phminposuw , ExtRm , O(660F38,41,_,_,_,_,_,_ ), 0 , 2 , 0 , 7488 , 5 , 12 ), // #532 - INST(Phsubd , ExtRm_P , O(000F38,06,_,_,_,_,_,_ ), 0 , 76 , 0 , 7509 , 135, 79 ), // #533 - INST(Phsubsw , ExtRm_P , O(000F38,07,_,_,_,_,_,_ ), 0 , 76 , 0 , 7526 , 135, 79 ), // #534 - INST(Phsubw , ExtRm_P , O(000F38,05,_,_,_,_,_,_ ), 0 , 76 , 0 , 7535 , 135, 79 ), // #535 - INST(Pi2fd , Ext3dNow , O(000F0F,0D,_,_,_,_,_,_ ), 0 , 81 , 0 , 2199 , 139, 48 ), // #536 - INST(Pi2fw , Ext3dNow , O(000F0F,0C,_,_,_,_,_,_ ), 0 , 81 , 0 , 2205 , 139, 85 ), // #537 - INST(Pinsrb , ExtRmi , O(660F3A,20,_,_,_,_,_,_ ), 0 , 8 , 0 , 7552 , 148, 12 ), // #538 - INST(Pinsrd , ExtRmi , O(660F3A,22,_,_,_,_,_,_ ), 0 , 8 , 0 , 7560 , 149, 12 ), // #539 - INST(Pinsrq , ExtRmi , O(660F3A,22,_,_,1,_,_,_ ), 0 , 84 , 0 , 7568 , 150, 12 ), // #540 - INST(Pinsrw , ExtRmi_P , O(000F00,C4,_,_,_,_,_,_ ), 0 , 4 , 0 , 7576 , 151, 80 ), // #541 - INST(Pmaddubsw , ExtRm_P , O(000F38,04,_,_,_,_,_,_ ), 0 , 76 , 0 , 7746 , 135, 79 ), // #542 - INST(Pmaddwd , ExtRm_P , O(000F00,F5,_,_,_,_,_,_ ), 0 , 4 , 0 , 7757 , 135, 75 ), // #543 - INST(Pmaxsb , ExtRm , O(660F38,3C,_,_,_,_,_,_ ), 0 , 2 , 0 , 7788 , 
11 , 12 ), // #544 - INST(Pmaxsd , ExtRm , O(660F38,3D,_,_,_,_,_,_ ), 0 , 2 , 0 , 7796 , 11 , 12 ), // #545 - INST(Pmaxsw , ExtRm_P , O(000F00,EE,_,_,_,_,_,_ ), 0 , 4 , 0 , 7812 , 137, 80 ), // #546 - INST(Pmaxub , ExtRm_P , O(000F00,DE,_,_,_,_,_,_ ), 0 , 4 , 0 , 7820 , 137, 80 ), // #547 - INST(Pmaxud , ExtRm , O(660F38,3F,_,_,_,_,_,_ ), 0 , 2 , 0 , 7828 , 11 , 12 ), // #548 - INST(Pmaxuw , ExtRm , O(660F38,3E,_,_,_,_,_,_ ), 0 , 2 , 0 , 7844 , 11 , 12 ), // #549 - INST(Pminsb , ExtRm , O(660F38,38,_,_,_,_,_,_ ), 0 , 2 , 0 , 7852 , 11 , 12 ), // #550 - INST(Pminsd , ExtRm , O(660F38,39,_,_,_,_,_,_ ), 0 , 2 , 0 , 7860 , 11 , 12 ), // #551 - INST(Pminsw , ExtRm_P , O(000F00,EA,_,_,_,_,_,_ ), 0 , 4 , 0 , 7876 , 137, 80 ), // #552 - INST(Pminub , ExtRm_P , O(000F00,DA,_,_,_,_,_,_ ), 0 , 4 , 0 , 7884 , 137, 80 ), // #553 - INST(Pminud , ExtRm , O(660F38,3B,_,_,_,_,_,_ ), 0 , 2 , 0 , 7892 , 11 , 12 ), // #554 - INST(Pminuw , ExtRm , O(660F38,3A,_,_,_,_,_,_ ), 0 , 2 , 0 , 7908 , 11 , 12 ), // #555 - INST(Pmovmskb , ExtRm_P , O(000F00,D7,_,_,_,_,_,_ ), 0 , 4 , 0 , 7986 , 152, 80 ), // #556 - INST(Pmovsxbd , ExtRm , O(660F38,21,_,_,_,_,_,_ ), 0 , 2 , 0 , 8083 , 7 , 12 ), // #557 - INST(Pmovsxbq , ExtRm , O(660F38,22,_,_,_,_,_,_ ), 0 , 2 , 0 , 8093 , 153, 12 ), // #558 - INST(Pmovsxbw , ExtRm , O(660F38,20,_,_,_,_,_,_ ), 0 , 2 , 0 , 8103 , 6 , 12 ), // #559 - INST(Pmovsxdq , ExtRm , O(660F38,25,_,_,_,_,_,_ ), 0 , 2 , 0 , 8113 , 6 , 12 ), // #560 - INST(Pmovsxwd , ExtRm , O(660F38,23,_,_,_,_,_,_ ), 0 , 2 , 0 , 8123 , 6 , 12 ), // #561 - INST(Pmovsxwq , ExtRm , O(660F38,24,_,_,_,_,_,_ ), 0 , 2 , 0 , 8133 , 7 , 12 ), // #562 - INST(Pmovzxbd , ExtRm , O(660F38,31,_,_,_,_,_,_ ), 0 , 2 , 0 , 8220 , 7 , 12 ), // #563 - INST(Pmovzxbq , ExtRm , O(660F38,32,_,_,_,_,_,_ ), 0 , 2 , 0 , 8230 , 153, 12 ), // #564 - INST(Pmovzxbw , ExtRm , O(660F38,30,_,_,_,_,_,_ ), 0 , 2 , 0 , 8240 , 6 , 12 ), // #565 - INST(Pmovzxdq , ExtRm , O(660F38,35,_,_,_,_,_,_ ), 0 , 2 , 0 , 8250 , 6 , 12 ), // #566 - INST(Pmovzxwd , ExtRm , O(660F38,33,_,_,_,_,_,_ ), 0 , 2 , 0 , 8260 , 6 , 12 ), // #567 - INST(Pmovzxwq , ExtRm , O(660F38,34,_,_,_,_,_,_ ), 0 , 2 , 0 , 8270 , 7 , 12 ), // #568 - INST(Pmuldq , ExtRm , O(660F38,28,_,_,_,_,_,_ ), 0 , 2 , 0 , 8280 , 5 , 12 ), // #569 - INST(Pmulhrsw , ExtRm_P , O(000F38,0B,_,_,_,_,_,_ ), 0 , 76 , 0 , 8288 , 135, 79 ), // #570 - INST(Pmulhrw , Ext3dNow , O(000F0F,B7,_,_,_,_,_,_ ), 0 , 81 , 0 , 2211 , 139, 48 ), // #571 - INST(Pmulhuw , ExtRm_P , O(000F00,E4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8298 , 135, 80 ), // #572 - INST(Pmulhw , ExtRm_P , O(000F00,E5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8307 , 135, 75 ), // #573 - INST(Pmulld , ExtRm , O(660F38,40,_,_,_,_,_,_ ), 0 , 2 , 0 , 8315 , 5 , 12 ), // #574 - INST(Pmullw , ExtRm_P , O(000F00,D5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8331 , 135, 75 ), // #575 - INST(Pmuludq , ExtRm_P , O(000F00,F4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8354 , 135, 4 ), // #576 - INST(Pop , X86Pop , O(000000,8F,0,_,_,_,_,_ ), O(000000,58,_,_,_,_,_,_ ), 0 , 66 , 2219 , 154, 0 ), // #577 - INST(Popa , X86Op , O(660000,61,_,_,_,_,_,_ ), 0 , 19 , 0 , 2223 , 75 , 0 ), // #578 - INST(Popad , X86Op , O(000000,61,_,_,_,_,_,_ ), 0 , 0 , 0 , 2228 , 75 , 0 ), // #579 - INST(Popcnt , X86Rm_Raw66H , O(F30F00,B8,_,_,x,_,_,_ ), 0 , 6 , 0 , 2234 , 22 , 87 ), // #580 - INST(Popf , X86Op , O(660000,9D,_,_,_,_,_,_ ), 0 , 19 , 0 , 2241 , 30 , 88 ), // #581 - INST(Popfd , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2246 , 75 , 88 ), // #582 - INST(Popfq , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2252 , 155, 88 ), // #583 
- INST(Por , ExtRm_P , O(000F00,EB,_,_,_,_,_,_ ), 0 , 4 , 0 , 8399 , 137, 75 ), // #584 - INST(Prefetch , X86M_Only , O(000F00,0D,0,_,_,_,_,_ ), 0 , 4 , 0 , 2258 , 31 , 48 ), // #585 - INST(Prefetchnta , X86M_Only , O(000F00,18,0,_,_,_,_,_ ), 0 , 4 , 0 , 2267 , 31 , 71 ), // #586 - INST(Prefetcht0 , X86M_Only , O(000F00,18,1,_,_,_,_,_ ), 0 , 27 , 0 , 2279 , 31 , 71 ), // #587 - INST(Prefetcht1 , X86M_Only , O(000F00,18,2,_,_,_,_,_ ), 0 , 69 , 0 , 2290 , 31 , 71 ), // #588 - INST(Prefetcht2 , X86M_Only , O(000F00,18,3,_,_,_,_,_ ), 0 , 71 , 0 , 2301 , 31 , 71 ), // #589 - INST(Prefetchw , X86M_Only , O(000F00,0D,1,_,_,_,_,_ ), 0 , 27 , 0 , 2312 , 31 , 89 ), // #590 - INST(Prefetchwt1 , X86M_Only , O(000F00,0D,2,_,_,_,_,_ ), 0 , 69 , 0 , 2322 , 31 , 90 ), // #591 - INST(Psadbw , ExtRm_P , O(000F00,F6,_,_,_,_,_,_ ), 0 , 4 , 0 , 3980 , 135, 80 ), // #592 - INST(Pshufb , ExtRm_P , O(000F38,00,_,_,_,_,_,_ ), 0 , 76 , 0 , 8725 , 135, 79 ), // #593 - INST(Pshufd , ExtRmi , O(660F00,70,_,_,_,_,_,_ ), 0 , 3 , 0 , 8746 , 8 , 4 ), // #594 - INST(Pshufhw , ExtRmi , O(F30F00,70,_,_,_,_,_,_ ), 0 , 6 , 0 , 8754 , 8 , 4 ), // #595 - INST(Pshuflw , ExtRmi , O(F20F00,70,_,_,_,_,_,_ ), 0 , 5 , 0 , 8763 , 8 , 4 ), // #596 - INST(Pshufw , ExtRmi_P , O(000F00,70,_,_,_,_,_,_ ), 0 , 4 , 0 , 2334 , 156, 71 ), // #597 - INST(Psignb , ExtRm_P , O(000F38,08,_,_,_,_,_,_ ), 0 , 76 , 0 , 8772 , 135, 79 ), // #598 - INST(Psignd , ExtRm_P , O(000F38,0A,_,_,_,_,_,_ ), 0 , 76 , 0 , 8780 , 135, 79 ), // #599 - INST(Psignw , ExtRm_P , O(000F38,09,_,_,_,_,_,_ ), 0 , 76 , 0 , 8788 , 135, 79 ), // #600 - INST(Pslld , ExtRmRi_P , O(000F00,F2,_,_,_,_,_,_ ), O(000F00,72,6,_,_,_,_,_ ), 4 , 67 , 8796 , 157, 75 ), // #601 - INST(Pslldq , ExtRmRi , 0 , O(660F00,73,7,_,_,_,_,_ ), 0 , 68 , 8803 , 158, 4 ), // #602 - INST(Psllq , ExtRmRi_P , O(000F00,F3,_,_,_,_,_,_ ), O(000F00,73,6,_,_,_,_,_ ), 4 , 69 , 8811 , 157, 75 ), // #603 - INST(Psllw , ExtRmRi_P , O(000F00,F1,_,_,_,_,_,_ ), O(000F00,71,6,_,_,_,_,_ ), 4 , 70 , 8842 , 157, 75 ), // #604 - INST(Psrad , ExtRmRi_P , O(000F00,E2,_,_,_,_,_,_ ), O(000F00,72,4,_,_,_,_,_ ), 4 , 71 , 8849 , 157, 75 ), // #605 - INST(Psraw , ExtRmRi_P , O(000F00,E1,_,_,_,_,_,_ ), O(000F00,71,4,_,_,_,_,_ ), 4 , 72 , 8887 , 157, 75 ), // #606 - INST(Psrld , ExtRmRi_P , O(000F00,D2,_,_,_,_,_,_ ), O(000F00,72,2,_,_,_,_,_ ), 4 , 73 , 8894 , 157, 75 ), // #607 - INST(Psrldq , ExtRmRi , 0 , O(660F00,73,3,_,_,_,_,_ ), 0 , 74 , 8901 , 158, 4 ), // #608 - INST(Psrlq , ExtRmRi_P , O(000F00,D3,_,_,_,_,_,_ ), O(000F00,73,2,_,_,_,_,_ ), 4 , 75 , 8909 , 157, 75 ), // #609 - INST(Psrlw , ExtRmRi_P , O(000F00,D1,_,_,_,_,_,_ ), O(000F00,71,2,_,_,_,_,_ ), 4 , 76 , 8940 , 157, 75 ), // #610 - INST(Psubb , ExtRm_P , O(000F00,F8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8947 , 138, 75 ), // #611 - INST(Psubd , ExtRm_P , O(000F00,FA,_,_,_,_,_,_ ), 0 , 4 , 0 , 8954 , 138, 75 ), // #612 - INST(Psubq , ExtRm_P , O(000F00,FB,_,_,_,_,_,_ ), 0 , 4 , 0 , 8961 , 138, 4 ), // #613 - INST(Psubsb , ExtRm_P , O(000F00,E8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8968 , 138, 75 ), // #614 - INST(Psubsw , ExtRm_P , O(000F00,E9,_,_,_,_,_,_ ), 0 , 4 , 0 , 8976 , 138, 75 ), // #615 - INST(Psubusb , ExtRm_P , O(000F00,D8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8984 , 138, 75 ), // #616 - INST(Psubusw , ExtRm_P , O(000F00,D9,_,_,_,_,_,_ ), 0 , 4 , 0 , 8993 , 138, 75 ), // #617 - INST(Psubw , ExtRm_P , O(000F00,F9,_,_,_,_,_,_ ), 0 , 4 , 0 , 9002 , 138, 75 ), // #618 - INST(Pswapd , Ext3dNow , O(000F0F,BB,_,_,_,_,_,_ ), 0 , 81 , 0 , 2341 , 139, 85 ), // #619 - INST(Ptest , ExtRm , 
O(660F38,17,_,_,_,_,_,_ ), 0 , 2 , 0 , 9031 , 5 , 91 ), // #620 - INST(Punpckhbw , ExtRm_P , O(000F00,68,_,_,_,_,_,_ ), 0 , 4 , 0 , 9114 , 135, 75 ), // #621 - INST(Punpckhdq , ExtRm_P , O(000F00,6A,_,_,_,_,_,_ ), 0 , 4 , 0 , 9125 , 135, 75 ), // #622 - INST(Punpckhqdq , ExtRm , O(660F00,6D,_,_,_,_,_,_ ), 0 , 3 , 0 , 9136 , 5 , 4 ), // #623 - INST(Punpckhwd , ExtRm_P , O(000F00,69,_,_,_,_,_,_ ), 0 , 4 , 0 , 9148 , 135, 75 ), // #624 - INST(Punpcklbw , ExtRm_P , O(000F00,60,_,_,_,_,_,_ ), 0 , 4 , 0 , 9159 , 135, 75 ), // #625 - INST(Punpckldq , ExtRm_P , O(000F00,62,_,_,_,_,_,_ ), 0 , 4 , 0 , 9170 , 135, 75 ), // #626 - INST(Punpcklqdq , ExtRm , O(660F00,6C,_,_,_,_,_,_ ), 0 , 3 , 0 , 9181 , 5 , 4 ), // #627 - INST(Punpcklwd , ExtRm_P , O(000F00,61,_,_,_,_,_,_ ), 0 , 4 , 0 , 9193 , 135, 75 ), // #628 - INST(Push , X86Push , O(000000,FF,6,_,_,_,_,_ ), O(000000,50,_,_,_,_,_,_ ), 30 , 77 , 2348 , 159, 0 ), // #629 - INST(Pusha , X86Op , O(660000,60,_,_,_,_,_,_ ), 0 , 19 , 0 , 2353 , 75 , 0 ), // #630 - INST(Pushad , X86Op , O(000000,60,_,_,_,_,_,_ ), 0 , 0 , 0 , 2359 , 75 , 0 ), // #631 - INST(Pushf , X86Op , O(660000,9C,_,_,_,_,_,_ ), 0 , 19 , 0 , 2366 , 30 , 92 ), // #632 - INST(Pushfd , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2372 , 75 , 92 ), // #633 - INST(Pushfq , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2379 , 155, 92 ), // #634 - INST(Pxor , ExtRm_P , O(000F00,EF,_,_,_,_,_,_ ), 0 , 4 , 0 , 9204 , 138, 75 ), // #635 - INST(Rcl , X86Rot , O(000000,D0,2,_,x,_,_,_ ), 0 , 1 , 0 , 2386 , 160, 93 ), // #636 - INST(Rcpps , ExtRm , O(000F00,53,_,_,_,_,_,_ ), 0 , 4 , 0 , 9332 , 5 , 5 ), // #637 - INST(Rcpss , ExtRm , O(F30F00,53,_,_,_,_,_,_ ), 0 , 6 , 0 , 9339 , 7 , 5 ), // #638 - INST(Rcr , X86Rot , O(000000,D0,3,_,x,_,_,_ ), 0 , 78 , 0 , 2390 , 160, 93 ), // #639 - INST(Rdfsbase , X86M , O(F30F00,AE,0,_,x,_,_,_ ), 0 , 6 , 0 , 2394 , 161, 94 ), // #640 - INST(Rdgsbase , X86M , O(F30F00,AE,1,_,x,_,_,_ ), 0 , 85 , 0 , 2403 , 161, 94 ), // #641 - INST(Rdmsr , X86Op , O(000F00,32,_,_,_,_,_,_ ), 0 , 4 , 0 , 2412 , 162, 95 ), // #642 - INST(Rdpid , X86R_Native , O(F30F00,C7,7,_,_,_,_,_ ), 0 , 86 , 0 , 2418 , 163, 96 ), // #643 - INST(Rdpmc , X86Op , O(000F00,33,_,_,_,_,_,_ ), 0 , 4 , 0 , 2424 , 162, 0 ), // #644 - INST(Rdrand , X86M , O(000F00,C7,6,_,x,_,_,_ ), 0 , 73 , 0 , 2430 , 23 , 97 ), // #645 - INST(Rdseed , X86M , O(000F00,C7,7,_,x,_,_,_ ), 0 , 22 , 0 , 2437 , 23 , 98 ), // #646 - INST(Rdtsc , X86Op , O(000F00,31,_,_,_,_,_,_ ), 0 , 4 , 0 , 2444 , 28 , 99 ), // #647 - INST(Rdtscp , X86Op , O(000F01,F9,_,_,_,_,_,_ ), 0 , 21 , 0 , 2450 , 162, 100), // #648 - INST(Ret , X86Ret , O(000000,C2,_,_,_,_,_,_ ), 0 , 0 , 0 , 2883 , 164, 0 ), // #649 - INST(Rol , X86Rot , O(000000,D0,0,_,x,_,_,_ ), 0 , 0 , 0 , 2457 , 160, 101), // #650 - INST(Ror , X86Rot , O(000000,D0,1,_,x,_,_,_ ), 0 , 29 , 0 , 2461 , 160, 101), // #651 - INST(Rorx , VexRmi_Wx , V(F20F3A,F0,_,0,x,_,_,_ ), 0 , 87 , 0 , 2465 , 165, 78 ), // #652 - INST(Roundpd , ExtRmi , O(660F3A,09,_,_,_,_,_,_ ), 0 , 8 , 0 , 9434 , 8 , 12 ), // #653 - INST(Roundps , ExtRmi , O(660F3A,08,_,_,_,_,_,_ ), 0 , 8 , 0 , 9443 , 8 , 12 ), // #654 - INST(Roundsd , ExtRmi , O(660F3A,0B,_,_,_,_,_,_ ), 0 , 8 , 0 , 9452 , 35 , 12 ), // #655 - INST(Roundss , ExtRmi , O(660F3A,0A,_,_,_,_,_,_ ), 0 , 8 , 0 , 9461 , 36 , 12 ), // #656 - INST(Rsm , X86Op , O(000F00,AA,_,_,_,_,_,_ ), 0 , 4 , 0 , 2470 , 75 , 1 ), // #657 - INST(Rsqrtps , ExtRm , O(000F00,52,_,_,_,_,_,_ ), 0 , 4 , 0 , 9558 , 5 , 5 ), // #658 - INST(Rsqrtss , ExtRm , O(F30F00,52,_,_,_,_,_,_ ), 0 , 6 
, 0 , 9567 , 7 , 5 ), // #659 - INST(Sahf , X86Op , O(000000,9E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2474 , 90 , 102), // #660 - INST(Sal , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2479 , 160, 1 ), // #661 - INST(Sar , X86Rot , O(000000,D0,7,_,x,_,_,_ ), 0 , 25 , 0 , 2483 , 160, 1 ), // #662 - INST(Sarx , VexRmv_Wx , V(F30F38,F7,_,0,x,_,_,_ ), 0 , 82 , 0 , 2487 , 13 , 78 ), // #663 - INST(Sbb , X86Arith , O(000000,18,3,_,x,_,_,_ ), 0 , 78 , 0 , 2492 , 166, 2 ), // #664 - INST(Scas , X86StrRm , O(000000,AE,_,_,_,_,_,_ ), 0 , 0 , 0 , 2496 , 167, 35 ), // #665 - INST(Seta , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2501 , 168, 54 ), // #666 - INST(Setae , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2506 , 168, 55 ), // #667 - INST(Setb , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2512 , 168, 55 ), // #668 - INST(Setbe , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2517 , 168, 54 ), // #669 - INST(Setc , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2523 , 168, 55 ), // #670 - INST(Sete , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2528 , 168, 56 ), // #671 - INST(Setg , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2533 , 168, 57 ), // #672 - INST(Setge , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2538 , 168, 58 ), // #673 - INST(Setl , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2544 , 168, 58 ), // #674 - INST(Setle , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2549 , 168, 57 ), // #675 - INST(Setna , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2555 , 168, 54 ), // #676 - INST(Setnae , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2561 , 168, 55 ), // #677 - INST(Setnb , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2568 , 168, 55 ), // #678 - INST(Setnbe , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2574 , 168, 54 ), // #679 - INST(Setnc , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2581 , 168, 55 ), // #680 - INST(Setne , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2587 , 168, 56 ), // #681 - INST(Setng , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2593 , 168, 57 ), // #682 - INST(Setnge , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2599 , 168, 58 ), // #683 - INST(Setnl , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2606 , 168, 58 ), // #684 - INST(Setnle , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2612 , 168, 57 ), // #685 - INST(Setno , X86Set , O(000F00,91,_,_,_,_,_,_ ), 0 , 4 , 0 , 2619 , 168, 52 ), // #686 - INST(Setnp , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2625 , 168, 59 ), // #687 - INST(Setns , X86Set , O(000F00,99,_,_,_,_,_,_ ), 0 , 4 , 0 , 2631 , 168, 60 ), // #688 - INST(Setnz , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2637 , 168, 56 ), // #689 - INST(Seto , X86Set , O(000F00,90,_,_,_,_,_,_ ), 0 , 4 , 0 , 2643 , 168, 52 ), // #690 - INST(Setp , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2648 , 168, 59 ), // #691 - INST(Setpe , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2653 , 168, 59 ), // #692 - INST(Setpo , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2659 , 168, 59 ), // #693 - INST(Sets , X86Set , O(000F00,98,_,_,_,_,_,_ ), 0 , 4 , 0 , 2665 , 168, 60 ), // #694 - INST(Setz , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2670 , 168, 56 ), // #695 - INST(Sfence , X86Fence , O(000F00,AE,7,_,_,_,_,_ ), 0 , 22 , 0 , 2675 , 30 , 71 ), // #696 - INST(Sgdt , X86M_Only , O(000F00,01,0,_,_,_,_,_ ), 0 , 4 , 0 , 2682 , 31 , 0 ), // #697 - INST(Sha1msg1 , ExtRm , O(000F38,C9,_,_,_,_,_,_ ), 0 , 76 , 0 , 2687 , 5 , 103), // #698 - INST(Sha1msg2 , ExtRm , O(000F38,CA,_,_,_,_,_,_ ), 0 
, 76 , 0 , 2696 , 5 , 103), // #699 - INST(Sha1nexte , ExtRm , O(000F38,C8,_,_,_,_,_,_ ), 0 , 76 , 0 , 2705 , 5 , 103), // #700 - INST(Sha1rnds4 , ExtRmi , O(000F3A,CC,_,_,_,_,_,_ ), 0 , 79 , 0 , 2715 , 8 , 103), // #701 - INST(Sha256msg1 , ExtRm , O(000F38,CC,_,_,_,_,_,_ ), 0 , 76 , 0 , 2725 , 5 , 103), // #702 - INST(Sha256msg2 , ExtRm , O(000F38,CD,_,_,_,_,_,_ ), 0 , 76 , 0 , 2736 , 5 , 103), // #703 - INST(Sha256rnds2 , ExtRm_XMM0 , O(000F38,CB,_,_,_,_,_,_ ), 0 , 76 , 0 , 2747 , 15 , 103), // #704 - INST(Shl , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2759 , 160, 1 ), // #705 - INST(Shld , X86ShldShrd , O(000F00,A4,_,_,x,_,_,_ ), 0 , 4 , 0 , 8603 , 169, 1 ), // #706 - INST(Shlx , VexRmv_Wx , V(660F38,F7,_,0,x,_,_,_ ), 0 , 88 , 0 , 2763 , 13 , 78 ), // #707 - INST(Shr , X86Rot , O(000000,D0,5,_,x,_,_,_ ), 0 , 58 , 0 , 2768 , 160, 1 ), // #708 - INST(Shrd , X86ShldShrd , O(000F00,AC,_,_,x,_,_,_ ), 0 , 4 , 0 , 2772 , 169, 1 ), // #709 - INST(Shrx , VexRmv_Wx , V(F20F38,F7,_,0,x,_,_,_ ), 0 , 77 , 0 , 2777 , 13 , 78 ), // #710 - INST(Shufpd , ExtRmi , O(660F00,C6,_,_,_,_,_,_ ), 0 , 3 , 0 , 9828 , 8 , 4 ), // #711 - INST(Shufps , ExtRmi , O(000F00,C6,_,_,_,_,_,_ ), 0 , 4 , 0 , 9836 , 8 , 5 ), // #712 - INST(Sidt , X86M_Only , O(000F00,01,1,_,_,_,_,_ ), 0 , 27 , 0 , 2782 , 31 , 0 ), // #713 - INST(Skinit , X86Op_xAX , O(000F01,DE,_,_,_,_,_,_ ), 0 , 21 , 0 , 2787 , 50 , 104), // #714 - INST(Sldt , X86M , O(000F00,00,0,_,_,_,_,_ ), 0 , 4 , 0 , 2794 , 170, 0 ), // #715 - INST(Slwpcb , VexR_Wx , V(XOP_M9,12,1,0,x,_,_,_ ), 0 , 11 , 0 , 2799 , 98 , 68 ), // #716 - INST(Smsw , X86M , O(000F00,01,4,_,_,_,_,_ ), 0 , 89 , 0 , 2806 , 170, 0 ), // #717 - INST(Sqrtpd , ExtRm , O(660F00,51,_,_,_,_,_,_ ), 0 , 3 , 0 , 9844 , 5 , 4 ), // #718 - INST(Sqrtps , ExtRm , O(000F00,51,_,_,_,_,_,_ ), 0 , 4 , 0 , 9559 , 5 , 5 ), // #719 - INST(Sqrtsd , ExtRm , O(F20F00,51,_,_,_,_,_,_ ), 0 , 5 , 0 , 9860 , 6 , 4 ), // #720 - INST(Sqrtss , ExtRm , O(F30F00,51,_,_,_,_,_,_ ), 0 , 6 , 0 , 9568 , 7 , 5 ), // #721 - INST(Stac , X86Op , O(000F01,CB,_,_,_,_,_,_ ), 0 , 21 , 0 , 2811 , 30 , 16 ), // #722 - INST(Stc , X86Op , O(000000,F9,_,_,_,_,_,_ ), 0 , 0 , 0 , 2816 , 30 , 17 ), // #723 - INST(Std , X86Op , O(000000,FD,_,_,_,_,_,_ ), 0 , 0 , 0 , 6586 , 30 , 18 ), // #724 - INST(Stgi , X86Op , O(000F01,DC,_,_,_,_,_,_ ), 0 , 21 , 0 , 2820 , 30 , 104), // #725 - INST(Sti , X86Op , O(000000,FB,_,_,_,_,_,_ ), 0 , 0 , 0 , 2825 , 30 , 23 ), // #726 - INST(Stmxcsr , X86M_Only , O(000F00,AE,3,_,_,_,_,_ ), 0 , 71 , 0 , 9876 , 93 , 5 ), // #727 - INST(Stos , X86StrMr , O(000000,AA,_,_,_,_,_,_ ), 0 , 0 , 0 , 2829 , 171, 69 ), // #728 - INST(Str , X86M , O(000F00,00,1,_,_,_,_,_ ), 0 , 27 , 0 , 2834 , 170, 0 ), // #729 - INST(Sub , X86Arith , O(000000,28,5,_,x,_,_,_ ), 0 , 58 , 0 , 836 , 166, 1 ), // #730 - INST(Subpd , ExtRm , O(660F00,5C,_,_,_,_,_,_ ), 0 , 3 , 0 , 4556 , 5 , 4 ), // #731 - INST(Subps , ExtRm , O(000F00,5C,_,_,_,_,_,_ ), 0 , 4 , 0 , 4568 , 5 , 5 ), // #732 - INST(Subsd , ExtRm , O(F20F00,5C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5244 , 6 , 4 ), // #733 - INST(Subss , ExtRm , O(F30F00,5C,_,_,_,_,_,_ ), 0 , 6 , 0 , 5254 , 7 , 5 ), // #734 - INST(Swapgs , X86Op , O(000F01,F8,_,_,_,_,_,_ ), 0 , 21 , 0 , 2838 , 155, 0 ), // #735 - INST(Syscall , X86Op , O(000F00,05,_,_,_,_,_,_ ), 0 , 4 , 0 , 2845 , 155, 0 ), // #736 - INST(Sysenter , X86Op , O(000F00,34,_,_,_,_,_,_ ), 0 , 4 , 0 , 2853 , 30 , 0 ), // #737 - INST(Sysexit , X86Op , O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 2862 , 30 , 0 ), // #738 - INST(Sysexit64 , X86Op , 
O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 2870 , 30 , 0 ), // #739 - INST(Sysret , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 2880 , 155, 0 ), // #740 - INST(Sysret64 , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 2887 , 155, 0 ), // #741 - INST(T1mskc , VexVm_Wx , V(XOP_M9,01,7,0,x,_,_,_ ), 0 , 90 , 0 , 2896 , 14 , 11 ), // #742 - INST(Test , X86Test , O(000000,84,_,_,x,_,_,_ ), O(000000,F6,_,_,x,_,_,_ ), 0 , 78 , 9032 , 172, 1 ), // #743 - INST(Tzcnt , X86Rm_Raw66H , O(F30F00,BC,_,_,x,_,_,_ ), 0 , 6 , 0 , 2903 , 22 , 9 ), // #744 - INST(Tzmsk , VexVm_Wx , V(XOP_M9,01,4,0,x,_,_,_ ), 0 , 91 , 0 , 2909 , 14 , 11 ), // #745 - INST(Ucomisd , ExtRm , O(660F00,2E,_,_,_,_,_,_ ), 0 , 3 , 0 , 9929 , 6 , 39 ), // #746 - INST(Ucomiss , ExtRm , O(000F00,2E,_,_,_,_,_,_ ), 0 , 4 , 0 , 9938 , 7 , 40 ), // #747 - INST(Ud2 , X86Op , O(000F00,0B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2915 , 30 , 0 ), // #748 - INST(Unpckhpd , ExtRm , O(660F00,15,_,_,_,_,_,_ ), 0 , 3 , 0 , 9947 , 5 , 4 ), // #749 - INST(Unpckhps , ExtRm , O(000F00,15,_,_,_,_,_,_ ), 0 , 4 , 0 , 9957 , 5 , 5 ), // #750 - INST(Unpcklpd , ExtRm , O(660F00,14,_,_,_,_,_,_ ), 0 , 3 , 0 , 9967 , 5 , 4 ), // #751 - INST(Unpcklps , ExtRm , O(000F00,14,_,_,_,_,_,_ ), 0 , 4 , 0 , 9977 , 5 , 5 ), // #752 - INST(V4fmaddps , VexRm_T1_4X , E(F20F38,9A,_,2,_,0,2,T4X), 0 , 92 , 0 , 2919 , 173, 105), // #753 - INST(V4fmaddss , VexRm_T1_4X , E(F20F38,9B,_,2,_,0,2,T4X), 0 , 92 , 0 , 2929 , 174, 105), // #754 - INST(V4fnmaddps , VexRm_T1_4X , E(F20F38,AA,_,2,_,0,2,T4X), 0 , 92 , 0 , 2939 , 173, 105), // #755 - INST(V4fnmaddss , VexRm_T1_4X , E(F20F38,AB,_,2,_,0,2,T4X), 0 , 92 , 0 , 2950 , 174, 105), // #756 - INST(Vaddpd , VexRvm_Lx , V(660F00,58,_,x,I,1,4,FV ), 0 , 93 , 0 , 2961 , 175, 106), // #757 - INST(Vaddps , VexRvm_Lx , V(000F00,58,_,x,I,0,4,FV ), 0 , 94 , 0 , 2968 , 176, 106), // #758 - INST(Vaddsd , VexRvm , V(F20F00,58,_,I,I,1,3,T1S), 0 , 95 , 0 , 2975 , 177, 107), // #759 - INST(Vaddss , VexRvm , V(F30F00,58,_,I,I,0,2,T1S), 0 , 96 , 0 , 2982 , 178, 107), // #760 - INST(Vaddsubpd , VexRvm_Lx , V(660F00,D0,_,x,I,_,_,_ ), 0 , 63 , 0 , 2989 , 179, 108), // #761 - INST(Vaddsubps , VexRvm_Lx , V(F20F00,D0,_,x,I,_,_,_ ), 0 , 97 , 0 , 2999 , 179, 108), // #762 - INST(Vaesdec , VexRvm_Lx , V(660F38,DE,_,x,I,_,4,FVM), 0 , 98 , 0 , 3009 , 180, 109), // #763 - INST(Vaesdeclast , VexRvm_Lx , V(660F38,DF,_,x,I,_,4,FVM), 0 , 98 , 0 , 3017 , 180, 109), // #764 - INST(Vaesenc , VexRvm_Lx , V(660F38,DC,_,x,I,_,4,FVM), 0 , 98 , 0 , 3029 , 180, 109), // #765 - INST(Vaesenclast , VexRvm_Lx , V(660F38,DD,_,x,I,_,4,FVM), 0 , 98 , 0 , 3037 , 180, 109), // #766 - INST(Vaesimc , VexRm , V(660F38,DB,_,0,I,_,_,_ ), 0 , 88 , 0 , 3049 , 181, 110), // #767 - INST(Vaeskeygenassist , VexRmi , V(660F3A,DF,_,0,I,_,_,_ ), 0 , 67 , 0 , 3057 , 182, 110), // #768 - INST(Valignd , VexRvmi_Lx , E(660F3A,03,_,x,_,0,4,FV ), 0 , 99 , 0 , 3074 , 183, 111), // #769 - INST(Valignq , VexRvmi_Lx , E(660F3A,03,_,x,_,1,4,FV ), 0 , 100, 0 , 3082 , 184, 111), // #770 - INST(Vandnpd , VexRvm_Lx , V(660F00,55,_,x,I,1,4,FV ), 0 , 93 , 0 , 3090 , 185, 112), // #771 - INST(Vandnps , VexRvm_Lx , V(000F00,55,_,x,I,0,4,FV ), 0 , 94 , 0 , 3098 , 186, 112), // #772 - INST(Vandpd , VexRvm_Lx , V(660F00,54,_,x,I,1,4,FV ), 0 , 93 , 0 , 3106 , 187, 112), // #773 - INST(Vandps , VexRvm_Lx , V(000F00,54,_,x,I,0,4,FV ), 0 , 94 , 0 , 3113 , 188, 112), // #774 - INST(Vblendmb , VexRvm_Lx , E(660F38,66,_,x,_,0,4,FVM), 0 , 101, 0 , 3120 , 189, 113), // #775 - INST(Vblendmd , VexRvm_Lx , E(660F38,64,_,x,_,0,4,FV ), 0 , 102, 0 , 3129 
, 190, 111), // #776 - INST(Vblendmpd , VexRvm_Lx , E(660F38,65,_,x,_,1,4,FV ), 0 , 103, 0 , 3138 , 191, 111), // #777 - INST(Vblendmps , VexRvm_Lx , E(660F38,65,_,x,_,0,4,FV ), 0 , 102, 0 , 3148 , 190, 111), // #778 - INST(Vblendmq , VexRvm_Lx , E(660F38,64,_,x,_,1,4,FV ), 0 , 103, 0 , 3158 , 191, 111), // #779 - INST(Vblendmw , VexRvm_Lx , E(660F38,66,_,x,_,1,4,FVM), 0 , 104, 0 , 3167 , 189, 113), // #780 - INST(Vblendpd , VexRvmi_Lx , V(660F3A,0D,_,x,I,_,_,_ ), 0 , 67 , 0 , 3176 , 192, 108), // #781 - INST(Vblendps , VexRvmi_Lx , V(660F3A,0C,_,x,I,_,_,_ ), 0 , 67 , 0 , 3185 , 192, 108), // #782 - INST(Vblendvpd , VexRvmr_Lx , V(660F3A,4B,_,x,0,_,_,_ ), 0 , 67 , 0 , 3194 , 193, 108), // #783 - INST(Vblendvps , VexRvmr_Lx , V(660F3A,4A,_,x,0,_,_,_ ), 0 , 67 , 0 , 3204 , 193, 108), // #784 - INST(Vbroadcastf128 , VexRm , V(660F38,1A,_,1,0,_,_,_ ), 0 , 105, 0 , 3214 , 194, 108), // #785 - INST(Vbroadcastf32x2 , VexRm_Lx , E(660F38,19,_,x,_,0,3,T2 ), 0 , 106, 0 , 3229 , 195, 114), // #786 - INST(Vbroadcastf32x4 , VexRm_Lx , E(660F38,1A,_,x,_,0,4,T4 ), 0 , 107, 0 , 3245 , 196, 63 ), // #787 - INST(Vbroadcastf32x8 , VexRm , E(660F38,1B,_,2,_,0,5,T8 ), 0 , 108, 0 , 3261 , 197, 61 ), // #788 - INST(Vbroadcastf64x2 , VexRm_Lx , E(660F38,1A,_,x,_,1,4,T2 ), 0 , 109, 0 , 3277 , 196, 114), // #789 - INST(Vbroadcastf64x4 , VexRm , E(660F38,1B,_,2,_,1,5,T4 ), 0 , 110, 0 , 3293 , 197, 63 ), // #790 - INST(Vbroadcasti128 , VexRm , V(660F38,5A,_,1,0,_,_,_ ), 0 , 105, 0 , 3309 , 194, 115), // #791 - INST(Vbroadcasti32x2 , VexRm_Lx , E(660F38,59,_,x,_,0,3,T2 ), 0 , 106, 0 , 3324 , 198, 114), // #792 - INST(Vbroadcasti32x4 , VexRm_Lx , E(660F38,5A,_,x,_,0,4,T4 ), 0 , 107, 0 , 3340 , 196, 111), // #793 - INST(Vbroadcasti32x8 , VexRm , E(660F38,5B,_,2,_,0,5,T8 ), 0 , 108, 0 , 3356 , 197, 61 ), // #794 - INST(Vbroadcasti64x2 , VexRm_Lx , E(660F38,5A,_,x,_,1,4,T2 ), 0 , 109, 0 , 3372 , 196, 114), // #795 - INST(Vbroadcasti64x4 , VexRm , E(660F38,5B,_,2,_,1,5,T4 ), 0 , 110, 0 , 3388 , 197, 63 ), // #796 - INST(Vbroadcastsd , VexRm_Lx , V(660F38,19,_,x,0,1,3,T1S), 0 , 111, 0 , 3404 , 199, 116), // #797 - INST(Vbroadcastss , VexRm_Lx , V(660F38,18,_,x,0,0,2,T1S), 0 , 112, 0 , 3417 , 200, 116), // #798 - INST(Vcmppd , VexRvmi_Lx , V(660F00,C2,_,x,I,1,4,FV ), 0 , 93 , 0 , 3430 , 201, 106), // #799 - INST(Vcmpps , VexRvmi_Lx , V(000F00,C2,_,x,I,0,4,FV ), 0 , 94 , 0 , 3437 , 202, 106), // #800 - INST(Vcmpsd , VexRvmi , V(F20F00,C2,_,I,I,1,3,T1S), 0 , 95 , 0 , 3444 , 203, 107), // #801 - INST(Vcmpss , VexRvmi , V(F30F00,C2,_,I,I,0,2,T1S), 0 , 96 , 0 , 3451 , 204, 107), // #802 - INST(Vcomisd , VexRm , V(660F00,2F,_,I,I,1,3,T1S), 0 , 113, 0 , 3458 , 205, 117), // #803 - INST(Vcomiss , VexRm , V(000F00,2F,_,I,I,0,2,T1S), 0 , 114, 0 , 3466 , 206, 117), // #804 - INST(Vcompresspd , VexMr_Lx , E(660F38,8A,_,x,_,1,3,T1S), 0 , 115, 0 , 3474 , 207, 111), // #805 - INST(Vcompressps , VexMr_Lx , E(660F38,8A,_,x,_,0,2,T1S), 0 , 116, 0 , 3486 , 207, 111), // #806 - INST(Vcvtdq2pd , VexRm_Lx , V(F30F00,E6,_,x,I,0,3,HV ), 0 , 117, 0 , 3498 , 208, 106), // #807 - INST(Vcvtdq2ps , VexRm_Lx , V(000F00,5B,_,x,I,0,4,FV ), 0 , 94 , 0 , 3508 , 209, 106), // #808 - INST(Vcvtne2ps2bf16 , VexRvm , E(F20F38,72,_,_,_,0,_,_ ), 0 , 118, 0 , 3518 , 190, 118), // #809 - INST(Vcvtneps2bf16 , VexRm , E(F30F38,72,_,_,_,0,_,_ ), 0 , 119, 0 , 3533 , 210, 118), // #810 - INST(Vcvtpd2dq , VexRm_Lx , V(F20F00,E6,_,x,I,1,4,FV ), 0 , 120, 0 , 3547 , 211, 106), // #811 - INST(Vcvtpd2ps , VexRm_Lx , V(660F00,5A,_,x,I,1,4,FV ), 0 , 93 , 0 , 3557 , 211, 106), // 
#812 - INST(Vcvtpd2qq , VexRm_Lx , E(660F00,7B,_,x,_,1,4,FV ), 0 , 121, 0 , 3567 , 212, 114), // #813 - INST(Vcvtpd2udq , VexRm_Lx , E(000F00,79,_,x,_,1,4,FV ), 0 , 122, 0 , 3577 , 213, 111), // #814 - INST(Vcvtpd2uqq , VexRm_Lx , E(660F00,79,_,x,_,1,4,FV ), 0 , 121, 0 , 3588 , 212, 114), // #815 - INST(Vcvtph2ps , VexRm_Lx , V(660F38,13,_,x,0,0,3,HVM), 0 , 123, 0 , 3599 , 214, 119), // #816 - INST(Vcvtps2dq , VexRm_Lx , V(660F00,5B,_,x,I,0,4,FV ), 0 , 124, 0 , 3609 , 209, 106), // #817 - INST(Vcvtps2pd , VexRm_Lx , V(000F00,5A,_,x,I,0,4,HV ), 0 , 125, 0 , 3619 , 215, 106), // #818 - INST(Vcvtps2ph , VexMri_Lx , V(660F3A,1D,_,x,0,0,3,HVM), 0 , 126, 0 , 3629 , 216, 119), // #819 - INST(Vcvtps2qq , VexRm_Lx , E(660F00,7B,_,x,_,0,3,HV ), 0 , 127, 0 , 3639 , 217, 114), // #820 - INST(Vcvtps2udq , VexRm_Lx , E(000F00,79,_,x,_,0,4,FV ), 0 , 128, 0 , 3649 , 218, 111), // #821 - INST(Vcvtps2uqq , VexRm_Lx , E(660F00,79,_,x,_,0,3,HV ), 0 , 127, 0 , 3660 , 217, 114), // #822 - INST(Vcvtqq2pd , VexRm_Lx , E(F30F00,E6,_,x,_,1,4,FV ), 0 , 129, 0 , 3671 , 212, 114), // #823 - INST(Vcvtqq2ps , VexRm_Lx , E(000F00,5B,_,x,_,1,4,FV ), 0 , 122, 0 , 3681 , 213, 114), // #824 - INST(Vcvtsd2si , VexRm_Wx , V(F20F00,2D,_,I,x,x,3,T1F), 0 , 130, 0 , 3691 , 219, 107), // #825 - INST(Vcvtsd2ss , VexRvm , V(F20F00,5A,_,I,I,1,3,T1S), 0 , 95 , 0 , 3701 , 177, 107), // #826 - INST(Vcvtsd2usi , VexRm_Wx , E(F20F00,79,_,I,_,x,3,T1F), 0 , 131, 0 , 3711 , 220, 63 ), // #827 - INST(Vcvtsi2sd , VexRvm_Wx , V(F20F00,2A,_,I,x,x,2,T1W), 0 , 132, 0 , 3722 , 221, 107), // #828 - INST(Vcvtsi2ss , VexRvm_Wx , V(F30F00,2A,_,I,x,x,2,T1W), 0 , 133, 0 , 3732 , 221, 107), // #829 - INST(Vcvtss2sd , VexRvm , V(F30F00,5A,_,I,I,0,2,T1S), 0 , 96 , 0 , 3742 , 222, 107), // #830 - INST(Vcvtss2si , VexRm_Wx , V(F30F00,2D,_,I,x,x,2,T1F), 0 , 134, 0 , 3752 , 223, 107), // #831 - INST(Vcvtss2usi , VexRm_Wx , E(F30F00,79,_,I,_,x,2,T1F), 0 , 135, 0 , 3762 , 224, 63 ), // #832 - INST(Vcvttpd2dq , VexRm_Lx , V(660F00,E6,_,x,I,1,4,FV ), 0 , 93 , 0 , 3773 , 225, 106), // #833 - INST(Vcvttpd2qq , VexRm_Lx , E(660F00,7A,_,x,_,1,4,FV ), 0 , 121, 0 , 3784 , 226, 111), // #834 - INST(Vcvttpd2udq , VexRm_Lx , E(000F00,78,_,x,_,1,4,FV ), 0 , 122, 0 , 3795 , 227, 111), // #835 - INST(Vcvttpd2uqq , VexRm_Lx , E(660F00,78,_,x,_,1,4,FV ), 0 , 121, 0 , 3807 , 226, 114), // #836 - INST(Vcvttps2dq , VexRm_Lx , V(F30F00,5B,_,x,I,0,4,FV ), 0 , 136, 0 , 3819 , 228, 106), // #837 - INST(Vcvttps2qq , VexRm_Lx , E(660F00,7A,_,x,_,0,3,HV ), 0 , 127, 0 , 3830 , 229, 114), // #838 - INST(Vcvttps2udq , VexRm_Lx , E(000F00,78,_,x,_,0,4,FV ), 0 , 128, 0 , 3841 , 230, 111), // #839 - INST(Vcvttps2uqq , VexRm_Lx , E(660F00,78,_,x,_,0,3,HV ), 0 , 127, 0 , 3853 , 229, 114), // #840 - INST(Vcvttsd2si , VexRm_Wx , V(F20F00,2C,_,I,x,x,3,T1F), 0 , 130, 0 , 3865 , 231, 107), // #841 - INST(Vcvttsd2usi , VexRm_Wx , E(F20F00,78,_,I,_,x,3,T1F), 0 , 131, 0 , 3876 , 232, 63 ), // #842 - INST(Vcvttss2si , VexRm_Wx , V(F30F00,2C,_,I,x,x,2,T1F), 0 , 134, 0 , 3888 , 233, 107), // #843 - INST(Vcvttss2usi , VexRm_Wx , E(F30F00,78,_,I,_,x,2,T1F), 0 , 135, 0 , 3899 , 234, 63 ), // #844 - INST(Vcvtudq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,0,3,HV ), 0 , 137, 0 , 3911 , 235, 111), // #845 - INST(Vcvtudq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,0,4,FV ), 0 , 138, 0 , 3922 , 218, 111), // #846 - INST(Vcvtuqq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,1,4,FV ), 0 , 129, 0 , 3933 , 212, 114), // #847 - INST(Vcvtuqq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,1,4,FV ), 0 , 139, 0 , 3944 , 213, 114), // #848 - INST(Vcvtusi2sd , VexRvm_Wx 
, E(F20F00,7B,_,I,_,x,2,T1W), 0 , 140, 0 , 3955 , 236, 63 ), // #849 - INST(Vcvtusi2ss , VexRvm_Wx , E(F30F00,7B,_,I,_,x,2,T1W), 0 , 141, 0 , 3966 , 236, 63 ), // #850 - INST(Vdbpsadbw , VexRvmi_Lx , E(660F3A,42,_,x,_,0,4,FVM), 0 , 142, 0 , 3977 , 237, 113), // #851 - INST(Vdivpd , VexRvm_Lx , V(660F00,5E,_,x,I,1,4,FV ), 0 , 93 , 0 , 3987 , 175, 106), // #852 - INST(Vdivps , VexRvm_Lx , V(000F00,5E,_,x,I,0,4,FV ), 0 , 94 , 0 , 3994 , 176, 106), // #853 - INST(Vdivsd , VexRvm , V(F20F00,5E,_,I,I,1,3,T1S), 0 , 95 , 0 , 4001 , 177, 107), // #854 - INST(Vdivss , VexRvm , V(F30F00,5E,_,I,I,0,2,T1S), 0 , 96 , 0 , 4008 , 178, 107), // #855 - INST(Vdpbf16ps , VexRvm , E(F30F38,52,_,_,_,0,_,_ ), 0 , 119, 0 , 4015 , 190, 118), // #856 - INST(Vdppd , VexRvmi_Lx , V(660F3A,41,_,x,I,_,_,_ ), 0 , 67 , 0 , 4025 , 238, 108), // #857 - INST(Vdpps , VexRvmi_Lx , V(660F3A,40,_,x,I,_,_,_ ), 0 , 67 , 0 , 4031 , 192, 108), // #858 - INST(Verr , X86M_NoSize , O(000F00,00,4,_,_,_,_,_ ), 0 , 89 , 0 , 4037 , 97 , 10 ), // #859 - INST(Verw , X86M_NoSize , O(000F00,00,5,_,_,_,_,_ ), 0 , 70 , 0 , 4042 , 97 , 10 ), // #860 - INST(Vexp2pd , VexRm , E(660F38,C8,_,2,_,1,4,FV ), 0 , 143, 0 , 4047 , 239, 120), // #861 - INST(Vexp2ps , VexRm , E(660F38,C8,_,2,_,0,4,FV ), 0 , 144, 0 , 4055 , 240, 120), // #862 - INST(Vexpandpd , VexRm_Lx , E(660F38,88,_,x,_,1,3,T1S), 0 , 115, 0 , 4063 , 241, 111), // #863 - INST(Vexpandps , VexRm_Lx , E(660F38,88,_,x,_,0,2,T1S), 0 , 116, 0 , 4073 , 241, 111), // #864 - INST(Vextractf128 , VexMri , V(660F3A,19,_,1,0,_,_,_ ), 0 , 145, 0 , 4083 , 242, 108), // #865 - INST(Vextractf32x4 , VexMri_Lx , E(660F3A,19,_,x,_,0,4,T4 ), 0 , 146, 0 , 4096 , 243, 111), // #866 - INST(Vextractf32x8 , VexMri , E(660F3A,1B,_,2,_,0,5,T8 ), 0 , 147, 0 , 4110 , 244, 61 ), // #867 - INST(Vextractf64x2 , VexMri_Lx , E(660F3A,19,_,x,_,1,4,T2 ), 0 , 148, 0 , 4124 , 243, 114), // #868 - INST(Vextractf64x4 , VexMri , E(660F3A,1B,_,2,_,1,5,T4 ), 0 , 149, 0 , 4138 , 244, 63 ), // #869 - INST(Vextracti128 , VexMri , V(660F3A,39,_,1,0,_,_,_ ), 0 , 145, 0 , 4152 , 242, 115), // #870 - INST(Vextracti32x4 , VexMri_Lx , E(660F3A,39,_,x,_,0,4,T4 ), 0 , 146, 0 , 4165 , 243, 111), // #871 - INST(Vextracti32x8 , VexMri , E(660F3A,3B,_,2,_,0,5,T8 ), 0 , 147, 0 , 4179 , 244, 61 ), // #872 - INST(Vextracti64x2 , VexMri_Lx , E(660F3A,39,_,x,_,1,4,T2 ), 0 , 148, 0 , 4193 , 243, 114), // #873 - INST(Vextracti64x4 , VexMri , E(660F3A,3B,_,2,_,1,5,T4 ), 0 , 149, 0 , 4207 , 244, 63 ), // #874 - INST(Vextractps , VexMri , V(660F3A,17,_,0,I,I,2,T1S), 0 , 150, 0 , 4221 , 245, 107), // #875 - INST(Vfixupimmpd , VexRvmi_Lx , E(660F3A,54,_,x,_,1,4,FV ), 0 , 100, 0 , 4232 , 246, 111), // #876 - INST(Vfixupimmps , VexRvmi_Lx , E(660F3A,54,_,x,_,0,4,FV ), 0 , 99 , 0 , 4244 , 247, 111), // #877 - INST(Vfixupimmsd , VexRvmi , E(660F3A,55,_,I,_,1,3,T1S), 0 , 151, 0 , 4256 , 248, 63 ), // #878 - INST(Vfixupimmss , VexRvmi , E(660F3A,55,_,I,_,0,2,T1S), 0 , 152, 0 , 4268 , 249, 63 ), // #879 - INST(Vfmadd132pd , VexRvm_Lx , V(660F38,98,_,x,1,1,4,FV ), 0 , 153, 0 , 4280 , 175, 121), // #880 - INST(Vfmadd132ps , VexRvm_Lx , V(660F38,98,_,x,0,0,4,FV ), 0 , 154, 0 , 4292 , 176, 121), // #881 - INST(Vfmadd132sd , VexRvm , V(660F38,99,_,I,1,1,3,T1S), 0 , 155, 0 , 4304 , 177, 122), // #882 - INST(Vfmadd132ss , VexRvm , V(660F38,99,_,I,0,0,2,T1S), 0 , 112, 0 , 4316 , 178, 122), // #883 - INST(Vfmadd213pd , VexRvm_Lx , V(660F38,A8,_,x,1,1,4,FV ), 0 , 153, 0 , 4328 , 175, 121), // #884 - INST(Vfmadd213ps , VexRvm_Lx , V(660F38,A8,_,x,0,0,4,FV ), 0 , 154, 0 , 4340 
, 176, 121), // #885 - INST(Vfmadd213sd , VexRvm , V(660F38,A9,_,I,1,1,3,T1S), 0 , 155, 0 , 4352 , 177, 122), // #886 - INST(Vfmadd213ss , VexRvm , V(660F38,A9,_,I,0,0,2,T1S), 0 , 112, 0 , 4364 , 178, 122), // #887 - INST(Vfmadd231pd , VexRvm_Lx , V(660F38,B8,_,x,1,1,4,FV ), 0 , 153, 0 , 4376 , 175, 121), // #888 - INST(Vfmadd231ps , VexRvm_Lx , V(660F38,B8,_,x,0,0,4,FV ), 0 , 154, 0 , 4388 , 176, 121), // #889 - INST(Vfmadd231sd , VexRvm , V(660F38,B9,_,I,1,1,3,T1S), 0 , 155, 0 , 4400 , 177, 122), // #890 - INST(Vfmadd231ss , VexRvm , V(660F38,B9,_,I,0,0,2,T1S), 0 , 112, 0 , 4412 , 178, 122), // #891 - INST(Vfmaddpd , Fma4_Lx , V(660F3A,69,_,x,x,_,_,_ ), 0 , 67 , 0 , 4424 , 250, 123), // #892 - INST(Vfmaddps , Fma4_Lx , V(660F3A,68,_,x,x,_,_,_ ), 0 , 67 , 0 , 4433 , 250, 123), // #893 - INST(Vfmaddsd , Fma4 , V(660F3A,6B,_,0,x,_,_,_ ), 0 , 67 , 0 , 4442 , 251, 123), // #894 - INST(Vfmaddss , Fma4 , V(660F3A,6A,_,0,x,_,_,_ ), 0 , 67 , 0 , 4451 , 252, 123), // #895 - INST(Vfmaddsub132pd , VexRvm_Lx , V(660F38,96,_,x,1,1,4,FV ), 0 , 153, 0 , 4460 , 175, 121), // #896 - INST(Vfmaddsub132ps , VexRvm_Lx , V(660F38,96,_,x,0,0,4,FV ), 0 , 154, 0 , 4475 , 176, 121), // #897 - INST(Vfmaddsub213pd , VexRvm_Lx , V(660F38,A6,_,x,1,1,4,FV ), 0 , 153, 0 , 4490 , 175, 121), // #898 - INST(Vfmaddsub213ps , VexRvm_Lx , V(660F38,A6,_,x,0,0,4,FV ), 0 , 154, 0 , 4505 , 176, 121), // #899 - INST(Vfmaddsub231pd , VexRvm_Lx , V(660F38,B6,_,x,1,1,4,FV ), 0 , 153, 0 , 4520 , 175, 121), // #900 - INST(Vfmaddsub231ps , VexRvm_Lx , V(660F38,B6,_,x,0,0,4,FV ), 0 , 154, 0 , 4535 , 176, 121), // #901 - INST(Vfmaddsubpd , Fma4_Lx , V(660F3A,5D,_,x,x,_,_,_ ), 0 , 67 , 0 , 4550 , 250, 123), // #902 - INST(Vfmaddsubps , Fma4_Lx , V(660F3A,5C,_,x,x,_,_,_ ), 0 , 67 , 0 , 4562 , 250, 123), // #903 - INST(Vfmsub132pd , VexRvm_Lx , V(660F38,9A,_,x,1,1,4,FV ), 0 , 153, 0 , 4574 , 175, 121), // #904 - INST(Vfmsub132ps , VexRvm_Lx , V(660F38,9A,_,x,0,0,4,FV ), 0 , 154, 0 , 4586 , 176, 121), // #905 - INST(Vfmsub132sd , VexRvm , V(660F38,9B,_,I,1,1,3,T1S), 0 , 155, 0 , 4598 , 177, 122), // #906 - INST(Vfmsub132ss , VexRvm , V(660F38,9B,_,I,0,0,2,T1S), 0 , 112, 0 , 4610 , 178, 122), // #907 - INST(Vfmsub213pd , VexRvm_Lx , V(660F38,AA,_,x,1,1,4,FV ), 0 , 153, 0 , 4622 , 175, 121), // #908 - INST(Vfmsub213ps , VexRvm_Lx , V(660F38,AA,_,x,0,0,4,FV ), 0 , 154, 0 , 4634 , 176, 121), // #909 - INST(Vfmsub213sd , VexRvm , V(660F38,AB,_,I,1,1,3,T1S), 0 , 155, 0 , 4646 , 177, 122), // #910 - INST(Vfmsub213ss , VexRvm , V(660F38,AB,_,I,0,0,2,T1S), 0 , 112, 0 , 4658 , 178, 122), // #911 - INST(Vfmsub231pd , VexRvm_Lx , V(660F38,BA,_,x,1,1,4,FV ), 0 , 153, 0 , 4670 , 175, 121), // #912 - INST(Vfmsub231ps , VexRvm_Lx , V(660F38,BA,_,x,0,0,4,FV ), 0 , 154, 0 , 4682 , 176, 121), // #913 - INST(Vfmsub231sd , VexRvm , V(660F38,BB,_,I,1,1,3,T1S), 0 , 155, 0 , 4694 , 177, 122), // #914 - INST(Vfmsub231ss , VexRvm , V(660F38,BB,_,I,0,0,2,T1S), 0 , 112, 0 , 4706 , 178, 122), // #915 - INST(Vfmsubadd132pd , VexRvm_Lx , V(660F38,97,_,x,1,1,4,FV ), 0 , 153, 0 , 4718 , 175, 121), // #916 - INST(Vfmsubadd132ps , VexRvm_Lx , V(660F38,97,_,x,0,0,4,FV ), 0 , 154, 0 , 4733 , 176, 121), // #917 - INST(Vfmsubadd213pd , VexRvm_Lx , V(660F38,A7,_,x,1,1,4,FV ), 0 , 153, 0 , 4748 , 175, 121), // #918 - INST(Vfmsubadd213ps , VexRvm_Lx , V(660F38,A7,_,x,0,0,4,FV ), 0 , 154, 0 , 4763 , 176, 121), // #919 - INST(Vfmsubadd231pd , VexRvm_Lx , V(660F38,B7,_,x,1,1,4,FV ), 0 , 153, 0 , 4778 , 175, 121), // #920 - INST(Vfmsubadd231ps , VexRvm_Lx , V(660F38,B7,_,x,0,0,4,FV ), 0 
, 154, 0 , 4793 , 176, 121), // #921 - INST(Vfmsubaddpd , Fma4_Lx , V(660F3A,5F,_,x,x,_,_,_ ), 0 , 67 , 0 , 4808 , 250, 123), // #922 - INST(Vfmsubaddps , Fma4_Lx , V(660F3A,5E,_,x,x,_,_,_ ), 0 , 67 , 0 , 4820 , 250, 123), // #923 - INST(Vfmsubpd , Fma4_Lx , V(660F3A,6D,_,x,x,_,_,_ ), 0 , 67 , 0 , 4832 , 250, 123), // #924 - INST(Vfmsubps , Fma4_Lx , V(660F3A,6C,_,x,x,_,_,_ ), 0 , 67 , 0 , 4841 , 250, 123), // #925 - INST(Vfmsubsd , Fma4 , V(660F3A,6F,_,0,x,_,_,_ ), 0 , 67 , 0 , 4850 , 251, 123), // #926 - INST(Vfmsubss , Fma4 , V(660F3A,6E,_,0,x,_,_,_ ), 0 , 67 , 0 , 4859 , 252, 123), // #927 - INST(Vfnmadd132pd , VexRvm_Lx , V(660F38,9C,_,x,1,1,4,FV ), 0 , 153, 0 , 4868 , 175, 121), // #928 - INST(Vfnmadd132ps , VexRvm_Lx , V(660F38,9C,_,x,0,0,4,FV ), 0 , 154, 0 , 4881 , 176, 121), // #929 - INST(Vfnmadd132sd , VexRvm , V(660F38,9D,_,I,1,1,3,T1S), 0 , 155, 0 , 4894 , 177, 122), // #930 - INST(Vfnmadd132ss , VexRvm , V(660F38,9D,_,I,0,0,2,T1S), 0 , 112, 0 , 4907 , 178, 122), // #931 - INST(Vfnmadd213pd , VexRvm_Lx , V(660F38,AC,_,x,1,1,4,FV ), 0 , 153, 0 , 4920 , 175, 121), // #932 - INST(Vfnmadd213ps , VexRvm_Lx , V(660F38,AC,_,x,0,0,4,FV ), 0 , 154, 0 , 4933 , 176, 121), // #933 - INST(Vfnmadd213sd , VexRvm , V(660F38,AD,_,I,1,1,3,T1S), 0 , 155, 0 , 4946 , 177, 122), // #934 - INST(Vfnmadd213ss , VexRvm , V(660F38,AD,_,I,0,0,2,T1S), 0 , 112, 0 , 4959 , 178, 122), // #935 - INST(Vfnmadd231pd , VexRvm_Lx , V(660F38,BC,_,x,1,1,4,FV ), 0 , 153, 0 , 4972 , 175, 121), // #936 - INST(Vfnmadd231ps , VexRvm_Lx , V(660F38,BC,_,x,0,0,4,FV ), 0 , 154, 0 , 4985 , 176, 121), // #937 - INST(Vfnmadd231sd , VexRvm , V(660F38,BC,_,I,1,1,3,T1S), 0 , 155, 0 , 4998 , 177, 122), // #938 - INST(Vfnmadd231ss , VexRvm , V(660F38,BC,_,I,0,0,2,T1S), 0 , 112, 0 , 5011 , 178, 122), // #939 - INST(Vfnmaddpd , Fma4_Lx , V(660F3A,79,_,x,x,_,_,_ ), 0 , 67 , 0 , 5024 , 250, 123), // #940 - INST(Vfnmaddps , Fma4_Lx , V(660F3A,78,_,x,x,_,_,_ ), 0 , 67 , 0 , 5034 , 250, 123), // #941 - INST(Vfnmaddsd , Fma4 , V(660F3A,7B,_,0,x,_,_,_ ), 0 , 67 , 0 , 5044 , 251, 123), // #942 - INST(Vfnmaddss , Fma4 , V(660F3A,7A,_,0,x,_,_,_ ), 0 , 67 , 0 , 5054 , 252, 123), // #943 - INST(Vfnmsub132pd , VexRvm_Lx , V(660F38,9E,_,x,1,1,4,FV ), 0 , 153, 0 , 5064 , 175, 121), // #944 - INST(Vfnmsub132ps , VexRvm_Lx , V(660F38,9E,_,x,0,0,4,FV ), 0 , 154, 0 , 5077 , 176, 121), // #945 - INST(Vfnmsub132sd , VexRvm , V(660F38,9F,_,I,1,1,3,T1S), 0 , 155, 0 , 5090 , 177, 122), // #946 - INST(Vfnmsub132ss , VexRvm , V(660F38,9F,_,I,0,0,2,T1S), 0 , 112, 0 , 5103 , 178, 122), // #947 - INST(Vfnmsub213pd , VexRvm_Lx , V(660F38,AE,_,x,1,1,4,FV ), 0 , 153, 0 , 5116 , 175, 121), // #948 - INST(Vfnmsub213ps , VexRvm_Lx , V(660F38,AE,_,x,0,0,4,FV ), 0 , 154, 0 , 5129 , 176, 121), // #949 - INST(Vfnmsub213sd , VexRvm , V(660F38,AF,_,I,1,1,3,T1S), 0 , 155, 0 , 5142 , 177, 122), // #950 - INST(Vfnmsub213ss , VexRvm , V(660F38,AF,_,I,0,0,2,T1S), 0 , 112, 0 , 5155 , 178, 122), // #951 - INST(Vfnmsub231pd , VexRvm_Lx , V(660F38,BE,_,x,1,1,4,FV ), 0 , 153, 0 , 5168 , 175, 121), // #952 - INST(Vfnmsub231ps , VexRvm_Lx , V(660F38,BE,_,x,0,0,4,FV ), 0 , 154, 0 , 5181 , 176, 121), // #953 - INST(Vfnmsub231sd , VexRvm , V(660F38,BF,_,I,1,1,3,T1S), 0 , 155, 0 , 5194 , 177, 122), // #954 - INST(Vfnmsub231ss , VexRvm , V(660F38,BF,_,I,0,0,2,T1S), 0 , 112, 0 , 5207 , 178, 122), // #955 - INST(Vfnmsubpd , Fma4_Lx , V(660F3A,7D,_,x,x,_,_,_ ), 0 , 67 , 0 , 5220 , 250, 123), // #956 - INST(Vfnmsubps , Fma4_Lx , V(660F3A,7C,_,x,x,_,_,_ ), 0 , 67 , 0 , 5230 , 250, 123), // #957 - 
INST(Vfnmsubsd , Fma4 , V(660F3A,7F,_,0,x,_,_,_ ), 0 , 67 , 0 , 5240 , 251, 123), // #958 - INST(Vfnmsubss , Fma4 , V(660F3A,7E,_,0,x,_,_,_ ), 0 , 67 , 0 , 5250 , 252, 123), // #959 - INST(Vfpclasspd , VexRmi_Lx , E(660F3A,66,_,x,_,1,4,FV ), 0 , 100, 0 , 5260 , 253, 114), // #960 - INST(Vfpclassps , VexRmi_Lx , E(660F3A,66,_,x,_,0,4,FV ), 0 , 99 , 0 , 5271 , 254, 114), // #961 - INST(Vfpclasssd , VexRmi_Lx , E(660F3A,67,_,I,_,1,3,T1S), 0 , 151, 0 , 5282 , 255, 61 ), // #962 - INST(Vfpclassss , VexRmi_Lx , E(660F3A,67,_,I,_,0,2,T1S), 0 , 152, 0 , 5293 , 256, 61 ), // #963 - INST(Vfrczpd , VexRm_Lx , V(XOP_M9,81,_,x,0,_,_,_ ), 0 , 72 , 0 , 5304 , 257, 124), // #964 - INST(Vfrczps , VexRm_Lx , V(XOP_M9,80,_,x,0,_,_,_ ), 0 , 72 , 0 , 5312 , 257, 124), // #965 - INST(Vfrczsd , VexRm , V(XOP_M9,83,_,0,0,_,_,_ ), 0 , 72 , 0 , 5320 , 258, 124), // #966 - INST(Vfrczss , VexRm , V(XOP_M9,82,_,0,0,_,_,_ ), 0 , 72 , 0 , 5328 , 259, 124), // #967 - INST(Vgatherdpd , VexRmvRm_VM , V(660F38,92,_,x,1,_,_,_ ), V(660F38,92,_,x,_,1,3,T1S), 156, 79 , 5336 , 260, 125), // #968 - INST(Vgatherdps , VexRmvRm_VM , V(660F38,92,_,x,0,_,_,_ ), V(660F38,92,_,x,_,0,2,T1S), 88 , 80 , 5347 , 261, 125), // #969 - INST(Vgatherpf0dpd , VexM_VM , E(660F38,C6,1,2,_,1,3,T1S), 0 , 157, 0 , 5358 , 262, 126), // #970 - INST(Vgatherpf0dps , VexM_VM , E(660F38,C6,1,2,_,0,2,T1S), 0 , 158, 0 , 5372 , 263, 126), // #971 - INST(Vgatherpf0qpd , VexM_VM , E(660F38,C7,1,2,_,1,3,T1S), 0 , 157, 0 , 5386 , 264, 126), // #972 - INST(Vgatherpf0qps , VexM_VM , E(660F38,C7,1,2,_,0,2,T1S), 0 , 158, 0 , 5400 , 264, 126), // #973 - INST(Vgatherpf1dpd , VexM_VM , E(660F38,C6,2,2,_,1,3,T1S), 0 , 159, 0 , 5414 , 262, 126), // #974 - INST(Vgatherpf1dps , VexM_VM , E(660F38,C6,2,2,_,0,2,T1S), 0 , 160, 0 , 5428 , 263, 126), // #975 - INST(Vgatherpf1qpd , VexM_VM , E(660F38,C7,2,2,_,1,3,T1S), 0 , 159, 0 , 5442 , 264, 126), // #976 - INST(Vgatherpf1qps , VexM_VM , E(660F38,C7,2,2,_,0,2,T1S), 0 , 160, 0 , 5456 , 264, 126), // #977 - INST(Vgatherqpd , VexRmvRm_VM , V(660F38,93,_,x,1,_,_,_ ), V(660F38,93,_,x,_,1,3,T1S), 156, 81 , 5470 , 265, 125), // #978 - INST(Vgatherqps , VexRmvRm_VM , V(660F38,93,_,x,0,_,_,_ ), V(660F38,93,_,x,_,0,2,T1S), 88 , 82 , 5481 , 266, 125), // #979 - INST(Vgetexppd , VexRm_Lx , E(660F38,42,_,x,_,1,4,FV ), 0 , 103, 0 , 5492 , 226, 111), // #980 - INST(Vgetexpps , VexRm_Lx , E(660F38,42,_,x,_,0,4,FV ), 0 , 102, 0 , 5502 , 230, 111), // #981 - INST(Vgetexpsd , VexRvm , E(660F38,43,_,I,_,1,3,T1S), 0 , 115, 0 , 5512 , 267, 63 ), // #982 - INST(Vgetexpss , VexRvm , E(660F38,43,_,I,_,0,2,T1S), 0 , 116, 0 , 5522 , 268, 63 ), // #983 - INST(Vgetmantpd , VexRmi_Lx , E(660F3A,26,_,x,_,1,4,FV ), 0 , 100, 0 , 5532 , 269, 111), // #984 - INST(Vgetmantps , VexRmi_Lx , E(660F3A,26,_,x,_,0,4,FV ), 0 , 99 , 0 , 5543 , 270, 111), // #985 - INST(Vgetmantsd , VexRvmi , E(660F3A,27,_,I,_,1,3,T1S), 0 , 151, 0 , 5554 , 248, 63 ), // #986 - INST(Vgetmantss , VexRvmi , E(660F3A,27,_,I,_,0,2,T1S), 0 , 152, 0 , 5565 , 249, 63 ), // #987 - INST(Vgf2p8affineinvqb, VexRvmi_Lx , V(660F3A,CF,_,x,1,1,4,FV ), 0 , 161, 0 , 5576 , 271, 127), // #988 - INST(Vgf2p8affineqb , VexRvmi_Lx , V(660F3A,CE,_,x,1,1,4,FV ), 0 , 161, 0 , 5594 , 271, 127), // #989 - INST(Vgf2p8mulb , VexRvm_Lx , V(660F38,CF,_,x,0,0,4,FV ), 0 , 154, 0 , 5609 , 272, 127), // #990 - INST(Vhaddpd , VexRvm_Lx , V(660F00,7C,_,x,I,_,_,_ ), 0 , 63 , 0 , 5620 , 179, 108), // #991 - INST(Vhaddps , VexRvm_Lx , V(F20F00,7C,_,x,I,_,_,_ ), 0 , 97 , 0 , 5628 , 179, 108), // #992 - INST(Vhsubpd , VexRvm_Lx , 
V(660F00,7D,_,x,I,_,_,_ ), 0 , 63 , 0 , 5636 , 179, 108), // #993 - INST(Vhsubps , VexRvm_Lx , V(F20F00,7D,_,x,I,_,_,_ ), 0 , 97 , 0 , 5644 , 179, 108), // #994 - INST(Vinsertf128 , VexRvmi , V(660F3A,18,_,1,0,_,_,_ ), 0 , 145, 0 , 5652 , 273, 108), // #995 - INST(Vinsertf32x4 , VexRvmi_Lx , E(660F3A,18,_,x,_,0,4,T4 ), 0 , 146, 0 , 5664 , 274, 111), // #996 - INST(Vinsertf32x8 , VexRvmi , E(660F3A,1A,_,2,_,0,5,T8 ), 0 , 147, 0 , 5677 , 275, 61 ), // #997 - INST(Vinsertf64x2 , VexRvmi_Lx , E(660F3A,18,_,x,_,1,4,T2 ), 0 , 148, 0 , 5690 , 274, 114), // #998 - INST(Vinsertf64x4 , VexRvmi , E(660F3A,1A,_,2,_,1,5,T4 ), 0 , 149, 0 , 5703 , 275, 63 ), // #999 - INST(Vinserti128 , VexRvmi , V(660F3A,38,_,1,0,_,_,_ ), 0 , 145, 0 , 5716 , 273, 115), // #1000 - INST(Vinserti32x4 , VexRvmi_Lx , E(660F3A,38,_,x,_,0,4,T4 ), 0 , 146, 0 , 5728 , 274, 111), // #1001 - INST(Vinserti32x8 , VexRvmi , E(660F3A,3A,_,2,_,0,5,T8 ), 0 , 147, 0 , 5741 , 275, 61 ), // #1002 - INST(Vinserti64x2 , VexRvmi_Lx , E(660F3A,38,_,x,_,1,4,T2 ), 0 , 148, 0 , 5754 , 274, 114), // #1003 - INST(Vinserti64x4 , VexRvmi , E(660F3A,3A,_,2,_,1,5,T4 ), 0 , 149, 0 , 5767 , 275, 63 ), // #1004 - INST(Vinsertps , VexRvmi , V(660F3A,21,_,0,I,0,2,T1S), 0 , 150, 0 , 5780 , 276, 107), // #1005 - INST(Vlddqu , VexRm_Lx , V(F20F00,F0,_,x,I,_,_,_ ), 0 , 97 , 0 , 5790 , 277, 108), // #1006 - INST(Vldmxcsr , VexM , V(000F00,AE,2,0,I,_,_,_ ), 0 , 162, 0 , 5797 , 278, 108), // #1007 - INST(Vmaskmovdqu , VexRm_ZDI , V(660F00,F7,_,0,I,_,_,_ ), 0 , 63 , 0 , 5806 , 279, 108), // #1008 - INST(Vmaskmovpd , VexRvmMvr_Lx , V(660F38,2D,_,x,0,_,_,_ ), V(660F38,2F,_,x,0,_,_,_ ), 88 , 83 , 5818 , 280, 108), // #1009 - INST(Vmaskmovps , VexRvmMvr_Lx , V(660F38,2C,_,x,0,_,_,_ ), V(660F38,2E,_,x,0,_,_,_ ), 88 , 84 , 5829 , 280, 108), // #1010 - INST(Vmaxpd , VexRvm_Lx , V(660F00,5F,_,x,I,1,4,FV ), 0 , 93 , 0 , 5840 , 281, 106), // #1011 - INST(Vmaxps , VexRvm_Lx , V(000F00,5F,_,x,I,0,4,FV ), 0 , 94 , 0 , 5847 , 282, 106), // #1012 - INST(Vmaxsd , VexRvm , V(F20F00,5F,_,I,I,1,3,T1S), 0 , 95 , 0 , 5854 , 283, 106), // #1013 - INST(Vmaxss , VexRvm , V(F30F00,5F,_,I,I,0,2,T1S), 0 , 96 , 0 , 5861 , 222, 106), // #1014 - INST(Vmcall , X86Op , O(000F01,C1,_,_,_,_,_,_ ), 0 , 21 , 0 , 5868 , 30 , 53 ), // #1015 - INST(Vmclear , X86M_Only , O(660F00,C7,6,_,_,_,_,_ ), 0 , 24 , 0 , 5875 , 284, 53 ), // #1016 - INST(Vmfunc , X86Op , O(000F01,D4,_,_,_,_,_,_ ), 0 , 21 , 0 , 5883 , 30 , 53 ), // #1017 - INST(Vminpd , VexRvm_Lx , V(660F00,5D,_,x,I,1,4,FV ), 0 , 93 , 0 , 5890 , 281, 106), // #1018 - INST(Vminps , VexRvm_Lx , V(000F00,5D,_,x,I,0,4,FV ), 0 , 94 , 0 , 5897 , 282, 106), // #1019 - INST(Vminsd , VexRvm , V(F20F00,5D,_,I,I,1,3,T1S), 0 , 95 , 0 , 5904 , 283, 106), // #1020 - INST(Vminss , VexRvm , V(F30F00,5D,_,I,I,0,2,T1S), 0 , 96 , 0 , 5911 , 222, 106), // #1021 - INST(Vmlaunch , X86Op , O(000F01,C2,_,_,_,_,_,_ ), 0 , 21 , 0 , 5918 , 30 , 53 ), // #1022 - INST(Vmload , X86Op_xAX , O(000F01,DA,_,_,_,_,_,_ ), 0 , 21 , 0 , 5927 , 285, 22 ), // #1023 - INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 21 , 0 , 5934 , 30 , 22 ), // #1024 - INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 163, 85 , 5942 , 286, 106), // #1025 - INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 164, 86 , 5950 , 286, 106), // #1026 - INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 165, 87 , 5958 , 287, 107), // #1027 - INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 166, 0 , 
5964 , 288, 106), // #1028 - INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 63 , 88 , 5973 , 289, 108), // #1029 - INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 167, 89 , 5981 , 290, 111), // #1030 - INST(Vmovdqa64 , VexRmMr_Lx , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 168, 90 , 5991 , 290, 111), // #1031 - INST(Vmovdqu , VexRmMr_Lx , V(F30F00,6F,_,x,I,_,_,_ ), V(F30F00,7F,_,x,I,_,_,_ ), 169, 91 , 6001 , 289, 108), // #1032 - INST(Vmovdqu16 , VexRmMr_Lx , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 170, 92 , 6009 , 290, 113), // #1033 - INST(Vmovdqu32 , VexRmMr_Lx , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 171, 93 , 6019 , 290, 111), // #1034 - INST(Vmovdqu64 , VexRmMr_Lx , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 172, 94 , 6029 , 290, 111), // #1035 - INST(Vmovdqu8 , VexRmMr_Lx , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 173, 95 , 6039 , 290, 113), // #1036 - INST(Vmovhlps , VexRvm , V(000F00,12,_,0,I,0,_,_ ), 0 , 66 , 0 , 6048 , 291, 107), // #1037 - INST(Vmovhpd , VexRvmMr , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 113, 96 , 6057 , 292, 107), // #1038 - INST(Vmovhps , VexRvmMr , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 174, 97 , 6065 , 292, 107), // #1039 - INST(Vmovlhps , VexRvm , V(000F00,16,_,0,I,0,_,_ ), 0 , 66 , 0 , 6073 , 291, 107), // #1040 - INST(Vmovlpd , VexRvmMr , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 113, 98 , 6082 , 292, 107), // #1041 - INST(Vmovlps , VexRvmMr , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 174, 99 , 6090 , 292, 107), // #1042 - INST(Vmovmskpd , VexRm_Lx , V(660F00,50,_,x,I,_,_,_ ), 0 , 63 , 0 , 6098 , 293, 108), // #1043 - INST(Vmovmskps , VexRm_Lx , V(000F00,50,_,x,I,_,_,_ ), 0 , 66 , 0 , 6108 , 293, 108), // #1044 - INST(Vmovntdq , VexMr_Lx , V(660F00,E7,_,x,I,0,4,FVM), 0 , 175, 0 , 6118 , 294, 106), // #1045 - INST(Vmovntdqa , VexRm_Lx , V(660F38,2A,_,x,I,0,4,FVM), 0 , 98 , 0 , 6127 , 295, 116), // #1046 - INST(Vmovntpd , VexMr_Lx , V(660F00,2B,_,x,I,1,4,FVM), 0 , 163, 0 , 6137 , 294, 106), // #1047 - INST(Vmovntps , VexMr_Lx , V(000F00,2B,_,x,I,0,4,FVM), 0 , 164, 0 , 6146 , 294, 106), // #1048 - INST(Vmovq , VexMovdMovq , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 113, 100, 6155 , 296, 107), // #1049 - INST(Vmovsd , VexMovssMovsd , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 95 , 101, 6161 , 297, 107), // #1050 - INST(Vmovshdup , VexRm_Lx , V(F30F00,16,_,x,I,0,4,FVM), 0 , 176, 0 , 6168 , 298, 106), // #1051 - INST(Vmovsldup , VexRm_Lx , V(F30F00,12,_,x,I,0,4,FVM), 0 , 176, 0 , 6178 , 298, 106), // #1052 - INST(Vmovss , VexMovssMovsd , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 96 , 102, 6188 , 299, 107), // #1053 - INST(Vmovupd , VexRmMr_Lx , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 163, 103, 6195 , 286, 106), // #1054 - INST(Vmovups , VexRmMr_Lx , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 164, 104, 6203 , 286, 106), // #1055 - INST(Vmpsadbw , VexRvmi_Lx , V(660F3A,42,_,x,I,_,_,_ ), 0 , 67 , 0 , 6211 , 192, 128), // #1056 - INST(Vmptrld , X86M_Only , O(000F00,C7,6,_,_,_,_,_ ), 0 , 73 , 0 , 6220 , 284, 53 ), // #1057 - INST(Vmptrst , X86M_Only , O(000F00,C7,7,_,_,_,_,_ ), 0 , 22 , 0 , 6228 , 284, 53 ), // #1058 - INST(Vmread , X86Mr_NoSize , O(000F00,78,_,_,_,_,_,_ ), 0 , 4 , 0 , 6236 , 300, 53 ), // #1059 - INST(Vmresume , X86Op , O(000F01,C3,_,_,_,_,_,_ ), 0 , 21 , 0 , 6243 , 30 , 53 
), // #1060 - INST(Vmrun , X86Op_xAX , O(000F01,D8,_,_,_,_,_,_ ), 0 , 21 , 0 , 6252 , 285, 22 ), // #1061 - INST(Vmsave , X86Op_xAX , O(000F01,DB,_,_,_,_,_,_ ), 0 , 21 , 0 , 6258 , 285, 22 ), // #1062 - INST(Vmulpd , VexRvm_Lx , V(660F00,59,_,x,I,1,4,FV ), 0 , 93 , 0 , 6265 , 175, 106), // #1063 - INST(Vmulps , VexRvm_Lx , V(000F00,59,_,x,I,0,4,FV ), 0 , 94 , 0 , 6272 , 176, 106), // #1064 - INST(Vmulsd , VexRvm_Lx , V(F20F00,59,_,I,I,1,3,T1S), 0 , 95 , 0 , 6279 , 177, 107), // #1065 - INST(Vmulss , VexRvm_Lx , V(F30F00,59,_,I,I,0,2,T1S), 0 , 96 , 0 , 6286 , 178, 107), // #1066 - INST(Vmwrite , X86Rm_NoSize , O(000F00,79,_,_,_,_,_,_ ), 0 , 4 , 0 , 6293 , 301, 53 ), // #1067 - INST(Vmxon , X86M_Only , O(F30F00,C7,6,_,_,_,_,_ ), 0 , 177, 0 , 6301 , 284, 53 ), // #1068 - INST(Vorpd , VexRvm_Lx , V(660F00,56,_,x,I,1,4,FV ), 0 , 93 , 0 , 6307 , 187, 112), // #1069 - INST(Vorps , VexRvm_Lx , V(000F00,56,_,x,I,0,4,FV ), 0 , 94 , 0 , 6313 , 188, 112), // #1070 - INST(Vp4dpwssd , VexRm_T1_4X , E(F20F38,52,_,2,_,0,2,T4X), 0 , 92 , 0 , 6319 , 173, 129), // #1071 - INST(Vp4dpwssds , VexRm_T1_4X , E(F20F38,53,_,2,_,0,2,T4X), 0 , 92 , 0 , 6329 , 173, 129), // #1072 - INST(Vpabsb , VexRm_Lx , V(660F38,1C,_,x,I,_,4,FVM), 0 , 98 , 0 , 6340 , 298, 130), // #1073 - INST(Vpabsd , VexRm_Lx , V(660F38,1E,_,x,I,0,4,FV ), 0 , 154, 0 , 6347 , 298, 116), // #1074 - INST(Vpabsq , VexRm_Lx , E(660F38,1F,_,x,_,1,4,FV ), 0 , 103, 0 , 6354 , 241, 111), // #1075 - INST(Vpabsw , VexRm_Lx , V(660F38,1D,_,x,I,_,4,FVM), 0 , 98 , 0 , 6361 , 298, 130), // #1076 - INST(Vpackssdw , VexRvm_Lx , V(660F00,6B,_,x,I,0,4,FV ), 0 , 124, 0 , 6368 , 186, 130), // #1077 - INST(Vpacksswb , VexRvm_Lx , V(660F00,63,_,x,I,I,4,FVM), 0 , 175, 0 , 6378 , 272, 130), // #1078 - INST(Vpackusdw , VexRvm_Lx , V(660F38,2B,_,x,I,0,4,FV ), 0 , 154, 0 , 6388 , 186, 130), // #1079 - INST(Vpackuswb , VexRvm_Lx , V(660F00,67,_,x,I,I,4,FVM), 0 , 175, 0 , 6398 , 272, 130), // #1080 - INST(Vpaddb , VexRvm_Lx , V(660F00,FC,_,x,I,I,4,FVM), 0 , 175, 0 , 6408 , 272, 130), // #1081 - INST(Vpaddd , VexRvm_Lx , V(660F00,FE,_,x,I,0,4,FV ), 0 , 124, 0 , 6415 , 186, 116), // #1082 - INST(Vpaddq , VexRvm_Lx , V(660F00,D4,_,x,I,1,4,FV ), 0 , 93 , 0 , 6422 , 185, 116), // #1083 - INST(Vpaddsb , VexRvm_Lx , V(660F00,EC,_,x,I,I,4,FVM), 0 , 175, 0 , 6429 , 272, 130), // #1084 - INST(Vpaddsw , VexRvm_Lx , V(660F00,ED,_,x,I,I,4,FVM), 0 , 175, 0 , 6437 , 272, 130), // #1085 - INST(Vpaddusb , VexRvm_Lx , V(660F00,DC,_,x,I,I,4,FVM), 0 , 175, 0 , 6445 , 272, 130), // #1086 - INST(Vpaddusw , VexRvm_Lx , V(660F00,DD,_,x,I,I,4,FVM), 0 , 175, 0 , 6454 , 272, 130), // #1087 - INST(Vpaddw , VexRvm_Lx , V(660F00,FD,_,x,I,I,4,FVM), 0 , 175, 0 , 6463 , 272, 130), // #1088 - INST(Vpalignr , VexRvmi_Lx , V(660F3A,0F,_,x,I,I,4,FVM), 0 , 178, 0 , 6470 , 271, 130), // #1089 - INST(Vpand , VexRvm_Lx , V(660F00,DB,_,x,I,_,_,_ ), 0 , 63 , 0 , 6479 , 302, 128), // #1090 - INST(Vpandd , VexRvm_Lx , E(660F00,DB,_,x,_,0,4,FV ), 0 , 179, 0 , 6485 , 303, 111), // #1091 - INST(Vpandn , VexRvm_Lx , V(660F00,DF,_,x,I,_,_,_ ), 0 , 63 , 0 , 6492 , 304, 128), // #1092 - INST(Vpandnd , VexRvm_Lx , E(660F00,DF,_,x,_,0,4,FV ), 0 , 179, 0 , 6499 , 305, 111), // #1093 - INST(Vpandnq , VexRvm_Lx , E(660F00,DF,_,x,_,1,4,FV ), 0 , 121, 0 , 6507 , 306, 111), // #1094 - INST(Vpandq , VexRvm_Lx , E(660F00,DB,_,x,_,1,4,FV ), 0 , 121, 0 , 6515 , 307, 111), // #1095 - INST(Vpavgb , VexRvm_Lx , V(660F00,E0,_,x,I,I,4,FVM), 0 , 175, 0 , 6522 , 272, 130), // #1096 - INST(Vpavgw , VexRvm_Lx , V(660F00,E3,_,x,I,I,4,FVM), 0 , 175, 
0 , 6529 , 272, 130), // #1097 - INST(Vpblendd , VexRvmi_Lx , V(660F3A,02,_,x,0,_,_,_ ), 0 , 67 , 0 , 6536 , 192, 115), // #1098 - INST(Vpblendvb , VexRvmr , V(660F3A,4C,_,x,0,_,_,_ ), 0 , 67 , 0 , 6545 , 193, 128), // #1099 - INST(Vpblendw , VexRvmi_Lx , V(660F3A,0E,_,x,I,_,_,_ ), 0 , 67 , 0 , 6555 , 192, 128), // #1100 - INST(Vpbroadcastb , VexRm_Lx_Bcst , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 180, 105, 6564 , 308, 131), // #1101 - INST(Vpbroadcastd , VexRm_Lx_Bcst , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 112, 106, 6577 , 309, 125), // #1102 - INST(Vpbroadcastmb2d , VexRm_Lx , E(F30F38,3A,_,x,_,0,_,_ ), 0 , 119, 0 , 6590 , 310, 132), // #1103 - INST(Vpbroadcastmb2q , VexRm_Lx , E(F30F38,2A,_,x,_,1,_,_ ), 0 , 181, 0 , 6606 , 310, 132), // #1104 - INST(Vpbroadcastq , VexRm_Lx_Bcst , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 111, 107, 6622 , 311, 125), // #1105 - INST(Vpbroadcastw , VexRm_Lx_Bcst , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 182, 108, 6635 , 312, 131), // #1106 - INST(Vpclmulqdq , VexRvmi_Lx , V(660F3A,44,_,x,I,_,4,FVM), 0 , 178, 0 , 6648 , 313, 133), // #1107 - INST(Vpcmov , VexRvrmRvmr_Lx , V(XOP_M8,A2,_,x,x,_,_,_ ), 0 , 183, 0 , 6659 , 250, 124), // #1108 - INST(Vpcmpb , VexRvmi_Lx , E(660F3A,3F,_,x,_,0,4,FVM), 0 , 142, 0 , 6666 , 314, 113), // #1109 - INST(Vpcmpd , VexRvmi_Lx , E(660F3A,1F,_,x,_,0,4,FV ), 0 , 99 , 0 , 6673 , 315, 111), // #1110 - INST(Vpcmpeqb , VexRvm_Lx , V(660F00,74,_,x,I,I,4,FV ), 0 , 124, 0 , 6680 , 316, 130), // #1111 - INST(Vpcmpeqd , VexRvm_Lx , V(660F00,76,_,x,I,0,4,FVM), 0 , 175, 0 , 6689 , 317, 116), // #1112 - INST(Vpcmpeqq , VexRvm_Lx , V(660F38,29,_,x,I,1,4,FVM), 0 , 184, 0 , 6698 , 318, 116), // #1113 - INST(Vpcmpeqw , VexRvm_Lx , V(660F00,75,_,x,I,I,4,FV ), 0 , 124, 0 , 6707 , 316, 130), // #1114 - INST(Vpcmpestri , VexRmi , V(660F3A,61,_,0,I,_,_,_ ), 0 , 67 , 0 , 6716 , 319, 134), // #1115 - INST(Vpcmpestrm , VexRmi , V(660F3A,60,_,0,I,_,_,_ ), 0 , 67 , 0 , 6727 , 320, 134), // #1116 - INST(Vpcmpgtb , VexRvm_Lx , V(660F00,64,_,x,I,I,4,FV ), 0 , 124, 0 , 6738 , 316, 130), // #1117 - INST(Vpcmpgtd , VexRvm_Lx , V(660F00,66,_,x,I,0,4,FVM), 0 , 175, 0 , 6747 , 317, 116), // #1118 - INST(Vpcmpgtq , VexRvm_Lx , V(660F38,37,_,x,I,1,4,FVM), 0 , 184, 0 , 6756 , 318, 116), // #1119 - INST(Vpcmpgtw , VexRvm_Lx , V(660F00,65,_,x,I,I,4,FV ), 0 , 124, 0 , 6765 , 316, 130), // #1120 - INST(Vpcmpistri , VexRmi , V(660F3A,63,_,0,I,_,_,_ ), 0 , 67 , 0 , 6774 , 321, 134), // #1121 - INST(Vpcmpistrm , VexRmi , V(660F3A,62,_,0,I,_,_,_ ), 0 , 67 , 0 , 6785 , 322, 134), // #1122 - INST(Vpcmpq , VexRvmi_Lx , E(660F3A,1F,_,x,_,1,4,FV ), 0 , 100, 0 , 6796 , 323, 111), // #1123 - INST(Vpcmpub , VexRvmi_Lx , E(660F3A,3E,_,x,_,0,4,FVM), 0 , 142, 0 , 6803 , 314, 113), // #1124 - INST(Vpcmpud , VexRvmi_Lx , E(660F3A,1E,_,x,_,0,4,FV ), 0 , 99 , 0 , 6811 , 315, 111), // #1125 - INST(Vpcmpuq , VexRvmi_Lx , E(660F3A,1E,_,x,_,1,4,FV ), 0 , 100, 0 , 6819 , 323, 111), // #1126 - INST(Vpcmpuw , VexRvmi_Lx , E(660F3A,3E,_,x,_,1,4,FVM), 0 , 185, 0 , 6827 , 323, 113), // #1127 - INST(Vpcmpw , VexRvmi_Lx , E(660F3A,3F,_,x,_,1,4,FVM), 0 , 185, 0 , 6835 , 323, 113), // #1128 - INST(Vpcomb , VexRvmi , V(XOP_M8,CC,_,0,0,_,_,_ ), 0 , 183, 0 , 6842 , 238, 124), // #1129 - INST(Vpcomd , VexRvmi , V(XOP_M8,CE,_,0,0,_,_,_ ), 0 , 183, 0 , 6849 , 238, 124), // #1130 - INST(Vpcompressb , VexMr_Lx , E(660F38,63,_,x,_,0,0,T1S), 0 , 186, 0 , 6856 , 207, 135), // #1131 - INST(Vpcompressd , VexMr_Lx , E(660F38,8B,_,x,_,0,2,T1S), 0 , 
116, 0 , 6868 , 207, 111), // #1132 - INST(Vpcompressq , VexMr_Lx , E(660F38,8B,_,x,_,1,3,T1S), 0 , 115, 0 , 6880 , 207, 111), // #1133 - INST(Vpcompressw , VexMr_Lx , E(660F38,63,_,x,_,1,1,T1S), 0 , 187, 0 , 6892 , 207, 135), // #1134 - INST(Vpcomq , VexRvmi , V(XOP_M8,CF,_,0,0,_,_,_ ), 0 , 183, 0 , 6904 , 238, 124), // #1135 - INST(Vpcomub , VexRvmi , V(XOP_M8,EC,_,0,0,_,_,_ ), 0 , 183, 0 , 6911 , 238, 124), // #1136 - INST(Vpcomud , VexRvmi , V(XOP_M8,EE,_,0,0,_,_,_ ), 0 , 183, 0 , 6919 , 238, 124), // #1137 - INST(Vpcomuq , VexRvmi , V(XOP_M8,EF,_,0,0,_,_,_ ), 0 , 183, 0 , 6927 , 238, 124), // #1138 - INST(Vpcomuw , VexRvmi , V(XOP_M8,ED,_,0,0,_,_,_ ), 0 , 183, 0 , 6935 , 238, 124), // #1139 - INST(Vpcomw , VexRvmi , V(XOP_M8,CD,_,0,0,_,_,_ ), 0 , 183, 0 , 6943 , 238, 124), // #1140 - INST(Vpconflictd , VexRm_Lx , E(660F38,C4,_,x,_,0,4,FV ), 0 , 102, 0 , 6950 , 324, 132), // #1141 - INST(Vpconflictq , VexRm_Lx , E(660F38,C4,_,x,_,1,4,FV ), 0 , 103, 0 , 6962 , 324, 132), // #1142 - INST(Vpdpbusd , VexRvm_Lx , E(660F38,50,_,x,_,0,4,FV ), 0 , 102, 0 , 6974 , 190, 136), // #1143 - INST(Vpdpbusds , VexRvm_Lx , E(660F38,51,_,x,_,0,4,FV ), 0 , 102, 0 , 6983 , 190, 136), // #1144 - INST(Vpdpwssd , VexRvm_Lx , E(660F38,52,_,x,_,0,4,FV ), 0 , 102, 0 , 6993 , 190, 136), // #1145 - INST(Vpdpwssds , VexRvm_Lx , E(660F38,53,_,x,_,0,4,FV ), 0 , 102, 0 , 7002 , 190, 136), // #1146 - INST(Vperm2f128 , VexRvmi , V(660F3A,06,_,1,0,_,_,_ ), 0 , 145, 0 , 7012 , 325, 108), // #1147 - INST(Vperm2i128 , VexRvmi , V(660F3A,46,_,1,0,_,_,_ ), 0 , 145, 0 , 7023 , 325, 115), // #1148 - INST(Vpermb , VexRvm_Lx , E(660F38,8D,_,x,_,0,4,FVM), 0 , 101, 0 , 7034 , 189, 137), // #1149 - INST(Vpermd , VexRvm_Lx , V(660F38,36,_,x,0,0,4,FV ), 0 , 154, 0 , 7041 , 326, 125), // #1150 - INST(Vpermi2b , VexRvm_Lx , E(660F38,75,_,x,_,0,4,FVM), 0 , 101, 0 , 7048 , 189, 137), // #1151 - INST(Vpermi2d , VexRvm_Lx , E(660F38,76,_,x,_,0,4,FV ), 0 , 102, 0 , 7057 , 190, 111), // #1152 - INST(Vpermi2pd , VexRvm_Lx , E(660F38,77,_,x,_,1,4,FV ), 0 , 103, 0 , 7066 , 191, 111), // #1153 - INST(Vpermi2ps , VexRvm_Lx , E(660F38,77,_,x,_,0,4,FV ), 0 , 102, 0 , 7076 , 190, 111), // #1154 - INST(Vpermi2q , VexRvm_Lx , E(660F38,76,_,x,_,1,4,FV ), 0 , 103, 0 , 7086 , 191, 111), // #1155 - INST(Vpermi2w , VexRvm_Lx , E(660F38,75,_,x,_,1,4,FVM), 0 , 104, 0 , 7095 , 189, 113), // #1156 - INST(Vpermil2pd , VexRvrmiRvmri_Lx , V(660F3A,49,_,x,x,_,_,_ ), 0 , 67 , 0 , 7104 , 327, 124), // #1157 - INST(Vpermil2ps , VexRvrmiRvmri_Lx , V(660F3A,48,_,x,x,_,_,_ ), 0 , 67 , 0 , 7115 , 327, 124), // #1158 - INST(Vpermilpd , VexRvmRmi_Lx , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 188, 109, 7126 , 328, 106), // #1159 - INST(Vpermilps , VexRvmRmi_Lx , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 154, 110, 7136 , 328, 106), // #1160 - INST(Vpermpd , VexRvmRmi_Lx , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 189, 111, 7146 , 329, 125), // #1161 - INST(Vpermps , VexRvm_Lx , V(660F38,16,_,x,0,0,4,FV ), 0 , 154, 0 , 7154 , 326, 125), // #1162 - INST(Vpermq , VexRvmRmi_Lx , V(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 188, 112, 7162 , 329, 125), // #1163 - INST(Vpermt2b , VexRvm_Lx , E(660F38,7D,_,x,_,0,4,FVM), 0 , 101, 0 , 7169 , 189, 137), // #1164 - INST(Vpermt2d , VexRvm_Lx , E(660F38,7E,_,x,_,0,4,FV ), 0 , 102, 0 , 7178 , 190, 111), // #1165 - INST(Vpermt2pd , VexRvm_Lx , E(660F38,7F,_,x,_,1,4,FV ), 0 , 103, 0 , 7187 , 191, 111), // #1166 - INST(Vpermt2ps , VexRvm_Lx , E(660F38,7F,_,x,_,0,4,FV ), 0 , 102, 0 , 7197 , 
190, 111), // #1167 - INST(Vpermt2q , VexRvm_Lx , E(660F38,7E,_,x,_,1,4,FV ), 0 , 103, 0 , 7207 , 191, 111), // #1168 - INST(Vpermt2w , VexRvm_Lx , E(660F38,7D,_,x,_,1,4,FVM), 0 , 104, 0 , 7216 , 189, 113), // #1169 - INST(Vpermw , VexRvm_Lx , E(660F38,8D,_,x,_,1,4,FVM), 0 , 104, 0 , 7225 , 189, 113), // #1170 - INST(Vpexpandb , VexRm_Lx , E(660F38,62,_,x,_,0,0,T1S), 0 , 186, 0 , 7232 , 241, 135), // #1171 - INST(Vpexpandd , VexRm_Lx , E(660F38,89,_,x,_,0,2,T1S), 0 , 116, 0 , 7242 , 241, 111), // #1172 - INST(Vpexpandq , VexRm_Lx , E(660F38,89,_,x,_,1,3,T1S), 0 , 115, 0 , 7252 , 241, 111), // #1173 - INST(Vpexpandw , VexRm_Lx , E(660F38,62,_,x,_,1,1,T1S), 0 , 187, 0 , 7262 , 241, 135), // #1174 - INST(Vpextrb , VexMri , V(660F3A,14,_,0,0,I,0,T1S), 0 , 190, 0 , 7272 , 330, 138), // #1175 - INST(Vpextrd , VexMri , V(660F3A,16,_,0,0,0,2,T1S), 0 , 150, 0 , 7280 , 245, 139), // #1176 - INST(Vpextrq , VexMri , V(660F3A,16,_,0,1,1,3,T1S), 0 , 191, 0 , 7288 , 331, 139), // #1177 - INST(Vpextrw , VexMri , V(660F3A,15,_,0,0,I,1,T1S), 0 , 192, 0 , 7296 , 332, 138), // #1178 - INST(Vpgatherdd , VexRmvRm_VM , V(660F38,90,_,x,0,_,_,_ ), V(660F38,90,_,x,_,0,2,T1S), 88 , 113, 7304 , 261, 125), // #1179 - INST(Vpgatherdq , VexRmvRm_VM , V(660F38,90,_,x,1,_,_,_ ), V(660F38,90,_,x,_,1,3,T1S), 156, 114, 7315 , 260, 125), // #1180 - INST(Vpgatherqd , VexRmvRm_VM , V(660F38,91,_,x,0,_,_,_ ), V(660F38,91,_,x,_,0,2,T1S), 88 , 115, 7326 , 266, 125), // #1181 - INST(Vpgatherqq , VexRmvRm_VM , V(660F38,91,_,x,1,_,_,_ ), V(660F38,91,_,x,_,1,3,T1S), 156, 116, 7337 , 265, 125), // #1182 - INST(Vphaddbd , VexRm , V(XOP_M9,C2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7348 , 181, 124), // #1183 - INST(Vphaddbq , VexRm , V(XOP_M9,C3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7357 , 181, 124), // #1184 - INST(Vphaddbw , VexRm , V(XOP_M9,C1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7366 , 181, 124), // #1185 - INST(Vphaddd , VexRvm_Lx , V(660F38,02,_,x,I,_,_,_ ), 0 , 88 , 0 , 7375 , 179, 128), // #1186 - INST(Vphadddq , VexRm , V(XOP_M9,CB,_,0,0,_,_,_ ), 0 , 72 , 0 , 7383 , 181, 124), // #1187 - INST(Vphaddsw , VexRvm_Lx , V(660F38,03,_,x,I,_,_,_ ), 0 , 88 , 0 , 7392 , 179, 128), // #1188 - INST(Vphaddubd , VexRm , V(XOP_M9,D2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7401 , 181, 124), // #1189 - INST(Vphaddubq , VexRm , V(XOP_M9,D3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7411 , 181, 124), // #1190 - INST(Vphaddubw , VexRm , V(XOP_M9,D1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7421 , 181, 124), // #1191 - INST(Vphaddudq , VexRm , V(XOP_M9,DB,_,0,0,_,_,_ ), 0 , 72 , 0 , 7431 , 181, 124), // #1192 - INST(Vphadduwd , VexRm , V(XOP_M9,D6,_,0,0,_,_,_ ), 0 , 72 , 0 , 7441 , 181, 124), // #1193 - INST(Vphadduwq , VexRm , V(XOP_M9,D7,_,0,0,_,_,_ ), 0 , 72 , 0 , 7451 , 181, 124), // #1194 - INST(Vphaddw , VexRvm_Lx , V(660F38,01,_,x,I,_,_,_ ), 0 , 88 , 0 , 7461 , 179, 128), // #1195 - INST(Vphaddwd , VexRm , V(XOP_M9,C6,_,0,0,_,_,_ ), 0 , 72 , 0 , 7469 , 181, 124), // #1196 - INST(Vphaddwq , VexRm , V(XOP_M9,C7,_,0,0,_,_,_ ), 0 , 72 , 0 , 7478 , 181, 124), // #1197 - INST(Vphminposuw , VexRm , V(660F38,41,_,0,I,_,_,_ ), 0 , 88 , 0 , 7487 , 181, 108), // #1198 - INST(Vphsubbw , VexRm , V(XOP_M9,E1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7499 , 181, 124), // #1199 - INST(Vphsubd , VexRvm_Lx , V(660F38,06,_,x,I,_,_,_ ), 0 , 88 , 0 , 7508 , 179, 128), // #1200 - INST(Vphsubdq , VexRm , V(XOP_M9,E3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7516 , 181, 124), // #1201 - INST(Vphsubsw , VexRvm_Lx , V(660F38,07,_,x,I,_,_,_ ), 0 , 88 , 0 , 7525 , 179, 128), // #1202 - INST(Vphsubw , VexRvm_Lx , V(660F38,05,_,x,I,_,_,_ ), 0 , 88 , 0 , 7534 , 179, 
128), // #1203 - INST(Vphsubwd , VexRm , V(XOP_M9,E2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7542 , 181, 124), // #1204 - INST(Vpinsrb , VexRvmi , V(660F3A,20,_,0,0,I,0,T1S), 0 , 190, 0 , 7551 , 333, 138), // #1205 - INST(Vpinsrd , VexRvmi , V(660F3A,22,_,0,0,0,2,T1S), 0 , 150, 0 , 7559 , 334, 139), // #1206 - INST(Vpinsrq , VexRvmi , V(660F3A,22,_,0,1,1,3,T1S), 0 , 191, 0 , 7567 , 335, 139), // #1207 - INST(Vpinsrw , VexRvmi , V(660F00,C4,_,0,0,I,1,T1S), 0 , 193, 0 , 7575 , 336, 138), // #1208 - INST(Vplzcntd , VexRm_Lx , E(660F38,44,_,x,_,0,4,FV ), 0 , 102, 0 , 7583 , 324, 132), // #1209 - INST(Vplzcntq , VexRm_Lx , E(660F38,44,_,x,_,1,4,FV ), 0 , 103, 0 , 7592 , 337, 132), // #1210 - INST(Vpmacsdd , VexRvmr , V(XOP_M8,9E,_,0,0,_,_,_ ), 0 , 183, 0 , 7601 , 338, 124), // #1211 - INST(Vpmacsdqh , VexRvmr , V(XOP_M8,9F,_,0,0,_,_,_ ), 0 , 183, 0 , 7610 , 338, 124), // #1212 - INST(Vpmacsdql , VexRvmr , V(XOP_M8,97,_,0,0,_,_,_ ), 0 , 183, 0 , 7620 , 338, 124), // #1213 - INST(Vpmacssdd , VexRvmr , V(XOP_M8,8E,_,0,0,_,_,_ ), 0 , 183, 0 , 7630 , 338, 124), // #1214 - INST(Vpmacssdqh , VexRvmr , V(XOP_M8,8F,_,0,0,_,_,_ ), 0 , 183, 0 , 7640 , 338, 124), // #1215 - INST(Vpmacssdql , VexRvmr , V(XOP_M8,87,_,0,0,_,_,_ ), 0 , 183, 0 , 7651 , 338, 124), // #1216 - INST(Vpmacsswd , VexRvmr , V(XOP_M8,86,_,0,0,_,_,_ ), 0 , 183, 0 , 7662 , 338, 124), // #1217 - INST(Vpmacssww , VexRvmr , V(XOP_M8,85,_,0,0,_,_,_ ), 0 , 183, 0 , 7672 , 338, 124), // #1218 - INST(Vpmacswd , VexRvmr , V(XOP_M8,96,_,0,0,_,_,_ ), 0 , 183, 0 , 7682 , 338, 124), // #1219 - INST(Vpmacsww , VexRvmr , V(XOP_M8,95,_,0,0,_,_,_ ), 0 , 183, 0 , 7691 , 338, 124), // #1220 - INST(Vpmadcsswd , VexRvmr , V(XOP_M8,A6,_,0,0,_,_,_ ), 0 , 183, 0 , 7700 , 338, 124), // #1221 - INST(Vpmadcswd , VexRvmr , V(XOP_M8,B6,_,0,0,_,_,_ ), 0 , 183, 0 , 7711 , 338, 124), // #1222 - INST(Vpmadd52huq , VexRvm_Lx , E(660F38,B5,_,x,_,1,4,FV ), 0 , 103, 0 , 7721 , 191, 140), // #1223 - INST(Vpmadd52luq , VexRvm_Lx , E(660F38,B4,_,x,_,1,4,FV ), 0 , 103, 0 , 7733 , 191, 140), // #1224 - INST(Vpmaddubsw , VexRvm_Lx , V(660F38,04,_,x,I,I,4,FVM), 0 , 98 , 0 , 7745 , 272, 130), // #1225 - INST(Vpmaddwd , VexRvm_Lx , V(660F00,F5,_,x,I,I,4,FVM), 0 , 175, 0 , 7756 , 272, 130), // #1226 - INST(Vpmaskmovd , VexRvmMvr_Lx , V(660F38,8C,_,x,0,_,_,_ ), V(660F38,8E,_,x,0,_,_,_ ), 88 , 117, 7765 , 280, 115), // #1227 - INST(Vpmaskmovq , VexRvmMvr_Lx , V(660F38,8C,_,x,1,_,_,_ ), V(660F38,8E,_,x,1,_,_,_ ), 156, 118, 7776 , 280, 115), // #1228 - INST(Vpmaxsb , VexRvm_Lx , V(660F38,3C,_,x,I,I,4,FVM), 0 , 98 , 0 , 7787 , 339, 130), // #1229 - INST(Vpmaxsd , VexRvm_Lx , V(660F38,3D,_,x,I,0,4,FV ), 0 , 154, 0 , 7795 , 188, 116), // #1230 - INST(Vpmaxsq , VexRvm_Lx , E(660F38,3D,_,x,_,1,4,FV ), 0 , 103, 0 , 7803 , 191, 111), // #1231 - INST(Vpmaxsw , VexRvm_Lx , V(660F00,EE,_,x,I,I,4,FVM), 0 , 175, 0 , 7811 , 339, 130), // #1232 - INST(Vpmaxub , VexRvm_Lx , V(660F00,DE,_,x,I,I,4,FVM), 0 , 175, 0 , 7819 , 339, 130), // #1233 - INST(Vpmaxud , VexRvm_Lx , V(660F38,3F,_,x,I,0,4,FV ), 0 , 154, 0 , 7827 , 188, 116), // #1234 - INST(Vpmaxuq , VexRvm_Lx , E(660F38,3F,_,x,_,1,4,FV ), 0 , 103, 0 , 7835 , 191, 111), // #1235 - INST(Vpmaxuw , VexRvm_Lx , V(660F38,3E,_,x,I,I,4,FVM), 0 , 98 , 0 , 7843 , 339, 130), // #1236 - INST(Vpminsb , VexRvm_Lx , V(660F38,38,_,x,I,I,4,FVM), 0 , 98 , 0 , 7851 , 339, 130), // #1237 - INST(Vpminsd , VexRvm_Lx , V(660F38,39,_,x,I,0,4,FV ), 0 , 154, 0 , 7859 , 188, 116), // #1238 - INST(Vpminsq , VexRvm_Lx , E(660F38,39,_,x,_,1,4,FV ), 0 , 103, 0 , 7867 , 191, 111), // #1239 - 
INST(Vpminsw , VexRvm_Lx , V(660F00,EA,_,x,I,I,4,FVM), 0 , 175, 0 , 7875 , 339, 130), // #1240 - INST(Vpminub , VexRvm_Lx , V(660F00,DA,_,x,I,_,4,FVM), 0 , 175, 0 , 7883 , 339, 130), // #1241 - INST(Vpminud , VexRvm_Lx , V(660F38,3B,_,x,I,0,4,FV ), 0 , 154, 0 , 7891 , 188, 116), // #1242 - INST(Vpminuq , VexRvm_Lx , E(660F38,3B,_,x,_,1,4,FV ), 0 , 103, 0 , 7899 , 191, 111), // #1243 - INST(Vpminuw , VexRvm_Lx , V(660F38,3A,_,x,I,_,4,FVM), 0 , 98 , 0 , 7907 , 339, 130), // #1244 - INST(Vpmovb2m , VexRm_Lx , E(F30F38,29,_,x,_,0,_,_ ), 0 , 119, 0 , 7915 , 340, 113), // #1245 - INST(Vpmovd2m , VexRm_Lx , E(F30F38,39,_,x,_,0,_,_ ), 0 , 119, 0 , 7924 , 340, 114), // #1246 - INST(Vpmovdb , VexMr_Lx , E(F30F38,31,_,x,_,0,2,QVM), 0 , 194, 0 , 7933 , 341, 111), // #1247 - INST(Vpmovdw , VexMr_Lx , E(F30F38,33,_,x,_,0,3,HVM), 0 , 195, 0 , 7941 , 342, 111), // #1248 - INST(Vpmovm2b , VexRm_Lx , E(F30F38,28,_,x,_,0,_,_ ), 0 , 119, 0 , 7949 , 310, 113), // #1249 - INST(Vpmovm2d , VexRm_Lx , E(F30F38,38,_,x,_,0,_,_ ), 0 , 119, 0 , 7958 , 310, 114), // #1250 - INST(Vpmovm2q , VexRm_Lx , E(F30F38,38,_,x,_,1,_,_ ), 0 , 181, 0 , 7967 , 310, 114), // #1251 - INST(Vpmovm2w , VexRm_Lx , E(F30F38,28,_,x,_,1,_,_ ), 0 , 181, 0 , 7976 , 310, 113), // #1252 - INST(Vpmovmskb , VexRm_Lx , V(660F00,D7,_,x,I,_,_,_ ), 0 , 63 , 0 , 7985 , 293, 128), // #1253 - INST(Vpmovq2m , VexRm_Lx , E(F30F38,39,_,x,_,1,_,_ ), 0 , 181, 0 , 7995 , 340, 114), // #1254 - INST(Vpmovqb , VexMr_Lx , E(F30F38,32,_,x,_,0,1,OVM), 0 , 196, 0 , 8004 , 343, 111), // #1255 - INST(Vpmovqd , VexMr_Lx , E(F30F38,35,_,x,_,0,3,HVM), 0 , 195, 0 , 8012 , 342, 111), // #1256 - INST(Vpmovqw , VexMr_Lx , E(F30F38,34,_,x,_,0,2,QVM), 0 , 194, 0 , 8020 , 341, 111), // #1257 - INST(Vpmovsdb , VexMr_Lx , E(F30F38,21,_,x,_,0,2,QVM), 0 , 194, 0 , 8028 , 341, 111), // #1258 - INST(Vpmovsdw , VexMr_Lx , E(F30F38,23,_,x,_,0,3,HVM), 0 , 195, 0 , 8037 , 342, 111), // #1259 - INST(Vpmovsqb , VexMr_Lx , E(F30F38,22,_,x,_,0,1,OVM), 0 , 196, 0 , 8046 , 343, 111), // #1260 - INST(Vpmovsqd , VexMr_Lx , E(F30F38,25,_,x,_,0,3,HVM), 0 , 195, 0 , 8055 , 342, 111), // #1261 - INST(Vpmovsqw , VexMr_Lx , E(F30F38,24,_,x,_,0,2,QVM), 0 , 194, 0 , 8064 , 341, 111), // #1262 - INST(Vpmovswb , VexMr_Lx , E(F30F38,20,_,x,_,0,3,HVM), 0 , 195, 0 , 8073 , 342, 113), // #1263 - INST(Vpmovsxbd , VexRm_Lx , V(660F38,21,_,x,I,I,2,QVM), 0 , 197, 0 , 8082 , 344, 116), // #1264 - INST(Vpmovsxbq , VexRm_Lx , V(660F38,22,_,x,I,I,1,OVM), 0 , 198, 0 , 8092 , 345, 116), // #1265 - INST(Vpmovsxbw , VexRm_Lx , V(660F38,20,_,x,I,I,3,HVM), 0 , 123, 0 , 8102 , 346, 130), // #1266 - INST(Vpmovsxdq , VexRm_Lx , V(660F38,25,_,x,I,0,3,HVM), 0 , 123, 0 , 8112 , 346, 116), // #1267 - INST(Vpmovsxwd , VexRm_Lx , V(660F38,23,_,x,I,I,3,HVM), 0 , 123, 0 , 8122 , 346, 116), // #1268 - INST(Vpmovsxwq , VexRm_Lx , V(660F38,24,_,x,I,I,2,QVM), 0 , 197, 0 , 8132 , 344, 116), // #1269 - INST(Vpmovusdb , VexMr_Lx , E(F30F38,11,_,x,_,0,2,QVM), 0 , 194, 0 , 8142 , 341, 111), // #1270 - INST(Vpmovusdw , VexMr_Lx , E(F30F38,13,_,x,_,0,3,HVM), 0 , 195, 0 , 8152 , 342, 111), // #1271 - INST(Vpmovusqb , VexMr_Lx , E(F30F38,12,_,x,_,0,1,OVM), 0 , 196, 0 , 8162 , 343, 111), // #1272 - INST(Vpmovusqd , VexMr_Lx , E(F30F38,15,_,x,_,0,3,HVM), 0 , 195, 0 , 8172 , 342, 111), // #1273 - INST(Vpmovusqw , VexMr_Lx , E(F30F38,14,_,x,_,0,2,QVM), 0 , 194, 0 , 8182 , 341, 111), // #1274 - INST(Vpmovuswb , VexMr_Lx , E(F30F38,10,_,x,_,0,3,HVM), 0 , 195, 0 , 8192 , 342, 113), // #1275 - INST(Vpmovw2m , VexRm_Lx , E(F30F38,29,_,x,_,1,_,_ ), 0 , 181, 
0 , 8202 , 340, 113), // #1276 - INST(Vpmovwb , VexMr_Lx , E(F30F38,30,_,x,_,0,3,HVM), 0 , 195, 0 , 8211 , 342, 113), // #1277 - INST(Vpmovzxbd , VexRm_Lx , V(660F38,31,_,x,I,I,2,QVM), 0 , 197, 0 , 8219 , 344, 116), // #1278 - INST(Vpmovzxbq , VexRm_Lx , V(660F38,32,_,x,I,I,1,OVM), 0 , 198, 0 , 8229 , 345, 116), // #1279 - INST(Vpmovzxbw , VexRm_Lx , V(660F38,30,_,x,I,I,3,HVM), 0 , 123, 0 , 8239 , 346, 130), // #1280 - INST(Vpmovzxdq , VexRm_Lx , V(660F38,35,_,x,I,0,3,HVM), 0 , 123, 0 , 8249 , 346, 116), // #1281 - INST(Vpmovzxwd , VexRm_Lx , V(660F38,33,_,x,I,I,3,HVM), 0 , 123, 0 , 8259 , 346, 116), // #1282 - INST(Vpmovzxwq , VexRm_Lx , V(660F38,34,_,x,I,I,2,QVM), 0 , 197, 0 , 8269 , 344, 116), // #1283 - INST(Vpmuldq , VexRvm_Lx , V(660F38,28,_,x,I,1,4,FV ), 0 , 188, 0 , 8279 , 185, 116), // #1284 - INST(Vpmulhrsw , VexRvm_Lx , V(660F38,0B,_,x,I,I,4,FVM), 0 , 98 , 0 , 8287 , 272, 130), // #1285 - INST(Vpmulhuw , VexRvm_Lx , V(660F00,E4,_,x,I,I,4,FVM), 0 , 175, 0 , 8297 , 272, 130), // #1286 - INST(Vpmulhw , VexRvm_Lx , V(660F00,E5,_,x,I,I,4,FVM), 0 , 175, 0 , 8306 , 272, 130), // #1287 - INST(Vpmulld , VexRvm_Lx , V(660F38,40,_,x,I,0,4,FV ), 0 , 154, 0 , 8314 , 186, 116), // #1288 - INST(Vpmullq , VexRvm_Lx , E(660F38,40,_,x,_,1,4,FV ), 0 , 103, 0 , 8322 , 191, 114), // #1289 - INST(Vpmullw , VexRvm_Lx , V(660F00,D5,_,x,I,I,4,FVM), 0 , 175, 0 , 8330 , 272, 130), // #1290 - INST(Vpmultishiftqb , VexRvm_Lx , E(660F38,83,_,x,_,1,4,FV ), 0 , 103, 0 , 8338 , 191, 137), // #1291 - INST(Vpmuludq , VexRvm_Lx , V(660F00,F4,_,x,I,1,4,FV ), 0 , 93 , 0 , 8353 , 185, 116), // #1292 - INST(Vpopcntb , VexRm_Lx , E(660F38,54,_,x,_,0,4,FV ), 0 , 102, 0 , 8362 , 241, 141), // #1293 - INST(Vpopcntd , VexRm_Lx , E(660F38,55,_,x,_,0,4,FVM), 0 , 101, 0 , 8371 , 324, 142), // #1294 - INST(Vpopcntq , VexRm_Lx , E(660F38,55,_,x,_,1,4,FVM), 0 , 104, 0 , 8380 , 337, 142), // #1295 - INST(Vpopcntw , VexRm_Lx , E(660F38,54,_,x,_,1,4,FV ), 0 , 103, 0 , 8389 , 241, 141), // #1296 - INST(Vpor , VexRvm_Lx , V(660F00,EB,_,x,I,_,_,_ ), 0 , 63 , 0 , 8398 , 302, 128), // #1297 - INST(Vpord , VexRvm_Lx , E(660F00,EB,_,x,_,0,4,FV ), 0 , 179, 0 , 8403 , 303, 111), // #1298 - INST(Vporq , VexRvm_Lx , E(660F00,EB,_,x,_,1,4,FV ), 0 , 121, 0 , 8409 , 307, 111), // #1299 - INST(Vpperm , VexRvrmRvmr , V(XOP_M8,A3,_,0,x,_,_,_ ), 0 , 183, 0 , 8415 , 347, 124), // #1300 - INST(Vprold , VexVmi_Lx , E(660F00,72,1,x,_,0,4,FV ), 0 , 199, 0 , 8422 , 348, 111), // #1301 - INST(Vprolq , VexVmi_Lx , E(660F00,72,1,x,_,1,4,FV ), 0 , 200, 0 , 8429 , 349, 111), // #1302 - INST(Vprolvd , VexRvm_Lx , E(660F38,15,_,x,_,0,4,FV ), 0 , 102, 0 , 8436 , 190, 111), // #1303 - INST(Vprolvq , VexRvm_Lx , E(660F38,15,_,x,_,1,4,FV ), 0 , 103, 0 , 8444 , 191, 111), // #1304 - INST(Vprord , VexVmi_Lx , E(660F00,72,0,x,_,0,4,FV ), 0 , 179, 0 , 8452 , 348, 111), // #1305 - INST(Vprorq , VexVmi_Lx , E(660F00,72,0,x,_,1,4,FV ), 0 , 121, 0 , 8459 , 349, 111), // #1306 - INST(Vprorvd , VexRvm_Lx , E(660F38,14,_,x,_,0,4,FV ), 0 , 102, 0 , 8466 , 190, 111), // #1307 - INST(Vprorvq , VexRvm_Lx , E(660F38,14,_,x,_,1,4,FV ), 0 , 103, 0 , 8474 , 191, 111), // #1308 - INST(Vprotb , VexRvmRmvRmi , V(XOP_M9,90,_,0,x,_,_,_ ), V(XOP_M8,C0,_,0,x,_,_,_ ), 72 , 119, 8482 , 350, 124), // #1309 - INST(Vprotd , VexRvmRmvRmi , V(XOP_M9,92,_,0,x,_,_,_ ), V(XOP_M8,C2,_,0,x,_,_,_ ), 72 , 120, 8489 , 350, 124), // #1310 - INST(Vprotq , VexRvmRmvRmi , V(XOP_M9,93,_,0,x,_,_,_ ), V(XOP_M8,C3,_,0,x,_,_,_ ), 72 , 121, 8496 , 350, 124), // #1311 - INST(Vprotw , VexRvmRmvRmi , 
V(XOP_M9,91,_,0,x,_,_,_ ), V(XOP_M8,C1,_,0,x,_,_,_ ), 72 , 122, 8503 , 350, 124), // #1312 - INST(Vpsadbw , VexRvm_Lx , V(660F00,F6,_,x,I,I,4,FVM), 0 , 175, 0 , 8510 , 180, 130), // #1313 - INST(Vpscatterdd , VexMr_VM , E(660F38,A0,_,x,_,0,2,T1S), 0 , 116, 0 , 8518 , 351, 111), // #1314 - INST(Vpscatterdq , VexMr_VM , E(660F38,A0,_,x,_,1,3,T1S), 0 , 115, 0 , 8530 , 351, 111), // #1315 - INST(Vpscatterqd , VexMr_VM , E(660F38,A1,_,x,_,0,2,T1S), 0 , 116, 0 , 8542 , 352, 111), // #1316 - INST(Vpscatterqq , VexMr_VM , E(660F38,A1,_,x,_,1,3,T1S), 0 , 115, 0 , 8554 , 353, 111), // #1317 - INST(Vpshab , VexRvmRmv , V(XOP_M9,98,_,0,x,_,_,_ ), 0 , 72 , 0 , 8566 , 354, 124), // #1318 - INST(Vpshad , VexRvmRmv , V(XOP_M9,9A,_,0,x,_,_,_ ), 0 , 72 , 0 , 8573 , 354, 124), // #1319 - INST(Vpshaq , VexRvmRmv , V(XOP_M9,9B,_,0,x,_,_,_ ), 0 , 72 , 0 , 8580 , 354, 124), // #1320 - INST(Vpshaw , VexRvmRmv , V(XOP_M9,99,_,0,x,_,_,_ ), 0 , 72 , 0 , 8587 , 354, 124), // #1321 - INST(Vpshlb , VexRvmRmv , V(XOP_M9,94,_,0,x,_,_,_ ), 0 , 72 , 0 , 8594 , 354, 124), // #1322 - INST(Vpshld , VexRvmRmv , V(XOP_M9,96,_,0,x,_,_,_ ), 0 , 72 , 0 , 8601 , 354, 124), // #1323 - INST(Vpshldd , VexRvmi_Lx , E(660F3A,71,_,x,_,0,4,FV ), 0 , 99 , 0 , 8608 , 183, 135), // #1324 - INST(Vpshldq , VexRvmi_Lx , E(660F3A,71,_,x,_,1,4,FV ), 0 , 100, 0 , 8616 , 184, 135), // #1325 - INST(Vpshldvd , VexRvm_Lx , E(660F38,71,_,x,_,0,4,FV ), 0 , 102, 0 , 8624 , 190, 135), // #1326 - INST(Vpshldvq , VexRvm_Lx , E(660F38,71,_,x,_,1,4,FV ), 0 , 103, 0 , 8633 , 191, 135), // #1327 - INST(Vpshldvw , VexRvm_Lx , E(660F38,70,_,x,_,0,4,FVM), 0 , 101, 0 , 8642 , 189, 135), // #1328 - INST(Vpshldw , VexRvmi_Lx , E(660F3A,70,_,x,_,0,4,FVM), 0 , 142, 0 , 8651 , 237, 135), // #1329 - INST(Vpshlq , VexRvmRmv , V(XOP_M9,97,_,0,x,_,_,_ ), 0 , 72 , 0 , 8659 , 354, 124), // #1330 - INST(Vpshlw , VexRvmRmv , V(XOP_M9,95,_,0,x,_,_,_ ), 0 , 72 , 0 , 8666 , 354, 124), // #1331 - INST(Vpshrdd , VexRvmi_Lx , E(660F3A,73,_,x,_,0,4,FV ), 0 , 99 , 0 , 8673 , 183, 135), // #1332 - INST(Vpshrdq , VexRvmi_Lx , E(660F3A,73,_,x,_,1,4,FV ), 0 , 100, 0 , 8681 , 184, 135), // #1333 - INST(Vpshrdvd , VexRvm_Lx , E(660F38,73,_,x,_,0,4,FV ), 0 , 102, 0 , 8689 , 190, 135), // #1334 - INST(Vpshrdvq , VexRvm_Lx , E(660F38,73,_,x,_,1,4,FV ), 0 , 103, 0 , 8698 , 191, 135), // #1335 - INST(Vpshrdvw , VexRvm_Lx , E(660F38,72,_,x,_,0,4,FVM), 0 , 101, 0 , 8707 , 189, 135), // #1336 - INST(Vpshrdw , VexRvmi_Lx , E(660F3A,72,_,x,_,0,4,FVM), 0 , 142, 0 , 8716 , 237, 135), // #1337 - INST(Vpshufb , VexRvm_Lx , V(660F38,00,_,x,I,I,4,FVM), 0 , 98 , 0 , 8724 , 272, 130), // #1338 - INST(Vpshufbitqmb , VexRvm_Lx , E(660F38,8F,_,x,0,0,4,FVM), 0 , 101, 0 , 8732 , 355, 141), // #1339 - INST(Vpshufd , VexRmi_Lx , V(660F00,70,_,x,I,0,4,FV ), 0 , 124, 0 , 8745 , 356, 116), // #1340 - INST(Vpshufhw , VexRmi_Lx , V(F30F00,70,_,x,I,I,4,FVM), 0 , 176, 0 , 8753 , 357, 130), // #1341 - INST(Vpshuflw , VexRmi_Lx , V(F20F00,70,_,x,I,I,4,FVM), 0 , 201, 0 , 8762 , 357, 130), // #1342 - INST(Vpsignb , VexRvm_Lx , V(660F38,08,_,x,I,_,_,_ ), 0 , 88 , 0 , 8771 , 179, 128), // #1343 - INST(Vpsignd , VexRvm_Lx , V(660F38,0A,_,x,I,_,_,_ ), 0 , 88 , 0 , 8779 , 179, 128), // #1344 - INST(Vpsignw , VexRvm_Lx , V(660F38,09,_,x,I,_,_,_ ), 0 , 88 , 0 , 8787 , 179, 128), // #1345 - INST(Vpslld , VexRvmVmi_Lx , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 202, 123, 8795 , 358, 116), // #1346 - INST(Vpslldq , VexEvexVmi_Lx , V(660F00,73,7,x,I,I,4,FVM), 0 , 203, 0 , 8802 , 359, 130), // #1347 - INST(Vpsllq , 
VexRvmVmi_Lx , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 204, 124, 8810 , 360, 116), // #1348 - INST(Vpsllvd , VexRvm_Lx , V(660F38,47,_,x,0,0,4,FV ), 0 , 154, 0 , 8817 , 186, 125), // #1349 - INST(Vpsllvq , VexRvm_Lx , V(660F38,47,_,x,1,1,4,FV ), 0 , 153, 0 , 8825 , 185, 125), // #1350 - INST(Vpsllvw , VexRvm_Lx , E(660F38,12,_,x,_,1,4,FVM), 0 , 104, 0 , 8833 , 189, 113), // #1351 - INST(Vpsllw , VexRvmVmi_Lx , V(660F00,F1,_,x,I,I,4,FVM), V(660F00,71,6,x,I,I,4,FVM), 175, 125, 8841 , 361, 130), // #1352 - INST(Vpsrad , VexRvmVmi_Lx , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 202, 126, 8848 , 358, 116), // #1353 - INST(Vpsraq , VexRvmVmi_Lx , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 205, 127, 8855 , 362, 111), // #1354 - INST(Vpsravd , VexRvm_Lx , V(660F38,46,_,x,0,0,4,FV ), 0 , 154, 0 , 8862 , 186, 125), // #1355 - INST(Vpsravq , VexRvm_Lx , E(660F38,46,_,x,_,1,4,FV ), 0 , 103, 0 , 8870 , 191, 111), // #1356 - INST(Vpsravw , VexRvm_Lx , E(660F38,11,_,x,_,1,4,FVM), 0 , 104, 0 , 8878 , 189, 113), // #1357 - INST(Vpsraw , VexRvmVmi_Lx , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 202, 128, 8886 , 361, 130), // #1358 - INST(Vpsrld , VexRvmVmi_Lx , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 202, 129, 8893 , 358, 116), // #1359 - INST(Vpsrldq , VexEvexVmi_Lx , V(660F00,73,3,x,I,I,4,FVM), 0 , 206, 0 , 8900 , 359, 130), // #1360 - INST(Vpsrlq , VexRvmVmi_Lx , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 204, 130, 8908 , 360, 116), // #1361 - INST(Vpsrlvd , VexRvm_Lx , V(660F38,45,_,x,0,0,4,FV ), 0 , 154, 0 , 8915 , 186, 125), // #1362 - INST(Vpsrlvq , VexRvm_Lx , V(660F38,45,_,x,1,1,4,FV ), 0 , 153, 0 , 8923 , 185, 125), // #1363 - INST(Vpsrlvw , VexRvm_Lx , E(660F38,10,_,x,_,1,4,FVM), 0 , 104, 0 , 8931 , 189, 113), // #1364 - INST(Vpsrlw , VexRvmVmi_Lx , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 202, 131, 8939 , 361, 130), // #1365 - INST(Vpsubb , VexRvm_Lx , V(660F00,F8,_,x,I,I,4,FVM), 0 , 175, 0 , 8946 , 363, 130), // #1366 - INST(Vpsubd , VexRvm_Lx , V(660F00,FA,_,x,I,0,4,FV ), 0 , 124, 0 , 8953 , 364, 116), // #1367 - INST(Vpsubq , VexRvm_Lx , V(660F00,FB,_,x,I,1,4,FV ), 0 , 93 , 0 , 8960 , 365, 116), // #1368 - INST(Vpsubsb , VexRvm_Lx , V(660F00,E8,_,x,I,I,4,FVM), 0 , 175, 0 , 8967 , 363, 130), // #1369 - INST(Vpsubsw , VexRvm_Lx , V(660F00,E9,_,x,I,I,4,FVM), 0 , 175, 0 , 8975 , 363, 130), // #1370 - INST(Vpsubusb , VexRvm_Lx , V(660F00,D8,_,x,I,I,4,FVM), 0 , 175, 0 , 8983 , 363, 130), // #1371 - INST(Vpsubusw , VexRvm_Lx , V(660F00,D9,_,x,I,I,4,FVM), 0 , 175, 0 , 8992 , 363, 130), // #1372 - INST(Vpsubw , VexRvm_Lx , V(660F00,F9,_,x,I,I,4,FVM), 0 , 175, 0 , 9001 , 363, 130), // #1373 - INST(Vpternlogd , VexRvmi_Lx , E(660F3A,25,_,x,_,0,4,FV ), 0 , 99 , 0 , 9008 , 183, 111), // #1374 - INST(Vpternlogq , VexRvmi_Lx , E(660F3A,25,_,x,_,1,4,FV ), 0 , 100, 0 , 9019 , 184, 111), // #1375 - INST(Vptest , VexRm_Lx , V(660F38,17,_,x,I,_,_,_ ), 0 , 88 , 0 , 9030 , 257, 134), // #1376 - INST(Vptestmb , VexRvm_Lx , E(660F38,26,_,x,_,0,4,FVM), 0 , 101, 0 , 9037 , 355, 113), // #1377 - INST(Vptestmd , VexRvm_Lx , E(660F38,27,_,x,_,0,4,FV ), 0 , 102, 0 , 9046 , 366, 111), // #1378 - INST(Vptestmq , VexRvm_Lx , E(660F38,27,_,x,_,1,4,FV ), 0 , 103, 0 , 9055 , 367, 111), // #1379 - INST(Vptestmw , VexRvm_Lx , E(660F38,26,_,x,_,1,4,FVM), 0 , 104, 0 , 9064 , 355, 113), // #1380 - INST(Vptestnmb , VexRvm_Lx , E(F30F38,26,_,x,_,0,4,FVM), 0 , 207, 0 , 9073 , 355, 113), // #1381 - INST(Vptestnmd , VexRvm_Lx , 
E(F30F38,27,_,x,_,0,4,FV ), 0 , 208, 0 , 9083 , 366, 111), // #1382 - INST(Vptestnmq , VexRvm_Lx , E(F30F38,27,_,x,_,1,4,FV ), 0 , 209, 0 , 9093 , 367, 111), // #1383 - INST(Vptestnmw , VexRvm_Lx , E(F30F38,26,_,x,_,1,4,FVM), 0 , 210, 0 , 9103 , 355, 113), // #1384 - INST(Vpunpckhbw , VexRvm_Lx , V(660F00,68,_,x,I,I,4,FVM), 0 , 175, 0 , 9113 , 272, 130), // #1385 - INST(Vpunpckhdq , VexRvm_Lx , V(660F00,6A,_,x,I,0,4,FV ), 0 , 124, 0 , 9124 , 186, 116), // #1386 - INST(Vpunpckhqdq , VexRvm_Lx , V(660F00,6D,_,x,I,1,4,FV ), 0 , 93 , 0 , 9135 , 185, 116), // #1387 - INST(Vpunpckhwd , VexRvm_Lx , V(660F00,69,_,x,I,I,4,FVM), 0 , 175, 0 , 9147 , 272, 130), // #1388 - INST(Vpunpcklbw , VexRvm_Lx , V(660F00,60,_,x,I,I,4,FVM), 0 , 175, 0 , 9158 , 272, 130), // #1389 - INST(Vpunpckldq , VexRvm_Lx , V(660F00,62,_,x,I,0,4,FV ), 0 , 124, 0 , 9169 , 186, 116), // #1390 - INST(Vpunpcklqdq , VexRvm_Lx , V(660F00,6C,_,x,I,1,4,FV ), 0 , 93 , 0 , 9180 , 185, 116), // #1391 - INST(Vpunpcklwd , VexRvm_Lx , V(660F00,61,_,x,I,I,4,FVM), 0 , 175, 0 , 9192 , 272, 130), // #1392 - INST(Vpxor , VexRvm_Lx , V(660F00,EF,_,x,I,_,_,_ ), 0 , 63 , 0 , 9203 , 304, 128), // #1393 - INST(Vpxord , VexRvm_Lx , E(660F00,EF,_,x,_,0,4,FV ), 0 , 179, 0 , 9209 , 305, 111), // #1394 - INST(Vpxorq , VexRvm_Lx , E(660F00,EF,_,x,_,1,4,FV ), 0 , 121, 0 , 9216 , 306, 111), // #1395 - INST(Vrangepd , VexRvmi_Lx , E(660F3A,50,_,x,_,1,4,FV ), 0 , 100, 0 , 9223 , 246, 114), // #1396 - INST(Vrangeps , VexRvmi_Lx , E(660F3A,50,_,x,_,0,4,FV ), 0 , 99 , 0 , 9232 , 247, 114), // #1397 - INST(Vrangesd , VexRvmi , E(660F3A,51,_,I,_,1,3,T1S), 0 , 151, 0 , 9241 , 248, 61 ), // #1398 - INST(Vrangess , VexRvmi , E(660F3A,51,_,I,_,0,2,T1S), 0 , 152, 0 , 9250 , 249, 61 ), // #1399 - INST(Vrcp14pd , VexRm_Lx , E(660F38,4C,_,x,_,1,4,FV ), 0 , 103, 0 , 9259 , 337, 111), // #1400 - INST(Vrcp14ps , VexRm_Lx , E(660F38,4C,_,x,_,0,4,FV ), 0 , 102, 0 , 9268 , 324, 111), // #1401 - INST(Vrcp14sd , VexRvm , E(660F38,4D,_,I,_,1,3,T1S), 0 , 115, 0 , 9277 , 368, 63 ), // #1402 - INST(Vrcp14ss , VexRvm , E(660F38,4D,_,I,_,0,2,T1S), 0 , 116, 0 , 9286 , 369, 63 ), // #1403 - INST(Vrcp28pd , VexRm , E(660F38,CA,_,2,_,1,4,FV ), 0 , 143, 0 , 9295 , 239, 120), // #1404 - INST(Vrcp28ps , VexRm , E(660F38,CA,_,2,_,0,4,FV ), 0 , 144, 0 , 9304 , 240, 120), // #1405 - INST(Vrcp28sd , VexRvm , E(660F38,CB,_,I,_,1,3,T1S), 0 , 115, 0 , 9313 , 267, 120), // #1406 - INST(Vrcp28ss , VexRvm , E(660F38,CB,_,I,_,0,2,T1S), 0 , 116, 0 , 9322 , 268, 120), // #1407 - INST(Vrcpps , VexRm_Lx , V(000F00,53,_,x,I,_,_,_ ), 0 , 66 , 0 , 9331 , 257, 108), // #1408 - INST(Vrcpss , VexRvm , V(F30F00,53,_,I,I,_,_,_ ), 0 , 169, 0 , 9338 , 370, 108), // #1409 - INST(Vreducepd , VexRmi_Lx , E(660F3A,56,_,x,_,1,4,FV ), 0 , 100, 0 , 9345 , 349, 114), // #1410 - INST(Vreduceps , VexRmi_Lx , E(660F3A,56,_,x,_,0,4,FV ), 0 , 99 , 0 , 9355 , 348, 114), // #1411 - INST(Vreducesd , VexRvmi , E(660F3A,57,_,I,_,1,3,T1S), 0 , 151, 0 , 9365 , 371, 61 ), // #1412 - INST(Vreducess , VexRvmi , E(660F3A,57,_,I,_,0,2,T1S), 0 , 152, 0 , 9375 , 372, 61 ), // #1413 - INST(Vrndscalepd , VexRmi_Lx , E(660F3A,09,_,x,_,1,4,FV ), 0 , 100, 0 , 9385 , 269, 111), // #1414 - INST(Vrndscaleps , VexRmi_Lx , E(660F3A,08,_,x,_,0,4,FV ), 0 , 99 , 0 , 9397 , 270, 111), // #1415 - INST(Vrndscalesd , VexRvmi , E(660F3A,0B,_,I,_,1,3,T1S), 0 , 151, 0 , 9409 , 248, 63 ), // #1416 - INST(Vrndscaless , VexRvmi , E(660F3A,0A,_,I,_,0,2,T1S), 0 , 152, 0 , 9421 , 249, 63 ), // #1417 - INST(Vroundpd , VexRmi_Lx , V(660F3A,09,_,x,I,_,_,_ ), 0 , 67 , 0 , 
9433 , 373, 108), // #1418 - INST(Vroundps , VexRmi_Lx , V(660F3A,08,_,x,I,_,_,_ ), 0 , 67 , 0 , 9442 , 373, 108), // #1419 - INST(Vroundsd , VexRvmi , V(660F3A,0B,_,I,I,_,_,_ ), 0 , 67 , 0 , 9451 , 374, 108), // #1420 - INST(Vroundss , VexRvmi , V(660F3A,0A,_,I,I,_,_,_ ), 0 , 67 , 0 , 9460 , 375, 108), // #1421 - INST(Vrsqrt14pd , VexRm_Lx , E(660F38,4E,_,x,_,1,4,FV ), 0 , 103, 0 , 9469 , 337, 111), // #1422 - INST(Vrsqrt14ps , VexRm_Lx , E(660F38,4E,_,x,_,0,4,FV ), 0 , 102, 0 , 9480 , 324, 111), // #1423 - INST(Vrsqrt14sd , VexRvm , E(660F38,4F,_,I,_,1,3,T1S), 0 , 115, 0 , 9491 , 368, 63 ), // #1424 - INST(Vrsqrt14ss , VexRvm , E(660F38,4F,_,I,_,0,2,T1S), 0 , 116, 0 , 9502 , 369, 63 ), // #1425 - INST(Vrsqrt28pd , VexRm , E(660F38,CC,_,2,_,1,4,FV ), 0 , 143, 0 , 9513 , 239, 120), // #1426 - INST(Vrsqrt28ps , VexRm , E(660F38,CC,_,2,_,0,4,FV ), 0 , 144, 0 , 9524 , 240, 120), // #1427 - INST(Vrsqrt28sd , VexRvm , E(660F38,CD,_,I,_,1,3,T1S), 0 , 115, 0 , 9535 , 267, 120), // #1428 - INST(Vrsqrt28ss , VexRvm , E(660F38,CD,_,I,_,0,2,T1S), 0 , 116, 0 , 9546 , 268, 120), // #1429 - INST(Vrsqrtps , VexRm_Lx , V(000F00,52,_,x,I,_,_,_ ), 0 , 66 , 0 , 9557 , 257, 108), // #1430 - INST(Vrsqrtss , VexRvm , V(F30F00,52,_,I,I,_,_,_ ), 0 , 169, 0 , 9566 , 370, 108), // #1431 - INST(Vscalefpd , VexRvm_Lx , E(660F38,2C,_,x,_,1,4,FV ), 0 , 103, 0 , 9575 , 376, 111), // #1432 - INST(Vscalefps , VexRvm_Lx , E(660F38,2C,_,x,_,0,4,FV ), 0 , 102, 0 , 9585 , 377, 111), // #1433 - INST(Vscalefsd , VexRvm , E(660F38,2D,_,I,_,1,3,T1S), 0 , 115, 0 , 9595 , 378, 63 ), // #1434 - INST(Vscalefss , VexRvm , E(660F38,2D,_,I,_,0,2,T1S), 0 , 116, 0 , 9605 , 379, 63 ), // #1435 - INST(Vscatterdpd , VexMr_Lx , E(660F38,A2,_,x,_,1,3,T1S), 0 , 115, 0 , 9615 , 380, 111), // #1436 - INST(Vscatterdps , VexMr_Lx , E(660F38,A2,_,x,_,0,2,T1S), 0 , 116, 0 , 9627 , 351, 111), // #1437 - INST(Vscatterpf0dpd , VexM_VM , E(660F38,C6,5,2,_,1,3,T1S), 0 , 211, 0 , 9639 , 262, 126), // #1438 - INST(Vscatterpf0dps , VexM_VM , E(660F38,C6,5,2,_,0,2,T1S), 0 , 212, 0 , 9654 , 263, 126), // #1439 - INST(Vscatterpf0qpd , VexM_VM , E(660F38,C7,5,2,_,1,3,T1S), 0 , 211, 0 , 9669 , 264, 126), // #1440 - INST(Vscatterpf0qps , VexM_VM , E(660F38,C7,5,2,_,0,2,T1S), 0 , 212, 0 , 9684 , 264, 126), // #1441 - INST(Vscatterpf1dpd , VexM_VM , E(660F38,C6,6,2,_,1,3,T1S), 0 , 213, 0 , 9699 , 262, 126), // #1442 - INST(Vscatterpf1dps , VexM_VM , E(660F38,C6,6,2,_,0,2,T1S), 0 , 214, 0 , 9714 , 263, 126), // #1443 - INST(Vscatterpf1qpd , VexM_VM , E(660F38,C7,6,2,_,1,3,T1S), 0 , 213, 0 , 9729 , 264, 126), // #1444 - INST(Vscatterpf1qps , VexM_VM , E(660F38,C7,6,2,_,0,2,T1S), 0 , 214, 0 , 9744 , 264, 126), // #1445 - INST(Vscatterqpd , VexMr_Lx , E(660F38,A3,_,x,_,1,3,T1S), 0 , 115, 0 , 9759 , 353, 111), // #1446 - INST(Vscatterqps , VexMr_Lx , E(660F38,A3,_,x,_,0,2,T1S), 0 , 116, 0 , 9771 , 352, 111), // #1447 - INST(Vshuff32x4 , VexRvmi_Lx , E(660F3A,23,_,x,_,0,4,FV ), 0 , 99 , 0 , 9783 , 381, 111), // #1448 - INST(Vshuff64x2 , VexRvmi_Lx , E(660F3A,23,_,x,_,1,4,FV ), 0 , 100, 0 , 9794 , 382, 111), // #1449 - INST(Vshufi32x4 , VexRvmi_Lx , E(660F3A,43,_,x,_,0,4,FV ), 0 , 99 , 0 , 9805 , 381, 111), // #1450 - INST(Vshufi64x2 , VexRvmi_Lx , E(660F3A,43,_,x,_,1,4,FV ), 0 , 100, 0 , 9816 , 382, 111), // #1451 - INST(Vshufpd , VexRvmi_Lx , V(660F00,C6,_,x,I,1,4,FV ), 0 , 93 , 0 , 9827 , 383, 106), // #1452 - INST(Vshufps , VexRvmi_Lx , V(000F00,C6,_,x,I,0,4,FV ), 0 , 94 , 0 , 9835 , 384, 106), // #1453 - INST(Vsqrtpd , VexRm_Lx , V(660F00,51,_,x,I,1,4,FV ), 0 , 93 , 0 
, 9843 , 385, 106), // #1454 - INST(Vsqrtps , VexRm_Lx , V(000F00,51,_,x,I,0,4,FV ), 0 , 94 , 0 , 9851 , 209, 106), // #1455 - INST(Vsqrtsd , VexRvm , V(F20F00,51,_,I,I,1,3,T1S), 0 , 95 , 0 , 9859 , 177, 107), // #1456 - INST(Vsqrtss , VexRvm , V(F30F00,51,_,I,I,0,2,T1S), 0 , 96 , 0 , 9867 , 178, 107), // #1457 - INST(Vstmxcsr , VexM , V(000F00,AE,3,0,I,_,_,_ ), 0 , 215, 0 , 9875 , 278, 108), // #1458 - INST(Vsubpd , VexRvm_Lx , V(660F00,5C,_,x,I,1,4,FV ), 0 , 93 , 0 , 9884 , 175, 106), // #1459 - INST(Vsubps , VexRvm_Lx , V(000F00,5C,_,x,I,0,4,FV ), 0 , 94 , 0 , 9891 , 176, 106), // #1460 - INST(Vsubsd , VexRvm , V(F20F00,5C,_,I,I,1,3,T1S), 0 , 95 , 0 , 9898 , 177, 107), // #1461 - INST(Vsubss , VexRvm , V(F30F00,5C,_,I,I,0,2,T1S), 0 , 96 , 0 , 9905 , 178, 107), // #1462 - INST(Vtestpd , VexRm_Lx , V(660F38,0F,_,x,0,_,_,_ ), 0 , 88 , 0 , 9912 , 257, 134), // #1463 - INST(Vtestps , VexRm_Lx , V(660F38,0E,_,x,0,_,_,_ ), 0 , 88 , 0 , 9920 , 257, 134), // #1464 - INST(Vucomisd , VexRm , V(660F00,2E,_,I,I,1,3,T1S), 0 , 113, 0 , 9928 , 205, 117), // #1465 - INST(Vucomiss , VexRm , V(000F00,2E,_,I,I,0,2,T1S), 0 , 114, 0 , 9937 , 206, 117), // #1466 - INST(Vunpckhpd , VexRvm_Lx , V(660F00,15,_,x,I,1,4,FV ), 0 , 93 , 0 , 9946 , 185, 106), // #1467 - INST(Vunpckhps , VexRvm_Lx , V(000F00,15,_,x,I,0,4,FV ), 0 , 94 , 0 , 9956 , 186, 106), // #1468 - INST(Vunpcklpd , VexRvm_Lx , V(660F00,14,_,x,I,1,4,FV ), 0 , 93 , 0 , 9966 , 185, 106), // #1469 - INST(Vunpcklps , VexRvm_Lx , V(000F00,14,_,x,I,0,4,FV ), 0 , 94 , 0 , 9976 , 186, 106), // #1470 - INST(Vxorpd , VexRvm_Lx , V(660F00,57,_,x,I,1,4,FV ), 0 , 93 , 0 , 9986 , 365, 112), // #1471 - INST(Vxorps , VexRvm_Lx , V(000F00,57,_,x,I,0,4,FV ), 0 , 94 , 0 , 9993 , 364, 112), // #1472 - INST(Vzeroall , VexOp , V(000F00,77,_,1,I,_,_,_ ), 0 , 62 , 0 , 10000, 386, 108), // #1473 - INST(Vzeroupper , VexOp , V(000F00,77,_,0,I,_,_,_ ), 0 , 66 , 0 , 10009, 386, 108), // #1474 - INST(Wbinvd , X86Op , O(000F00,09,_,_,_,_,_,_ ), 0 , 4 , 0 , 10020, 30 , 0 ), // #1475 - INST(Wbnoinvd , X86Op , O(F30F00,09,_,_,_,_,_,_ ), 0 , 6 , 0 , 10027, 30 , 143), // #1476 - INST(Wrfsbase , X86M , O(F30F00,AE,2,_,x,_,_,_ ), 0 , 216, 0 , 10036, 161, 94 ), // #1477 - INST(Wrgsbase , X86M , O(F30F00,AE,3,_,x,_,_,_ ), 0 , 217, 0 , 10045, 161, 94 ), // #1478 - INST(Wrmsr , X86Op , O(000F00,30,_,_,_,_,_,_ ), 0 , 4 , 0 , 10054, 162, 95 ), // #1479 - INST(Xabort , X86Op_O_I8 , O(000000,C6,7,_,_,_,_,_ ), 0 , 25 , 0 , 10060, 74 , 144), // #1480 - INST(Xadd , X86Xadd , O(000F00,C0,_,_,x,_,_,_ ), 0 , 4 , 0 , 10067, 387, 36 ), // #1481 - INST(Xbegin , X86JmpRel , O(000000,C7,7,_,_,_,_,_ ), 0 , 25 , 0 , 10072, 388, 144), // #1482 - INST(Xchg , X86Xchg , O(000000,86,_,_,x,_,_,_ ), 0 , 0 , 0 , 448 , 389, 0 ), // #1483 - INST(Xend , X86Op , O(000F01,D5,_,_,_,_,_,_ ), 0 , 21 , 0 , 10079, 30 , 144), // #1484 - INST(Xgetbv , X86Op , O(000F01,D0,_,_,_,_,_,_ ), 0 , 21 , 0 , 10084, 162, 145), // #1485 - INST(Xlatb , X86Op , O(000000,D7,_,_,_,_,_,_ ), 0 , 0 , 0 , 10091, 30 , 0 ), // #1486 - INST(Xor , X86Arith , O(000000,30,6,_,x,_,_,_ ), 0 , 30 , 0 , 9205 , 166, 1 ), // #1487 - INST(Xorpd , ExtRm , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 9987 , 140, 4 ), // #1488 - INST(Xorps , ExtRm , O(000F00,57,_,_,_,_,_,_ ), 0 , 4 , 0 , 9994 , 140, 5 ), // #1489 - INST(Xrstor , X86M_Only , O(000F00,AE,5,_,_,_,_,_ ), 0 , 70 , 0 , 1134 , 390, 145), // #1490 - INST(Xrstor64 , X86M_Only , O(000F00,AE,5,_,1,_,_,_ ), 0 , 218, 0 , 1142 , 391, 145), // #1491 - INST(Xrstors , X86M_Only , O(000F00,C7,3,_,_,_,_,_ ), 0 , 71 , 0 , 
10097, 390, 146), // #1492 - INST(Xrstors64 , X86M_Only , O(000F00,C7,3,_,1,_,_,_ ), 0 , 219, 0 , 10105, 391, 146), // #1493 - INST(Xsave , X86M_Only , O(000F00,AE,4,_,_,_,_,_ ), 0 , 89 , 0 , 1152 , 390, 145), // #1494 - INST(Xsave64 , X86M_Only , O(000F00,AE,4,_,1,_,_,_ ), 0 , 220, 0 , 1159 , 391, 145), // #1495 - INST(Xsavec , X86M_Only , O(000F00,C7,4,_,_,_,_,_ ), 0 , 89 , 0 , 10115, 390, 147), // #1496 - INST(Xsavec64 , X86M_Only , O(000F00,C7,4,_,1,_,_,_ ), 0 , 220, 0 , 10122, 391, 147), // #1497 - INST(Xsaveopt , X86M_Only , O(000F00,AE,6,_,_,_,_,_ ), 0 , 73 , 0 , 10131, 390, 148), // #1498 - INST(Xsaveopt64 , X86M_Only , O(000F00,AE,6,_,1,_,_,_ ), 0 , 221, 0 , 10140, 391, 148), // #1499 - INST(Xsaves , X86M_Only , O(000F00,C7,5,_,_,_,_,_ ), 0 , 70 , 0 , 10151, 390, 146), // #1500 - INST(Xsaves64 , X86M_Only , O(000F00,C7,5,_,1,_,_,_ ), 0 , 218, 0 , 10158, 391, 146), // #1501 - INST(Xsetbv , X86Op , O(000F01,D1,_,_,_,_,_,_ ), 0 , 21 , 0 , 10167, 162, 145), // #1502 - INST(Xtest , X86Op , O(000F01,D6,_,_,_,_,_,_ ), 0 , 21 , 0 , 10174, 30 , 149) // #1503 + INST(Clrssbsy , X86M , O(F30F00,AE,6,_,_,_,_,_ ), 0 , 24 , 0 , 252 , 32 , 24 ), // #71 + INST(Clts , X86Op , O(000F00,06,_,_,_,_,_,_ ), 0 , 4 , 0 , 261 , 30 , 0 ), // #72 + INST(Clwb , X86M_Only , O(660F00,AE,6,_,_,_,_,_ ), 0 , 25 , 0 , 266 , 31 , 25 ), // #73 + INST(Clzero , X86Op_MemZAX , O(000F01,FC,_,_,_,_,_,_ ), 0 , 21 , 0 , 271 , 33 , 26 ), // #74 + INST(Cmc , X86Op , O(000000,F5,_,_,_,_,_,_ ), 0 , 0 , 0 , 278 , 30 , 27 ), // #75 + INST(Cmova , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 282 , 22 , 28 ), // #76 + INST(Cmovae , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 288 , 22 , 29 ), // #77 + INST(Cmovb , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 643 , 22 , 29 ), // #78 + INST(Cmovbe , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 , 4 , 0 , 650 , 22 , 28 ), // #79 + INST(Cmovc , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 295 , 22 , 29 ), // #80 + INST(Cmove , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 658 , 22 , 30 ), // #81 + INST(Cmovg , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 301 , 22 , 31 ), // #82 + INST(Cmovge , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 307 , 22 , 32 ), // #83 + INST(Cmovl , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 314 , 22 , 32 ), // #84 + INST(Cmovle , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 320 , 22 , 31 ), // #85 + INST(Cmovna , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 , 4 , 0 , 327 , 22 , 28 ), // #86 + INST(Cmovnae , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 334 , 22 , 29 ), // #87 + INST(Cmovnb , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 665 , 22 , 29 ), // #88 + INST(Cmovnbe , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 673 , 22 , 28 ), // #89 + INST(Cmovnc , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 342 , 22 , 29 ), // #90 + INST(Cmovne , X86Rm , O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 682 , 22 , 30 ), // #91 + INST(Cmovng , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 349 , 22 , 31 ), // #92 + INST(Cmovnge , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 356 , 22 , 32 ), // #93 + INST(Cmovnl , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 364 , 22 , 32 ), // #94 + INST(Cmovnle , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 371 , 22 , 31 ), // #95 + INST(Cmovno , X86Rm , O(000F00,41,_,_,x,_,_,_ ), 0 , 4 , 0 , 379 , 22 , 33 ), // #96 + INST(Cmovnp , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 386 , 22 , 34 ), // #97 + INST(Cmovns , X86Rm , O(000F00,49,_,_,x,_,_,_ ), 0 , 4 , 0 , 393 , 22 , 35 ), // #98 + INST(Cmovnz , X86Rm , 
O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 400 , 22 , 30 ), // #99 + INST(Cmovo , X86Rm , O(000F00,40,_,_,x,_,_,_ ), 0 , 4 , 0 , 407 , 22 , 33 ), // #100 + INST(Cmovp , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 413 , 22 , 34 ), // #101 + INST(Cmovpe , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 419 , 22 , 34 ), // #102 + INST(Cmovpo , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 426 , 22 , 34 ), // #103 + INST(Cmovs , X86Rm , O(000F00,48,_,_,x,_,_,_ ), 0 , 4 , 0 , 433 , 22 , 35 ), // #104 + INST(Cmovz , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 439 , 22 , 30 ), // #105 + INST(Cmp , X86Arith , O(000000,38,7,_,x,_,_,_ ), 0 , 26 , 0 , 445 , 34 , 1 ), // #106 + INST(Cmppd , ExtRmi , O(660F00,C2,_,_,_,_,_,_ ), 0 , 3 , 0 , 3719 , 8 , 4 ), // #107 + INST(Cmpps , ExtRmi , O(000F00,C2,_,_,_,_,_,_ ), 0 , 4 , 0 , 3726 , 8 , 5 ), // #108 + INST(Cmps , X86StrMm , O(000000,A6,_,_,_,_,_,_ ), 0 , 0 , 0 , 449 , 35 , 36 ), // #109 + INST(Cmpsd , ExtRmi , O(F20F00,C2,_,_,_,_,_,_ ), 0 , 5 , 0 , 3733 , 36 , 4 ), // #110 + INST(Cmpss , ExtRmi , O(F30F00,C2,_,_,_,_,_,_ ), 0 , 6 , 0 , 3740 , 37 , 5 ), // #111 + INST(Cmpxchg , X86Cmpxchg , O(000F00,B0,_,_,x,_,_,_ ), 0 , 4 , 0 , 454 , 38 , 37 ), // #112 + INST(Cmpxchg16b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,1,_,_,_ ), 0 , 27 , 0 , 462 , 39 , 38 ), // #113 + INST(Cmpxchg8b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,_,_,_,_ ), 0 , 28 , 0 , 473 , 40 , 39 ), // #114 + INST(Comisd , ExtRm , O(660F00,2F,_,_,_,_,_,_ ), 0 , 3 , 0 , 10246, 6 , 40 ), // #115 + INST(Comiss , ExtRm , O(000F00,2F,_,_,_,_,_,_ ), 0 , 4 , 0 , 10255, 7 , 41 ), // #116 + INST(Cpuid , X86Op , O(000F00,A2,_,_,_,_,_,_ ), 0 , 4 , 0 , 483 , 41 , 42 ), // #117 + INST(Cqo , X86Op_xDX_xAX , O(000000,99,_,_,1,_,_,_ ), 0 , 20 , 0 , 489 , 42 , 0 ), // #118 + INST(Crc32 , X86Crc , O(F20F38,F0,_,_,x,_,_,_ ), 0 , 29 , 0 , 493 , 43 , 43 ), // #119 + INST(Cvtdq2pd , ExtRm , O(F30F00,E6,_,_,_,_,_,_ ), 0 , 6 , 0 , 3787 , 6 , 4 ), // #120 + INST(Cvtdq2ps , ExtRm , O(000F00,5B,_,_,_,_,_,_ ), 0 , 4 , 0 , 3797 , 5 , 4 ), // #121 + INST(Cvtpd2dq , ExtRm , O(F20F00,E6,_,_,_,_,_,_ ), 0 , 5 , 0 , 3836 , 5 , 4 ), // #122 + INST(Cvtpd2pi , ExtRm , O(660F00,2D,_,_,_,_,_,_ ), 0 , 3 , 0 , 499 , 44 , 4 ), // #123 + INST(Cvtpd2ps , ExtRm , O(660F00,5A,_,_,_,_,_,_ ), 0 , 3 , 0 , 3846 , 5 , 4 ), // #124 + INST(Cvtpi2pd , ExtRm , O(660F00,2A,_,_,_,_,_,_ ), 0 , 3 , 0 , 508 , 45 , 4 ), // #125 + INST(Cvtpi2ps , ExtRm , O(000F00,2A,_,_,_,_,_,_ ), 0 , 4 , 0 , 517 , 45 , 5 ), // #126 + INST(Cvtps2dq , ExtRm , O(660F00,5B,_,_,_,_,_,_ ), 0 , 3 , 0 , 3898 , 5 , 4 ), // #127 + INST(Cvtps2pd , ExtRm , O(000F00,5A,_,_,_,_,_,_ ), 0 , 4 , 0 , 3908 , 6 , 4 ), // #128 + INST(Cvtps2pi , ExtRm , O(000F00,2D,_,_,_,_,_,_ ), 0 , 4 , 0 , 526 , 46 , 5 ), // #129 + INST(Cvtsd2si , ExtRm_Wx , O(F20F00,2D,_,_,x,_,_,_ ), 0 , 5 , 0 , 3980 , 47 , 4 ), // #130 + INST(Cvtsd2ss , ExtRm , O(F20F00,5A,_,_,_,_,_,_ ), 0 , 5 , 0 , 3990 , 6 , 4 ), // #131 + INST(Cvtsi2sd , ExtRm_Wx , O(F20F00,2A,_,_,x,_,_,_ ), 0 , 5 , 0 , 4011 , 48 , 4 ), // #132 + INST(Cvtsi2ss , ExtRm_Wx , O(F30F00,2A,_,_,x,_,_,_ ), 0 , 6 , 0 , 4021 , 48 , 5 ), // #133 + INST(Cvtss2sd , ExtRm , O(F30F00,5A,_,_,_,_,_,_ ), 0 , 6 , 0 , 4031 , 7 , 4 ), // #134 + INST(Cvtss2si , ExtRm_Wx , O(F30F00,2D,_,_,x,_,_,_ ), 0 , 6 , 0 , 4041 , 49 , 5 ), // #135 + INST(Cvttpd2dq , ExtRm , O(660F00,E6,_,_,_,_,_,_ ), 0 , 3 , 0 , 4062 , 5 , 4 ), // #136 + INST(Cvttpd2pi , ExtRm , O(660F00,2C,_,_,_,_,_,_ ), 0 , 3 , 0 , 535 , 44 , 4 ), // #137 + INST(Cvttps2dq , ExtRm , O(F30F00,5B,_,_,_,_,_,_ ), 0 , 6 , 0 , 4108 , 5 
, 4 ), // #138 + INST(Cvttps2pi , ExtRm , O(000F00,2C,_,_,_,_,_,_ ), 0 , 4 , 0 , 545 , 46 , 5 ), // #139 + INST(Cvttsd2si , ExtRm_Wx , O(F20F00,2C,_,_,x,_,_,_ ), 0 , 5 , 0 , 4154 , 47 , 4 ), // #140 + INST(Cvttss2si , ExtRm_Wx , O(F30F00,2C,_,_,x,_,_,_ ), 0 , 6 , 0 , 4177 , 49 , 5 ), // #141 + INST(Cwd , X86Op_xDX_xAX , O(660000,99,_,_,_,_,_,_ ), 0 , 19 , 0 , 555 , 50 , 0 ), // #142 + INST(Cwde , X86Op_xAX , O(000000,98,_,_,_,_,_,_ ), 0 , 0 , 0 , 559 , 51 , 0 ), // #143 + INST(Daa , X86Op , O(000000,27,_,_,_,_,_,_ ), 0 , 0 , 0 , 564 , 1 , 1 ), // #144 + INST(Das , X86Op , O(000000,2F,_,_,_,_,_,_ ), 0 , 0 , 0 , 568 , 1 , 1 ), // #145 + INST(Dec , X86IncDec , O(000000,FE,1,_,x,_,_,_ ), O(000000,48,_,_,x,_,_,_ ), 30 , 6 , 3301 , 52 , 44 ), // #146 + INST(Div , X86M_GPB_MulDiv , O(000000,F6,6,_,x,_,_,_ ), 0 , 31 , 0 , 805 , 53 , 1 ), // #147 + INST(Divpd , ExtRm , O(660F00,5E,_,_,_,_,_,_ ), 0 , 3 , 0 , 4276 , 5 , 4 ), // #148 + INST(Divps , ExtRm , O(000F00,5E,_,_,_,_,_,_ ), 0 , 4 , 0 , 4283 , 5 , 5 ), // #149 + INST(Divsd , ExtRm , O(F20F00,5E,_,_,_,_,_,_ ), 0 , 5 , 0 , 4290 , 6 , 4 ), // #150 + INST(Divss , ExtRm , O(F30F00,5E,_,_,_,_,_,_ ), 0 , 6 , 0 , 4297 , 7 , 5 ), // #151 + INST(Dppd , ExtRmi , O(660F3A,41,_,_,_,_,_,_ ), 0 , 8 , 0 , 4314 , 8 , 12 ), // #152 + INST(Dpps , ExtRmi , O(660F3A,40,_,_,_,_,_,_ ), 0 , 8 , 0 , 4320 , 8 , 12 ), // #153 + INST(Emms , X86Op , O(000F00,77,_,_,_,_,_,_ ), 0 , 4 , 0 , 773 , 54 , 45 ), // #154 + INST(Endbr32 , X86Op_Mod11RM , O(F30F00,FB,7,_,_,_,_,3 ), 0 , 32 , 0 , 572 , 30 , 46 ), // #155 + INST(Endbr64 , X86Op_Mod11RM , O(F30F00,FA,7,_,_,_,_,2 ), 0 , 33 , 0 , 580 , 30 , 46 ), // #156 + INST(Enqcmd , X86EnqcmdMovdir64b , O(F20F38,F8,_,_,_,_,_,_ ), 0 , 29 , 0 , 588 , 55 , 47 ), // #157 + INST(Enqcmds , X86EnqcmdMovdir64b , O(F30F38,F8,_,_,_,_,_,_ ), 0 , 7 , 0 , 595 , 55 , 47 ), // #158 + INST(Enter , X86Enter , O(000000,C8,_,_,_,_,_,_ ), 0 , 0 , 0 , 3017 , 56 , 0 ), // #159 + INST(Extractps , ExtExtract , O(660F3A,17,_,_,_,_,_,_ ), 0 , 8 , 0 , 4510 , 57 , 12 ), // #160 + INST(Extrq , ExtExtrq , O(660F00,79,_,_,_,_,_,_ ), O(660F00,78,0,_,_,_,_,_ ), 3 , 7 , 7606 , 58 , 48 ), // #161 + INST(F2xm1 , FpuOp , O_FPU(00,D9F0,_) , 0 , 34 , 0 , 603 , 30 , 0 ), // #162 + INST(Fabs , FpuOp , O_FPU(00,D9E1,_) , 0 , 34 , 0 , 609 , 30 , 0 ), // #163 + INST(Fadd , FpuArith , O_FPU(00,C0C0,0) , 0 , 35 , 0 , 2106 , 59 , 0 ), // #164 + INST(Faddp , FpuRDef , O_FPU(00,DEC0,_) , 0 , 36 , 0 , 614 , 60 , 0 ), // #165 + INST(Fbld , X86M_Only , O_FPU(00,00DF,4) , 0 , 37 , 0 , 620 , 61 , 0 ), // #166 + INST(Fbstp , X86M_Only , O_FPU(00,00DF,6) , 0 , 38 , 0 , 625 , 61 , 0 ), // #167 + INST(Fchs , FpuOp , O_FPU(00,D9E0,_) , 0 , 34 , 0 , 631 , 30 , 0 ), // #168 + INST(Fclex , FpuOp , O_FPU(9B,DBE2,_) , 0 , 39 , 0 , 636 , 30 , 0 ), // #169 + INST(Fcmovb , FpuR , O_FPU(00,DAC0,_) , 0 , 40 , 0 , 642 , 62 , 29 ), // #170 + INST(Fcmovbe , FpuR , O_FPU(00,DAD0,_) , 0 , 40 , 0 , 649 , 62 , 28 ), // #171 + INST(Fcmove , FpuR , O_FPU(00,DAC8,_) , 0 , 40 , 0 , 657 , 62 , 30 ), // #172 + INST(Fcmovnb , FpuR , O_FPU(00,DBC0,_) , 0 , 41 , 0 , 664 , 62 , 29 ), // #173 + INST(Fcmovnbe , FpuR , O_FPU(00,DBD0,_) , 0 , 41 , 0 , 672 , 62 , 28 ), // #174 + INST(Fcmovne , FpuR , O_FPU(00,DBC8,_) , 0 , 41 , 0 , 681 , 62 , 30 ), // #175 + INST(Fcmovnu , FpuR , O_FPU(00,DBD8,_) , 0 , 41 , 0 , 689 , 62 , 34 ), // #176 + INST(Fcmovu , FpuR , O_FPU(00,DAD8,_) , 0 , 40 , 0 , 697 , 62 , 34 ), // #177 + INST(Fcom , FpuCom , O_FPU(00,D0D0,2) , 0 , 42 , 0 , 704 , 63 , 0 ), // #178 + INST(Fcomi , FpuR , 
O_FPU(00,DBF0,_) , 0 , 41 , 0 , 709 , 62 , 49 ), // #179 + INST(Fcomip , FpuR , O_FPU(00,DFF0,_) , 0 , 43 , 0 , 715 , 62 , 49 ), // #180 + INST(Fcomp , FpuCom , O_FPU(00,D8D8,3) , 0 , 44 , 0 , 722 , 63 , 0 ), // #181 + INST(Fcompp , FpuOp , O_FPU(00,DED9,_) , 0 , 36 , 0 , 728 , 30 , 0 ), // #182 + INST(Fcos , FpuOp , O_FPU(00,D9FF,_) , 0 , 34 , 0 , 735 , 30 , 0 ), // #183 + INST(Fdecstp , FpuOp , O_FPU(00,D9F6,_) , 0 , 34 , 0 , 740 , 30 , 0 ), // #184 + INST(Fdiv , FpuArith , O_FPU(00,F0F8,6) , 0 , 45 , 0 , 748 , 59 , 0 ), // #185 + INST(Fdivp , FpuRDef , O_FPU(00,DEF8,_) , 0 , 36 , 0 , 753 , 60 , 0 ), // #186 + INST(Fdivr , FpuArith , O_FPU(00,F8F0,7) , 0 , 46 , 0 , 759 , 59 , 0 ), // #187 + INST(Fdivrp , FpuRDef , O_FPU(00,DEF0,_) , 0 , 36 , 0 , 765 , 60 , 0 ), // #188 + INST(Femms , X86Op , O(000F00,0E,_,_,_,_,_,_ ), 0 , 4 , 0 , 772 , 30 , 50 ), // #189 + INST(Ffree , FpuR , O_FPU(00,DDC0,_) , 0 , 47 , 0 , 778 , 62 , 0 ), // #190 + INST(Fiadd , FpuM , O_FPU(00,00DA,0) , 0 , 48 , 0 , 784 , 64 , 0 ), // #191 + INST(Ficom , FpuM , O_FPU(00,00DA,2) , 0 , 49 , 0 , 790 , 64 , 0 ), // #192 + INST(Ficomp , FpuM , O_FPU(00,00DA,3) , 0 , 50 , 0 , 796 , 64 , 0 ), // #193 + INST(Fidiv , FpuM , O_FPU(00,00DA,6) , 0 , 38 , 0 , 803 , 64 , 0 ), // #194 + INST(Fidivr , FpuM , O_FPU(00,00DA,7) , 0 , 51 , 0 , 809 , 64 , 0 ), // #195 + INST(Fild , FpuM , O_FPU(00,00DB,0) , O_FPU(00,00DF,5) , 48 , 8 , 816 , 65 , 0 ), // #196 + INST(Fimul , FpuM , O_FPU(00,00DA,1) , 0 , 52 , 0 , 821 , 64 , 0 ), // #197 + INST(Fincstp , FpuOp , O_FPU(00,D9F7,_) , 0 , 34 , 0 , 827 , 30 , 0 ), // #198 + INST(Finit , FpuOp , O_FPU(9B,DBE3,_) , 0 , 39 , 0 , 835 , 30 , 0 ), // #199 + INST(Fist , FpuM , O_FPU(00,00DB,2) , 0 , 49 , 0 , 841 , 64 , 0 ), // #200 + INST(Fistp , FpuM , O_FPU(00,00DB,3) , O_FPU(00,00DF,7) , 50 , 9 , 846 , 65 , 0 ), // #201 + INST(Fisttp , FpuM , O_FPU(00,00DB,1) , O_FPU(00,00DD,1) , 52 , 10 , 852 , 65 , 6 ), // #202 + INST(Fisub , FpuM , O_FPU(00,00DA,4) , 0 , 37 , 0 , 859 , 64 , 0 ), // #203 + INST(Fisubr , FpuM , O_FPU(00,00DA,5) , 0 , 53 , 0 , 865 , 64 , 0 ), // #204 + INST(Fld , FpuFldFst , O_FPU(00,00D9,0) , O_FPU(00,00DB,5) , 48 , 11 , 872 , 66 , 0 ), // #205 + INST(Fld1 , FpuOp , O_FPU(00,D9E8,_) , 0 , 34 , 0 , 876 , 30 , 0 ), // #206 + INST(Fldcw , X86M_Only , O_FPU(00,00D9,5) , 0 , 53 , 0 , 881 , 67 , 0 ), // #207 + INST(Fldenv , X86M_Only , O_FPU(00,00D9,4) , 0 , 37 , 0 , 887 , 31 , 0 ), // #208 + INST(Fldl2e , FpuOp , O_FPU(00,D9EA,_) , 0 , 34 , 0 , 894 , 30 , 0 ), // #209 + INST(Fldl2t , FpuOp , O_FPU(00,D9E9,_) , 0 , 34 , 0 , 901 , 30 , 0 ), // #210 + INST(Fldlg2 , FpuOp , O_FPU(00,D9EC,_) , 0 , 34 , 0 , 908 , 30 , 0 ), // #211 + INST(Fldln2 , FpuOp , O_FPU(00,D9ED,_) , 0 , 34 , 0 , 915 , 30 , 0 ), // #212 + INST(Fldpi , FpuOp , O_FPU(00,D9EB,_) , 0 , 34 , 0 , 922 , 30 , 0 ), // #213 + INST(Fldz , FpuOp , O_FPU(00,D9EE,_) , 0 , 34 , 0 , 928 , 30 , 0 ), // #214 + INST(Fmul , FpuArith , O_FPU(00,C8C8,1) , 0 , 54 , 0 , 2148 , 59 , 0 ), // #215 + INST(Fmulp , FpuRDef , O_FPU(00,DEC8,_) , 0 , 36 , 0 , 933 , 60 , 0 ), // #216 + INST(Fnclex , FpuOp , O_FPU(00,DBE2,_) , 0 , 41 , 0 , 939 , 30 , 0 ), // #217 + INST(Fninit , FpuOp , O_FPU(00,DBE3,_) , 0 , 41 , 0 , 946 , 30 , 0 ), // #218 + INST(Fnop , FpuOp , O_FPU(00,D9D0,_) , 0 , 34 , 0 , 953 , 30 , 0 ), // #219 + INST(Fnsave , X86M_Only , O_FPU(00,00DD,6) , 0 , 38 , 0 , 958 , 31 , 0 ), // #220 + INST(Fnstcw , X86M_Only , O_FPU(00,00D9,7) , 0 , 51 , 0 , 965 , 67 , 0 ), // #221 + INST(Fnstenv , X86M_Only , O_FPU(00,00D9,6) , 0 , 38 , 0 , 972 , 31 , 0 
), // #222 + INST(Fnstsw , FpuStsw , O_FPU(00,00DD,7) , O_FPU(00,DFE0,_) , 51 , 12 , 980 , 68 , 0 ), // #223 + INST(Fpatan , FpuOp , O_FPU(00,D9F3,_) , 0 , 34 , 0 , 987 , 30 , 0 ), // #224 + INST(Fprem , FpuOp , O_FPU(00,D9F8,_) , 0 , 34 , 0 , 994 , 30 , 0 ), // #225 + INST(Fprem1 , FpuOp , O_FPU(00,D9F5,_) , 0 , 34 , 0 , 1000 , 30 , 0 ), // #226 + INST(Fptan , FpuOp , O_FPU(00,D9F2,_) , 0 , 34 , 0 , 1007 , 30 , 0 ), // #227 + INST(Frndint , FpuOp , O_FPU(00,D9FC,_) , 0 , 34 , 0 , 1013 , 30 , 0 ), // #228 + INST(Frstor , X86M_Only , O_FPU(00,00DD,4) , 0 , 37 , 0 , 1021 , 31 , 0 ), // #229 + INST(Fsave , X86M_Only , O_FPU(9B,00DD,6) , 0 , 55 , 0 , 1028 , 31 , 0 ), // #230 + INST(Fscale , FpuOp , O_FPU(00,D9FD,_) , 0 , 34 , 0 , 1034 , 30 , 0 ), // #231 + INST(Fsin , FpuOp , O_FPU(00,D9FE,_) , 0 , 34 , 0 , 1041 , 30 , 0 ), // #232 + INST(Fsincos , FpuOp , O_FPU(00,D9FB,_) , 0 , 34 , 0 , 1046 , 30 , 0 ), // #233 + INST(Fsqrt , FpuOp , O_FPU(00,D9FA,_) , 0 , 34 , 0 , 1054 , 30 , 0 ), // #234 + INST(Fst , FpuFldFst , O_FPU(00,00D9,2) , 0 , 49 , 0 , 1060 , 69 , 0 ), // #235 + INST(Fstcw , X86M_Only , O_FPU(9B,00D9,7) , 0 , 56 , 0 , 1064 , 67 , 0 ), // #236 + INST(Fstenv , X86M_Only , O_FPU(9B,00D9,6) , 0 , 55 , 0 , 1070 , 31 , 0 ), // #237 + INST(Fstp , FpuFldFst , O_FPU(00,00D9,3) , O(000000,DB,7,_,_,_,_,_ ), 50 , 13 , 1077 , 66 , 0 ), // #238 + INST(Fstsw , FpuStsw , O_FPU(9B,00DD,7) , O_FPU(9B,DFE0,_) , 56 , 14 , 1082 , 68 , 0 ), // #239 + INST(Fsub , FpuArith , O_FPU(00,E0E8,4) , 0 , 57 , 0 , 2226 , 59 , 0 ), // #240 + INST(Fsubp , FpuRDef , O_FPU(00,DEE8,_) , 0 , 36 , 0 , 1088 , 60 , 0 ), // #241 + INST(Fsubr , FpuArith , O_FPU(00,E8E0,5) , 0 , 58 , 0 , 2232 , 59 , 0 ), // #242 + INST(Fsubrp , FpuRDef , O_FPU(00,DEE0,_) , 0 , 36 , 0 , 1094 , 60 , 0 ), // #243 + INST(Ftst , FpuOp , O_FPU(00,D9E4,_) , 0 , 34 , 0 , 1101 , 30 , 0 ), // #244 + INST(Fucom , FpuRDef , O_FPU(00,DDE0,_) , 0 , 47 , 0 , 1106 , 60 , 0 ), // #245 + INST(Fucomi , FpuR , O_FPU(00,DBE8,_) , 0 , 41 , 0 , 1112 , 62 , 49 ), // #246 + INST(Fucomip , FpuR , O_FPU(00,DFE8,_) , 0 , 43 , 0 , 1119 , 62 , 49 ), // #247 + INST(Fucomp , FpuRDef , O_FPU(00,DDE8,_) , 0 , 47 , 0 , 1127 , 60 , 0 ), // #248 + INST(Fucompp , FpuOp , O_FPU(00,DAE9,_) , 0 , 40 , 0 , 1134 , 30 , 0 ), // #249 + INST(Fwait , X86Op , O_FPU(00,009B,_) , 0 , 59 , 0 , 1142 , 30 , 0 ), // #250 + INST(Fxam , FpuOp , O_FPU(00,D9E5,_) , 0 , 34 , 0 , 1148 , 30 , 0 ), // #251 + INST(Fxch , FpuR , O_FPU(00,D9C8,_) , 0 , 34 , 0 , 1153 , 60 , 0 ), // #252 + INST(Fxrstor , X86M_Only , O(000F00,AE,1,_,_,_,_,_ ), 0 , 28 , 0 , 1158 , 31 , 51 ), // #253 + INST(Fxrstor64 , X86M_Only , O(000F00,AE,1,_,1,_,_,_ ), 0 , 27 , 0 , 1166 , 70 , 51 ), // #254 + INST(Fxsave , X86M_Only , O(000F00,AE,0,_,_,_,_,_ ), 0 , 4 , 0 , 1176 , 31 , 51 ), // #255 + INST(Fxsave64 , X86M_Only , O(000F00,AE,0,_,1,_,_,_ ), 0 , 60 , 0 , 1183 , 70 , 51 ), // #256 + INST(Fxtract , FpuOp , O_FPU(00,D9F4,_) , 0 , 34 , 0 , 1192 , 30 , 0 ), // #257 + INST(Fyl2x , FpuOp , O_FPU(00,D9F1,_) , 0 , 34 , 0 , 1200 , 30 , 0 ), // #258 + INST(Fyl2xp1 , FpuOp , O_FPU(00,D9F9,_) , 0 , 34 , 0 , 1206 , 30 , 0 ), // #259 + INST(Getsec , X86Op , O(000F00,37,_,_,_,_,_,_ ), 0 , 4 , 0 , 1214 , 30 , 52 ), // #260 + INST(Gf2p8affineinvqb , ExtRmi , O(660F3A,CF,_,_,_,_,_,_ ), 0 , 8 , 0 , 5865 , 8 , 53 ), // #261 + INST(Gf2p8affineqb , ExtRmi , O(660F3A,CE,_,_,_,_,_,_ ), 0 , 8 , 0 , 5883 , 8 , 53 ), // #262 + INST(Gf2p8mulb , ExtRm , O(660F38,CF,_,_,_,_,_,_ ), 0 , 2 , 0 , 5898 , 5 , 53 ), // #263 + INST(Haddpd , ExtRm , 
O(660F00,7C,_,_,_,_,_,_ ), 0 , 3 , 0 , 5909 , 5 , 6 ), // #264 + INST(Haddps , ExtRm , O(F20F00,7C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5917 , 5 , 6 ), // #265 + INST(Hlt , X86Op , O(000000,F4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1221 , 30 , 0 ), // #266 + INST(Hsubpd , ExtRm , O(660F00,7D,_,_,_,_,_,_ ), 0 , 3 , 0 , 5925 , 5 , 6 ), // #267 + INST(Hsubps , ExtRm , O(F20F00,7D,_,_,_,_,_,_ ), 0 , 5 , 0 , 5933 , 5 , 6 ), // #268 + INST(Idiv , X86M_GPB_MulDiv , O(000000,F6,7,_,x,_,_,_ ), 0 , 26 , 0 , 804 , 53 , 1 ), // #269 + INST(Imul , X86Imul , O(000000,F6,5,_,x,_,_,_ ), 0 , 61 , 0 , 822 , 71 , 1 ), // #270 + INST(In , X86In , O(000000,EC,_,_,_,_,_,_ ), O(000000,E4,_,_,_,_,_,_ ), 0 , 15 , 10418, 72 , 0 ), // #271 + INST(Inc , X86IncDec , O(000000,FE,0,_,x,_,_,_ ), O(000000,40,_,_,x,_,_,_ ), 0 , 16 , 1225 , 52 , 44 ), // #272 + INST(Incsspd , X86M , O(F30F00,AE,5,_,0,_,_,_ ), 0 , 62 , 0 , 1229 , 73 , 54 ), // #273 + INST(Incsspq , X86M , O(F30F00,AE,5,_,1,_,_,_ ), 0 , 63 , 0 , 1237 , 74 , 54 ), // #274 + INST(Ins , X86Ins , O(000000,6C,_,_,_,_,_,_ ), 0 , 0 , 0 , 1908 , 75 , 0 ), // #275 + INST(Insertps , ExtRmi , O(660F3A,21,_,_,_,_,_,_ ), 0 , 8 , 0 , 6069 , 37 , 12 ), // #276 + INST(Insertq , ExtInsertq , O(F20F00,79,_,_,_,_,_,_ ), O(F20F00,78,_,_,_,_,_,_ ), 5 , 17 , 1245 , 76 , 48 ), // #277 + INST(Int , X86Int , O(000000,CD,_,_,_,_,_,_ ), 0 , 0 , 0 , 1017 , 77 , 0 ), // #278 + INST(Int3 , X86Op , O(000000,CC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1253 , 30 , 0 ), // #279 + INST(Into , X86Op , O(000000,CE,_,_,_,_,_,_ ), 0 , 0 , 0 , 1258 , 78 , 55 ), // #280 + INST(Invd , X86Op , O(000F00,08,_,_,_,_,_,_ ), 0 , 4 , 0 , 10347, 30 , 42 ), // #281 + INST(Invept , X86Rm_NoSize , O(660F38,80,_,_,_,_,_,_ ), 0 , 2 , 0 , 1263 , 79 , 56 ), // #282 + INST(Invlpg , X86M_Only , O(000F00,01,7,_,_,_,_,_ ), 0 , 22 , 0 , 1270 , 31 , 42 ), // #283 + INST(Invlpga , X86Op_xAddr , O(000F01,DF,_,_,_,_,_,_ ), 0 , 21 , 0 , 1277 , 80 , 22 ), // #284 + INST(Invpcid , X86Rm_NoSize , O(660F38,82,_,_,_,_,_,_ ), 0 , 2 , 0 , 1285 , 79 , 42 ), // #285 + INST(Invvpid , X86Rm_NoSize , O(660F38,81,_,_,_,_,_,_ ), 0 , 2 , 0 , 1293 , 79 , 56 ), // #286 + INST(Iret , X86Op , O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1301 , 81 , 1 ), // #287 + INST(Iretd , X86Op , O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1306 , 81 , 1 ), // #288 + INST(Iretq , X86Op , O(000000,CF,_,_,1,_,_,_ ), 0 , 20 , 0 , 1312 , 82 , 1 ), // #289 + INST(Iretw , X86Op , O(660000,CF,_,_,_,_,_,_ ), 0 , 19 , 0 , 1318 , 81 , 1 ), // #290 + INST(Ja , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1324 , 83 , 57 ), // #291 + INST(Jae , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1327 , 83 , 58 ), // #292 + INST(Jb , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1331 , 83 , 58 ), // #293 + INST(Jbe , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1334 , 83 , 57 ), // #294 + INST(Jc , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1338 , 83 , 58 ), // #295 + INST(Je , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1341 , 83 , 59 ), // #296 + INST(Jecxz , X86JecxzLoop , 0 , O(000000,E3,_,_,_,_,_,_ ), 0 , 23 , 1344 , 84 , 0 ), // #297 + INST(Jg , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1350 , 83 , 60 ), // #298 + INST(Jge , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1353 , 83 , 61 ), // #299 + INST(Jl , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1357 , 83 , 61 ), // 
#300 + INST(Jle , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1360 , 83 , 60 ), // #301 + INST(Jmp , X86Jmp , O(000000,FF,4,_,_,_,_,_ ), O(000000,EB,_,_,_,_,_,_ ), 9 , 28 , 1364 , 85 , 0 ), // #302 + INST(Jna , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1368 , 83 , 57 ), // #303 + INST(Jnae , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1372 , 83 , 58 ), // #304 + INST(Jnb , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1377 , 83 , 58 ), // #305 + INST(Jnbe , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1381 , 83 , 57 ), // #306 + INST(Jnc , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1386 , 83 , 58 ), // #307 + INST(Jne , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1390 , 83 , 59 ), // #308 + INST(Jng , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1394 , 83 , 60 ), // #309 + INST(Jnge , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1398 , 83 , 61 ), // #310 + INST(Jnl , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1403 , 83 , 61 ), // #311 + INST(Jnle , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1407 , 83 , 60 ), // #312 + INST(Jno , X86Jcc , O(000F00,81,_,_,_,_,_,_ ), O(000000,71,_,_,_,_,_,_ ), 4 , 30 , 1412 , 83 , 55 ), // #313 + INST(Jnp , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1416 , 83 , 62 ), // #314 + INST(Jns , X86Jcc , O(000F00,89,_,_,_,_,_,_ ), O(000000,79,_,_,_,_,_,_ ), 4 , 32 , 1420 , 83 , 63 ), // #315 + INST(Jnz , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1424 , 83 , 59 ), // #316 + INST(Jo , X86Jcc , O(000F00,80,_,_,_,_,_,_ ), O(000000,70,_,_,_,_,_,_ ), 4 , 33 , 1428 , 83 , 55 ), // #317 + INST(Jp , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1431 , 83 , 62 ), // #318 + INST(Jpe , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1434 , 83 , 62 ), // #319 + INST(Jpo , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1438 , 83 , 62 ), // #320 + INST(Js , X86Jcc , O(000F00,88,_,_,_,_,_,_ ), O(000000,78,_,_,_,_,_,_ ), 4 , 35 , 1442 , 83 , 63 ), // #321 + INST(Jz , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1445 , 83 , 59 ), // #322 + INST(Kaddb , VexRvm , V(660F00,4A,_,1,0,_,_,_ ), 0 , 64 , 0 , 1448 , 86 , 64 ), // #323 + INST(Kaddd , VexRvm , V(660F00,4A,_,1,1,_,_,_ ), 0 , 65 , 0 , 1454 , 86 , 65 ), // #324 + INST(Kaddq , VexRvm , V(000F00,4A,_,1,1,_,_,_ ), 0 , 66 , 0 , 1460 , 86 , 65 ), // #325 + INST(Kaddw , VexRvm , V(000F00,4A,_,1,0,_,_,_ ), 0 , 67 , 0 , 1466 , 86 , 64 ), // #326 + INST(Kandb , VexRvm , V(660F00,41,_,1,0,_,_,_ ), 0 , 64 , 0 , 1472 , 86 , 64 ), // #327 + INST(Kandd , VexRvm , V(660F00,41,_,1,1,_,_,_ ), 0 , 65 , 0 , 1478 , 86 , 65 ), // #328 + INST(Kandnb , VexRvm , V(660F00,42,_,1,0,_,_,_ ), 0 , 64 , 0 , 1484 , 86 , 64 ), // #329 + INST(Kandnd , VexRvm , V(660F00,42,_,1,1,_,_,_ ), 0 , 65 , 0 , 1491 , 86 , 65 ), // #330 + INST(Kandnq , VexRvm , V(000F00,42,_,1,1,_,_,_ ), 0 , 66 , 0 , 1498 , 86 , 65 ), // #331 + INST(Kandnw , VexRvm , V(000F00,42,_,1,0,_,_,_ ), 0 , 67 , 0 , 1505 , 86 , 66 ), // #332 + INST(Kandq , VexRvm , V(000F00,41,_,1,1,_,_,_ ), 0 , 66 , 0 , 1512 , 86 , 65 ), // #333 + INST(Kandw , VexRvm , V(000F00,41,_,1,0,_,_,_ ), 0 , 67 , 0 , 1518 , 86 , 66 ), // #334 + INST(Kmovb , VexKmov , 
V(660F00,90,_,0,0,_,_,_ ), V(660F00,92,_,0,0,_,_,_ ), 68 , 36 , 1524 , 87 , 64 ), // #335 + INST(Kmovd , VexKmov , V(660F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,0,_,_,_ ), 69 , 37 , 8086 , 88 , 65 ), // #336 + INST(Kmovq , VexKmov , V(000F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,1,_,_,_ ), 70 , 38 , 8097 , 89 , 65 ), // #337 + INST(Kmovw , VexKmov , V(000F00,90,_,0,0,_,_,_ ), V(000F00,92,_,0,0,_,_,_ ), 71 , 39 , 1530 , 90 , 66 ), // #338 + INST(Knotb , VexRm , V(660F00,44,_,0,0,_,_,_ ), 0 , 68 , 0 , 1536 , 91 , 64 ), // #339 + INST(Knotd , VexRm , V(660F00,44,_,0,1,_,_,_ ), 0 , 69 , 0 , 1542 , 91 , 65 ), // #340 + INST(Knotq , VexRm , V(000F00,44,_,0,1,_,_,_ ), 0 , 70 , 0 , 1548 , 91 , 65 ), // #341 + INST(Knotw , VexRm , V(000F00,44,_,0,0,_,_,_ ), 0 , 71 , 0 , 1554 , 91 , 66 ), // #342 + INST(Korb , VexRvm , V(660F00,45,_,1,0,_,_,_ ), 0 , 64 , 0 , 1560 , 86 , 64 ), // #343 + INST(Kord , VexRvm , V(660F00,45,_,1,1,_,_,_ ), 0 , 65 , 0 , 1565 , 86 , 65 ), // #344 + INST(Korq , VexRvm , V(000F00,45,_,1,1,_,_,_ ), 0 , 66 , 0 , 1570 , 86 , 65 ), // #345 + INST(Kortestb , VexRm , V(660F00,98,_,0,0,_,_,_ ), 0 , 68 , 0 , 1575 , 91 , 67 ), // #346 + INST(Kortestd , VexRm , V(660F00,98,_,0,1,_,_,_ ), 0 , 69 , 0 , 1584 , 91 , 68 ), // #347 + INST(Kortestq , VexRm , V(000F00,98,_,0,1,_,_,_ ), 0 , 70 , 0 , 1593 , 91 , 68 ), // #348 + INST(Kortestw , VexRm , V(000F00,98,_,0,0,_,_,_ ), 0 , 71 , 0 , 1602 , 91 , 69 ), // #349 + INST(Korw , VexRvm , V(000F00,45,_,1,0,_,_,_ ), 0 , 67 , 0 , 1611 , 86 , 66 ), // #350 + INST(Kshiftlb , VexRmi , V(660F3A,32,_,0,0,_,_,_ ), 0 , 72 , 0 , 1616 , 92 , 64 ), // #351 + INST(Kshiftld , VexRmi , V(660F3A,33,_,0,0,_,_,_ ), 0 , 72 , 0 , 1625 , 92 , 65 ), // #352 + INST(Kshiftlq , VexRmi , V(660F3A,33,_,0,1,_,_,_ ), 0 , 73 , 0 , 1634 , 92 , 65 ), // #353 + INST(Kshiftlw , VexRmi , V(660F3A,32,_,0,1,_,_,_ ), 0 , 73 , 0 , 1643 , 92 , 66 ), // #354 + INST(Kshiftrb , VexRmi , V(660F3A,30,_,0,0,_,_,_ ), 0 , 72 , 0 , 1652 , 92 , 64 ), // #355 + INST(Kshiftrd , VexRmi , V(660F3A,31,_,0,0,_,_,_ ), 0 , 72 , 0 , 1661 , 92 , 65 ), // #356 + INST(Kshiftrq , VexRmi , V(660F3A,31,_,0,1,_,_,_ ), 0 , 73 , 0 , 1670 , 92 , 65 ), // #357 + INST(Kshiftrw , VexRmi , V(660F3A,30,_,0,1,_,_,_ ), 0 , 73 , 0 , 1679 , 92 , 66 ), // #358 + INST(Ktestb , VexRm , V(660F00,99,_,0,0,_,_,_ ), 0 , 68 , 0 , 1688 , 91 , 67 ), // #359 + INST(Ktestd , VexRm , V(660F00,99,_,0,1,_,_,_ ), 0 , 69 , 0 , 1695 , 91 , 68 ), // #360 + INST(Ktestq , VexRm , V(000F00,99,_,0,1,_,_,_ ), 0 , 70 , 0 , 1702 , 91 , 68 ), // #361 + INST(Ktestw , VexRm , V(000F00,99,_,0,0,_,_,_ ), 0 , 71 , 0 , 1709 , 91 , 67 ), // #362 + INST(Kunpckbw , VexRvm , V(660F00,4B,_,1,0,_,_,_ ), 0 , 64 , 0 , 1716 , 86 , 66 ), // #363 + INST(Kunpckdq , VexRvm , V(000F00,4B,_,1,1,_,_,_ ), 0 , 66 , 0 , 1725 , 86 , 65 ), // #364 + INST(Kunpckwd , VexRvm , V(000F00,4B,_,1,0,_,_,_ ), 0 , 67 , 0 , 1734 , 86 , 65 ), // #365 + INST(Kxnorb , VexRvm , V(660F00,46,_,1,0,_,_,_ ), 0 , 64 , 0 , 1743 , 86 , 64 ), // #366 + INST(Kxnord , VexRvm , V(660F00,46,_,1,1,_,_,_ ), 0 , 65 , 0 , 1750 , 86 , 65 ), // #367 + INST(Kxnorq , VexRvm , V(000F00,46,_,1,1,_,_,_ ), 0 , 66 , 0 , 1757 , 86 , 65 ), // #368 + INST(Kxnorw , VexRvm , V(000F00,46,_,1,0,_,_,_ ), 0 , 67 , 0 , 1764 , 86 , 66 ), // #369 + INST(Kxorb , VexRvm , V(660F00,47,_,1,0,_,_,_ ), 0 , 64 , 0 , 1771 , 86 , 64 ), // #370 + INST(Kxord , VexRvm , V(660F00,47,_,1,1,_,_,_ ), 0 , 65 , 0 , 1777 , 86 , 65 ), // #371 + INST(Kxorq , VexRvm , V(000F00,47,_,1,1,_,_,_ ), 0 , 66 , 0 , 1783 , 86 , 65 ), // #372 + INST(Kxorw , VexRvm 
, V(000F00,47,_,1,0,_,_,_ ), 0 , 67 , 0 , 1789 , 86 , 66 ), // #373 + INST(Lahf , X86Op , O(000000,9F,_,_,_,_,_,_ ), 0 , 0 , 0 , 1795 , 93 , 70 ), // #374 + INST(Lar , X86Rm , O(000F00,02,_,_,_,_,_,_ ), 0 , 4 , 0 , 1800 , 94 , 10 ), // #375 + INST(Lddqu , ExtRm , O(F20F00,F0,_,_,_,_,_,_ ), 0 , 5 , 0 , 6079 , 95 , 6 ), // #376 + INST(Ldmxcsr , X86M_Only , O(000F00,AE,2,_,_,_,_,_ ), 0 , 74 , 0 , 6086 , 96 , 5 ), // #377 + INST(Lds , X86Rm , O(000000,C5,_,_,_,_,_,_ ), 0 , 0 , 0 , 1804 , 97 , 0 ), // #378 + INST(Ldtilecfg , AmxCfg , V(000F38,49,_,0,0,_,_,_ ), 0 , 10 , 0 , 1808 , 98 , 71 ), // #379 + INST(Lea , X86Lea , O(000000,8D,_,_,x,_,_,_ ), 0 , 0 , 0 , 1818 , 99 , 0 ), // #380 + INST(Leave , X86Op , O(000000,C9,_,_,_,_,_,_ ), 0 , 0 , 0 , 1822 , 30 , 0 ), // #381 + INST(Les , X86Rm , O(000000,C4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1828 , 97 , 0 ), // #382 + INST(Lfence , X86Fence , O(000F00,AE,5,_,_,_,_,_ ), 0 , 75 , 0 , 1832 , 30 , 4 ), // #383 + INST(Lfs , X86Rm , O(000F00,B4,_,_,_,_,_,_ ), 0 , 4 , 0 , 1839 , 100, 0 ), // #384 + INST(Lgdt , X86M_Only , O(000F00,01,2,_,_,_,_,_ ), 0 , 74 , 0 , 1843 , 31 , 0 ), // #385 + INST(Lgs , X86Rm , O(000F00,B5,_,_,_,_,_,_ ), 0 , 4 , 0 , 1848 , 100, 0 ), // #386 + INST(Lidt , X86M_Only , O(000F00,01,3,_,_,_,_,_ ), 0 , 76 , 0 , 1852 , 31 , 0 ), // #387 + INST(Lldt , X86M_NoSize , O(000F00,00,2,_,_,_,_,_ ), 0 , 74 , 0 , 1857 , 101, 0 ), // #388 + INST(Llwpcb , VexR_Wx , V(XOP_M9,12,0,0,x,_,_,_ ), 0 , 77 , 0 , 1862 , 102, 72 ), // #389 + INST(Lmsw , X86M_NoSize , O(000F00,01,6,_,_,_,_,_ ), 0 , 78 , 0 , 1869 , 101, 0 ), // #390 + INST(Lods , X86StrRm , O(000000,AC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1874 , 103, 73 ), // #391 + INST(Loop , X86JecxzLoop , 0 , O(000000,E2,_,_,_,_,_,_ ), 0 , 40 , 1879 , 104, 0 ), // #392 + INST(Loope , X86JecxzLoop , 0 , O(000000,E1,_,_,_,_,_,_ ), 0 , 41 , 1884 , 104, 59 ), // #393 + INST(Loopne , X86JecxzLoop , 0 , O(000000,E0,_,_,_,_,_,_ ), 0 , 42 , 1890 , 104, 59 ), // #394 + INST(Lsl , X86Rm , O(000F00,03,_,_,_,_,_,_ ), 0 , 4 , 0 , 1897 , 105, 10 ), // #395 + INST(Lss , X86Rm , O(000F00,B2,_,_,_,_,_,_ ), 0 , 4 , 0 , 6577 , 100, 0 ), // #396 + INST(Ltr , X86M_NoSize , O(000F00,00,3,_,_,_,_,_ ), 0 , 76 , 0 , 1901 , 101, 0 ), // #397 + INST(Lwpins , VexVmi4_Wx , V(XOP_MA,12,0,0,x,_,_,_ ), 0 , 79 , 0 , 1905 , 106, 72 ), // #398 + INST(Lwpval , VexVmi4_Wx , V(XOP_MA,12,1,0,x,_,_,_ ), 0 , 80 , 0 , 1912 , 106, 72 ), // #399 + INST(Lzcnt , X86Rm_Raw66H , O(F30F00,BD,_,_,x,_,_,_ ), 0 , 6 , 0 , 1919 , 22 , 74 ), // #400 + INST(Maskmovdqu , ExtRm_ZDI , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 6095 , 107, 4 ), // #401 + INST(Maskmovq , ExtRm_ZDI , O(000F00,F7,_,_,_,_,_,_ ), 0 , 4 , 0 , 8094 , 108, 75 ), // #402 + INST(Maxpd , ExtRm , O(660F00,5F,_,_,_,_,_,_ ), 0 , 3 , 0 , 6129 , 5 , 4 ), // #403 + INST(Maxps , ExtRm , O(000F00,5F,_,_,_,_,_,_ ), 0 , 4 , 0 , 6136 , 5 , 5 ), // #404 + INST(Maxsd , ExtRm , O(F20F00,5F,_,_,_,_,_,_ ), 0 , 5 , 0 , 8113 , 6 , 4 ), // #405 + INST(Maxss , ExtRm , O(F30F00,5F,_,_,_,_,_,_ ), 0 , 6 , 0 , 6150 , 7 , 5 ), // #406 + INST(Mcommit , X86Op , O(F30F01,FA,_,_,_,_,_,_ ), 0 , 81 , 0 , 1925 , 30 , 76 ), // #407 + INST(Mfence , X86Fence , O(000F00,AE,6,_,_,_,_,_ ), 0 , 78 , 0 , 1933 , 30 , 4 ), // #408 + INST(Minpd , ExtRm , O(660F00,5D,_,_,_,_,_,_ ), 0 , 3 , 0 , 6179 , 5 , 4 ), // #409 + INST(Minps , ExtRm , O(000F00,5D,_,_,_,_,_,_ ), 0 , 4 , 0 , 6186 , 5 , 5 ), // #410 + INST(Minsd , ExtRm , O(F20F00,5D,_,_,_,_,_,_ ), 0 , 5 , 0 , 8177 , 6 , 4 ), // #411 + INST(Minss , ExtRm , O(F30F00,5D,_,_,_,_,_,_ ), 0 , 6 , 0 , 6200 , 7 , 
5 ), // #412 + INST(Monitor , X86Op , O(000F01,C8,_,_,_,_,_,_ ), 0 , 21 , 0 , 3192 , 109, 77 ), // #413 + INST(Monitorx , X86Op , O(000F01,FA,_,_,_,_,_,_ ), 0 , 21 , 0 , 1940 , 109, 78 ), // #414 + INST(Mov , X86Mov , 0 , 0 , 0 , 0 , 138 , 110, 0 ), // #415 + INST(Movapd , ExtMov , O(660F00,28,_,_,_,_,_,_ ), O(660F00,29,_,_,_,_,_,_ ), 3 , 43 , 6231 , 111, 4 ), // #416 + INST(Movaps , ExtMov , O(000F00,28,_,_,_,_,_,_ ), O(000F00,29,_,_,_,_,_,_ ), 4 , 44 , 6239 , 111, 5 ), // #417 + INST(Movbe , ExtMovbe , O(000F38,F0,_,_,x,_,_,_ ), O(000F38,F1,_,_,x,_,_,_ ), 82 , 45 , 651 , 112, 79 ), // #418 + INST(Movd , ExtMovd , O(000F00,6E,_,_,_,_,_,_ ), O(000F00,7E,_,_,_,_,_,_ ), 4 , 46 , 8087 , 113, 80 ), // #419 + INST(Movddup , ExtMov , O(F20F00,12,_,_,_,_,_,_ ), 0 , 5 , 0 , 6253 , 6 , 6 ), // #420 + INST(Movdir64b , X86EnqcmdMovdir64b , O(660F38,F8,_,_,_,_,_,_ ), 0 , 2 , 0 , 1949 , 114, 81 ), // #421 + INST(Movdiri , X86MovntiMovdiri , O(000F38,F9,_,_,_,_,_,_ ), 0 , 82 , 0 , 1959 , 115, 82 ), // #422 + INST(Movdq2q , ExtMov , O(F20F00,D6,_,_,_,_,_,_ ), 0 , 5 , 0 , 1967 , 116, 4 ), // #423 + INST(Movdqa , ExtMov , O(660F00,6F,_,_,_,_,_,_ ), O(660F00,7F,_,_,_,_,_,_ ), 3 , 47 , 6262 , 111, 4 ), // #424 + INST(Movdqu , ExtMov , O(F30F00,6F,_,_,_,_,_,_ ), O(F30F00,7F,_,_,_,_,_,_ ), 6 , 48 , 6099 , 111, 4 ), // #425 + INST(Movhlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), 0 , 4 , 0 , 6337 , 117, 5 ), // #426 + INST(Movhpd , ExtMov , O(660F00,16,_,_,_,_,_,_ ), O(660F00,17,_,_,_,_,_,_ ), 3 , 49 , 6346 , 118, 4 ), // #427 + INST(Movhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), O(000F00,17,_,_,_,_,_,_ ), 4 , 50 , 6354 , 118, 5 ), // #428 + INST(Movlhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), 0 , 4 , 0 , 6362 , 117, 5 ), // #429 + INST(Movlpd , ExtMov , O(660F00,12,_,_,_,_,_,_ ), O(660F00,13,_,_,_,_,_,_ ), 3 , 51 , 6371 , 118, 4 ), // #430 + INST(Movlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), O(000F00,13,_,_,_,_,_,_ ), 4 , 52 , 6379 , 118, 5 ), // #431 + INST(Movmskpd , ExtMov , O(660F00,50,_,_,_,_,_,_ ), 0 , 3 , 0 , 6387 , 119, 4 ), // #432 + INST(Movmskps , ExtMov , O(000F00,50,_,_,_,_,_,_ ), 0 , 4 , 0 , 6397 , 119, 5 ), // #433 + INST(Movntdq , ExtMov , 0 , O(660F00,E7,_,_,_,_,_,_ ), 0 , 53 , 6407 , 120, 4 ), // #434 + INST(Movntdqa , ExtMov , O(660F38,2A,_,_,_,_,_,_ ), 0 , 2 , 0 , 6416 , 95 , 12 ), // #435 + INST(Movnti , X86MovntiMovdiri , O(000F00,C3,_,_,x,_,_,_ ), 0 , 4 , 0 , 1975 , 115, 4 ), // #436 + INST(Movntpd , ExtMov , 0 , O(660F00,2B,_,_,_,_,_,_ ), 0 , 54 , 6426 , 120, 4 ), // #437 + INST(Movntps , ExtMov , 0 , O(000F00,2B,_,_,_,_,_,_ ), 0 , 55 , 6435 , 120, 5 ), // #438 + INST(Movntq , ExtMov , 0 , O(000F00,E7,_,_,_,_,_,_ ), 0 , 56 , 1982 , 121, 75 ), // #439 + INST(Movntsd , ExtMov , 0 , O(F20F00,2B,_,_,_,_,_,_ ), 0 , 57 , 1989 , 122, 48 ), // #440 + INST(Movntss , ExtMov , 0 , O(F30F00,2B,_,_,_,_,_,_ ), 0 , 58 , 1997 , 123, 48 ), // #441 + INST(Movq , ExtMovq , O(000F00,6E,_,_,x,_,_,_ ), O(000F00,7E,_,_,x,_,_,_ ), 4 , 59 , 8098 , 124, 80 ), // #442 + INST(Movq2dq , ExtRm , O(F30F00,D6,_,_,_,_,_,_ ), 0 , 6 , 0 , 2005 , 125, 4 ), // #443 + INST(Movs , X86StrMm , O(000000,A4,_,_,_,_,_,_ ), 0 , 0 , 0 , 434 , 126, 73 ), // #444 + INST(Movsd , ExtMov , O(F20F00,10,_,_,_,_,_,_ ), O(F20F00,11,_,_,_,_,_,_ ), 5 , 60 , 6450 , 127, 4 ), // #445 + INST(Movshdup , ExtRm , O(F30F00,16,_,_,_,_,_,_ ), 0 , 6 , 0 , 6457 , 5 , 6 ), // #446 + INST(Movsldup , ExtRm , O(F30F00,12,_,_,_,_,_,_ ), 0 , 6 , 0 , 6467 , 5 , 6 ), // #447 + INST(Movss , ExtMov , O(F30F00,10,_,_,_,_,_,_ ), O(F30F00,11,_,_,_,_,_,_ ), 6 , 61 , 6477 , 
128, 5 ), // #448 + INST(Movsx , X86MovsxMovzx , O(000F00,BE,_,_,x,_,_,_ ), 0 , 4 , 0 , 2013 , 129, 0 ), // #449 + INST(Movsxd , X86Rm , O(000000,63,_,_,x,_,_,_ ), 0 , 0 , 0 , 2019 , 130, 0 ), // #450 + INST(Movupd , ExtMov , O(660F00,10,_,_,_,_,_,_ ), O(660F00,11,_,_,_,_,_,_ ), 3 , 62 , 6484 , 111, 4 ), // #451 + INST(Movups , ExtMov , O(000F00,10,_,_,_,_,_,_ ), O(000F00,11,_,_,_,_,_,_ ), 4 , 63 , 6492 , 111, 5 ), // #452 + INST(Movzx , X86MovsxMovzx , O(000F00,B6,_,_,x,_,_,_ ), 0 , 4 , 0 , 2026 , 129, 0 ), // #453 + INST(Mpsadbw , ExtRmi , O(660F3A,42,_,_,_,_,_,_ ), 0 , 8 , 0 , 6500 , 8 , 12 ), // #454 + INST(Mul , X86M_GPB_MulDiv , O(000000,F6,4,_,x,_,_,_ ), 0 , 9 , 0 , 823 , 53 , 1 ), // #455 + INST(Mulpd , ExtRm , O(660F00,59,_,_,_,_,_,_ ), 0 , 3 , 0 , 6554 , 5 , 4 ), // #456 + INST(Mulps , ExtRm , O(000F00,59,_,_,_,_,_,_ ), 0 , 4 , 0 , 6561 , 5 , 5 ), // #457 + INST(Mulsd , ExtRm , O(F20F00,59,_,_,_,_,_,_ ), 0 , 5 , 0 , 6568 , 6 , 4 ), // #458 + INST(Mulss , ExtRm , O(F30F00,59,_,_,_,_,_,_ ), 0 , 6 , 0 , 6575 , 7 , 5 ), // #459 + INST(Mulx , VexRvm_ZDX_Wx , V(F20F38,F6,_,0,x,_,_,_ ), 0 , 83 , 0 , 2032 , 131, 83 ), // #460 + INST(Mwait , X86Op , O(000F01,C9,_,_,_,_,_,_ ), 0 , 21 , 0 , 3201 , 132, 77 ), // #461 + INST(Mwaitx , X86Op , O(000F01,FB,_,_,_,_,_,_ ), 0 , 21 , 0 , 2037 , 133, 78 ), // #462 + INST(Neg , X86M_GPB , O(000000,F6,3,_,x,_,_,_ ), 0 , 84 , 0 , 2044 , 134, 1 ), // #463 + INST(Nop , X86M_Nop , O(000000,90,_,_,_,_,_,_ ), 0 , 0 , 0 , 954 , 135, 0 ), // #464 + INST(Not , X86M_GPB , O(000000,F6,2,_,x,_,_,_ ), 0 , 1 , 0 , 2048 , 134, 0 ), // #465 + INST(Or , X86Arith , O(000000,08,1,_,x,_,_,_ ), 0 , 30 , 0 , 3197 , 136, 1 ), // #466 + INST(Orpd , ExtRm , O(660F00,56,_,_,_,_,_,_ ), 0 , 3 , 0 , 10304, 11 , 4 ), // #467 + INST(Orps , ExtRm , O(000F00,56,_,_,_,_,_,_ ), 0 , 4 , 0 , 10311, 11 , 5 ), // #468 + INST(Out , X86Out , O(000000,EE,_,_,_,_,_,_ ), O(000000,E6,_,_,_,_,_,_ ), 0 , 64 , 2052 , 137, 0 ), // #469 + INST(Outs , X86Outs , O(000000,6E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2056 , 138, 0 ), // #470 + INST(Pabsb , ExtRm_P , O(000F38,1C,_,_,_,_,_,_ ), 0 , 82 , 0 , 6657 , 139, 84 ), // #471 + INST(Pabsd , ExtRm_P , O(000F38,1E,_,_,_,_,_,_ ), 0 , 82 , 0 , 6664 , 139, 84 ), // #472 + INST(Pabsw , ExtRm_P , O(000F38,1D,_,_,_,_,_,_ ), 0 , 82 , 0 , 6678 , 139, 84 ), // #473 + INST(Packssdw , ExtRm_P , O(000F00,6B,_,_,_,_,_,_ ), 0 , 4 , 0 , 6685 , 139, 80 ), // #474 + INST(Packsswb , ExtRm_P , O(000F00,63,_,_,_,_,_,_ ), 0 , 4 , 0 , 6695 , 139, 80 ), // #475 + INST(Packusdw , ExtRm , O(660F38,2B,_,_,_,_,_,_ ), 0 , 2 , 0 , 6705 , 5 , 12 ), // #476 + INST(Packuswb , ExtRm_P , O(000F00,67,_,_,_,_,_,_ ), 0 , 4 , 0 , 6715 , 139, 80 ), // #477 + INST(Paddb , ExtRm_P , O(000F00,FC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6725 , 139, 80 ), // #478 + INST(Paddd , ExtRm_P , O(000F00,FE,_,_,_,_,_,_ ), 0 , 4 , 0 , 6732 , 139, 80 ), // #479 + INST(Paddq , ExtRm_P , O(000F00,D4,_,_,_,_,_,_ ), 0 , 4 , 0 , 6739 , 139, 4 ), // #480 + INST(Paddsb , ExtRm_P , O(000F00,EC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6746 , 139, 80 ), // #481 + INST(Paddsw , ExtRm_P , O(000F00,ED,_,_,_,_,_,_ ), 0 , 4 , 0 , 6754 , 139, 80 ), // #482 + INST(Paddusb , ExtRm_P , O(000F00,DC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6762 , 139, 80 ), // #483 + INST(Paddusw , ExtRm_P , O(000F00,DD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6771 , 139, 80 ), // #484 + INST(Paddw , ExtRm_P , O(000F00,FD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6780 , 139, 80 ), // #485 + INST(Palignr , ExtRmi_P , O(000F3A,0F,_,_,_,_,_,_ ), 0 , 85 , 0 , 6787 , 140, 6 ), // #486 + INST(Pand , ExtRm_P , O(000F00,DB,_,_,_,_,_,_ 
), 0 , 4 , 0 , 6796 , 141, 80 ), // #487 + INST(Pandn , ExtRm_P , O(000F00,DF,_,_,_,_,_,_ ), 0 , 4 , 0 , 6809 , 142, 80 ), // #488 + INST(Pause , X86Op , O(F30000,90,_,_,_,_,_,_ ), 0 , 86 , 0 , 3161 , 30 , 0 ), // #489 + INST(Pavgb , ExtRm_P , O(000F00,E0,_,_,_,_,_,_ ), 0 , 4 , 0 , 6839 , 139, 85 ), // #490 + INST(Pavgusb , Ext3dNow , O(000F0F,BF,_,_,_,_,_,_ ), 0 , 87 , 0 , 2061 , 143, 50 ), // #491 + INST(Pavgw , ExtRm_P , O(000F00,E3,_,_,_,_,_,_ ), 0 , 4 , 0 , 6846 , 139, 85 ), // #492 + INST(Pblendvb , ExtRm_XMM0 , O(660F38,10,_,_,_,_,_,_ ), 0 , 2 , 0 , 6862 , 15 , 12 ), // #493 + INST(Pblendw , ExtRmi , O(660F3A,0E,_,_,_,_,_,_ ), 0 , 8 , 0 , 6872 , 8 , 12 ), // #494 + INST(Pclmulqdq , ExtRmi , O(660F3A,44,_,_,_,_,_,_ ), 0 , 8 , 0 , 6965 , 8 , 86 ), // #495 + INST(Pcmpeqb , ExtRm_P , O(000F00,74,_,_,_,_,_,_ ), 0 , 4 , 0 , 6997 , 142, 80 ), // #496 + INST(Pcmpeqd , ExtRm_P , O(000F00,76,_,_,_,_,_,_ ), 0 , 4 , 0 , 7006 , 142, 80 ), // #497 + INST(Pcmpeqq , ExtRm , O(660F38,29,_,_,_,_,_,_ ), 0 , 2 , 0 , 7015 , 144, 12 ), // #498 + INST(Pcmpeqw , ExtRm_P , O(000F00,75,_,_,_,_,_,_ ), 0 , 4 , 0 , 7024 , 142, 80 ), // #499 + INST(Pcmpestri , ExtRmi , O(660F3A,61,_,_,_,_,_,_ ), 0 , 8 , 0 , 7033 , 145, 87 ), // #500 + INST(Pcmpestrm , ExtRmi , O(660F3A,60,_,_,_,_,_,_ ), 0 , 8 , 0 , 7044 , 146, 87 ), // #501 + INST(Pcmpgtb , ExtRm_P , O(000F00,64,_,_,_,_,_,_ ), 0 , 4 , 0 , 7055 , 142, 80 ), // #502 + INST(Pcmpgtd , ExtRm_P , O(000F00,66,_,_,_,_,_,_ ), 0 , 4 , 0 , 7064 , 142, 80 ), // #503 + INST(Pcmpgtq , ExtRm , O(660F38,37,_,_,_,_,_,_ ), 0 , 2 , 0 , 7073 , 144, 43 ), // #504 + INST(Pcmpgtw , ExtRm_P , O(000F00,65,_,_,_,_,_,_ ), 0 , 4 , 0 , 7082 , 142, 80 ), // #505 + INST(Pcmpistri , ExtRmi , O(660F3A,63,_,_,_,_,_,_ ), 0 , 8 , 0 , 7091 , 147, 87 ), // #506 + INST(Pcmpistrm , ExtRmi , O(660F3A,62,_,_,_,_,_,_ ), 0 , 8 , 0 , 7102 , 148, 87 ), // #507 + INST(Pconfig , X86Op , O(000F01,C5,_,_,_,_,_,_ ), 0 , 21 , 0 , 2069 , 30 , 88 ), // #508 + INST(Pdep , VexRvm_Wx , V(F20F38,F5,_,0,x,_,_,_ ), 0 , 83 , 0 , 2077 , 10 , 83 ), // #509 + INST(Pext , VexRvm_Wx , V(F30F38,F5,_,0,x,_,_,_ ), 0 , 88 , 0 , 2082 , 10 , 83 ), // #510 + INST(Pextrb , ExtExtract , O(000F3A,14,_,_,_,_,_,_ ), 0 , 85 , 0 , 7589 , 149, 12 ), // #511 + INST(Pextrd , ExtExtract , O(000F3A,16,_,_,_,_,_,_ ), 0 , 85 , 0 , 7597 , 57 , 12 ), // #512 + INST(Pextrq , ExtExtract , O(000F3A,16,_,_,1,_,_,_ ), 0 , 89 , 0 , 7605 , 150, 12 ), // #513 + INST(Pextrw , ExtPextrw , O(000F00,C5,_,_,_,_,_,_ ), O(000F3A,15,_,_,_,_,_,_ ), 4 , 65 , 7613 , 151, 89 ), // #514 + INST(Pf2id , Ext3dNow , O(000F0F,1D,_,_,_,_,_,_ ), 0 , 87 , 0 , 2087 , 143, 50 ), // #515 + INST(Pf2iw , Ext3dNow , O(000F0F,1C,_,_,_,_,_,_ ), 0 , 87 , 0 , 2093 , 143, 90 ), // #516 + INST(Pfacc , Ext3dNow , O(000F0F,AE,_,_,_,_,_,_ ), 0 , 87 , 0 , 2099 , 143, 50 ), // #517 + INST(Pfadd , Ext3dNow , O(000F0F,9E,_,_,_,_,_,_ ), 0 , 87 , 0 , 2105 , 143, 50 ), // #518 + INST(Pfcmpeq , Ext3dNow , O(000F0F,B0,_,_,_,_,_,_ ), 0 , 87 , 0 , 2111 , 143, 50 ), // #519 + INST(Pfcmpge , Ext3dNow , O(000F0F,90,_,_,_,_,_,_ ), 0 , 87 , 0 , 2119 , 143, 50 ), // #520 + INST(Pfcmpgt , Ext3dNow , O(000F0F,A0,_,_,_,_,_,_ ), 0 , 87 , 0 , 2127 , 143, 50 ), // #521 + INST(Pfmax , Ext3dNow , O(000F0F,A4,_,_,_,_,_,_ ), 0 , 87 , 0 , 2135 , 143, 50 ), // #522 + INST(Pfmin , Ext3dNow , O(000F0F,94,_,_,_,_,_,_ ), 0 , 87 , 0 , 2141 , 143, 50 ), // #523 + INST(Pfmul , Ext3dNow , O(000F0F,B4,_,_,_,_,_,_ ), 0 , 87 , 0 , 2147 , 143, 50 ), // #524 + INST(Pfnacc , Ext3dNow , O(000F0F,8A,_,_,_,_,_,_ ), 0 , 87 , 0 , 2153 , 
143, 90 ), // #525 + INST(Pfpnacc , Ext3dNow , O(000F0F,8E,_,_,_,_,_,_ ), 0 , 87 , 0 , 2160 , 143, 90 ), // #526 + INST(Pfrcp , Ext3dNow , O(000F0F,96,_,_,_,_,_,_ ), 0 , 87 , 0 , 2168 , 143, 50 ), // #527 + INST(Pfrcpit1 , Ext3dNow , O(000F0F,A6,_,_,_,_,_,_ ), 0 , 87 , 0 , 2174 , 143, 50 ), // #528 + INST(Pfrcpit2 , Ext3dNow , O(000F0F,B6,_,_,_,_,_,_ ), 0 , 87 , 0 , 2183 , 143, 50 ), // #529 + INST(Pfrcpv , Ext3dNow , O(000F0F,86,_,_,_,_,_,_ ), 0 , 87 , 0 , 2192 , 143, 91 ), // #530 + INST(Pfrsqit1 , Ext3dNow , O(000F0F,A7,_,_,_,_,_,_ ), 0 , 87 , 0 , 2199 , 143, 50 ), // #531 + INST(Pfrsqrt , Ext3dNow , O(000F0F,97,_,_,_,_,_,_ ), 0 , 87 , 0 , 2208 , 143, 50 ), // #532 + INST(Pfrsqrtv , Ext3dNow , O(000F0F,87,_,_,_,_,_,_ ), 0 , 87 , 0 , 2216 , 143, 91 ), // #533 + INST(Pfsub , Ext3dNow , O(000F0F,9A,_,_,_,_,_,_ ), 0 , 87 , 0 , 2225 , 143, 50 ), // #534 + INST(Pfsubr , Ext3dNow , O(000F0F,AA,_,_,_,_,_,_ ), 0 , 87 , 0 , 2231 , 143, 50 ), // #535 + INST(Phaddd , ExtRm_P , O(000F38,02,_,_,_,_,_,_ ), 0 , 82 , 0 , 7692 , 139, 84 ), // #536 + INST(Phaddsw , ExtRm_P , O(000F38,03,_,_,_,_,_,_ ), 0 , 82 , 0 , 7709 , 139, 84 ), // #537 + INST(Phaddw , ExtRm_P , O(000F38,01,_,_,_,_,_,_ ), 0 , 82 , 0 , 7778 , 139, 84 ), // #538 + INST(Phminposuw , ExtRm , O(660F38,41,_,_,_,_,_,_ ), 0 , 2 , 0 , 7804 , 5 , 12 ), // #539 + INST(Phsubd , ExtRm_P , O(000F38,06,_,_,_,_,_,_ ), 0 , 82 , 0 , 7825 , 139, 84 ), // #540 + INST(Phsubsw , ExtRm_P , O(000F38,07,_,_,_,_,_,_ ), 0 , 82 , 0 , 7842 , 139, 84 ), // #541 + INST(Phsubw , ExtRm_P , O(000F38,05,_,_,_,_,_,_ ), 0 , 82 , 0 , 7851 , 139, 84 ), // #542 + INST(Pi2fd , Ext3dNow , O(000F0F,0D,_,_,_,_,_,_ ), 0 , 87 , 0 , 2238 , 143, 50 ), // #543 + INST(Pi2fw , Ext3dNow , O(000F0F,0C,_,_,_,_,_,_ ), 0 , 87 , 0 , 2244 , 143, 90 ), // #544 + INST(Pinsrb , ExtRmi , O(660F3A,20,_,_,_,_,_,_ ), 0 , 8 , 0 , 7868 , 152, 12 ), // #545 + INST(Pinsrd , ExtRmi , O(660F3A,22,_,_,_,_,_,_ ), 0 , 8 , 0 , 7876 , 153, 12 ), // #546 + INST(Pinsrq , ExtRmi , O(660F3A,22,_,_,1,_,_,_ ), 0 , 90 , 0 , 7884 , 154, 12 ), // #547 + INST(Pinsrw , ExtRmi_P , O(000F00,C4,_,_,_,_,_,_ ), 0 , 4 , 0 , 7892 , 155, 85 ), // #548 + INST(Pmaddubsw , ExtRm_P , O(000F38,04,_,_,_,_,_,_ ), 0 , 82 , 0 , 8062 , 139, 84 ), // #549 + INST(Pmaddwd , ExtRm_P , O(000F00,F5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8073 , 139, 80 ), // #550 + INST(Pmaxsb , ExtRm , O(660F38,3C,_,_,_,_,_,_ ), 0 , 2 , 0 , 8104 , 11 , 12 ), // #551 + INST(Pmaxsd , ExtRm , O(660F38,3D,_,_,_,_,_,_ ), 0 , 2 , 0 , 8112 , 11 , 12 ), // #552 + INST(Pmaxsw , ExtRm_P , O(000F00,EE,_,_,_,_,_,_ ), 0 , 4 , 0 , 8128 , 141, 85 ), // #553 + INST(Pmaxub , ExtRm_P , O(000F00,DE,_,_,_,_,_,_ ), 0 , 4 , 0 , 8136 , 141, 85 ), // #554 + INST(Pmaxud , ExtRm , O(660F38,3F,_,_,_,_,_,_ ), 0 , 2 , 0 , 8144 , 11 , 12 ), // #555 + INST(Pmaxuw , ExtRm , O(660F38,3E,_,_,_,_,_,_ ), 0 , 2 , 0 , 8160 , 11 , 12 ), // #556 + INST(Pminsb , ExtRm , O(660F38,38,_,_,_,_,_,_ ), 0 , 2 , 0 , 8168 , 11 , 12 ), // #557 + INST(Pminsd , ExtRm , O(660F38,39,_,_,_,_,_,_ ), 0 , 2 , 0 , 8176 , 11 , 12 ), // #558 + INST(Pminsw , ExtRm_P , O(000F00,EA,_,_,_,_,_,_ ), 0 , 4 , 0 , 8192 , 141, 85 ), // #559 + INST(Pminub , ExtRm_P , O(000F00,DA,_,_,_,_,_,_ ), 0 , 4 , 0 , 8200 , 141, 85 ), // #560 + INST(Pminud , ExtRm , O(660F38,3B,_,_,_,_,_,_ ), 0 , 2 , 0 , 8208 , 11 , 12 ), // #561 + INST(Pminuw , ExtRm , O(660F38,3A,_,_,_,_,_,_ ), 0 , 2 , 0 , 8224 , 11 , 12 ), // #562 + INST(Pmovmskb , ExtRm_P , O(000F00,D7,_,_,_,_,_,_ ), 0 , 4 , 0 , 8302 , 156, 85 ), // #563 + INST(Pmovsxbd , ExtRm , 
O(660F38,21,_,_,_,_,_,_ ), 0 , 2 , 0 , 8399 , 7 , 12 ), // #564 + INST(Pmovsxbq , ExtRm , O(660F38,22,_,_,_,_,_,_ ), 0 , 2 , 0 , 8409 , 157, 12 ), // #565 + INST(Pmovsxbw , ExtRm , O(660F38,20,_,_,_,_,_,_ ), 0 , 2 , 0 , 8419 , 6 , 12 ), // #566 + INST(Pmovsxdq , ExtRm , O(660F38,25,_,_,_,_,_,_ ), 0 , 2 , 0 , 8429 , 6 , 12 ), // #567 + INST(Pmovsxwd , ExtRm , O(660F38,23,_,_,_,_,_,_ ), 0 , 2 , 0 , 8439 , 6 , 12 ), // #568 + INST(Pmovsxwq , ExtRm , O(660F38,24,_,_,_,_,_,_ ), 0 , 2 , 0 , 8449 , 7 , 12 ), // #569 + INST(Pmovzxbd , ExtRm , O(660F38,31,_,_,_,_,_,_ ), 0 , 2 , 0 , 8536 , 7 , 12 ), // #570 + INST(Pmovzxbq , ExtRm , O(660F38,32,_,_,_,_,_,_ ), 0 , 2 , 0 , 8546 , 157, 12 ), // #571 + INST(Pmovzxbw , ExtRm , O(660F38,30,_,_,_,_,_,_ ), 0 , 2 , 0 , 8556 , 6 , 12 ), // #572 + INST(Pmovzxdq , ExtRm , O(660F38,35,_,_,_,_,_,_ ), 0 , 2 , 0 , 8566 , 6 , 12 ), // #573 + INST(Pmovzxwd , ExtRm , O(660F38,33,_,_,_,_,_,_ ), 0 , 2 , 0 , 8576 , 6 , 12 ), // #574 + INST(Pmovzxwq , ExtRm , O(660F38,34,_,_,_,_,_,_ ), 0 , 2 , 0 , 8586 , 7 , 12 ), // #575 + INST(Pmuldq , ExtRm , O(660F38,28,_,_,_,_,_,_ ), 0 , 2 , 0 , 8596 , 5 , 12 ), // #576 + INST(Pmulhrsw , ExtRm_P , O(000F38,0B,_,_,_,_,_,_ ), 0 , 82 , 0 , 8604 , 139, 84 ), // #577 + INST(Pmulhrw , Ext3dNow , O(000F0F,B7,_,_,_,_,_,_ ), 0 , 87 , 0 , 2250 , 143, 50 ), // #578 + INST(Pmulhuw , ExtRm_P , O(000F00,E4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8614 , 139, 85 ), // #579 + INST(Pmulhw , ExtRm_P , O(000F00,E5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8623 , 139, 80 ), // #580 + INST(Pmulld , ExtRm , O(660F38,40,_,_,_,_,_,_ ), 0 , 2 , 0 , 8631 , 5 , 12 ), // #581 + INST(Pmullw , ExtRm_P , O(000F00,D5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8647 , 139, 80 ), // #582 + INST(Pmuludq , ExtRm_P , O(000F00,F4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8670 , 139, 4 ), // #583 + INST(Pop , X86Pop , O(000000,8F,0,_,_,_,_,_ ), O(000000,58,_,_,_,_,_,_ ), 0 , 66 , 2258 , 158, 0 ), // #584 + INST(Popa , X86Op , O(660000,61,_,_,_,_,_,_ ), 0 , 19 , 0 , 2262 , 78 , 0 ), // #585 + INST(Popad , X86Op , O(000000,61,_,_,_,_,_,_ ), 0 , 0 , 0 , 2267 , 78 , 0 ), // #586 + INST(Popcnt , X86Rm_Raw66H , O(F30F00,B8,_,_,x,_,_,_ ), 0 , 6 , 0 , 2273 , 22 , 92 ), // #587 + INST(Popf , X86Op , O(660000,9D,_,_,_,_,_,_ ), 0 , 19 , 0 , 2280 , 30 , 93 ), // #588 + INST(Popfd , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2285 , 78 , 93 ), // #589 + INST(Popfq , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2291 , 159, 93 ), // #590 + INST(Por , ExtRm_P , O(000F00,EB,_,_,_,_,_,_ ), 0 , 4 , 0 , 8715 , 141, 80 ), // #591 + INST(Prefetch , X86M_Only , O(000F00,0D,0,_,_,_,_,_ ), 0 , 4 , 0 , 2297 , 31 , 50 ), // #592 + INST(Prefetchnta , X86M_Only , O(000F00,18,0,_,_,_,_,_ ), 0 , 4 , 0 , 2306 , 31 , 75 ), // #593 + INST(Prefetcht0 , X86M_Only , O(000F00,18,1,_,_,_,_,_ ), 0 , 28 , 0 , 2318 , 31 , 75 ), // #594 + INST(Prefetcht1 , X86M_Only , O(000F00,18,2,_,_,_,_,_ ), 0 , 74 , 0 , 2329 , 31 , 75 ), // #595 + INST(Prefetcht2 , X86M_Only , O(000F00,18,3,_,_,_,_,_ ), 0 , 76 , 0 , 2340 , 31 , 75 ), // #596 + INST(Prefetchw , X86M_Only , O(000F00,0D,1,_,_,_,_,_ ), 0 , 28 , 0 , 2351 , 31 , 94 ), // #597 + INST(Prefetchwt1 , X86M_Only , O(000F00,0D,2,_,_,_,_,_ ), 0 , 74 , 0 , 2361 , 31 , 95 ), // #598 + INST(Psadbw , ExtRm_P , O(000F00,F6,_,_,_,_,_,_ ), 0 , 4 , 0 , 4268 , 139, 85 ), // #599 + INST(Pshufb , ExtRm_P , O(000F38,00,_,_,_,_,_,_ ), 0 , 82 , 0 , 9041 , 139, 84 ), // #600 + INST(Pshufd , ExtRmi , O(660F00,70,_,_,_,_,_,_ ), 0 , 3 , 0 , 9062 , 8 , 4 ), // #601 + INST(Pshufhw , ExtRmi , O(F30F00,70,_,_,_,_,_,_ ), 0 , 6 , 0 , 9070 , 8 , 4 ), // 
#602 + INST(Pshuflw , ExtRmi , O(F20F00,70,_,_,_,_,_,_ ), 0 , 5 , 0 , 9079 , 8 , 4 ), // #603 + INST(Pshufw , ExtRmi_P , O(000F00,70,_,_,_,_,_,_ ), 0 , 4 , 0 , 2373 , 160, 75 ), // #604 + INST(Psignb , ExtRm_P , O(000F38,08,_,_,_,_,_,_ ), 0 , 82 , 0 , 9088 , 139, 84 ), // #605 + INST(Psignd , ExtRm_P , O(000F38,0A,_,_,_,_,_,_ ), 0 , 82 , 0 , 9096 , 139, 84 ), // #606 + INST(Psignw , ExtRm_P , O(000F38,09,_,_,_,_,_,_ ), 0 , 82 , 0 , 9104 , 139, 84 ), // #607 + INST(Pslld , ExtRmRi_P , O(000F00,F2,_,_,_,_,_,_ ), O(000F00,72,6,_,_,_,_,_ ), 4 , 67 , 9112 , 161, 80 ), // #608 + INST(Pslldq , ExtRmRi , 0 , O(660F00,73,7,_,_,_,_,_ ), 0 , 68 , 9119 , 162, 4 ), // #609 + INST(Psllq , ExtRmRi_P , O(000F00,F3,_,_,_,_,_,_ ), O(000F00,73,6,_,_,_,_,_ ), 4 , 69 , 9127 , 161, 80 ), // #610 + INST(Psllw , ExtRmRi_P , O(000F00,F1,_,_,_,_,_,_ ), O(000F00,71,6,_,_,_,_,_ ), 4 , 70 , 9158 , 161, 80 ), // #611 + INST(Psmash , X86Op , O(F30F01,FF,_,_,_,_,_,_ ), 0 , 81 , 0 , 2380 , 159, 96 ), // #612 + INST(Psrad , ExtRmRi_P , O(000F00,E2,_,_,_,_,_,_ ), O(000F00,72,4,_,_,_,_,_ ), 4 , 71 , 9165 , 161, 80 ), // #613 + INST(Psraw , ExtRmRi_P , O(000F00,E1,_,_,_,_,_,_ ), O(000F00,71,4,_,_,_,_,_ ), 4 , 72 , 9203 , 161, 80 ), // #614 + INST(Psrld , ExtRmRi_P , O(000F00,D2,_,_,_,_,_,_ ), O(000F00,72,2,_,_,_,_,_ ), 4 , 73 , 9210 , 161, 80 ), // #615 + INST(Psrldq , ExtRmRi , 0 , O(660F00,73,3,_,_,_,_,_ ), 0 , 74 , 9217 , 162, 4 ), // #616 + INST(Psrlq , ExtRmRi_P , O(000F00,D3,_,_,_,_,_,_ ), O(000F00,73,2,_,_,_,_,_ ), 4 , 75 , 9225 , 161, 80 ), // #617 + INST(Psrlw , ExtRmRi_P , O(000F00,D1,_,_,_,_,_,_ ), O(000F00,71,2,_,_,_,_,_ ), 4 , 76 , 9256 , 161, 80 ), // #618 + INST(Psubb , ExtRm_P , O(000F00,F8,_,_,_,_,_,_ ), 0 , 4 , 0 , 9263 , 142, 80 ), // #619 + INST(Psubd , ExtRm_P , O(000F00,FA,_,_,_,_,_,_ ), 0 , 4 , 0 , 9270 , 142, 80 ), // #620 + INST(Psubq , ExtRm_P , O(000F00,FB,_,_,_,_,_,_ ), 0 , 4 , 0 , 9277 , 142, 4 ), // #621 + INST(Psubsb , ExtRm_P , O(000F00,E8,_,_,_,_,_,_ ), 0 , 4 , 0 , 9284 , 142, 80 ), // #622 + INST(Psubsw , ExtRm_P , O(000F00,E9,_,_,_,_,_,_ ), 0 , 4 , 0 , 9292 , 142, 80 ), // #623 + INST(Psubusb , ExtRm_P , O(000F00,D8,_,_,_,_,_,_ ), 0 , 4 , 0 , 9300 , 142, 80 ), // #624 + INST(Psubusw , ExtRm_P , O(000F00,D9,_,_,_,_,_,_ ), 0 , 4 , 0 , 9309 , 142, 80 ), // #625 + INST(Psubw , ExtRm_P , O(000F00,F9,_,_,_,_,_,_ ), 0 , 4 , 0 , 9318 , 142, 80 ), // #626 + INST(Pswapd , Ext3dNow , O(000F0F,BB,_,_,_,_,_,_ ), 0 , 87 , 0 , 2387 , 143, 90 ), // #627 + INST(Ptest , ExtRm , O(660F38,17,_,_,_,_,_,_ ), 0 , 2 , 0 , 9347 , 5 , 97 ), // #628 + INST(Ptwrite , X86M , O(F30F00,AE,4,_,_,_,_,_ ), 0 , 91 , 0 , 2394 , 163, 98 ), // #629 + INST(Punpckhbw , ExtRm_P , O(000F00,68,_,_,_,_,_,_ ), 0 , 4 , 0 , 9430 , 139, 80 ), // #630 + INST(Punpckhdq , ExtRm_P , O(000F00,6A,_,_,_,_,_,_ ), 0 , 4 , 0 , 9441 , 139, 80 ), // #631 + INST(Punpckhqdq , ExtRm , O(660F00,6D,_,_,_,_,_,_ ), 0 , 3 , 0 , 9452 , 5 , 4 ), // #632 + INST(Punpckhwd , ExtRm_P , O(000F00,69,_,_,_,_,_,_ ), 0 , 4 , 0 , 9464 , 139, 80 ), // #633 + INST(Punpcklbw , ExtRm_P , O(000F00,60,_,_,_,_,_,_ ), 0 , 4 , 0 , 9475 , 139, 80 ), // #634 + INST(Punpckldq , ExtRm_P , O(000F00,62,_,_,_,_,_,_ ), 0 , 4 , 0 , 9486 , 139, 80 ), // #635 + INST(Punpcklqdq , ExtRm , O(660F00,6C,_,_,_,_,_,_ ), 0 , 3 , 0 , 9497 , 5 , 4 ), // #636 + INST(Punpcklwd , ExtRm_P , O(000F00,61,_,_,_,_,_,_ ), 0 , 4 , 0 , 9509 , 139, 80 ), // #637 + INST(Push , X86Push , O(000000,FF,6,_,_,_,_,_ ), O(000000,50,_,_,_,_,_,_ ), 31 , 77 , 2402 , 164, 0 ), // #638 + INST(Pusha , X86Op , 
O(660000,60,_,_,_,_,_,_ ), 0 , 19 , 0 , 2407 , 78 , 0 ), // #639 + INST(Pushad , X86Op , O(000000,60,_,_,_,_,_,_ ), 0 , 0 , 0 , 2413 , 78 , 0 ), // #640 + INST(Pushf , X86Op , O(660000,9C,_,_,_,_,_,_ ), 0 , 19 , 0 , 2420 , 30 , 99 ), // #641 + INST(Pushfd , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2426 , 78 , 99 ), // #642 + INST(Pushfq , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2433 , 159, 99 ), // #643 + INST(Pvalidate , X86Op , O(F20F01,FF,_,_,_,_,_,_ ), 0 , 92 , 0 , 2440 , 30 , 100), // #644 + INST(Pxor , ExtRm_P , O(000F00,EF,_,_,_,_,_,_ ), 0 , 4 , 0 , 9520 , 142, 80 ), // #645 + INST(Rcl , X86Rot , O(000000,D0,2,_,x,_,_,_ ), 0 , 1 , 0 , 2450 , 165, 101), // #646 + INST(Rcpps , ExtRm , O(000F00,53,_,_,_,_,_,_ ), 0 , 4 , 0 , 9648 , 5 , 5 ), // #647 + INST(Rcpss , ExtRm , O(F30F00,53,_,_,_,_,_,_ ), 0 , 6 , 0 , 9655 , 7 , 5 ), // #648 + INST(Rcr , X86Rot , O(000000,D0,3,_,x,_,_,_ ), 0 , 84 , 0 , 2454 , 165, 101), // #649 + INST(Rdfsbase , X86M , O(F30F00,AE,0,_,x,_,_,_ ), 0 , 6 , 0 , 2458 , 166, 102), // #650 + INST(Rdgsbase , X86M , O(F30F00,AE,1,_,x,_,_,_ ), 0 , 93 , 0 , 2467 , 166, 102), // #651 + INST(Rdmsr , X86Op , O(000F00,32,_,_,_,_,_,_ ), 0 , 4 , 0 , 2476 , 167, 103), // #652 + INST(Rdpid , X86R_Native , O(F30F00,C7,7,_,_,_,_,_ ), 0 , 94 , 0 , 2482 , 168, 104), // #653 + INST(Rdpkru , X86Op , O(000F01,EE,_,_,_,_,_,_ ), 0 , 21 , 0 , 2488 , 167, 105), // #654 + INST(Rdpmc , X86Op , O(000F00,33,_,_,_,_,_,_ ), 0 , 4 , 0 , 2495 , 167, 0 ), // #655 + INST(Rdpru , X86Op , O(000F01,FD,_,_,_,_,_,_ ), 0 , 21 , 0 , 2501 , 167, 106), // #656 + INST(Rdrand , X86M , O(000F00,C7,6,_,x,_,_,_ ), 0 , 78 , 0 , 2507 , 23 , 107), // #657 + INST(Rdseed , X86M , O(000F00,C7,7,_,x,_,_,_ ), 0 , 22 , 0 , 2514 , 23 , 108), // #658 + INST(Rdsspd , X86M , O(F30F00,1E,1,_,_,_,_,_ ), 0 , 93 , 0 , 2521 , 73 , 54 ), // #659 + INST(Rdsspq , X86M , O(F30F00,1E,1,_,_,_,_,_ ), 0 , 93 , 0 , 2528 , 74 , 54 ), // #660 + INST(Rdtsc , X86Op , O(000F00,31,_,_,_,_,_,_ ), 0 , 4 , 0 , 2535 , 28 , 109), // #661 + INST(Rdtscp , X86Op , O(000F01,F9,_,_,_,_,_,_ ), 0 , 21 , 0 , 2541 , 167, 110), // #662 + INST(Ret , X86Ret , O(000000,C2,_,_,_,_,_,_ ), 0 , 0 , 0 , 3044 , 169, 0 ), // #663 + INST(Rmpadjust , X86Op , O(F30F01,FE,_,_,_,_,_,_ ), 0 , 81 , 0 , 2548 , 159, 96 ), // #664 + INST(Rmpupdate , X86Op , O(F20F01,FE,_,_,_,_,_,_ ), 0 , 92 , 0 , 2558 , 159, 96 ), // #665 + INST(Rol , X86Rot , O(000000,D0,0,_,x,_,_,_ ), 0 , 0 , 0 , 2568 , 165, 111), // #666 + INST(Ror , X86Rot , O(000000,D0,1,_,x,_,_,_ ), 0 , 30 , 0 , 2572 , 165, 111), // #667 + INST(Rorx , VexRmi_Wx , V(F20F3A,F0,_,0,x,_,_,_ ), 0 , 95 , 0 , 2576 , 170, 83 ), // #668 + INST(Roundpd , ExtRmi , O(660F3A,09,_,_,_,_,_,_ ), 0 , 8 , 0 , 9750 , 8 , 12 ), // #669 + INST(Roundps , ExtRmi , O(660F3A,08,_,_,_,_,_,_ ), 0 , 8 , 0 , 9759 , 8 , 12 ), // #670 + INST(Roundsd , ExtRmi , O(660F3A,0B,_,_,_,_,_,_ ), 0 , 8 , 0 , 9768 , 36 , 12 ), // #671 + INST(Roundss , ExtRmi , O(660F3A,0A,_,_,_,_,_,_ ), 0 , 8 , 0 , 9777 , 37 , 12 ), // #672 + INST(Rsm , X86Op , O(000F00,AA,_,_,_,_,_,_ ), 0 , 4 , 0 , 2581 , 78 , 1 ), // #673 + INST(Rsqrtps , ExtRm , O(000F00,52,_,_,_,_,_,_ ), 0 , 4 , 0 , 9874 , 5 , 5 ), // #674 + INST(Rsqrtss , ExtRm , O(F30F00,52,_,_,_,_,_,_ ), 0 , 6 , 0 , 9883 , 7 , 5 ), // #675 + INST(Rstorssp , X86M , O(F30F00,01,5,_,_,_,_,_ ), 0 , 62 , 0 , 2585 , 32 , 24 ), // #676 + INST(Sahf , X86Op , O(000000,9E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2594 , 93 , 112), // #677 + INST(Sal , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2599 , 165, 1 ), // #678 + INST(Sar , 
X86Rot , O(000000,D0,7,_,x,_,_,_ ), 0 , 26 , 0 , 2603 , 165, 1 ), // #679 + INST(Sarx , VexRmv_Wx , V(F30F38,F7,_,0,x,_,_,_ ), 0 , 88 , 0 , 2607 , 13 , 83 ), // #680 + INST(Saveprevssp , X86Op , O(F30F01,EA,_,_,_,_,_,_ ), 0 , 81 , 0 , 2612 , 30 , 24 ), // #681 + INST(Sbb , X86Arith , O(000000,18,3,_,x,_,_,_ ), 0 , 84 , 0 , 2624 , 171, 2 ), // #682 + INST(Scas , X86StrRm , O(000000,AE,_,_,_,_,_,_ ), 0 , 0 , 0 , 2628 , 172, 36 ), // #683 + INST(Serialize , X86Op , O(000F01,E8,_,_,_,_,_,_ ), 0 , 21 , 0 , 2633 , 30 , 113), // #684 + INST(Seta , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2643 , 173, 57 ), // #685 + INST(Setae , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2648 , 173, 58 ), // #686 + INST(Setb , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2654 , 173, 58 ), // #687 + INST(Setbe , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2659 , 173, 57 ), // #688 + INST(Setc , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2665 , 173, 58 ), // #689 + INST(Sete , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2670 , 173, 59 ), // #690 + INST(Setg , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2675 , 173, 60 ), // #691 + INST(Setge , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2680 , 173, 61 ), // #692 + INST(Setl , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2686 , 173, 61 ), // #693 + INST(Setle , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2691 , 173, 60 ), // #694 + INST(Setna , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2697 , 173, 57 ), // #695 + INST(Setnae , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2703 , 173, 58 ), // #696 + INST(Setnb , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2710 , 173, 58 ), // #697 + INST(Setnbe , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2716 , 173, 57 ), // #698 + INST(Setnc , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2723 , 173, 58 ), // #699 + INST(Setne , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2729 , 173, 59 ), // #700 + INST(Setng , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2735 , 173, 60 ), // #701 + INST(Setnge , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2741 , 173, 61 ), // #702 + INST(Setnl , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2748 , 173, 61 ), // #703 + INST(Setnle , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2754 , 173, 60 ), // #704 + INST(Setno , X86Set , O(000F00,91,_,_,_,_,_,_ ), 0 , 4 , 0 , 2761 , 173, 55 ), // #705 + INST(Setnp , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2767 , 173, 62 ), // #706 + INST(Setns , X86Set , O(000F00,99,_,_,_,_,_,_ ), 0 , 4 , 0 , 2773 , 173, 63 ), // #707 + INST(Setnz , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2779 , 173, 59 ), // #708 + INST(Seto , X86Set , O(000F00,90,_,_,_,_,_,_ ), 0 , 4 , 0 , 2785 , 173, 55 ), // #709 + INST(Setp , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2790 , 173, 62 ), // #710 + INST(Setpe , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2795 , 173, 62 ), // #711 + INST(Setpo , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2801 , 173, 62 ), // #712 + INST(Sets , X86Set , O(000F00,98,_,_,_,_,_,_ ), 0 , 4 , 0 , 2807 , 173, 63 ), // #713 + INST(Setssbsy , X86Op , O(F30F01,E8,_,_,_,_,_,_ ), 0 , 81 , 0 , 2812 , 30 , 54 ), // #714 + INST(Setz , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2821 , 173, 59 ), // #715 + INST(Sfence , X86Fence , O(000F00,AE,7,_,_,_,_,_ ), 0 , 22 , 0 , 2826 , 30 , 75 ), // #716 + INST(Sgdt , X86M_Only , O(000F00,01,0,_,_,_,_,_ ), 0 , 4 , 0 , 2833 , 31 , 0 ), // #717 + INST(Sha1msg1 , ExtRm , O(000F38,C9,_,_,_,_,_,_ ), 0 , 82 , 0 , 2838 , 5 , 114), 
// #718 + INST(Sha1msg2 , ExtRm , O(000F38,CA,_,_,_,_,_,_ ), 0 , 82 , 0 , 2847 , 5 , 114), // #719 + INST(Sha1nexte , ExtRm , O(000F38,C8,_,_,_,_,_,_ ), 0 , 82 , 0 , 2856 , 5 , 114), // #720 + INST(Sha1rnds4 , ExtRmi , O(000F3A,CC,_,_,_,_,_,_ ), 0 , 85 , 0 , 2866 , 8 , 114), // #721 + INST(Sha256msg1 , ExtRm , O(000F38,CC,_,_,_,_,_,_ ), 0 , 82 , 0 , 2876 , 5 , 114), // #722 + INST(Sha256msg2 , ExtRm , O(000F38,CD,_,_,_,_,_,_ ), 0 , 82 , 0 , 2887 , 5 , 114), // #723 + INST(Sha256rnds2 , ExtRm_XMM0 , O(000F38,CB,_,_,_,_,_,_ ), 0 , 82 , 0 , 2898 , 15 , 114), // #724 + INST(Shl , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2910 , 165, 1 ), // #725 + INST(Shld , X86ShldShrd , O(000F00,A4,_,_,x,_,_,_ ), 0 , 4 , 0 , 8919 , 174, 1 ), // #726 + INST(Shlx , VexRmv_Wx , V(660F38,F7,_,0,x,_,_,_ ), 0 , 96 , 0 , 2914 , 13 , 83 ), // #727 + INST(Shr , X86Rot , O(000000,D0,5,_,x,_,_,_ ), 0 , 61 , 0 , 2919 , 165, 1 ), // #728 + INST(Shrd , X86ShldShrd , O(000F00,AC,_,_,x,_,_,_ ), 0 , 4 , 0 , 2923 , 174, 1 ), // #729 + INST(Shrx , VexRmv_Wx , V(F20F38,F7,_,0,x,_,_,_ ), 0 , 83 , 0 , 2928 , 13 , 83 ), // #730 + INST(Shufpd , ExtRmi , O(660F00,C6,_,_,_,_,_,_ ), 0 , 3 , 0 , 10144, 8 , 4 ), // #731 + INST(Shufps , ExtRmi , O(000F00,C6,_,_,_,_,_,_ ), 0 , 4 , 0 , 10152, 8 , 5 ), // #732 + INST(Sidt , X86M_Only , O(000F00,01,1,_,_,_,_,_ ), 0 , 28 , 0 , 2933 , 31 , 0 ), // #733 + INST(Skinit , X86Op_xAX , O(000F01,DE,_,_,_,_,_,_ ), 0 , 21 , 0 , 2938 , 51 , 115), // #734 + INST(Sldt , X86M , O(000F00,00,0,_,_,_,_,_ ), 0 , 4 , 0 , 2945 , 175, 0 ), // #735 + INST(Slwpcb , VexR_Wx , V(XOP_M9,12,1,0,x,_,_,_ ), 0 , 11 , 0 , 2950 , 102, 72 ), // #736 + INST(Smsw , X86M , O(000F00,01,4,_,_,_,_,_ ), 0 , 97 , 0 , 2957 , 175, 0 ), // #737 + INST(Sqrtpd , ExtRm , O(660F00,51,_,_,_,_,_,_ ), 0 , 3 , 0 , 10160, 5 , 4 ), // #738 + INST(Sqrtps , ExtRm , O(000F00,51,_,_,_,_,_,_ ), 0 , 4 , 0 , 9875 , 5 , 5 ), // #739 + INST(Sqrtsd , ExtRm , O(F20F00,51,_,_,_,_,_,_ ), 0 , 5 , 0 , 10176, 6 , 4 ), // #740 + INST(Sqrtss , ExtRm , O(F30F00,51,_,_,_,_,_,_ ), 0 , 6 , 0 , 9884 , 7 , 5 ), // #741 + INST(Stac , X86Op , O(000F01,CB,_,_,_,_,_,_ ), 0 , 21 , 0 , 2962 , 30 , 16 ), // #742 + INST(Stc , X86Op , O(000000,F9,_,_,_,_,_,_ ), 0 , 0 , 0 , 2967 , 30 , 17 ), // #743 + INST(Std , X86Op , O(000000,FD,_,_,_,_,_,_ ), 0 , 0 , 0 , 6902 , 30 , 18 ), // #744 + INST(Stgi , X86Op , O(000F01,DC,_,_,_,_,_,_ ), 0 , 21 , 0 , 2971 , 30 , 115), // #745 + INST(Sti , X86Op , O(000000,FB,_,_,_,_,_,_ ), 0 , 0 , 0 , 2976 , 30 , 23 ), // #746 + INST(Stmxcsr , X86M_Only , O(000F00,AE,3,_,_,_,_,_ ), 0 , 76 , 0 , 10192, 96 , 5 ), // #747 + INST(Stos , X86StrMr , O(000000,AA,_,_,_,_,_,_ ), 0 , 0 , 0 , 2980 , 176, 73 ), // #748 + INST(Str , X86M , O(000F00,00,1,_,_,_,_,_ ), 0 , 28 , 0 , 2985 , 175, 0 ), // #749 + INST(Sttilecfg , AmxCfg , V(660F38,49,_,0,0,_,_,_ ), 0 , 96 , 0 , 2989 , 98 , 71 ), // #750 + INST(Sub , X86Arith , O(000000,28,5,_,x,_,_,_ ), 0 , 61 , 0 , 861 , 171, 1 ), // #751 + INST(Subpd , ExtRm , O(660F00,5C,_,_,_,_,_,_ ), 0 , 3 , 0 , 4844 , 5 , 4 ), // #752 + INST(Subps , ExtRm , O(000F00,5C,_,_,_,_,_,_ ), 0 , 4 , 0 , 4856 , 5 , 5 ), // #753 + INST(Subsd , ExtRm , O(F20F00,5C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5532 , 6 , 4 ), // #754 + INST(Subss , ExtRm , O(F30F00,5C,_,_,_,_,_,_ ), 0 , 6 , 0 , 5542 , 7 , 5 ), // #755 + INST(Swapgs , X86Op , O(000F01,F8,_,_,_,_,_,_ ), 0 , 21 , 0 , 2999 , 159, 0 ), // #756 + INST(Syscall , X86Op , O(000F00,05,_,_,_,_,_,_ ), 0 , 4 , 0 , 3006 , 159, 0 ), // #757 + INST(Sysenter , X86Op , O(000F00,34,_,_,_,_,_,_ ), 0 , 4 , 0 , 
3014 , 30 , 0 ), // #758 + INST(Sysexit , X86Op , O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 3023 , 30 , 0 ), // #759 + INST(Sysexit64 , X86Op , O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 3031 , 30 , 0 ), // #760 + INST(Sysret , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 3041 , 159, 0 ), // #761 + INST(Sysret64 , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 3048 , 159, 0 ), // #762 + INST(T1mskc , VexVm_Wx , V(XOP_M9,01,7,0,x,_,_,_ ), 0 , 98 , 0 , 3057 , 14 , 11 ), // #763 + INST(Tdpbf16ps , AmxRmv , V(F30F38,5C,_,0,0,_,_,_ ), 0 , 88 , 0 , 3064 , 177, 116), // #764 + INST(Tdpbssd , AmxRmv , V(F20F38,5E,_,0,0,_,_,_ ), 0 , 83 , 0 , 3074 , 177, 117), // #765 + INST(Tdpbsud , AmxRmv , V(F30F38,5E,_,0,0,_,_,_ ), 0 , 88 , 0 , 3082 , 177, 117), // #766 + INST(Tdpbusd , AmxRmv , V(660F38,5E,_,0,0,_,_,_ ), 0 , 96 , 0 , 3090 , 177, 117), // #767 + INST(Tdpbuud , AmxRmv , V(000F38,5E,_,0,0,_,_,_ ), 0 , 10 , 0 , 3098 , 177, 117), // #768 + INST(Test , X86Test , O(000000,84,_,_,x,_,_,_ ), O(000000,F6,_,_,x,_,_,_ ), 0 , 78 , 9348 , 178, 1 ), // #769 + INST(Tileloadd , AmxRm , V(F20F38,4B,_,0,0,_,_,_ ), 0 , 83 , 0 , 3106 , 179, 71 ), // #770 + INST(Tileloaddt1 , AmxRm , V(660F38,4B,_,0,0,_,_,_ ), 0 , 96 , 0 , 3116 , 179, 71 ), // #771 + INST(Tilerelease , VexOpMod , V(000F38,49,0,0,0,_,_,_ ), 0 , 10 , 0 , 3128 , 180, 71 ), // #772 + INST(Tilestored , AmxMr , V(F30F38,4B,_,0,0,_,_,_ ), 0 , 88 , 0 , 3140 , 181, 71 ), // #773 + INST(Tilezero , AmxR , V(F20F38,49,_,0,0,_,_,_ ), 0 , 83 , 0 , 3151 , 182, 71 ), // #774 + INST(Tpause , X86R32_EDX_EAX , O(660F00,AE,6,_,_,_,_,_ ), 0 , 25 , 0 , 3160 , 183, 118), // #775 + INST(Tzcnt , X86Rm_Raw66H , O(F30F00,BC,_,_,x,_,_,_ ), 0 , 6 , 0 , 3167 , 22 , 9 ), // #776 + INST(Tzmsk , VexVm_Wx , V(XOP_M9,01,4,0,x,_,_,_ ), 0 , 99 , 0 , 3173 , 14 , 11 ), // #777 + INST(Ucomisd , ExtRm , O(660F00,2E,_,_,_,_,_,_ ), 0 , 3 , 0 , 10245, 6 , 40 ), // #778 + INST(Ucomiss , ExtRm , O(000F00,2E,_,_,_,_,_,_ ), 0 , 4 , 0 , 10254, 7 , 41 ), // #779 + INST(Ud0 , X86M , O(000F00,FF,_,_,_,_,_,_ ), 0 , 4 , 0 , 3179 , 184, 0 ), // #780 + INST(Ud1 , X86M , O(000F00,B9,_,_,_,_,_,_ ), 0 , 4 , 0 , 3183 , 184, 0 ), // #781 + INST(Ud2 , X86Op , O(000F00,0B,_,_,_,_,_,_ ), 0 , 4 , 0 , 3187 , 30 , 0 ), // #782 + INST(Umonitor , X86R_FromM , O(F30F00,AE,6,_,_,_,_,_ ), 0 , 24 , 0 , 3191 , 185, 119), // #783 + INST(Umwait , X86R32_EDX_EAX , O(F20F00,AE,6,_,_,_,_,_ ), 0 , 100, 0 , 3200 , 183, 118), // #784 + INST(Unpckhpd , ExtRm , O(660F00,15,_,_,_,_,_,_ ), 0 , 3 , 0 , 10263, 5 , 4 ), // #785 + INST(Unpckhps , ExtRm , O(000F00,15,_,_,_,_,_,_ ), 0 , 4 , 0 , 10273, 5 , 5 ), // #786 + INST(Unpcklpd , ExtRm , O(660F00,14,_,_,_,_,_,_ ), 0 , 3 , 0 , 10283, 5 , 4 ), // #787 + INST(Unpcklps , ExtRm , O(000F00,14,_,_,_,_,_,_ ), 0 , 4 , 0 , 10293, 5 , 5 ), // #788 + INST(V4fmaddps , VexRm_T1_4X , E(F20F38,9A,_,2,_,0,2,T4X), 0 , 101, 0 , 3207 , 186, 120), // #789 + INST(V4fmaddss , VexRm_T1_4X , E(F20F38,9B,_,2,_,0,2,T4X), 0 , 101, 0 , 3217 , 187, 120), // #790 + INST(V4fnmaddps , VexRm_T1_4X , E(F20F38,AA,_,2,_,0,2,T4X), 0 , 101, 0 , 3227 , 186, 120), // #791 + INST(V4fnmaddss , VexRm_T1_4X , E(F20F38,AB,_,2,_,0,2,T4X), 0 , 101, 0 , 3238 , 187, 120), // #792 + INST(Vaddpd , VexRvm_Lx , V(660F00,58,_,x,I,1,4,FV ), 0 , 102, 0 , 3249 , 188, 121), // #793 + INST(Vaddps , VexRvm_Lx , V(000F00,58,_,x,I,0,4,FV ), 0 , 103, 0 , 3256 , 189, 121), // #794 + INST(Vaddsd , VexRvm , V(F20F00,58,_,I,I,1,3,T1S), 0 , 104, 0 , 3263 , 190, 122), // #795 + INST(Vaddss , VexRvm , V(F30F00,58,_,I,I,0,2,T1S), 0 , 105, 0 , 3270 , 191, 
122), // #796 + INST(Vaddsubpd , VexRvm_Lx , V(660F00,D0,_,x,I,_,_,_ ), 0 , 68 , 0 , 3277 , 192, 123), // #797 + INST(Vaddsubps , VexRvm_Lx , V(F20F00,D0,_,x,I,_,_,_ ), 0 , 106, 0 , 3287 , 192, 123), // #798 + INST(Vaesdec , VexRvm_Lx , V(660F38,DE,_,x,I,_,4,FVM), 0 , 107, 0 , 3297 , 193, 124), // #799 + INST(Vaesdeclast , VexRvm_Lx , V(660F38,DF,_,x,I,_,4,FVM), 0 , 107, 0 , 3305 , 193, 124), // #800 + INST(Vaesenc , VexRvm_Lx , V(660F38,DC,_,x,I,_,4,FVM), 0 , 107, 0 , 3317 , 193, 124), // #801 + INST(Vaesenclast , VexRvm_Lx , V(660F38,DD,_,x,I,_,4,FVM), 0 , 107, 0 , 3325 , 193, 124), // #802 + INST(Vaesimc , VexRm , V(660F38,DB,_,0,I,_,_,_ ), 0 , 96 , 0 , 3337 , 194, 125), // #803 + INST(Vaeskeygenassist , VexRmi , V(660F3A,DF,_,0,I,_,_,_ ), 0 , 72 , 0 , 3345 , 195, 125), // #804 + INST(Valignd , VexRvmi_Lx , E(660F3A,03,_,x,_,0,4,FV ), 0 , 108, 0 , 3362 , 196, 126), // #805 + INST(Valignq , VexRvmi_Lx , E(660F3A,03,_,x,_,1,4,FV ), 0 , 109, 0 , 3370 , 197, 126), // #806 + INST(Vandnpd , VexRvm_Lx , V(660F00,55,_,x,I,1,4,FV ), 0 , 102, 0 , 3378 , 198, 127), // #807 + INST(Vandnps , VexRvm_Lx , V(000F00,55,_,x,I,0,4,FV ), 0 , 103, 0 , 3386 , 199, 127), // #808 + INST(Vandpd , VexRvm_Lx , V(660F00,54,_,x,I,1,4,FV ), 0 , 102, 0 , 3394 , 200, 127), // #809 + INST(Vandps , VexRvm_Lx , V(000F00,54,_,x,I,0,4,FV ), 0 , 103, 0 , 3401 , 201, 127), // #810 + INST(Vblendmb , VexRvm_Lx , E(660F38,66,_,x,_,0,4,FVM), 0 , 110, 0 , 3408 , 202, 128), // #811 + INST(Vblendmd , VexRvm_Lx , E(660F38,64,_,x,_,0,4,FV ), 0 , 111, 0 , 3417 , 203, 126), // #812 + INST(Vblendmpd , VexRvm_Lx , E(660F38,65,_,x,_,1,4,FV ), 0 , 112, 0 , 3426 , 204, 126), // #813 + INST(Vblendmps , VexRvm_Lx , E(660F38,65,_,x,_,0,4,FV ), 0 , 111, 0 , 3436 , 203, 126), // #814 + INST(Vblendmq , VexRvm_Lx , E(660F38,64,_,x,_,1,4,FV ), 0 , 112, 0 , 3446 , 204, 126), // #815 + INST(Vblendmw , VexRvm_Lx , E(660F38,66,_,x,_,1,4,FVM), 0 , 113, 0 , 3455 , 202, 128), // #816 + INST(Vblendpd , VexRvmi_Lx , V(660F3A,0D,_,x,I,_,_,_ ), 0 , 72 , 0 , 3464 , 205, 123), // #817 + INST(Vblendps , VexRvmi_Lx , V(660F3A,0C,_,x,I,_,_,_ ), 0 , 72 , 0 , 3473 , 205, 123), // #818 + INST(Vblendvpd , VexRvmr_Lx , V(660F3A,4B,_,x,0,_,_,_ ), 0 , 72 , 0 , 3482 , 206, 123), // #819 + INST(Vblendvps , VexRvmr_Lx , V(660F3A,4A,_,x,0,_,_,_ ), 0 , 72 , 0 , 3492 , 206, 123), // #820 + INST(Vbroadcastf128 , VexRm , V(660F38,1A,_,1,0,_,_,_ ), 0 , 114, 0 , 3502 , 207, 123), // #821 + INST(Vbroadcastf32x2 , VexRm_Lx , E(660F38,19,_,x,_,0,3,T2 ), 0 , 115, 0 , 3517 , 208, 129), // #822 + INST(Vbroadcastf32x4 , VexRm_Lx , E(660F38,1A,_,x,_,0,4,T4 ), 0 , 116, 0 , 3533 , 209, 66 ), // #823 + INST(Vbroadcastf32x8 , VexRm , E(660F38,1B,_,2,_,0,5,T8 ), 0 , 117, 0 , 3549 , 210, 64 ), // #824 + INST(Vbroadcastf64x2 , VexRm_Lx , E(660F38,1A,_,x,_,1,4,T2 ), 0 , 118, 0 , 3565 , 209, 129), // #825 + INST(Vbroadcastf64x4 , VexRm , E(660F38,1B,_,2,_,1,5,T4 ), 0 , 119, 0 , 3581 , 210, 66 ), // #826 + INST(Vbroadcasti128 , VexRm , V(660F38,5A,_,1,0,_,_,_ ), 0 , 114, 0 , 3597 , 207, 130), // #827 + INST(Vbroadcasti32x2 , VexRm_Lx , E(660F38,59,_,x,_,0,3,T2 ), 0 , 115, 0 , 3612 , 211, 129), // #828 + INST(Vbroadcasti32x4 , VexRm_Lx , E(660F38,5A,_,x,_,0,4,T4 ), 0 , 116, 0 , 3628 , 209, 126), // #829 + INST(Vbroadcasti32x8 , VexRm , E(660F38,5B,_,2,_,0,5,T8 ), 0 , 117, 0 , 3644 , 210, 64 ), // #830 + INST(Vbroadcasti64x2 , VexRm_Lx , E(660F38,5A,_,x,_,1,4,T2 ), 0 , 118, 0 , 3660 , 209, 129), // #831 + INST(Vbroadcasti64x4 , VexRm , E(660F38,5B,_,2,_,1,5,T4 ), 0 , 119, 0 , 3676 , 210, 66 ), // 
#832 + INST(Vbroadcastsd , VexRm_Lx , V(660F38,19,_,x,0,1,3,T1S), 0 , 120, 0 , 3692 , 212, 131), // #833 + INST(Vbroadcastss , VexRm_Lx , V(660F38,18,_,x,0,0,2,T1S), 0 , 121, 0 , 3705 , 213, 131), // #834 + INST(Vcmppd , VexRvmi_Lx , V(660F00,C2,_,x,I,1,4,FV ), 0 , 102, 0 , 3718 , 214, 121), // #835 + INST(Vcmpps , VexRvmi_Lx , V(000F00,C2,_,x,I,0,4,FV ), 0 , 103, 0 , 3725 , 215, 121), // #836 + INST(Vcmpsd , VexRvmi , V(F20F00,C2,_,I,I,1,3,T1S), 0 , 104, 0 , 3732 , 216, 122), // #837 + INST(Vcmpss , VexRvmi , V(F30F00,C2,_,I,I,0,2,T1S), 0 , 105, 0 , 3739 , 217, 122), // #838 + INST(Vcomisd , VexRm , V(660F00,2F,_,I,I,1,3,T1S), 0 , 122, 0 , 3746 , 218, 132), // #839 + INST(Vcomiss , VexRm , V(000F00,2F,_,I,I,0,2,T1S), 0 , 123, 0 , 3754 , 219, 132), // #840 + INST(Vcompresspd , VexMr_Lx , E(660F38,8A,_,x,_,1,3,T1S), 0 , 124, 0 , 3762 , 220, 126), // #841 + INST(Vcompressps , VexMr_Lx , E(660F38,8A,_,x,_,0,2,T1S), 0 , 125, 0 , 3774 , 220, 126), // #842 + INST(Vcvtdq2pd , VexRm_Lx , V(F30F00,E6,_,x,I,0,3,HV ), 0 , 126, 0 , 3786 , 221, 121), // #843 + INST(Vcvtdq2ps , VexRm_Lx , V(000F00,5B,_,x,I,0,4,FV ), 0 , 103, 0 , 3796 , 222, 121), // #844 + INST(Vcvtne2ps2bf16 , VexRvm , E(F20F38,72,_,_,_,0,_,_ ), 0 , 127, 0 , 3806 , 203, 133), // #845 + INST(Vcvtneps2bf16 , VexRm , E(F30F38,72,_,_,_,0,_,_ ), 0 , 128, 0 , 3821 , 223, 133), // #846 + INST(Vcvtpd2dq , VexRm_Lx , V(F20F00,E6,_,x,I,1,4,FV ), 0 , 129, 0 , 3835 , 224, 121), // #847 + INST(Vcvtpd2ps , VexRm_Lx , V(660F00,5A,_,x,I,1,4,FV ), 0 , 102, 0 , 3845 , 224, 121), // #848 + INST(Vcvtpd2qq , VexRm_Lx , E(660F00,7B,_,x,_,1,4,FV ), 0 , 130, 0 , 3855 , 225, 129), // #849 + INST(Vcvtpd2udq , VexRm_Lx , E(000F00,79,_,x,_,1,4,FV ), 0 , 131, 0 , 3865 , 226, 126), // #850 + INST(Vcvtpd2uqq , VexRm_Lx , E(660F00,79,_,x,_,1,4,FV ), 0 , 130, 0 , 3876 , 225, 129), // #851 + INST(Vcvtph2ps , VexRm_Lx , V(660F38,13,_,x,0,0,3,HVM), 0 , 132, 0 , 3887 , 227, 134), // #852 + INST(Vcvtps2dq , VexRm_Lx , V(660F00,5B,_,x,I,0,4,FV ), 0 , 133, 0 , 3897 , 222, 121), // #853 + INST(Vcvtps2pd , VexRm_Lx , V(000F00,5A,_,x,I,0,4,HV ), 0 , 134, 0 , 3907 , 228, 121), // #854 + INST(Vcvtps2ph , VexMri_Lx , V(660F3A,1D,_,x,0,0,3,HVM), 0 , 135, 0 , 3917 , 229, 134), // #855 + INST(Vcvtps2qq , VexRm_Lx , E(660F00,7B,_,x,_,0,3,HV ), 0 , 136, 0 , 3927 , 230, 129), // #856 + INST(Vcvtps2udq , VexRm_Lx , E(000F00,79,_,x,_,0,4,FV ), 0 , 137, 0 , 3937 , 231, 126), // #857 + INST(Vcvtps2uqq , VexRm_Lx , E(660F00,79,_,x,_,0,3,HV ), 0 , 136, 0 , 3948 , 230, 129), // #858 + INST(Vcvtqq2pd , VexRm_Lx , E(F30F00,E6,_,x,_,1,4,FV ), 0 , 138, 0 , 3959 , 225, 129), // #859 + INST(Vcvtqq2ps , VexRm_Lx , E(000F00,5B,_,x,_,1,4,FV ), 0 , 131, 0 , 3969 , 226, 129), // #860 + INST(Vcvtsd2si , VexRm_Wx , V(F20F00,2D,_,I,x,x,3,T1F), 0 , 139, 0 , 3979 , 232, 122), // #861 + INST(Vcvtsd2ss , VexRvm , V(F20F00,5A,_,I,I,1,3,T1S), 0 , 104, 0 , 3989 , 190, 122), // #862 + INST(Vcvtsd2usi , VexRm_Wx , E(F20F00,79,_,I,_,x,3,T1F), 0 , 140, 0 , 3999 , 233, 66 ), // #863 + INST(Vcvtsi2sd , VexRvm_Wx , V(F20F00,2A,_,I,x,x,2,T1W), 0 , 141, 0 , 4010 , 234, 122), // #864 + INST(Vcvtsi2ss , VexRvm_Wx , V(F30F00,2A,_,I,x,x,2,T1W), 0 , 142, 0 , 4020 , 234, 122), // #865 + INST(Vcvtss2sd , VexRvm , V(F30F00,5A,_,I,I,0,2,T1S), 0 , 105, 0 , 4030 , 235, 122), // #866 + INST(Vcvtss2si , VexRm_Wx , V(F30F00,2D,_,I,x,x,2,T1F), 0 , 143, 0 , 4040 , 236, 122), // #867 + INST(Vcvtss2usi , VexRm_Wx , E(F30F00,79,_,I,_,x,2,T1F), 0 , 144, 0 , 4050 , 237, 66 ), // #868 + INST(Vcvttpd2dq , VexRm_Lx , V(660F00,E6,_,x,I,1,4,FV ), 0 
, 102, 0 , 4061 , 238, 121), // #869 + INST(Vcvttpd2qq , VexRm_Lx , E(660F00,7A,_,x,_,1,4,FV ), 0 , 130, 0 , 4072 , 239, 126), // #870 + INST(Vcvttpd2udq , VexRm_Lx , E(000F00,78,_,x,_,1,4,FV ), 0 , 131, 0 , 4083 , 240, 126), // #871 + INST(Vcvttpd2uqq , VexRm_Lx , E(660F00,78,_,x,_,1,4,FV ), 0 , 130, 0 , 4095 , 239, 129), // #872 + INST(Vcvttps2dq , VexRm_Lx , V(F30F00,5B,_,x,I,0,4,FV ), 0 , 145, 0 , 4107 , 241, 121), // #873 + INST(Vcvttps2qq , VexRm_Lx , E(660F00,7A,_,x,_,0,3,HV ), 0 , 136, 0 , 4118 , 242, 129), // #874 + INST(Vcvttps2udq , VexRm_Lx , E(000F00,78,_,x,_,0,4,FV ), 0 , 137, 0 , 4129 , 243, 126), // #875 + INST(Vcvttps2uqq , VexRm_Lx , E(660F00,78,_,x,_,0,3,HV ), 0 , 136, 0 , 4141 , 242, 129), // #876 + INST(Vcvttsd2si , VexRm_Wx , V(F20F00,2C,_,I,x,x,3,T1F), 0 , 139, 0 , 4153 , 244, 122), // #877 + INST(Vcvttsd2usi , VexRm_Wx , E(F20F00,78,_,I,_,x,3,T1F), 0 , 140, 0 , 4164 , 245, 66 ), // #878 + INST(Vcvttss2si , VexRm_Wx , V(F30F00,2C,_,I,x,x,2,T1F), 0 , 143, 0 , 4176 , 246, 122), // #879 + INST(Vcvttss2usi , VexRm_Wx , E(F30F00,78,_,I,_,x,2,T1F), 0 , 144, 0 , 4187 , 247, 66 ), // #880 + INST(Vcvtudq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,0,3,HV ), 0 , 146, 0 , 4199 , 248, 126), // #881 + INST(Vcvtudq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,0,4,FV ), 0 , 147, 0 , 4210 , 231, 126), // #882 + INST(Vcvtuqq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,1,4,FV ), 0 , 138, 0 , 4221 , 225, 129), // #883 + INST(Vcvtuqq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,1,4,FV ), 0 , 148, 0 , 4232 , 226, 129), // #884 + INST(Vcvtusi2sd , VexRvm_Wx , E(F20F00,7B,_,I,_,x,2,T1W), 0 , 149, 0 , 4243 , 249, 66 ), // #885 + INST(Vcvtusi2ss , VexRvm_Wx , E(F30F00,7B,_,I,_,x,2,T1W), 0 , 150, 0 , 4254 , 249, 66 ), // #886 + INST(Vdbpsadbw , VexRvmi_Lx , E(660F3A,42,_,x,_,0,4,FVM), 0 , 151, 0 , 4265 , 250, 128), // #887 + INST(Vdivpd , VexRvm_Lx , V(660F00,5E,_,x,I,1,4,FV ), 0 , 102, 0 , 4275 , 188, 121), // #888 + INST(Vdivps , VexRvm_Lx , V(000F00,5E,_,x,I,0,4,FV ), 0 , 103, 0 , 4282 , 189, 121), // #889 + INST(Vdivsd , VexRvm , V(F20F00,5E,_,I,I,1,3,T1S), 0 , 104, 0 , 4289 , 190, 122), // #890 + INST(Vdivss , VexRvm , V(F30F00,5E,_,I,I,0,2,T1S), 0 , 105, 0 , 4296 , 191, 122), // #891 + INST(Vdpbf16ps , VexRvm , E(F30F38,52,_,_,_,0,_,_ ), 0 , 128, 0 , 4303 , 203, 133), // #892 + INST(Vdppd , VexRvmi_Lx , V(660F3A,41,_,x,I,_,_,_ ), 0 , 72 , 0 , 4313 , 251, 123), // #893 + INST(Vdpps , VexRvmi_Lx , V(660F3A,40,_,x,I,_,_,_ ), 0 , 72 , 0 , 4319 , 205, 123), // #894 + INST(Verr , X86M_NoSize , O(000F00,00,4,_,_,_,_,_ ), 0 , 97 , 0 , 4325 , 101, 10 ), // #895 + INST(Verw , X86M_NoSize , O(000F00,00,5,_,_,_,_,_ ), 0 , 75 , 0 , 4330 , 101, 10 ), // #896 + INST(Vexp2pd , VexRm , E(660F38,C8,_,2,_,1,4,FV ), 0 , 152, 0 , 4335 , 252, 135), // #897 + INST(Vexp2ps , VexRm , E(660F38,C8,_,2,_,0,4,FV ), 0 , 153, 0 , 4343 , 253, 135), // #898 + INST(Vexpandpd , VexRm_Lx , E(660F38,88,_,x,_,1,3,T1S), 0 , 124, 0 , 4351 , 254, 126), // #899 + INST(Vexpandps , VexRm_Lx , E(660F38,88,_,x,_,0,2,T1S), 0 , 125, 0 , 4361 , 254, 126), // #900 + INST(Vextractf128 , VexMri , V(660F3A,19,_,1,0,_,_,_ ), 0 , 154, 0 , 4371 , 255, 123), // #901 + INST(Vextractf32x4 , VexMri_Lx , E(660F3A,19,_,x,_,0,4,T4 ), 0 , 155, 0 , 4384 , 256, 126), // #902 + INST(Vextractf32x8 , VexMri , E(660F3A,1B,_,2,_,0,5,T8 ), 0 , 156, 0 , 4398 , 257, 64 ), // #903 + INST(Vextractf64x2 , VexMri_Lx , E(660F3A,19,_,x,_,1,4,T2 ), 0 , 157, 0 , 4412 , 256, 129), // #904 + INST(Vextractf64x4 , VexMri , E(660F3A,1B,_,2,_,1,5,T4 ), 0 , 158, 0 , 4426 , 257, 66 ), // #905 + INST(Vextracti128 , 
VexMri , V(660F3A,39,_,1,0,_,_,_ ), 0 , 154, 0 , 4440 , 255, 130), // #906 + INST(Vextracti32x4 , VexMri_Lx , E(660F3A,39,_,x,_,0,4,T4 ), 0 , 155, 0 , 4453 , 256, 126), // #907 + INST(Vextracti32x8 , VexMri , E(660F3A,3B,_,2,_,0,5,T8 ), 0 , 156, 0 , 4467 , 257, 64 ), // #908 + INST(Vextracti64x2 , VexMri_Lx , E(660F3A,39,_,x,_,1,4,T2 ), 0 , 157, 0 , 4481 , 256, 129), // #909 + INST(Vextracti64x4 , VexMri , E(660F3A,3B,_,2,_,1,5,T4 ), 0 , 158, 0 , 4495 , 257, 66 ), // #910 + INST(Vextractps , VexMri , V(660F3A,17,_,0,I,I,2,T1S), 0 , 159, 0 , 4509 , 258, 122), // #911 + INST(Vfixupimmpd , VexRvmi_Lx , E(660F3A,54,_,x,_,1,4,FV ), 0 , 109, 0 , 4520 , 259, 126), // #912 + INST(Vfixupimmps , VexRvmi_Lx , E(660F3A,54,_,x,_,0,4,FV ), 0 , 108, 0 , 4532 , 260, 126), // #913 + INST(Vfixupimmsd , VexRvmi , E(660F3A,55,_,I,_,1,3,T1S), 0 , 160, 0 , 4544 , 261, 66 ), // #914 + INST(Vfixupimmss , VexRvmi , E(660F3A,55,_,I,_,0,2,T1S), 0 , 161, 0 , 4556 , 262, 66 ), // #915 + INST(Vfmadd132pd , VexRvm_Lx , V(660F38,98,_,x,1,1,4,FV ), 0 , 162, 0 , 4568 , 188, 136), // #916 + INST(Vfmadd132ps , VexRvm_Lx , V(660F38,98,_,x,0,0,4,FV ), 0 , 163, 0 , 4580 , 189, 136), // #917 + INST(Vfmadd132sd , VexRvm , V(660F38,99,_,I,1,1,3,T1S), 0 , 164, 0 , 4592 , 190, 137), // #918 + INST(Vfmadd132ss , VexRvm , V(660F38,99,_,I,0,0,2,T1S), 0 , 121, 0 , 4604 , 191, 137), // #919 + INST(Vfmadd213pd , VexRvm_Lx , V(660F38,A8,_,x,1,1,4,FV ), 0 , 162, 0 , 4616 , 188, 136), // #920 + INST(Vfmadd213ps , VexRvm_Lx , V(660F38,A8,_,x,0,0,4,FV ), 0 , 163, 0 , 4628 , 189, 136), // #921 + INST(Vfmadd213sd , VexRvm , V(660F38,A9,_,I,1,1,3,T1S), 0 , 164, 0 , 4640 , 190, 137), // #922 + INST(Vfmadd213ss , VexRvm , V(660F38,A9,_,I,0,0,2,T1S), 0 , 121, 0 , 4652 , 191, 137), // #923 + INST(Vfmadd231pd , VexRvm_Lx , V(660F38,B8,_,x,1,1,4,FV ), 0 , 162, 0 , 4664 , 188, 136), // #924 + INST(Vfmadd231ps , VexRvm_Lx , V(660F38,B8,_,x,0,0,4,FV ), 0 , 163, 0 , 4676 , 189, 136), // #925 + INST(Vfmadd231sd , VexRvm , V(660F38,B9,_,I,1,1,3,T1S), 0 , 164, 0 , 4688 , 190, 137), // #926 + INST(Vfmadd231ss , VexRvm , V(660F38,B9,_,I,0,0,2,T1S), 0 , 121, 0 , 4700 , 191, 137), // #927 + INST(Vfmaddpd , Fma4_Lx , V(660F3A,69,_,x,x,_,_,_ ), 0 , 72 , 0 , 4712 , 263, 138), // #928 + INST(Vfmaddps , Fma4_Lx , V(660F3A,68,_,x,x,_,_,_ ), 0 , 72 , 0 , 4721 , 263, 138), // #929 + INST(Vfmaddsd , Fma4 , V(660F3A,6B,_,0,x,_,_,_ ), 0 , 72 , 0 , 4730 , 264, 138), // #930 + INST(Vfmaddss , Fma4 , V(660F3A,6A,_,0,x,_,_,_ ), 0 , 72 , 0 , 4739 , 265, 138), // #931 + INST(Vfmaddsub132pd , VexRvm_Lx , V(660F38,96,_,x,1,1,4,FV ), 0 , 162, 0 , 4748 , 188, 136), // #932 + INST(Vfmaddsub132ps , VexRvm_Lx , V(660F38,96,_,x,0,0,4,FV ), 0 , 163, 0 , 4763 , 189, 136), // #933 + INST(Vfmaddsub213pd , VexRvm_Lx , V(660F38,A6,_,x,1,1,4,FV ), 0 , 162, 0 , 4778 , 188, 136), // #934 + INST(Vfmaddsub213ps , VexRvm_Lx , V(660F38,A6,_,x,0,0,4,FV ), 0 , 163, 0 , 4793 , 189, 136), // #935 + INST(Vfmaddsub231pd , VexRvm_Lx , V(660F38,B6,_,x,1,1,4,FV ), 0 , 162, 0 , 4808 , 188, 136), // #936 + INST(Vfmaddsub231ps , VexRvm_Lx , V(660F38,B6,_,x,0,0,4,FV ), 0 , 163, 0 , 4823 , 189, 136), // #937 + INST(Vfmaddsubpd , Fma4_Lx , V(660F3A,5D,_,x,x,_,_,_ ), 0 , 72 , 0 , 4838 , 263, 138), // #938 + INST(Vfmaddsubps , Fma4_Lx , V(660F3A,5C,_,x,x,_,_,_ ), 0 , 72 , 0 , 4850 , 263, 138), // #939 + INST(Vfmsub132pd , VexRvm_Lx , V(660F38,9A,_,x,1,1,4,FV ), 0 , 162, 0 , 4862 , 188, 136), // #940 + INST(Vfmsub132ps , VexRvm_Lx , V(660F38,9A,_,x,0,0,4,FV ), 0 , 163, 0 , 4874 , 189, 136), // #941 + INST(Vfmsub132sd 
, VexRvm , V(660F38,9B,_,I,1,1,3,T1S), 0 , 164, 0 , 4886 , 190, 137), // #942 + INST(Vfmsub132ss , VexRvm , V(660F38,9B,_,I,0,0,2,T1S), 0 , 121, 0 , 4898 , 191, 137), // #943 + INST(Vfmsub213pd , VexRvm_Lx , V(660F38,AA,_,x,1,1,4,FV ), 0 , 162, 0 , 4910 , 188, 136), // #944 + INST(Vfmsub213ps , VexRvm_Lx , V(660F38,AA,_,x,0,0,4,FV ), 0 , 163, 0 , 4922 , 189, 136), // #945 + INST(Vfmsub213sd , VexRvm , V(660F38,AB,_,I,1,1,3,T1S), 0 , 164, 0 , 4934 , 190, 137), // #946 + INST(Vfmsub213ss , VexRvm , V(660F38,AB,_,I,0,0,2,T1S), 0 , 121, 0 , 4946 , 191, 137), // #947 + INST(Vfmsub231pd , VexRvm_Lx , V(660F38,BA,_,x,1,1,4,FV ), 0 , 162, 0 , 4958 , 188, 136), // #948 + INST(Vfmsub231ps , VexRvm_Lx , V(660F38,BA,_,x,0,0,4,FV ), 0 , 163, 0 , 4970 , 189, 136), // #949 + INST(Vfmsub231sd , VexRvm , V(660F38,BB,_,I,1,1,3,T1S), 0 , 164, 0 , 4982 , 190, 137), // #950 + INST(Vfmsub231ss , VexRvm , V(660F38,BB,_,I,0,0,2,T1S), 0 , 121, 0 , 4994 , 191, 137), // #951 + INST(Vfmsubadd132pd , VexRvm_Lx , V(660F38,97,_,x,1,1,4,FV ), 0 , 162, 0 , 5006 , 188, 136), // #952 + INST(Vfmsubadd132ps , VexRvm_Lx , V(660F38,97,_,x,0,0,4,FV ), 0 , 163, 0 , 5021 , 189, 136), // #953 + INST(Vfmsubadd213pd , VexRvm_Lx , V(660F38,A7,_,x,1,1,4,FV ), 0 , 162, 0 , 5036 , 188, 136), // #954 + INST(Vfmsubadd213ps , VexRvm_Lx , V(660F38,A7,_,x,0,0,4,FV ), 0 , 163, 0 , 5051 , 189, 136), // #955 + INST(Vfmsubadd231pd , VexRvm_Lx , V(660F38,B7,_,x,1,1,4,FV ), 0 , 162, 0 , 5066 , 188, 136), // #956 + INST(Vfmsubadd231ps , VexRvm_Lx , V(660F38,B7,_,x,0,0,4,FV ), 0 , 163, 0 , 5081 , 189, 136), // #957 + INST(Vfmsubaddpd , Fma4_Lx , V(660F3A,5F,_,x,x,_,_,_ ), 0 , 72 , 0 , 5096 , 263, 138), // #958 + INST(Vfmsubaddps , Fma4_Lx , V(660F3A,5E,_,x,x,_,_,_ ), 0 , 72 , 0 , 5108 , 263, 138), // #959 + INST(Vfmsubpd , Fma4_Lx , V(660F3A,6D,_,x,x,_,_,_ ), 0 , 72 , 0 , 5120 , 263, 138), // #960 + INST(Vfmsubps , Fma4_Lx , V(660F3A,6C,_,x,x,_,_,_ ), 0 , 72 , 0 , 5129 , 263, 138), // #961 + INST(Vfmsubsd , Fma4 , V(660F3A,6F,_,0,x,_,_,_ ), 0 , 72 , 0 , 5138 , 264, 138), // #962 + INST(Vfmsubss , Fma4 , V(660F3A,6E,_,0,x,_,_,_ ), 0 , 72 , 0 , 5147 , 265, 138), // #963 + INST(Vfnmadd132pd , VexRvm_Lx , V(660F38,9C,_,x,1,1,4,FV ), 0 , 162, 0 , 5156 , 188, 136), // #964 + INST(Vfnmadd132ps , VexRvm_Lx , V(660F38,9C,_,x,0,0,4,FV ), 0 , 163, 0 , 5169 , 189, 136), // #965 + INST(Vfnmadd132sd , VexRvm , V(660F38,9D,_,I,1,1,3,T1S), 0 , 164, 0 , 5182 , 190, 137), // #966 + INST(Vfnmadd132ss , VexRvm , V(660F38,9D,_,I,0,0,2,T1S), 0 , 121, 0 , 5195 , 191, 137), // #967 + INST(Vfnmadd213pd , VexRvm_Lx , V(660F38,AC,_,x,1,1,4,FV ), 0 , 162, 0 , 5208 , 188, 136), // #968 + INST(Vfnmadd213ps , VexRvm_Lx , V(660F38,AC,_,x,0,0,4,FV ), 0 , 163, 0 , 5221 , 189, 136), // #969 + INST(Vfnmadd213sd , VexRvm , V(660F38,AD,_,I,1,1,3,T1S), 0 , 164, 0 , 5234 , 190, 137), // #970 + INST(Vfnmadd213ss , VexRvm , V(660F38,AD,_,I,0,0,2,T1S), 0 , 121, 0 , 5247 , 191, 137), // #971 + INST(Vfnmadd231pd , VexRvm_Lx , V(660F38,BC,_,x,1,1,4,FV ), 0 , 162, 0 , 5260 , 188, 136), // #972 + INST(Vfnmadd231ps , VexRvm_Lx , V(660F38,BC,_,x,0,0,4,FV ), 0 , 163, 0 , 5273 , 189, 136), // #973 + INST(Vfnmadd231sd , VexRvm , V(660F38,BC,_,I,1,1,3,T1S), 0 , 164, 0 , 5286 , 190, 137), // #974 + INST(Vfnmadd231ss , VexRvm , V(660F38,BC,_,I,0,0,2,T1S), 0 , 121, 0 , 5299 , 191, 137), // #975 + INST(Vfnmaddpd , Fma4_Lx , V(660F3A,79,_,x,x,_,_,_ ), 0 , 72 , 0 , 5312 , 263, 138), // #976 + INST(Vfnmaddps , Fma4_Lx , V(660F3A,78,_,x,x,_,_,_ ), 0 , 72 , 0 , 5322 , 263, 138), // #977 + INST(Vfnmaddsd , Fma4 , 
V(660F3A,7B,_,0,x,_,_,_ ), 0 , 72 , 0 , 5332 , 264, 138), // #978 + INST(Vfnmaddss , Fma4 , V(660F3A,7A,_,0,x,_,_,_ ), 0 , 72 , 0 , 5342 , 265, 138), // #979 + INST(Vfnmsub132pd , VexRvm_Lx , V(660F38,9E,_,x,1,1,4,FV ), 0 , 162, 0 , 5352 , 188, 136), // #980 + INST(Vfnmsub132ps , VexRvm_Lx , V(660F38,9E,_,x,0,0,4,FV ), 0 , 163, 0 , 5365 , 189, 136), // #981 + INST(Vfnmsub132sd , VexRvm , V(660F38,9F,_,I,1,1,3,T1S), 0 , 164, 0 , 5378 , 190, 137), // #982 + INST(Vfnmsub132ss , VexRvm , V(660F38,9F,_,I,0,0,2,T1S), 0 , 121, 0 , 5391 , 191, 137), // #983 + INST(Vfnmsub213pd , VexRvm_Lx , V(660F38,AE,_,x,1,1,4,FV ), 0 , 162, 0 , 5404 , 188, 136), // #984 + INST(Vfnmsub213ps , VexRvm_Lx , V(660F38,AE,_,x,0,0,4,FV ), 0 , 163, 0 , 5417 , 189, 136), // #985 + INST(Vfnmsub213sd , VexRvm , V(660F38,AF,_,I,1,1,3,T1S), 0 , 164, 0 , 5430 , 190, 137), // #986 + INST(Vfnmsub213ss , VexRvm , V(660F38,AF,_,I,0,0,2,T1S), 0 , 121, 0 , 5443 , 191, 137), // #987 + INST(Vfnmsub231pd , VexRvm_Lx , V(660F38,BE,_,x,1,1,4,FV ), 0 , 162, 0 , 5456 , 188, 136), // #988 + INST(Vfnmsub231ps , VexRvm_Lx , V(660F38,BE,_,x,0,0,4,FV ), 0 , 163, 0 , 5469 , 189, 136), // #989 + INST(Vfnmsub231sd , VexRvm , V(660F38,BF,_,I,1,1,3,T1S), 0 , 164, 0 , 5482 , 190, 137), // #990 + INST(Vfnmsub231ss , VexRvm , V(660F38,BF,_,I,0,0,2,T1S), 0 , 121, 0 , 5495 , 191, 137), // #991 + INST(Vfnmsubpd , Fma4_Lx , V(660F3A,7D,_,x,x,_,_,_ ), 0 , 72 , 0 , 5508 , 263, 138), // #992 + INST(Vfnmsubps , Fma4_Lx , V(660F3A,7C,_,x,x,_,_,_ ), 0 , 72 , 0 , 5518 , 263, 138), // #993 + INST(Vfnmsubsd , Fma4 , V(660F3A,7F,_,0,x,_,_,_ ), 0 , 72 , 0 , 5528 , 264, 138), // #994 + INST(Vfnmsubss , Fma4 , V(660F3A,7E,_,0,x,_,_,_ ), 0 , 72 , 0 , 5538 , 265, 138), // #995 + INST(Vfpclasspd , VexRmi_Lx , E(660F3A,66,_,x,_,1,4,FV ), 0 , 109, 0 , 5548 , 266, 129), // #996 + INST(Vfpclassps , VexRmi_Lx , E(660F3A,66,_,x,_,0,4,FV ), 0 , 108, 0 , 5559 , 267, 129), // #997 + INST(Vfpclasssd , VexRmi_Lx , E(660F3A,67,_,I,_,1,3,T1S), 0 , 160, 0 , 5570 , 268, 64 ), // #998 + INST(Vfpclassss , VexRmi_Lx , E(660F3A,67,_,I,_,0,2,T1S), 0 , 161, 0 , 5581 , 269, 64 ), // #999 + INST(Vfrczpd , VexRm_Lx , V(XOP_M9,81,_,x,0,_,_,_ ), 0 , 77 , 0 , 5592 , 270, 139), // #1000 + INST(Vfrczps , VexRm_Lx , V(XOP_M9,80,_,x,0,_,_,_ ), 0 , 77 , 0 , 5600 , 270, 139), // #1001 + INST(Vfrczsd , VexRm , V(XOP_M9,83,_,0,0,_,_,_ ), 0 , 77 , 0 , 5608 , 271, 139), // #1002 + INST(Vfrczss , VexRm , V(XOP_M9,82,_,0,0,_,_,_ ), 0 , 77 , 0 , 5616 , 272, 139), // #1003 + INST(Vgatherdpd , VexRmvRm_VM , V(660F38,92,_,x,1,_,_,_ ), V(660F38,92,_,x,_,1,3,T1S), 165, 79 , 5624 , 273, 140), // #1004 + INST(Vgatherdps , VexRmvRm_VM , V(660F38,92,_,x,0,_,_,_ ), V(660F38,92,_,x,_,0,2,T1S), 96 , 80 , 5635 , 274, 140), // #1005 + INST(Vgatherpf0dpd , VexM_VM , E(660F38,C6,1,2,_,1,3,T1S), 0 , 166, 0 , 5646 , 275, 141), // #1006 + INST(Vgatherpf0dps , VexM_VM , E(660F38,C6,1,2,_,0,2,T1S), 0 , 167, 0 , 5660 , 276, 141), // #1007 + INST(Vgatherpf0qpd , VexM_VM , E(660F38,C7,1,2,_,1,3,T1S), 0 , 166, 0 , 5674 , 277, 141), // #1008 + INST(Vgatherpf0qps , VexM_VM , E(660F38,C7,1,2,_,0,2,T1S), 0 , 167, 0 , 5688 , 277, 141), // #1009 + INST(Vgatherpf1dpd , VexM_VM , E(660F38,C6,2,2,_,1,3,T1S), 0 , 168, 0 , 5702 , 275, 141), // #1010 + INST(Vgatherpf1dps , VexM_VM , E(660F38,C6,2,2,_,0,2,T1S), 0 , 169, 0 , 5716 , 276, 141), // #1011 + INST(Vgatherpf1qpd , VexM_VM , E(660F38,C7,2,2,_,1,3,T1S), 0 , 168, 0 , 5730 , 277, 141), // #1012 + INST(Vgatherpf1qps , VexM_VM , E(660F38,C7,2,2,_,0,2,T1S), 0 , 169, 0 , 5744 , 277, 141), // 
#1013 + INST(Vgatherqpd , VexRmvRm_VM , V(660F38,93,_,x,1,_,_,_ ), V(660F38,93,_,x,_,1,3,T1S), 165, 81 , 5758 , 278, 140), // #1014 + INST(Vgatherqps , VexRmvRm_VM , V(660F38,93,_,x,0,_,_,_ ), V(660F38,93,_,x,_,0,2,T1S), 96 , 82 , 5769 , 279, 140), // #1015 + INST(Vgetexppd , VexRm_Lx , E(660F38,42,_,x,_,1,4,FV ), 0 , 112, 0 , 5780 , 239, 126), // #1016 + INST(Vgetexpps , VexRm_Lx , E(660F38,42,_,x,_,0,4,FV ), 0 , 111, 0 , 5790 , 243, 126), // #1017 + INST(Vgetexpsd , VexRvm , E(660F38,43,_,I,_,1,3,T1S), 0 , 124, 0 , 5800 , 280, 66 ), // #1018 + INST(Vgetexpss , VexRvm , E(660F38,43,_,I,_,0,2,T1S), 0 , 125, 0 , 5810 , 281, 66 ), // #1019 + INST(Vgetmantpd , VexRmi_Lx , E(660F3A,26,_,x,_,1,4,FV ), 0 , 109, 0 , 5820 , 282, 126), // #1020 + INST(Vgetmantps , VexRmi_Lx , E(660F3A,26,_,x,_,0,4,FV ), 0 , 108, 0 , 5831 , 283, 126), // #1021 + INST(Vgetmantsd , VexRvmi , E(660F3A,27,_,I,_,1,3,T1S), 0 , 160, 0 , 5842 , 261, 66 ), // #1022 + INST(Vgetmantss , VexRvmi , E(660F3A,27,_,I,_,0,2,T1S), 0 , 161, 0 , 5853 , 262, 66 ), // #1023 + INST(Vgf2p8affineinvqb, VexRvmi_Lx , V(660F3A,CF,_,x,1,1,4,FV ), 0 , 170, 0 , 5864 , 284, 142), // #1024 + INST(Vgf2p8affineqb , VexRvmi_Lx , V(660F3A,CE,_,x,1,1,4,FV ), 0 , 170, 0 , 5882 , 284, 142), // #1025 + INST(Vgf2p8mulb , VexRvm_Lx , V(660F38,CF,_,x,0,0,4,FV ), 0 , 163, 0 , 5897 , 285, 142), // #1026 + INST(Vhaddpd , VexRvm_Lx , V(660F00,7C,_,x,I,_,_,_ ), 0 , 68 , 0 , 5908 , 192, 123), // #1027 + INST(Vhaddps , VexRvm_Lx , V(F20F00,7C,_,x,I,_,_,_ ), 0 , 106, 0 , 5916 , 192, 123), // #1028 + INST(Vhsubpd , VexRvm_Lx , V(660F00,7D,_,x,I,_,_,_ ), 0 , 68 , 0 , 5924 , 192, 123), // #1029 + INST(Vhsubps , VexRvm_Lx , V(F20F00,7D,_,x,I,_,_,_ ), 0 , 106, 0 , 5932 , 192, 123), // #1030 + INST(Vinsertf128 , VexRvmi , V(660F3A,18,_,1,0,_,_,_ ), 0 , 154, 0 , 5940 , 286, 123), // #1031 + INST(Vinsertf32x4 , VexRvmi_Lx , E(660F3A,18,_,x,_,0,4,T4 ), 0 , 155, 0 , 5952 , 287, 126), // #1032 + INST(Vinsertf32x8 , VexRvmi , E(660F3A,1A,_,2,_,0,5,T8 ), 0 , 156, 0 , 5965 , 288, 64 ), // #1033 + INST(Vinsertf64x2 , VexRvmi_Lx , E(660F3A,18,_,x,_,1,4,T2 ), 0 , 157, 0 , 5978 , 287, 129), // #1034 + INST(Vinsertf64x4 , VexRvmi , E(660F3A,1A,_,2,_,1,5,T4 ), 0 , 158, 0 , 5991 , 288, 66 ), // #1035 + INST(Vinserti128 , VexRvmi , V(660F3A,38,_,1,0,_,_,_ ), 0 , 154, 0 , 6004 , 286, 130), // #1036 + INST(Vinserti32x4 , VexRvmi_Lx , E(660F3A,38,_,x,_,0,4,T4 ), 0 , 155, 0 , 6016 , 287, 126), // #1037 + INST(Vinserti32x8 , VexRvmi , E(660F3A,3A,_,2,_,0,5,T8 ), 0 , 156, 0 , 6029 , 288, 64 ), // #1038 + INST(Vinserti64x2 , VexRvmi_Lx , E(660F3A,38,_,x,_,1,4,T2 ), 0 , 157, 0 , 6042 , 287, 129), // #1039 + INST(Vinserti64x4 , VexRvmi , E(660F3A,3A,_,2,_,1,5,T4 ), 0 , 158, 0 , 6055 , 288, 66 ), // #1040 + INST(Vinsertps , VexRvmi , V(660F3A,21,_,0,I,0,2,T1S), 0 , 159, 0 , 6068 , 289, 122), // #1041 + INST(Vlddqu , VexRm_Lx , V(F20F00,F0,_,x,I,_,_,_ ), 0 , 106, 0 , 6078 , 290, 123), // #1042 + INST(Vldmxcsr , VexM , V(000F00,AE,2,0,I,_,_,_ ), 0 , 171, 0 , 6085 , 291, 123), // #1043 + INST(Vmaskmovdqu , VexRm_ZDI , V(660F00,F7,_,0,I,_,_,_ ), 0 , 68 , 0 , 6094 , 292, 123), // #1044 + INST(Vmaskmovpd , VexRvmMvr_Lx , V(660F38,2D,_,x,0,_,_,_ ), V(660F38,2F,_,x,0,_,_,_ ), 96 , 83 , 6106 , 293, 123), // #1045 + INST(Vmaskmovps , VexRvmMvr_Lx , V(660F38,2C,_,x,0,_,_,_ ), V(660F38,2E,_,x,0,_,_,_ ), 96 , 84 , 6117 , 293, 123), // #1046 + INST(Vmaxpd , VexRvm_Lx , V(660F00,5F,_,x,I,1,4,FV ), 0 , 102, 0 , 6128 , 294, 121), // #1047 + INST(Vmaxps , VexRvm_Lx , V(000F00,5F,_,x,I,0,4,FV ), 0 , 103, 0 , 6135 , 
295, 121), // #1048 + INST(Vmaxsd , VexRvm , V(F20F00,5F,_,I,I,1,3,T1S), 0 , 104, 0 , 6142 , 296, 121), // #1049 + INST(Vmaxss , VexRvm , V(F30F00,5F,_,I,I,0,2,T1S), 0 , 105, 0 , 6149 , 235, 121), // #1050 + INST(Vmcall , X86Op , O(000F01,C1,_,_,_,_,_,_ ), 0 , 21 , 0 , 6156 , 30 , 56 ), // #1051 + INST(Vmclear , X86M_Only , O(660F00,C7,6,_,_,_,_,_ ), 0 , 25 , 0 , 6163 , 32 , 56 ), // #1052 + INST(Vmfunc , X86Op , O(000F01,D4,_,_,_,_,_,_ ), 0 , 21 , 0 , 6171 , 30 , 56 ), // #1053 + INST(Vminpd , VexRvm_Lx , V(660F00,5D,_,x,I,1,4,FV ), 0 , 102, 0 , 6178 , 294, 121), // #1054 + INST(Vminps , VexRvm_Lx , V(000F00,5D,_,x,I,0,4,FV ), 0 , 103, 0 , 6185 , 295, 121), // #1055 + INST(Vminsd , VexRvm , V(F20F00,5D,_,I,I,1,3,T1S), 0 , 104, 0 , 6192 , 296, 121), // #1056 + INST(Vminss , VexRvm , V(F30F00,5D,_,I,I,0,2,T1S), 0 , 105, 0 , 6199 , 235, 121), // #1057 + INST(Vmlaunch , X86Op , O(000F01,C2,_,_,_,_,_,_ ), 0 , 21 , 0 , 6206 , 30 , 56 ), // #1058 + INST(Vmload , X86Op_xAX , O(000F01,DA,_,_,_,_,_,_ ), 0 , 21 , 0 , 6215 , 297, 22 ), // #1059 + INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 21 , 0 , 6222 , 30 , 22 ), // #1060 + INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 172, 85 , 6230 , 298, 121), // #1061 + INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 173, 86 , 6238 , 298, 121), // #1062 + INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 174, 87 , 6246 , 299, 122), // #1063 + INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 175, 0 , 6252 , 300, 121), // #1064 + INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 68 , 88 , 6261 , 301, 123), // #1065 + INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 176, 89 , 6269 , 302, 126), // #1066 + INST(Vmovdqa64 , VexRmMr_Lx , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 177, 90 , 6279 , 302, 126), // #1067 + INST(Vmovdqu , VexRmMr_Lx , V(F30F00,6F,_,x,I,_,_,_ ), V(F30F00,7F,_,x,I,_,_,_ ), 178, 91 , 6289 , 301, 123), // #1068 + INST(Vmovdqu16 , VexRmMr_Lx , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 179, 92 , 6297 , 302, 128), // #1069 + INST(Vmovdqu32 , VexRmMr_Lx , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 180, 93 , 6307 , 302, 126), // #1070 + INST(Vmovdqu64 , VexRmMr_Lx , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 181, 94 , 6317 , 302, 126), // #1071 + INST(Vmovdqu8 , VexRmMr_Lx , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 182, 95 , 6327 , 302, 128), // #1072 + INST(Vmovhlps , VexRvm , V(000F00,12,_,0,I,0,_,_ ), 0 , 71 , 0 , 6336 , 303, 122), // #1073 + INST(Vmovhpd , VexRvmMr , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 122, 96 , 6345 , 304, 122), // #1074 + INST(Vmovhps , VexRvmMr , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 183, 97 , 6353 , 304, 122), // #1075 + INST(Vmovlhps , VexRvm , V(000F00,16,_,0,I,0,_,_ ), 0 , 71 , 0 , 6361 , 303, 122), // #1076 + INST(Vmovlpd , VexRvmMr , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 122, 98 , 6370 , 304, 122), // #1077 + INST(Vmovlps , VexRvmMr , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 183, 99 , 6378 , 304, 122), // #1078 + INST(Vmovmskpd , VexRm_Lx , V(660F00,50,_,x,I,_,_,_ ), 0 , 68 , 0 , 6386 , 305, 123), // #1079 + INST(Vmovmskps , VexRm_Lx , V(000F00,50,_,x,I,_,_,_ ), 0 , 71 , 0 , 6396 , 305, 123), // #1080 + INST(Vmovntdq , VexMr_Lx , V(660F00,E7,_,x,I,0,4,FVM), 0 , 184, 0 , 6406 , 306, 121), // 
#1081 + INST(Vmovntdqa , VexRm_Lx , V(660F38,2A,_,x,I,0,4,FVM), 0 , 107, 0 , 6415 , 307, 131), // #1082 + INST(Vmovntpd , VexMr_Lx , V(660F00,2B,_,x,I,1,4,FVM), 0 , 172, 0 , 6425 , 306, 121), // #1083 + INST(Vmovntps , VexMr_Lx , V(000F00,2B,_,x,I,0,4,FVM), 0 , 173, 0 , 6434 , 306, 121), // #1084 + INST(Vmovq , VexMovdMovq , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 122, 100, 6443 , 308, 122), // #1085 + INST(Vmovsd , VexMovssMovsd , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 104, 101, 6449 , 309, 122), // #1086 + INST(Vmovshdup , VexRm_Lx , V(F30F00,16,_,x,I,0,4,FVM), 0 , 185, 0 , 6456 , 310, 121), // #1087 + INST(Vmovsldup , VexRm_Lx , V(F30F00,12,_,x,I,0,4,FVM), 0 , 185, 0 , 6466 , 310, 121), // #1088 + INST(Vmovss , VexMovssMovsd , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 105, 102, 6476 , 311, 122), // #1089 + INST(Vmovupd , VexRmMr_Lx , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 172, 103, 6483 , 298, 121), // #1090 + INST(Vmovups , VexRmMr_Lx , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 173, 104, 6491 , 298, 121), // #1091 + INST(Vmpsadbw , VexRvmi_Lx , V(660F3A,42,_,x,I,_,_,_ ), 0 , 72 , 0 , 6499 , 205, 143), // #1092 + INST(Vmptrld , X86M_Only , O(000F00,C7,6,_,_,_,_,_ ), 0 , 78 , 0 , 6508 , 32 , 56 ), // #1093 + INST(Vmptrst , X86M_Only , O(000F00,C7,7,_,_,_,_,_ ), 0 , 22 , 0 , 6516 , 32 , 56 ), // #1094 + INST(Vmread , X86Mr_NoSize , O(000F00,78,_,_,_,_,_,_ ), 0 , 4 , 0 , 6524 , 312, 56 ), // #1095 + INST(Vmresume , X86Op , O(000F01,C3,_,_,_,_,_,_ ), 0 , 21 , 0 , 6531 , 30 , 56 ), // #1096 + INST(Vmrun , X86Op_xAX , O(000F01,D8,_,_,_,_,_,_ ), 0 , 21 , 0 , 6540 , 297, 22 ), // #1097 + INST(Vmsave , X86Op_xAX , O(000F01,DB,_,_,_,_,_,_ ), 0 , 21 , 0 , 6546 , 297, 22 ), // #1098 + INST(Vmulpd , VexRvm_Lx , V(660F00,59,_,x,I,1,4,FV ), 0 , 102, 0 , 6553 , 188, 121), // #1099 + INST(Vmulps , VexRvm_Lx , V(000F00,59,_,x,I,0,4,FV ), 0 , 103, 0 , 6560 , 189, 121), // #1100 + INST(Vmulsd , VexRvm_Lx , V(F20F00,59,_,I,I,1,3,T1S), 0 , 104, 0 , 6567 , 190, 122), // #1101 + INST(Vmulss , VexRvm_Lx , V(F30F00,59,_,I,I,0,2,T1S), 0 , 105, 0 , 6574 , 191, 122), // #1102 + INST(Vmwrite , X86Rm_NoSize , O(000F00,79,_,_,_,_,_,_ ), 0 , 4 , 0 , 6581 , 313, 56 ), // #1103 + INST(Vmxon , X86M_Only , O(F30F00,C7,6,_,_,_,_,_ ), 0 , 24 , 0 , 6589 , 32 , 56 ), // #1104 + INST(Vorpd , VexRvm_Lx , V(660F00,56,_,x,I,1,4,FV ), 0 , 102, 0 , 6595 , 200, 127), // #1105 + INST(Vorps , VexRvm_Lx , V(000F00,56,_,x,I,0,4,FV ), 0 , 103, 0 , 6601 , 201, 127), // #1106 + INST(Vp2intersectd , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,0,4,FV ), 0 , 186, 0 , 6607 , 314, 144), // #1107 + INST(Vp2intersectq , VexRvm_Lx_2xK , E(F20F38,68,_,_,_,1,4,FV ), 0 , 187, 0 , 6621 , 315, 144), // #1108 + INST(Vp4dpwssd , VexRm_T1_4X , E(F20F38,52,_,2,_,0,2,T4X), 0 , 101, 0 , 6635 , 186, 145), // #1109 + INST(Vp4dpwssds , VexRm_T1_4X , E(F20F38,53,_,2,_,0,2,T4X), 0 , 101, 0 , 6645 , 186, 145), // #1110 + INST(Vpabsb , VexRm_Lx , V(660F38,1C,_,x,I,_,4,FVM), 0 , 107, 0 , 6656 , 310, 146), // #1111 + INST(Vpabsd , VexRm_Lx , V(660F38,1E,_,x,I,0,4,FV ), 0 , 163, 0 , 6663 , 310, 131), // #1112 + INST(Vpabsq , VexRm_Lx , E(660F38,1F,_,x,_,1,4,FV ), 0 , 112, 0 , 6670 , 254, 126), // #1113 + INST(Vpabsw , VexRm_Lx , V(660F38,1D,_,x,I,_,4,FVM), 0 , 107, 0 , 6677 , 310, 146), // #1114 + INST(Vpackssdw , VexRvm_Lx , V(660F00,6B,_,x,I,0,4,FV ), 0 , 133, 0 , 6684 , 199, 146), // #1115 + INST(Vpacksswb , VexRvm_Lx , V(660F00,63,_,x,I,I,4,FVM), 0 , 184, 0 , 6694 , 285, 146), // #1116 + 
INST(Vpackusdw , VexRvm_Lx , V(660F38,2B,_,x,I,0,4,FV ), 0 , 163, 0 , 6704 , 199, 146), // #1117 + INST(Vpackuswb , VexRvm_Lx , V(660F00,67,_,x,I,I,4,FVM), 0 , 184, 0 , 6714 , 285, 146), // #1118 + INST(Vpaddb , VexRvm_Lx , V(660F00,FC,_,x,I,I,4,FVM), 0 , 184, 0 , 6724 , 285, 146), // #1119 + INST(Vpaddd , VexRvm_Lx , V(660F00,FE,_,x,I,0,4,FV ), 0 , 133, 0 , 6731 , 199, 131), // #1120 + INST(Vpaddq , VexRvm_Lx , V(660F00,D4,_,x,I,1,4,FV ), 0 , 102, 0 , 6738 , 198, 131), // #1121 + INST(Vpaddsb , VexRvm_Lx , V(660F00,EC,_,x,I,I,4,FVM), 0 , 184, 0 , 6745 , 285, 146), // #1122 + INST(Vpaddsw , VexRvm_Lx , V(660F00,ED,_,x,I,I,4,FVM), 0 , 184, 0 , 6753 , 285, 146), // #1123 + INST(Vpaddusb , VexRvm_Lx , V(660F00,DC,_,x,I,I,4,FVM), 0 , 184, 0 , 6761 , 285, 146), // #1124 + INST(Vpaddusw , VexRvm_Lx , V(660F00,DD,_,x,I,I,4,FVM), 0 , 184, 0 , 6770 , 285, 146), // #1125 + INST(Vpaddw , VexRvm_Lx , V(660F00,FD,_,x,I,I,4,FVM), 0 , 184, 0 , 6779 , 285, 146), // #1126 + INST(Vpalignr , VexRvmi_Lx , V(660F3A,0F,_,x,I,I,4,FVM), 0 , 188, 0 , 6786 , 284, 146), // #1127 + INST(Vpand , VexRvm_Lx , V(660F00,DB,_,x,I,_,_,_ ), 0 , 68 , 0 , 6795 , 316, 143), // #1128 + INST(Vpandd , VexRvm_Lx , E(660F00,DB,_,x,_,0,4,FV ), 0 , 189, 0 , 6801 , 317, 126), // #1129 + INST(Vpandn , VexRvm_Lx , V(660F00,DF,_,x,I,_,_,_ ), 0 , 68 , 0 , 6808 , 318, 143), // #1130 + INST(Vpandnd , VexRvm_Lx , E(660F00,DF,_,x,_,0,4,FV ), 0 , 189, 0 , 6815 , 319, 126), // #1131 + INST(Vpandnq , VexRvm_Lx , E(660F00,DF,_,x,_,1,4,FV ), 0 , 130, 0 , 6823 , 320, 126), // #1132 + INST(Vpandq , VexRvm_Lx , E(660F00,DB,_,x,_,1,4,FV ), 0 , 130, 0 , 6831 , 321, 126), // #1133 + INST(Vpavgb , VexRvm_Lx , V(660F00,E0,_,x,I,I,4,FVM), 0 , 184, 0 , 6838 , 285, 146), // #1134 + INST(Vpavgw , VexRvm_Lx , V(660F00,E3,_,x,I,I,4,FVM), 0 , 184, 0 , 6845 , 285, 146), // #1135 + INST(Vpblendd , VexRvmi_Lx , V(660F3A,02,_,x,0,_,_,_ ), 0 , 72 , 0 , 6852 , 205, 130), // #1136 + INST(Vpblendvb , VexRvmr , V(660F3A,4C,_,x,0,_,_,_ ), 0 , 72 , 0 , 6861 , 206, 143), // #1137 + INST(Vpblendw , VexRvmi_Lx , V(660F3A,0E,_,x,I,_,_,_ ), 0 , 72 , 0 , 6871 , 205, 143), // #1138 + INST(Vpbroadcastb , VexRm_Lx_Bcst , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 190, 105, 6880 , 322, 147), // #1139 + INST(Vpbroadcastd , VexRm_Lx_Bcst , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 121, 106, 6893 , 323, 140), // #1140 + INST(Vpbroadcastmb2d , VexRm_Lx , E(F30F38,3A,_,x,_,0,_,_ ), 0 , 128, 0 , 6906 , 324, 148), // #1141 + INST(Vpbroadcastmb2q , VexRm_Lx , E(F30F38,2A,_,x,_,1,_,_ ), 0 , 191, 0 , 6922 , 324, 148), // #1142 + INST(Vpbroadcastq , VexRm_Lx_Bcst , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 120, 107, 6938 , 325, 140), // #1143 + INST(Vpbroadcastw , VexRm_Lx_Bcst , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 192, 108, 6951 , 326, 147), // #1144 + INST(Vpclmulqdq , VexRvmi_Lx , V(660F3A,44,_,x,I,_,4,FVM), 0 , 188, 0 , 6964 , 327, 149), // #1145 + INST(Vpcmov , VexRvrmRvmr_Lx , V(XOP_M8,A2,_,x,x,_,_,_ ), 0 , 193, 0 , 6975 , 263, 139), // #1146 + INST(Vpcmpb , VexRvmi_Lx , E(660F3A,3F,_,x,_,0,4,FVM), 0 , 151, 0 , 6982 , 328, 128), // #1147 + INST(Vpcmpd , VexRvmi_Lx , E(660F3A,1F,_,x,_,0,4,FV ), 0 , 108, 0 , 6989 , 329, 126), // #1148 + INST(Vpcmpeqb , VexRvm_Lx , V(660F00,74,_,x,I,I,4,FV ), 0 , 133, 0 , 6996 , 330, 146), // #1149 + INST(Vpcmpeqd , VexRvm_Lx , V(660F00,76,_,x,I,0,4,FVM), 0 , 184, 0 , 7005 , 331, 131), // #1150 + INST(Vpcmpeqq , VexRvm_Lx , V(660F38,29,_,x,I,1,4,FVM), 0 , 194, 0 , 7014 , 332, 131), // #1151 + 
INST(Vpcmpeqw , VexRvm_Lx , V(660F00,75,_,x,I,I,4,FV ), 0 , 133, 0 , 7023 , 330, 146), // #1152 + INST(Vpcmpestri , VexRmi , V(660F3A,61,_,0,I,_,_,_ ), 0 , 72 , 0 , 7032 , 333, 150), // #1153 + INST(Vpcmpestrm , VexRmi , V(660F3A,60,_,0,I,_,_,_ ), 0 , 72 , 0 , 7043 , 334, 150), // #1154 + INST(Vpcmpgtb , VexRvm_Lx , V(660F00,64,_,x,I,I,4,FV ), 0 , 133, 0 , 7054 , 330, 146), // #1155 + INST(Vpcmpgtd , VexRvm_Lx , V(660F00,66,_,x,I,0,4,FVM), 0 , 184, 0 , 7063 , 331, 131), // #1156 + INST(Vpcmpgtq , VexRvm_Lx , V(660F38,37,_,x,I,1,4,FVM), 0 , 194, 0 , 7072 , 332, 131), // #1157 + INST(Vpcmpgtw , VexRvm_Lx , V(660F00,65,_,x,I,I,4,FV ), 0 , 133, 0 , 7081 , 330, 146), // #1158 + INST(Vpcmpistri , VexRmi , V(660F3A,63,_,0,I,_,_,_ ), 0 , 72 , 0 , 7090 , 335, 150), // #1159 + INST(Vpcmpistrm , VexRmi , V(660F3A,62,_,0,I,_,_,_ ), 0 , 72 , 0 , 7101 , 336, 150), // #1160 + INST(Vpcmpq , VexRvmi_Lx , E(660F3A,1F,_,x,_,1,4,FV ), 0 , 109, 0 , 7112 , 337, 126), // #1161 + INST(Vpcmpub , VexRvmi_Lx , E(660F3A,3E,_,x,_,0,4,FVM), 0 , 151, 0 , 7119 , 328, 128), // #1162 + INST(Vpcmpud , VexRvmi_Lx , E(660F3A,1E,_,x,_,0,4,FV ), 0 , 108, 0 , 7127 , 329, 126), // #1163 + INST(Vpcmpuq , VexRvmi_Lx , E(660F3A,1E,_,x,_,1,4,FV ), 0 , 109, 0 , 7135 , 337, 126), // #1164 + INST(Vpcmpuw , VexRvmi_Lx , E(660F3A,3E,_,x,_,1,4,FVM), 0 , 195, 0 , 7143 , 337, 128), // #1165 + INST(Vpcmpw , VexRvmi_Lx , E(660F3A,3F,_,x,_,1,4,FVM), 0 , 195, 0 , 7151 , 337, 128), // #1166 + INST(Vpcomb , VexRvmi , V(XOP_M8,CC,_,0,0,_,_,_ ), 0 , 193, 0 , 7158 , 251, 139), // #1167 + INST(Vpcomd , VexRvmi , V(XOP_M8,CE,_,0,0,_,_,_ ), 0 , 193, 0 , 7165 , 251, 139), // #1168 + INST(Vpcompressb , VexMr_Lx , E(660F38,63,_,x,_,0,0,T1S), 0 , 196, 0 , 7172 , 220, 151), // #1169 + INST(Vpcompressd , VexMr_Lx , E(660F38,8B,_,x,_,0,2,T1S), 0 , 125, 0 , 7184 , 220, 126), // #1170 + INST(Vpcompressq , VexMr_Lx , E(660F38,8B,_,x,_,1,3,T1S), 0 , 124, 0 , 7196 , 220, 126), // #1171 + INST(Vpcompressw , VexMr_Lx , E(660F38,63,_,x,_,1,1,T1S), 0 , 197, 0 , 7208 , 220, 151), // #1172 + INST(Vpcomq , VexRvmi , V(XOP_M8,CF,_,0,0,_,_,_ ), 0 , 193, 0 , 7220 , 251, 139), // #1173 + INST(Vpcomub , VexRvmi , V(XOP_M8,EC,_,0,0,_,_,_ ), 0 , 193, 0 , 7227 , 251, 139), // #1174 + INST(Vpcomud , VexRvmi , V(XOP_M8,EE,_,0,0,_,_,_ ), 0 , 193, 0 , 7235 , 251, 139), // #1175 + INST(Vpcomuq , VexRvmi , V(XOP_M8,EF,_,0,0,_,_,_ ), 0 , 193, 0 , 7243 , 251, 139), // #1176 + INST(Vpcomuw , VexRvmi , V(XOP_M8,ED,_,0,0,_,_,_ ), 0 , 193, 0 , 7251 , 251, 139), // #1177 + INST(Vpcomw , VexRvmi , V(XOP_M8,CD,_,0,0,_,_,_ ), 0 , 193, 0 , 7259 , 251, 139), // #1178 + INST(Vpconflictd , VexRm_Lx , E(660F38,C4,_,x,_,0,4,FV ), 0 , 111, 0 , 7266 , 338, 148), // #1179 + INST(Vpconflictq , VexRm_Lx , E(660F38,C4,_,x,_,1,4,FV ), 0 , 112, 0 , 7278 , 338, 148), // #1180 + INST(Vpdpbusd , VexRvm_Lx , E(660F38,50,_,x,_,0,4,FV ), 0 , 111, 0 , 7290 , 203, 152), // #1181 + INST(Vpdpbusds , VexRvm_Lx , E(660F38,51,_,x,_,0,4,FV ), 0 , 111, 0 , 7299 , 203, 152), // #1182 + INST(Vpdpwssd , VexRvm_Lx , E(660F38,52,_,x,_,0,4,FV ), 0 , 111, 0 , 7309 , 203, 152), // #1183 + INST(Vpdpwssds , VexRvm_Lx , E(660F38,53,_,x,_,0,4,FV ), 0 , 111, 0 , 7318 , 203, 152), // #1184 + INST(Vperm2f128 , VexRvmi , V(660F3A,06,_,1,0,_,_,_ ), 0 , 154, 0 , 7328 , 339, 123), // #1185 + INST(Vperm2i128 , VexRvmi , V(660F3A,46,_,1,0,_,_,_ ), 0 , 154, 0 , 7339 , 339, 130), // #1186 + INST(Vpermb , VexRvm_Lx , E(660F38,8D,_,x,_,0,4,FVM), 0 , 110, 0 , 7350 , 202, 153), // #1187 + INST(Vpermd , VexRvm_Lx , V(660F38,36,_,x,0,0,4,FV ), 0 , 163, 
0 , 7357 , 340, 140), // #1188 + INST(Vpermi2b , VexRvm_Lx , E(660F38,75,_,x,_,0,4,FVM), 0 , 110, 0 , 7364 , 202, 153), // #1189 + INST(Vpermi2d , VexRvm_Lx , E(660F38,76,_,x,_,0,4,FV ), 0 , 111, 0 , 7373 , 203, 126), // #1190 + INST(Vpermi2pd , VexRvm_Lx , E(660F38,77,_,x,_,1,4,FV ), 0 , 112, 0 , 7382 , 204, 126), // #1191 + INST(Vpermi2ps , VexRvm_Lx , E(660F38,77,_,x,_,0,4,FV ), 0 , 111, 0 , 7392 , 203, 126), // #1192 + INST(Vpermi2q , VexRvm_Lx , E(660F38,76,_,x,_,1,4,FV ), 0 , 112, 0 , 7402 , 204, 126), // #1193 + INST(Vpermi2w , VexRvm_Lx , E(660F38,75,_,x,_,1,4,FVM), 0 , 113, 0 , 7411 , 202, 128), // #1194 + INST(Vpermil2pd , VexRvrmiRvmri_Lx , V(660F3A,49,_,x,x,_,_,_ ), 0 , 72 , 0 , 7420 , 341, 139), // #1195 + INST(Vpermil2ps , VexRvrmiRvmri_Lx , V(660F3A,48,_,x,x,_,_,_ ), 0 , 72 , 0 , 7431 , 341, 139), // #1196 + INST(Vpermilpd , VexRvmRmi_Lx , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 198, 109, 7442 , 342, 121), // #1197 + INST(Vpermilps , VexRvmRmi_Lx , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 163, 110, 7452 , 342, 121), // #1198 + INST(Vpermpd , VexRvmRmi_Lx , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 199, 111, 7462 , 343, 140), // #1199 + INST(Vpermps , VexRvm_Lx , V(660F38,16,_,x,0,0,4,FV ), 0 , 163, 0 , 7470 , 340, 140), // #1200 + INST(Vpermq , VexRvmRmi_Lx , V(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 198, 112, 7478 , 343, 140), // #1201 + INST(Vpermt2b , VexRvm_Lx , E(660F38,7D,_,x,_,0,4,FVM), 0 , 110, 0 , 7485 , 202, 153), // #1202 + INST(Vpermt2d , VexRvm_Lx , E(660F38,7E,_,x,_,0,4,FV ), 0 , 111, 0 , 7494 , 203, 126), // #1203 + INST(Vpermt2pd , VexRvm_Lx , E(660F38,7F,_,x,_,1,4,FV ), 0 , 112, 0 , 7503 , 204, 126), // #1204 + INST(Vpermt2ps , VexRvm_Lx , E(660F38,7F,_,x,_,0,4,FV ), 0 , 111, 0 , 7513 , 203, 126), // #1205 + INST(Vpermt2q , VexRvm_Lx , E(660F38,7E,_,x,_,1,4,FV ), 0 , 112, 0 , 7523 , 204, 126), // #1206 + INST(Vpermt2w , VexRvm_Lx , E(660F38,7D,_,x,_,1,4,FVM), 0 , 113, 0 , 7532 , 202, 128), // #1207 + INST(Vpermw , VexRvm_Lx , E(660F38,8D,_,x,_,1,4,FVM), 0 , 113, 0 , 7541 , 202, 128), // #1208 + INST(Vpexpandb , VexRm_Lx , E(660F38,62,_,x,_,0,0,T1S), 0 , 196, 0 , 7548 , 254, 151), // #1209 + INST(Vpexpandd , VexRm_Lx , E(660F38,89,_,x,_,0,2,T1S), 0 , 125, 0 , 7558 , 254, 126), // #1210 + INST(Vpexpandq , VexRm_Lx , E(660F38,89,_,x,_,1,3,T1S), 0 , 124, 0 , 7568 , 254, 126), // #1211 + INST(Vpexpandw , VexRm_Lx , E(660F38,62,_,x,_,1,1,T1S), 0 , 197, 0 , 7578 , 254, 151), // #1212 + INST(Vpextrb , VexMri , V(660F3A,14,_,0,0,I,0,T1S), 0 , 200, 0 , 7588 , 344, 154), // #1213 + INST(Vpextrd , VexMri , V(660F3A,16,_,0,0,0,2,T1S), 0 , 159, 0 , 7596 , 258, 155), // #1214 + INST(Vpextrq , VexMri , V(660F3A,16,_,0,1,1,3,T1S), 0 , 201, 0 , 7604 , 345, 155), // #1215 + INST(Vpextrw , VexMri , V(660F3A,15,_,0,0,I,1,T1S), 0 , 202, 0 , 7612 , 346, 154), // #1216 + INST(Vpgatherdd , VexRmvRm_VM , V(660F38,90,_,x,0,_,_,_ ), V(660F38,90,_,x,_,0,2,T1S), 96 , 113, 7620 , 274, 140), // #1217 + INST(Vpgatherdq , VexRmvRm_VM , V(660F38,90,_,x,1,_,_,_ ), V(660F38,90,_,x,_,1,3,T1S), 165, 114, 7631 , 273, 140), // #1218 + INST(Vpgatherqd , VexRmvRm_VM , V(660F38,91,_,x,0,_,_,_ ), V(660F38,91,_,x,_,0,2,T1S), 96 , 115, 7642 , 279, 140), // #1219 + INST(Vpgatherqq , VexRmvRm_VM , V(660F38,91,_,x,1,_,_,_ ), V(660F38,91,_,x,_,1,3,T1S), 165, 116, 7653 , 278, 140), // #1220 + INST(Vphaddbd , VexRm , V(XOP_M9,C2,_,0,0,_,_,_ ), 0 , 77 , 0 , 7664 , 194, 139), // #1221 + INST(Vphaddbq , VexRm , V(XOP_M9,C3,_,0,0,_,_,_ ), 0 , 77 , 0 , 7673 
, 194, 139), // #1222 + INST(Vphaddbw , VexRm , V(XOP_M9,C1,_,0,0,_,_,_ ), 0 , 77 , 0 , 7682 , 194, 139), // #1223 + INST(Vphaddd , VexRvm_Lx , V(660F38,02,_,x,I,_,_,_ ), 0 , 96 , 0 , 7691 , 192, 143), // #1224 + INST(Vphadddq , VexRm , V(XOP_M9,CB,_,0,0,_,_,_ ), 0 , 77 , 0 , 7699 , 194, 139), // #1225 + INST(Vphaddsw , VexRvm_Lx , V(660F38,03,_,x,I,_,_,_ ), 0 , 96 , 0 , 7708 , 192, 143), // #1226 + INST(Vphaddubd , VexRm , V(XOP_M9,D2,_,0,0,_,_,_ ), 0 , 77 , 0 , 7717 , 194, 139), // #1227 + INST(Vphaddubq , VexRm , V(XOP_M9,D3,_,0,0,_,_,_ ), 0 , 77 , 0 , 7727 , 194, 139), // #1228 + INST(Vphaddubw , VexRm , V(XOP_M9,D1,_,0,0,_,_,_ ), 0 , 77 , 0 , 7737 , 194, 139), // #1229 + INST(Vphaddudq , VexRm , V(XOP_M9,DB,_,0,0,_,_,_ ), 0 , 77 , 0 , 7747 , 194, 139), // #1230 + INST(Vphadduwd , VexRm , V(XOP_M9,D6,_,0,0,_,_,_ ), 0 , 77 , 0 , 7757 , 194, 139), // #1231 + INST(Vphadduwq , VexRm , V(XOP_M9,D7,_,0,0,_,_,_ ), 0 , 77 , 0 , 7767 , 194, 139), // #1232 + INST(Vphaddw , VexRvm_Lx , V(660F38,01,_,x,I,_,_,_ ), 0 , 96 , 0 , 7777 , 192, 143), // #1233 + INST(Vphaddwd , VexRm , V(XOP_M9,C6,_,0,0,_,_,_ ), 0 , 77 , 0 , 7785 , 194, 139), // #1234 + INST(Vphaddwq , VexRm , V(XOP_M9,C7,_,0,0,_,_,_ ), 0 , 77 , 0 , 7794 , 194, 139), // #1235 + INST(Vphminposuw , VexRm , V(660F38,41,_,0,I,_,_,_ ), 0 , 96 , 0 , 7803 , 194, 123), // #1236 + INST(Vphsubbw , VexRm , V(XOP_M9,E1,_,0,0,_,_,_ ), 0 , 77 , 0 , 7815 , 194, 139), // #1237 + INST(Vphsubd , VexRvm_Lx , V(660F38,06,_,x,I,_,_,_ ), 0 , 96 , 0 , 7824 , 192, 143), // #1238 + INST(Vphsubdq , VexRm , V(XOP_M9,E3,_,0,0,_,_,_ ), 0 , 77 , 0 , 7832 , 194, 139), // #1239 + INST(Vphsubsw , VexRvm_Lx , V(660F38,07,_,x,I,_,_,_ ), 0 , 96 , 0 , 7841 , 192, 143), // #1240 + INST(Vphsubw , VexRvm_Lx , V(660F38,05,_,x,I,_,_,_ ), 0 , 96 , 0 , 7850 , 192, 143), // #1241 + INST(Vphsubwd , VexRm , V(XOP_M9,E2,_,0,0,_,_,_ ), 0 , 77 , 0 , 7858 , 194, 139), // #1242 + INST(Vpinsrb , VexRvmi , V(660F3A,20,_,0,0,I,0,T1S), 0 , 200, 0 , 7867 , 347, 154), // #1243 + INST(Vpinsrd , VexRvmi , V(660F3A,22,_,0,0,0,2,T1S), 0 , 159, 0 , 7875 , 348, 155), // #1244 + INST(Vpinsrq , VexRvmi , V(660F3A,22,_,0,1,1,3,T1S), 0 , 201, 0 , 7883 , 349, 155), // #1245 + INST(Vpinsrw , VexRvmi , V(660F00,C4,_,0,0,I,1,T1S), 0 , 203, 0 , 7891 , 350, 154), // #1246 + INST(Vplzcntd , VexRm_Lx , E(660F38,44,_,x,_,0,4,FV ), 0 , 111, 0 , 7899 , 338, 148), // #1247 + INST(Vplzcntq , VexRm_Lx , E(660F38,44,_,x,_,1,4,FV ), 0 , 112, 0 , 7908 , 351, 148), // #1248 + INST(Vpmacsdd , VexRvmr , V(XOP_M8,9E,_,0,0,_,_,_ ), 0 , 193, 0 , 7917 , 352, 139), // #1249 + INST(Vpmacsdqh , VexRvmr , V(XOP_M8,9F,_,0,0,_,_,_ ), 0 , 193, 0 , 7926 , 352, 139), // #1250 + INST(Vpmacsdql , VexRvmr , V(XOP_M8,97,_,0,0,_,_,_ ), 0 , 193, 0 , 7936 , 352, 139), // #1251 + INST(Vpmacssdd , VexRvmr , V(XOP_M8,8E,_,0,0,_,_,_ ), 0 , 193, 0 , 7946 , 352, 139), // #1252 + INST(Vpmacssdqh , VexRvmr , V(XOP_M8,8F,_,0,0,_,_,_ ), 0 , 193, 0 , 7956 , 352, 139), // #1253 + INST(Vpmacssdql , VexRvmr , V(XOP_M8,87,_,0,0,_,_,_ ), 0 , 193, 0 , 7967 , 352, 139), // #1254 + INST(Vpmacsswd , VexRvmr , V(XOP_M8,86,_,0,0,_,_,_ ), 0 , 193, 0 , 7978 , 352, 139), // #1255 + INST(Vpmacssww , VexRvmr , V(XOP_M8,85,_,0,0,_,_,_ ), 0 , 193, 0 , 7988 , 352, 139), // #1256 + INST(Vpmacswd , VexRvmr , V(XOP_M8,96,_,0,0,_,_,_ ), 0 , 193, 0 , 7998 , 352, 139), // #1257 + INST(Vpmacsww , VexRvmr , V(XOP_M8,95,_,0,0,_,_,_ ), 0 , 193, 0 , 8007 , 352, 139), // #1258 + INST(Vpmadcsswd , VexRvmr , V(XOP_M8,A6,_,0,0,_,_,_ ), 0 , 193, 0 , 8016 , 352, 139), // #1259 + 
INST(Vpmadcswd , VexRvmr , V(XOP_M8,B6,_,0,0,_,_,_ ), 0 , 193, 0 , 8027 , 352, 139), // #1260 + INST(Vpmadd52huq , VexRvm_Lx , E(660F38,B5,_,x,_,1,4,FV ), 0 , 112, 0 , 8037 , 204, 156), // #1261 + INST(Vpmadd52luq , VexRvm_Lx , E(660F38,B4,_,x,_,1,4,FV ), 0 , 112, 0 , 8049 , 204, 156), // #1262 + INST(Vpmaddubsw , VexRvm_Lx , V(660F38,04,_,x,I,I,4,FVM), 0 , 107, 0 , 8061 , 285, 146), // #1263 + INST(Vpmaddwd , VexRvm_Lx , V(660F00,F5,_,x,I,I,4,FVM), 0 , 184, 0 , 8072 , 285, 146), // #1264 + INST(Vpmaskmovd , VexRvmMvr_Lx , V(660F38,8C,_,x,0,_,_,_ ), V(660F38,8E,_,x,0,_,_,_ ), 96 , 117, 8081 , 293, 130), // #1265 + INST(Vpmaskmovq , VexRvmMvr_Lx , V(660F38,8C,_,x,1,_,_,_ ), V(660F38,8E,_,x,1,_,_,_ ), 165, 118, 8092 , 293, 130), // #1266 + INST(Vpmaxsb , VexRvm_Lx , V(660F38,3C,_,x,I,I,4,FVM), 0 , 107, 0 , 8103 , 353, 146), // #1267 + INST(Vpmaxsd , VexRvm_Lx , V(660F38,3D,_,x,I,0,4,FV ), 0 , 163, 0 , 8111 , 201, 131), // #1268 + INST(Vpmaxsq , VexRvm_Lx , E(660F38,3D,_,x,_,1,4,FV ), 0 , 112, 0 , 8119 , 204, 126), // #1269 + INST(Vpmaxsw , VexRvm_Lx , V(660F00,EE,_,x,I,I,4,FVM), 0 , 184, 0 , 8127 , 353, 146), // #1270 + INST(Vpmaxub , VexRvm_Lx , V(660F00,DE,_,x,I,I,4,FVM), 0 , 184, 0 , 8135 , 353, 146), // #1271 + INST(Vpmaxud , VexRvm_Lx , V(660F38,3F,_,x,I,0,4,FV ), 0 , 163, 0 , 8143 , 201, 131), // #1272 + INST(Vpmaxuq , VexRvm_Lx , E(660F38,3F,_,x,_,1,4,FV ), 0 , 112, 0 , 8151 , 204, 126), // #1273 + INST(Vpmaxuw , VexRvm_Lx , V(660F38,3E,_,x,I,I,4,FVM), 0 , 107, 0 , 8159 , 353, 146), // #1274 + INST(Vpminsb , VexRvm_Lx , V(660F38,38,_,x,I,I,4,FVM), 0 , 107, 0 , 8167 , 353, 146), // #1275 + INST(Vpminsd , VexRvm_Lx , V(660F38,39,_,x,I,0,4,FV ), 0 , 163, 0 , 8175 , 201, 131), // #1276 + INST(Vpminsq , VexRvm_Lx , E(660F38,39,_,x,_,1,4,FV ), 0 , 112, 0 , 8183 , 204, 126), // #1277 + INST(Vpminsw , VexRvm_Lx , V(660F00,EA,_,x,I,I,4,FVM), 0 , 184, 0 , 8191 , 353, 146), // #1278 + INST(Vpminub , VexRvm_Lx , V(660F00,DA,_,x,I,_,4,FVM), 0 , 184, 0 , 8199 , 353, 146), // #1279 + INST(Vpminud , VexRvm_Lx , V(660F38,3B,_,x,I,0,4,FV ), 0 , 163, 0 , 8207 , 201, 131), // #1280 + INST(Vpminuq , VexRvm_Lx , E(660F38,3B,_,x,_,1,4,FV ), 0 , 112, 0 , 8215 , 204, 126), // #1281 + INST(Vpminuw , VexRvm_Lx , V(660F38,3A,_,x,I,_,4,FVM), 0 , 107, 0 , 8223 , 353, 146), // #1282 + INST(Vpmovb2m , VexRm_Lx , E(F30F38,29,_,x,_,0,_,_ ), 0 , 128, 0 , 8231 , 354, 128), // #1283 + INST(Vpmovd2m , VexRm_Lx , E(F30F38,39,_,x,_,0,_,_ ), 0 , 128, 0 , 8240 , 354, 129), // #1284 + INST(Vpmovdb , VexMr_Lx , E(F30F38,31,_,x,_,0,2,QVM), 0 , 204, 0 , 8249 , 355, 126), // #1285 + INST(Vpmovdw , VexMr_Lx , E(F30F38,33,_,x,_,0,3,HVM), 0 , 205, 0 , 8257 , 356, 126), // #1286 + INST(Vpmovm2b , VexRm_Lx , E(F30F38,28,_,x,_,0,_,_ ), 0 , 128, 0 , 8265 , 324, 128), // #1287 + INST(Vpmovm2d , VexRm_Lx , E(F30F38,38,_,x,_,0,_,_ ), 0 , 128, 0 , 8274 , 324, 129), // #1288 + INST(Vpmovm2q , VexRm_Lx , E(F30F38,38,_,x,_,1,_,_ ), 0 , 191, 0 , 8283 , 324, 129), // #1289 + INST(Vpmovm2w , VexRm_Lx , E(F30F38,28,_,x,_,1,_,_ ), 0 , 191, 0 , 8292 , 324, 128), // #1290 + INST(Vpmovmskb , VexRm_Lx , V(660F00,D7,_,x,I,_,_,_ ), 0 , 68 , 0 , 8301 , 305, 143), // #1291 + INST(Vpmovq2m , VexRm_Lx , E(F30F38,39,_,x,_,1,_,_ ), 0 , 191, 0 , 8311 , 354, 129), // #1292 + INST(Vpmovqb , VexMr_Lx , E(F30F38,32,_,x,_,0,1,OVM), 0 , 206, 0 , 8320 , 357, 126), // #1293 + INST(Vpmovqd , VexMr_Lx , E(F30F38,35,_,x,_,0,3,HVM), 0 , 205, 0 , 8328 , 356, 126), // #1294 + INST(Vpmovqw , VexMr_Lx , E(F30F38,34,_,x,_,0,2,QVM), 0 , 204, 0 , 8336 , 355, 126), // #1295 + 
INST(Vpmovsdb , VexMr_Lx , E(F30F38,21,_,x,_,0,2,QVM), 0 , 204, 0 , 8344 , 355, 126), // #1296 + INST(Vpmovsdw , VexMr_Lx , E(F30F38,23,_,x,_,0,3,HVM), 0 , 205, 0 , 8353 , 356, 126), // #1297 + INST(Vpmovsqb , VexMr_Lx , E(F30F38,22,_,x,_,0,1,OVM), 0 , 206, 0 , 8362 , 357, 126), // #1298 + INST(Vpmovsqd , VexMr_Lx , E(F30F38,25,_,x,_,0,3,HVM), 0 , 205, 0 , 8371 , 356, 126), // #1299 + INST(Vpmovsqw , VexMr_Lx , E(F30F38,24,_,x,_,0,2,QVM), 0 , 204, 0 , 8380 , 355, 126), // #1300 + INST(Vpmovswb , VexMr_Lx , E(F30F38,20,_,x,_,0,3,HVM), 0 , 205, 0 , 8389 , 356, 128), // #1301 + INST(Vpmovsxbd , VexRm_Lx , V(660F38,21,_,x,I,I,2,QVM), 0 , 207, 0 , 8398 , 358, 131), // #1302 + INST(Vpmovsxbq , VexRm_Lx , V(660F38,22,_,x,I,I,1,OVM), 0 , 208, 0 , 8408 , 359, 131), // #1303 + INST(Vpmovsxbw , VexRm_Lx , V(660F38,20,_,x,I,I,3,HVM), 0 , 132, 0 , 8418 , 360, 146), // #1304 + INST(Vpmovsxdq , VexRm_Lx , V(660F38,25,_,x,I,0,3,HVM), 0 , 132, 0 , 8428 , 360, 131), // #1305 + INST(Vpmovsxwd , VexRm_Lx , V(660F38,23,_,x,I,I,3,HVM), 0 , 132, 0 , 8438 , 360, 131), // #1306 + INST(Vpmovsxwq , VexRm_Lx , V(660F38,24,_,x,I,I,2,QVM), 0 , 207, 0 , 8448 , 358, 131), // #1307 + INST(Vpmovusdb , VexMr_Lx , E(F30F38,11,_,x,_,0,2,QVM), 0 , 204, 0 , 8458 , 355, 126), // #1308 + INST(Vpmovusdw , VexMr_Lx , E(F30F38,13,_,x,_,0,3,HVM), 0 , 205, 0 , 8468 , 356, 126), // #1309 + INST(Vpmovusqb , VexMr_Lx , E(F30F38,12,_,x,_,0,1,OVM), 0 , 206, 0 , 8478 , 357, 126), // #1310 + INST(Vpmovusqd , VexMr_Lx , E(F30F38,15,_,x,_,0,3,HVM), 0 , 205, 0 , 8488 , 356, 126), // #1311 + INST(Vpmovusqw , VexMr_Lx , E(F30F38,14,_,x,_,0,2,QVM), 0 , 204, 0 , 8498 , 355, 126), // #1312 + INST(Vpmovuswb , VexMr_Lx , E(F30F38,10,_,x,_,0,3,HVM), 0 , 205, 0 , 8508 , 356, 128), // #1313 + INST(Vpmovw2m , VexRm_Lx , E(F30F38,29,_,x,_,1,_,_ ), 0 , 191, 0 , 8518 , 354, 128), // #1314 + INST(Vpmovwb , VexMr_Lx , E(F30F38,30,_,x,_,0,3,HVM), 0 , 205, 0 , 8527 , 356, 128), // #1315 + INST(Vpmovzxbd , VexRm_Lx , V(660F38,31,_,x,I,I,2,QVM), 0 , 207, 0 , 8535 , 358, 131), // #1316 + INST(Vpmovzxbq , VexRm_Lx , V(660F38,32,_,x,I,I,1,OVM), 0 , 208, 0 , 8545 , 359, 131), // #1317 + INST(Vpmovzxbw , VexRm_Lx , V(660F38,30,_,x,I,I,3,HVM), 0 , 132, 0 , 8555 , 360, 146), // #1318 + INST(Vpmovzxdq , VexRm_Lx , V(660F38,35,_,x,I,0,3,HVM), 0 , 132, 0 , 8565 , 360, 131), // #1319 + INST(Vpmovzxwd , VexRm_Lx , V(660F38,33,_,x,I,I,3,HVM), 0 , 132, 0 , 8575 , 360, 131), // #1320 + INST(Vpmovzxwq , VexRm_Lx , V(660F38,34,_,x,I,I,2,QVM), 0 , 207, 0 , 8585 , 358, 131), // #1321 + INST(Vpmuldq , VexRvm_Lx , V(660F38,28,_,x,I,1,4,FV ), 0 , 198, 0 , 8595 , 198, 131), // #1322 + INST(Vpmulhrsw , VexRvm_Lx , V(660F38,0B,_,x,I,I,4,FVM), 0 , 107, 0 , 8603 , 285, 146), // #1323 + INST(Vpmulhuw , VexRvm_Lx , V(660F00,E4,_,x,I,I,4,FVM), 0 , 184, 0 , 8613 , 285, 146), // #1324 + INST(Vpmulhw , VexRvm_Lx , V(660F00,E5,_,x,I,I,4,FVM), 0 , 184, 0 , 8622 , 285, 146), // #1325 + INST(Vpmulld , VexRvm_Lx , V(660F38,40,_,x,I,0,4,FV ), 0 , 163, 0 , 8630 , 199, 131), // #1326 + INST(Vpmullq , VexRvm_Lx , E(660F38,40,_,x,_,1,4,FV ), 0 , 112, 0 , 8638 , 204, 129), // #1327 + INST(Vpmullw , VexRvm_Lx , V(660F00,D5,_,x,I,I,4,FVM), 0 , 184, 0 , 8646 , 285, 146), // #1328 + INST(Vpmultishiftqb , VexRvm_Lx , E(660F38,83,_,x,_,1,4,FV ), 0 , 112, 0 , 8654 , 204, 153), // #1329 + INST(Vpmuludq , VexRvm_Lx , V(660F00,F4,_,x,I,1,4,FV ), 0 , 102, 0 , 8669 , 198, 131), // #1330 + INST(Vpopcntb , VexRm_Lx , E(660F38,54,_,x,_,0,4,FV ), 0 , 111, 0 , 8678 , 254, 157), // #1331 + INST(Vpopcntd , VexRm_Lx , 
E(660F38,55,_,x,_,0,4,FVM), 0 , 110, 0 , 8687 , 338, 158), // #1332 + INST(Vpopcntq , VexRm_Lx , E(660F38,55,_,x,_,1,4,FVM), 0 , 113, 0 , 8696 , 351, 158), // #1333 + INST(Vpopcntw , VexRm_Lx , E(660F38,54,_,x,_,1,4,FV ), 0 , 112, 0 , 8705 , 254, 157), // #1334 + INST(Vpor , VexRvm_Lx , V(660F00,EB,_,x,I,_,_,_ ), 0 , 68 , 0 , 8714 , 316, 143), // #1335 + INST(Vpord , VexRvm_Lx , E(660F00,EB,_,x,_,0,4,FV ), 0 , 189, 0 , 8719 , 317, 126), // #1336 + INST(Vporq , VexRvm_Lx , E(660F00,EB,_,x,_,1,4,FV ), 0 , 130, 0 , 8725 , 321, 126), // #1337 + INST(Vpperm , VexRvrmRvmr , V(XOP_M8,A3,_,0,x,_,_,_ ), 0 , 193, 0 , 8731 , 361, 139), // #1338 + INST(Vprold , VexVmi_Lx , E(660F00,72,1,x,_,0,4,FV ), 0 , 209, 0 , 8738 , 362, 126), // #1339 + INST(Vprolq , VexVmi_Lx , E(660F00,72,1,x,_,1,4,FV ), 0 , 210, 0 , 8745 , 363, 126), // #1340 + INST(Vprolvd , VexRvm_Lx , E(660F38,15,_,x,_,0,4,FV ), 0 , 111, 0 , 8752 , 203, 126), // #1341 + INST(Vprolvq , VexRvm_Lx , E(660F38,15,_,x,_,1,4,FV ), 0 , 112, 0 , 8760 , 204, 126), // #1342 + INST(Vprord , VexVmi_Lx , E(660F00,72,0,x,_,0,4,FV ), 0 , 189, 0 , 8768 , 362, 126), // #1343 + INST(Vprorq , VexVmi_Lx , E(660F00,72,0,x,_,1,4,FV ), 0 , 130, 0 , 8775 , 363, 126), // #1344 + INST(Vprorvd , VexRvm_Lx , E(660F38,14,_,x,_,0,4,FV ), 0 , 111, 0 , 8782 , 203, 126), // #1345 + INST(Vprorvq , VexRvm_Lx , E(660F38,14,_,x,_,1,4,FV ), 0 , 112, 0 , 8790 , 204, 126), // #1346 + INST(Vprotb , VexRvmRmvRmi , V(XOP_M9,90,_,0,x,_,_,_ ), V(XOP_M8,C0,_,0,x,_,_,_ ), 77 , 119, 8798 , 364, 139), // #1347 + INST(Vprotd , VexRvmRmvRmi , V(XOP_M9,92,_,0,x,_,_,_ ), V(XOP_M8,C2,_,0,x,_,_,_ ), 77 , 120, 8805 , 364, 139), // #1348 + INST(Vprotq , VexRvmRmvRmi , V(XOP_M9,93,_,0,x,_,_,_ ), V(XOP_M8,C3,_,0,x,_,_,_ ), 77 , 121, 8812 , 364, 139), // #1349 + INST(Vprotw , VexRvmRmvRmi , V(XOP_M9,91,_,0,x,_,_,_ ), V(XOP_M8,C1,_,0,x,_,_,_ ), 77 , 122, 8819 , 364, 139), // #1350 + INST(Vpsadbw , VexRvm_Lx , V(660F00,F6,_,x,I,I,4,FVM), 0 , 184, 0 , 8826 , 193, 146), // #1351 + INST(Vpscatterdd , VexMr_VM , E(660F38,A0,_,x,_,0,2,T1S), 0 , 125, 0 , 8834 , 365, 126), // #1352 + INST(Vpscatterdq , VexMr_VM , E(660F38,A0,_,x,_,1,3,T1S), 0 , 124, 0 , 8846 , 365, 126), // #1353 + INST(Vpscatterqd , VexMr_VM , E(660F38,A1,_,x,_,0,2,T1S), 0 , 125, 0 , 8858 , 366, 126), // #1354 + INST(Vpscatterqq , VexMr_VM , E(660F38,A1,_,x,_,1,3,T1S), 0 , 124, 0 , 8870 , 367, 126), // #1355 + INST(Vpshab , VexRvmRmv , V(XOP_M9,98,_,0,x,_,_,_ ), 0 , 77 , 0 , 8882 , 368, 139), // #1356 + INST(Vpshad , VexRvmRmv , V(XOP_M9,9A,_,0,x,_,_,_ ), 0 , 77 , 0 , 8889 , 368, 139), // #1357 + INST(Vpshaq , VexRvmRmv , V(XOP_M9,9B,_,0,x,_,_,_ ), 0 , 77 , 0 , 8896 , 368, 139), // #1358 + INST(Vpshaw , VexRvmRmv , V(XOP_M9,99,_,0,x,_,_,_ ), 0 , 77 , 0 , 8903 , 368, 139), // #1359 + INST(Vpshlb , VexRvmRmv , V(XOP_M9,94,_,0,x,_,_,_ ), 0 , 77 , 0 , 8910 , 368, 139), // #1360 + INST(Vpshld , VexRvmRmv , V(XOP_M9,96,_,0,x,_,_,_ ), 0 , 77 , 0 , 8917 , 368, 139), // #1361 + INST(Vpshldd , VexRvmi_Lx , E(660F3A,71,_,x,_,0,4,FV ), 0 , 108, 0 , 8924 , 196, 151), // #1362 + INST(Vpshldq , VexRvmi_Lx , E(660F3A,71,_,x,_,1,4,FV ), 0 , 109, 0 , 8932 , 197, 151), // #1363 + INST(Vpshldvd , VexRvm_Lx , E(660F38,71,_,x,_,0,4,FV ), 0 , 111, 0 , 8940 , 203, 151), // #1364 + INST(Vpshldvq , VexRvm_Lx , E(660F38,71,_,x,_,1,4,FV ), 0 , 112, 0 , 8949 , 204, 151), // #1365 + INST(Vpshldvw , VexRvm_Lx , E(660F38,70,_,x,_,0,4,FVM), 0 , 110, 0 , 8958 , 202, 151), // #1366 + INST(Vpshldw , VexRvmi_Lx , E(660F3A,70,_,x,_,0,4,FVM), 0 , 151, 0 , 8967 , 250, 151), // 
#1367 + INST(Vpshlq , VexRvmRmv , V(XOP_M9,97,_,0,x,_,_,_ ), 0 , 77 , 0 , 8975 , 368, 139), // #1368 + INST(Vpshlw , VexRvmRmv , V(XOP_M9,95,_,0,x,_,_,_ ), 0 , 77 , 0 , 8982 , 368, 139), // #1369 + INST(Vpshrdd , VexRvmi_Lx , E(660F3A,73,_,x,_,0,4,FV ), 0 , 108, 0 , 8989 , 196, 151), // #1370 + INST(Vpshrdq , VexRvmi_Lx , E(660F3A,73,_,x,_,1,4,FV ), 0 , 109, 0 , 8997 , 197, 151), // #1371 + INST(Vpshrdvd , VexRvm_Lx , E(660F38,73,_,x,_,0,4,FV ), 0 , 111, 0 , 9005 , 203, 151), // #1372 + INST(Vpshrdvq , VexRvm_Lx , E(660F38,73,_,x,_,1,4,FV ), 0 , 112, 0 , 9014 , 204, 151), // #1373 + INST(Vpshrdvw , VexRvm_Lx , E(660F38,72,_,x,_,0,4,FVM), 0 , 110, 0 , 9023 , 202, 151), // #1374 + INST(Vpshrdw , VexRvmi_Lx , E(660F3A,72,_,x,_,0,4,FVM), 0 , 151, 0 , 9032 , 250, 151), // #1375 + INST(Vpshufb , VexRvm_Lx , V(660F38,00,_,x,I,I,4,FVM), 0 , 107, 0 , 9040 , 285, 146), // #1376 + INST(Vpshufbitqmb , VexRvm_Lx , E(660F38,8F,_,x,0,0,4,FVM), 0 , 110, 0 , 9048 , 369, 157), // #1377 + INST(Vpshufd , VexRmi_Lx , V(660F00,70,_,x,I,0,4,FV ), 0 , 133, 0 , 9061 , 370, 131), // #1378 + INST(Vpshufhw , VexRmi_Lx , V(F30F00,70,_,x,I,I,4,FVM), 0 , 185, 0 , 9069 , 371, 146), // #1379 + INST(Vpshuflw , VexRmi_Lx , V(F20F00,70,_,x,I,I,4,FVM), 0 , 211, 0 , 9078 , 371, 146), // #1380 + INST(Vpsignb , VexRvm_Lx , V(660F38,08,_,x,I,_,_,_ ), 0 , 96 , 0 , 9087 , 192, 143), // #1381 + INST(Vpsignd , VexRvm_Lx , V(660F38,0A,_,x,I,_,_,_ ), 0 , 96 , 0 , 9095 , 192, 143), // #1382 + INST(Vpsignw , VexRvm_Lx , V(660F38,09,_,x,I,_,_,_ ), 0 , 96 , 0 , 9103 , 192, 143), // #1383 + INST(Vpslld , VexRvmVmi_Lx , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 212, 123, 9111 , 372, 131), // #1384 + INST(Vpslldq , VexEvexVmi_Lx , V(660F00,73,7,x,I,I,4,FVM), 0 , 213, 0 , 9118 , 373, 146), // #1385 + INST(Vpsllq , VexRvmVmi_Lx , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 214, 124, 9126 , 374, 131), // #1386 + INST(Vpsllvd , VexRvm_Lx , V(660F38,47,_,x,0,0,4,FV ), 0 , 163, 0 , 9133 , 199, 140), // #1387 + INST(Vpsllvq , VexRvm_Lx , V(660F38,47,_,x,1,1,4,FV ), 0 , 162, 0 , 9141 , 198, 140), // #1388 + INST(Vpsllvw , VexRvm_Lx , E(660F38,12,_,x,_,1,4,FVM), 0 , 113, 0 , 9149 , 202, 128), // #1389 + INST(Vpsllw , VexRvmVmi_Lx , V(660F00,F1,_,x,I,I,4,FVM), V(660F00,71,6,x,I,I,4,FVM), 184, 125, 9157 , 375, 146), // #1390 + INST(Vpsrad , VexRvmVmi_Lx , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 212, 126, 9164 , 372, 131), // #1391 + INST(Vpsraq , VexRvmVmi_Lx , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 215, 127, 9171 , 376, 126), // #1392 + INST(Vpsravd , VexRvm_Lx , V(660F38,46,_,x,0,0,4,FV ), 0 , 163, 0 , 9178 , 199, 140), // #1393 + INST(Vpsravq , VexRvm_Lx , E(660F38,46,_,x,_,1,4,FV ), 0 , 112, 0 , 9186 , 204, 126), // #1394 + INST(Vpsravw , VexRvm_Lx , E(660F38,11,_,x,_,1,4,FVM), 0 , 113, 0 , 9194 , 202, 128), // #1395 + INST(Vpsraw , VexRvmVmi_Lx , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 212, 128, 9202 , 375, 146), // #1396 + INST(Vpsrld , VexRvmVmi_Lx , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 212, 129, 9209 , 372, 131), // #1397 + INST(Vpsrldq , VexEvexVmi_Lx , V(660F00,73,3,x,I,I,4,FVM), 0 , 216, 0 , 9216 , 373, 146), // #1398 + INST(Vpsrlq , VexRvmVmi_Lx , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 214, 130, 9224 , 374, 131), // #1399 + INST(Vpsrlvd , VexRvm_Lx , V(660F38,45,_,x,0,0,4,FV ), 0 , 163, 0 , 9231 , 199, 140), // #1400 + INST(Vpsrlvq , VexRvm_Lx , V(660F38,45,_,x,1,1,4,FV ), 0 , 162, 0 , 9239 , 198, 140), // #1401 + INST(Vpsrlvw , 
VexRvm_Lx , E(660F38,10,_,x,_,1,4,FVM), 0 , 113, 0 , 9247 , 202, 128), // #1402 + INST(Vpsrlw , VexRvmVmi_Lx , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 212, 131, 9255 , 375, 146), // #1403 + INST(Vpsubb , VexRvm_Lx , V(660F00,F8,_,x,I,I,4,FVM), 0 , 184, 0 , 9262 , 377, 146), // #1404 + INST(Vpsubd , VexRvm_Lx , V(660F00,FA,_,x,I,0,4,FV ), 0 , 133, 0 , 9269 , 378, 131), // #1405 + INST(Vpsubq , VexRvm_Lx , V(660F00,FB,_,x,I,1,4,FV ), 0 , 102, 0 , 9276 , 379, 131), // #1406 + INST(Vpsubsb , VexRvm_Lx , V(660F00,E8,_,x,I,I,4,FVM), 0 , 184, 0 , 9283 , 377, 146), // #1407 + INST(Vpsubsw , VexRvm_Lx , V(660F00,E9,_,x,I,I,4,FVM), 0 , 184, 0 , 9291 , 377, 146), // #1408 + INST(Vpsubusb , VexRvm_Lx , V(660F00,D8,_,x,I,I,4,FVM), 0 , 184, 0 , 9299 , 377, 146), // #1409 + INST(Vpsubusw , VexRvm_Lx , V(660F00,D9,_,x,I,I,4,FVM), 0 , 184, 0 , 9308 , 377, 146), // #1410 + INST(Vpsubw , VexRvm_Lx , V(660F00,F9,_,x,I,I,4,FVM), 0 , 184, 0 , 9317 , 377, 146), // #1411 + INST(Vpternlogd , VexRvmi_Lx , E(660F3A,25,_,x,_,0,4,FV ), 0 , 108, 0 , 9324 , 196, 126), // #1412 + INST(Vpternlogq , VexRvmi_Lx , E(660F3A,25,_,x,_,1,4,FV ), 0 , 109, 0 , 9335 , 197, 126), // #1413 + INST(Vptest , VexRm_Lx , V(660F38,17,_,x,I,_,_,_ ), 0 , 96 , 0 , 9346 , 270, 150), // #1414 + INST(Vptestmb , VexRvm_Lx , E(660F38,26,_,x,_,0,4,FVM), 0 , 110, 0 , 9353 , 369, 128), // #1415 + INST(Vptestmd , VexRvm_Lx , E(660F38,27,_,x,_,0,4,FV ), 0 , 111, 0 , 9362 , 380, 126), // #1416 + INST(Vptestmq , VexRvm_Lx , E(660F38,27,_,x,_,1,4,FV ), 0 , 112, 0 , 9371 , 381, 126), // #1417 + INST(Vptestmw , VexRvm_Lx , E(660F38,26,_,x,_,1,4,FVM), 0 , 113, 0 , 9380 , 369, 128), // #1418 + INST(Vptestnmb , VexRvm_Lx , E(F30F38,26,_,x,_,0,4,FVM), 0 , 217, 0 , 9389 , 369, 128), // #1419 + INST(Vptestnmd , VexRvm_Lx , E(F30F38,27,_,x,_,0,4,FV ), 0 , 218, 0 , 9399 , 380, 126), // #1420 + INST(Vptestnmq , VexRvm_Lx , E(F30F38,27,_,x,_,1,4,FV ), 0 , 219, 0 , 9409 , 381, 126), // #1421 + INST(Vptestnmw , VexRvm_Lx , E(F30F38,26,_,x,_,1,4,FVM), 0 , 220, 0 , 9419 , 369, 128), // #1422 + INST(Vpunpckhbw , VexRvm_Lx , V(660F00,68,_,x,I,I,4,FVM), 0 , 184, 0 , 9429 , 285, 146), // #1423 + INST(Vpunpckhdq , VexRvm_Lx , V(660F00,6A,_,x,I,0,4,FV ), 0 , 133, 0 , 9440 , 199, 131), // #1424 + INST(Vpunpckhqdq , VexRvm_Lx , V(660F00,6D,_,x,I,1,4,FV ), 0 , 102, 0 , 9451 , 198, 131), // #1425 + INST(Vpunpckhwd , VexRvm_Lx , V(660F00,69,_,x,I,I,4,FVM), 0 , 184, 0 , 9463 , 285, 146), // #1426 + INST(Vpunpcklbw , VexRvm_Lx , V(660F00,60,_,x,I,I,4,FVM), 0 , 184, 0 , 9474 , 285, 146), // #1427 + INST(Vpunpckldq , VexRvm_Lx , V(660F00,62,_,x,I,0,4,FV ), 0 , 133, 0 , 9485 , 199, 131), // #1428 + INST(Vpunpcklqdq , VexRvm_Lx , V(660F00,6C,_,x,I,1,4,FV ), 0 , 102, 0 , 9496 , 198, 131), // #1429 + INST(Vpunpcklwd , VexRvm_Lx , V(660F00,61,_,x,I,I,4,FVM), 0 , 184, 0 , 9508 , 285, 146), // #1430 + INST(Vpxor , VexRvm_Lx , V(660F00,EF,_,x,I,_,_,_ ), 0 , 68 , 0 , 9519 , 318, 143), // #1431 + INST(Vpxord , VexRvm_Lx , E(660F00,EF,_,x,_,0,4,FV ), 0 , 189, 0 , 9525 , 319, 126), // #1432 + INST(Vpxorq , VexRvm_Lx , E(660F00,EF,_,x,_,1,4,FV ), 0 , 130, 0 , 9532 , 320, 126), // #1433 + INST(Vrangepd , VexRvmi_Lx , E(660F3A,50,_,x,_,1,4,FV ), 0 , 109, 0 , 9539 , 259, 129), // #1434 + INST(Vrangeps , VexRvmi_Lx , E(660F3A,50,_,x,_,0,4,FV ), 0 , 108, 0 , 9548 , 260, 129), // #1435 + INST(Vrangesd , VexRvmi , E(660F3A,51,_,I,_,1,3,T1S), 0 , 160, 0 , 9557 , 261, 64 ), // #1436 + INST(Vrangess , VexRvmi , E(660F3A,51,_,I,_,0,2,T1S), 0 , 161, 0 , 9566 , 262, 64 ), // #1437 + 
INST(Vrcp14pd , VexRm_Lx , E(660F38,4C,_,x,_,1,4,FV ), 0 , 112, 0 , 9575 , 351, 126), // #1438 + INST(Vrcp14ps , VexRm_Lx , E(660F38,4C,_,x,_,0,4,FV ), 0 , 111, 0 , 9584 , 338, 126), // #1439 + INST(Vrcp14sd , VexRvm , E(660F38,4D,_,I,_,1,3,T1S), 0 , 124, 0 , 9593 , 382, 66 ), // #1440 + INST(Vrcp14ss , VexRvm , E(660F38,4D,_,I,_,0,2,T1S), 0 , 125, 0 , 9602 , 383, 66 ), // #1441 + INST(Vrcp28pd , VexRm , E(660F38,CA,_,2,_,1,4,FV ), 0 , 152, 0 , 9611 , 252, 135), // #1442 + INST(Vrcp28ps , VexRm , E(660F38,CA,_,2,_,0,4,FV ), 0 , 153, 0 , 9620 , 253, 135), // #1443 + INST(Vrcp28sd , VexRvm , E(660F38,CB,_,I,_,1,3,T1S), 0 , 124, 0 , 9629 , 280, 135), // #1444 + INST(Vrcp28ss , VexRvm , E(660F38,CB,_,I,_,0,2,T1S), 0 , 125, 0 , 9638 , 281, 135), // #1445 + INST(Vrcpps , VexRm_Lx , V(000F00,53,_,x,I,_,_,_ ), 0 , 71 , 0 , 9647 , 270, 123), // #1446 + INST(Vrcpss , VexRvm , V(F30F00,53,_,I,I,_,_,_ ), 0 , 178, 0 , 9654 , 384, 123), // #1447 + INST(Vreducepd , VexRmi_Lx , E(660F3A,56,_,x,_,1,4,FV ), 0 , 109, 0 , 9661 , 363, 129), // #1448 + INST(Vreduceps , VexRmi_Lx , E(660F3A,56,_,x,_,0,4,FV ), 0 , 108, 0 , 9671 , 362, 129), // #1449 + INST(Vreducesd , VexRvmi , E(660F3A,57,_,I,_,1,3,T1S), 0 , 160, 0 , 9681 , 385, 64 ), // #1450 + INST(Vreducess , VexRvmi , E(660F3A,57,_,I,_,0,2,T1S), 0 , 161, 0 , 9691 , 386, 64 ), // #1451 + INST(Vrndscalepd , VexRmi_Lx , E(660F3A,09,_,x,_,1,4,FV ), 0 , 109, 0 , 9701 , 282, 126), // #1452 + INST(Vrndscaleps , VexRmi_Lx , E(660F3A,08,_,x,_,0,4,FV ), 0 , 108, 0 , 9713 , 283, 126), // #1453 + INST(Vrndscalesd , VexRvmi , E(660F3A,0B,_,I,_,1,3,T1S), 0 , 160, 0 , 9725 , 261, 66 ), // #1454 + INST(Vrndscaless , VexRvmi , E(660F3A,0A,_,I,_,0,2,T1S), 0 , 161, 0 , 9737 , 262, 66 ), // #1455 + INST(Vroundpd , VexRmi_Lx , V(660F3A,09,_,x,I,_,_,_ ), 0 , 72 , 0 , 9749 , 387, 123), // #1456 + INST(Vroundps , VexRmi_Lx , V(660F3A,08,_,x,I,_,_,_ ), 0 , 72 , 0 , 9758 , 387, 123), // #1457 + INST(Vroundsd , VexRvmi , V(660F3A,0B,_,I,I,_,_,_ ), 0 , 72 , 0 , 9767 , 388, 123), // #1458 + INST(Vroundss , VexRvmi , V(660F3A,0A,_,I,I,_,_,_ ), 0 , 72 , 0 , 9776 , 389, 123), // #1459 + INST(Vrsqrt14pd , VexRm_Lx , E(660F38,4E,_,x,_,1,4,FV ), 0 , 112, 0 , 9785 , 351, 126), // #1460 + INST(Vrsqrt14ps , VexRm_Lx , E(660F38,4E,_,x,_,0,4,FV ), 0 , 111, 0 , 9796 , 338, 126), // #1461 + INST(Vrsqrt14sd , VexRvm , E(660F38,4F,_,I,_,1,3,T1S), 0 , 124, 0 , 9807 , 382, 66 ), // #1462 + INST(Vrsqrt14ss , VexRvm , E(660F38,4F,_,I,_,0,2,T1S), 0 , 125, 0 , 9818 , 383, 66 ), // #1463 + INST(Vrsqrt28pd , VexRm , E(660F38,CC,_,2,_,1,4,FV ), 0 , 152, 0 , 9829 , 252, 135), // #1464 + INST(Vrsqrt28ps , VexRm , E(660F38,CC,_,2,_,0,4,FV ), 0 , 153, 0 , 9840 , 253, 135), // #1465 + INST(Vrsqrt28sd , VexRvm , E(660F38,CD,_,I,_,1,3,T1S), 0 , 124, 0 , 9851 , 280, 135), // #1466 + INST(Vrsqrt28ss , VexRvm , E(660F38,CD,_,I,_,0,2,T1S), 0 , 125, 0 , 9862 , 281, 135), // #1467 + INST(Vrsqrtps , VexRm_Lx , V(000F00,52,_,x,I,_,_,_ ), 0 , 71 , 0 , 9873 , 270, 123), // #1468 + INST(Vrsqrtss , VexRvm , V(F30F00,52,_,I,I,_,_,_ ), 0 , 178, 0 , 9882 , 384, 123), // #1469 + INST(Vscalefpd , VexRvm_Lx , E(660F38,2C,_,x,_,1,4,FV ), 0 , 112, 0 , 9891 , 390, 126), // #1470 + INST(Vscalefps , VexRvm_Lx , E(660F38,2C,_,x,_,0,4,FV ), 0 , 111, 0 , 9901 , 391, 126), // #1471 + INST(Vscalefsd , VexRvm , E(660F38,2D,_,I,_,1,3,T1S), 0 , 124, 0 , 9911 , 392, 66 ), // #1472 + INST(Vscalefss , VexRvm , E(660F38,2D,_,I,_,0,2,T1S), 0 , 125, 0 , 9921 , 393, 66 ), // #1473 + INST(Vscatterdpd , VexMr_Lx , E(660F38,A2,_,x,_,1,3,T1S), 0 , 124, 0 , 
9931 , 394, 126), // #1474 + INST(Vscatterdps , VexMr_Lx , E(660F38,A2,_,x,_,0,2,T1S), 0 , 125, 0 , 9943 , 365, 126), // #1475 + INST(Vscatterpf0dpd , VexM_VM , E(660F38,C6,5,2,_,1,3,T1S), 0 , 221, 0 , 9955 , 275, 141), // #1476 + INST(Vscatterpf0dps , VexM_VM , E(660F38,C6,5,2,_,0,2,T1S), 0 , 222, 0 , 9970 , 276, 141), // #1477 + INST(Vscatterpf0qpd , VexM_VM , E(660F38,C7,5,2,_,1,3,T1S), 0 , 221, 0 , 9985 , 277, 141), // #1478 + INST(Vscatterpf0qps , VexM_VM , E(660F38,C7,5,2,_,0,2,T1S), 0 , 222, 0 , 10000, 277, 141), // #1479 + INST(Vscatterpf1dpd , VexM_VM , E(660F38,C6,6,2,_,1,3,T1S), 0 , 223, 0 , 10015, 275, 141), // #1480 + INST(Vscatterpf1dps , VexM_VM , E(660F38,C6,6,2,_,0,2,T1S), 0 , 224, 0 , 10030, 276, 141), // #1481 + INST(Vscatterpf1qpd , VexM_VM , E(660F38,C7,6,2,_,1,3,T1S), 0 , 223, 0 , 10045, 277, 141), // #1482 + INST(Vscatterpf1qps , VexM_VM , E(660F38,C7,6,2,_,0,2,T1S), 0 , 224, 0 , 10060, 277, 141), // #1483 + INST(Vscatterqpd , VexMr_Lx , E(660F38,A3,_,x,_,1,3,T1S), 0 , 124, 0 , 10075, 367, 126), // #1484 + INST(Vscatterqps , VexMr_Lx , E(660F38,A3,_,x,_,0,2,T1S), 0 , 125, 0 , 10087, 366, 126), // #1485 + INST(Vshuff32x4 , VexRvmi_Lx , E(660F3A,23,_,x,_,0,4,FV ), 0 , 108, 0 , 10099, 395, 126), // #1486 + INST(Vshuff64x2 , VexRvmi_Lx , E(660F3A,23,_,x,_,1,4,FV ), 0 , 109, 0 , 10110, 396, 126), // #1487 + INST(Vshufi32x4 , VexRvmi_Lx , E(660F3A,43,_,x,_,0,4,FV ), 0 , 108, 0 , 10121, 395, 126), // #1488 + INST(Vshufi64x2 , VexRvmi_Lx , E(660F3A,43,_,x,_,1,4,FV ), 0 , 109, 0 , 10132, 396, 126), // #1489 + INST(Vshufpd , VexRvmi_Lx , V(660F00,C6,_,x,I,1,4,FV ), 0 , 102, 0 , 10143, 397, 121), // #1490 + INST(Vshufps , VexRvmi_Lx , V(000F00,C6,_,x,I,0,4,FV ), 0 , 103, 0 , 10151, 398, 121), // #1491 + INST(Vsqrtpd , VexRm_Lx , V(660F00,51,_,x,I,1,4,FV ), 0 , 102, 0 , 10159, 399, 121), // #1492 + INST(Vsqrtps , VexRm_Lx , V(000F00,51,_,x,I,0,4,FV ), 0 , 103, 0 , 10167, 222, 121), // #1493 + INST(Vsqrtsd , VexRvm , V(F20F00,51,_,I,I,1,3,T1S), 0 , 104, 0 , 10175, 190, 122), // #1494 + INST(Vsqrtss , VexRvm , V(F30F00,51,_,I,I,0,2,T1S), 0 , 105, 0 , 10183, 191, 122), // #1495 + INST(Vstmxcsr , VexM , V(000F00,AE,3,0,I,_,_,_ ), 0 , 225, 0 , 10191, 291, 123), // #1496 + INST(Vsubpd , VexRvm_Lx , V(660F00,5C,_,x,I,1,4,FV ), 0 , 102, 0 , 10200, 188, 121), // #1497 + INST(Vsubps , VexRvm_Lx , V(000F00,5C,_,x,I,0,4,FV ), 0 , 103, 0 , 10207, 189, 121), // #1498 + INST(Vsubsd , VexRvm , V(F20F00,5C,_,I,I,1,3,T1S), 0 , 104, 0 , 10214, 190, 122), // #1499 + INST(Vsubss , VexRvm , V(F30F00,5C,_,I,I,0,2,T1S), 0 , 105, 0 , 10221, 191, 122), // #1500 + INST(Vtestpd , VexRm_Lx , V(660F38,0F,_,x,0,_,_,_ ), 0 , 96 , 0 , 10228, 270, 150), // #1501 + INST(Vtestps , VexRm_Lx , V(660F38,0E,_,x,0,_,_,_ ), 0 , 96 , 0 , 10236, 270, 150), // #1502 + INST(Vucomisd , VexRm , V(660F00,2E,_,I,I,1,3,T1S), 0 , 122, 0 , 10244, 218, 132), // #1503 + INST(Vucomiss , VexRm , V(000F00,2E,_,I,I,0,2,T1S), 0 , 123, 0 , 10253, 219, 132), // #1504 + INST(Vunpckhpd , VexRvm_Lx , V(660F00,15,_,x,I,1,4,FV ), 0 , 102, 0 , 10262, 198, 121), // #1505 + INST(Vunpckhps , VexRvm_Lx , V(000F00,15,_,x,I,0,4,FV ), 0 , 103, 0 , 10272, 199, 121), // #1506 + INST(Vunpcklpd , VexRvm_Lx , V(660F00,14,_,x,I,1,4,FV ), 0 , 102, 0 , 10282, 198, 121), // #1507 + INST(Vunpcklps , VexRvm_Lx , V(000F00,14,_,x,I,0,4,FV ), 0 , 103, 0 , 10292, 199, 121), // #1508 + INST(Vxorpd , VexRvm_Lx , V(660F00,57,_,x,I,1,4,FV ), 0 , 102, 0 , 10302, 379, 127), // #1509 + INST(Vxorps , VexRvm_Lx , V(000F00,57,_,x,I,0,4,FV ), 0 , 103, 0 , 10309, 378, 127), // 
#1510 + INST(Vzeroall , VexOp , V(000F00,77,_,1,I,_,_,_ ), 0 , 67 , 0 , 10316, 400, 123), // #1511 + INST(Vzeroupper , VexOp , V(000F00,77,_,0,I,_,_,_ ), 0 , 71 , 0 , 10325, 400, 123), // #1512 + INST(Wbinvd , X86Op , O(000F00,09,_,_,_,_,_,_ ), 0 , 4 , 0 , 10336, 30 , 0 ), // #1513 + INST(Wbnoinvd , X86Op , O(F30F00,09,_,_,_,_,_,_ ), 0 , 6 , 0 , 10343, 30 , 159), // #1514 + INST(Wrfsbase , X86M , O(F30F00,AE,2,_,x,_,_,_ ), 0 , 226, 0 , 10352, 166, 102), // #1515 + INST(Wrgsbase , X86M , O(F30F00,AE,3,_,x,_,_,_ ), 0 , 227, 0 , 10361, 166, 102), // #1516 + INST(Wrmsr , X86Op , O(000F00,30,_,_,_,_,_,_ ), 0 , 4 , 0 , 10370, 167, 103), // #1517 + INST(Wrssd , X86Mr , O(000F38,F6,_,_,_,_,_,_ ), 0 , 82 , 0 , 10376, 401, 54 ), // #1518 + INST(Wrssq , X86Mr , O(000F38,F6,_,_,1,_,_,_ ), 0 , 228, 0 , 10382, 402, 54 ), // #1519 + INST(Wrussd , X86Mr , O(660F38,F5,_,_,_,_,_,_ ), 0 , 2 , 0 , 10388, 401, 54 ), // #1520 + INST(Wrussq , X86Mr , O(660F38,F5,_,_,1,_,_,_ ), 0 , 229, 0 , 10395, 402, 54 ), // #1521 + INST(Xabort , X86Op_Mod11RM_I8 , O(000000,C6,7,_,_,_,_,_ ), 0 , 26 , 0 , 10402, 77 , 160), // #1522 + INST(Xadd , X86Xadd , O(000F00,C0,_,_,x,_,_,_ ), 0 , 4 , 0 , 10409, 403, 37 ), // #1523 + INST(Xbegin , X86JmpRel , O(000000,C7,7,_,_,_,_,_ ), 0 , 26 , 0 , 10414, 404, 160), // #1524 + INST(Xchg , X86Xchg , O(000000,86,_,_,x,_,_,_ ), 0 , 0 , 0 , 457 , 405, 0 ), // #1525 + INST(Xend , X86Op , O(000F01,D5,_,_,_,_,_,_ ), 0 , 21 , 0 , 10421, 30 , 160), // #1526 + INST(Xgetbv , X86Op , O(000F01,D0,_,_,_,_,_,_ ), 0 , 21 , 0 , 10426, 167, 161), // #1527 + INST(Xlatb , X86Op , O(000000,D7,_,_,_,_,_,_ ), 0 , 0 , 0 , 10433, 30 , 0 ), // #1528 + INST(Xor , X86Arith , O(000000,30,6,_,x,_,_,_ ), 0 , 31 , 0 , 9521 , 171, 1 ), // #1529 + INST(Xorpd , ExtRm , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 10303, 144, 4 ), // #1530 + INST(Xorps , ExtRm , O(000F00,57,_,_,_,_,_,_ ), 0 , 4 , 0 , 10310, 144, 5 ), // #1531 + INST(Xresldtrk , X86Op , O(F20F01,E9,_,_,_,_,_,_ ), 0 , 92 , 0 , 10439, 30 , 162), // #1532 + INST(Xrstor , X86M_Only , O(000F00,AE,5,_,_,_,_,_ ), 0 , 75 , 0 , 1159 , 406, 161), // #1533 + INST(Xrstor64 , X86M_Only , O(000F00,AE,5,_,1,_,_,_ ), 0 , 230, 0 , 1167 , 407, 161), // #1534 + INST(Xrstors , X86M_Only , O(000F00,C7,3,_,_,_,_,_ ), 0 , 76 , 0 , 10449, 406, 163), // #1535 + INST(Xrstors64 , X86M_Only , O(000F00,C7,3,_,1,_,_,_ ), 0 , 231, 0 , 10457, 407, 163), // #1536 + INST(Xsave , X86M_Only , O(000F00,AE,4,_,_,_,_,_ ), 0 , 97 , 0 , 1177 , 406, 161), // #1537 + INST(Xsave64 , X86M_Only , O(000F00,AE,4,_,1,_,_,_ ), 0 , 232, 0 , 1184 , 407, 161), // #1538 + INST(Xsavec , X86M_Only , O(000F00,C7,4,_,_,_,_,_ ), 0 , 97 , 0 , 10467, 406, 164), // #1539 + INST(Xsavec64 , X86M_Only , O(000F00,C7,4,_,1,_,_,_ ), 0 , 232, 0 , 10474, 407, 164), // #1540 + INST(Xsaveopt , X86M_Only , O(000F00,AE,6,_,_,_,_,_ ), 0 , 78 , 0 , 10483, 406, 165), // #1541 + INST(Xsaveopt64 , X86M_Only , O(000F00,AE,6,_,1,_,_,_ ), 0 , 233, 0 , 10492, 407, 165), // #1542 + INST(Xsaves , X86M_Only , O(000F00,C7,5,_,_,_,_,_ ), 0 , 75 , 0 , 10503, 406, 163), // #1543 + INST(Xsaves64 , X86M_Only , O(000F00,C7,5,_,1,_,_,_ ), 0 , 230, 0 , 10510, 407, 163), // #1544 + INST(Xsetbv , X86Op , O(000F01,D1,_,_,_,_,_,_ ), 0 , 21 , 0 , 10519, 167, 161), // #1545 + INST(Xsusldtrk , X86Op , O(F20F01,E8,_,_,_,_,_,_ ), 0 , 92 , 0 , 10526, 30 , 162), // #1546 + INST(Xtest , X86Op , O(000F01,D6,_,_,_,_,_,_ ), 0 , 21 , 0 , 10536, 30 , 166) // #1547 // ${InstInfo:End} }; #undef NAME_DATA_INDEX @@ -1610,17 +1654,17 @@ const InstDB::InstInfo 
InstDB::_instInfoTable[] = { // ${MainOpcodeTable:Begin} // ------------------- Automatically generated, do not edit ------------------- const uint32_t InstDB::_mainOpcodeTable[] = { - O(000000,00,0,0,0,0,0,_ ), // #0 [ref=55x] + O(000000,00,0,0,0,0,0,_ ), // #0 [ref=56x] O(000000,00,2,0,0,0,0,_ ), // #1 [ref=4x] - O(660F38,00,0,0,0,0,0,_ ), // #2 [ref=42x] + O(660F38,00,0,0,0,0,0,_ ), // #2 [ref=43x] O(660F00,00,0,0,0,0,0,_ ), // #3 [ref=38x] - O(000F00,00,0,0,0,0,0,_ ), // #4 [ref=231x] + O(000F00,00,0,0,0,0,0,_ ), // #4 [ref=233x] O(F20F00,00,0,0,0,0,0,_ ), // #5 [ref=24x] O(F30F00,00,0,0,0,0,0,_ ), // #6 [ref=29x] O(F30F38,00,0,0,0,0,0,_ ), // #7 [ref=2x] O(660F3A,00,0,0,0,0,0,_ ), // #8 [ref=22x] O(000000,00,4,0,0,0,0,_ ), // #9 [ref=5x] - V(000F38,00,0,0,0,0,0,_ ), // #10 [ref=3x] + V(000F38,00,0,0,0,0,0,_ ), // #10 [ref=6x] V(XOP_M9,00,1,0,0,0,0,_ ), // #11 [ref=3x] V(XOP_M9,00,6,0,0,0,0,_ ), // #12 [ref=2x] V(XOP_M9,00,5,0,0,0,0,_ ), // #13 [ref=1x] @@ -1630,208 +1674,220 @@ const uint32_t InstDB::_mainOpcodeTable[] = { V(000F38,00,2,0,0,0,0,_ ), // #17 [ref=1x] V(000F38,00,1,0,0,0,0,_ ), // #18 [ref=1x] O(660000,00,0,0,0,0,0,_ ), // #19 [ref=7x] - O(000000,00,0,0,1,0,0,_ ), // #20 [ref=4x] - O(000F01,00,0,0,0,0,0,_ ), // #21 [ref=25x] + O(000000,00,0,0,1,0,0,_ ), // #20 [ref=3x] + O(000F01,00,0,0,0,0,0,_ ), // #21 [ref=29x] O(000F00,00,7,0,0,0,0,_ ), // #22 [ref=5x] - O(660F00,00,7,0,0,0,0,_ ), // #23 [ref=2x] - O(660F00,00,6,0,0,0,0,_ ), // #24 [ref=2x] - O(000000,00,7,0,0,0,0,_ ), // #25 [ref=5x] - O(000F00,00,1,0,1,0,0,_ ), // #26 [ref=2x] - O(000F00,00,1,0,0,0,0,_ ), // #27 [ref=6x] - O(F20F38,00,0,0,0,0,0,_ ), // #28 [ref=2x] - O(000000,00,1,0,0,0,0,_ ), // #29 [ref=3x] - O(000000,00,6,0,0,0,0,_ ), // #30 [ref=3x] - O_FPU(00,D900,_) , // #31 [ref=29x] - O_FPU(00,C000,0) , // #32 [ref=1x] - O_FPU(00,DE00,_) , // #33 [ref=7x] - O_FPU(00,0000,4) , // #34 [ref=4x] - O_FPU(00,0000,6) , // #35 [ref=4x] - O_FPU(9B,DB00,_) , // #36 [ref=2x] - O_FPU(00,DA00,_) , // #37 [ref=5x] - O_FPU(00,DB00,_) , // #38 [ref=8x] - O_FPU(00,D000,2) , // #39 [ref=1x] - O_FPU(00,DF00,_) , // #40 [ref=2x] - O_FPU(00,D800,3) , // #41 [ref=1x] - O_FPU(00,F000,6) , // #42 [ref=1x] - O_FPU(00,F800,7) , // #43 [ref=1x] - O_FPU(00,DD00,_) , // #44 [ref=3x] - O_FPU(00,0000,0) , // #45 [ref=3x] - O_FPU(00,0000,2) , // #46 [ref=3x] - O_FPU(00,0000,3) , // #47 [ref=3x] - O_FPU(00,0000,7) , // #48 [ref=3x] - O_FPU(00,0000,1) , // #49 [ref=2x] - O_FPU(00,0000,5) , // #50 [ref=2x] - O_FPU(00,C800,1) , // #51 [ref=1x] - O_FPU(9B,0000,6) , // #52 [ref=2x] - O_FPU(9B,0000,7) , // #53 [ref=2x] - O_FPU(00,E000,4) , // #54 [ref=1x] - O_FPU(00,E800,5) , // #55 [ref=1x] - O_FPU(00,0000,_) , // #56 [ref=1x] - O(000F00,00,0,0,1,0,0,_ ), // #57 [ref=1x] - O(000000,00,5,0,0,0,0,_ ), // #58 [ref=3x] - V(660F00,00,0,1,0,0,0,_ ), // #59 [ref=7x] - V(660F00,00,0,1,1,0,0,_ ), // #60 [ref=6x] - V(000F00,00,0,1,1,0,0,_ ), // #61 [ref=7x] - V(000F00,00,0,1,0,0,0,_ ), // #62 [ref=8x] - V(660F00,00,0,0,0,0,0,_ ), // #63 [ref=15x] - V(660F00,00,0,0,1,0,0,_ ), // #64 [ref=4x] - V(000F00,00,0,0,1,0,0,_ ), // #65 [ref=4x] - V(000F00,00,0,0,0,0,0,_ ), // #66 [ref=10x] - V(660F3A,00,0,0,0,0,0,_ ), // #67 [ref=45x] - V(660F3A,00,0,0,1,0,0,_ ), // #68 [ref=4x] - O(000F00,00,2,0,0,0,0,_ ), // #69 [ref=5x] - O(000F00,00,5,0,0,0,0,_ ), // #70 [ref=4x] - O(000F00,00,3,0,0,0,0,_ ), // #71 [ref=5x] - V(XOP_M9,00,0,0,0,0,0,_ ), // #72 [ref=32x] - O(000F00,00,6,0,0,0,0,_ ), // #73 [ref=5x] - V(XOP_MA,00,0,0,0,0,0,_ ), // #74 [ref=1x] - 
V(XOP_MA,00,1,0,0,0,0,_ ), // #75 [ref=1x] - O(000F38,00,0,0,0,0,0,_ ), // #76 [ref=23x] - V(F20F38,00,0,0,0,0,0,_ ), // #77 [ref=3x] - O(000000,00,3,0,0,0,0,_ ), // #78 [ref=3x] - O(000F3A,00,0,0,0,0,0,_ ), // #79 [ref=4x] - O(F30000,00,0,0,0,0,0,_ ), // #80 [ref=1x] - O(000F0F,00,0,0,0,0,0,_ ), // #81 [ref=26x] - V(F30F38,00,0,0,0,0,0,_ ), // #82 [ref=2x] - O(000F3A,00,0,0,1,0,0,_ ), // #83 [ref=1x] - O(660F3A,00,0,0,1,0,0,_ ), // #84 [ref=1x] - O(F30F00,00,1,0,0,0,0,_ ), // #85 [ref=1x] - O(F30F00,00,7,0,0,0,0,_ ), // #86 [ref=1x] - V(F20F3A,00,0,0,0,0,0,_ ), // #87 [ref=1x] - V(660F38,00,0,0,0,0,0,_ ), // #88 [ref=22x] - O(000F00,00,4,0,0,0,0,_ ), // #89 [ref=4x] - V(XOP_M9,00,7,0,0,0,0,_ ), // #90 [ref=1x] - V(XOP_M9,00,4,0,0,0,0,_ ), // #91 [ref=1x] - E(F20F38,00,0,2,0,0,2,T4X), // #92 [ref=6x] - V(660F00,00,0,0,0,1,4,FV ), // #93 [ref=22x] - V(000F00,00,0,0,0,0,4,FV ), // #94 [ref=16x] - V(F20F00,00,0,0,0,1,3,T1S), // #95 [ref=10x] - V(F30F00,00,0,0,0,0,2,T1S), // #96 [ref=10x] - V(F20F00,00,0,0,0,0,0,_ ), // #97 [ref=4x] - V(660F38,00,0,0,0,0,4,FVM), // #98 [ref=14x] - E(660F3A,00,0,0,0,0,4,FV ), // #99 [ref=14x] - E(660F3A,00,0,0,0,1,4,FV ), // #100 [ref=14x] - E(660F38,00,0,0,0,0,4,FVM), // #101 [ref=9x] - E(660F38,00,0,0,0,0,4,FV ), // #102 [ref=22x] - E(660F38,00,0,0,0,1,4,FV ), // #103 [ref=28x] - E(660F38,00,0,0,0,1,4,FVM), // #104 [ref=9x] - V(660F38,00,0,1,0,0,0,_ ), // #105 [ref=2x] - E(660F38,00,0,0,0,0,3,T2 ), // #106 [ref=2x] - E(660F38,00,0,0,0,0,4,T4 ), // #107 [ref=2x] - E(660F38,00,0,2,0,0,5,T8 ), // #108 [ref=2x] - E(660F38,00,0,0,0,1,4,T2 ), // #109 [ref=2x] - E(660F38,00,0,2,0,1,5,T4 ), // #110 [ref=2x] - V(660F38,00,0,0,0,1,3,T1S), // #111 [ref=2x] - V(660F38,00,0,0,0,0,2,T1S), // #112 [ref=14x] - V(660F00,00,0,0,0,1,3,T1S), // #113 [ref=5x] - V(000F00,00,0,0,0,0,2,T1S), // #114 [ref=2x] - E(660F38,00,0,0,0,1,3,T1S), // #115 [ref=14x] - E(660F38,00,0,0,0,0,2,T1S), // #116 [ref=14x] - V(F30F00,00,0,0,0,0,3,HV ), // #117 [ref=1x] - E(F20F38,00,0,0,0,0,0,_ ), // #118 [ref=1x] - E(F30F38,00,0,0,0,0,0,_ ), // #119 [ref=7x] - V(F20F00,00,0,0,0,1,4,FV ), // #120 [ref=1x] - E(660F00,00,0,0,0,1,4,FV ), // #121 [ref=9x] - E(000F00,00,0,0,0,1,4,FV ), // #122 [ref=3x] - V(660F38,00,0,0,0,0,3,HVM), // #123 [ref=7x] - V(660F00,00,0,0,0,0,4,FV ), // #124 [ref=11x] - V(000F00,00,0,0,0,0,4,HV ), // #125 [ref=1x] - V(660F3A,00,0,0,0,0,3,HVM), // #126 [ref=1x] - E(660F00,00,0,0,0,0,3,HV ), // #127 [ref=4x] - E(000F00,00,0,0,0,0,4,FV ), // #128 [ref=2x] - E(F30F00,00,0,0,0,1,4,FV ), // #129 [ref=2x] - V(F20F00,00,0,0,0,0,3,T1F), // #130 [ref=2x] - E(F20F00,00,0,0,0,0,3,T1F), // #131 [ref=2x] - V(F20F00,00,0,0,0,0,2,T1W), // #132 [ref=1x] - V(F30F00,00,0,0,0,0,2,T1W), // #133 [ref=1x] - V(F30F00,00,0,0,0,0,2,T1F), // #134 [ref=2x] - E(F30F00,00,0,0,0,0,2,T1F), // #135 [ref=2x] - V(F30F00,00,0,0,0,0,4,FV ), // #136 [ref=1x] - E(F30F00,00,0,0,0,0,3,HV ), // #137 [ref=1x] - E(F20F00,00,0,0,0,0,4,FV ), // #138 [ref=1x] - E(F20F00,00,0,0,0,1,4,FV ), // #139 [ref=1x] - E(F20F00,00,0,0,0,0,2,T1W), // #140 [ref=1x] - E(F30F00,00,0,0,0,0,2,T1W), // #141 [ref=1x] - E(660F3A,00,0,0,0,0,4,FVM), // #142 [ref=5x] - E(660F38,00,0,2,0,1,4,FV ), // #143 [ref=3x] - E(660F38,00,0,2,0,0,4,FV ), // #144 [ref=3x] - V(660F3A,00,0,1,0,0,0,_ ), // #145 [ref=6x] - E(660F3A,00,0,0,0,0,4,T4 ), // #146 [ref=4x] - E(660F3A,00,0,2,0,0,5,T8 ), // #147 [ref=4x] - E(660F3A,00,0,0,0,1,4,T2 ), // #148 [ref=4x] - E(660F3A,00,0,2,0,1,5,T4 ), // #149 [ref=4x] - V(660F3A,00,0,0,0,0,2,T1S), // #150 [ref=4x] - 
E(660F3A,00,0,0,0,1,3,T1S), // #151 [ref=6x] - E(660F3A,00,0,0,0,0,2,T1S), // #152 [ref=6x] - V(660F38,00,0,0,1,1,4,FV ), // #153 [ref=20x] - V(660F38,00,0,0,0,0,4,FV ), // #154 [ref=32x] - V(660F38,00,0,0,1,1,3,T1S), // #155 [ref=12x] - V(660F38,00,0,0,1,0,0,_ ), // #156 [ref=5x] - E(660F38,00,1,2,0,1,3,T1S), // #157 [ref=2x] - E(660F38,00,1,2,0,0,2,T1S), // #158 [ref=2x] - E(660F38,00,2,2,0,1,3,T1S), // #159 [ref=2x] - E(660F38,00,2,2,0,0,2,T1S), // #160 [ref=2x] - V(660F3A,00,0,0,1,1,4,FV ), // #161 [ref=2x] - V(000F00,00,2,0,0,0,0,_ ), // #162 [ref=1x] - V(660F00,00,0,0,0,1,4,FVM), // #163 [ref=3x] - V(000F00,00,0,0,0,0,4,FVM), // #164 [ref=3x] - V(660F00,00,0,0,0,0,2,T1S), // #165 [ref=1x] - V(F20F00,00,0,0,0,1,3,DUP), // #166 [ref=1x] - E(660F00,00,0,0,0,0,4,FVM), // #167 [ref=1x] - E(660F00,00,0,0,0,1,4,FVM), // #168 [ref=1x] - V(F30F00,00,0,0,0,0,0,_ ), // #169 [ref=3x] - E(F20F00,00,0,0,0,1,4,FVM), // #170 [ref=1x] - E(F30F00,00,0,0,0,0,4,FVM), // #171 [ref=1x] - E(F30F00,00,0,0,0,1,4,FVM), // #172 [ref=1x] - E(F20F00,00,0,0,0,0,4,FVM), // #173 [ref=1x] - V(000F00,00,0,0,0,0,3,T2 ), // #174 [ref=2x] - V(660F00,00,0,0,0,0,4,FVM), // #175 [ref=33x] - V(F30F00,00,0,0,0,0,4,FVM), // #176 [ref=3x] - O(F30F00,00,6,0,0,0,0,_ ), // #177 [ref=1x] - V(660F3A,00,0,0,0,0,4,FVM), // #178 [ref=2x] - E(660F00,00,0,0,0,0,4,FV ), // #179 [ref=5x] - V(660F38,00,0,0,0,0,0,T1S), // #180 [ref=1x] - E(F30F38,00,0,0,0,1,0,_ ), // #181 [ref=5x] - V(660F38,00,0,0,0,0,1,T1S), // #182 [ref=1x] - V(XOP_M8,00,0,0,0,0,0,_ ), // #183 [ref=22x] - V(660F38,00,0,0,0,1,4,FVM), // #184 [ref=2x] - E(660F3A,00,0,0,0,1,4,FVM), // #185 [ref=2x] - E(660F38,00,0,0,0,0,0,T1S), // #186 [ref=2x] - E(660F38,00,0,0,0,1,1,T1S), // #187 [ref=2x] - V(660F38,00,0,0,0,1,4,FV ), // #188 [ref=3x] - E(660F38,00,0,0,1,1,4,FV ), // #189 [ref=1x] - V(660F3A,00,0,0,0,0,0,T1S), // #190 [ref=2x] - V(660F3A,00,0,0,1,1,3,T1S), // #191 [ref=2x] - V(660F3A,00,0,0,0,0,1,T1S), // #192 [ref=1x] - V(660F00,00,0,0,0,0,1,T1S), // #193 [ref=1x] - E(F30F38,00,0,0,0,0,2,QVM), // #194 [ref=6x] - E(F30F38,00,0,0,0,0,3,HVM), // #195 [ref=9x] - E(F30F38,00,0,0,0,0,1,OVM), // #196 [ref=3x] - V(660F38,00,0,0,0,0,2,QVM), // #197 [ref=4x] - V(660F38,00,0,0,0,0,1,OVM), // #198 [ref=2x] - E(660F00,00,1,0,0,0,4,FV ), // #199 [ref=1x] - E(660F00,00,1,0,0,1,4,FV ), // #200 [ref=1x] - V(F20F00,00,0,0,0,0,4,FVM), // #201 [ref=1x] - V(660F00,00,0,0,0,0,4,128), // #202 [ref=5x] - V(660F00,00,7,0,0,0,4,FVM), // #203 [ref=1x] - V(660F00,00,0,0,0,1,4,128), // #204 [ref=2x] - E(660F00,00,0,0,0,1,4,128), // #205 [ref=1x] - V(660F00,00,3,0,0,0,4,FVM), // #206 [ref=1x] - E(F30F38,00,0,0,0,0,4,FVM), // #207 [ref=1x] - E(F30F38,00,0,0,0,0,4,FV ), // #208 [ref=1x] - E(F30F38,00,0,0,0,1,4,FV ), // #209 [ref=1x] - E(F30F38,00,0,0,0,1,4,FVM), // #210 [ref=1x] - E(660F38,00,5,2,0,1,3,T1S), // #211 [ref=2x] - E(660F38,00,5,2,0,0,2,T1S), // #212 [ref=2x] - E(660F38,00,6,2,0,1,3,T1S), // #213 [ref=2x] - E(660F38,00,6,2,0,0,2,T1S), // #214 [ref=2x] - V(000F00,00,3,0,0,0,0,_ ), // #215 [ref=1x] - O(F30F00,00,2,0,0,0,0,_ ), // #216 [ref=1x] - O(F30F00,00,3,0,0,0,0,_ ), // #217 [ref=1x] - O(000F00,00,5,0,1,0,0,_ ), // #218 [ref=2x] - O(000F00,00,3,0,1,0,0,_ ), // #219 [ref=1x] - O(000F00,00,4,0,1,0,0,_ ), // #220 [ref=2x] - O(000F00,00,6,0,1,0,0,_ ) // #221 [ref=1x] + O(660F00,00,7,0,0,0,0,_ ), // #23 [ref=1x] + O(F30F00,00,6,0,0,0,0,_ ), // #24 [ref=3x] + O(660F00,00,6,0,0,0,0,_ ), // #25 [ref=3x] + O(000000,00,7,0,0,0,0,_ ), // #26 [ref=5x] + O(000F00,00,1,0,1,0,0,_ ), // #27 [ref=2x] + 
O(000F00,00,1,0,0,0,0,_ ), // #28 [ref=6x] + O(F20F38,00,0,0,0,0,0,_ ), // #29 [ref=2x] + O(000000,00,1,0,0,0,0,_ ), // #30 [ref=3x] + O(000000,00,6,0,0,0,0,_ ), // #31 [ref=3x] + O(F30F00,00,7,0,0,0,0,3 ), // #32 [ref=1x] + O(F30F00,00,7,0,0,0,0,2 ), // #33 [ref=1x] + O_FPU(00,D900,_) , // #34 [ref=29x] + O_FPU(00,C000,0) , // #35 [ref=1x] + O_FPU(00,DE00,_) , // #36 [ref=7x] + O_FPU(00,0000,4) , // #37 [ref=4x] + O_FPU(00,0000,6) , // #38 [ref=4x] + O_FPU(9B,DB00,_) , // #39 [ref=2x] + O_FPU(00,DA00,_) , // #40 [ref=5x] + O_FPU(00,DB00,_) , // #41 [ref=8x] + O_FPU(00,D000,2) , // #42 [ref=1x] + O_FPU(00,DF00,_) , // #43 [ref=2x] + O_FPU(00,D800,3) , // #44 [ref=1x] + O_FPU(00,F000,6) , // #45 [ref=1x] + O_FPU(00,F800,7) , // #46 [ref=1x] + O_FPU(00,DD00,_) , // #47 [ref=3x] + O_FPU(00,0000,0) , // #48 [ref=3x] + O_FPU(00,0000,2) , // #49 [ref=3x] + O_FPU(00,0000,3) , // #50 [ref=3x] + O_FPU(00,0000,7) , // #51 [ref=3x] + O_FPU(00,0000,1) , // #52 [ref=2x] + O_FPU(00,0000,5) , // #53 [ref=2x] + O_FPU(00,C800,1) , // #54 [ref=1x] + O_FPU(9B,0000,6) , // #55 [ref=2x] + O_FPU(9B,0000,7) , // #56 [ref=2x] + O_FPU(00,E000,4) , // #57 [ref=1x] + O_FPU(00,E800,5) , // #58 [ref=1x] + O_FPU(00,0000,_) , // #59 [ref=1x] + O(000F00,00,0,0,1,0,0,_ ), // #60 [ref=1x] + O(000000,00,5,0,0,0,0,_ ), // #61 [ref=3x] + O(F30F00,00,5,0,0,0,0,_ ), // #62 [ref=2x] + O(F30F00,00,5,0,1,0,0,_ ), // #63 [ref=1x] + V(660F00,00,0,1,0,0,0,_ ), // #64 [ref=7x] + V(660F00,00,0,1,1,0,0,_ ), // #65 [ref=6x] + V(000F00,00,0,1,1,0,0,_ ), // #66 [ref=7x] + V(000F00,00,0,1,0,0,0,_ ), // #67 [ref=8x] + V(660F00,00,0,0,0,0,0,_ ), // #68 [ref=15x] + V(660F00,00,0,0,1,0,0,_ ), // #69 [ref=4x] + V(000F00,00,0,0,1,0,0,_ ), // #70 [ref=4x] + V(000F00,00,0,0,0,0,0,_ ), // #71 [ref=10x] + V(660F3A,00,0,0,0,0,0,_ ), // #72 [ref=45x] + V(660F3A,00,0,0,1,0,0,_ ), // #73 [ref=4x] + O(000F00,00,2,0,0,0,0,_ ), // #74 [ref=5x] + O(000F00,00,5,0,0,0,0,_ ), // #75 [ref=4x] + O(000F00,00,3,0,0,0,0,_ ), // #76 [ref=5x] + V(XOP_M9,00,0,0,0,0,0,_ ), // #77 [ref=32x] + O(000F00,00,6,0,0,0,0,_ ), // #78 [ref=5x] + V(XOP_MA,00,0,0,0,0,0,_ ), // #79 [ref=1x] + V(XOP_MA,00,1,0,0,0,0,_ ), // #80 [ref=1x] + O(F30F01,00,0,0,0,0,0,_ ), // #81 [ref=5x] + O(000F38,00,0,0,0,0,0,_ ), // #82 [ref=24x] + V(F20F38,00,0,0,0,0,0,_ ), // #83 [ref=6x] + O(000000,00,3,0,0,0,0,_ ), // #84 [ref=3x] + O(000F3A,00,0,0,0,0,0,_ ), // #85 [ref=4x] + O(F30000,00,0,0,0,0,0,_ ), // #86 [ref=1x] + O(000F0F,00,0,0,0,0,0,_ ), // #87 [ref=26x] + V(F30F38,00,0,0,0,0,0,_ ), // #88 [ref=5x] + O(000F3A,00,0,0,1,0,0,_ ), // #89 [ref=1x] + O(660F3A,00,0,0,1,0,0,_ ), // #90 [ref=1x] + O(F30F00,00,4,0,0,0,0,_ ), // #91 [ref=1x] + O(F20F01,00,0,0,0,0,0,_ ), // #92 [ref=4x] + O(F30F00,00,1,0,0,0,0,_ ), // #93 [ref=3x] + O(F30F00,00,7,0,0,0,0,_ ), // #94 [ref=1x] + V(F20F3A,00,0,0,0,0,0,_ ), // #95 [ref=1x] + V(660F38,00,0,0,0,0,0,_ ), // #96 [ref=25x] + O(000F00,00,4,0,0,0,0,_ ), // #97 [ref=4x] + V(XOP_M9,00,7,0,0,0,0,_ ), // #98 [ref=1x] + V(XOP_M9,00,4,0,0,0,0,_ ), // #99 [ref=1x] + O(F20F00,00,6,0,0,0,0,_ ), // #100 [ref=1x] + E(F20F38,00,0,2,0,0,2,T4X), // #101 [ref=6x] + V(660F00,00,0,0,0,1,4,FV ), // #102 [ref=22x] + V(000F00,00,0,0,0,0,4,FV ), // #103 [ref=16x] + V(F20F00,00,0,0,0,1,3,T1S), // #104 [ref=10x] + V(F30F00,00,0,0,0,0,2,T1S), // #105 [ref=10x] + V(F20F00,00,0,0,0,0,0,_ ), // #106 [ref=4x] + V(660F38,00,0,0,0,0,4,FVM), // #107 [ref=14x] + E(660F3A,00,0,0,0,0,4,FV ), // #108 [ref=14x] + E(660F3A,00,0,0,0,1,4,FV ), // #109 [ref=14x] + E(660F38,00,0,0,0,0,4,FVM), // #110 
[ref=9x] + E(660F38,00,0,0,0,0,4,FV ), // #111 [ref=22x] + E(660F38,00,0,0,0,1,4,FV ), // #112 [ref=28x] + E(660F38,00,0,0,0,1,4,FVM), // #113 [ref=9x] + V(660F38,00,0,1,0,0,0,_ ), // #114 [ref=2x] + E(660F38,00,0,0,0,0,3,T2 ), // #115 [ref=2x] + E(660F38,00,0,0,0,0,4,T4 ), // #116 [ref=2x] + E(660F38,00,0,2,0,0,5,T8 ), // #117 [ref=2x] + E(660F38,00,0,0,0,1,4,T2 ), // #118 [ref=2x] + E(660F38,00,0,2,0,1,5,T4 ), // #119 [ref=2x] + V(660F38,00,0,0,0,1,3,T1S), // #120 [ref=2x] + V(660F38,00,0,0,0,0,2,T1S), // #121 [ref=14x] + V(660F00,00,0,0,0,1,3,T1S), // #122 [ref=5x] + V(000F00,00,0,0,0,0,2,T1S), // #123 [ref=2x] + E(660F38,00,0,0,0,1,3,T1S), // #124 [ref=14x] + E(660F38,00,0,0,0,0,2,T1S), // #125 [ref=14x] + V(F30F00,00,0,0,0,0,3,HV ), // #126 [ref=1x] + E(F20F38,00,0,0,0,0,0,_ ), // #127 [ref=1x] + E(F30F38,00,0,0,0,0,0,_ ), // #128 [ref=7x] + V(F20F00,00,0,0,0,1,4,FV ), // #129 [ref=1x] + E(660F00,00,0,0,0,1,4,FV ), // #130 [ref=9x] + E(000F00,00,0,0,0,1,4,FV ), // #131 [ref=3x] + V(660F38,00,0,0,0,0,3,HVM), // #132 [ref=7x] + V(660F00,00,0,0,0,0,4,FV ), // #133 [ref=11x] + V(000F00,00,0,0,0,0,4,HV ), // #134 [ref=1x] + V(660F3A,00,0,0,0,0,3,HVM), // #135 [ref=1x] + E(660F00,00,0,0,0,0,3,HV ), // #136 [ref=4x] + E(000F00,00,0,0,0,0,4,FV ), // #137 [ref=2x] + E(F30F00,00,0,0,0,1,4,FV ), // #138 [ref=2x] + V(F20F00,00,0,0,0,0,3,T1F), // #139 [ref=2x] + E(F20F00,00,0,0,0,0,3,T1F), // #140 [ref=2x] + V(F20F00,00,0,0,0,0,2,T1W), // #141 [ref=1x] + V(F30F00,00,0,0,0,0,2,T1W), // #142 [ref=1x] + V(F30F00,00,0,0,0,0,2,T1F), // #143 [ref=2x] + E(F30F00,00,0,0,0,0,2,T1F), // #144 [ref=2x] + V(F30F00,00,0,0,0,0,4,FV ), // #145 [ref=1x] + E(F30F00,00,0,0,0,0,3,HV ), // #146 [ref=1x] + E(F20F00,00,0,0,0,0,4,FV ), // #147 [ref=1x] + E(F20F00,00,0,0,0,1,4,FV ), // #148 [ref=1x] + E(F20F00,00,0,0,0,0,2,T1W), // #149 [ref=1x] + E(F30F00,00,0,0,0,0,2,T1W), // #150 [ref=1x] + E(660F3A,00,0,0,0,0,4,FVM), // #151 [ref=5x] + E(660F38,00,0,2,0,1,4,FV ), // #152 [ref=3x] + E(660F38,00,0,2,0,0,4,FV ), // #153 [ref=3x] + V(660F3A,00,0,1,0,0,0,_ ), // #154 [ref=6x] + E(660F3A,00,0,0,0,0,4,T4 ), // #155 [ref=4x] + E(660F3A,00,0,2,0,0,5,T8 ), // #156 [ref=4x] + E(660F3A,00,0,0,0,1,4,T2 ), // #157 [ref=4x] + E(660F3A,00,0,2,0,1,5,T4 ), // #158 [ref=4x] + V(660F3A,00,0,0,0,0,2,T1S), // #159 [ref=4x] + E(660F3A,00,0,0,0,1,3,T1S), // #160 [ref=6x] + E(660F3A,00,0,0,0,0,2,T1S), // #161 [ref=6x] + V(660F38,00,0,0,1,1,4,FV ), // #162 [ref=20x] + V(660F38,00,0,0,0,0,4,FV ), // #163 [ref=32x] + V(660F38,00,0,0,1,1,3,T1S), // #164 [ref=12x] + V(660F38,00,0,0,1,0,0,_ ), // #165 [ref=5x] + E(660F38,00,1,2,0,1,3,T1S), // #166 [ref=2x] + E(660F38,00,1,2,0,0,2,T1S), // #167 [ref=2x] + E(660F38,00,2,2,0,1,3,T1S), // #168 [ref=2x] + E(660F38,00,2,2,0,0,2,T1S), // #169 [ref=2x] + V(660F3A,00,0,0,1,1,4,FV ), // #170 [ref=2x] + V(000F00,00,2,0,0,0,0,_ ), // #171 [ref=1x] + V(660F00,00,0,0,0,1,4,FVM), // #172 [ref=3x] + V(000F00,00,0,0,0,0,4,FVM), // #173 [ref=3x] + V(660F00,00,0,0,0,0,2,T1S), // #174 [ref=1x] + V(F20F00,00,0,0,0,1,3,DUP), // #175 [ref=1x] + E(660F00,00,0,0,0,0,4,FVM), // #176 [ref=1x] + E(660F00,00,0,0,0,1,4,FVM), // #177 [ref=1x] + V(F30F00,00,0,0,0,0,0,_ ), // #178 [ref=3x] + E(F20F00,00,0,0,0,1,4,FVM), // #179 [ref=1x] + E(F30F00,00,0,0,0,0,4,FVM), // #180 [ref=1x] + E(F30F00,00,0,0,0,1,4,FVM), // #181 [ref=1x] + E(F20F00,00,0,0,0,0,4,FVM), // #182 [ref=1x] + V(000F00,00,0,0,0,0,3,T2 ), // #183 [ref=2x] + V(660F00,00,0,0,0,0,4,FVM), // #184 [ref=33x] + V(F30F00,00,0,0,0,0,4,FVM), // #185 [ref=3x] + 
E(F20F38,00,0,0,0,0,4,FV ), // #186 [ref=1x] + E(F20F38,00,0,0,0,1,4,FV ), // #187 [ref=1x] + V(660F3A,00,0,0,0,0,4,FVM), // #188 [ref=2x] + E(660F00,00,0,0,0,0,4,FV ), // #189 [ref=5x] + V(660F38,00,0,0,0,0,0,T1S), // #190 [ref=1x] + E(F30F38,00,0,0,0,1,0,_ ), // #191 [ref=5x] + V(660F38,00,0,0,0,0,1,T1S), // #192 [ref=1x] + V(XOP_M8,00,0,0,0,0,0,_ ), // #193 [ref=22x] + V(660F38,00,0,0,0,1,4,FVM), // #194 [ref=2x] + E(660F3A,00,0,0,0,1,4,FVM), // #195 [ref=2x] + E(660F38,00,0,0,0,0,0,T1S), // #196 [ref=2x] + E(660F38,00,0,0,0,1,1,T1S), // #197 [ref=2x] + V(660F38,00,0,0,0,1,4,FV ), // #198 [ref=3x] + E(660F38,00,0,0,1,1,4,FV ), // #199 [ref=1x] + V(660F3A,00,0,0,0,0,0,T1S), // #200 [ref=2x] + V(660F3A,00,0,0,1,1,3,T1S), // #201 [ref=2x] + V(660F3A,00,0,0,0,0,1,T1S), // #202 [ref=1x] + V(660F00,00,0,0,0,0,1,T1S), // #203 [ref=1x] + E(F30F38,00,0,0,0,0,2,QVM), // #204 [ref=6x] + E(F30F38,00,0,0,0,0,3,HVM), // #205 [ref=9x] + E(F30F38,00,0,0,0,0,1,OVM), // #206 [ref=3x] + V(660F38,00,0,0,0,0,2,QVM), // #207 [ref=4x] + V(660F38,00,0,0,0,0,1,OVM), // #208 [ref=2x] + E(660F00,00,1,0,0,0,4,FV ), // #209 [ref=1x] + E(660F00,00,1,0,0,1,4,FV ), // #210 [ref=1x] + V(F20F00,00,0,0,0,0,4,FVM), // #211 [ref=1x] + V(660F00,00,0,0,0,0,4,128), // #212 [ref=5x] + V(660F00,00,7,0,0,0,4,FVM), // #213 [ref=1x] + V(660F00,00,0,0,0,1,4,128), // #214 [ref=2x] + E(660F00,00,0,0,0,1,4,128), // #215 [ref=1x] + V(660F00,00,3,0,0,0,4,FVM), // #216 [ref=1x] + E(F30F38,00,0,0,0,0,4,FVM), // #217 [ref=1x] + E(F30F38,00,0,0,0,0,4,FV ), // #218 [ref=1x] + E(F30F38,00,0,0,0,1,4,FV ), // #219 [ref=1x] + E(F30F38,00,0,0,0,1,4,FVM), // #220 [ref=1x] + E(660F38,00,5,2,0,1,3,T1S), // #221 [ref=2x] + E(660F38,00,5,2,0,0,2,T1S), // #222 [ref=2x] + E(660F38,00,6,2,0,1,3,T1S), // #223 [ref=2x] + E(660F38,00,6,2,0,0,2,T1S), // #224 [ref=2x] + V(000F00,00,3,0,0,0,0,_ ), // #225 [ref=1x] + O(F30F00,00,2,0,0,0,0,_ ), // #226 [ref=1x] + O(F30F00,00,3,0,0,0,0,_ ), // #227 [ref=1x] + O(000F38,00,0,0,1,0,0,_ ), // #228 [ref=1x] + O(660F38,00,0,0,1,0,0,_ ), // #229 [ref=1x] + O(000F00,00,5,0,1,0,0,_ ), // #230 [ref=2x] + O(000F00,00,3,0,1,0,0,_ ), // #231 [ref=1x] + O(000F00,00,4,0,1,0,0,_ ), // #232 [ref=2x] + O(000F00,00,6,0,1,0,0,_ ) // #233 [ref=1x] }; // ---------------------------------------------------------------------------- // ${MainOpcodeTable:End} @@ -1839,7 +1895,7 @@ const uint32_t InstDB::_mainOpcodeTable[] = { // ${AltOpcodeTable:Begin} // ------------------- Automatically generated, do not edit ------------------- const uint32_t InstDB::_altOpcodeTable[] = { - 0 , // #0 [ref=1359x] + 0 , // #0 [ref=1403x] O(660F00,1B,_,_,_,_,_,_ ), // #1 [ref=1x] O(000F00,BA,4,_,x,_,_,_ ), // #2 [ref=1x] O(000F00,BA,7,_,x,_,_,_ ), // #3 [ref=1x] @@ -1975,10 +2031,10 @@ const uint32_t InstDB::_altOpcodeTable[] = { // ---------------------------------------------------------------------------- // ${AltOpcodeTable:End} -#undef O_FPU #undef O #undef V #undef E +#undef O_FPU // ============================================================================ // [asmjit::x86::InstDB - CommonInfoTableA] @@ -1991,397 +2047,413 @@ const uint32_t InstDB::_altOpcodeTable[] = { #define SINGLE_REG(VAL) InstDB::kSingleReg##VAL const InstDB::CommonInfo InstDB::_commonInfoTable[] = { { 0 , 0 , 0 , CONTROL(None) , SINGLE_REG(None), 0 }, // #0 [ref=1x] - { 0 , 339, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #1 [ref=4x] - { 0 , 340, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #2 [ref=2x] + { 0 , 347, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #1 [ref=4x] 
+ { 0 , 348, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #2 [ref=2x] { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #3 [ref=2x] - { 0 , 151, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #4 [ref=2x] + { 0 , 156, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #4 [ref=2x] { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #5 [ref=54x] { F(Vec) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #6 [ref=19x] - { F(Vec) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #7 [ref=16x] - { F(Vec) , 183, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #8 [ref=20x] + { F(Vec) , 230, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #7 [ref=16x] + { F(Vec) , 188, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #8 [ref=20x] { F(Lock)|F(XAcquire)|F(XRelease) , 28 , 11, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #9 [ref=1x] - { F(Vex) , 237, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #10 [ref=3x] + { F(Vex) , 245, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #10 [ref=3x] { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #11 [ref=12x] - { 0 , 341, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #12 [ref=1x] - { F(Vex) , 239, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #13 [ref=5x] - { F(Vex) , 151, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #14 [ref=12x] - { F(Vec) , 342, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #15 [ref=4x] - { 0 , 241, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #16 [ref=3x] - { F(Mib) , 343, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #17 [ref=1x] - { 0 , 344, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #18 [ref=1x] - { 0 , 243, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #19 [ref=1x] - { F(Mib) , 345, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #20 [ref=1x] - { 0 , 245, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #21 [ref=1x] - { 0 , 150, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #22 [ref=35x] - { 0 , 346, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #23 [ref=3x] - { 0 , 114, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #24 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 114, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #25 [ref=3x] - { F(Rep)|F(RepIgnored) , 247, 2 , CONTROL(Call) , SINGLE_REG(None), 0 }, // #26 [ref=1x] - { 0 , 347, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #27 [ref=1x] - { 0 , 348, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #28 [ref=2x] - { 0 , 322, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #29 [ref=1x] - { 0 , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #30 [ref=74x] - { 0 , 349, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #31 [ref=24x] - { 0 , 350, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #32 [ref=1x] - { 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #33 [ref=1x] - { F(Rep) , 351, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #34 [ref=1x] - { F(Vec) , 352, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #35 [ref=2x] - { F(Vec) , 353, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #36 [ref=3x] - { F(Lock)|F(XAcquire)|F(XRelease) , 118, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #37 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 354, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #38 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 355, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #39 [ref=1x] - { 0 , 356, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #40 [ref=1x] - { 0 , 357, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #41 [ref=1x] - { 0 , 249, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #42 [ref=1x] - { F(Mmx)|F(Vec) , 358, 1 , CONTROL(None) 
, SINGLE_REG(None), 0 }, // #43 [ref=2x] - { F(Mmx)|F(Vec) , 359, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #44 [ref=2x] - { F(Mmx)|F(Vec) , 360, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #45 [ref=2x] - { F(Vec) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #46 [ref=2x] - { F(Vec) , 362, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #47 [ref=2x] - { F(Vec) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #48 [ref=2x] - { 0 , 364, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #49 [ref=1x] - { 0 , 365, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #50 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 251, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #51 [ref=2x] - { 0 , 39 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #52 [ref=3x] - { F(Mmx) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #53 [ref=1x] - { 0 , 253, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #54 [ref=2x] - { 0 , 366, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #55 [ref=1x] - { F(Vec) , 367, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #56 [ref=2x] - { F(Vec) , 255, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #57 [ref=1x] - { F(FpuM32)|F(FpuM64) , 153, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #58 [ref=6x] - { 0 , 257, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #59 [ref=9x] - { F(FpuM80) , 368, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #60 [ref=2x] - { 0 , 258, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #61 [ref=13x] - { F(FpuM32)|F(FpuM64) , 259, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #62 [ref=2x] - { F(FpuM16)|F(FpuM32) , 369, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #63 [ref=9x] - { F(FpuM16)|F(FpuM32)|F(FpuM64) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #64 [ref=3x] - { F(FpuM32)|F(FpuM64)|F(FpuM80) , 371, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #65 [ref=2x] - { F(FpuM16) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #66 [ref=3x] - { F(FpuM16) , 373, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #67 [ref=2x] - { F(FpuM32)|F(FpuM64) , 260, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #68 [ref=1x] - { 0 , 374, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #69 [ref=2x] - { 0 , 39 , 10, CONTROL(None) , SINGLE_REG(None), 0 }, // #70 [ref=1x] - { 0 , 375, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #71 [ref=1x] - { F(Rep) , 376, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #72 [ref=1x] - { F(Vec) , 261, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #73 [ref=1x] - { 0 , 377, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #74 [ref=2x] - { 0 , 378, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #75 [ref=8x] - { 0 , 263, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #76 [ref=3x] - { 0 , 265, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #77 [ref=1x] - { 0 , 257, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #78 [ref=3x] - { 0 , 379, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #79 [ref=1x] - { F(Rep)|F(RepIgnored) , 267, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #80 [ref=30x] - { F(Rep)|F(RepIgnored) , 269, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #81 [ref=1x] - { F(Rep)|F(RepIgnored) , 271, 2 , CONTROL(Jump) , SINGLE_REG(None), 0 }, // #82 [ref=1x] - { F(Vec)|F(Vex) , 380, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #83 [ref=27x] - { F(Vec)|F(Vex) , 273, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #84 [ref=1x] - { F(Vec)|F(Vex) , 275, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #85 [ref=1x] - { F(Vec)|F(Vex) , 277, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #86 [ref=1x] - { F(Vec)|F(Vex) , 279, 2 , CONTROL(None) , 
SINGLE_REG(None), 0 }, // #87 [ref=1x] - { F(Vec)|F(Vex) , 381, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #88 [ref=12x] - { F(Vec)|F(Vex) , 382, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #89 [ref=8x] - { 0 , 383, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #90 [ref=2x] - { 0 , 281, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #91 [ref=1x] - { F(Vec) , 192, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #92 [ref=2x] - { 0 , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #93 [ref=2x] - { 0 , 283, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #94 [ref=2x] - { 0 , 385, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #95 [ref=1x] - { 0 , 156, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #96 [ref=3x] - { 0 , 386, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #97 [ref=5x] - { F(Vex) , 387, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #98 [ref=2x] - { F(Rep) , 388, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #99 [ref=1x] - { 0 , 269, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #100 [ref=3x] - { 0 , 285, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #101 [ref=1x] - { F(Vex) , 389, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #102 [ref=2x] - { F(Vec) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #103 [ref=1x] - { F(Mmx) , 391, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #104 [ref=1x] - { 0 , 392, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #105 [ref=2x] - { F(XRelease) , 0 , 16, CONTROL(None) , SINGLE_REG(None), 0 }, // #106 [ref=1x] - { F(Vec) , 70 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #107 [ref=6x] - { 0 , 64 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #108 [ref=1x] - { F(Mmx)|F(Vec) , 287, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #109 [ref=1x] - { 0 , 393, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #110 [ref=1x] - { 0 , 68 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #111 [ref=2x] - { F(Mmx)|F(Vec) , 394, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #112 [ref=1x] - { F(Vec) , 256, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #113 [ref=2x] - { F(Vec) , 198, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #114 [ref=4x] - { F(Vec) , 395, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #115 [ref=2x] - { F(Vec) , 71 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #116 [ref=3x] - { F(Mmx) , 396, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #117 [ref=1x] - { F(Vec) , 98 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #118 [ref=1x] - { F(Vec) , 201, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #119 [ref=1x] - { F(Mmx)|F(Vec) , 94 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #120 [ref=1x] - { F(Mmx)|F(Vec) , 397, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #121 [ref=1x] - { F(Rep) , 398, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #122 [ref=1x] - { F(Vec) , 97 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #123 [ref=1x] - { F(Vec) , 289, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #124 [ref=1x] - { 0 , 291, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #125 [ref=2x] - { 0 , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #126 [ref=1x] - { F(Vex) , 293, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #127 [ref=1x] - { 0 , 400, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #128 [ref=1x] - { 0 , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #129 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 252, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #130 [ref=2x] - { 0 , 295, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #131 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #132 [ref=1x] - { 0 , 
402, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #133 [ref=1x] - { F(Rep) , 403, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #134 [ref=1x] - { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #135 [ref=40x] - { F(Mmx)|F(Vec) , 299, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #136 [ref=1x] - { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #137 [ref=6x] - { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #138 [ref=16x] - { F(Mmx) , 297, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #139 [ref=26x] - { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #140 [ref=4x] - { F(Vec) , 404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #141 [ref=1x] - { F(Vec) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #142 [ref=1x] - { F(Vec) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #143 [ref=1x] - { F(Vec) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #144 [ref=1x] - { F(Vec) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #145 [ref=1x] - { F(Vec) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #146 [ref=1x] - { F(Mmx)|F(Vec) , 301, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #147 [ref=1x] - { F(Vec) , 410, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #148 [ref=1x] - { F(Vec) , 411, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #149 [ref=1x] - { F(Vec) , 412, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #150 [ref=1x] - { F(Mmx)|F(Vec) , 413, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #151 [ref=1x] - { F(Mmx)|F(Vec) , 414, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #152 [ref=1x] - { F(Vec) , 225, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #153 [ref=2x] - { 0 , 122, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #154 [ref=1x] - { 0 , 379, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #155 [ref=6x] - { F(Mmx) , 299, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #156 [ref=1x] - { F(Mmx)|F(Vec) , 303, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #157 [ref=8x] - { F(Vec) , 415, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #158 [ref=2x] - { 0 , 126, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #159 [ref=1x] - { 0 , 416, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #160 [ref=8x] - { 0 , 417, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #161 [ref=4x] - { 0 , 418, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #162 [ref=6x] - { 0 , 305, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #163 [ref=1x] - { F(Rep)|F(RepIgnored) , 307, 2 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #164 [ref=1x] - { F(Vex) , 309, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #165 [ref=1x] - { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(WO) , 0 }, // #166 [ref=3x] - { F(Rep) , 419, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #167 [ref=1x] - { 0 , 420, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #168 [ref=30x] - { 0 , 159, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #169 [ref=2x] - { 0 , 421, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #170 [ref=3x] - { F(Rep) , 422, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #171 [ref=1x] - { 0 , 57 , 7 , CONTROL(None) , SINGLE_REG(None), 0 }, // #172 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 423, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #173 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 424, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #174 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #175 [ref=22x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 162, 3 , 
CONTROL(None) , SINGLE_REG(None), 0 }, // #176 [ref=22x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #177 [ref=18x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #178 [ref=17x] - { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #179 [ref=15x] - { F(Vec)|F(Vex)|F(Evex) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #180 [ref=5x] - { F(Vec)|F(Vex) , 70 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #181 [ref=17x] - { F(Vec)|F(Vex) , 183, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #182 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #183 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #184 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #185 [ref=10x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #186 [ref=12x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #187 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #188 [ref=6x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #189 [ref=13x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #190 [ref=16x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #191 [ref=19x] - { F(Vec)|F(Vex) , 165, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #192 [ref=6x] - { F(Vec)|F(Vex) , 311, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #193 [ref=3x] - { F(Vec)|F(Vex) , 427, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #194 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #195 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 429, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #196 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 430, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #197 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 431, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #198 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #199 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 432, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #200 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 168, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #201 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 168, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #202 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 433, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #203 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 434, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #204 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #205 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #206 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 171, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #207 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #208 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #209 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #210 [ref=1x] - { 
F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #211 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #212 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #213 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #214 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #215 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 180, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #216 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #217 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #218 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #219 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #220 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #221 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #222 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #223 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #224 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #225 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #226 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #227 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #228 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #229 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #230 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #231 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #232 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #233 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #234 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #235 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #236 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #237 [ref=3x] - { F(Vec)|F(Vex) , 165, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #238 [ref=9x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #239 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #240 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #241 [ref=9x] - { F(Vec)|F(Vex) , 181, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #242 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 436, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #243 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 182, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #244 
[ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 367, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #245 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #246 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #247 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #248 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #249 [ref=4x] - { F(Vec)|F(Vex) , 130, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #250 [ref=13x] - { F(Vec)|F(Vex) , 315, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #251 [ref=4x] - { F(Vec)|F(Vex) , 317, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #252 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #253 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #254 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 440, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #255 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 441, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #256 [ref=1x] - { F(Vec)|F(Vex) , 177, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #257 [ref=7x] - { F(Vec)|F(Vex) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #258 [ref=1x] - { F(Vec)|F(Vex) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #259 [ref=1x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 99 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #260 [ref=2x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 104, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #261 [ref=2x] - { F(Vsib)|F(Evex)|F(Avx512K) , 442, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #262 [ref=4x] - { F(Vsib)|F(Evex)|F(Avx512K) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #263 [ref=4x] - { F(Vsib)|F(Evex)|F(Avx512K) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #264 [ref=8x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 109, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #265 [ref=2x] - { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 134, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #266 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #267 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #268 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #269 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #270 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #271 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #272 [ref=22x] - { F(Vec)|F(Vex) , 319, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #273 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 319, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #274 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 445, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #275 [ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #276 [ref=1x] - { F(Vec)|F(Vex) , 192, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #277 [ref=1x] - { F(Vex) , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #278 [ref=2x] - { F(Vec)|F(Vex) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #279 [ref=1x] - { F(Vec)|F(Vex) , 138, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #280 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 162, 
3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #281 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #282 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #283 [ref=2x] - { 0 , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #284 [ref=4x] - { 0 , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #285 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #286 [ref=4x] - { F(Vec)|F(Vex)|F(Evex) , 323, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #287 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 186, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #288 [ref=1x] - { F(Vec)|F(Vex) , 70 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #289 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #290 [ref=6x] - { F(Vec)|F(Vex)|F(Evex) , 200, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #291 [ref=2x] - { F(Vec)|F(Vex)|F(Evex) , 325, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #292 [ref=4x] - { F(Vec)|F(Vex) , 447, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #293 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 189, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #294 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 192, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #295 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 195, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #296 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 198, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #297 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #298 [ref=5x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 201, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #299 [ref=1x] - { 0 , 327, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #300 [ref=1x] - { 0 , 329, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #301 [ref=1x] - { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #302 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #303 [ref=2x] - { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #304 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #305 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #306 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #307 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 448, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #308 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 449, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #309 [ref=1x] - { F(Vec)|F(Evex) , 450, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #310 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 204, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #311 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 451, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #312 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #313 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #314 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #315 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K) , 210, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #316 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B32) , 210, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #317 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B64) , 210, 3 , 
CONTROL(None) , SINGLE_REG(WO) , 0 }, // #318 [ref=2x] - { F(Vec)|F(Vex) , 404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #319 [ref=1x] - { F(Vec)|F(Vex) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #320 [ref=1x] - { F(Vec)|F(Vex) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #321 [ref=1x] - { F(Vec)|F(Vex) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #322 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #323 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #324 [ref=6x] - { F(Vec)|F(Vex) , 166, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #325 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 163, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #326 [ref=2x] - { F(Vec)|F(Vex) , 142, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #327 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 76 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #328 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 146, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #329 [ref=2x] - { F(Vec)|F(Vex)|F(Evex) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #330 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #331 [ref=1x] - { F(Vec)|F(Vex)|F(Evex) , 452, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #332 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 453, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #333 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 454, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #334 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #335 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #336 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #337 [ref=4x] - { F(Vec)|F(Vex) , 311, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #338 [ref=12x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #339 [ref=8x] - { F(Vec)|F(Evex) , 457, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #340 [ref=4x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 213, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #341 [ref=6x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 216, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #342 [ref=9x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 219, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #343 [ref=3x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 222, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #344 [ref=4x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 225, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #345 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #346 [ref=6x] - { F(Vec)|F(Vex) , 130, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #347 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #348 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #349 [ref=3x] - { F(Vec)|F(Vex) , 331, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #350 [ref=4x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 228, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #351 [ref=3x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 333, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #352 [ref=2x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 231, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #353 [ref=2x] - { F(Vec)|F(Vex) , 335, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #354 [ref=8x] 
- { F(Vec)|F(Evex)|F(Avx512K) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #355 [ref=5x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #356 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #357 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #358 [ref=3x] - { F(Vec)|F(Vex)|F(Evex) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #359 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #360 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #361 [ref=3x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 88 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #362 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #363 [ref=6x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #364 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #365 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B32) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #366 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512K_B64) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #367 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #368 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #369 [ref=2x] - { F(Vec)|F(Vex) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #370 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #371 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #372 [ref=1x] - { F(Vec)|F(Vex) , 183, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #373 [ref=2x] - { F(Vec)|F(Vex) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #374 [ref=1x] - { F(Vec)|F(Vex) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #375 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #376 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #377 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #378 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #379 [ref=1x] - { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 337, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #380 [ref=1x] - { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 166, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #381 [ref=2x] - { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 166, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #382 [ref=2x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #383 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #384 [ref=1x] - { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #385 [ref=1x] - { F(Vec)|F(Vex) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #386 [ref=2x] - { F(Lock)|F(XAcquire)|F(XRelease) , 49 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #387 [ref=1x] - { 0 , 458, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #388 [ref=1x] - { F(Lock)|F(XAcquire) , 49 , 8 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #389 [ref=1x] - { 0 , 459, 1 , CONTROL(None) , 
SINGLE_REG(None), 0 }, // #390 [ref=6x] - { 0 , 460, 1 , CONTROL(None) , SINGLE_REG(None), 0 } // #391 [ref=6x] + { 0 , 349, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #12 [ref=1x] + { F(Vex) , 247, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #13 [ref=5x] + { F(Vex) , 156, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #14 [ref=12x] + { F(Vec) , 350, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #15 [ref=4x] + { 0 , 249, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #16 [ref=3x] + { F(Mib) , 351, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #17 [ref=1x] + { 0 , 352, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #18 [ref=1x] + { 0 , 251, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #19 [ref=1x] + { F(Mib) , 353, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #20 [ref=1x] + { 0 , 253, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #21 [ref=1x] + { 0 , 155, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #22 [ref=35x] + { 0 , 354, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #23 [ref=3x] + { 0 , 119, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #24 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 119, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #25 [ref=3x] + { F(Rep)|F(RepIgnored) , 255, 2 , CONTROL(Call) , SINGLE_REG(None), 0 }, // #26 [ref=1x] + { 0 , 355, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #27 [ref=1x] + { 0 , 356, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #28 [ref=2x] + { 0 , 330, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #29 [ref=1x] + { 0 , 99 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #30 [ref=83x] + { 0 , 357, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #31 [ref=24x] + { 0 , 358, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #32 [ref=6x] + { 0 , 359, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #33 [ref=1x] + { 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #34 [ref=1x] + { F(Rep) , 360, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #35 [ref=1x] + { F(Vec) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #36 [ref=2x] + { F(Vec) , 362, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #37 [ref=3x] + { F(Lock)|F(XAcquire)|F(XRelease) , 123, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #38 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #39 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 364, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #40 [ref=1x] + { 0 , 365, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #41 [ref=1x] + { 0 , 366, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #42 [ref=1x] + { 0 , 257, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #43 [ref=1x] + { F(Mmx)|F(Vec) , 367, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #44 [ref=2x] + { F(Mmx)|F(Vec) , 368, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #45 [ref=2x] + { F(Mmx)|F(Vec) , 369, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #46 [ref=2x] + { F(Vec) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #47 [ref=2x] + { F(Vec) , 371, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #48 [ref=2x] + { F(Vec) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #49 [ref=2x] + { 0 , 373, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #50 [ref=1x] + { 0 , 374, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #51 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 259, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #52 [ref=2x] + { 0 , 39 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #53 [ref=3x] + { F(Mmx) , 99 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #54 [ref=1x] + { 0 , 261, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #55 [ref=2x] + { 0 
, 375, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #56 [ref=1x] + { F(Vec) , 376, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #57 [ref=2x] + { F(Vec) , 263, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #58 [ref=1x] + { F(FpuM32)|F(FpuM64) , 158, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #59 [ref=6x] + { 0 , 265, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #60 [ref=9x] + { F(FpuM80) , 377, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #61 [ref=2x] + { 0 , 266, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #62 [ref=13x] + { F(FpuM32)|F(FpuM64) , 267, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #63 [ref=2x] + { F(FpuM16)|F(FpuM32) , 378, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #64 [ref=9x] + { F(FpuM16)|F(FpuM32)|F(FpuM64) , 379, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #65 [ref=3x] + { F(FpuM32)|F(FpuM64)|F(FpuM80) , 380, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #66 [ref=2x] + { F(FpuM16) , 381, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #67 [ref=3x] + { F(FpuM16) , 382, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #68 [ref=2x] + { F(FpuM32)|F(FpuM64) , 268, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #69 [ref=1x] + { 0 , 383, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #70 [ref=2x] + { 0 , 39 , 10, CONTROL(None) , SINGLE_REG(None), 0 }, // #71 [ref=1x] + { 0 , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #72 [ref=1x] + { 0 , 385, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #73 [ref=2x] + { 0 , 314, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #74 [ref=2x] + { F(Rep) , 386, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #75 [ref=1x] + { F(Vec) , 269, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #76 [ref=1x] + { 0 , 387, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #77 [ref=2x] + { 0 , 388, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #78 [ref=8x] + { 0 , 271, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #79 [ref=3x] + { 0 , 273, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #80 [ref=1x] + { 0 , 99 , 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #81 [ref=3x] + { 0 , 389, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #82 [ref=1x] + { F(Rep)|F(RepIgnored) , 275, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #83 [ref=30x] + { F(Rep)|F(RepIgnored) , 277, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #84 [ref=1x] + { F(Rep)|F(RepIgnored) , 279, 2 , CONTROL(Jump) , SINGLE_REG(None), 0 }, // #85 [ref=1x] + { F(Vec)|F(Vex) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #86 [ref=27x] + { F(Vec)|F(Vex) , 281, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #87 [ref=1x] + { F(Vec)|F(Vex) , 283, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #88 [ref=1x] + { F(Vec)|F(Vex) , 285, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #89 [ref=1x] + { F(Vec)|F(Vex) , 287, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #90 [ref=1x] + { F(Vec)|F(Vex) , 391, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #91 [ref=12x] + { F(Vec)|F(Vex) , 392, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #92 [ref=8x] + { 0 , 393, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #93 [ref=2x] + { 0 , 289, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #94 [ref=1x] + { F(Vec) , 197, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #95 [ref=2x] + { 0 , 394, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #96 [ref=2x] + { 0 , 291, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #97 [ref=2x] + { F(Vex) , 395, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #98 [ref=2x] + { 0 , 396, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #99 [ref=1x] + { 0 , 161, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, 
// #100 [ref=3x] + { 0 , 397, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #101 [ref=5x] + { F(Vex) , 398, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #102 [ref=2x] + { F(Rep) , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #103 [ref=1x] + { 0 , 277, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #104 [ref=3x] + { 0 , 293, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #105 [ref=1x] + { F(Vex) , 400, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #106 [ref=2x] + { F(Vec) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #107 [ref=1x] + { F(Mmx) , 402, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #108 [ref=1x] + { 0 , 403, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #109 [ref=2x] + { F(XRelease) , 0 , 16, CONTROL(None) , SINGLE_REG(None), 0 }, // #110 [ref=1x] + { F(Vec) , 70 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #111 [ref=6x] + { 0 , 64 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #112 [ref=1x] + { F(Mmx)|F(Vec) , 295, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #113 [ref=1x] + { 0 , 404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #114 [ref=1x] + { 0 , 68 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #115 [ref=2x] + { F(Mmx)|F(Vec) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #116 [ref=1x] + { F(Vec) , 264, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #117 [ref=2x] + { F(Vec) , 203, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #118 [ref=4x] + { F(Vec) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #119 [ref=2x] + { F(Vec) , 71 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #120 [ref=3x] + { F(Mmx) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #121 [ref=1x] + { F(Vec) , 98 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #122 [ref=1x] + { F(Vec) , 206, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #123 [ref=1x] + { F(Mmx)|F(Vec) , 94 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #124 [ref=1x] + { F(Mmx)|F(Vec) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #125 [ref=1x] + { F(Rep) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #126 [ref=1x] + { F(Vec) , 97 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #127 [ref=1x] + { F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #128 [ref=1x] + { 0 , 299, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #129 [ref=2x] + { 0 , 301, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #130 [ref=1x] + { F(Vex) , 303, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #131 [ref=1x] + { 0 , 410, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #132 [ref=1x] + { 0 , 411, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #133 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 260, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #134 [ref=2x] + { 0 , 99 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #135 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #136 [ref=1x] + { 0 , 412, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #137 [ref=1x] + { F(Rep) , 413, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #138 [ref=1x] + { F(Mmx)|F(Vec) , 305, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #139 [ref=40x] + { F(Mmx)|F(Vec) , 307, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #140 [ref=1x] + { F(Mmx)|F(Vec) , 305, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #141 [ref=6x] + { F(Mmx)|F(Vec) , 305, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #142 [ref=16x] + { F(Mmx) , 305, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #143 [ref=26x] + { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #144 [ref=4x] + { F(Vec) , 414, 1 , CONTROL(None) , SINGLE_REG(None), 0 
}, // #145 [ref=1x] + { F(Vec) , 415, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #146 [ref=1x] + { F(Vec) , 416, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #147 [ref=1x] + { F(Vec) , 417, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #148 [ref=1x] + { F(Vec) , 418, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #149 [ref=1x] + { F(Vec) , 419, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #150 [ref=1x] + { F(Mmx)|F(Vec) , 309, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #151 [ref=1x] + { F(Vec) , 420, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #152 [ref=1x] + { F(Vec) , 421, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #153 [ref=1x] + { F(Vec) , 422, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #154 [ref=1x] + { F(Mmx)|F(Vec) , 423, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #155 [ref=1x] + { F(Mmx)|F(Vec) , 424, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #156 [ref=1x] + { F(Vec) , 233, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #157 [ref=2x] + { 0 , 127, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #158 [ref=1x] + { 0 , 389, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #159 [ref=9x] + { F(Mmx) , 307, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #160 [ref=1x] + { F(Mmx)|F(Vec) , 311, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #161 [ref=8x] + { F(Vec) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #162 [ref=2x] + { 0 , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #163 [ref=1x] + { 0 , 131, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #164 [ref=1x] + { 0 , 427, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #165 [ref=8x] + { 0 , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #166 [ref=4x] + { 0 , 429, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #167 [ref=8x] + { 0 , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #168 [ref=1x] + { F(Rep)|F(RepIgnored) , 315, 2 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #169 [ref=1x] + { F(Vex) , 317, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #170 [ref=1x] + { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(WO) , 0 }, // #171 [ref=3x] + { F(Rep) , 430, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #172 [ref=1x] + { 0 , 431, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #173 [ref=30x] + { 0 , 164, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #174 [ref=2x] + { 0 , 432, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #175 [ref=3x] + { F(Rep) , 433, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #176 [ref=1x] + { F(Vex) , 434, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #177 [ref=5x] + { 0 , 57 , 7 , CONTROL(None) , SINGLE_REG(None), 0 }, // #178 [ref=1x] + { F(Tsib)|F(Vex) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #179 [ref=2x] + { F(Vex) , 389, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #180 [ref=1x] + { F(Tsib)|F(Vex) , 436, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #181 [ref=1x] + { F(Vex) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #182 [ref=1x] + { 0 , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #183 [ref=2x] + { 0 , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #184 [ref=2x] + { 0 , 440, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #185 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 441, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #186 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 442, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #187 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #188 [ref=22x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 167, 3 , 
CONTROL(None) , SINGLE_REG(None), 0 }, // #189 [ref=22x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #190 [ref=18x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #191 [ref=17x] + { F(Vec)|F(Vex) , 167, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #192 [ref=15x] + { F(Vec)|F(Vex)|F(Evex) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #193 [ref=5x] + { F(Vec)|F(Vex) , 70 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #194 [ref=17x] + { F(Vec)|F(Vex) , 188, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #195 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #196 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #197 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #198 [ref=10x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #199 [ref=12x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #200 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #201 [ref=6x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #202 [ref=13x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #203 [ref=16x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #204 [ref=19x] + { F(Vec)|F(Vex) , 170, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #205 [ref=6x] + { F(Vec)|F(Vex) , 319, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #206 [ref=3x] + { F(Vec)|F(Vex) , 445, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #207 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #208 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 447, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #209 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 448, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #210 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 449, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #211 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #212 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 450, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #213 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 173, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #214 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 173, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #215 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 451, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #216 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 452, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #217 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #218 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 230, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #219 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 176, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #220 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #221 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #222 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #223 [ref=1x] + { 
F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #224 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #225 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #226 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #227 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #228 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 185, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #229 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #230 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #231 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #232 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #233 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 453, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #234 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #235 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #236 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #237 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #238 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #239 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #240 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #241 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #242 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #243 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #244 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512SAE) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #245 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #246 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512SAE) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #247 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #248 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 453, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #249 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #250 [ref=3x] + { F(Vec)|F(Vex) , 170, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #251 [ref=9x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #252 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #253 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #254 [ref=9x] + { F(Vec)|F(Vex) , 186, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #255 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 454, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #256 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 187, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #257 
[ref=4x] + { F(Vec)|F(Vex)|F(Evex) , 376, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #258 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #259 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #260 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #261 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #262 [ref=4x] + { F(Vec)|F(Vex) , 135, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #263 [ref=13x] + { F(Vec)|F(Vex) , 323, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #264 [ref=4x] + { F(Vec)|F(Vex) , 325, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #265 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512K_B64) , 457, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #266 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512K_B32) , 457, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #267 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512K) , 458, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #268 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512K) , 459, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #269 [ref=1x] + { F(Vec)|F(Vex) , 182, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #270 [ref=7x] + { F(Vec)|F(Vex) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #271 [ref=1x] + { F(Vec)|F(Vex) , 230, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #272 [ref=1x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 104, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #273 [ref=2x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 109, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #274 [ref=2x] + { F(Vsib)|F(Evex)|F(Avx512K) , 460, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #275 [ref=4x] + { F(Vsib)|F(Evex)|F(Avx512K) , 461, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #276 [ref=4x] + { F(Vsib)|F(Evex)|F(Avx512K) , 462, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #277 [ref=8x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 114, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #278 [ref=2x] + { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 139, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #279 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #280 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #281 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #282 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #283 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #284 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #285 [ref=22x] + { F(Vec)|F(Vex) , 327, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #286 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 327, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #287 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 463, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #288 [ref=4x] + { F(Vec)|F(Vex)|F(Evex) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #289 [ref=1x] + { F(Vec)|F(Vex) , 197, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #290 [ref=1x] + { F(Vex) , 394, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #291 [ref=2x] + { F(Vec)|F(Vex) , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #292 [ref=1x] + { F(Vec)|F(Vex) , 143, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #293 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 167, 
3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #294 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #295 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #296 [ref=2x] + { 0 , 329, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #297 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #298 [ref=4x] + { F(Vec)|F(Vex)|F(Evex) , 331, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #299 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 191, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #300 [ref=1x] + { F(Vec)|F(Vex) , 70 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #301 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #302 [ref=6x] + { F(Vec)|F(Vex)|F(Evex) , 205, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #303 [ref=2x] + { F(Vec)|F(Vex)|F(Evex) , 333, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #304 [ref=4x] + { F(Vec)|F(Vex) , 464, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #305 [ref=3x] + { F(Vec)|F(Vex)|F(Evex) , 194, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #306 [ref=3x] + { F(Vec)|F(Vex)|F(Evex) , 197, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #307 [ref=1x] + { F(Vec)|F(Vex)|F(Evex) , 200, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #308 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 203, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #309 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #310 [ref=5x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 206, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #311 [ref=1x] + { 0 , 335, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #312 [ref=1x] + { 0 , 337, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #313 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512B32) , 209, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #314 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512B64) , 209, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #315 [ref=1x] + { F(Vec)|F(Vex) , 167, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #316 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #317 [ref=2x] + { F(Vec)|F(Vex) , 167, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #318 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #319 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #320 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #321 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 465, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #322 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 466, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #323 [ref=1x] + { F(Vec)|F(Evex) , 467, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #324 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 212, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #325 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 468, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #326 [ref=1x] + { F(Vec)|F(Vex)|F(Evex) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #327 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512K) , 215, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #328 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512K_B32) , 215, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #329 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512K) , 218, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #330 [ref=4x] + { 
F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B32) , 218, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #331 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B64) , 218, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #332 [ref=2x] + { F(Vec)|F(Vex) , 414, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #333 [ref=1x] + { F(Vec)|F(Vex) , 415, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #334 [ref=1x] + { F(Vec)|F(Vex) , 416, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #335 [ref=1x] + { F(Vec)|F(Vex) , 417, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #336 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512K_B64) , 215, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #337 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #338 [ref=6x] + { F(Vec)|F(Vex) , 171, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #339 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 168, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #340 [ref=2x] + { F(Vec)|F(Vex) , 147, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #341 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 76 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #342 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 151, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #343 [ref=2x] + { F(Vec)|F(Vex)|F(Evex) , 418, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #344 [ref=1x] + { F(Vec)|F(Vex)|F(Evex) , 419, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #345 [ref=1x] + { F(Vec)|F(Vex)|F(Evex) , 469, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #346 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 470, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #347 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 471, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #348 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 472, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #349 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 473, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #350 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #351 [ref=4x] + { F(Vec)|F(Vex) , 319, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #352 [ref=12x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 167, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #353 [ref=8x] + { F(Vec)|F(Evex) , 474, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #354 [ref=4x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 221, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #355 [ref=6x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 224, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #356 [ref=9x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 227, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #357 [ref=3x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 230, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #358 [ref=4x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 233, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #359 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 179, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #360 [ref=6x] + { F(Vec)|F(Vex) , 135, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #361 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #362 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #363 [ref=3x] + { F(Vec)|F(Vex) , 339, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #364 [ref=4x] + { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 236, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #365 [ref=3x] + { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 341, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #366 [ref=2x] + { 
F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 239, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #367 [ref=2x] + { F(Vec)|F(Vex) , 343, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #368 [ref=8x] + { F(Vec)|F(Evex)|F(Avx512K) , 242, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #369 [ref=5x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #370 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #371 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #372 [ref=3x] + { F(Vec)|F(Vex)|F(Evex) , 188, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #373 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #374 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #375 [ref=3x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 88 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #376 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 167, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #377 [ref=6x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #378 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #379 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512K_B32) , 242, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #380 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512K_B64) , 242, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #381 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #382 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #383 [ref=2x] + { F(Vec)|F(Vex) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #384 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #385 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #386 [ref=1x] + { F(Vec)|F(Vex) , 188, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #387 [ref=2x] + { F(Vec)|F(Vex) , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #388 [ref=1x] + { F(Vec)|F(Vex) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #389 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #390 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 167, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #391 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #392 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #393 [ref=1x] + { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 345, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #394 [ref=1x] + { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 171, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #395 [ref=2x] + { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 171, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #396 [ref=2x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #397 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 170, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #398 [ref=1x] + { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 182, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #399 [ref=1x] + { F(Vec)|F(Vex) , 99 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #400 [ref=2x] + { 0 , 23 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #401 [ref=2x] + { 0 , 52 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, 
// #402 [ref=2x] + { F(Lock)|F(XAcquire)|F(XRelease) , 49 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #403 [ref=1x] + { 0 , 475, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #404 [ref=1x] + { F(Lock)|F(XAcquire) , 49 , 8 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #405 [ref=1x] + { 0 , 476, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #406 [ref=6x] + { 0 , 477, 1 , CONTROL(None) , SINGLE_REG(None), 0 } // #407 [ref=6x] }; #undef SINGLE_REG #undef CONTROL @@ -2397,7 +2469,7 @@ const InstDB::CommonInfo InstDB::_commonInfoTable[] = { // ------------------- Automatically generated, do not edit ------------------- #define EXT(VAL) uint32_t(Features::k##VAL) const InstDB::CommonInfoTableB InstDB::_commonInfoTableB[] = { - { { 0 }, 0, 0 }, // #0 [ref=144x] + { { 0 }, 0, 0 }, // #0 [ref=146x] { { 0 }, 1, 0 }, // #1 [ref=32x] { { 0 }, 2, 0 }, // #2 [ref=2x] { { EXT(ADX) }, 3, 0 }, // #3 [ref=1x] @@ -2421,139 +2493,156 @@ const InstDB::CommonInfoTableB InstDB::_commonInfoTableB[] = { { { EXT(CLFLUSHOPT) }, 0, 0 }, // #21 [ref=1x] { { EXT(SVM) }, 0, 0 }, // #22 [ref=6x] { { 0 }, 10, 0 }, // #23 [ref=2x] - { { EXT(CLWB) }, 0, 0 }, // #24 [ref=1x] - { { EXT(CLZERO) }, 0, 0 }, // #25 [ref=1x] - { { 0 }, 3, 0 }, // #26 [ref=1x] - { { EXT(CMOV) }, 11, 0 }, // #27 [ref=6x] - { { EXT(CMOV) }, 12, 0 }, // #28 [ref=8x] - { { EXT(CMOV) }, 13, 0 }, // #29 [ref=6x] - { { EXT(CMOV) }, 14, 0 }, // #30 [ref=4x] - { { EXT(CMOV) }, 15, 0 }, // #31 [ref=4x] - { { EXT(CMOV) }, 16, 0 }, // #32 [ref=2x] - { { EXT(CMOV) }, 17, 0 }, // #33 [ref=6x] - { { EXT(CMOV) }, 18, 0 }, // #34 [ref=2x] - { { 0 }, 19, 0 }, // #35 [ref=2x] - { { EXT(I486) }, 1, 0 }, // #36 [ref=2x] - { { EXT(CMPXCHG16B) }, 5, 0 }, // #37 [ref=1x] - { { EXT(CMPXCHG8B) }, 5, 0 }, // #38 [ref=1x] - { { EXT(SSE2) }, 1, 0 }, // #39 [ref=2x] - { { EXT(SSE) }, 1, 0 }, // #40 [ref=2x] - { { EXT(I486) }, 0, 0 }, // #41 [ref=4x] - { { EXT(SSE4_2) }, 0, 0 }, // #42 [ref=2x] - { { 0 }, 20, 0 }, // #43 [ref=2x] - { { EXT(MMX) }, 0, 0 }, // #44 [ref=1x] - { { EXT(ENQCMD) }, 0, 0 }, // #45 [ref=2x] - { { EXT(SSE4A) }, 0, 0 }, // #46 [ref=4x] - { { 0 }, 21, 0 }, // #47 [ref=4x] - { { EXT(3DNOW) }, 0, 0 }, // #48 [ref=21x] - { { EXT(FXSR) }, 0, 0 }, // #49 [ref=4x] - { { EXT(SMX) }, 0, 0 }, // #50 [ref=1x] - { { EXT(GFNI) }, 0, 0 }, // #51 [ref=3x] - { { 0 }, 16, 0 }, // #52 [ref=5x] - { { EXT(VMX) }, 0, 0 }, // #53 [ref=12x] - { { 0 }, 11, 0 }, // #54 [ref=8x] - { { 0 }, 12, 0 }, // #55 [ref=12x] - { { 0 }, 13, 0 }, // #56 [ref=10x] - { { 0 }, 14, 0 }, // #57 [ref=8x] - { { 0 }, 15, 0 }, // #58 [ref=8x] - { { 0 }, 17, 0 }, // #59 [ref=8x] - { { 0 }, 18, 0 }, // #60 [ref=4x] - { { EXT(AVX512_DQ) }, 0, 0 }, // #61 [ref=23x] - { { EXT(AVX512_BW) }, 0, 0 }, // #62 [ref=22x] - { { EXT(AVX512_F) }, 0, 0 }, // #63 [ref=37x] - { { EXT(AVX512_DQ) }, 1, 0 }, // #64 [ref=3x] - { { EXT(AVX512_BW) }, 1, 0 }, // #65 [ref=4x] - { { EXT(AVX512_F) }, 1, 0 }, // #66 [ref=1x] - { { EXT(LAHFSAHF) }, 22, 0 }, // #67 [ref=1x] - { { EXT(LWP) }, 0, 0 }, // #68 [ref=4x] - { { 0 }, 23, 0 }, // #69 [ref=3x] - { { EXT(LZCNT) }, 1, 0 }, // #70 [ref=1x] - { { EXT(MMX2) }, 0, 0 }, // #71 [ref=8x] - { { EXT(MONITOR) }, 0, 0 }, // #72 [ref=2x] - { { EXT(MONITORX) }, 0, 0 }, // #73 [ref=2x] - { { EXT(MOVBE) }, 0, 0 }, // #74 [ref=1x] - { { EXT(MMX), EXT(SSE2) }, 0, 0 }, // #75 [ref=46x] - { { EXT(MOVDIR64B) }, 0, 0 }, // #76 [ref=1x] - { { EXT(MOVDIRI) }, 0, 0 }, // #77 [ref=1x] - { { EXT(BMI2) }, 0, 0 }, // #78 [ref=7x] - { { EXT(SSSE3) }, 0, 0 }, // #79 [ref=15x] - { { EXT(MMX2), EXT(SSE2) }, 
0, 0 }, // #80 [ref=10x] - { { EXT(PCLMULQDQ) }, 0, 0 }, // #81 [ref=1x] - { { EXT(SSE4_2) }, 1, 0 }, // #82 [ref=4x] - { { EXT(PCOMMIT) }, 0, 0 }, // #83 [ref=1x] - { { EXT(MMX2), EXT(SSE2), EXT(SSE4_1) }, 0, 0 }, // #84 [ref=1x] - { { EXT(3DNOW2) }, 0, 0 }, // #85 [ref=5x] - { { EXT(GEODE) }, 0, 0 }, // #86 [ref=2x] - { { EXT(POPCNT) }, 1, 0 }, // #87 [ref=1x] - { { 0 }, 24, 0 }, // #88 [ref=3x] - { { EXT(PREFETCHW) }, 1, 0 }, // #89 [ref=1x] - { { EXT(PREFETCHWT1) }, 1, 0 }, // #90 [ref=1x] - { { EXT(SSE4_1) }, 1, 0 }, // #91 [ref=1x] - { { 0 }, 25, 0 }, // #92 [ref=3x] - { { 0 }, 26, 0 }, // #93 [ref=2x] - { { EXT(FSGSBASE) }, 0, 0 }, // #94 [ref=4x] - { { EXT(MSR) }, 0, 0 }, // #95 [ref=2x] - { { EXT(RDPID) }, 0, 0 }, // #96 [ref=1x] - { { EXT(RDRAND) }, 1, 0 }, // #97 [ref=1x] - { { EXT(RDSEED) }, 1, 0 }, // #98 [ref=1x] - { { EXT(RDTSC) }, 0, 0 }, // #99 [ref=1x] - { { EXT(RDTSCP) }, 0, 0 }, // #100 [ref=1x] - { { 0 }, 27, 0 }, // #101 [ref=2x] - { { EXT(LAHFSAHF) }, 28, 0 }, // #102 [ref=1x] - { { EXT(SHA) }, 0, 0 }, // #103 [ref=7x] - { { EXT(SKINIT) }, 0, 0 }, // #104 [ref=2x] - { { EXT(AVX512_4FMAPS) }, 0, 0 }, // #105 [ref=4x] - { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #106 [ref=46x] - { { EXT(AVX), EXT(AVX512_F) }, 0, 0 }, // #107 [ref=32x] - { { EXT(AVX) }, 0, 0 }, // #108 [ref=37x] - { { EXT(AESNI), EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(VAES) }, 0, 0 }, // #109 [ref=4x] - { { EXT(AESNI), EXT(AVX) }, 0, 0 }, // #110 [ref=2x] - { { EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #111 [ref=112x] - { { EXT(AVX), EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #112 [ref=8x] - { { EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #113 [ref=26x] - { { EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #114 [ref=30x] - { { EXT(AVX2) }, 0, 0 }, // #115 [ref=7x] - { { EXT(AVX), EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #116 [ref=39x] - { { EXT(AVX), EXT(AVX512_F) }, 1, 0 }, // #117 [ref=4x] - { { EXT(AVX512_BF16), EXT(AVX512_VL) }, 0, 0 }, // #118 [ref=3x] - { { EXT(AVX512_F), EXT(AVX512_VL), EXT(F16C) }, 0, 0 }, // #119 [ref=2x] - { { EXT(AVX512_ERI) }, 0, 0 }, // #120 [ref=10x] - { { EXT(AVX512_F), EXT(AVX512_VL), EXT(FMA) }, 0, 0 }, // #121 [ref=36x] - { { EXT(AVX512_F), EXT(FMA) }, 0, 0 }, // #122 [ref=24x] - { { EXT(FMA4) }, 0, 0 }, // #123 [ref=20x] - { { EXT(XOP) }, 0, 0 }, // #124 [ref=55x] - { { EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #125 [ref=19x] - { { EXT(AVX512_PFI) }, 0, 0 }, // #126 [ref=16x] - { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(GFNI) }, 0, 0 }, // #127 [ref=3x] - { { EXT(AVX), EXT(AVX2) }, 0, 0 }, // #128 [ref=17x] - { { EXT(AVX512_4VNNIW) }, 0, 0 }, // #129 [ref=2x] - { { EXT(AVX), EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #130 [ref=54x] - { { EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #131 [ref=2x] - { { EXT(AVX512_CDI), EXT(AVX512_VL) }, 0, 0 }, // #132 [ref=6x] - { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(PCLMULQDQ), EXT(VPCLMULQDQ) }, 0, 0 }, // #133 [ref=1x] - { { EXT(AVX) }, 1, 0 }, // #134 [ref=7x] - { { EXT(AVX512_VBMI2), EXT(AVX512_VL) }, 0, 0 }, // #135 [ref=16x] - { { EXT(AVX512_VL), EXT(AVX512_VNNI) }, 0, 0 }, // #136 [ref=4x] - { { EXT(AVX512_VBMI), EXT(AVX512_VL) }, 0, 0 }, // #137 [ref=4x] - { { EXT(AVX), EXT(AVX512_BW) }, 0, 0 }, // #138 [ref=4x] - { { EXT(AVX), EXT(AVX512_DQ) }, 0, 0 }, // #139 [ref=4x] - { { EXT(AVX512_IFMA), EXT(AVX512_VL) }, 0, 0 }, // #140 [ref=2x] - { { EXT(AVX512_BITALG), EXT(AVX512_VL) }, 0, 0 }, // #141 [ref=3x] - { { EXT(AVX512_VL), 
EXT(AVX512_VPOPCNTDQ) }, 0, 0 }, // #142 [ref=2x] - { { EXT(WBNOINVD) }, 0, 0 }, // #143 [ref=1x] - { { EXT(RTM) }, 0, 0 }, // #144 [ref=3x] - { { EXT(XSAVE) }, 0, 0 }, // #145 [ref=6x] - { { EXT(XSAVES) }, 0, 0 }, // #146 [ref=4x] - { { EXT(XSAVEC) }, 0, 0 }, // #147 [ref=2x] - { { EXT(XSAVEOPT) }, 0, 0 }, // #148 [ref=2x] - { { EXT(TSX) }, 1, 0 } // #149 [ref=1x] + { { EXT(CET_SS) }, 1, 0 }, // #24 [ref=3x] + { { EXT(CLWB) }, 0, 0 }, // #25 [ref=1x] + { { EXT(CLZERO) }, 0, 0 }, // #26 [ref=1x] + { { 0 }, 3, 0 }, // #27 [ref=1x] + { { EXT(CMOV) }, 11, 0 }, // #28 [ref=6x] + { { EXT(CMOV) }, 12, 0 }, // #29 [ref=8x] + { { EXT(CMOV) }, 13, 0 }, // #30 [ref=6x] + { { EXT(CMOV) }, 14, 0 }, // #31 [ref=4x] + { { EXT(CMOV) }, 15, 0 }, // #32 [ref=4x] + { { EXT(CMOV) }, 16, 0 }, // #33 [ref=2x] + { { EXT(CMOV) }, 17, 0 }, // #34 [ref=6x] + { { EXT(CMOV) }, 18, 0 }, // #35 [ref=2x] + { { 0 }, 19, 0 }, // #36 [ref=2x] + { { EXT(I486) }, 1, 0 }, // #37 [ref=2x] + { { EXT(CMPXCHG16B) }, 5, 0 }, // #38 [ref=1x] + { { EXT(CMPXCHG8B) }, 5, 0 }, // #39 [ref=1x] + { { EXT(SSE2) }, 1, 0 }, // #40 [ref=2x] + { { EXT(SSE) }, 1, 0 }, // #41 [ref=2x] + { { EXT(I486) }, 0, 0 }, // #42 [ref=4x] + { { EXT(SSE4_2) }, 0, 0 }, // #43 [ref=2x] + { { 0 }, 20, 0 }, // #44 [ref=2x] + { { EXT(MMX) }, 0, 0 }, // #45 [ref=1x] + { { EXT(CET_IBT) }, 0, 0 }, // #46 [ref=2x] + { { EXT(ENQCMD) }, 0, 0 }, // #47 [ref=2x] + { { EXT(SSE4A) }, 0, 0 }, // #48 [ref=4x] + { { 0 }, 21, 0 }, // #49 [ref=4x] + { { EXT(3DNOW) }, 0, 0 }, // #50 [ref=21x] + { { EXT(FXSR) }, 0, 0 }, // #51 [ref=4x] + { { EXT(SMX) }, 0, 0 }, // #52 [ref=1x] + { { EXT(GFNI) }, 0, 0 }, // #53 [ref=3x] + { { EXT(CET_SS) }, 0, 0 }, // #54 [ref=9x] + { { 0 }, 16, 0 }, // #55 [ref=5x] + { { EXT(VMX) }, 0, 0 }, // #56 [ref=12x] + { { 0 }, 11, 0 }, // #57 [ref=8x] + { { 0 }, 12, 0 }, // #58 [ref=12x] + { { 0 }, 13, 0 }, // #59 [ref=10x] + { { 0 }, 14, 0 }, // #60 [ref=8x] + { { 0 }, 15, 0 }, // #61 [ref=8x] + { { 0 }, 17, 0 }, // #62 [ref=8x] + { { 0 }, 18, 0 }, // #63 [ref=4x] + { { EXT(AVX512_DQ) }, 0, 0 }, // #64 [ref=23x] + { { EXT(AVX512_BW) }, 0, 0 }, // #65 [ref=22x] + { { EXT(AVX512_F) }, 0, 0 }, // #66 [ref=37x] + { { EXT(AVX512_DQ) }, 1, 0 }, // #67 [ref=3x] + { { EXT(AVX512_BW) }, 1, 0 }, // #68 [ref=4x] + { { EXT(AVX512_F) }, 1, 0 }, // #69 [ref=1x] + { { EXT(LAHFSAHF) }, 22, 0 }, // #70 [ref=1x] + { { EXT(AMX_TILE) }, 0, 0 }, // #71 [ref=7x] + { { EXT(LWP) }, 0, 0 }, // #72 [ref=4x] + { { 0 }, 23, 0 }, // #73 [ref=3x] + { { EXT(LZCNT) }, 1, 0 }, // #74 [ref=1x] + { { EXT(MMX2) }, 0, 0 }, // #75 [ref=8x] + { { EXT(MCOMMIT) }, 1, 0 }, // #76 [ref=1x] + { { EXT(MONITOR) }, 0, 0 }, // #77 [ref=2x] + { { EXT(MONITORX) }, 0, 0 }, // #78 [ref=2x] + { { EXT(MOVBE) }, 0, 0 }, // #79 [ref=1x] + { { EXT(MMX), EXT(SSE2) }, 0, 0 }, // #80 [ref=46x] + { { EXT(MOVDIR64B) }, 0, 0 }, // #81 [ref=1x] + { { EXT(MOVDIRI) }, 0, 0 }, // #82 [ref=1x] + { { EXT(BMI2) }, 0, 0 }, // #83 [ref=7x] + { { EXT(SSSE3) }, 0, 0 }, // #84 [ref=15x] + { { EXT(MMX2), EXT(SSE2) }, 0, 0 }, // #85 [ref=10x] + { { EXT(PCLMULQDQ) }, 0, 0 }, // #86 [ref=1x] + { { EXT(SSE4_2) }, 1, 0 }, // #87 [ref=4x] + { { EXT(PCONFIG) }, 0, 0 }, // #88 [ref=1x] + { { EXT(MMX2), EXT(SSE2), EXT(SSE4_1) }, 0, 0 }, // #89 [ref=1x] + { { EXT(3DNOW2) }, 0, 0 }, // #90 [ref=5x] + { { EXT(GEODE) }, 0, 0 }, // #91 [ref=2x] + { { EXT(POPCNT) }, 1, 0 }, // #92 [ref=1x] + { { 0 }, 24, 0 }, // #93 [ref=3x] + { { EXT(PREFETCHW) }, 1, 0 }, // #94 [ref=1x] + { { EXT(PREFETCHWT1) }, 1, 0 }, // #95 [ref=1x] + { { EXT(SNP) }, 
20, 0 }, // #96 [ref=3x] + { { EXT(SSE4_1) }, 1, 0 }, // #97 [ref=1x] + { { EXT(PTWRITE) }, 0, 0 }, // #98 [ref=1x] + { { 0 }, 25, 0 }, // #99 [ref=3x] + { { EXT(SNP) }, 1, 0 }, // #100 [ref=1x] + { { 0 }, 26, 0 }, // #101 [ref=2x] + { { EXT(FSGSBASE) }, 0, 0 }, // #102 [ref=4x] + { { EXT(MSR) }, 0, 0 }, // #103 [ref=2x] + { { EXT(RDPID) }, 0, 0 }, // #104 [ref=1x] + { { EXT(OSPKE) }, 0, 0 }, // #105 [ref=1x] + { { EXT(RDPRU) }, 0, 0 }, // #106 [ref=1x] + { { EXT(RDRAND) }, 1, 0 }, // #107 [ref=1x] + { { EXT(RDSEED) }, 1, 0 }, // #108 [ref=1x] + { { EXT(RDTSC) }, 0, 0 }, // #109 [ref=1x] + { { EXT(RDTSCP) }, 0, 0 }, // #110 [ref=1x] + { { 0 }, 27, 0 }, // #111 [ref=2x] + { { EXT(LAHFSAHF) }, 28, 0 }, // #112 [ref=1x] + { { EXT(SERIALIZE) }, 0, 0 }, // #113 [ref=1x] + { { EXT(SHA) }, 0, 0 }, // #114 [ref=7x] + { { EXT(SKINIT) }, 0, 0 }, // #115 [ref=2x] + { { EXT(AMX_BF16) }, 0, 0 }, // #116 [ref=1x] + { { EXT(AMX_INT8) }, 0, 0 }, // #117 [ref=4x] + { { EXT(WAITPKG) }, 1, 0 }, // #118 [ref=2x] + { { EXT(WAITPKG) }, 0, 0 }, // #119 [ref=1x] + { { EXT(AVX512_4FMAPS) }, 0, 0 }, // #120 [ref=4x] + { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #121 [ref=46x] + { { EXT(AVX), EXT(AVX512_F) }, 0, 0 }, // #122 [ref=32x] + { { EXT(AVX) }, 0, 0 }, // #123 [ref=37x] + { { EXT(AESNI), EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(VAES) }, 0, 0 }, // #124 [ref=4x] + { { EXT(AESNI), EXT(AVX) }, 0, 0 }, // #125 [ref=2x] + { { EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #126 [ref=112x] + { { EXT(AVX), EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #127 [ref=8x] + { { EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #128 [ref=26x] + { { EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #129 [ref=30x] + { { EXT(AVX2) }, 0, 0 }, // #130 [ref=7x] + { { EXT(AVX), EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #131 [ref=39x] + { { EXT(AVX), EXT(AVX512_F) }, 1, 0 }, // #132 [ref=4x] + { { EXT(AVX512_BF16), EXT(AVX512_VL) }, 0, 0 }, // #133 [ref=3x] + { { EXT(AVX512_F), EXT(AVX512_VL), EXT(F16C) }, 0, 0 }, // #134 [ref=2x] + { { EXT(AVX512_ERI) }, 0, 0 }, // #135 [ref=10x] + { { EXT(AVX512_F), EXT(AVX512_VL), EXT(FMA) }, 0, 0 }, // #136 [ref=36x] + { { EXT(AVX512_F), EXT(FMA) }, 0, 0 }, // #137 [ref=24x] + { { EXT(FMA4) }, 0, 0 }, // #138 [ref=20x] + { { EXT(XOP) }, 0, 0 }, // #139 [ref=55x] + { { EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #140 [ref=19x] + { { EXT(AVX512_PFI) }, 0, 0 }, // #141 [ref=16x] + { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(GFNI) }, 0, 0 }, // #142 [ref=3x] + { { EXT(AVX), EXT(AVX2) }, 0, 0 }, // #143 [ref=17x] + { { EXT(AVX512_VP2INTERSECT) }, 0, 0 }, // #144 [ref=2x] + { { EXT(AVX512_4VNNIW) }, 0, 0 }, // #145 [ref=2x] + { { EXT(AVX), EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #146 [ref=54x] + { { EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #147 [ref=2x] + { { EXT(AVX512_CDI), EXT(AVX512_VL) }, 0, 0 }, // #148 [ref=6x] + { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(PCLMULQDQ), EXT(VPCLMULQDQ) }, 0, 0 }, // #149 [ref=1x] + { { EXT(AVX) }, 1, 0 }, // #150 [ref=7x] + { { EXT(AVX512_VBMI2), EXT(AVX512_VL) }, 0, 0 }, // #151 [ref=16x] + { { EXT(AVX512_VL), EXT(AVX512_VNNI) }, 0, 0 }, // #152 [ref=4x] + { { EXT(AVX512_VBMI), EXT(AVX512_VL) }, 0, 0 }, // #153 [ref=4x] + { { EXT(AVX), EXT(AVX512_BW) }, 0, 0 }, // #154 [ref=4x] + { { EXT(AVX), EXT(AVX512_DQ) }, 0, 0 }, // #155 [ref=4x] + { { EXT(AVX512_IFMA), EXT(AVX512_VL) }, 0, 0 }, // #156 [ref=2x] + { { EXT(AVX512_BITALG), EXT(AVX512_VL) }, 0, 0 }, // #157 [ref=3x] + { { EXT(AVX512_VL), 
EXT(AVX512_VPOPCNTDQ) }, 0, 0 }, // #158 [ref=2x] + { { EXT(WBNOINVD) }, 0, 0 }, // #159 [ref=1x] + { { EXT(RTM) }, 0, 0 }, // #160 [ref=3x] + { { EXT(XSAVE) }, 0, 0 }, // #161 [ref=6x] + { { EXT(TSXLDTRK) }, 0, 0 }, // #162 [ref=2x] + { { EXT(XSAVES) }, 0, 0 }, // #163 [ref=4x] + { { EXT(XSAVEC) }, 0, 0 }, // #164 [ref=2x] + { { EXT(XSAVEOPT) }, 0, 0 }, // #165 [ref=2x] + { { EXT(TSX) }, 1, 0 } // #166 [ref=1x] }; #undef EXT #define FLAG(VAL) uint32_t(Status::k##VAL) const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = { - { 0, 0 }, // #0 [ref=1281x] - { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #1 [ref=76x] + { 0, 0 }, // #0 [ref=1315x] + { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #1 [ref=83x] { FLAG(CF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #2 [ref=2x] { FLAG(CF), FLAG(CF) }, // #3 [ref=2x] { FLAG(OF), FLAG(OF) }, // #4 [ref=1x] @@ -2572,7 +2661,7 @@ const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = { { FLAG(PF), 0 }, // #17 [ref=14x] { FLAG(SF), 0 }, // #18 [ref=6x] { FLAG(DF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #19 [ref=2x] - { 0, FLAG(AF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #20 [ref=2x] + { 0, FLAG(AF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #20 [ref=5x] { 0, FLAG(CF) | FLAG(PF) | FLAG(ZF) }, // #21 [ref=4x] { FLAG(AF) | FLAG(CF) | FLAG(PF) | FLAG(SF) | FLAG(ZF), 0 }, // #22 [ref=1x] { FLAG(DF), 0 }, // #23 [ref=3x] @@ -2597,65 +2686,69 @@ const char InstDB::_nameData[] = "\0" "aaa\0" "aad\0" "aam\0" "aas\0" "adc\0" "adcx\0" "adox\0" "arpl\0" "bextr\0" "blcfill\0" "blci\0" "blcic\0" "blcmsk\0" "blcs\0" "blsfill\0" "blsi\0" "blsic\0" "blsmsk\0" "blsr\0" "bndcl\0" "bndcn\0" "bndcu\0" "bndldx\0" "bndmk\0" "bndmov\0" "bndstx\0" "bound\0" "bsf\0" "bsr\0" "bswap\0" "bt\0" "btc\0" "btr\0" "bts\0" "bzhi\0" "cbw\0" - "cdq\0" "cdqe\0" "clac\0" "clc\0" "cld\0" "cldemote\0" "clflush\0" "clflushopt\0" "clgi\0" "cli\0" "clts\0" "clwb\0" - "clzero\0" "cmc\0" "cmova\0" "cmovae\0" "cmovc\0" "cmovg\0" "cmovge\0" "cmovl\0" "cmovle\0" "cmovna\0" "cmovnae\0" - "cmovnc\0" "cmovng\0" "cmovnge\0" "cmovnl\0" "cmovnle\0" "cmovno\0" "cmovnp\0" "cmovns\0" "cmovnz\0" "cmovo\0" - "cmovp\0" "cmovpe\0" "cmovpo\0" "cmovs\0" "cmovz\0" "cmp\0" "cmps\0" "cmpxchg\0" "cmpxchg16b\0" "cmpxchg8b\0" - "cpuid\0" "cqo\0" "crc32\0" "cvtpd2pi\0" "cvtpi2pd\0" "cvtpi2ps\0" "cvtps2pi\0" "cvttpd2pi\0" "cvttps2pi\0" "cwd\0" - "cwde\0" "daa\0" "das\0" "enqcmd\0" "enqcmds\0" "f2xm1\0" "fabs\0" "faddp\0" "fbld\0" "fbstp\0" "fchs\0" "fclex\0" - "fcmovb\0" "fcmovbe\0" "fcmove\0" "fcmovnb\0" "fcmovnbe\0" "fcmovne\0" "fcmovnu\0" "fcmovu\0" "fcom\0" "fcomi\0" - "fcomip\0" "fcomp\0" "fcompp\0" "fcos\0" "fdecstp\0" "fdiv\0" "fdivp\0" "fdivr\0" "fdivrp\0" "femms\0" "ffree\0" - "fiadd\0" "ficom\0" "ficomp\0" "fidiv\0" "fidivr\0" "fild\0" "fimul\0" "fincstp\0" "finit\0" "fist\0" "fistp\0" - "fisttp\0" "fisub\0" "fisubr\0" "fld\0" "fld1\0" "fldcw\0" "fldenv\0" "fldl2e\0" "fldl2t\0" "fldlg2\0" "fldln2\0" - "fldpi\0" "fldz\0" "fmulp\0" "fnclex\0" "fninit\0" "fnop\0" "fnsave\0" "fnstcw\0" "fnstenv\0" "fnstsw\0" "fpatan\0" - "fprem\0" "fprem1\0" "fptan\0" "frndint\0" "frstor\0" "fsave\0" "fscale\0" "fsin\0" "fsincos\0" "fsqrt\0" "fst\0" - "fstcw\0" "fstenv\0" "fstp\0" "fstsw\0" "fsubp\0" "fsubrp\0" "ftst\0" "fucom\0" "fucomi\0" "fucomip\0" "fucomp\0" - "fucompp\0" "fwait\0" "fxam\0" "fxch\0" "fxrstor\0" "fxrstor64\0" "fxsave\0" "fxsave64\0" "fxtract\0" "fyl2x\0" - "fyl2xp1\0" 
"getsec\0" "hlt\0" "inc\0" "insertq\0" "int3\0" "into\0" "invept\0" "invlpg\0" "invlpga\0" "invpcid\0" - "invvpid\0" "iret\0" "iretd\0" "iretq\0" "iretw\0" "ja\0" "jae\0" "jb\0" "jbe\0" "jc\0" "je\0" "jecxz\0" "jg\0" - "jge\0" "jl\0" "jle\0" "jmp\0" "jna\0" "jnae\0" "jnb\0" "jnbe\0" "jnc\0" "jne\0" "jng\0" "jnge\0" "jnl\0" "jnle\0" - "jno\0" "jnp\0" "jns\0" "jnz\0" "jo\0" "jp\0" "jpe\0" "jpo\0" "js\0" "jz\0" "kaddb\0" "kaddd\0" "kaddq\0" "kaddw\0" - "kandb\0" "kandd\0" "kandnb\0" "kandnd\0" "kandnq\0" "kandnw\0" "kandq\0" "kandw\0" "kmovb\0" "kmovw\0" "knotb\0" - "knotd\0" "knotq\0" "knotw\0" "korb\0" "kord\0" "korq\0" "kortestb\0" "kortestd\0" "kortestq\0" "kortestw\0" "korw\0" - "kshiftlb\0" "kshiftld\0" "kshiftlq\0" "kshiftlw\0" "kshiftrb\0" "kshiftrd\0" "kshiftrq\0" "kshiftrw\0" "ktestb\0" - "ktestd\0" "ktestq\0" "ktestw\0" "kunpckbw\0" "kunpckdq\0" "kunpckwd\0" "kxnorb\0" "kxnord\0" "kxnorq\0" "kxnorw\0" - "kxorb\0" "kxord\0" "kxorq\0" "kxorw\0" "lahf\0" "lar\0" "lds\0" "lea\0" "leave\0" "les\0" "lfence\0" "lfs\0" + "cdq\0" "cdqe\0" "clac\0" "clc\0" "cld\0" "cldemote\0" "clflush\0" "clflushopt\0" "clgi\0" "cli\0" "clrssbsy\0" + "clts\0" "clwb\0" "clzero\0" "cmc\0" "cmova\0" "cmovae\0" "cmovc\0" "cmovg\0" "cmovge\0" "cmovl\0" "cmovle\0" + "cmovna\0" "cmovnae\0" "cmovnc\0" "cmovng\0" "cmovnge\0" "cmovnl\0" "cmovnle\0" "cmovno\0" "cmovnp\0" "cmovns\0" + "cmovnz\0" "cmovo\0" "cmovp\0" "cmovpe\0" "cmovpo\0" "cmovs\0" "cmovz\0" "cmp\0" "cmps\0" "cmpxchg\0" "cmpxchg16b\0" + "cmpxchg8b\0" "cpuid\0" "cqo\0" "crc32\0" "cvtpd2pi\0" "cvtpi2pd\0" "cvtpi2ps\0" "cvtps2pi\0" "cvttpd2pi\0" + "cvttps2pi\0" "cwd\0" "cwde\0" "daa\0" "das\0" "endbr32\0" "endbr64\0" "enqcmd\0" "enqcmds\0" "f2xm1\0" "fabs\0" + "faddp\0" "fbld\0" "fbstp\0" "fchs\0" "fclex\0" "fcmovb\0" "fcmovbe\0" "fcmove\0" "fcmovnb\0" "fcmovnbe\0" + "fcmovne\0" "fcmovnu\0" "fcmovu\0" "fcom\0" "fcomi\0" "fcomip\0" "fcomp\0" "fcompp\0" "fcos\0" "fdecstp\0" "fdiv\0" + "fdivp\0" "fdivr\0" "fdivrp\0" "femms\0" "ffree\0" "fiadd\0" "ficom\0" "ficomp\0" "fidiv\0" "fidivr\0" "fild\0" + "fimul\0" "fincstp\0" "finit\0" "fist\0" "fistp\0" "fisttp\0" "fisub\0" "fisubr\0" "fld\0" "fld1\0" "fldcw\0" + "fldenv\0" "fldl2e\0" "fldl2t\0" "fldlg2\0" "fldln2\0" "fldpi\0" "fldz\0" "fmulp\0" "fnclex\0" "fninit\0" "fnop\0" + "fnsave\0" "fnstcw\0" "fnstenv\0" "fnstsw\0" "fpatan\0" "fprem\0" "fprem1\0" "fptan\0" "frndint\0" "frstor\0" + "fsave\0" "fscale\0" "fsin\0" "fsincos\0" "fsqrt\0" "fst\0" "fstcw\0" "fstenv\0" "fstp\0" "fstsw\0" "fsubp\0" + "fsubrp\0" "ftst\0" "fucom\0" "fucomi\0" "fucomip\0" "fucomp\0" "fucompp\0" "fwait\0" "fxam\0" "fxch\0" "fxrstor\0" + "fxrstor64\0" "fxsave\0" "fxsave64\0" "fxtract\0" "fyl2x\0" "fyl2xp1\0" "getsec\0" "hlt\0" "inc\0" "incsspd\0" + "incsspq\0" "insertq\0" "int3\0" "into\0" "invept\0" "invlpg\0" "invlpga\0" "invpcid\0" "invvpid\0" "iret\0" + "iretd\0" "iretq\0" "iretw\0" "ja\0" "jae\0" "jb\0" "jbe\0" "jc\0" "je\0" "jecxz\0" "jg\0" "jge\0" "jl\0" "jle\0" + "jmp\0" "jna\0" "jnae\0" "jnb\0" "jnbe\0" "jnc\0" "jne\0" "jng\0" "jnge\0" "jnl\0" "jnle\0" "jno\0" "jnp\0" "jns\0" + "jnz\0" "jo\0" "jp\0" "jpe\0" "jpo\0" "js\0" "jz\0" "kaddb\0" "kaddd\0" "kaddq\0" "kaddw\0" "kandb\0" "kandd\0" + "kandnb\0" "kandnd\0" "kandnq\0" "kandnw\0" "kandq\0" "kandw\0" "kmovb\0" "kmovw\0" "knotb\0" "knotd\0" "knotq\0" + "knotw\0" "korb\0" "kord\0" "korq\0" "kortestb\0" "kortestd\0" "kortestq\0" "kortestw\0" "korw\0" "kshiftlb\0" + "kshiftld\0" "kshiftlq\0" "kshiftlw\0" "kshiftrb\0" "kshiftrd\0" "kshiftrq\0" "kshiftrw\0" "ktestb\0" "ktestd\0" + 
"ktestq\0" "ktestw\0" "kunpckbw\0" "kunpckdq\0" "kunpckwd\0" "kxnorb\0" "kxnord\0" "kxnorq\0" "kxnorw\0" "kxorb\0" + "kxord\0" "kxorq\0" "kxorw\0" "lahf\0" "lar\0" "lds\0" "ldtilecfg\0" "lea\0" "leave\0" "les\0" "lfence\0" "lfs\0" "lgdt\0" "lgs\0" "lidt\0" "lldt\0" "llwpcb\0" "lmsw\0" "lods\0" "loop\0" "loope\0" "loopne\0" "lsl\0" "ltr\0" - "lwpins\0" "lwpval\0" "lzcnt\0" "mfence\0" "monitor\0" "monitorx\0" "movdir64b\0" "movdiri\0" "movdq2q\0" "movnti\0" - "movntq\0" "movntsd\0" "movntss\0" "movq2dq\0" "movsx\0" "movsxd\0" "movzx\0" "mulx\0" "mwait\0" "mwaitx\0" "neg\0" - "not\0" "out\0" "outs\0" "pause\0" "pavgusb\0" "pcommit\0" "pdep\0" "pext\0" "pf2id\0" "pf2iw\0" "pfacc\0" "pfadd\0" - "pfcmpeq\0" "pfcmpge\0" "pfcmpgt\0" "pfmax\0" "pfmin\0" "pfmul\0" "pfnacc\0" "pfpnacc\0" "pfrcp\0" "pfrcpit1\0" - "pfrcpit2\0" "pfrcpv\0" "pfrsqit1\0" "pfrsqrt\0" "pfrsqrtv\0" "pfsub\0" "pfsubr\0" "pi2fd\0" "pi2fw\0" "pmulhrw\0" - "pop\0" "popa\0" "popad\0" "popcnt\0" "popf\0" "popfd\0" "popfq\0" "prefetch\0" "prefetchnta\0" "prefetcht0\0" - "prefetcht1\0" "prefetcht2\0" "prefetchw\0" "prefetchwt1\0" "pshufw\0" "pswapd\0" "push\0" "pusha\0" "pushad\0" - "pushf\0" "pushfd\0" "pushfq\0" "rcl\0" "rcr\0" "rdfsbase\0" "rdgsbase\0" "rdmsr\0" "rdpid\0" "rdpmc\0" "rdrand\0" - "rdseed\0" "rdtsc\0" "rdtscp\0" "rol\0" "ror\0" "rorx\0" "rsm\0" "sahf\0" "sal\0" "sar\0" "sarx\0" "sbb\0" "scas\0" - "seta\0" "setae\0" "setb\0" "setbe\0" "setc\0" "sete\0" "setg\0" "setge\0" "setl\0" "setle\0" "setna\0" "setnae\0" - "setnb\0" "setnbe\0" "setnc\0" "setne\0" "setng\0" "setnge\0" "setnl\0" "setnle\0" "setno\0" "setnp\0" "setns\0" - "setnz\0" "seto\0" "setp\0" "setpe\0" "setpo\0" "sets\0" "setz\0" "sfence\0" "sgdt\0" "sha1msg1\0" "sha1msg2\0" - "sha1nexte\0" "sha1rnds4\0" "sha256msg1\0" "sha256msg2\0" "sha256rnds2\0" "shl\0" "shlx\0" "shr\0" "shrd\0" "shrx\0" - "sidt\0" "skinit\0" "sldt\0" "slwpcb\0" "smsw\0" "stac\0" "stc\0" "stgi\0" "sti\0" "stos\0" "str\0" "swapgs\0" - "syscall\0" "sysenter\0" "sysexit\0" "sysexit64\0" "sysret\0" "sysret64\0" "t1mskc\0" "tzcnt\0" "tzmsk\0" "ud2\0" - "v4fmaddps\0" "v4fmaddss\0" "v4fnmaddps\0" "v4fnmaddss\0" "vaddpd\0" "vaddps\0" "vaddsd\0" "vaddss\0" "vaddsubpd\0" - "vaddsubps\0" "vaesdec\0" "vaesdeclast\0" "vaesenc\0" "vaesenclast\0" "vaesimc\0" "vaeskeygenassist\0" "valignd\0" - "valignq\0" "vandnpd\0" "vandnps\0" "vandpd\0" "vandps\0" "vblendmb\0" "vblendmd\0" "vblendmpd\0" "vblendmps\0" - "vblendmq\0" "vblendmw\0" "vblendpd\0" "vblendps\0" "vblendvpd\0" "vblendvps\0" "vbroadcastf128\0" - "vbroadcastf32x2\0" "vbroadcastf32x4\0" "vbroadcastf32x8\0" "vbroadcastf64x2\0" "vbroadcastf64x4\0" - "vbroadcasti128\0" "vbroadcasti32x2\0" "vbroadcasti32x4\0" "vbroadcasti32x8\0" "vbroadcasti64x2\0" - "vbroadcasti64x4\0" "vbroadcastsd\0" "vbroadcastss\0" "vcmppd\0" "vcmpps\0" "vcmpsd\0" "vcmpss\0" "vcomisd\0" - "vcomiss\0" "vcompresspd\0" "vcompressps\0" "vcvtdq2pd\0" "vcvtdq2ps\0" "vcvtne2ps2bf16\0" "vcvtneps2bf16\0" - "vcvtpd2dq\0" "vcvtpd2ps\0" "vcvtpd2qq\0" "vcvtpd2udq\0" "vcvtpd2uqq\0" "vcvtph2ps\0" "vcvtps2dq\0" "vcvtps2pd\0" - "vcvtps2ph\0" "vcvtps2qq\0" "vcvtps2udq\0" "vcvtps2uqq\0" "vcvtqq2pd\0" "vcvtqq2ps\0" "vcvtsd2si\0" "vcvtsd2ss\0" - "vcvtsd2usi\0" "vcvtsi2sd\0" "vcvtsi2ss\0" "vcvtss2sd\0" "vcvtss2si\0" "vcvtss2usi\0" "vcvttpd2dq\0" "vcvttpd2qq\0" - "vcvttpd2udq\0" "vcvttpd2uqq\0" "vcvttps2dq\0" "vcvttps2qq\0" "vcvttps2udq\0" "vcvttps2uqq\0" "vcvttsd2si\0" - "vcvttsd2usi\0" "vcvttss2si\0" "vcvttss2usi\0" "vcvtudq2pd\0" "vcvtudq2ps\0" "vcvtuqq2pd\0" "vcvtuqq2ps\0" - "vcvtusi2sd\0" 
"vcvtusi2ss\0" "vdbpsadbw\0" "vdivpd\0" "vdivps\0" "vdivsd\0" "vdivss\0" "vdpbf16ps\0" "vdppd\0" - "vdpps\0" "verr\0" "verw\0" "vexp2pd\0" "vexp2ps\0" "vexpandpd\0" "vexpandps\0" "vextractf128\0" "vextractf32x4\0" - "vextractf32x8\0" "vextractf64x2\0" "vextractf64x4\0" "vextracti128\0" "vextracti32x4\0" "vextracti32x8\0" - "vextracti64x2\0" "vextracti64x4\0" "vextractps\0" "vfixupimmpd\0" "vfixupimmps\0" "vfixupimmsd\0" "vfixupimmss\0" - "vfmadd132pd\0" "vfmadd132ps\0" "vfmadd132sd\0" "vfmadd132ss\0" "vfmadd213pd\0" "vfmadd213ps\0" "vfmadd213sd\0" - "vfmadd213ss\0" "vfmadd231pd\0" "vfmadd231ps\0" "vfmadd231sd\0" "vfmadd231ss\0" "vfmaddpd\0" "vfmaddps\0" - "vfmaddsd\0" "vfmaddss\0" "vfmaddsub132pd\0" "vfmaddsub132ps\0" "vfmaddsub213pd\0" "vfmaddsub213ps\0" + "lwpins\0" "lwpval\0" "lzcnt\0" "mcommit\0" "mfence\0" "monitorx\0" "movdir64b\0" "movdiri\0" "movdq2q\0" "movnti\0" + "movntq\0" "movntsd\0" "movntss\0" "movq2dq\0" "movsx\0" "movsxd\0" "movzx\0" "mulx\0" "mwaitx\0" "neg\0" "not\0" + "out\0" "outs\0" "pavgusb\0" "pconfig\0" "pdep\0" "pext\0" "pf2id\0" "pf2iw\0" "pfacc\0" "pfadd\0" "pfcmpeq\0" + "pfcmpge\0" "pfcmpgt\0" "pfmax\0" "pfmin\0" "pfmul\0" "pfnacc\0" "pfpnacc\0" "pfrcp\0" "pfrcpit1\0" "pfrcpit2\0" + "pfrcpv\0" "pfrsqit1\0" "pfrsqrt\0" "pfrsqrtv\0" "pfsub\0" "pfsubr\0" "pi2fd\0" "pi2fw\0" "pmulhrw\0" "pop\0" + "popa\0" "popad\0" "popcnt\0" "popf\0" "popfd\0" "popfq\0" "prefetch\0" "prefetchnta\0" "prefetcht0\0" "prefetcht1\0" + "prefetcht2\0" "prefetchw\0" "prefetchwt1\0" "pshufw\0" "psmash\0" "pswapd\0" "ptwrite\0" "push\0" "pusha\0" + "pushad\0" "pushf\0" "pushfd\0" "pushfq\0" "pvalidate\0" "rcl\0" "rcr\0" "rdfsbase\0" "rdgsbase\0" "rdmsr\0" + "rdpid\0" "rdpkru\0" "rdpmc\0" "rdpru\0" "rdrand\0" "rdseed\0" "rdsspd\0" "rdsspq\0" "rdtsc\0" "rdtscp\0" + "rmpadjust\0" "rmpupdate\0" "rol\0" "ror\0" "rorx\0" "rsm\0" "rstorssp\0" "sahf\0" "sal\0" "sar\0" "sarx\0" + "saveprevssp\0" "sbb\0" "scas\0" "serialize\0" "seta\0" "setae\0" "setb\0" "setbe\0" "setc\0" "sete\0" "setg\0" + "setge\0" "setl\0" "setle\0" "setna\0" "setnae\0" "setnb\0" "setnbe\0" "setnc\0" "setne\0" "setng\0" "setnge\0" + "setnl\0" "setnle\0" "setno\0" "setnp\0" "setns\0" "setnz\0" "seto\0" "setp\0" "setpe\0" "setpo\0" "sets\0" + "setssbsy\0" "setz\0" "sfence\0" "sgdt\0" "sha1msg1\0" "sha1msg2\0" "sha1nexte\0" "sha1rnds4\0" "sha256msg1\0" + "sha256msg2\0" "sha256rnds2\0" "shl\0" "shlx\0" "shr\0" "shrd\0" "shrx\0" "sidt\0" "skinit\0" "sldt\0" "slwpcb\0" + "smsw\0" "stac\0" "stc\0" "stgi\0" "sti\0" "stos\0" "str\0" "sttilecfg\0" "swapgs\0" "syscall\0" "sysenter\0" + "sysexit\0" "sysexit64\0" "sysret\0" "sysret64\0" "t1mskc\0" "tdpbf16ps\0" "tdpbssd\0" "tdpbsud\0" "tdpbusd\0" + "tdpbuud\0" "tileloadd\0" "tileloaddt1\0" "tilerelease\0" "tilestored\0" "tilezero\0" "tpause\0" "tzcnt\0" "tzmsk\0" + "ud0\0" "ud1\0" "ud2\0" "umonitor\0" "umwait\0" "v4fmaddps\0" "v4fmaddss\0" "v4fnmaddps\0" "v4fnmaddss\0" "vaddpd\0" + "vaddps\0" "vaddsd\0" "vaddss\0" "vaddsubpd\0" "vaddsubps\0" "vaesdec\0" "vaesdeclast\0" "vaesenc\0" "vaesenclast\0" + "vaesimc\0" "vaeskeygenassist\0" "valignd\0" "valignq\0" "vandnpd\0" "vandnps\0" "vandpd\0" "vandps\0" "vblendmb\0" + "vblendmd\0" "vblendmpd\0" "vblendmps\0" "vblendmq\0" "vblendmw\0" "vblendpd\0" "vblendps\0" "vblendvpd\0" + "vblendvps\0" "vbroadcastf128\0" "vbroadcastf32x2\0" "vbroadcastf32x4\0" "vbroadcastf32x8\0" "vbroadcastf64x2\0" + "vbroadcastf64x4\0" "vbroadcasti128\0" "vbroadcasti32x2\0" "vbroadcasti32x4\0" "vbroadcasti32x8\0" + "vbroadcasti64x2\0" "vbroadcasti64x4\0" "vbroadcastsd\0" 
"vbroadcastss\0" "vcmppd\0" "vcmpps\0" "vcmpsd\0" "vcmpss\0" + "vcomisd\0" "vcomiss\0" "vcompresspd\0" "vcompressps\0" "vcvtdq2pd\0" "vcvtdq2ps\0" "vcvtne2ps2bf16\0" + "vcvtneps2bf16\0" "vcvtpd2dq\0" "vcvtpd2ps\0" "vcvtpd2qq\0" "vcvtpd2udq\0" "vcvtpd2uqq\0" "vcvtph2ps\0" "vcvtps2dq\0" + "vcvtps2pd\0" "vcvtps2ph\0" "vcvtps2qq\0" "vcvtps2udq\0" "vcvtps2uqq\0" "vcvtqq2pd\0" "vcvtqq2ps\0" "vcvtsd2si\0" + "vcvtsd2ss\0" "vcvtsd2usi\0" "vcvtsi2sd\0" "vcvtsi2ss\0" "vcvtss2sd\0" "vcvtss2si\0" "vcvtss2usi\0" "vcvttpd2dq\0" + "vcvttpd2qq\0" "vcvttpd2udq\0" "vcvttpd2uqq\0" "vcvttps2dq\0" "vcvttps2qq\0" "vcvttps2udq\0" "vcvttps2uqq\0" + "vcvttsd2si\0" "vcvttsd2usi\0" "vcvttss2si\0" "vcvttss2usi\0" "vcvtudq2pd\0" "vcvtudq2ps\0" "vcvtuqq2pd\0" + "vcvtuqq2ps\0" "vcvtusi2sd\0" "vcvtusi2ss\0" "vdbpsadbw\0" "vdivpd\0" "vdivps\0" "vdivsd\0" "vdivss\0" "vdpbf16ps\0" + "vdppd\0" "vdpps\0" "verr\0" "verw\0" "vexp2pd\0" "vexp2ps\0" "vexpandpd\0" "vexpandps\0" "vextractf128\0" + "vextractf32x4\0" "vextractf32x8\0" "vextractf64x2\0" "vextractf64x4\0" "vextracti128\0" "vextracti32x4\0" + "vextracti32x8\0" "vextracti64x2\0" "vextracti64x4\0" "vextractps\0" "vfixupimmpd\0" "vfixupimmps\0" "vfixupimmsd\0" + "vfixupimmss\0" "vfmadd132pd\0" "vfmadd132ps\0" "vfmadd132sd\0" "vfmadd132ss\0" "vfmadd213pd\0" "vfmadd213ps\0" + "vfmadd213sd\0" "vfmadd213ss\0" "vfmadd231pd\0" "vfmadd231ps\0" "vfmadd231sd\0" "vfmadd231ss\0" "vfmaddpd\0" + "vfmaddps\0" "vfmaddsd\0" "vfmaddss\0" "vfmaddsub132pd\0" "vfmaddsub132ps\0" "vfmaddsub213pd\0" "vfmaddsub213ps\0" "vfmaddsub231pd\0" "vfmaddsub231ps\0" "vfmaddsubpd\0" "vfmaddsubps\0" "vfmsub132pd\0" "vfmsub132ps\0" "vfmsub132sd\0" "vfmsub132ss\0" "vfmsub213pd\0" "vfmsub213ps\0" "vfmsub213sd\0" "vfmsub213ss\0" "vfmsub231pd\0" "vfmsub231ps\0" "vfmsub231sd\0" "vfmsub231ss\0" "vfmsubadd132pd\0" "vfmsubadd132ps\0" "vfmsubadd213pd\0" "vfmsubadd213ps\0" @@ -2678,35 +2771,35 @@ const char InstDB::_nameData[] = "vmovlhps\0" "vmovlpd\0" "vmovlps\0" "vmovmskpd\0" "vmovmskps\0" "vmovntdq\0" "vmovntdqa\0" "vmovntpd\0" "vmovntps\0" "vmovq\0" "vmovsd\0" "vmovshdup\0" "vmovsldup\0" "vmovss\0" "vmovupd\0" "vmovups\0" "vmpsadbw\0" "vmptrld\0" "vmptrst\0" "vmread\0" "vmresume\0" "vmrun\0" "vmsave\0" "vmulpd\0" "vmulps\0" "vmulsd\0" "vmulss\0" "vmwrite\0" - "vmxon\0" "vorpd\0" "vorps\0" "vp4dpwssd\0" "vp4dpwssds\0" "vpabsb\0" "vpabsd\0" "vpabsq\0" "vpabsw\0" "vpackssdw\0" - "vpacksswb\0" "vpackusdw\0" "vpackuswb\0" "vpaddb\0" "vpaddd\0" "vpaddq\0" "vpaddsb\0" "vpaddsw\0" "vpaddusb\0" - "vpaddusw\0" "vpaddw\0" "vpalignr\0" "vpand\0" "vpandd\0" "vpandn\0" "vpandnd\0" "vpandnq\0" "vpandq\0" "vpavgb\0" - "vpavgw\0" "vpblendd\0" "vpblendvb\0" "vpblendw\0" "vpbroadcastb\0" "vpbroadcastd\0" "vpbroadcastmb2d\0" - "vpbroadcastmb2q\0" "vpbroadcastq\0" "vpbroadcastw\0" "vpclmulqdq\0" "vpcmov\0" "vpcmpb\0" "vpcmpd\0" "vpcmpeqb\0" - "vpcmpeqd\0" "vpcmpeqq\0" "vpcmpeqw\0" "vpcmpestri\0" "vpcmpestrm\0" "vpcmpgtb\0" "vpcmpgtd\0" "vpcmpgtq\0" - "vpcmpgtw\0" "vpcmpistri\0" "vpcmpistrm\0" "vpcmpq\0" "vpcmpub\0" "vpcmpud\0" "vpcmpuq\0" "vpcmpuw\0" "vpcmpw\0" - "vpcomb\0" "vpcomd\0" "vpcompressb\0" "vpcompressd\0" "vpcompressq\0" "vpcompressw\0" "vpcomq\0" "vpcomub\0" - "vpcomud\0" "vpcomuq\0" "vpcomuw\0" "vpcomw\0" "vpconflictd\0" "vpconflictq\0" "vpdpbusd\0" "vpdpbusds\0" - "vpdpwssd\0" "vpdpwssds\0" "vperm2f128\0" "vperm2i128\0" "vpermb\0" "vpermd\0" "vpermi2b\0" "vpermi2d\0" - "vpermi2pd\0" "vpermi2ps\0" "vpermi2q\0" "vpermi2w\0" "vpermil2pd\0" "vpermil2ps\0" "vpermilpd\0" "vpermilps\0" - "vpermpd\0" "vpermps\0" "vpermq\0" 
"vpermt2b\0" "vpermt2d\0" "vpermt2pd\0" "vpermt2ps\0" "vpermt2q\0" "vpermt2w\0" - "vpermw\0" "vpexpandb\0" "vpexpandd\0" "vpexpandq\0" "vpexpandw\0" "vpextrb\0" "vpextrd\0" "vpextrq\0" "vpextrw\0" - "vpgatherdd\0" "vpgatherdq\0" "vpgatherqd\0" "vpgatherqq\0" "vphaddbd\0" "vphaddbq\0" "vphaddbw\0" "vphaddd\0" - "vphadddq\0" "vphaddsw\0" "vphaddubd\0" "vphaddubq\0" "vphaddubw\0" "vphaddudq\0" "vphadduwd\0" "vphadduwq\0" - "vphaddw\0" "vphaddwd\0" "vphaddwq\0" "vphminposuw\0" "vphsubbw\0" "vphsubd\0" "vphsubdq\0" "vphsubsw\0" "vphsubw\0" - "vphsubwd\0" "vpinsrb\0" "vpinsrd\0" "vpinsrq\0" "vpinsrw\0" "vplzcntd\0" "vplzcntq\0" "vpmacsdd\0" "vpmacsdqh\0" - "vpmacsdql\0" "vpmacssdd\0" "vpmacssdqh\0" "vpmacssdql\0" "vpmacsswd\0" "vpmacssww\0" "vpmacswd\0" "vpmacsww\0" - "vpmadcsswd\0" "vpmadcswd\0" "vpmadd52huq\0" "vpmadd52luq\0" "vpmaddubsw\0" "vpmaddwd\0" "vpmaskmovd\0" - "vpmaskmovq\0" "vpmaxsb\0" "vpmaxsd\0" "vpmaxsq\0" "vpmaxsw\0" "vpmaxub\0" "vpmaxud\0" "vpmaxuq\0" "vpmaxuw\0" - "vpminsb\0" "vpminsd\0" "vpminsq\0" "vpminsw\0" "vpminub\0" "vpminud\0" "vpminuq\0" "vpminuw\0" "vpmovb2m\0" - "vpmovd2m\0" "vpmovdb\0" "vpmovdw\0" "vpmovm2b\0" "vpmovm2d\0" "vpmovm2q\0" "vpmovm2w\0" "vpmovmskb\0" "vpmovq2m\0" - "vpmovqb\0" "vpmovqd\0" "vpmovqw\0" "vpmovsdb\0" "vpmovsdw\0" "vpmovsqb\0" "vpmovsqd\0" "vpmovsqw\0" "vpmovswb\0" - "vpmovsxbd\0" "vpmovsxbq\0" "vpmovsxbw\0" "vpmovsxdq\0" "vpmovsxwd\0" "vpmovsxwq\0" "vpmovusdb\0" "vpmovusdw\0" - "vpmovusqb\0" "vpmovusqd\0" "vpmovusqw\0" "vpmovuswb\0" "vpmovw2m\0" "vpmovwb\0" "vpmovzxbd\0" "vpmovzxbq\0" - "vpmovzxbw\0" "vpmovzxdq\0" "vpmovzxwd\0" "vpmovzxwq\0" "vpmuldq\0" "vpmulhrsw\0" "vpmulhuw\0" "vpmulhw\0" - "vpmulld\0" "vpmullq\0" "vpmullw\0" "vpmultishiftqb\0" "vpmuludq\0" "vpopcntb\0" "vpopcntd\0" "vpopcntq\0" - "vpopcntw\0" "vpor\0" "vpord\0" "vporq\0" "vpperm\0" "vprold\0" "vprolq\0" "vprolvd\0" "vprolvq\0" "vprord\0" - "vprorq\0" "vprorvd\0" "vprorvq\0" "vprotb\0" "vprotd\0" "vprotq\0" "vprotw\0" "vpsadbw\0" "vpscatterdd\0" + "vmxon\0" "vorpd\0" "vorps\0" "vp2intersectd\0" "vp2intersectq\0" "vp4dpwssd\0" "vp4dpwssds\0" "vpabsb\0" "vpabsd\0" + "vpabsq\0" "vpabsw\0" "vpackssdw\0" "vpacksswb\0" "vpackusdw\0" "vpackuswb\0" "vpaddb\0" "vpaddd\0" "vpaddq\0" + "vpaddsb\0" "vpaddsw\0" "vpaddusb\0" "vpaddusw\0" "vpaddw\0" "vpalignr\0" "vpand\0" "vpandd\0" "vpandn\0" "vpandnd\0" + "vpandnq\0" "vpandq\0" "vpavgb\0" "vpavgw\0" "vpblendd\0" "vpblendvb\0" "vpblendw\0" "vpbroadcastb\0" + "vpbroadcastd\0" "vpbroadcastmb2d\0" "vpbroadcastmb2q\0" "vpbroadcastq\0" "vpbroadcastw\0" "vpclmulqdq\0" "vpcmov\0" + "vpcmpb\0" "vpcmpd\0" "vpcmpeqb\0" "vpcmpeqd\0" "vpcmpeqq\0" "vpcmpeqw\0" "vpcmpestri\0" "vpcmpestrm\0" "vpcmpgtb\0" + "vpcmpgtd\0" "vpcmpgtq\0" "vpcmpgtw\0" "vpcmpistri\0" "vpcmpistrm\0" "vpcmpq\0" "vpcmpub\0" "vpcmpud\0" "vpcmpuq\0" + "vpcmpuw\0" "vpcmpw\0" "vpcomb\0" "vpcomd\0" "vpcompressb\0" "vpcompressd\0" "vpcompressq\0" "vpcompressw\0" + "vpcomq\0" "vpcomub\0" "vpcomud\0" "vpcomuq\0" "vpcomuw\0" "vpcomw\0" "vpconflictd\0" "vpconflictq\0" "vpdpbusd\0" + "vpdpbusds\0" "vpdpwssd\0" "vpdpwssds\0" "vperm2f128\0" "vperm2i128\0" "vpermb\0" "vpermd\0" "vpermi2b\0" + "vpermi2d\0" "vpermi2pd\0" "vpermi2ps\0" "vpermi2q\0" "vpermi2w\0" "vpermil2pd\0" "vpermil2ps\0" "vpermilpd\0" + "vpermilps\0" "vpermpd\0" "vpermps\0" "vpermq\0" "vpermt2b\0" "vpermt2d\0" "vpermt2pd\0" "vpermt2ps\0" "vpermt2q\0" + "vpermt2w\0" "vpermw\0" "vpexpandb\0" "vpexpandd\0" "vpexpandq\0" "vpexpandw\0" "vpextrb\0" "vpextrd\0" "vpextrq\0" + "vpextrw\0" "vpgatherdd\0" "vpgatherdq\0" "vpgatherqd\0" 
"vpgatherqq\0" "vphaddbd\0" "vphaddbq\0" "vphaddbw\0" + "vphaddd\0" "vphadddq\0" "vphaddsw\0" "vphaddubd\0" "vphaddubq\0" "vphaddubw\0" "vphaddudq\0" "vphadduwd\0" + "vphadduwq\0" "vphaddw\0" "vphaddwd\0" "vphaddwq\0" "vphminposuw\0" "vphsubbw\0" "vphsubd\0" "vphsubdq\0" + "vphsubsw\0" "vphsubw\0" "vphsubwd\0" "vpinsrb\0" "vpinsrd\0" "vpinsrq\0" "vpinsrw\0" "vplzcntd\0" "vplzcntq\0" + "vpmacsdd\0" "vpmacsdqh\0" "vpmacsdql\0" "vpmacssdd\0" "vpmacssdqh\0" "vpmacssdql\0" "vpmacsswd\0" "vpmacssww\0" + "vpmacswd\0" "vpmacsww\0" "vpmadcsswd\0" "vpmadcswd\0" "vpmadd52huq\0" "vpmadd52luq\0" "vpmaddubsw\0" "vpmaddwd\0" + "vpmaskmovd\0" "vpmaskmovq\0" "vpmaxsb\0" "vpmaxsd\0" "vpmaxsq\0" "vpmaxsw\0" "vpmaxub\0" "vpmaxud\0" "vpmaxuq\0" + "vpmaxuw\0" "vpminsb\0" "vpminsd\0" "vpminsq\0" "vpminsw\0" "vpminub\0" "vpminud\0" "vpminuq\0" "vpminuw\0" + "vpmovb2m\0" "vpmovd2m\0" "vpmovdb\0" "vpmovdw\0" "vpmovm2b\0" "vpmovm2d\0" "vpmovm2q\0" "vpmovm2w\0" "vpmovmskb\0" + "vpmovq2m\0" "vpmovqb\0" "vpmovqd\0" "vpmovqw\0" "vpmovsdb\0" "vpmovsdw\0" "vpmovsqb\0" "vpmovsqd\0" "vpmovsqw\0" + "vpmovswb\0" "vpmovsxbd\0" "vpmovsxbq\0" "vpmovsxbw\0" "vpmovsxdq\0" "vpmovsxwd\0" "vpmovsxwq\0" "vpmovusdb\0" + "vpmovusdw\0" "vpmovusqb\0" "vpmovusqd\0" "vpmovusqw\0" "vpmovuswb\0" "vpmovw2m\0" "vpmovwb\0" "vpmovzxbd\0" + "vpmovzxbq\0" "vpmovzxbw\0" "vpmovzxdq\0" "vpmovzxwd\0" "vpmovzxwq\0" "vpmuldq\0" "vpmulhrsw\0" "vpmulhuw\0" + "vpmulhw\0" "vpmulld\0" "vpmullq\0" "vpmullw\0" "vpmultishiftqb\0" "vpmuludq\0" "vpopcntb\0" "vpopcntd\0" + "vpopcntq\0" "vpopcntw\0" "vpor\0" "vpord\0" "vporq\0" "vpperm\0" "vprold\0" "vprolq\0" "vprolvd\0" "vprolvq\0" + "vprord\0" "vprorq\0" "vprorvd\0" "vprorvq\0" "vprotb\0" "vprotd\0" "vprotq\0" "vprotw\0" "vpsadbw\0" "vpscatterdd\0" "vpscatterdq\0" "vpscatterqd\0" "vpscatterqq\0" "vpshab\0" "vpshad\0" "vpshaq\0" "vpshaw\0" "vpshlb\0" "vpshld\0" "vpshldd\0" "vpshldq\0" "vpshldvd\0" "vpshldvq\0" "vpshldvw\0" "vpshldw\0" "vpshlq\0" "vpshlw\0" "vpshrdd\0" "vpshrdq\0" "vpshrdvd\0" "vpshrdvq\0" "vpshrdvw\0" "vpshrdw\0" "vpshufb\0" "vpshufbitqmb\0" "vpshufd\0" "vpshufhw\0" @@ -2726,9 +2819,9 @@ const char InstDB::_nameData[] = "vshuff64x2\0" "vshufi32x4\0" "vshufi64x2\0" "vshufpd\0" "vshufps\0" "vsqrtpd\0" "vsqrtps\0" "vsqrtsd\0" "vsqrtss\0" "vstmxcsr\0" "vsubpd\0" "vsubps\0" "vsubsd\0" "vsubss\0" "vtestpd\0" "vtestps\0" "vucomisd\0" "vucomiss\0" "vunpckhpd\0" "vunpckhps\0" "vunpcklpd\0" "vunpcklps\0" "vxorpd\0" "vxorps\0" "vzeroall\0" "vzeroupper\0" "wbinvd\0" - "wbnoinvd\0" "wrfsbase\0" "wrgsbase\0" "wrmsr\0" "xabort\0" "xadd\0" "xbegin\0" "xend\0" "xgetbv\0" "xlatb\0" - "xrstors\0" "xrstors64\0" "xsavec\0" "xsavec64\0" "xsaveopt\0" "xsaveopt64\0" "xsaves\0" "xsaves64\0" "xsetbv\0" - "xtest"; + "wbnoinvd\0" "wrfsbase\0" "wrgsbase\0" "wrmsr\0" "wrssd\0" "wrssq\0" "wrussd\0" "wrussq\0" "xabort\0" "xadd\0" + "xbegin\0" "xend\0" "xgetbv\0" "xlatb\0" "xresldtrk\0" "xrstors\0" "xrstors64\0" "xsavec\0" "xsavec64\0" "xsaveopt\0" + "xsaveopt64\0" "xsaves\0" "xsaves64\0" "xsetbv\0" "xsusldtrk\0" "xtest"; const InstDB::InstNameIndex InstDB::instNameIndex[26] = { { Inst::kIdAaa , Inst::kIdArpl + 1 }, @@ -2748,12 +2841,12 @@ const InstDB::InstNameIndex InstDB::instNameIndex[26] = { { Inst::kIdOr , Inst::kIdOuts + 1 }, { Inst::kIdPabsb , Inst::kIdPxor + 1 }, { Inst::kIdNone , Inst::kIdNone + 1 }, - { Inst::kIdRcl , Inst::kIdRsqrtss + 1 }, + { Inst::kIdRcl , Inst::kIdRstorssp + 1 }, { Inst::kIdSahf , Inst::kIdSysret64 + 1 }, { Inst::kIdT1mskc , Inst::kIdTzmsk + 1 }, { Inst::kIdUcomisd , Inst::kIdUnpcklps + 1 }, { 
Inst::kIdV4fmaddps , Inst::kIdVzeroupper + 1 }, - { Inst::kIdWbinvd , Inst::kIdWrmsr + 1 }, + { Inst::kIdWbinvd , Inst::kIdWrussq + 1 }, { Inst::kIdXabort , Inst::kIdXtest + 1 }, { Inst::kIdNone , Inst::kIdNone + 1 }, { Inst::kIdNone , Inst::kIdNone + 1 } @@ -2800,7 +2893,7 @@ const InstDB::InstSignature InstDB::_instSignatureTable[] = { ROW(2, 1, 1, 0, 25 , 26 , 0 , 0 , 0 , 0 ), // {r16|m16|r32|m32|r64|m64|mem, i8} ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi|m8|mem, r8lo|r8hi} ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16} - ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32} + ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #23 {r32|m32|mem, r32} ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem} ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // {r16, m16|mem} ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem} @@ -2829,7 +2922,7 @@ const InstDB::InstSignature InstDB::_instSignatureTable[] = { ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // #49 {r8lo|r8hi|m8|mem, r8lo|r8hi} ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16} ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32} - ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64} + ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // #52 {r64|m64|mem, r64} ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem} ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // {r16, m16|mem} ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem} @@ -2876,368 +2969,385 @@ const InstDB::InstSignature InstDB::_instSignatureTable[] = { ROW(2, 0, 1, 0, 45 , 15 , 0 , 0 , 0 , 0 ), // {xmm, r64|m64|mem} ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #97 {xmm, xmm|m64|mem} ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #98 {m64|mem, xmm} - ROW(3, 1, 1, 0, 45 , 61 , 45 , 0 , 0 , 0 ), // #99 {xmm, vm32x, xmm} - ROW(3, 1, 1, 0, 48 , 61 , 48 , 0 , 0 , 0 ), // {ymm, vm32x, ymm} - ROW(2, 1, 1, 0, 45 , 61 , 0 , 0 , 0 , 0 ), // {xmm, vm32x} - ROW(2, 1, 1, 0, 48 , 62 , 0 , 0 , 0 , 0 ), // {ymm, vm32y} - ROW(2, 1, 1, 0, 51 , 63 , 0 , 0 , 0 , 0 ), // {zmm, vm32z} - ROW(3, 1, 1, 0, 45 , 61 , 45 , 0 , 0 , 0 ), // #104 {xmm, vm32x, xmm} - ROW(3, 1, 1, 0, 48 , 62 , 48 , 0 , 0 , 0 ), // {ymm, vm32y, ymm} - ROW(2, 1, 1, 0, 45 , 61 , 0 , 0 , 0 , 0 ), // {xmm, vm32x} - ROW(2, 1, 1, 0, 48 , 62 , 0 , 0 , 0 , 0 ), // {ymm, vm32y} - ROW(2, 1, 1, 0, 51 , 63 , 0 , 0 , 0 , 0 ), // {zmm, vm32z} - ROW(3, 1, 1, 0, 45 , 64 , 45 , 0 , 0 , 0 ), // #109 {xmm, vm64x, xmm} - ROW(3, 1, 1, 0, 48 , 65 , 48 , 0 , 0 , 0 ), // {ymm, vm64y, ymm} - ROW(2, 1, 1, 0, 45 , 64 , 0 , 0 , 0 , 0 ), // {xmm, vm64x} - ROW(2, 1, 1, 0, 48 , 65 , 0 , 0 , 0 , 0 ), // {ymm, vm64y} - ROW(2, 1, 1, 0, 51 , 66 , 0 , 0 , 0 , 0 ), // {zmm, vm64z} - ROW(2, 1, 1, 0, 25 , 10 , 0 , 0 , 0 , 0 ), // #114 {r16|m16|r32|m32|r64|m64|mem, i8|u8} + ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #99 {} + ROW(1, 1, 1, 0, 61 , 0 , 0 , 0 , 0 , 0 ), // {r16|m16|r32|m32|r64|m64} + ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16} + ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32} + ROW(2, 1, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64} + ROW(3, 1, 1, 0, 45 , 62 , 45 , 0 , 0 , 0 ), // #104 {xmm, vm32x, xmm} + ROW(3, 1, 1, 0, 48 , 62 , 48 , 0 , 0 , 0 ), // {ymm, vm32x, ymm} + ROW(2, 1, 1, 0, 45 , 62 , 0 , 0 , 0 , 0 ), // {xmm, vm32x} + ROW(2, 1, 1, 0, 48 , 63 , 0 , 0 , 0 , 0 ), // {ymm, vm32y} + ROW(2, 1, 1, 0, 51 , 64 , 0 , 0 , 0 , 0 ), // {zmm, vm32z} + ROW(3, 1, 1, 0, 45 , 62 , 45 , 0 , 0 , 0 ), // #109 {xmm, vm32x, xmm} + 
ROW(3, 1, 1, 0, 48 , 63 , 48 , 0 , 0 , 0 ), // {ymm, vm32y, ymm} + ROW(2, 1, 1, 0, 45 , 62 , 0 , 0 , 0 , 0 ), // {xmm, vm32x} + ROW(2, 1, 1, 0, 48 , 63 , 0 , 0 , 0 , 0 ), // {ymm, vm32y} + ROW(2, 1, 1, 0, 51 , 64 , 0 , 0 , 0 , 0 ), // {zmm, vm32z} + ROW(3, 1, 1, 0, 45 , 65 , 45 , 0 , 0 , 0 ), // #114 {xmm, vm64x, xmm} + ROW(3, 1, 1, 0, 48 , 66 , 48 , 0 , 0 , 0 ), // {ymm, vm64y, ymm} + ROW(2, 1, 1, 0, 45 , 65 , 0 , 0 , 0 , 0 ), // {xmm, vm64x} + ROW(2, 1, 1, 0, 48 , 66 , 0 , 0 , 0 , 0 ), // {ymm, vm64y} + ROW(2, 1, 1, 0, 51 , 67 , 0 , 0 , 0 , 0 ), // {zmm, vm64z} + ROW(2, 1, 1, 0, 25 , 10 , 0 , 0 , 0 , 0 ), // #119 {r16|m16|r32|m32|r64|m64|mem, i8|u8} ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16} ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32} ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64} - ROW(3, 1, 1, 1, 1 , 2 , 67 , 0 , 0 , 0 ), // #118 {r8lo|r8hi|m8|mem, r8lo|r8hi, } + ROW(3, 1, 1, 1, 1 , 2 , 68 , 0 , 0 , 0 ), // #123 {r8lo|r8hi|m8|mem, r8lo|r8hi, } ROW(3, 1, 1, 1, 27 , 4 , 33 , 0 , 0 , 0 ), // {r16|m16|mem, r16, } ROW(3, 1, 1, 1, 28 , 6 , 36 , 0 , 0 , 0 ), // {r32|m32|mem, r32, } ROW(3, 0, 1, 1, 15 , 8 , 38 , 0 , 0 , 0 ), // {r64|m64|mem, r64, } - ROW(1, 1, 1, 0, 68 , 0 , 0 , 0 , 0 , 0 ), // #122 {r16|m16|r64|m64|mem} + ROW(1, 1, 1, 0, 69 , 0 , 0 , 0 , 0 , 0 ), // #127 {r16|m16|r64|m64} ROW(1, 1, 0, 0, 13 , 0 , 0 , 0 , 0 , 0 ), // {r32|m32} - ROW(1, 1, 0, 0, 69 , 0 , 0 , 0 , 0 , 0 ), // {ds|es|ss} - ROW(1, 1, 1, 0, 70 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs} - ROW(1, 1, 1, 0, 71 , 0 , 0 , 0 , 0 , 0 ), // #126 {r16|m16|r64|m64|mem|i8|i16|i32} - ROW(1, 1, 0, 0, 72 , 0 , 0 , 0 , 0 , 0 ), // {r32|m32|i32|u32} - ROW(1, 1, 0, 0, 73 , 0 , 0 , 0 , 0 , 0 ), // {cs|ss|ds|es} - ROW(1, 1, 1, 0, 70 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs} - ROW(4, 1, 1, 0, 45 , 45 , 45 , 46 , 0 , 0 ), // #130 {xmm, xmm, xmm, xmm|m128|mem} + ROW(1, 1, 0, 0, 70 , 0 , 0 , 0 , 0 , 0 ), // {ds|es|ss} + ROW(1, 1, 1, 0, 71 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs} + ROW(1, 1, 1, 0, 72 , 0 , 0 , 0 , 0 , 0 ), // #131 {r16|m16|r64|m64|i8|i16|i32} + ROW(1, 1, 0, 0, 73 , 0 , 0 , 0 , 0 , 0 ), // {r32|m32|i32|u32} + ROW(1, 1, 0, 0, 74 , 0 , 0 , 0 , 0 , 0 ), // {cs|ss|ds|es} + ROW(1, 1, 1, 0, 71 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs} + ROW(4, 1, 1, 0, 45 , 45 , 45 , 46 , 0 , 0 ), // #135 {xmm, xmm, xmm, xmm|m128|mem} ROW(4, 1, 1, 0, 45 , 45 , 47 , 45 , 0 , 0 ), // {xmm, xmm, m128|mem, xmm} ROW(4, 1, 1, 0, 48 , 48 , 48 , 49 , 0 , 0 ), // {ymm, ymm, ymm, ymm|m256|mem} ROW(4, 1, 1, 0, 48 , 48 , 50 , 48 , 0 , 0 ), // {ymm, ymm, m256|mem, ymm} - ROW(3, 1, 1, 0, 45 , 74 , 45 , 0 , 0 , 0 ), // #134 {xmm, vm64x|vm64y, xmm} - ROW(2, 1, 1, 0, 45 , 64 , 0 , 0 , 0 , 0 ), // {xmm, vm64x} - ROW(2, 1, 1, 0, 48 , 65 , 0 , 0 , 0 , 0 ), // {ymm, vm64y} - ROW(2, 1, 1, 0, 51 , 66 , 0 , 0 , 0 , 0 ), // {zmm, vm64z} - ROW(3, 1, 1, 0, 47 , 45 , 45 , 0 , 0 , 0 ), // #138 {m128|mem, xmm, xmm} + ROW(3, 1, 1, 0, 45 , 75 , 45 , 0 , 0 , 0 ), // #139 {xmm, vm64x|vm64y, xmm} + ROW(2, 1, 1, 0, 45 , 65 , 0 , 0 , 0 , 0 ), // {xmm, vm64x} + ROW(2, 1, 1, 0, 48 , 66 , 0 , 0 , 0 , 0 ), // {ymm, vm64y} + ROW(2, 1, 1, 0, 51 , 67 , 0 , 0 , 0 , 0 ), // {zmm, vm64z} + ROW(3, 1, 1, 0, 47 , 45 , 45 , 0 , 0 , 0 ), // #143 {m128|mem, xmm, xmm} ROW(3, 1, 1, 0, 50 , 48 , 48 , 0 , 0 , 0 ), // {m256|mem, ymm, ymm} ROW(3, 1, 1, 0, 45 , 45 , 47 , 0 , 0 , 0 ), // {xmm, xmm, m128|mem} ROW(3, 1, 1, 0, 48 , 48 , 50 , 0 , 0 , 0 ), // {ymm, ymm, m256|mem} - ROW(5, 1, 1, 0, 45 , 45 , 46 , 45 , 75 , 0 ), // #142 {xmm, xmm, xmm|m128|mem, xmm, i4|u4} - ROW(5, 1, 1, 
0, 45 , 45 , 45 , 47 , 75 , 0 ), // {xmm, xmm, xmm, m128|mem, i4|u4} - ROW(5, 1, 1, 0, 48 , 48 , 49 , 48 , 75 , 0 ), // {ymm, ymm, ymm|m256|mem, ymm, i4|u4} - ROW(5, 1, 1, 0, 48 , 48 , 48 , 50 , 75 , 0 ), // {ymm, ymm, ymm, m256|mem, i4|u4} - ROW(3, 1, 1, 0, 48 , 49 , 10 , 0 , 0 , 0 ), // #146 {ymm, ymm|m256|mem, i8|u8} + ROW(5, 1, 1, 0, 45 , 45 , 46 , 45 , 76 , 0 ), // #147 {xmm, xmm, xmm|m128|mem, xmm, i4|u4} + ROW(5, 1, 1, 0, 45 , 45 , 45 , 47 , 76 , 0 ), // {xmm, xmm, xmm, m128|mem, i4|u4} + ROW(5, 1, 1, 0, 48 , 48 , 49 , 48 , 76 , 0 ), // {ymm, ymm, ymm|m256|mem, ymm, i4|u4} + ROW(5, 1, 1, 0, 48 , 48 , 48 , 50 , 76 , 0 ), // {ymm, ymm, ymm, m256|mem, i4|u4} + ROW(3, 1, 1, 0, 48 , 49 , 10 , 0 , 0 , 0 ), // #151 {ymm, ymm|m256|mem, i8|u8} ROW(3, 1, 1, 0, 48 , 48 , 49 , 0 , 0 , 0 ), // {ymm, ymm, ymm|m256|mem} ROW(3, 1, 1, 0, 51 , 51 , 56 , 0 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem|i8|u8} ROW(3, 1, 1, 0, 51 , 53 , 10 , 0 , 0 , 0 ), // {zmm, m512|mem, i8|u8} - ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #150 {r16, r16|m16|mem} - ROW(2, 1, 1, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #151 {r32, r32|m32|mem} + ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #155 {r16, r16|m16|mem} + ROW(2, 1, 1, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #156 {r32, r32|m32|mem} ROW(2, 0, 1, 0, 8 , 15 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem} - ROW(1, 1, 1, 0, 76 , 0 , 0 , 0 , 0 , 0 ), // #153 {m32|m64} - ROW(2, 1, 1, 0, 77 , 78 , 0 , 0 , 0 , 0 ), // {st0, st} - ROW(2, 1, 1, 0, 78 , 77 , 0 , 0 , 0 , 0 ), // {st, st0} - ROW(2, 1, 1, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #156 {r16, m32|mem} - ROW(2, 1, 1, 0, 6 , 79 , 0 , 0 , 0 , 0 ), // {r32, m48|mem} - ROW(2, 0, 1, 0, 8 , 80 , 0 , 0 , 0 , 0 ), // {r64, m80|mem} - ROW(3, 1, 1, 0, 27 , 4 , 81 , 0 , 0 , 0 ), // #159 {r16|m16|mem, r16, cl|i8|u8} - ROW(3, 1, 1, 0, 28 , 6 , 81 , 0 , 0 , 0 ), // {r32|m32|mem, r32, cl|i8|u8} - ROW(3, 0, 1, 0, 15 , 8 , 81 , 0 , 0 , 0 ), // {r64|m64|mem, r64, cl|i8|u8} - ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #162 {xmm, xmm, xmm|m128|mem} - ROW(3, 1, 1, 0, 48 , 48 , 49 , 0 , 0 , 0 ), // #163 {ymm, ymm, ymm|m256|mem} + ROW(1, 1, 1, 0, 77 , 0 , 0 , 0 , 0 , 0 ), // #158 {m32|m64} + ROW(2, 1, 1, 0, 78 , 79 , 0 , 0 , 0 , 0 ), // {st0, st} + ROW(2, 1, 1, 0, 79 , 78 , 0 , 0 , 0 , 0 ), // {st, st0} + ROW(2, 1, 1, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #161 {r16, m32|mem} + ROW(2, 1, 1, 0, 6 , 80 , 0 , 0 , 0 , 0 ), // {r32, m48|mem} + ROW(2, 0, 1, 0, 8 , 81 , 0 , 0 , 0 , 0 ), // {r64, m80|mem} + ROW(3, 1, 1, 0, 27 , 4 , 82 , 0 , 0 , 0 ), // #164 {r16|m16|mem, r16, cl|i8|u8} + ROW(3, 1, 1, 0, 28 , 6 , 82 , 0 , 0 , 0 ), // {r32|m32|mem, r32, cl|i8|u8} + ROW(3, 0, 1, 0, 15 , 8 , 82 , 0 , 0 , 0 ), // {r64|m64|mem, r64, cl|i8|u8} + ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #167 {xmm, xmm, xmm|m128|mem} + ROW(3, 1, 1, 0, 48 , 48 , 49 , 0 , 0 , 0 ), // #168 {ymm, ymm, ymm|m256|mem} ROW(3, 1, 1, 0, 51 , 51 , 52 , 0 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem} - ROW(4, 1, 1, 0, 45 , 45 , 46 , 10 , 0 , 0 ), // #165 {xmm, xmm, xmm|m128|mem, i8|u8} - ROW(4, 1, 1, 0, 48 , 48 , 49 , 10 , 0 , 0 ), // #166 {ymm, ymm, ymm|m256|mem, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 46 , 10 , 0 , 0 ), // #170 {xmm, xmm, xmm|m128|mem, i8|u8} + ROW(4, 1, 1, 0, 48 , 48 , 49 , 10 , 0 , 0 ), // #171 {ymm, ymm, ymm|m256|mem, i8|u8} ROW(4, 1, 1, 0, 51 , 51 , 52 , 10 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem, i8|u8} - ROW(4, 1, 1, 0, 82 , 45 , 46 , 10 , 0 , 0 ), // #168 {xmm|k, xmm, xmm|m128|mem, i8|u8} - ROW(4, 1, 1, 0, 83 , 48 , 49 , 10 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem, i8|u8} - ROW(4, 1, 1, 0, 84 , 51 , 52 , 10 
, 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8} - ROW(2, 1, 1, 0, 46 , 45 , 0 , 0 , 0 , 0 ), // #171 {xmm|m128|mem, xmm} + ROW(4, 1, 1, 0, 83 , 45 , 46 , 10 , 0 , 0 ), // #173 {xmm|k, xmm, xmm|m128|mem, i8|u8} + ROW(4, 1, 1, 0, 84 , 48 , 49 , 10 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem, i8|u8} + ROW(4, 1, 1, 0, 85 , 51 , 52 , 10 , 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8} + ROW(2, 1, 1, 0, 46 , 45 , 0 , 0 , 0 , 0 ), // #176 {xmm|m128|mem, xmm} ROW(2, 1, 1, 0, 49 , 48 , 0 , 0 , 0 , 0 ), // {ymm|m256|mem, ymm} ROW(2, 1, 1, 0, 52 , 51 , 0 , 0 , 0 , 0 ), // {zmm|m512|mem, zmm} - ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #174 {xmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #179 {xmm, xmm|m64|mem} ROW(2, 1, 1, 0, 48 , 46 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m128|mem} ROW(2, 1, 1, 0, 51 , 49 , 0 , 0 , 0 , 0 ), // {zmm, ymm|m256|mem} - ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // #177 {xmm, xmm|m128|mem} + ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // #182 {xmm, xmm|m128|mem} ROW(2, 1, 1, 0, 48 , 49 , 0 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem} ROW(2, 1, 1, 0, 51 , 52 , 0 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem} - ROW(3, 1, 1, 0, 60 , 45 , 10 , 0 , 0 , 0 ), // #180 {xmm|m64|mem, xmm, i8|u8} - ROW(3, 1, 1, 0, 46 , 48 , 10 , 0 , 0 , 0 ), // #181 {xmm|m128|mem, ymm, i8|u8} - ROW(3, 1, 1, 0, 49 , 51 , 10 , 0 , 0 , 0 ), // #182 {ymm|m256|mem, zmm, i8|u8} - ROW(3, 1, 1, 0, 45 , 46 , 10 , 0 , 0 , 0 ), // #183 {xmm, xmm|m128|mem, i8|u8} + ROW(3, 1, 1, 0, 60 , 45 , 10 , 0 , 0 , 0 ), // #185 {xmm|m64|mem, xmm, i8|u8} + ROW(3, 1, 1, 0, 46 , 48 , 10 , 0 , 0 , 0 ), // #186 {xmm|m128|mem, ymm, i8|u8} + ROW(3, 1, 1, 0, 49 , 51 , 10 , 0 , 0 , 0 ), // #187 {ymm|m256|mem, zmm, i8|u8} + ROW(3, 1, 1, 0, 45 , 46 , 10 , 0 , 0 , 0 ), // #188 {xmm, xmm|m128|mem, i8|u8} ROW(3, 1, 1, 0, 48 , 49 , 10 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem, i8|u8} ROW(3, 1, 1, 0, 51 , 52 , 10 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem, i8|u8} - ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #186 {xmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #191 {xmm, xmm|m64|mem} ROW(2, 1, 1, 0, 48 , 49 , 0 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem} ROW(2, 1, 1, 0, 51 , 52 , 0 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem} - ROW(2, 1, 1, 0, 47 , 45 , 0 , 0 , 0 , 0 ), // #189 {m128|mem, xmm} + ROW(2, 1, 1, 0, 47 , 45 , 0 , 0 , 0 , 0 ), // #194 {m128|mem, xmm} ROW(2, 1, 1, 0, 50 , 48 , 0 , 0 , 0 , 0 ), // {m256|mem, ymm} ROW(2, 1, 1, 0, 53 , 51 , 0 , 0 , 0 , 0 ), // {m512|mem, zmm} - ROW(2, 1, 1, 0, 45 , 47 , 0 , 0 , 0 , 0 ), // #192 {xmm, m128|mem} + ROW(2, 1, 1, 0, 45 , 47 , 0 , 0 , 0 , 0 ), // #197 {xmm, m128|mem} ROW(2, 1, 1, 0, 48 , 50 , 0 , 0 , 0 , 0 ), // {ymm, m256|mem} ROW(2, 1, 1, 0, 51 , 53 , 0 , 0 , 0 , 0 ), // {zmm, m512|mem} - ROW(2, 0, 1, 0, 15 , 45 , 0 , 0 , 0 , 0 ), // #195 {r64|m64|mem, xmm} - ROW(2, 1, 1, 0, 45 , 85 , 0 , 0 , 0 , 0 ), // {xmm, xmm|m64|mem|r64} + ROW(2, 0, 1, 0, 15 , 45 , 0 , 0 , 0 , 0 ), // #200 {r64|m64|mem, xmm} + ROW(2, 1, 1, 0, 45 , 86 , 0 , 0 , 0 , 0 ), // {xmm, xmm|m64|mem|r64} ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // {m64|mem, xmm} - ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #198 {m64|mem, xmm} + ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #203 {m64|mem, xmm} ROW(2, 1, 1, 0, 45 , 30 , 0 , 0 , 0 , 0 ), // {xmm, m64|mem} - ROW(3, 1, 1, 0, 45 , 45 , 45 , 0 , 0 , 0 ), // #200 {xmm, xmm, xmm} - ROW(2, 1, 1, 0, 29 , 45 , 0 , 0 , 0 , 0 ), // #201 {m32|mem, xmm} + ROW(3, 1, 1, 0, 45 , 45 , 45 , 0 , 0 , 0 ), // #205 {xmm, xmm, xmm} + ROW(2, 1, 1, 0, 29 , 45 , 0 , 0 , 0 , 0 ), // #206 {m32|mem, xmm} ROW(2, 1, 
1, 0, 45 , 29 , 0 , 0 , 0 , 0 ), // {xmm, m32|mem} ROW(3, 1, 1, 0, 45 , 45 , 45 , 0 , 0 , 0 ), // {xmm, xmm, xmm} - ROW(2, 1, 1, 0, 86 , 85 , 0 , 0 , 0 , 0 ), // #204 {xmm|ymm, xmm|m64|mem|r64} + ROW(4, 1, 1, 0, 85 , 85 , 45 , 46 , 0 , 0 ), // #209 {k, k, xmm, xmm|m128|mem} + ROW(4, 1, 1, 0, 85 , 85 , 48 , 49 , 0 , 0 ), // {k, k, ymm, ymm|m256|mem} + ROW(4, 1, 1, 0, 85 , 85 , 51 , 52 , 0 , 0 ), // {k, k, zmm, zmm|m512|mem} + ROW(2, 1, 1, 0, 87 , 86 , 0 , 0 , 0 , 0 ), // #212 {xmm|ymm, xmm|m64|mem|r64} ROW(2, 0, 1, 0, 51 , 8 , 0 , 0 , 0 , 0 ), // {zmm, r64} ROW(2, 1, 1, 0, 51 , 60 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m64|mem} - ROW(4, 1, 1, 0, 84 , 45 , 46 , 10 , 0 , 0 ), // #207 {k, xmm, xmm|m128|mem, i8|u8} - ROW(4, 1, 1, 0, 84 , 48 , 49 , 10 , 0 , 0 ), // {k, ymm, ymm|m256|mem, i8|u8} - ROW(4, 1, 1, 0, 84 , 51 , 52 , 10 , 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8} - ROW(3, 1, 1, 0, 82 , 45 , 46 , 0 , 0 , 0 ), // #210 {xmm|k, xmm, xmm|m128|mem} - ROW(3, 1, 1, 0, 83 , 48 , 49 , 0 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem} - ROW(3, 1, 1, 0, 84 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem} - ROW(2, 1, 1, 0, 87 , 45 , 0 , 0 , 0 , 0 ), // #213 {xmm|m32|mem, xmm} + ROW(4, 1, 1, 0, 85 , 45 , 46 , 10 , 0 , 0 ), // #215 {k, xmm, xmm|m128|mem, i8|u8} + ROW(4, 1, 1, 0, 85 , 48 , 49 , 10 , 0 , 0 ), // {k, ymm, ymm|m256|mem, i8|u8} + ROW(4, 1, 1, 0, 85 , 51 , 52 , 10 , 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8} + ROW(3, 1, 1, 0, 83 , 45 , 46 , 0 , 0 , 0 ), // #218 {xmm|k, xmm, xmm|m128|mem} + ROW(3, 1, 1, 0, 84 , 48 , 49 , 0 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem} + ROW(3, 1, 1, 0, 85 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem} + ROW(2, 1, 1, 0, 88 , 45 , 0 , 0 , 0 , 0 ), // #221 {xmm|m32|mem, xmm} ROW(2, 1, 1, 0, 60 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m64|mem, ymm} ROW(2, 1, 1, 0, 46 , 51 , 0 , 0 , 0 , 0 ), // {xmm|m128|mem, zmm} - ROW(2, 1, 1, 0, 60 , 45 , 0 , 0 , 0 , 0 ), // #216 {xmm|m64|mem, xmm} + ROW(2, 1, 1, 0, 60 , 45 , 0 , 0 , 0 , 0 ), // #224 {xmm|m64|mem, xmm} ROW(2, 1, 1, 0, 46 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m128|mem, ymm} ROW(2, 1, 1, 0, 49 , 51 , 0 , 0 , 0 , 0 ), // {ymm|m256|mem, zmm} - ROW(2, 1, 1, 0, 88 , 45 , 0 , 0 , 0 , 0 ), // #219 {xmm|m16|mem, xmm} - ROW(2, 1, 1, 0, 87 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m32|mem, ymm} + ROW(2, 1, 1, 0, 89 , 45 , 0 , 0 , 0 , 0 ), // #227 {xmm|m16|mem, xmm} + ROW(2, 1, 1, 0, 88 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m32|mem, ymm} ROW(2, 1, 1, 0, 60 , 51 , 0 , 0 , 0 , 0 ), // {xmm|m64|mem, zmm} - ROW(2, 1, 1, 0, 45 , 87 , 0 , 0 , 0 , 0 ), // #222 {xmm, xmm|m32|mem} + ROW(2, 1, 1, 0, 45 , 88 , 0 , 0 , 0 , 0 ), // #230 {xmm, xmm|m32|mem} ROW(2, 1, 1, 0, 48 , 60 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m64|mem} ROW(2, 1, 1, 0, 51 , 46 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m128|mem} - ROW(2, 1, 1, 0, 45 , 88 , 0 , 0 , 0 , 0 ), // #225 {xmm, xmm|m16|mem} - ROW(2, 1, 1, 0, 48 , 87 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m32|mem} + ROW(2, 1, 1, 0, 45 , 89 , 0 , 0 , 0 , 0 ), // #233 {xmm, xmm|m16|mem} + ROW(2, 1, 1, 0, 48 , 88 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m32|mem} ROW(2, 1, 1, 0, 51 , 60 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m64|mem} - ROW(2, 1, 1, 0, 61 , 45 , 0 , 0 , 0 , 0 ), // #228 {vm32x, xmm} - ROW(2, 1, 1, 0, 62 , 48 , 0 , 0 , 0 , 0 ), // {vm32y, ymm} - ROW(2, 1, 1, 0, 63 , 51 , 0 , 0 , 0 , 0 ), // {vm32z, zmm} - ROW(2, 1, 1, 0, 64 , 45 , 0 , 0 , 0 , 0 ), // #231 {vm64x, xmm} - ROW(2, 1, 1, 0, 65 , 48 , 0 , 0 , 0 , 0 ), // {vm64y, ymm} - ROW(2, 1, 1, 0, 66 , 51 , 0 , 0 , 0 , 0 ), // {vm64z, zmm} - ROW(3, 1, 1, 0, 84 , 45 , 46 , 0 , 0 , 0 ), // #234 {k, xmm, xmm|m128|mem} - ROW(3, 1, 
1, 0, 84 , 48 , 49 , 0 , 0 , 0 ), // {k, ymm, ymm|m256|mem} - ROW(3, 1, 1, 0, 84 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem} - ROW(3, 1, 1, 0, 6 , 6 , 28 , 0 , 0 , 0 ), // #237 {r32, r32, r32|m32|mem} + ROW(2, 1, 1, 0, 62 , 45 , 0 , 0 , 0 , 0 ), // #236 {vm32x, xmm} + ROW(2, 1, 1, 0, 63 , 48 , 0 , 0 , 0 , 0 ), // {vm32y, ymm} + ROW(2, 1, 1, 0, 64 , 51 , 0 , 0 , 0 , 0 ), // {vm32z, zmm} + ROW(2, 1, 1, 0, 65 , 45 , 0 , 0 , 0 , 0 ), // #239 {vm64x, xmm} + ROW(2, 1, 1, 0, 66 , 48 , 0 , 0 , 0 , 0 ), // {vm64y, ymm} + ROW(2, 1, 1, 0, 67 , 51 , 0 , 0 , 0 , 0 ), // {vm64z, zmm} + ROW(3, 1, 1, 0, 85 , 45 , 46 , 0 , 0 , 0 ), // #242 {k, xmm, xmm|m128|mem} + ROW(3, 1, 1, 0, 85 , 48 , 49 , 0 , 0 , 0 ), // {k, ymm, ymm|m256|mem} + ROW(3, 1, 1, 0, 85 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem} + ROW(3, 1, 1, 0, 6 , 6 , 28 , 0 , 0 , 0 ), // #245 {r32, r32, r32|m32|mem} ROW(3, 0, 1, 0, 8 , 8 , 15 , 0 , 0 , 0 ), // {r64, r64, r64|m64|mem} - ROW(3, 1, 1, 0, 6 , 28 , 6 , 0 , 0 , 0 ), // #239 {r32, r32|m32|mem, r32} + ROW(3, 1, 1, 0, 6 , 28 , 6 , 0 , 0 , 0 ), // #247 {r32, r32|m32|mem, r32} ROW(3, 0, 1, 0, 8 , 15 , 8 , 0 , 0 , 0 ), // {r64, r64|m64|mem, r64} - ROW(2, 1, 0, 0, 89 , 28 , 0 , 0 , 0 , 0 ), // #241 {bnd, r32|m32|mem} - ROW(2, 0, 1, 0, 89 , 15 , 0 , 0 , 0 , 0 ), // {bnd, r64|m64|mem} - ROW(2, 1, 1, 0, 89 , 90 , 0 , 0 , 0 , 0 ), // #243 {bnd, bnd|mem} - ROW(2, 1, 1, 0, 91 , 89 , 0 , 0 , 0 , 0 ), // {mem, bnd} - ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #245 {r16, m32|mem} + ROW(2, 1, 0, 0, 90 , 28 , 0 , 0 , 0 , 0 ), // #249 {bnd, r32|m32|mem} + ROW(2, 0, 1, 0, 90 , 15 , 0 , 0 , 0 , 0 ), // {bnd, r64|m64|mem} + ROW(2, 1, 1, 0, 90 , 91 , 0 , 0 , 0 , 0 ), // #251 {bnd, bnd|mem} + ROW(2, 1, 1, 0, 92 , 90 , 0 , 0 , 0 , 0 ), // {mem, bnd} + ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #253 {r16, m32|mem} ROW(2, 1, 0, 0, 6 , 30 , 0 , 0 , 0 , 0 ), // {r32, m64|mem} - ROW(1, 1, 0, 0, 92 , 0 , 0 , 0 , 0 , 0 ), // #247 {rel16|r16|m16|r32|m32} - ROW(1, 1, 1, 0, 93 , 0 , 0 , 0 , 0 , 0 ), // {rel32|r64|m64|mem} - ROW(2, 1, 1, 0, 6 , 94 , 0 , 0 , 0 , 0 ), // #249 {r32, r8lo|r8hi|m8|r16|m16|r32|m32} - ROW(2, 0, 1, 0, 8 , 95 , 0 , 0 , 0 , 0 ), // {r64, r8lo|r8hi|m8|r64|m64} - ROW(1, 1, 0, 0, 96 , 0 , 0 , 0 , 0 , 0 ), // #251 {r16|r32} - ROW(1, 1, 1, 0, 31 , 0 , 0 , 0 , 0 , 0 ), // #252 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem} - ROW(2, 1, 0, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // #253 {es:[memBase], m512|mem} - ROW(2, 0, 1, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // {es:[memBase], m512|mem} - ROW(3, 1, 1, 0, 45 , 10 , 10 , 0 , 0 , 0 ), // #255 {xmm, i8|u8, i8|u8} - ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #256 {xmm, xmm} - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #257 {} - ROW(1, 1, 1, 0, 78 , 0 , 0 , 0 , 0 , 0 ), // #258 {st} - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #259 {} - ROW(1, 1, 1, 0, 98 , 0 , 0 , 0 , 0 , 0 ), // #260 {m32|m64|st} - ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #261 {xmm, xmm} + ROW(1, 1, 0, 0, 93 , 0 , 0 , 0 , 0 , 0 ), // #255 {rel16|r16|m16|r32|m32} + ROW(1, 1, 1, 0, 94 , 0 , 0 , 0 , 0 , 0 ), // {rel32|r64|m64|mem} + ROW(2, 1, 1, 0, 6 , 95 , 0 , 0 , 0 , 0 ), // #257 {r32, r8lo|r8hi|m8|r16|m16|r32|m32} + ROW(2, 0, 1, 0, 8 , 96 , 0 , 0 , 0 , 0 ), // {r64, r8lo|r8hi|m8|r64|m64} + ROW(1, 1, 0, 0, 97 , 0 , 0 , 0 , 0 , 0 ), // #259 {r16|r32} + ROW(1, 1, 1, 0, 31 , 0 , 0 , 0 , 0 , 0 ), // #260 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem} + ROW(2, 1, 0, 0, 98 , 53 , 0 , 0 , 0 , 0 ), // #261 {es:[memBase], m512|mem} + ROW(2, 0, 1, 0, 98 , 53 , 0 , 0 , 0 , 0 ), // {es:[memBase], 
m512|mem} + ROW(3, 1, 1, 0, 45 , 10 , 10 , 0 , 0 , 0 ), // #263 {xmm, i8|u8, i8|u8} + ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #264 {xmm, xmm} + ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #265 {} + ROW(1, 1, 1, 0, 79 , 0 , 0 , 0 , 0 , 0 ), // #266 {st} + ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #267 {} + ROW(1, 1, 1, 0, 99 , 0 , 0 , 0 , 0 , 0 ), // #268 {m32|m64|st} + ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #269 {xmm, xmm} ROW(4, 1, 1, 0, 45 , 45 , 10 , 10 , 0 , 0 ), // {xmm, xmm, i8|u8, i8|u8} - ROW(2, 1, 0, 0, 6 , 47 , 0 , 0 , 0 , 0 ), // #263 {r32, m128|mem} + ROW(2, 1, 0, 0, 6 , 47 , 0 , 0 , 0 , 0 ), // #271 {r32, m128|mem} ROW(2, 0, 1, 0, 8 , 47 , 0 , 0 , 0 , 0 ), // {r64, m128|mem} - ROW(2, 1, 0, 2, 36 , 99 , 0 , 0 , 0 , 0 ), // #265 {, } - ROW(2, 0, 1, 2, 100, 99 , 0 , 0 , 0 , 0 ), // {, } - ROW(1, 1, 1, 0, 101, 0 , 0 , 0 , 0 , 0 ), // #267 {rel8|rel32} - ROW(1, 1, 0, 0, 102, 0 , 0 , 0 , 0 , 0 ), // {rel16} - ROW(2, 1, 0, 1, 103, 104, 0 , 0 , 0 , 0 ), // #269 {, rel8} - ROW(2, 0, 1, 1, 105, 104, 0 , 0 , 0 , 0 ), // {, rel8} - ROW(1, 1, 1, 0, 106, 0 , 0 , 0 , 0 , 0 ), // #271 {rel8|rel32|r64|m64|mem} - ROW(1, 1, 0, 0, 107, 0 , 0 , 0 , 0 , 0 ), // {rel16|r32|m32|mem} - ROW(2, 1, 1, 0, 84 , 108, 0 , 0 , 0 , 0 ), // #273 {k, k|m8|mem|r32|r8lo|r8hi|r16} - ROW(2, 1, 1, 0, 109, 84 , 0 , 0 , 0 , 0 ), // {m8|mem|r32|r8lo|r8hi|r16, k} - ROW(2, 1, 1, 0, 84 , 110, 0 , 0 , 0 , 0 ), // #275 {k, k|m32|mem|r32} - ROW(2, 1, 1, 0, 28 , 84 , 0 , 0 , 0 , 0 ), // {m32|mem|r32, k} - ROW(2, 1, 1, 0, 84 , 111, 0 , 0 , 0 , 0 ), // #277 {k, k|m64|mem|r64} - ROW(2, 1, 1, 0, 15 , 84 , 0 , 0 , 0 , 0 ), // {m64|mem|r64, k} - ROW(2, 1, 1, 0, 84 , 112, 0 , 0 , 0 , 0 ), // #279 {k, k|m16|mem|r32|r16} - ROW(2, 1, 1, 0, 113, 84 , 0 , 0 , 0 , 0 ), // {m16|mem|r32|r16, k} - ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #281 {r16, r16|m16|mem} - ROW(2, 1, 1, 0, 6 , 113, 0 , 0 , 0 , 0 ), // {r32, r32|m16|mem|r16} - ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #283 {r16, m32|mem} - ROW(2, 1, 0, 0, 6 , 79 , 0 , 0 , 0 , 0 ), // {r32, m48|mem} - ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #285 {r16, r16|m16|mem} - ROW(2, 1, 1, 0, 114, 113, 0 , 0 , 0 , 0 ), // {r32|r64, r32|m16|mem|r16} - ROW(2, 1, 1, 0, 59 , 28 , 0 , 0 , 0 , 0 ), // #287 {mm|xmm, r32|m32|mem} + ROW(2, 1, 0, 2, 36 , 100, 0 , 0 , 0 , 0 ), // #273 {, } + ROW(2, 0, 1, 2, 101, 100, 0 , 0 , 0 , 0 ), // {, } + ROW(1, 1, 1, 0, 102, 0 , 0 , 0 , 0 , 0 ), // #275 {rel8|rel32} + ROW(1, 1, 0, 0, 103, 0 , 0 , 0 , 0 , 0 ), // {rel16} + ROW(2, 1, 0, 1, 104, 105, 0 , 0 , 0 , 0 ), // #277 {, rel8} + ROW(2, 0, 1, 1, 106, 105, 0 , 0 , 0 , 0 ), // {, rel8} + ROW(1, 1, 1, 0, 107, 0 , 0 , 0 , 0 , 0 ), // #279 {rel8|rel32|r64|m64|mem} + ROW(1, 1, 0, 0, 108, 0 , 0 , 0 , 0 , 0 ), // {rel16|r32|m32|mem} + ROW(2, 1, 1, 0, 85 , 109, 0 , 0 , 0 , 0 ), // #281 {k, k|m8|mem|r32|r8lo|r8hi|r16} + ROW(2, 1, 1, 0, 110, 85 , 0 , 0 , 0 , 0 ), // {m8|mem|r32|r8lo|r8hi|r16, k} + ROW(2, 1, 1, 0, 85 , 111, 0 , 0 , 0 , 0 ), // #283 {k, k|m32|mem|r32} + ROW(2, 1, 1, 0, 28 , 85 , 0 , 0 , 0 , 0 ), // {m32|mem|r32, k} + ROW(2, 1, 1, 0, 85 , 112, 0 , 0 , 0 , 0 ), // #285 {k, k|m64|mem|r64} + ROW(2, 1, 1, 0, 15 , 85 , 0 , 0 , 0 , 0 ), // {m64|mem|r64, k} + ROW(2, 1, 1, 0, 85 , 113, 0 , 0 , 0 , 0 ), // #287 {k, k|m16|mem|r32|r16} + ROW(2, 1, 1, 0, 114, 85 , 0 , 0 , 0 , 0 ), // {m16|mem|r32|r16, k} + ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #289 {r16, r16|m16|mem} + ROW(2, 1, 1, 0, 6 , 114, 0 , 0 , 0 , 0 ), // {r32, r32|m16|mem|r16} + ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #291 {r16, 
m32|mem} + ROW(2, 1, 0, 0, 6 , 80 , 0 , 0 , 0 , 0 ), // {r32, m48|mem} + ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #293 {r16, r16|m16|mem} + ROW(2, 1, 1, 0, 115, 114, 0 , 0 , 0 , 0 ), // {r32|r64, r32|m16|mem|r16} + ROW(2, 1, 1, 0, 59 , 28 , 0 , 0 , 0 , 0 ), // #295 {mm|xmm, r32|m32|mem} ROW(2, 1, 1, 0, 28 , 59 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, mm|xmm} - ROW(2, 1, 1, 0, 45 , 87 , 0 , 0 , 0 , 0 ), // #289 {xmm, xmm|m32|mem} + ROW(2, 1, 1, 0, 45 , 88 , 0 , 0 , 0 , 0 ), // #297 {xmm, xmm|m32|mem} ROW(2, 1, 1, 0, 29 , 45 , 0 , 0 , 0 , 0 ), // {m32|mem, xmm} - ROW(2, 1, 1, 0, 4 , 9 , 0 , 0 , 0 , 0 ), // #291 {r16, r8lo|r8hi|m8} - ROW(2, 1, 1, 0, 114, 115, 0 , 0 , 0 , 0 ), // {r32|r64, r8lo|r8hi|m8|r16|m16} - ROW(4, 1, 1, 1, 6 , 6 , 28 , 35 , 0 , 0 ), // #293 {r32, r32, r32|m32|mem, } + ROW(2, 1, 1, 0, 4 , 9 , 0 , 0 , 0 , 0 ), // #299 {r16, r8lo|r8hi|m8} + ROW(2, 1, 1, 0, 115, 116, 0 , 0 , 0 , 0 ), // {r32|r64, r8lo|r8hi|m8|r16|m16} + ROW(2, 0, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #301 {r16, r16|m16|mem} + ROW(2, 0, 1, 0, 115, 28 , 0 , 0 , 0 , 0 ), // {r32|r64, r32|m32|mem} + ROW(4, 1, 1, 1, 6 , 6 , 28 , 35 , 0 , 0 ), // #303 {r32, r32, r32|m32|mem, } ROW(4, 0, 1, 1, 8 , 8 , 15 , 37 , 0 , 0 ), // {r64, r64, r64|m64|mem, } - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #295 {} - ROW(1, 1, 1, 0, 116, 0 , 0 , 0 , 0 , 0 ), // {r16|m16|r32|m32} - ROW(2, 1, 1, 0, 57 , 117, 0 , 0 , 0 , 0 ), // #297 {mm, mm|m64|mem} + ROW(2, 1, 1, 0, 57 , 117, 0 , 0 , 0 , 0 ), // #305 {mm, mm|m64|mem} ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // {xmm, xmm|m128|mem} - ROW(3, 1, 1, 0, 57 , 117, 10 , 0 , 0 , 0 ), // #299 {mm, mm|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 57 , 117, 10 , 0 , 0 , 0 ), // #307 {mm, mm|m64|mem, i8|u8} ROW(3, 1, 1, 0, 45 , 46 , 10 , 0 , 0 , 0 ), // {xmm, xmm|m128|mem, i8|u8} - ROW(3, 1, 1, 0, 6 , 59 , 10 , 0 , 0 , 0 ), // #301 {r32, mm|xmm, i8|u8} + ROW(3, 1, 1, 0, 6 , 59 , 10 , 0 , 0 , 0 ), // #309 {r32, mm|xmm, i8|u8} ROW(3, 1, 1, 0, 21 , 45 , 10 , 0 , 0 , 0 ), // {m16|mem, xmm, i8|u8} - ROW(2, 1, 1, 0, 57 , 118, 0 , 0 , 0 , 0 ), // #303 {mm, i8|u8|mm|m64|mem} + ROW(2, 1, 1, 0, 57 , 118, 0 , 0 , 0 , 0 ), // #311 {mm, i8|u8|mm|m64|mem} ROW(2, 1, 1, 0, 45 , 54 , 0 , 0 , 0 , 0 ), // {xmm, i8|u8|xmm|m128|mem} - ROW(1, 1, 0, 0, 6 , 0 , 0 , 0 , 0 , 0 ), // #305 {r32} - ROW(1, 0, 1, 0, 8 , 0 , 0 , 0 , 0 , 0 ), // {r64} - ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #307 {} + ROW(1, 1, 0, 0, 6 , 0 , 0 , 0 , 0 , 0 ), // #313 {r32} + ROW(1, 0, 1, 0, 8 , 0 , 0 , 0 , 0 , 0 ), // #314 {r64} + ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #315 {} ROW(1, 1, 1, 0, 119, 0 , 0 , 0 , 0 , 0 ), // {u16} - ROW(3, 1, 1, 0, 6 , 28 , 10 , 0 , 0 , 0 ), // #309 {r32, r32|m32|mem, i8|u8} + ROW(3, 1, 1, 0, 6 , 28 , 10 , 0 , 0 , 0 ), // #317 {r32, r32|m32|mem, i8|u8} ROW(3, 0, 1, 0, 8 , 15 , 10 , 0 , 0 , 0 ), // {r64, r64|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 46 , 45 , 0 , 0 ), // #311 {xmm, xmm, xmm|m128|mem, xmm} + ROW(4, 1, 1, 0, 45 , 45 , 46 , 45 , 0 , 0 ), // #319 {xmm, xmm, xmm|m128|mem, xmm} ROW(4, 1, 1, 0, 48 , 48 , 49 , 48 , 0 , 0 ), // {ymm, ymm, ymm|m256|mem, ymm} - ROW(2, 1, 1, 0, 45 , 120, 0 , 0 , 0 , 0 ), // #313 {xmm, xmm|m128|ymm|m256} + ROW(2, 1, 1, 0, 45 , 120, 0 , 0 , 0 , 0 ), // #321 {xmm, xmm|m128|ymm|m256} ROW(2, 1, 1, 0, 48 , 52 , 0 , 0 , 0 , 0 ), // {ymm, zmm|m512|mem} - ROW(4, 1, 1, 0, 45 , 45 , 45 , 60 , 0 , 0 ), // #315 {xmm, xmm, xmm, xmm|m64|mem} + ROW(4, 1, 1, 0, 45 , 45 , 45 , 60 , 0 , 0 ), // #323 {xmm, xmm, xmm, xmm|m64|mem} ROW(4, 1, 1, 0, 45 , 45 , 30 , 45 , 0 , 0 ), // {xmm, xmm, m64|mem, 
xmm} - ROW(4, 1, 1, 0, 45 , 45 , 45 , 87 , 0 , 0 ), // #317 {xmm, xmm, xmm, xmm|m32|mem} + ROW(4, 1, 1, 0, 45 , 45 , 45 , 88 , 0 , 0 ), // #325 {xmm, xmm, xmm, xmm|m32|mem} ROW(4, 1, 1, 0, 45 , 45 , 29 , 45 , 0 , 0 ), // {xmm, xmm, m32|mem, xmm} - ROW(4, 1, 1, 0, 48 , 48 , 46 , 10 , 0 , 0 ), // #319 {ymm, ymm, xmm|m128|mem, i8|u8} + ROW(4, 1, 1, 0, 48 , 48 , 46 , 10 , 0 , 0 ), // #327 {ymm, ymm, xmm|m128|mem, i8|u8} ROW(4, 1, 1, 0, 51 , 51 , 46 , 10 , 0 , 0 ), // {zmm, zmm, xmm|m128|mem, i8|u8} - ROW(1, 1, 0, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #321 {} - ROW(1, 0, 1, 1, 38 , 0 , 0 , 0 , 0 , 0 ), // #322 {} - ROW(2, 1, 1, 0, 28 , 45 , 0 , 0 , 0 , 0 ), // #323 {r32|m32|mem, xmm} + ROW(1, 1, 0, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #329 {} + ROW(1, 0, 1, 1, 38 , 0 , 0 , 0 , 0 , 0 ), // #330 {} + ROW(2, 1, 1, 0, 28 , 45 , 0 , 0 , 0 , 0 ), // #331 {r32|m32|mem, xmm} ROW(2, 1, 1, 0, 45 , 28 , 0 , 0 , 0 , 0 ), // {xmm, r32|m32|mem} - ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #325 {m64|mem, xmm} + ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #333 {m64|mem, xmm} ROW(3, 1, 1, 0, 45 , 45 , 30 , 0 , 0 , 0 ), // {xmm, xmm, m64|mem} - ROW(2, 1, 0, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #327 {r32|m32|mem, r32} + ROW(2, 1, 0, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #335 {r32|m32|mem, r32} ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64} - ROW(2, 1, 0, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #329 {r32, r32|m32|mem} + ROW(2, 1, 0, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #337 {r32, r32|m32|mem} ROW(2, 0, 1, 0, 8 , 15 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem} - ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #331 {xmm, xmm, xmm|m128|mem|i8|u8} + ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #339 {xmm, xmm, xmm|m128|mem|i8|u8} ROW(3, 1, 1, 0, 45 , 47 , 121, 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8|xmm} - ROW(2, 1, 1, 0, 74 , 45 , 0 , 0 , 0 , 0 ), // #333 {vm64x|vm64y, xmm} - ROW(2, 1, 1, 0, 66 , 48 , 0 , 0 , 0 , 0 ), // {vm64z, ymm} - ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #335 {xmm, xmm, xmm|m128|mem} + ROW(2, 1, 1, 0, 75 , 45 , 0 , 0 , 0 , 0 ), // #341 {vm64x|vm64y, xmm} + ROW(2, 1, 1, 0, 67 , 48 , 0 , 0 , 0 , 0 ), // {vm64z, ymm} + ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #343 {xmm, xmm, xmm|m128|mem} ROW(3, 1, 1, 0, 45 , 47 , 45 , 0 , 0 , 0 ), // {xmm, m128|mem, xmm} - ROW(2, 1, 1, 0, 61 , 86 , 0 , 0 , 0 , 0 ), // #337 {vm32x, xmm|ymm} - ROW(2, 1, 1, 0, 62 , 51 , 0 , 0 , 0 , 0 ), // {vm32y, zmm} - ROW(1, 1, 0, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #339 {} - ROW(2, 1, 0, 1, 33 , 10 , 0 , 0 , 0 , 0 ), // #340 {, i8|u8} - ROW(2, 1, 0, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // #341 {r16|m16|mem, r16} - ROW(3, 1, 1, 1, 45 , 46 , 122, 0 , 0 , 0 ), // #342 {xmm, xmm|m128|mem, } - ROW(2, 1, 1, 0, 89 , 123, 0 , 0 , 0 , 0 ), // #343 {bnd, mib} - ROW(2, 1, 1, 0, 89 , 91 , 0 , 0 , 0 , 0 ), // #344 {bnd, mem} - ROW(2, 1, 1, 0, 123, 89 , 0 , 0 , 0 , 0 ), // #345 {mib, bnd} - ROW(1, 1, 1, 0, 124, 0 , 0 , 0 , 0 , 0 ), // #346 {r16|r32|r64} - ROW(1, 1, 1, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #347 {} - ROW(2, 1, 1, 2, 35 , 36 , 0 , 0 , 0 , 0 ), // #348 {, } - ROW(1, 1, 1, 0, 91 , 0 , 0 , 0 , 0 , 0 ), // #349 {mem} - ROW(1, 1, 1, 1, 125, 0 , 0 , 0 , 0 , 0 ), // #350 {} - ROW(2, 1, 1, 2, 126, 127, 0 , 0 , 0 , 0 ), // #351 {, } - ROW(3, 1, 1, 0, 45 , 60 , 10 , 0 , 0 , 0 ), // #352 {xmm, xmm|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 45 , 87 , 10 , 0 , 0 , 0 ), // #353 {xmm, xmm|m32|mem, i8|u8} - ROW(5, 0, 1, 4, 47 , 37 , 38 , 128, 129, 0 ), // #354 {m128|mem, , , , } - ROW(5, 1, 1, 4, 30 , 35 , 36 , 99 , 130, 0 ), // #355 {m64|mem, , , , } - ROW(4, 1, 1, 4, 36 , 130, 
99 , 35 , 0 , 0 ), // #356 {, , , } - ROW(2, 0, 1, 2, 37 , 38 , 0 , 0 , 0 , 0 ), // #357 {, } - ROW(2, 1, 1, 0, 57 , 46 , 0 , 0 , 0 , 0 ), // #358 {mm, xmm|m128|mem} - ROW(2, 1, 1, 0, 45 , 117, 0 , 0 , 0 , 0 ), // #359 {xmm, mm|m64|mem} - ROW(2, 1, 1, 0, 57 , 60 , 0 , 0 , 0 , 0 ), // #360 {mm, xmm|m64|mem} - ROW(2, 1, 1, 0, 114, 60 , 0 , 0 , 0 , 0 ), // #361 {r32|r64, xmm|m64|mem} - ROW(2, 1, 1, 0, 45 , 131, 0 , 0 , 0 , 0 ), // #362 {xmm, r32|m32|mem|r64|m64} - ROW(2, 1, 1, 0, 114, 87 , 0 , 0 , 0 , 0 ), // #363 {r32|r64, xmm|m32|mem} - ROW(2, 1, 1, 2, 34 , 33 , 0 , 0 , 0 , 0 ), // #364 {, } - ROW(1, 1, 1, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #365 {} - ROW(2, 1, 1, 0, 12 , 10 , 0 , 0 , 0 , 0 ), // #366 {i16|u16, i8|u8} - ROW(3, 1, 1, 0, 28 , 45 , 10 , 0 , 0 , 0 ), // #367 {r32|m32|mem, xmm, i8|u8} - ROW(1, 1, 1, 0, 80 , 0 , 0 , 0 , 0 , 0 ), // #368 {m80|mem} - ROW(1, 1, 1, 0, 132, 0 , 0 , 0 , 0 , 0 ), // #369 {m16|m32} - ROW(1, 1, 1, 0, 133, 0 , 0 , 0 , 0 , 0 ), // #370 {m16|m32|m64} - ROW(1, 1, 1, 0, 134, 0 , 0 , 0 , 0 , 0 ), // #371 {m32|m64|m80|st} - ROW(1, 1, 1, 0, 21 , 0 , 0 , 0 , 0 , 0 ), // #372 {m16|mem} - ROW(1, 1, 1, 0, 135, 0 , 0 , 0 , 0 , 0 ), // #373 {ax|m16|mem} - ROW(1, 0, 1, 0, 91 , 0 , 0 , 0 , 0 , 0 ), // #374 {mem} - ROW(2, 1, 1, 0, 136, 137, 0 , 0 , 0 , 0 ), // #375 {al|ax|eax, i8|u8|dx} - ROW(2, 1, 1, 0, 138, 139, 0 , 0 , 0 , 0 ), // #376 {es:[memBase|zdi], dx} - ROW(1, 1, 1, 0, 10 , 0 , 0 , 0 , 0 , 0 ), // #377 {i8|u8} - ROW(0, 1, 0, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #378 {} - ROW(0, 0, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #379 {} - ROW(3, 1, 1, 0, 84 , 84 , 84 , 0 , 0 , 0 ), // #380 {k, k, k} - ROW(2, 1, 1, 0, 84 , 84 , 0 , 0 , 0 , 0 ), // #381 {k, k} - ROW(3, 1, 1, 0, 84 , 84 , 10 , 0 , 0 , 0 ), // #382 {k, k, i8|u8} - ROW(1, 1, 1, 1, 140, 0 , 0 , 0 , 0 , 0 ), // #383 {} - ROW(1, 1, 1, 0, 29 , 0 , 0 , 0 , 0 , 0 ), // #384 {m32|mem} - ROW(2, 1, 1, 0, 124, 141, 0 , 0 , 0 , 0 ), // #385 {r16|r32|r64, mem|m8|m16|m32|m48|m64|m80|m128|m256|m512|m1024} - ROW(1, 1, 1, 0, 27 , 0 , 0 , 0 , 0 , 0 ), // #386 {r16|m16|mem} - ROW(1, 1, 1, 0, 114, 0 , 0 , 0 , 0 , 0 ), // #387 {r32|r64} - ROW(2, 1, 1, 2, 142, 126, 0 , 0 , 0 , 0 ), // #388 {, } - ROW(3, 1, 1, 0, 114, 28 , 14 , 0 , 0 , 0 ), // #389 {r32|r64, r32|m32|mem, i32|u32} - ROW(3, 1, 1, 1, 45 , 45 , 143, 0 , 0 , 0 ), // #390 {xmm, xmm, } - ROW(3, 1, 1, 1, 57 , 57 , 143, 0 , 0 , 0 ), // #391 {mm, mm, } - ROW(3, 1, 1, 3, 125, 99 , 35 , 0 , 0 , 0 ), // #392 {, , } - ROW(2, 1, 1, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // #393 {es:[memBase], m512|mem} - ROW(2, 1, 1, 0, 57 , 45 , 0 , 0 , 0 , 0 ), // #394 {mm, xmm} - ROW(2, 1, 1, 0, 6 , 45 , 0 , 0 , 0 , 0 ), // #395 {r32, xmm} - ROW(2, 1, 1, 0, 30 , 57 , 0 , 0 , 0 , 0 ), // #396 {m64|mem, mm} - ROW(2, 1, 1, 0, 45 , 57 , 0 , 0 , 0 , 0 ), // #397 {xmm, mm} - ROW(2, 1, 1, 2, 127, 126, 0 , 0 , 0 , 0 ), // #398 {, } - ROW(2, 0, 1, 0, 8 , 28 , 0 , 0 , 0 , 0 ), // #399 {r64, r32|m32|mem} - ROW(2, 1, 1, 2, 36 , 99 , 0 , 0 , 0 , 0 ), // #400 {, } - ROW(3, 1, 1, 3, 36 , 99 , 130, 0 , 0 , 0 ), // #401 {, , } - ROW(2, 1, 1, 0, 144, 136, 0 , 0 , 0 , 0 ), // #402 {u8|dx, al|ax|eax} - ROW(2, 1, 1, 0, 139, 145, 0 , 0 , 0 , 0 ), // #403 {dx, ds:[memBase|zsi]} - ROW(6, 1, 1, 3, 45 , 46 , 10 , 99 , 36 , 35 ), // #404 {xmm, xmm|m128|mem, i8|u8, , , } - ROW(6, 1, 1, 3, 45 , 46 , 10 , 122, 36 , 35 ), // #405 {xmm, xmm|m128|mem, i8|u8, , , } - ROW(4, 1, 1, 1, 45 , 46 , 10 , 99 , 0 , 0 ), // #406 {xmm, xmm|m128|mem, i8|u8, } - ROW(4, 1, 1, 1, 45 , 46 , 10 , 122, 0 , 0 ), // #407 {xmm, xmm|m128|mem, i8|u8, } - ROW(3, 1, 1, 0, 
109, 45 , 10 , 0 , 0 , 0 ), // #408 {r32|m8|mem|r8lo|r8hi|r16, xmm, i8|u8} - ROW(3, 0, 1, 0, 15 , 45 , 10 , 0 , 0 , 0 ), // #409 {r64|m64|mem, xmm, i8|u8} - ROW(3, 1, 1, 0, 45 , 109, 10 , 0 , 0 , 0 ), // #410 {xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8} - ROW(3, 1, 1, 0, 45 , 28 , 10 , 0 , 0 , 0 ), // #411 {xmm, r32|m32|mem, i8|u8} - ROW(3, 0, 1, 0, 45 , 15 , 10 , 0 , 0 , 0 ), // #412 {xmm, r64|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 59 , 113, 10 , 0 , 0 , 0 ), // #413 {mm|xmm, r32|m16|mem|r16, i8|u8} - ROW(2, 1, 1, 0, 6 , 59 , 0 , 0 , 0 , 0 ), // #414 {r32, mm|xmm} - ROW(2, 1, 1, 0, 45 , 10 , 0 , 0 , 0 , 0 ), // #415 {xmm, i8|u8} - ROW(2, 1, 1, 0, 31 , 81 , 0 , 0 , 0 , 0 ), // #416 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, cl|i8|u8} - ROW(1, 0, 1, 0, 114, 0 , 0 , 0 , 0 , 0 ), // #417 {r32|r64} - ROW(3, 1, 1, 3, 35 , 36 , 99 , 0 , 0 , 0 ), // #418 {, , } - ROW(2, 1, 1, 2, 142, 127, 0 , 0 , 0 , 0 ), // #419 {, } - ROW(1, 1, 1, 0, 1 , 0 , 0 , 0 , 0 , 0 ), // #420 {r8lo|r8hi|m8|mem} - ROW(1, 1, 1, 0, 146, 0 , 0 , 0 , 0 , 0 ), // #421 {r16|m16|mem|r32|r64} - ROW(2, 1, 1, 2, 127, 142, 0 , 0 , 0 , 0 ), // #422 {, } - ROW(6, 1, 1, 0, 51 , 51 , 51 , 51 , 51 , 47 ), // #423 {zmm, zmm, zmm, zmm, zmm, m128|mem} - ROW(6, 1, 1, 0, 45 , 45 , 45 , 45 , 45 , 47 ), // #424 {xmm, xmm, xmm, xmm, xmm, m128|mem} - ROW(3, 1, 1, 0, 45 , 45 , 60 , 0 , 0 , 0 ), // #425 {xmm, xmm, xmm|m64|mem} - ROW(3, 1, 1, 0, 45 , 45 , 87 , 0 , 0 , 0 ), // #426 {xmm, xmm, xmm|m32|mem} - ROW(2, 1, 1, 0, 48 , 47 , 0 , 0 , 0 , 0 ), // #427 {ymm, m128|mem} - ROW(2, 1, 1, 0, 147, 60 , 0 , 0 , 0 , 0 ), // #428 {ymm|zmm, xmm|m64|mem} - ROW(2, 1, 1, 0, 147, 47 , 0 , 0 , 0 , 0 ), // #429 {ymm|zmm, m128|mem} - ROW(2, 1, 1, 0, 51 , 50 , 0 , 0 , 0 , 0 ), // #430 {zmm, m256|mem} - ROW(2, 1, 1, 0, 148, 60 , 0 , 0 , 0 , 0 ), // #431 {xmm|ymm|zmm, xmm|m64|mem} - ROW(2, 1, 1, 0, 148, 87 , 0 , 0 , 0 , 0 ), // #432 {xmm|ymm|zmm, m32|mem|xmm} - ROW(4, 1, 1, 0, 82 , 45 , 60 , 10 , 0 , 0 ), // #433 {xmm|k, xmm, xmm|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 82 , 45 , 87 , 10 , 0 , 0 ), // #434 {xmm|k, xmm, xmm|m32|mem, i8|u8} - ROW(3, 1, 1, 0, 45 , 45 , 131, 0 , 0 , 0 ), // #435 {xmm, xmm, r32|m32|mem|r64|m64} - ROW(3, 1, 1, 0, 46 , 147, 10 , 0 , 0 , 0 ), // #436 {xmm|m128|mem, ymm|zmm, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 60 , 10 , 0 , 0 ), // #437 {xmm, xmm, xmm|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 87 , 10 , 0 , 0 ), // #438 {xmm, xmm, xmm|m32|mem, i8|u8} - ROW(3, 1, 1, 0, 84 , 149, 10 , 0 , 0 , 0 ), // #439 {k, xmm|m128|ymm|m256|zmm|m512, i8|u8} - ROW(3, 1, 1, 0, 84 , 60 , 10 , 0 , 0 , 0 ), // #440 {k, xmm|m64|mem, i8|u8} - ROW(3, 1, 1, 0, 84 , 87 , 10 , 0 , 0 , 0 ), // #441 {k, xmm|m32|mem, i8|u8} - ROW(1, 1, 1, 0, 62 , 0 , 0 , 0 , 0 , 0 ), // #442 {vm32y} - ROW(1, 1, 1, 0, 63 , 0 , 0 , 0 , 0 , 0 ), // #443 {vm32z} - ROW(1, 1, 1, 0, 66 , 0 , 0 , 0 , 0 , 0 ), // #444 {vm64z} - ROW(4, 1, 1, 0, 51 , 51 , 49 , 10 , 0 , 0 ), // #445 {zmm, zmm, ymm|m256|mem, i8|u8} - ROW(1, 1, 1, 0, 30 , 0 , 0 , 0 , 0 , 0 ), // #446 {m64|mem} - ROW(2, 1, 1, 0, 6 , 86 , 0 , 0 , 0 , 0 ), // #447 {r32, xmm|ymm} - ROW(2, 1, 1, 0, 148, 150, 0 , 0 , 0 , 0 ), // #448 {xmm|ymm|zmm, xmm|m8|mem|r32|r8lo|r8hi|r16} - ROW(2, 1, 1, 0, 148, 151, 0 , 0 , 0 , 0 ), // #449 {xmm|ymm|zmm, xmm|m32|mem|r32} - ROW(2, 1, 1, 0, 148, 84 , 0 , 0 , 0 , 0 ), // #450 {xmm|ymm|zmm, k} - ROW(2, 1, 1, 0, 148, 152, 0 , 0 , 0 , 0 ), // #451 {xmm|ymm|zmm, xmm|m16|mem|r32|r16} - ROW(3, 1, 1, 0, 113, 45 , 10 , 0 , 0 , 0 ), // #452 {r32|m16|mem|r16, xmm, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 109, 10 , 0 , 0 ), // #453 {xmm, 
xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 28 , 10 , 0 , 0 ), // #454 {xmm, xmm, r32|m32|mem, i8|u8} - ROW(4, 0, 1, 0, 45 , 45 , 15 , 10 , 0 , 0 ), // #455 {xmm, xmm, r64|m64|mem, i8|u8} - ROW(4, 1, 1, 0, 45 , 45 , 113, 10 , 0 , 0 ), // #456 {xmm, xmm, r32|m16|mem|r16, i8|u8} - ROW(2, 1, 1, 0, 84 , 148, 0 , 0 , 0 , 0 ), // #457 {k, xmm|ymm|zmm} - ROW(1, 1, 1, 0, 102, 0 , 0 , 0 , 0 , 0 ), // #458 {rel16|rel32} - ROW(3, 1, 1, 2, 91 , 35 , 36 , 0 , 0 , 0 ), // #459 {mem, , } - ROW(3, 0, 1, 2, 91 , 35 , 36 , 0 , 0 , 0 ) // #460 {mem, , } + ROW(2, 1, 1, 0, 62 , 87 , 0 , 0 , 0 , 0 ), // #345 {vm32x, xmm|ymm} + ROW(2, 1, 1, 0, 63 , 51 , 0 , 0 , 0 , 0 ), // {vm32y, zmm} + ROW(1, 1, 0, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #347 {} + ROW(2, 1, 0, 1, 33 , 10 , 0 , 0 , 0 , 0 ), // #348 {, i8|u8} + ROW(2, 1, 0, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // #349 {r16|m16|mem, r16} + ROW(3, 1, 1, 1, 45 , 46 , 122, 0 , 0 , 0 ), // #350 {xmm, xmm|m128|mem, } + ROW(2, 1, 1, 0, 90 , 123, 0 , 0 , 0 , 0 ), // #351 {bnd, mib} + ROW(2, 1, 1, 0, 90 , 92 , 0 , 0 , 0 , 0 ), // #352 {bnd, mem} + ROW(2, 1, 1, 0, 123, 90 , 0 , 0 , 0 , 0 ), // #353 {mib, bnd} + ROW(1, 1, 1, 0, 124, 0 , 0 , 0 , 0 , 0 ), // #354 {r16|r32|r64} + ROW(1, 1, 1, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #355 {} + ROW(2, 1, 1, 2, 35 , 36 , 0 , 0 , 0 , 0 ), // #356 {, } + ROW(1, 1, 1, 0, 92 , 0 , 0 , 0 , 0 , 0 ), // #357 {mem} + ROW(1, 1, 1, 0, 30 , 0 , 0 , 0 , 0 , 0 ), // #358 {m64|mem} + ROW(1, 1, 1, 1, 125, 0 , 0 , 0 , 0 , 0 ), // #359 {} + ROW(2, 1, 1, 2, 126, 127, 0 , 0 , 0 , 0 ), // #360 {, } + ROW(3, 1, 1, 0, 45 , 60 , 10 , 0 , 0 , 0 ), // #361 {xmm, xmm|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 45 , 88 , 10 , 0 , 0 , 0 ), // #362 {xmm, xmm|m32|mem, i8|u8} + ROW(5, 0, 1, 4, 47 , 37 , 38 , 128, 129, 0 ), // #363 {m128|mem, , , , } + ROW(5, 1, 1, 4, 30 , 35 , 36 , 100, 130, 0 ), // #364 {m64|mem, , , , } + ROW(4, 1, 1, 4, 36 , 130, 100, 35 , 0 , 0 ), // #365 {, , , } + ROW(2, 0, 1, 2, 37 , 38 , 0 , 0 , 0 , 0 ), // #366 {, } + ROW(2, 1, 1, 0, 57 , 46 , 0 , 0 , 0 , 0 ), // #367 {mm, xmm|m128|mem} + ROW(2, 1, 1, 0, 45 , 117, 0 , 0 , 0 , 0 ), // #368 {xmm, mm|m64|mem} + ROW(2, 1, 1, 0, 57 , 60 , 0 , 0 , 0 , 0 ), // #369 {mm, xmm|m64|mem} + ROW(2, 1, 1, 0, 115, 60 , 0 , 0 , 0 , 0 ), // #370 {r32|r64, xmm|m64|mem} + ROW(2, 1, 1, 0, 45 , 131, 0 , 0 , 0 , 0 ), // #371 {xmm, r32|m32|mem|r64|m64} + ROW(2, 1, 1, 0, 115, 88 , 0 , 0 , 0 , 0 ), // #372 {r32|r64, xmm|m32|mem} + ROW(2, 1, 1, 2, 34 , 33 , 0 , 0 , 0 , 0 ), // #373 {, } + ROW(1, 1, 1, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #374 {} + ROW(2, 1, 1, 0, 12 , 10 , 0 , 0 , 0 , 0 ), // #375 {i16|u16, i8|u8} + ROW(3, 1, 1, 0, 28 , 45 , 10 , 0 , 0 , 0 ), // #376 {r32|m32|mem, xmm, i8|u8} + ROW(1, 1, 1, 0, 81 , 0 , 0 , 0 , 0 , 0 ), // #377 {m80|mem} + ROW(1, 1, 1, 0, 132, 0 , 0 , 0 , 0 , 0 ), // #378 {m16|m32} + ROW(1, 1, 1, 0, 133, 0 , 0 , 0 , 0 , 0 ), // #379 {m16|m32|m64} + ROW(1, 1, 1, 0, 134, 0 , 0 , 0 , 0 , 0 ), // #380 {m32|m64|m80|st} + ROW(1, 1, 1, 0, 21 , 0 , 0 , 0 , 0 , 0 ), // #381 {m16|mem} + ROW(1, 1, 1, 0, 135, 0 , 0 , 0 , 0 , 0 ), // #382 {ax|m16|mem} + ROW(1, 0, 1, 0, 92 , 0 , 0 , 0 , 0 , 0 ), // #383 {mem} + ROW(2, 1, 1, 0, 136, 137, 0 , 0 , 0 , 0 ), // #384 {al|ax|eax, i8|u8|dx} + ROW(1, 1, 1, 0, 6 , 0 , 0 , 0 , 0 , 0 ), // #385 {r32} + ROW(2, 1, 1, 0, 138, 139, 0 , 0 , 0 , 0 ), // #386 {es:[memBase|zdi], dx} + ROW(1, 1, 1, 0, 10 , 0 , 0 , 0 , 0 , 0 ), // #387 {i8|u8} + ROW(0, 1, 0, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #388 {} + ROW(0, 0, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #389 {} + ROW(3, 1, 1, 0, 85 , 85 , 85 
, 0 , 0 , 0 ), // #390 {k, k, k} + ROW(2, 1, 1, 0, 85 , 85 , 0 , 0 , 0 , 0 ), // #391 {k, k} + ROW(3, 1, 1, 0, 85 , 85 , 10 , 0 , 0 , 0 ), // #392 {k, k, i8|u8} + ROW(1, 1, 1, 1, 140, 0 , 0 , 0 , 0 , 0 ), // #393 {} + ROW(1, 1, 1, 0, 29 , 0 , 0 , 0 , 0 , 0 ), // #394 {m32|mem} + ROW(1, 0, 1, 0, 53 , 0 , 0 , 0 , 0 , 0 ), // #395 {m512|mem} + ROW(2, 1, 1, 0, 124, 141, 0 , 0 , 0 , 0 ), // #396 {r16|r32|r64, mem|m8|m16|m32|m48|m64|m80|m128|m256|m512|m1024} + ROW(1, 1, 1, 0, 27 , 0 , 0 , 0 , 0 , 0 ), // #397 {r16|m16|mem} + ROW(1, 1, 1, 0, 115, 0 , 0 , 0 , 0 , 0 ), // #398 {r32|r64} + ROW(2, 1, 1, 2, 142, 126, 0 , 0 , 0 , 0 ), // #399 {, } + ROW(3, 1, 1, 0, 115, 28 , 14 , 0 , 0 , 0 ), // #400 {r32|r64, r32|m32|mem, i32|u32} + ROW(3, 1, 1, 1, 45 , 45 , 143, 0 , 0 , 0 ), // #401 {xmm, xmm, } + ROW(3, 1, 1, 1, 57 , 57 , 143, 0 , 0 , 0 ), // #402 {mm, mm, } + ROW(3, 1, 1, 3, 125, 100, 35 , 0 , 0 , 0 ), // #403 {, , } + ROW(2, 1, 1, 0, 98 , 53 , 0 , 0 , 0 , 0 ), // #404 {es:[memBase], m512|mem} + ROW(2, 1, 1, 0, 57 , 45 , 0 , 0 , 0 , 0 ), // #405 {mm, xmm} + ROW(2, 1, 1, 0, 6 , 45 , 0 , 0 , 0 , 0 ), // #406 {r32, xmm} + ROW(2, 1, 1, 0, 30 , 57 , 0 , 0 , 0 , 0 ), // #407 {m64|mem, mm} + ROW(2, 1, 1, 0, 45 , 57 , 0 , 0 , 0 , 0 ), // #408 {xmm, mm} + ROW(2, 1, 1, 2, 127, 126, 0 , 0 , 0 , 0 ), // #409 {, } + ROW(2, 1, 1, 2, 36 , 100, 0 , 0 , 0 , 0 ), // #410 {, } + ROW(3, 1, 1, 3, 36 , 100, 130, 0 , 0 , 0 ), // #411 {, , } + ROW(2, 1, 1, 0, 144, 136, 0 , 0 , 0 , 0 ), // #412 {u8|dx, al|ax|eax} + ROW(2, 1, 1, 0, 139, 145, 0 , 0 , 0 , 0 ), // #413 {dx, ds:[memBase|zsi]} + ROW(6, 1, 1, 3, 45 , 46 , 10 , 100, 36 , 35 ), // #414 {xmm, xmm|m128|mem, i8|u8, , , } + ROW(6, 1, 1, 3, 45 , 46 , 10 , 122, 36 , 35 ), // #415 {xmm, xmm|m128|mem, i8|u8, , , } + ROW(4, 1, 1, 1, 45 , 46 , 10 , 100, 0 , 0 ), // #416 {xmm, xmm|m128|mem, i8|u8, } + ROW(4, 1, 1, 1, 45 , 46 , 10 , 122, 0 , 0 ), // #417 {xmm, xmm|m128|mem, i8|u8, } + ROW(3, 1, 1, 0, 110, 45 , 10 , 0 , 0 , 0 ), // #418 {r32|m8|mem|r8lo|r8hi|r16, xmm, i8|u8} + ROW(3, 0, 1, 0, 15 , 45 , 10 , 0 , 0 , 0 ), // #419 {r64|m64|mem, xmm, i8|u8} + ROW(3, 1, 1, 0, 45 , 110, 10 , 0 , 0 , 0 ), // #420 {xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8} + ROW(3, 1, 1, 0, 45 , 28 , 10 , 0 , 0 , 0 ), // #421 {xmm, r32|m32|mem, i8|u8} + ROW(3, 0, 1, 0, 45 , 15 , 10 , 0 , 0 , 0 ), // #422 {xmm, r64|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 59 , 114, 10 , 0 , 0 , 0 ), // #423 {mm|xmm, r32|m16|mem|r16, i8|u8} + ROW(2, 1, 1, 0, 6 , 59 , 0 , 0 , 0 , 0 ), // #424 {r32, mm|xmm} + ROW(2, 1, 1, 0, 45 , 10 , 0 , 0 , 0 , 0 ), // #425 {xmm, i8|u8} + ROW(1, 1, 1, 0, 131, 0 , 0 , 0 , 0 , 0 ), // #426 {r32|m32|mem|r64|m64} + ROW(2, 1, 1, 0, 31 , 82 , 0 , 0 , 0 , 0 ), // #427 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, cl|i8|u8} + ROW(1, 0, 1, 0, 115, 0 , 0 , 0 , 0 , 0 ), // #428 {r32|r64} + ROW(3, 1, 1, 3, 35 , 36 , 100, 0 , 0 , 0 ), // #429 {, , } + ROW(2, 1, 1, 2, 142, 127, 0 , 0 , 0 , 0 ), // #430 {, } + ROW(1, 1, 1, 0, 1 , 0 , 0 , 0 , 0 , 0 ), // #431 {r8lo|r8hi|m8|mem} + ROW(1, 1, 1, 0, 146, 0 , 0 , 0 , 0 , 0 ), // #432 {r16|m16|mem|r32|r64} + ROW(2, 1, 1, 2, 127, 142, 0 , 0 , 0 , 0 ), // #433 {, } + ROW(3, 0, 1, 0, 147, 147, 147, 0 , 0 , 0 ), // #434 {tmm, tmm, tmm} + ROW(2, 0, 1, 0, 147, 92 , 0 , 0 , 0 , 0 ), // #435 {tmm, tmem} + ROW(2, 0, 1, 0, 92 , 147, 0 , 0 , 0 , 0 ), // #436 {tmem, tmm} + ROW(1, 0, 1, 0, 147, 0 , 0 , 0 , 0 , 0 ), // #437 {tmm} + ROW(3, 1, 1, 2, 6 , 35 , 36 , 0 , 0 , 0 ), // #438 {r32, , } + ROW(1, 1, 1, 0, 28 , 0 , 0 , 0 , 0 , 0 ), // #439 {r32|m32|mem} + ROW(1, 1, 1, 0, 148, 0 , 0 , 
0 , 0 , 0 ), // #440 {ds:[memBase]} + ROW(6, 1, 1, 0, 51 , 51 , 51 , 51 , 51 , 47 ), // #441 {zmm, zmm, zmm, zmm, zmm, m128|mem} + ROW(6, 1, 1, 0, 45 , 45 , 45 , 45 , 45 , 47 ), // #442 {xmm, xmm, xmm, xmm, xmm, m128|mem} + ROW(3, 1, 1, 0, 45 , 45 , 60 , 0 , 0 , 0 ), // #443 {xmm, xmm, xmm|m64|mem} + ROW(3, 1, 1, 0, 45 , 45 , 88 , 0 , 0 , 0 ), // #444 {xmm, xmm, xmm|m32|mem} + ROW(2, 1, 1, 0, 48 , 47 , 0 , 0 , 0 , 0 ), // #445 {ymm, m128|mem} + ROW(2, 1, 1, 0, 149, 60 , 0 , 0 , 0 , 0 ), // #446 {ymm|zmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 149, 47 , 0 , 0 , 0 , 0 ), // #447 {ymm|zmm, m128|mem} + ROW(2, 1, 1, 0, 51 , 50 , 0 , 0 , 0 , 0 ), // #448 {zmm, m256|mem} + ROW(2, 1, 1, 0, 150, 60 , 0 , 0 , 0 , 0 ), // #449 {xmm|ymm|zmm, xmm|m64|mem} + ROW(2, 1, 1, 0, 150, 88 , 0 , 0 , 0 , 0 ), // #450 {xmm|ymm|zmm, m32|mem|xmm} + ROW(4, 1, 1, 0, 83 , 45 , 60 , 10 , 0 , 0 ), // #451 {xmm|k, xmm, xmm|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 83 , 45 , 88 , 10 , 0 , 0 ), // #452 {xmm|k, xmm, xmm|m32|mem, i8|u8} + ROW(3, 1, 1, 0, 45 , 45 , 131, 0 , 0 , 0 ), // #453 {xmm, xmm, r32|m32|mem|r64|m64} + ROW(3, 1, 1, 0, 46 , 149, 10 , 0 , 0 , 0 ), // #454 {xmm|m128|mem, ymm|zmm, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 60 , 10 , 0 , 0 ), // #455 {xmm, xmm, xmm|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 88 , 10 , 0 , 0 ), // #456 {xmm, xmm, xmm|m32|mem, i8|u8} + ROW(3, 1, 1, 0, 85 , 151, 10 , 0 , 0 , 0 ), // #457 {k, xmm|m128|ymm|m256|zmm|m512, i8|u8} + ROW(3, 1, 1, 0, 85 , 60 , 10 , 0 , 0 , 0 ), // #458 {k, xmm|m64|mem, i8|u8} + ROW(3, 1, 1, 0, 85 , 88 , 10 , 0 , 0 , 0 ), // #459 {k, xmm|m32|mem, i8|u8} + ROW(1, 1, 1, 0, 63 , 0 , 0 , 0 , 0 , 0 ), // #460 {vm32y} + ROW(1, 1, 1, 0, 64 , 0 , 0 , 0 , 0 , 0 ), // #461 {vm32z} + ROW(1, 1, 1, 0, 67 , 0 , 0 , 0 , 0 , 0 ), // #462 {vm64z} + ROW(4, 1, 1, 0, 51 , 51 , 49 , 10 , 0 , 0 ), // #463 {zmm, zmm, ymm|m256|mem, i8|u8} + ROW(2, 1, 1, 0, 6 , 87 , 0 , 0 , 0 , 0 ), // #464 {r32, xmm|ymm} + ROW(2, 1, 1, 0, 150, 152, 0 , 0 , 0 , 0 ), // #465 {xmm|ymm|zmm, xmm|m8|mem|r32|r8lo|r8hi|r16} + ROW(2, 1, 1, 0, 150, 153, 0 , 0 , 0 , 0 ), // #466 {xmm|ymm|zmm, xmm|m32|mem|r32} + ROW(2, 1, 1, 0, 150, 85 , 0 , 0 , 0 , 0 ), // #467 {xmm|ymm|zmm, k} + ROW(2, 1, 1, 0, 150, 154, 0 , 0 , 0 , 0 ), // #468 {xmm|ymm|zmm, xmm|m16|mem|r32|r16} + ROW(3, 1, 1, 0, 114, 45 , 10 , 0 , 0 , 0 ), // #469 {r32|m16|mem|r16, xmm, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 110, 10 , 0 , 0 ), // #470 {xmm, xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 28 , 10 , 0 , 0 ), // #471 {xmm, xmm, r32|m32|mem, i8|u8} + ROW(4, 0, 1, 0, 45 , 45 , 15 , 10 , 0 , 0 ), // #472 {xmm, xmm, r64|m64|mem, i8|u8} + ROW(4, 1, 1, 0, 45 , 45 , 114, 10 , 0 , 0 ), // #473 {xmm, xmm, r32|m16|mem|r16, i8|u8} + ROW(2, 1, 1, 0, 85 , 150, 0 , 0 , 0 , 0 ), // #474 {k, xmm|ymm|zmm} + ROW(1, 1, 1, 0, 103, 0 , 0 , 0 , 0 , 0 ), // #475 {rel16|rel32} + ROW(3, 1, 1, 2, 92 , 35 , 36 , 0 , 0 , 0 ), // #476 {mem, , } + ROW(3, 0, 1, 2, 92 , 35 , 36 , 0 , 0 , 0 ) // #477 {mem, , } }; #undef ROW @@ -3306,6 +3416,7 @@ const InstDB::OpSignature InstDB::_opSignatureTable[] = { ROW(F(Gpq) | F(Mm) | F(Mem), M(M64) | M(Any), 0, 0x00), ROW(F(Xmm) | F(Mm), 0, 0, 0x00), ROW(F(Xmm) | F(Mem), M(M64) | M(Any), 0, 0x00), + ROW(F(Gpw) | F(Gpd) | F(Gpq) | F(Mem), M(M16) | M(M32) | M(M64), 0, 0x00), ROW(F(Vm), M(Vm32x), 0, 0x00), ROW(F(Vm), M(Vm32y), 0, 0x00), ROW(F(Vm), M(Vm32z), 0, 0x00), @@ -3313,10 +3424,10 @@ const InstDB::OpSignature InstDB::_opSignatureTable[] = { ROW(F(Vm), M(Vm64y), 0, 0x00), ROW(F(Vm), M(Vm64z), 0, 0x00), ROW(F(GpbLo) | F(Implicit), 0, 0, 
0x01), - ROW(F(Gpw) | F(Gpq) | F(Mem), M(M16) | M(M64) | M(Any), 0, 0x00), + ROW(F(Gpw) | F(Gpq) | F(Mem), M(M16) | M(M64), 0, 0x00), ROW(F(SReg), 0, 0, 0x1A), ROW(F(SReg), 0, 0, 0x60), - ROW(F(Gpw) | F(Gpq) | F(Mem) | F(I8) | F(I16) | F(I32), M(M16) | M(M64) | M(Any), 0, 0x00), + ROW(F(Gpw) | F(Gpq) | F(Mem) | F(I8) | F(I16) | F(I32), M(M16) | M(M64), 0, 0x00), ROW(F(Gpd) | F(Mem) | F(I32) | F(U32), M(M32), 0, 0x00), ROW(F(SReg), 0, 0, 0x1E), ROW(F(Vm), M(Vm64x) | M(Vm64y), 0, 0x00), @@ -3361,7 +3472,6 @@ const InstDB::OpSignature InstDB::_opSignatureTable[] = { ROW(F(Gpw) | F(Gpd) | F(Mem), M(M16) | M(Any), 0, 0x00), ROW(F(Gpd) | F(Gpq), 0, 0, 0x00), ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Mem), M(M8) | M(M16), 0, 0x00), - ROW(F(Gpw) | F(Gpd) | F(Mem), M(M16) | M(M32), 0, 0x00), ROW(F(Mm) | F(Mem), M(M64) | M(Any), 0, 0x00), ROW(F(Mm) | F(Mem) | F(I8) | F(U8), M(M64) | M(Any), 0, 0x00), ROW(F(U16), 0, 0, 0x00), @@ -3392,6 +3502,8 @@ const InstDB::OpSignature InstDB::_opSignatureTable[] = { ROW(F(Gpw) | F(U8), 0, 0, 0x04), ROW(F(Mem), M(BaseOnly) | M(Ds), 0, 0x40), ROW(F(Gpw) | F(Gpd) | F(Gpq) | F(Mem), M(M16) | M(Any), 0, 0x00), + ROW(F(Tmm), 0, 0, 0x00), + ROW(F(Mem), M(BaseOnly) | M(Ds), 0, 0x00), ROW(F(Ymm) | F(Zmm), 0, 0, 0x00), ROW(F(Xmm) | F(Ymm) | F(Zmm), 0, 0, 0x00), ROW(F(Xmm) | F(Ymm) | F(Zmm) | F(Mem), M(M128) | M(M256) | M(M512), 0, 0x00), @@ -3412,496 +3524,538 @@ const InstDB::OpSignature InstDB::_opSignatureTable[] = { // ${InstRWInfoTable:Begin} // ------------------- Automatically generated, do not edit ------------------- -const uint8_t InstDB::rwInfoIndex[Inst::_kIdCount * 2] = { - 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 3, 0, 2, 0, 4, 0, 4, 0, 5, 0, 6, 0, 4, 0, - 4, 0, 3, 0, 4, 0, 4, 0, 4, 0, 4, 0, 7, 0, 0, 7, 2, 0, 0, 8, 4, 0, 4, 0, 4, 0, - 4, 0, 9, 0, 0, 10, 11, 0, 11, 0, 11, 0, 11, 0, 11, 0, 0, 4, 0, 4, 0, 12, 0, 12, - 11, 0, 11, 0, 11, 0, 11, 0, 11, 0, 13, 0, 13, 0, 13, 0, 14, 0, 14, 0, 15, 0, - 16, 0, 17, 0, 11, 0, 11, 0, 0, 18, 19, 0, 20, 0, 20, 0, 20, 0, 0, 10, 0, 21, - 0, 1, 22, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 24, 0, 24, 0, 24, 0, 0, 0, 0, 0, 0, 0, - 24, 0, 25, 0, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, - 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, - 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 26, 0, 0, 4, 0, 4, 27, 0, 0, 5, 0, - 6, 0, 28, 0, 29, 0, 30, 31, 0, 32, 0, 0, 33, 34, 0, 35, 0, 36, 0, 7, 0, 37, 0, - 37, 0, 37, 0, 36, 0, 38, 0, 7, 0, 36, 0, 39, 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, - 0, 45, 0, 37, 0, 37, 0, 7, 0, 39, 0, 40, 0, 45, 0, 46, 0, 0, 47, 0, 1, 0, 1, - 0, 48, 49, 50, 4, 0, 4, 0, 5, 0, 6, 0, 0, 4, 0, 4, 0, 0, 51, 0, 51, 0, 0, 0, - 0, 52, 53, 54, 0, 0, 0, 0, 55, 56, 0, 57, 0, 58, 0, 59, 0, 0, 0, 0, 0, 57, 0, - 57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 60, 0, 61, 0, 61, 0, 60, 0, - 0, 0, 0, 0, 0, 55, 56, 0, 57, 55, 56, 0, 57, 0, 0, 0, 57, 0, 56, 0, 56, 0, 56, - 0, 56, 0, 56, 0, 56, 0, 56, 0, 0, 0, 0, 0, 62, 0, 62, 0, 62, 0, 56, 0, 56, 0, - 60, 0, 0, 0, 63, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 56, 0, 57, 0, - 0, 0, 0, 0, 0, 0, 64, 0, 65, 0, 64, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, - 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 65, 0, 64, 0, 67, 0, 66, 55, 56, 0, - 57, 55, 56, 0, 57, 0, 0, 0, 61, 0, 61, 0, 61, 0, 61, 0, 0, 0, 0, 0, 0, 0, 57, - 0, 24, 0, 24, 0, 64, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 4, 0, 4, 0, - 4, 0, 0, 0, 4, 0, 4, 0, 49, 50, 68, 69, 70, 0, 0, 48, 71, 0, 0, 72, 53, 53, 0, - 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 24, 74, 0, 73, 0, 73, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, +const uint8_t InstDB::rwInfoIndexA[Inst::_kIdCount] = { + 0, 0, 1, 1, 0, 2, 3, 2, 4, 4, 5, 6, 4, 4, 3, 4, 4, 4, 4, 7, 0, 2, 0, 4, 4, 4, + 4, 8, 0, 9, 9, 9, 9, 9, 0, 0, 0, 0, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, 12, 13, + 14, 9, 9, 0, 15, 16, 16, 16, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 18, 0, 0, 19, 0, 0, 0, 0, 0, 20, 21, 0, 22, 23, 24, 7, 25, + 25, 25, 24, 26, 7, 24, 27, 28, 29, 30, 31, 32, 33, 25, 25, 7, 27, 28, 33, 34, + 0, 0, 0, 0, 35, 4, 4, 5, 6, 0, 0, 0, 0, 0, 36, 36, 0, 0, 37, 0, 0, 38, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 38, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 38, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 0, 4, 4, 35, + 39, 40, 0, 0, 0, 41, 0, 37, 0, 0, 0, 0, 42, 0, 43, 42, 42, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 46, 47, 48, 49, 50, 51, + 52, 0, 0, 0, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 54, 55, 56, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 58, 0, 59, 0, 60, 0, 59, 0, 59, 0, 59, 0, 0, + 0, 0, 61, 62, 62, 62, 57, 59, 0, 0, 0, 9, 0, 0, 4, 4, 5, 6, 0, 0, 4, 4, 5, 6, + 0, 0, 63, 64, 64, 65, 46, 24, 36, 65, 51, 64, 64, 66, 67, 67, 68, 69, 69, 70, + 70, 58, 58, 65, 58, 58, 69, 69, 71, 47, 51, 72, 47, 7, 7, 46, 73, 9, 64, 64, + 73, 0, 35, 4, 4, 5, 6, 0, 74, 0, 0, 75, 0, 2, 4, 4, 76, 77, 9, 9, 9, 3, 3, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 0, 3, 78, 3, 0, 0, 0, 3, 3, 4, 3, 0, 0, 3, + 3, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 27, 78, 78, 78, 27, 27, 78, 78, 78, 3, 3, 3, 79, 3, 3, 3, 27, 27, 0, 0, + 0, 0, 3, 3, 4, 4, 3, 3, 4, 4, 4, 4, 3, 3, 4, 4, 80, 81, 82, 24, 24, 24, 81, 81, + 82, 24, 24, 24, 81, 4, 3, 78, 3, 3, 4, 3, 3, 0, 0, 0, 9, 0, 0, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 3, 3, 3, 83, 3, 3, 0, 3, 3, 3, 83, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 27, 84, 0, 3, 3, 4, 3, 3, 3, 4, 3, 0, 0, 0, 0, 0, 0, 0, + 3, 85, 7, 86, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 85, 85, 0, + 0, 0, 0, 0, 0, 7, 86, 0, 0, 85, 85, 0, 0, 2, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, + 4, 0, 4, 4, 0, 85, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 26, 86, 0, 0, + 0, 0, 0, 0, 89, 0, 0, 2, 4, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 15, + 90, 90, 0, 91, 0, 0, 9, 9, 20, 21, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 92, 28, 93, 94, 93, 94, 92, 28, 93, 94, 93, 94, 95, 96, 0, 0, 0, 0, 20, 21, + 97, 97, 98, 9, 0, 73, 99, 99, 9, 99, 9, 98, 9, 98, 0, 98, 9, 98, 9, 99, 28, + 0, 28, 0, 0, 0, 33, 33, 99, 9, 99, 9, 9, 98, 9, 98, 28, 28, 33, 33, 98, 9, 9, + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 100, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 77, 0, 78, 0, 79, 0, 76, 0, - 77, 0, 76, 0, 77, 0, 78, 0, 79, 0, 78, 0, 79, 80, 0, 81, 0, 
82, 0, 83, 0, 84, - 0, 85, 0, 86, 0, 87, 0, 0, 76, 0, 77, 0, 78, 88, 0, 89, 0, 90, 0, 91, 0, 0, 79, - 0, 84, 0, 85, 0, 86, 0, 87, 0, 84, 0, 85, 0, 86, 0, 87, 88, 0, 89, 0, 90, 0, - 91, 0, 0, 92, 0, 93, 0, 94, 0, 76, 0, 77, 0, 78, 0, 79, 0, 76, 0, 77, 0, 78, - 0, 79, 0, 95, 96, 0, 97, 0, 0, 98, 99, 0, 100, 0, 0, 0, 99, 0, 0, 0, 99, 0, 0, - 24, 99, 0, 0, 24, 0, 101, 0, 102, 0, 101, 103, 0, 104, 0, 104, 0, 104, 0, 96, - 0, 99, 0, 0, 101, 0, 105, 0, 105, 11, 0, 0, 106, 0, 107, 4, 0, 4, 0, 5, 0, 6, - 0, 0, 0, 4, 0, 4, 0, 5, 0, 6, 0, 0, 108, 0, 108, 109, 0, 110, 0, 110, 0, 111, - 0, 81, 0, 36, 0, 112, 0, 111, 0, 86, 0, 110, 0, 110, 0, 113, 0, 114, 0, 114, - 0, 115, 0, 116, 0, 116, 0, 117, 0, 117, 0, 97, 0, 97, 0, 111, 0, 97, 0, 97, 0, - 116, 0, 116, 0, 118, 0, 82, 0, 86, 0, 119, 0, 82, 0, 7, 0, 7, 0, 81, 0, 120, - 0, 121, 0, 110, 0, 110, 0, 120, 0, 0, 4, 49, 122, 4, 0, 4, 0, 5, 0, 6, 0, 0, - 123, 124, 0, 0, 125, 0, 48, 0, 126, 0, 48, 2, 0, 4, 0, 4, 0, 127, 0, 128, 0, 11, - 0, 11, 0, 11, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, - 3, 0, 3, 0, 0, 3, 3, 0, 3, 0, 0, 0, 3, 0, 129, 0, 3, 0, 0, 12, 0, 4, 0, 4, 3, - 0, 3, 0, 4, 0, 3, 0, 0, 130, 0, 131, 3, 0, 3, 0, 4, 0, 3, 0, 0, 132, 0, 133, - 0, 0, 0, 8, 0, 8, 0, 134, 0, 52, 0, 135, 0, 136, 39, 0, 39, 0, 129, 0, 129, 0, - 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 39, 0, 129, - 0, 129, 0, 129, 0, 39, 0, 39, 0, 129, 0, 129, 0, 129, 0, 3, 0, 3, 0, 3, 0, 137, - 0, 3, 0, 3, 0, 3, 0, 39, 0, 39, 0, 0, 138, 0, 72, 0, 139, 0, 140, 3, 0, 3, 0, - 4, 0, 4, 0, 3, 0, 3, 0, 4, 0, 4, 0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 0, 4, 0, 141, - 0, 142, 0, 143, 0, 36, 0, 36, 0, 36, 0, 142, 0, 142, 0, 143, 0, 36, 0, 36, 0, - 36, 0, 142, 0, 4, 0, 3, 0, 129, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 0, 144, 0, 0, - 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, - 0, 24, 3, 0, 3, 0, 0, 7, 0, 7, 0, 7, 0, 39, 3, 0, 3, 0, 3, 0, 3, 0, 54, 0, - 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 54, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, - 3, 0, 3, 0, 3, 0, 39, 0, 145, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 3, 0, 4, 0, 3, - 0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 147, 0, 7, 0, 148, 0, 147, 0, - 0, 149, 0, 149, 0, 150, 0, 149, 0, 150, 0, 149, 0, 149, 151, 0, 0, 152, 0, 0, - 147, 0, 147, 0, 0, 11, 0, 7, 0, 7, 0, 38, 0, 148, 0, 0, 7, 0, 148, 0, 0, 153, - 147, 0, 147, 0, 0, 10, 2, 0, 154, 0, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, - 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, - 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, - 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 0, 0, 64, 4, 0, 4, 0, 4, 0, 0, 4, - 4, 0, 4, 0, 0, 12, 147, 0, 0, 156, 0, 10, 147, 0, 0, 156, 0, 10, 0, 4, 0, 4, - 0, 64, 0, 47, 0, 157, 0, 149, 0, 157, 7, 0, 7, 0, 38, 0, 148, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 158, 159, 0, 0, 157, 2, 0, 4, 0, 4, 0, 5, 0, 6, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 19, 0, 11, 0, 11, 0, 31, 0, 32, 0, - 0, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 160, 0, 161, 0, 160, 0, 161, 0, 8, 0, 8, 0, 162, - 0, 163, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 7, 0, 0, 7, 0, 8, 0, 8, 0, 8, - 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 164, 0, 164, - 165, 0, 40, 0, 166, 0, 167, 0, 166, 0, 167, 0, 165, 0, 40, 0, 166, 0, 167, - 0, 166, 0, 167, 0, 168, 0, 169, 0, 0, 8, 0, 8, 0, 170, 0, 171, 31, 0, 32, 0, - 172, 0, 172, 0, 173, 0, 11, 0, 0, 8, 120, 0, 174, 0, 174, 0, 11, 0, 174, 0, 11, - 0, 173, 0, 11, 0, 173, 0, 0, 175, 173, 0, 11, 0, 173, 0, 11, 0, 174, 0, 40, - 0, 0, 176, 40, 
0, 0, 177, 0, 178, 0, 179, 45, 0, 45, 0, 174, 0, 11, 0, 174, 0, - 11, 0, 11, 0, 173, 0, 11, 0, 173, 0, 40, 0, 40, 0, 45, 0, 45, 0, 173, 0, 11, - 0, 11, 0, 174, 0, 0, 177, 0, 178, 0, 8, 0, 8, 0, 8, 0, 162, 0, 163, 0, 8, 0, 180, - 0, 8, 0, 101, 0, 101, 181, 0, 181, 0, 11, 0, 11, 0, 0, 182, 0, 183, 0, 184, - 0, 183, 0, 184, 0, 182, 0, 183, 0, 184, 0, 183, 0, 184, 0, 52, 0, 185, 0, 185, - 0, 186, 0, 187, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, - 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 190, 0, 190, 0, 191, 0, 192, 0, 185, 0, - 185, 0, 185, 0, 185, 0, 185, 0, 185, 0, 190, 0, 190, 0, 185, 0, 185, 0, 188, - 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, - 0, 185, 0, 185, 0, 185, 0, 185, 0, 185, 0, 190, 0, 190, 0, 190, 0, 190, 0, - 191, 0, 192, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, - 185, 0, 185, 0, 188, 0, 189, 0, 190, 0, 190, 0, 191, 0, 192, 0, 185, 0, 185, - 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 193, 0, 194, - 0, 190, 0, 190, 0, 191, 0, 192, 0, 195, 0, 195, 0, 39, 0, 121, 11, 0, 11, 0, - 39, 0, 196, 0, 99, 197, 99, 198, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, - 24, 0, 24, 99, 198, 99, 199, 11, 0, 11, 0, 0, 200, 0, 201, 0, 11, 0, 11, 0, - 200, 0, 201, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 202, 0, 203, 0, 204, - 0, 203, 0, 204, 0, 202, 0, 203, 0, 204, 0, 203, 0, 204, 0, 163, 111, 0, 0, 98, - 0, 106, 0, 205, 0, 205, 0, 8, 0, 8, 0, 162, 0, 163, 0, 0, 0, 206, 0, 0, 0, 8, - 0, 8, 0, 162, 0, 163, 0, 0, 0, 207, 0, 0, 208, 0, 208, 0, 81, 0, 209, 0, 208, - 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 0, 210, 211, 212, - 211, 212, 0, 213, 116, 214, 116, 214, 215, 0, 216, 0, 111, 0, 111, 0, 111, 0, - 111, 0, 217, 0, 116, 218, 11, 0, 11, 0, 118, 219, 208, 0, 208, 0, 0, 8, 0, 220, - 0, 206, 172, 0, 0, 0, 0, 221, 0, 207, 0, 8, 0, 8, 0, 162, 0, 163, 222, 0, 0, - 220, 0, 8, 0, 8, 0, 223, 0, 223, 11, 0, 11, 0, 11, 0, 11, 0, 0, 8, 0, 8, 0, - 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, - 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 164, 0, 8, 224, 0, 45, 0, 225, 0, 225, - 0, 40, 0, 226, 0, 0, 8, 0, 190, 0, 227, 0, 227, 0, 8, 0, 8, 0, 8, 0, 8, 0, - 130, 0, 131, 0, 8, 0, 8, 0, 8, 0, 8, 0, 132, 0, 133, 0, 227, 0, 227, 0, 227, 0, - 227, 0, 227, 0, 227, 0, 180, 0, 180, 172, 0, 172, 0, 172, 0, 172, 0, 0, 180, - 0, 180, 0, 180, 0, 180, 0, 180, 0, 180, 11, 0, 11, 0, 0, 185, 0, 185, 0, 185, - 0, 185, 0, 228, 0, 228, 0, 8, 0, 8, 0, 8, 0, 185, 0, 8, 0, 8, 0, 185, 0, 185, - 0, 190, 0, 190, 0, 229, 0, 229, 0, 229, 0, 8, 0, 229, 0, 8, 0, 185, 0, 185, 0, - 185, 0, 185, 0, 185, 0, 8, 11, 0, 11, 0, 11, 0, 11, 0, 0, 134, 0, 52, 0, 135, - 0, 230, 99, 198, 99, 197, 99, 199, 99, 198, 7, 0, 7, 0, 7, 0, 0, 8, 7, 0, 0, - 8, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 0, 8, 7, 0, 7, 0, 137, 0, 7, 0, 0, 8, - 7, 0, 0, 8, 0, 8, 7, 0, 0, 231, 0, 163, 0, 162, 0, 232, 11, 0, 11, 0, 0, 233, - 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, - 0, 233, 0, 185, 0, 185, 0, 8, 0, 8, 0, 205, 0, 205, 0, 8, 0, 8, 0, 8, 0, 8, - 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 234, 0, - 234, 0, 235, 0, 175, 0, 225, 0, 225, 0, 225, 0, 225, 0, 141, 0, 234, 0, 236, - 0, 175, 0, 235, 0, 235, 0, 175, 0, 236, 0, 175, 0, 235, 0, 175, 0, 237, 0, 238, - 0, 173, 0, 173, 0, 173, 0, 237, 0, 235, 0, 175, 0, 236, 0, 175, 0, 235, 0, - 175, 0, 234, 0, 175, 0, 237, 0, 238, 0, 173, 0, 173, 0, 173, 0, 237, 0, 0, 8, - 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 
11, 0, 11, 0, 11, 0, 11, 0, 0, - 8, 0, 8, 0, 8, 0, 239, 0, 11, 0, 11, 0, 8, 0, 8, 0, 11, 0, 11, 0, 8, 0, 8, 0, - 240, 0, 240, 0, 240, 0, 240, 0, 8, 111, 0, 111, 0, 241, 0, 111, 0, 0, 240, 0, - 240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 8, 0, 8, 0, 185, 0, 185, 0, 185, 0, 8, - 0, 240, 0, 240, 0, 8, 0, 8, 0, 185, 0, 185, 0, 185, 0, 8, 0, 8, 0, 227, 0, 11, - 0, 11, 0, 11, 0, 8, 0, 8, 0, 8, 0, 242, 0, 243, 0, 242, 0, 8, 0, 8, 0, 8, 0, - 242, 0, 242, 0, 242, 0, 8, 0, 8, 0, 8, 0, 242, 0, 242, 0, 243, 0, 242, 0, 8, - 0, 8, 0, 8, 0, 242, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 185, 0, - 185, 222, 0, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, - 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, - 0, 200, 0, 201, 11, 0, 11, 0, 0, 200, 0, 201, 181, 0, 181, 0, 0, 200, 0, 201, - 11, 0, 0, 201, 0, 11, 0, 11, 0, 200, 0, 201, 0, 11, 0, 11, 0, 200, 0, 201, 0, - 11, 0, 11, 0, 200, 0, 201, 11, 0, 11, 0, 0, 200, 0, 201, 181, 0, 181, 0, 0, 200, - 0, 201, 11, 0, 0, 201, 0, 8, 0, 8, 0, 162, 0, 163, 111, 0, 111, 0, 0, 24, - 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 111, 0, 241, 0, 0, 8, 0, 8, 0, - 8, 0, 8, 0, 8, 0, 8, 11, 0, 11, 0, 0, 200, 0, 201, 0, 158, 0, 8, 0, 8, 0, 162, - 0, 163, 222, 0, 222, 0, 31, 0, 32, 0, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 102, 0, 244, 0, 0, 245, 0, 0, 0, 246, 0, 0, - 0, 0, 150, 0, 0, 2, 0, 4, 0, 4, 0, 0, 247, 0, 247, 0, 247, 0, 247, 0, 248, 0, - 248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 244, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 27, 101, 59, 59, 0, 0, 0, 0, 0, + 0, 0, 0, 59, 59, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, + 102, 46, 103, 102, 102, 102, 102, 102, 102, 102, 102, 0, 104, 104, 0, 69, 69, + 105, 106, 65, 65, 65, 65, 107, 69, 9, 9, 71, 102, 102, 0, 0, 0, 97, 0, 0, 0, + 0, 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 33, 110, 110, 28, 111, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 97, 97, + 97, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 59, 59, 59, 59, 7, + 7, 7, 0, 7, 0, 7, 7, 7, 7, 7, 7, 0, 7, 7, 79, 7, 0, 7, 0, 0, 7, 0, 0, 0, 0, 9, + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 112, 113, 114, 110, 110, 110, 110, 80, 112, + 115, 114, 113, 113, 114, 115, 114, 113, 114, 116, 117, 98, 98, 98, 116, 113, 114, + 115, 114, 113, 114, 112, 114, 116, 117, 98, 98, 98, 116, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 65, + 118, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 100, 100, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 100, 100, 0, 0, 9, 0, 0, 0, 0, 0, 65, 65, 0, 0, + 0, 0, 0, 0, 0, 0, 65, 118, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 108, 108, + 20, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 120, 119, 120, 0, 121, + 0, 122, 0, 0, 0, 2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -const InstDB::RWInfo InstDB::rwInfo[] = { - { InstDB::RWInfo::kCategoryGeneric , 0 , { 0 , 0 , 0 , 
0 , 0 , 0 } }, // #0 [ref=1609x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 1 , 0 , 0 , 0 , 0 , 0 } }, // #1 [ref=7x] +const uint8_t InstDB::rwInfoIndexB[Inst::_kIdCount] = { + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 5, 5, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 0, 0, 0, 0, 4, 8, 1, 0, 9, 0, 0, 0, 10, 10, 10, 0, 0, 11, 0, 10, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 5, 0, 13, 14, 15, 16, 17, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 1, 1, 20, 21, 0, 0, 0, + 0, 5, 5, 0, 0, 0, 0, 0, 0, 22, 23, 0, 0, 24, 25, 26, 27, 0, 0, 25, 25, 25, 25, + 25, 25, 25, 25, 28, 29, 29, 28, 0, 0, 0, 24, 25, 24, 25, 0, 25, 24, 24, 24, 24, + 24, 24, 24, 0, 0, 30, 30, 30, 24, 24, 28, 0, 31, 10, 0, 0, 0, 0, 0, 0, 24, + 25, 0, 0, 0, 32, 33, 32, 34, 0, 0, 0, 0, 0, 10, 32, 0, 0, 0, 0, 35, 33, 32, 35, + 34, 24, 25, 24, 25, 0, 29, 29, 29, 29, 0, 0, 0, 25, 10, 10, 32, 32, 0, 0, 0, + 0, 5, 5, 0, 0, 0, 0, 0, 0, 21, 36, 0, 20, 37, 38, 0, 39, 40, 0, 0, 0, 0, 0, 10, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 41, 42, 41, 42, 43, + 44, 43, 44, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 0, 44, 45, 46, 47, 48, + 45, 46, 47, 48, 0, 0, 0, 0, 49, 50, 51, 41, 42, 43, 44, 41, 42, 43, 44, 52, + 0, 0, 53, 0, 54, 0, 0, 0, 0, 0, 10, 0, 10, 55, 56, 55, 0, 0, 0, 0, 0, 0, 55, 57, + 57, 0, 58, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 61, 0, 0, 0, 0, 62, 0, 63, 20, 64, 20, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 6, 5, 5, 0, 0, + 0, 0, 66, 67, 0, 0, 0, 0, 68, 69, 0, 3, 3, 70, 22, 71, 72, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 39, + 74, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, + 10, 10, 0, 0, 2, 2, 2, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 79, 79, 80, 79, 80, 80, 80, 79, 79, 81, 82, 0, 83, 0, 0, 0, 0, 0, 84, + 2, 2, 85, 86, 0, 0, 0, 11, 87, 0, 0, 4, 0, 0, 0, 0, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 0, 88, 0, 32, 0, 0, 0, 5, 0, 0, 6, 0, 89, 4, 0, 89, 4, 5, 5, 32, + 19, 90, 79, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91, 0, 90, 92, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 93, 93, 93, 93, 93, 0, 0, 0, 0, 0, 94, 95, 0, 0, 0, 0, 96, + 96, 0, 56, 95, 0, 0, 0, 0, 97, 98, 97, 98, 3, 3, 99, 100, 3, 3, 3, 3, 3, 3, + 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 101, 101, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, 3, 102, 103, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 104, 0, 0, 0, 0, 0, 0, 105, 0, 106, 107, 108, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 107, 3, 3, 3, 99, 100, 3, 109, 3, 55, 55, 0, + 0, 0, 0, 110, 111, 112, 111, 112, 110, 111, 112, 111, 112, 22, 113, 113, 114, + 115, 113, 113, 116, 117, 113, 113, 116, 117, 113, 113, 116, 117, 118, 118, 119, + 120, 113, 113, 113, 113, 113, 113, 118, 118, 113, 113, 116, 117, 113, 113, + 116, 117, 113, 113, 116, 117, 113, 113, 113, 113, 113, 113, 118, 118, 118, 118, + 119, 120, 
113, 113, 116, 117, 113, 113, 116, 117, 113, 113, 116, 117, 118, + 118, 119, 120, 113, 113, 116, 117, 113, 113, 116, 117, 113, 113, 121, 122, 118, + 118, 119, 120, 123, 123, 77, 124, 0, 0, 0, 0, 125, 126, 10, 10, 10, 10, 10, + 10, 10, 10, 126, 127, 0, 0, 128, 129, 84, 84, 128, 129, 3, 3, 3, 3, 3, 3, 3, 130, + 131, 132, 131, 132, 130, 131, 132, 131, 132, 100, 0, 53, 58, 133, 133, 3, + 3, 99, 100, 0, 134, 0, 3, 3, 99, 100, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 136, 137, 137, 138, 139, 139, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 141, 0, 0, + 3, 11, 134, 0, 0, 142, 135, 3, 3, 99, 100, 0, 11, 3, 3, 143, 143, 144, 144, + 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 101, 3, 0, 0, 0, 0, 0, 0, 3, 118, 145, 145, 3, 3, 3, 3, 66, 67, 3, 3, 3, 3, 68, + 69, 145, 145, 145, 145, 145, 145, 109, 109, 0, 0, 0, 0, 109, 109, 109, 109, + 109, 109, 0, 0, 113, 113, 113, 113, 146, 146, 3, 3, 3, 113, 3, 3, 113, 113, 118, + 118, 147, 147, 147, 3, 147, 3, 113, 113, 113, 113, 113, 3, 0, 0, 0, 0, 70, + 22, 71, 148, 126, 125, 127, 126, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 0, 0, 3, 0, 3, 3, 0, 149, 100, 99, 150, 0, 0, 151, 151, 151, 151, 151, 151, 151, + 151, 151, 151, 151, 151, 113, 113, 3, 3, 133, 133, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 0, 0, 0, 0, 3, 3, 3, 152, 84, 84, 3, 3, 84, 84, 3, 3, 153, 153, 153, + 153, 3, 0, 0, 0, 0, 153, 153, 153, 153, 153, 153, 3, 3, 113, 113, 113, 3, 153, + 153, 3, 3, 113, 113, 113, 3, 3, 145, 84, 84, 84, 3, 3, 3, 154, 155, 154, 3, + 3, 3, 154, 154, 154, 3, 3, 3, 154, 154, 155, 154, 3, 3, 3, 154, 3, 3, 3, 3, + 3, 3, 3, 3, 113, 113, 0, 145, 145, 145, 145, 145, 145, 145, 145, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 128, 129, 0, 0, 128, 129, 0, 0, 128, 129, 0, 129, 84, + 84, 128, 129, 84, 84, 128, 129, 84, 84, 128, 129, 0, 0, 128, 129, 0, 0, 128, + 129, 0, 129, 3, 3, 99, 100, 0, 0, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 3, 3, + 3, 3, 3, 3, 0, 0, 128, 129, 91, 3, 3, 99, 100, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 56, 56, 156, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 157, 157, + 157, 157, 158, 158, 158, 158, 158, 158, 158, 158, 156, 0, 0 +}; + +const InstDB::RWInfo InstDB::rwInfoA[] = { + { InstDB::RWInfo::kCategoryGeneric , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #0 [ref=931x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 1 , 0 , 0 , 0 , 0 , 0 } }, // #1 [ref=2x] { InstDB::RWInfo::kCategoryGeneric , 1 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #2 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 2 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #3 [ref=100x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #4 [ref=69x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 6 , 7 , 0 , 0 , 0 , 0 } }, // #5 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 8 , 9 , 0 , 0 , 0 , 0 } }, // #6 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #7 [ref=33x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #8 [ref=186x] - { InstDB::RWInfo::kCategoryGeneric , 7 , { 12, 13, 0 , 0 , 0 , 0 } }, // #9 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #10 [ref=5x] - { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #11 [ref=80x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 14, 0 , 0 , 0 } }, // #12 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 2 , { 5 , 3 , 0 , 0 , 0 , 0 } }, 
// #13 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 10, 3 , 0 , 0 , 0 , 0 } }, // #14 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 9 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #15 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 5 , 0 , 0 , 0 , 0 } }, // #16 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #17 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 0 , 0 , 0 , 0 , 0 } }, // #18 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 3 , 0 , 0 , 0 , 0 } }, // #19 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #20 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 11, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #21 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 16, 0 , 0 , 0 , 0 } }, // #22 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 17, 0 , 0 , 0 , 0 , 0 } }, // #23 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #24 [ref=34x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 18, 0 , 0 , 0 , 0 , 0 } }, // #25 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 1 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #26 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 19, 20, 0 , 0 , 0 , 0 } }, // #27 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 21, 0 , 0 , 0 } }, // #28 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 4 , 22, 17, 23, 24, 0 } }, // #29 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 25, 26, 27, 28, 29, 0 } }, // #30 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #31 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #32 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 27, 30, 31, 15, 0 , 0 } }, // #33 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 32, 33, 0 , 0 , 0 , 0 } }, // #34 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 14, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #35 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 10, 7 , 0 , 0 , 0 , 0 } }, // #36 [ref=10x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 34, 5 , 0 , 0 , 0 , 0 } }, // #37 [ref=5x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #38 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 34, 7 , 0 , 0 , 0 , 0 } }, // #39 [ref=13x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #40 [ref=9x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 36, 7 , 0 , 0 , 0 , 0 } }, // #41 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 14, { 35, 3 , 0 , 0 , 0 , 0 } }, // #42 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 14, { 36, 3 , 0 , 0 , 0 , 0 } }, // #43 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 35, 9 , 0 , 0 , 0 , 0 } }, // #44 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 11, 9 , 0 , 0 , 0 , 0 } }, // #45 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 37, 38, 0 , 0 , 0 , 0 } }, // #46 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 27, 0 , 0 , 0 , 0 , 0 } }, // #47 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 0 , 0 , 0 , 0 , 0 } }, // #48 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 15, { 1 , 39, 0 , 0 , 0 , 0 } }, // #49 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 40, 41, 3 , 0 , 0 , 0 } }, // #50 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 16, { 42, 43, 0 , 0 , 0 , 0 } }, // #51 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 17, { 42, 5 , 0 , 0 , 0 , 0 } }, // #52 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #53 [ref=3x] - { 
InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 0 , 0 , 0 , 0 , 0 } }, // #54 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 45, 0 , 0 , 0 , 0 } }, // #55 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 18, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #56 [ref=15x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 0 , 0 , 0 , 0 , 0 } }, // #57 [ref=16x] - { InstDB::RWInfo::kCategoryGeneric , 19, { 45, 0 , 0 , 0 , 0 , 0 } }, // #58 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 19, { 46, 0 , 0 , 0 , 0 , 0 } }, // #59 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 20, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #60 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 45, 0 , 0 , 0 , 0 , 0 } }, // #61 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 0 , 0 , 0 , 0 , 0 } }, // #62 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 21, { 13, 0 , 0 , 0 , 0 , 0 } }, // #63 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #64 [ref=8x] - { InstDB::RWInfo::kCategoryGeneric , 21, { 47, 0 , 0 , 0 , 0 , 0 } }, // #65 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 7 , { 48, 0 , 0 , 0 , 0 , 0 } }, // #66 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 20, { 11, 0 , 0 , 0 , 0 , 0 } }, // #67 [ref=2x] - { InstDB::RWInfo::kCategoryImul , 2 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #68 [ref=1x] - { InstDB::RWInfo::kCategoryImul , 22, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #69 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 49, 50, 0 , 0 , 0 , 0 } }, // #70 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 51, 50, 0 , 0 , 0 , 0 } }, // #71 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 4 , 9 , 0 , 0 , 0 , 0 } }, // #72 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 3 , 5 , 0 , 0 , 0 , 0 } }, // #73 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 28, 0 , 0 , 0 , 0 } }, // #74 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 52, 0 , 0 , 0 , 0 , 0 } }, // #75 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 39, 39, 0 , 0 , 0 } }, // #76 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 42, 9 , 9 , 0 , 0 , 0 } }, // #77 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 7 , 7 , 0 , 0 , 0 } }, // #78 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 13, 13, 0 , 0 , 0 } }, // #79 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 23, { 53, 39, 0 , 0 , 0 , 0 } }, // #80 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 24, { 42, 9 , 0 , 0 , 0 , 0 } }, // #81 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 25, { 34, 7 , 0 , 0 , 0 , 0 } }, // #82 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 26, { 47, 13, 0 , 0 , 0 , 0 } }, // #83 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 39, 0 , 0 , 0 , 0 } }, // #84 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 42, 9 , 0 , 0 , 0 , 0 } }, // #85 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 7 , 0 , 0 , 0 , 0 } }, // #86 [ref=5x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 13, 0 , 0 , 0 , 0 } }, // #87 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 39, 39, 0 , 0 , 0 , 0 } }, // #88 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #89 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #90 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 13, 13, 0 , 0 , 0 , 0 } }, // #91 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 39, 39, 0 , 0 , 0 } }, // #92 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 9 , 9 , 0 , 0 , 0 } }, // #93 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , 
{ 42, 13, 13, 0 , 0 , 0 } }, // #94 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 54, 0 , 0 , 0 , 0 , 0 } }, // #95 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 3 , 0 , 0 , 0 , 0 } }, // #96 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 10, 5 , 0 , 0 , 0 , 0 } }, // #97 [ref=5x] - { InstDB::RWInfo::kCategoryGeneric , 28, { 9 , 0 , 0 , 0 , 0 , 0 } }, // #98 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #99 [ref=13x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #100 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 7 , { 13, 0 , 0 , 0 , 0 , 0 } }, // #101 [ref=5x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #102 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 49, 19, 0 , 0 , 0 , 0 } }, // #103 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 0 , 0 , 0 , 0 , 0 } }, // #104 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 3 , 9 , 0 , 0 , 0 , 0 } }, // #105 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 5 , 5 , 20, 0 , 0 , 0 } }, // #106 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 20, 0 , 0 , 0 } }, // #107 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 18, 28, 56, 0 , 0 , 0 } }, // #108 [ref=2x] - { InstDB::RWInfo::kCategoryMov , 29, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #109 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 30, { 10, 5 , 0 , 0 , 0 , 0 } }, // #110 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #111 [ref=14x] - { InstDB::RWInfo::kCategoryGeneric , 16, { 11, 43, 0 , 0 , 0 , 0 } }, // #112 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 57, 0 , 0 , 0 , 0 } }, // #113 [ref=1x] - { InstDB::RWInfo::kCategoryMovh64 , 13, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #114 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 58, 7 , 0 , 0 , 0 , 0 } }, // #115 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 7 , 0 , 0 , 0 , 0 } }, // #116 [ref=7x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 5 , 0 , 0 , 0 , 0 } }, // #117 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 28, { 42, 9 , 0 , 0 , 0 , 0 } }, // #118 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 20, 19, 0 , 0 , 0 , 0 } }, // #119 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 14, { 11, 3 , 0 , 0 , 0 , 0 } }, // #120 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 34, 9 , 0 , 0 , 0 , 0 } }, // #121 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 59, 41, 3 , 0 , 0 , 0 } }, // #122 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 11, 3 , 60, 0 , 0 } }, // #123 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 28, 0 , 0 , 0 , 0 } }, // #124 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 28, 29, 0 , 0 , 0 } }, // #125 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #126 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 21, 0 , 0 , 0 , 0 } }, // #127 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 61, 0 , 0 , 0 , 0 } }, // #128 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 25, 7 , 0 , 0 , 0 , 0 } }, // #129 [ref=18x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 62, 16, 56 } }, // #130 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 63, 16, 56 } }, // #131 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 62, 0 , 0 } }, // #132 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 63, 0 , 0 } }, // #133 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 31, 
{ 53, 5 , 0 , 0 , 0 , 0 } }, // #134 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 32, { 34, 5 , 0 , 0 , 0 , 0 } }, // #135 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 33, { 47, 3 , 0 , 0 , 0 , 0 } }, // #136 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 64, 5 , 0 , 0 , 0 , 0 } }, // #137 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 15, { 4 , 39, 0 , 0 , 0 , 0 } }, // #138 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 4 , { 4 , 7 , 0 , 0 , 0 , 0 } }, // #139 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 27, { 2 , 13, 0 , 0 , 0 , 0 } }, // #140 [ref=1x] - { InstDB::RWInfo::kCategoryVmov1_8 , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #141 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 10, 9 , 0 , 0 , 0 , 0 } }, // #142 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 27, { 10, 13, 0 , 0 , 0 , 0 } }, // #143 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 65, 0 , 0 , 0 , 0 , 0 } }, // #144 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 0 , 0 , 0 } }, // #145 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 60, 0 , 0 , 0 , 0 , 0 } }, // #146 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 66, 0 , 0 , 0 , 0 } }, // #147 [ref=8x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 36, 9 , 0 , 0 , 0 , 0 } }, // #148 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #149 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 28, 0 , 0 , 0 } }, // #150 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 0 , 0 , 0 , 0 } }, // #151 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 62, 0 , 0 , 0 } }, // #152 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 68, 0 , 0 , 0 , 0 , 0 } }, // #153 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 20, 0 , 0 , 0 , 0 } }, // #154 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 31, { 69, 0 , 0 , 0 , 0 , 0 } }, // #155 [ref=30x] - { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 66, 0 , 0 , 0 } }, // #156 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 34, { 11, 0 , 0 , 0 , 0 , 0 } }, // #157 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 28, { 42, 0 , 0 , 0 , 0 , 0 } }, // #158 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 20, 21, 0 , 0 , 0 , 0 } }, // #159 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 70, 43, 43, 43, 43, 5 } }, // #160 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 4 , 5 , 5 , 5 , 5 , 5 } }, // #161 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 5 , 7 , 0 , 0 , 0 } }, // #162 [ref=8x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 5 , 9 , 0 , 0 , 0 } }, // #163 [ref=9x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 3 , 0 , 0 } }, // #164 [ref=3x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 71, 5 , 0 , 0 , 0 , 0 } }, // #165 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 11, 5 , 0 , 0 , 0 , 0 } }, // #166 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 37, { 72, 73, 0 , 0 , 0 , 0 } }, // #167 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 38, { 11, 7 , 0 , 0 , 0 , 0 } }, // #168 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 39, { 11, 9 , 0 , 0 , 0 , 0 } }, // #169 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 11, 5 , 7 , 0 , 0 , 0 } }, // #170 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 11, 5 , 9 , 0 , 0 , 0 } }, // #171 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 11, { 11, 3 , 0 , 0 , 0 , 0 } }, // #172 [ref=7x] - { InstDB::RWInfo::kCategoryVmov2_1 , 40, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #173 [ref=14x] - { 
InstDB::RWInfo::kCategoryVmov1_2 , 14, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #174 [ref=7x] - { InstDB::RWInfo::kCategoryVmov1_2 , 41, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #175 [ref=10x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 74, 7 , 0 , 0 , 0 } }, // #176 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 57, 3 , 0 , 0 , 0 } }, // #177 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 74, 3 , 0 , 0 , 0 } }, // #178 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 57, 9 , 0 , 0 , 0 } }, // #179 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 0 , 0 , 0 } }, // #180 [ref=9x] - { InstDB::RWInfo::kCategoryGeneric , 44, { 72, 43, 0 , 0 , 0 , 0 } }, // #181 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 73, 0 , 0 , 0 , 0 } }, // #182 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 3 , 0 , 0 , 0 , 0 } }, // #183 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 46, { 71, 43, 0 , 0 , 0 , 0 } }, // #184 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 2 , 3 , 3 , 0 , 0 , 0 } }, // #185 [ref=60x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 4 , 57, 7 , 0 , 0 , 0 } }, // #186 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 4 , 74, 9 , 0 , 0 , 0 } }, // #187 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 6 , 7 , 7 , 0 , 0 , 0 } }, // #188 [ref=11x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 8 , 9 , 9 , 0 , 0 , 0 } }, // #189 [ref=11x] - { InstDB::RWInfo::kCategoryGeneric , 47, { 11, 3 , 3 , 3 , 0 , 0 } }, // #190 [ref=15x] - { InstDB::RWInfo::kCategoryGeneric , 48, { 34, 7 , 7 , 7 , 0 , 0 } }, // #191 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 49, { 42, 9 , 9 , 9 , 0 , 0 } }, // #192 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 25, 7 , 7 , 0 , 0 , 0 } }, // #193 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 75, 9 , 9 , 0 , 0 , 0 } }, // #194 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 14, { 34, 3 , 0 , 0 , 0 , 0 } }, // #195 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 5 , { 42, 9 , 0 , 0 , 0 , 0 } }, // #196 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #197 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #198 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 18, { 4 , 3 , 4 , 0 , 0 , 0 } }, // #199 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 57, 7 , 0 , 0 , 0 } }, // #200 [ref=11x] - { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 74, 9 , 0 , 0 , 0 } }, // #201 [ref=13x] - { InstDB::RWInfo::kCategoryGeneric , 43, { 71, 73, 5 , 0 , 0 , 0 } }, // #202 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 43, { 11, 3 , 5 , 0 , 0 , 0 } }, // #203 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 50, { 72, 43, 73, 0 , 0 , 0 } }, // #204 [ref=4x] - { InstDB::RWInfo::kCategoryVmaskmov , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #205 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 0 , 0 , 0 , 0 , 0 } }, // #206 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 0 , 0 , 0 , 0 , 0 } }, // #207 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 51, { 11, 3 , 0 , 0 , 0 , 0 } }, // #208 [ref=12x] - { InstDB::RWInfo::kCategoryVmovddup , 52, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #209 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 57, 57, 0 , 0 , 0 } }, // #210 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 57, 0 , 0 , 0 , 0 } }, // #211 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 10, 7 , 7 , 0 , 0 , 0 } }, // #212 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 7 , 7 , 0 , 0 , 0 } }, // 
#213 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 10, 57, 7 , 0 , 0 , 0 } }, // #214 [ref=2x] - { InstDB::RWInfo::kCategoryVmovmskpd , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #215 [ref=1x] - { InstDB::RWInfo::kCategoryVmovmskps , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #216 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 53, { 34, 7 , 0 , 0 , 0 , 0 } }, // #217 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 57, 7 , 0 , 0 , 0 } }, // #218 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 74, 9 , 0 , 0 , 0 } }, // #219 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 13, { 7 , 0 , 0 , 0 , 0 , 0 } }, // #220 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 76, 0 , 0 , 0 , 0 , 0 } }, // #221 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 2 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #222 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 12, { 72, 43, 43, 43, 43, 5 } }, // #223 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 15, { 11, 39, 0 , 0 , 0 , 0 } }, // #224 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #225 [ref=6x] - { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 13, 0 , 0 , 0 , 0 } }, // #226 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 6 , { 34, 3 , 3 , 0 , 0 , 0 } }, // #227 [ref=17x] - { InstDB::RWInfo::kCategoryGeneric , 50, { 71, 73, 73, 0 , 0 , 0 } }, // #228 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 3 , 0 , 0 , 0 } }, // #229 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 7 , { 47, 5 , 0 , 0 , 0 , 0 } }, // #230 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 54, { 10, 5 , 39, 0 , 0 , 0 } }, // #231 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 55, { 10, 5 , 13, 0 , 0 , 0 } }, // #232 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 5 , 0 , 0 } }, // #233 [ref=12x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 3 , 0 , 0 , 0 , 0 } }, // #234 [ref=4x] - { InstDB::RWInfo::kCategoryVmov1_4 , 56, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #235 [ref=6x] - { InstDB::RWInfo::kCategoryVmov1_8 , 57, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #236 [ref=3x] - { InstDB::RWInfo::kCategoryVmov4_1 , 58, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #237 [ref=4x] - { InstDB::RWInfo::kCategoryVmov8_1 , 59, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #238 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 60, { 10, 5 , 5 , 5 , 0 , 0 } }, // #239 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 61, { 10, 5 , 5 , 0 , 0 , 0 } }, // #240 [ref=12x] - { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 3 , 0 , 0 , 0 , 0 } }, // #241 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 5 , 0 , 0 , 0 } }, // #242 [ref=9x] - { InstDB::RWInfo::kCategoryGeneric , 62, { 11, 3 , 0 , 0 , 0 , 0 } }, // #243 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 0 , { 56, 16, 28, 0 , 0 , 0 } }, // #244 [ref=2x] - { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 2 , 0 , 0 , 0 , 0 } }, // #245 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 51, { 2 , 2 , 0 , 0 , 0 , 0 } }, // #246 [ref=1x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 56, 16, 0 , 0 , 0 } }, // #247 [ref=4x] - { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 56, 16, 0 , 0 , 0 } } // #248 [ref=8x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #3 [ref=99x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #4 [ref=55x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 6 , 7 , 0 , 0 , 0 , 0 } }, // #5 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 8 , 9 , 0 , 0 , 0 , 0 } }, // #6 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 10, 5 , 0 , 0 , 0 , 0 } 
}, // #7 [ref=26x] + { InstDB::RWInfo::kCategoryGeneric , 7 , { 12, 13, 0 , 0 , 0 , 0 } }, // #8 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #9 [ref=65x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 5 , 3 , 0 , 0 , 0 , 0 } }, // #10 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 10, 3 , 0 , 0 , 0 , 0 } }, // #11 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 9 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #12 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 15, 5 , 0 , 0 , 0 , 0 } }, // #13 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #14 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 3 , 0 , 0 , 0 , 0 } }, // #15 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #16 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 17, 0 , 0 , 0 , 0 } }, // #17 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 1 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #18 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 20, 21, 0 , 0 , 0 , 0 } }, // #19 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #20 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #21 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 33, 34, 0 , 0 , 0 , 0 } }, // #22 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 14, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #23 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 10, 7 , 0 , 0 , 0 , 0 } }, // #24 [ref=10x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 35, 5 , 0 , 0 , 0 , 0 } }, // #25 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 36, 7 , 0 , 0 , 0 , 0 } }, // #26 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #27 [ref=11x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #28 [ref=9x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 37, 7 , 0 , 0 , 0 , 0 } }, // #29 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 14, { 36, 3 , 0 , 0 , 0 , 0 } }, // #30 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 14, { 37, 3 , 0 , 0 , 0 , 0 } }, // #31 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 36, 9 , 0 , 0 , 0 , 0 } }, // #32 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 11, 9 , 0 , 0 , 0 , 0 } }, // #33 [ref=7x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 38, 39, 0 , 0 , 0 , 0 } }, // #34 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 15, { 1 , 40, 0 , 0 , 0 , 0 } }, // #35 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 16, { 11, 43, 0 , 0 , 0 , 0 } }, // #36 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #37 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 45, 46, 0 , 0 , 0 , 0 } }, // #38 [ref=6x] + { InstDB::RWInfo::kCategoryImul , 2 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #39 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 51, 0 , 0 , 0 , 0 } }, // #40 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 51, 0 , 0 , 0 , 0 } }, // #41 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 3 , 5 , 0 , 0 , 0 , 0 } }, // #42 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 22, 29, 0 , 0 , 0 , 0 } }, // #43 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 54, 0 , 0 , 0 , 0 , 0 } }, // #44 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 23, { 55, 40, 0 , 0 , 0 , 0 } }, // #45 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 24, { 44, 9 , 0 , 0 , 0 , 0 } }, // #46 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 25, { 35, 7 , 0 , 0 , 0 , 0 } }, // #47 [ref=3x] + { 
InstDB::RWInfo::kCategoryGeneric , 26, { 48, 13, 0 , 0 , 0 , 0 } }, // #48 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 40, 0 , 0 , 0 , 0 } }, // #49 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 9 , 0 , 0 , 0 , 0 } }, // #50 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #51 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 48, 13, 0 , 0 , 0 , 0 } }, // #52 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 40, 40, 0 , 0 , 0 , 0 } }, // #53 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #54 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #55 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 13, 13, 0 , 0 , 0 , 0 } }, // #56 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 3 , 0 , 0 , 0 , 0 } }, // #57 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 10, 5 , 0 , 0 , 0 , 0 } }, // #58 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #59 [ref=13x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #60 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 20, 0 , 0 , 0 , 0 } }, // #61 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 57, 0 , 0 , 0 , 0 , 0 } }, // #62 [ref=3x] + { InstDB::RWInfo::kCategoryMov , 29, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #63 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 30, { 10, 5 , 0 , 0 , 0 , 0 } }, // #64 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #65 [ref=14x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 36, 60, 0 , 0 , 0 , 0 } }, // #66 [ref=1x] + { InstDB::RWInfo::kCategoryMovh64 , 12, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #67 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 61, 7 , 0 , 0 , 0 , 0 } }, // #68 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 35, 7 , 0 , 0 , 0 , 0 } }, // #69 [ref=7x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 5 , 0 , 0 , 0 , 0 } }, // #70 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 28, { 44, 9 , 0 , 0 , 0 , 0 } }, // #71 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 62, 20, 0 , 0 , 0 , 0 } }, // #72 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 14, { 11, 3 , 0 , 0 , 0 , 0 } }, // #73 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 17, 29, 0 , 0 , 0 , 0 } }, // #74 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 3 , 3 , 0 , 0 , 0 , 0 } }, // #75 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 51, 22, 0 , 0 , 0 , 0 } }, // #76 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 51, 65, 0 , 0 , 0 , 0 } }, // #77 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 26, 7 , 0 , 0 , 0 , 0 } }, // #78 [ref=18x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 68, 5 , 0 , 0 , 0 , 0 } }, // #79 [ref=2x] + { InstDB::RWInfo::kCategoryVmov1_8 , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #80 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 10, 9 , 0 , 0 , 0 , 0 } }, // #81 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 27, { 10, 13, 0 , 0 , 0 , 0 } }, // #82 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 0 , 0 , 0 , 0 , 0 } }, // #83 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 0 , 0 , 0 } }, // #84 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 70, 0 , 0 , 0 , 0 } }, // #85 [ref=8x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 37, 9 , 0 , 0 , 0 , 0 } }, // #86 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 71, 0 , 0 , 0 , 0 } }, // #87 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 
, { 22, 21, 0 , 0 , 0 , 0 } }, // #88 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 62, 22, 0 , 0 , 0 , 0 } }, // #89 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 74, 3 , 0 , 0 , 0 , 0 } }, // #90 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 43, 0 , 0 , 0 , 0 } }, // #91 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 76, 5 , 0 , 0 , 0 , 0 } }, // #92 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 11, 5 , 0 , 0 , 0 , 0 } }, // #93 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 37, { 74, 77, 0 , 0 , 0 , 0 } }, // #94 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 38, { 11, 7 , 0 , 0 , 0 , 0 } }, // #95 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 39, { 11, 9 , 0 , 0 , 0 , 0 } }, // #96 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 11, 3 , 0 , 0 , 0 , 0 } }, // #97 [ref=7x] + { InstDB::RWInfo::kCategoryVmov2_1 , 40, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #98 [ref=14x] + { InstDB::RWInfo::kCategoryVmov1_2 , 14, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #99 [ref=7x] + { InstDB::RWInfo::kCategoryGeneric , 44, { 74, 43, 0 , 0 , 0 , 0 } }, // #100 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 44, 9 , 0 , 0 , 0 , 0 } }, // #101 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 51, { 11, 3 , 0 , 0 , 0 , 0 } }, // #102 [ref=12x] + { InstDB::RWInfo::kCategoryVmovddup , 52, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #103 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 35, 60, 0 , 0 , 0 , 0 } }, // #104 [ref=2x] + { InstDB::RWInfo::kCategoryVmovmskpd , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #105 [ref=1x] + { InstDB::RWInfo::kCategoryVmovmskps , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #106 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 53, { 35, 7 , 0 , 0 , 0 , 0 } }, // #107 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #108 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 15, { 11, 40, 0 , 0 , 0 , 0 } }, // #109 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #110 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 13, 0 , 0 , 0 , 0 } }, // #111 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 3 , 0 , 0 , 0 , 0 } }, // #112 [ref=4x] + { InstDB::RWInfo::kCategoryVmov1_4 , 57, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #113 [ref=6x] + { InstDB::RWInfo::kCategoryVmov1_2 , 41, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #114 [ref=9x] + { InstDB::RWInfo::kCategoryVmov1_8 , 58, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #115 [ref=3x] + { InstDB::RWInfo::kCategoryVmov4_1 , 59, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #116 [ref=4x] + { InstDB::RWInfo::kCategoryVmov8_1 , 60, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #117 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 3 , 0 , 0 , 0 , 0 } }, // #118 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 17, { 44, 9 , 0 , 0 , 0 , 0 } }, // #119 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 32, { 35, 7 , 0 , 0 , 0 , 0 } }, // #120 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 2 , 0 , 0 , 0 , 0 } }, // #121 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 51, { 2 , 2 , 0 , 0 , 0 , 0 } } // #122 [ref=1x] +}; + +const InstDB::RWInfo InstDB::rwInfoB[] = { + { InstDB::RWInfo::kCategoryGeneric , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #0 [ref=734x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 1 , 0 , 0 , 0 , 0 , 0 } }, // #1 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #2 [ref=7x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #3 [ref=186x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #4 
[ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #5 [ref=14x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 14, 0 , 0 , 0 } }, // #6 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 0 , 0 , 0 , 0 , 0 } }, // #7 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #8 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 18, 0 , 0 , 0 , 0 , 0 } }, // #9 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #10 [ref=34x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 7 , 0 , 0 , 0 , 0 , 0 } }, // #11 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 19, 0 , 0 , 0 , 0 , 0 } }, // #12 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 6 , 7 , 0 , 0 , 0 , 0 } }, // #13 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 8 , 9 , 0 , 0 , 0 , 0 } }, // #14 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 22, 0 , 0 , 0 } }, // #15 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 4 , 23, 18, 24, 25, 0 } }, // #16 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 26, 27, 28, 29, 30, 0 } }, // #17 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 28, 31, 32, 16, 0 , 0 } }, // #18 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 28, 0 , 0 , 0 , 0 , 0 } }, // #19 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 0 , 0 , 0 , 0 , 0 } }, // #20 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 41, 42, 3 , 0 , 0 , 0 } }, // #21 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 17, { 44, 5 , 0 , 0 , 0 , 0 } }, // #22 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 0 , 0 , 0 , 0 , 0 } }, // #23 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 18, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #24 [ref=15x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 45, 0 , 0 , 0 , 0 , 0 } }, // #25 [ref=16x] + { InstDB::RWInfo::kCategoryGeneric , 19, { 46, 0 , 0 , 0 , 0 , 0 } }, // #26 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 19, { 47, 0 , 0 , 0 , 0 , 0 } }, // #27 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 20, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #28 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 46, 0 , 0 , 0 , 0 , 0 } }, // #29 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 0 , 0 , 0 , 0 , 0 } }, // #30 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 21, { 13, 0 , 0 , 0 , 0 , 0 } }, // #31 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #32 [ref=8x] + { InstDB::RWInfo::kCategoryGeneric , 21, { 48, 0 , 0 , 0 , 0 , 0 } }, // #33 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 7 , { 49, 0 , 0 , 0 , 0 , 0 } }, // #34 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 20, { 11, 0 , 0 , 0 , 0 , 0 } }, // #35 [ref=2x] + { InstDB::RWInfo::kCategoryImul , 22, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #36 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 52, 0 , 0 , 0 , 0 , 0 } }, // #37 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 26, 0 , 0 , 0 , 0 , 0 } }, // #38 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 4 , 9 , 0 , 0 , 0 , 0 } }, // #39 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #40 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 40, 40, 0 , 0 , 0 } }, // #41 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 9 , 9 , 0 , 0 , 0 } }, // #42 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 7 , 7 , 0 , 0 , 0 } }, // #43 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 48, 13, 13, 0 , 0 , 0 } }, // #44 [ref=6x] + { 
InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 40, 0 , 0 , 0 , 0 } }, // #45 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 9 , 0 , 0 , 0 , 0 } }, // #46 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #47 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 48, 13, 0 , 0 , 0 , 0 } }, // #48 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 48, 40, 40, 0 , 0 , 0 } }, // #49 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 9 , 9 , 0 , 0 , 0 } }, // #50 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 13, 13, 0 , 0 , 0 } }, // #51 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 56, 0 , 0 , 0 , 0 , 0 } }, // #52 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 28, { 9 , 0 , 0 , 0 , 0 , 0 } }, // #53 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 16, { 43, 0 , 0 , 0 , 0 , 0 } }, // #54 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 7 , { 13, 0 , 0 , 0 , 0 , 0 } }, // #55 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #56 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 3 , 9 , 0 , 0 , 0 , 0 } }, // #57 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 5 , 5 , 58, 0 , 0 , 0 } }, // #58 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 58, 0 , 0 , 0 } }, // #59 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 19, 29, 59, 0 , 0 , 0 } }, // #60 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 63, 42, 3 , 0 , 0 , 0 } }, // #61 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 11, 3 , 64, 0 , 0 } }, // #62 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 17, 29, 30, 0 , 0 , 0 } }, // #63 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #64 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #65 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 66, 17, 59 } }, // #66 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 67, 17, 59 } }, // #67 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 66, 0 , 0 } }, // #68 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 67, 0 , 0 } }, // #69 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 31, { 55, 5 , 0 , 0 , 0 , 0 } }, // #70 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 32, { 35, 5 , 0 , 0 , 0 , 0 } }, // #71 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 33, { 48, 3 , 0 , 0 , 0 , 0 } }, // #72 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 15, { 4 , 40, 0 , 0 , 0 , 0 } }, // #73 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 4 , 7 , 0 , 0 , 0 , 0 } }, // #74 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 27, { 2 , 13, 0 , 0 , 0 , 0 } }, // #75 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 69, 0 , 0 , 0 , 0 , 0 } }, // #76 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 4 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #77 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 10, { 64, 0 , 0 , 0 , 0 , 0 } }, // #78 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #79 [ref=6x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 71, 29, 0 , 0 , 0 } }, // #80 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 0 , 0 , 0 , 0 , 0 } }, // #81 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 0 , 0 , 0 , 0 , 0 } }, // #82 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 71, 66, 0 , 0 , 0 } }, // #83 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #84 [ref=16x] + { 
InstDB::RWInfo::kCategoryGeneric , 4 , { 36, 7 , 0 , 0 , 0 , 0 } }, // #85 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 37, 9 , 0 , 0 , 0 , 0 } }, // #86 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 72, 0 , 0 , 0 , 0 , 0 } }, // #87 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 31, { 73, 0 , 0 , 0 , 0 , 0 } }, // #88 [ref=30x] + { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 70, 0 , 0 , 0 } }, // #89 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 34, { 11, 0 , 0 , 0 , 0 , 0 } }, // #90 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 28, { 44, 0 , 0 , 0 , 0 , 0 } }, // #91 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 16, { 74, 0 , 0 , 0 , 0 , 0 } }, // #92 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 75, 43, 43, 0 , 0 , 0 } }, // #93 [ref=5x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 74, 0 , 0 , 0 , 0 , 0 } }, // #94 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 9 , 59, 17, 0 , 0 , 0 } }, // #95 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 17, { 52, 0 , 0 , 0 , 0 , 0 } }, // #96 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 75, 43, 43, 43, 43, 5 } }, // #97 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 4 , 5 , 5 , 5 , 5 , 5 } }, // #98 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 5 , 7 , 0 , 0 , 0 } }, // #99 [ref=8x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 5 , 9 , 0 , 0 , 0 } }, // #100 [ref=9x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 3 , 0 , 0 } }, // #101 [ref=3x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 11, 5 , 7 , 0 , 0 , 0 } }, // #102 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 11, 5 , 9 , 0 , 0 , 0 } }, // #103 [ref=1x] + { InstDB::RWInfo::kCategoryVmov1_2 , 41, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #104 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 78, 7 , 0 , 0 , 0 } }, // #105 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 60, 3 , 0 , 0 , 0 } }, // #106 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 78, 3 , 0 , 0 , 0 } }, // #107 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 60, 9 , 0 , 0 , 0 } }, // #108 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 0 , 0 , 0 } }, // #109 [ref=9x] + { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 77, 0 , 0 , 0 , 0 } }, // #110 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 3 , 0 , 0 , 0 , 0 } }, // #111 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 46, { 76, 43, 0 , 0 , 0 , 0 } }, // #112 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 2 , 3 , 3 , 0 , 0 , 0 } }, // #113 [ref=60x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 4 , 60, 7 , 0 , 0 , 0 } }, // #114 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 4 , 78, 9 , 0 , 0 , 0 } }, // #115 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 6 , 7 , 7 , 0 , 0 , 0 } }, // #116 [ref=11x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 8 , 9 , 9 , 0 , 0 , 0 } }, // #117 [ref=11x] + { InstDB::RWInfo::kCategoryGeneric , 47, { 11, 3 , 3 , 3 , 0 , 0 } }, // #118 [ref=15x] + { InstDB::RWInfo::kCategoryGeneric , 48, { 35, 7 , 7 , 7 , 0 , 0 } }, // #119 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 49, { 44, 9 , 9 , 9 , 0 , 0 } }, // #120 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 26, 7 , 7 , 0 , 0 , 0 } }, // #121 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 52, 9 , 9 , 0 , 0 , 0 } }, // #122 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 14, { 35, 3 , 0 , 0 , 0 , 0 } }, // #123 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 5 , { 35, 9 , 0 , 0 , 0 , 0 } }, // #124 [ref=1x] + { 
InstDB::RWInfo::kCategoryGeneric , 8 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #125 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #126 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 18, { 4 , 3 , 4 , 0 , 0 , 0 } }, // #127 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 60, 7 , 0 , 0 , 0 } }, // #128 [ref=11x] + { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 78, 9 , 0 , 0 , 0 } }, // #129 [ref=13x] + { InstDB::RWInfo::kCategoryGeneric , 43, { 76, 77, 5 , 0 , 0 , 0 } }, // #130 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 43, { 11, 3 , 5 , 0 , 0 , 0 } }, // #131 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 50, { 74, 43, 77, 0 , 0 , 0 } }, // #132 [ref=4x] + { InstDB::RWInfo::kCategoryVmaskmov , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #133 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 35, 0 , 0 , 0 , 0 , 0 } }, // #134 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 22, 0 , 0 , 0 , 0 , 0 } }, // #135 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 60, 60, 0 , 0 , 0 } }, // #136 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 10, 7 , 7 , 0 , 0 , 0 } }, // #137 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 7 , 7 , 0 , 0 , 0 } }, // #138 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 12, { 10, 60, 7 , 0 , 0 , 0 } }, // #139 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 60, 7 , 0 , 0 , 0 } }, // #140 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 78, 9 , 0 , 0 , 0 } }, // #141 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 79, 0 , 0 , 0 , 0 , 0 } }, // #142 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 54, { 35, 11, 3 , 3 , 0 , 0 } }, // #143 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 13, { 74, 43, 43, 43, 43, 5 } }, // #144 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 6 , { 35, 3 , 3 , 0 , 0 , 0 } }, // #145 [ref=17x] + { InstDB::RWInfo::kCategoryGeneric , 50, { 76, 77, 77, 0 , 0 , 0 } }, // #146 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 3 , 0 , 0 , 0 } }, // #147 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 7 , { 48, 5 , 0 , 0 , 0 , 0 } }, // #148 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 55, { 10, 5 , 40, 0 , 0 , 0 } }, // #149 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 56, { 10, 5 , 13, 0 , 0 , 0 } }, // #150 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 5 , 0 , 0 } }, // #151 [ref=12x] + { InstDB::RWInfo::kCategoryGeneric , 61, { 10, 5 , 5 , 5 , 0 , 0 } }, // #152 [ref=1x] + { InstDB::RWInfo::kCategoryGeneric , 62, { 10, 5 , 5 , 0 , 0 , 0 } }, // #153 [ref=12x] + { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 5 , 0 , 0 , 0 } }, // #154 [ref=9x] + { InstDB::RWInfo::kCategoryGeneric , 63, { 11, 3 , 0 , 0 , 0 , 0 } }, // #155 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 0 , { 59, 17, 29, 0 , 0 , 0 } }, // #156 [ref=2x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 59, 17, 0 , 0 , 0 } }, // #157 [ref=4x] + { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 59, 17, 0 , 0 , 0 } } // #158 [ref=8x] }; const InstDB::RWInfoOp InstDB::rwInfoOp[] = { - { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, 0 }, // #0 [ref=14957x] + { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, 0 }, // #0 [ref=15421x] { 0x0000000000000003u, 0x0000000000000003u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId }, // #1 [ref=10x] { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #2 [ref=217x] - { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #3 
[ref=978x] + { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #3 [ref=989x] { 0x000000000000FFFFu, 0x000000000000FFFFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #4 [ref=92x] { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #5 [ref=305x] { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kRW }, // #6 [ref=18x] - { 0x00000000000000FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #7 [ref=181x] + { 0x00000000000000FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #7 [ref=185x] { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kRW }, // #8 [ref=18x] - { 0x000000000000000Fu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #9 [ref=130x] + { 0x000000000000000Fu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #9 [ref=133x] { 0x0000000000000000u, 0x000000000000FFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #10 [ref=160x] - { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #11 [ref=415x] + { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #11 [ref=420x] { 0x0000000000000003u, 0x0000000000000003u, 0xFF, { 0 }, OpRWInfo::kRW }, // #12 [ref=1x] { 0x0000000000000003u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #13 [ref=34x] { 0x000000000000FFFFu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #14 [ref=4x] - { 0x0000000000000000u, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #15 [ref=7x] - { 0x000000000000000Fu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #16 [ref=21x] - { 0x00000000000000FFu, 0x00000000000000FFu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #17 [ref=2x] - { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemPhysId }, // #18 [ref=3x] - { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #19 [ref=3x] - { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #20 [ref=7x] - { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #21 [ref=7x] - { 0x00000000000000FFu, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #22 [ref=1x] - { 0x00000000000000FFu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #23 [ref=1x] - { 0x00000000000000FFu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #24 [ref=1x] - { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #25 [ref=20x] - { 0x000000000000000Fu, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #26 [ref=1x] - { 0x000000000000000Fu, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #27 [ref=4x] - { 0x000000000000000Fu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #28 [ref=11x] - { 0x000000000000000Fu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #29 [ref=2x] - { 0x0000000000000000u, 0x000000000000000Fu, 0x03, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #30 [ref=1x] - { 0x000000000000000Fu, 
0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #31 [ref=1x] - { 0x0000000000000000u, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #32 [ref=1x] - { 0x00000000000000FFu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #33 [ref=1x] - { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #34 [ref=76x] - { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #35 [ref=6x] - { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #36 [ref=6x] - { 0x0000000000000000u, 0x0000000000000003u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #37 [ref=1x] - { 0x0000000000000003u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #38 [ref=1x] - { 0x0000000000000001u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #39 [ref=28x] - { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #40 [ref=2x] - { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #41 [ref=3x] - { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #42 [ref=29x] - { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #43 [ref=33x] - { 0x00000000000003FFu, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #44 [ref=22x] - { 0x00000000000003FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #45 [ref=13x] - { 0x0000000000000000u, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #46 [ref=1x] - { 0x0000000000000000u, 0x0000000000000003u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #47 [ref=15x] - { 0x0000000000000000u, 0x0000000000000003u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #48 [ref=2x] - { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #49 [ref=2x] - { 0x0000000000000003u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #50 [ref=4x] - { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #51 [ref=1x] - { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #52 [ref=1x] - { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #53 [ref=14x] - { 0x0000000000000000u, 0x0000000000000001u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #54 [ref=1x] - { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #55 [ref=3x] - { 0x000000000000000Fu, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #56 [ref=20x] - { 0x000000000000FF00u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #57 [ref=23x] - { 0x0000000000000000u, 0x000000000000FF00u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #58 [ref=1x] - { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #59 [ref=1x] - { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #60 [ref=2x] - { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRead | 
OpRWInfo::kMemPhysId }, // #61 [ref=1x] - { 0x0000000000000000u, 0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #62 [ref=5x] - { 0x0000000000000000u, 0x000000000000FFFFu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #63 [ref=4x] - { 0x0000000000000000u, 0x0000000000000007u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #64 [ref=2x] - { 0x0000000000000000u, 0x0000000000000000u, 0x04, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #65 [ref=1x] - { 0x0000000000000001u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #66 [ref=10x] - { 0x0000000000000000u, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #67 [ref=5x] - { 0x0000000000000001u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #68 [ref=1x] - { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #69 [ref=30x] - { 0xFFFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #70 [ref=2x] - { 0x0000000000000000u, 0x00000000FFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #71 [ref=10x] - { 0x0000000000000000u, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #72 [ref=16x] - { 0x00000000FFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #73 [ref=16x] - { 0x000000000000FFF0u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #74 [ref=18x] - { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #75 [ref=1x] - { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId } // #76 [ref=1x] + { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kMemBaseWrite | OpRWInfo::kMemIndexWrite }, // #15 [ref=1x] + { 0x0000000000000000u, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #16 [ref=9x] + { 0x000000000000000Fu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #17 [ref=23x] + { 0x00000000000000FFu, 0x00000000000000FFu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #18 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemPhysId }, // #19 [ref=3x] + { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemBaseRW | OpRWInfo::kMemBasePostModify | OpRWInfo::kMemPhysId }, // #20 [ref=3x] + { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemBaseRW | OpRWInfo::kMemBasePostModify | OpRWInfo::kMemPhysId }, // #21 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #22 [ref=7x] + { 0x00000000000000FFu, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #23 [ref=1x] + { 0x00000000000000FFu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #24 [ref=1x] + { 0x00000000000000FFu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #25 [ref=1x] + { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #26 [ref=21x] + { 0x000000000000000Fu, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #27 [ref=1x] + { 
0x000000000000000Fu, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #28 [ref=4x] + { 0x000000000000000Fu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #29 [ref=13x] + { 0x000000000000000Fu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #30 [ref=2x] + { 0x0000000000000000u, 0x000000000000000Fu, 0x03, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #31 [ref=1x] + { 0x000000000000000Fu, 0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #32 [ref=1x] + { 0x0000000000000000u, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #33 [ref=1x] + { 0x00000000000000FFu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #34 [ref=1x] + { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #35 [ref=80x] + { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #36 [ref=6x] + { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #37 [ref=6x] + { 0x0000000000000000u, 0x0000000000000003u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #38 [ref=1x] + { 0x0000000000000003u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #39 [ref=1x] + { 0x0000000000000001u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #40 [ref=28x] + { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #41 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #42 [ref=3x] + { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #43 [ref=45x] + { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #44 [ref=30x] + { 0x00000000000003FFu, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #45 [ref=22x] + { 0x00000000000003FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #46 [ref=13x] + { 0x0000000000000000u, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #47 [ref=1x] + { 0x0000000000000000u, 0x0000000000000003u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #48 [ref=15x] + { 0x0000000000000000u, 0x0000000000000003u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #49 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #50 [ref=2x] + { 0x0000000000000003u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #51 [ref=4x] + { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #52 [ref=4x] + { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #53 [ref=1x] + { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #54 [ref=1x] + { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #55 [ref=14x] + { 0x0000000000000000u, 0x0000000000000001u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #56 [ref=1x] + { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #57 [ref=3x] + { 
0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #58 [ref=3x] + { 0x000000000000000Fu, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #59 [ref=22x] + { 0x000000000000FF00u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #60 [ref=23x] + { 0x0000000000000000u, 0x000000000000FF00u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #61 [ref=1x] + { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kMemBaseRW | OpRWInfo::kMemBasePostModify | OpRWInfo::kMemPhysId }, // #62 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #63 [ref=1x] + { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #64 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemPhysId }, // #65 [ref=1x] + { 0x0000000000000000u, 0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #66 [ref=5x] + { 0x0000000000000000u, 0x000000000000FFFFu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #67 [ref=4x] + { 0x0000000000000000u, 0x0000000000000007u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #68 [ref=2x] + { 0x0000000000000000u, 0x0000000000000000u, 0x04, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #69 [ref=1x] + { 0x0000000000000001u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #70 [ref=10x] + { 0x0000000000000000u, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #71 [ref=7x] + { 0x0000000000000001u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #72 [ref=1x] + { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #73 [ref=30x] + { 0x0000000000000000u, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #74 [ref=20x] + { 0xFFFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #75 [ref=7x] + { 0x0000000000000000u, 0x00000000FFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #76 [ref=10x] + { 0x00000000FFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #77 [ref=16x] + { 0x000000000000FFF0u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #78 [ref=18x] + { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId } // #79 [ref=1x] }; const InstDB::RWInfoRm InstDB::rwInfoRm[] = { - { InstDB::RWInfoRm::kCategoryNone , 0x00, 0 , 0, 0 }, // #0 [ref=1809x] + { InstDB::RWInfoRm::kCategoryNone , 0x00, 0 , 0, 0 }, // #0 [ref=1880x] { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #1 [ref=8x] - { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, 0 }, // #2 [ref=193x] + { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, 0 }, // #2 [ref=194x] { InstDB::RWInfoRm::kCategoryFixed , 0x02, 16, 0, 0 }, // #3 [ref=122x] { InstDB::RWInfoRm::kCategoryFixed , 0x02, 8 , 0, 0 }, // #4 [ref=66x] - { InstDB::RWInfoRm::kCategoryFixed , 0x02, 4 , 0, 0 }, // #5 [ref=34x] + { InstDB::RWInfoRm::kCategoryFixed , 0x02, 4 , 0, 0 }, // #5 [ref=33x] { InstDB::RWInfoRm::kCategoryConsistent, 0x04, 0 , 0, 0 }, // #6 [ref=270x] { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , 0, 0 }, // #7 [ref=9x] - { 
InstDB::RWInfoRm::kCategoryFixed , 0x00, 0 , 0, 0 }, // #8 [ref=60x] + { InstDB::RWInfoRm::kCategoryFixed , 0x00, 0 , 0, 0 }, // #8 [ref=63x] { InstDB::RWInfoRm::kCategoryFixed , 0x03, 0 , 0, 0 }, // #9 [ref=1x] - { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #10 [ref=20x] - { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , 0, 0 }, // #11 [ref=13x] - { InstDB::RWInfoRm::kCategoryFixed , 0x00, 16, 0, 0 }, // #12 [ref=21x] - { InstDB::RWInfoRm::kCategoryFixed , 0x00, 8 , 0, 0 }, // #13 [ref=20x] + { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #10 [ref=21x] + { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , 0, 0 }, // #11 [ref=14x] + { InstDB::RWInfoRm::kCategoryFixed , 0x00, 8 , 0, 0 }, // #12 [ref=22x] + { InstDB::RWInfoRm::kCategoryFixed , 0x00, 16, 0, 0 }, // #13 [ref=21x] { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #14 [ref=15x] { InstDB::RWInfoRm::kCategoryFixed , 0x02, 1 , 0, 0 }, // #15 [ref=5x] - { InstDB::RWInfoRm::kCategoryFixed , 0x00, 64, 0, 0 }, // #16 [ref=3x] - { InstDB::RWInfoRm::kCategoryFixed , 0x01, 4 , 0, 0 }, // #17 [ref=4x] + { InstDB::RWInfoRm::kCategoryFixed , 0x00, 64, 0, 0 }, // #16 [ref=5x] + { InstDB::RWInfoRm::kCategoryFixed , 0x01, 4 , 0, 0 }, // #17 [ref=8x] { InstDB::RWInfoRm::kCategoryNone , 0x00, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #18 [ref=22x] { InstDB::RWInfoRm::kCategoryFixed , 0x00, 10, 0, 0 }, // #19 [ref=2x] { InstDB::RWInfoRm::kCategoryNone , 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #20 [ref=5x] @@ -3916,7 +4070,7 @@ const InstDB::RWInfoRm InstDB::rwInfoRm[] = { { InstDB::RWInfoRm::kCategoryNone , 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #29 [ref=1x] { InstDB::RWInfoRm::kCategoryFixed , 0x03, 16, 0, 0 }, // #30 [ref=6x] { InstDB::RWInfoRm::kCategoryFixed , 0x01, 1 , 0, 0 }, // #31 [ref=32x] - { InstDB::RWInfoRm::kCategoryFixed , 0x01, 8 , 0, 0 }, // #32 [ref=2x] + { InstDB::RWInfoRm::kCategoryFixed , 0x01, 8 , 0, 0 }, // #32 [ref=4x] { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , 0, Features::kSSE4_1 }, // #33 [ref=1x] { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #34 [ref=3x] { InstDB::RWInfoRm::kCategoryFixed , 0x04, 8 , 0, 0 }, // #35 [ref=34x] @@ -3938,15 +4092,16 @@ const InstDB::RWInfoRm InstDB::rwInfoRm[] = { { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , 0, 0 }, // #51 [ref=13x] { InstDB::RWInfoRm::kCategoryNone , 0x02, 0 , 0, 0 }, // #52 [ref=1x] { InstDB::RWInfoRm::kCategoryFixed , 0x03, 8 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #53 [ref=1x] - { InstDB::RWInfoRm::kCategoryFixed , 0x04, 1 , 0, 0 }, // #54 [ref=1x] - { InstDB::RWInfoRm::kCategoryFixed , 0x04, 2 , 0, 0 }, // #55 [ref=1x] - { InstDB::RWInfoRm::kCategoryQuarter , 0x01, 0 , 0, 0 }, // #56 [ref=6x] - { InstDB::RWInfoRm::kCategoryEighth , 0x01, 0 , 0, 0 }, // #57 [ref=3x] - { InstDB::RWInfoRm::kCategoryQuarter , 0x02, 0 , 0, 0 }, // #58 [ref=4x] - { InstDB::RWInfoRm::kCategoryEighth , 0x02, 0 , 0, 0 }, // #59 [ref=2x] - { InstDB::RWInfoRm::kCategoryFixed , 0x0C, 16, 0, 0 }, // #60 [ref=1x] - { InstDB::RWInfoRm::kCategoryFixed , 0x06, 16, 0, 0 }, // #61 [ref=12x] - { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, Features::kAVX512_BW } // #62 [ref=2x] + { InstDB::RWInfoRm::kCategoryConsistent, 0x08, 0 , 0, 0 }, // #54 [ref=2x] + { InstDB::RWInfoRm::kCategoryFixed , 0x04, 1 , 0, 0 }, // #55 [ref=1x] + { InstDB::RWInfoRm::kCategoryFixed , 0x04, 2 , 0, 0 
}, // #56 [ref=1x] + { InstDB::RWInfoRm::kCategoryQuarter , 0x01, 0 , 0, 0 }, // #57 [ref=6x] + { InstDB::RWInfoRm::kCategoryEighth , 0x01, 0 , 0, 0 }, // #58 [ref=3x] + { InstDB::RWInfoRm::kCategoryQuarter , 0x02, 0 , 0, 0 }, // #59 [ref=4x] + { InstDB::RWInfoRm::kCategoryEighth , 0x02, 0 , 0, 0 }, // #60 [ref=2x] + { InstDB::RWInfoRm::kCategoryFixed , 0x0C, 16, 0, 0 }, // #61 [ref=1x] + { InstDB::RWInfoRm::kCategoryFixed , 0x06, 16, 0, 0 }, // #62 [ref=12x] + { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, Features::kAVX512_BW } // #63 [ref=2x] }; // ---------------------------------------------------------------------------- // ${InstRWInfoTable:End} diff --git a/libs/asmjit/src/asmjit/x86/x86instdb.h b/libs/asmjit/src/asmjit/x86/x86instdb.h index d03b870..6c40f6a 100644 --- a/libs/asmjit/src/asmjit/x86/x86instdb.h +++ b/libs/asmjit/src/asmjit/x86/x86instdb.h @@ -46,9 +46,9 @@ enum Mode : uint32_t { kModeAny = 0x03u //!< Both X86 and X64 modes supported. }; -static constexpr uint32_t modeFromArchId(uint32_t archId) noexcept { - return archId == ArchInfo::kIdX86 ? kModeX86 : - archId == ArchInfo::kIdX64 ? kModeX64 : kModeNone; +static constexpr uint32_t modeFromArch(uint32_t arch) noexcept { + return arch == Environment::kArchX86 ? kModeX86 : + arch == Environment::kArchX64 ? kModeX64 : kModeNone; } // ============================================================================ @@ -74,17 +74,18 @@ enum OpFlags : uint32_t { kOpDReg = 0x00001000u, //!< Operand can be DReg (debug register). kOpSt = 0x00002000u, //!< Operand can be 80-bit ST register (X87). kOpBnd = 0x00004000u, //!< Operand can be 128-bit BND register. - kOpAllRegs = 0x00007FFFu, //!< Combination of all possible registers. - - kOpI4 = 0x00010000u, //!< Operand can be unsigned 4-bit immediate. - kOpU4 = 0x00020000u, //!< Operand can be unsigned 4-bit immediate. - kOpI8 = 0x00040000u, //!< Operand can be signed 8-bit immediate. - kOpU8 = 0x00080000u, //!< Operand can be unsigned 8-bit immediate. - kOpI16 = 0x00100000u, //!< Operand can be signed 16-bit immediate. + kOpTmm = 0x00008000u, //!< Operand can be 0..8192-bit TMM register. + kOpAllRegs = 0x0000FFFFu, //!< Combination of all possible registers. + + kOpI4 = 0x00010000u, //!< Operand can be unsigned 4-bit immediate. + kOpU4 = 0x00020000u, //!< Operand can be unsigned 4-bit immediate. + kOpI8 = 0x00040000u, //!< Operand can be signed 8-bit immediate. + kOpU8 = 0x00080000u, //!< Operand can be unsigned 8-bit immediate. + kOpI16 = 0x00100000u, //!< Operand can be signed 16-bit immediate. kOpU16 = 0x00200000u, //!< Operand can be unsigned 16-bit immediate. - kOpI32 = 0x00400000u, //!< Operand can be signed 32-bit immediate. + kOpI32 = 0x00400000u, //!< Operand can be signed 32-bit immediate. kOpU32 = 0x00800000u, //!< Operand can be unsigned 32-bit immediate. - kOpI64 = 0x01000000u, //!< Operand can be signed 64-bit immediate. + kOpI64 = 0x01000000u, //!< Operand can be signed 64-bit immediate. kOpU64 = 0x02000000u, //!< Operand can be unsigned 64-bit immediate. kOpAllImm = 0x03FF0000u, //!< Operand can be any immediate. @@ -129,7 +130,8 @@ enum MemFlags : uint32_t { kMemOpDs = 0x1000u, //!< Implicit memory operand's DS segment. kMemOpEs = 0x2000u, //!< Implicit memory operand's ES segment. - kMemOpMib = 0x4000u //!< Operand must be MIB (base+index) pointer. + kMemOpMib = 0x4000u, //!< Operand must be MIB (base+index) pointer. + kMemOpTMem = 0x8000u //!< Operand is a sib_mem (ADX memory operand). 
}; // ============================================================================ @@ -142,12 +144,6 @@ enum MemFlags : uint32_t { enum Flags : uint32_t { kFlagNone = 0x00000000u, //!< No flags. - // TODO: Deprecated - // ---------------- - - kFlagVolatile = 0x00000040u, - kFlagPrivileged = 0x00000080u, //!< This is a privileged operation that cannot run in user mode. - // Instruction Family // ------------------ // @@ -162,6 +158,7 @@ enum Flags : uint32_t { // // These describe optional X86 prefixes that can be used to change the instruction's operation. + kFlagTsib = 0x00000800u, //!< Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB). kFlagRep = 0x00001000u, //!< Instruction can be prefixed with using the REP(REPE) or REPNE prefix. kFlagRepIgnored = 0x00002000u, //!< Instruction ignores REP|REPNE prefixes, but they are accepted. kFlagLock = 0x00004000u, //!< Instruction can be prefixed with using the LOCK prefix. @@ -325,6 +322,8 @@ struct CommonInfo { inline bool isMibOp() const noexcept { return hasFlag(kFlagMib); } //! Tests whether the instruction uses VSIB. inline bool isVsibOp() const noexcept { return hasFlag(kFlagVsib); } + //! Tests whether the instruction uses TSIB (AMX, instruction requires MOD+SIB). + inline bool isTsibOp() const noexcept { return hasFlag(kFlagTsib); } //! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable). inline bool isVex() const noexcept { return hasFlag(kFlagVex); } //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable). @@ -367,21 +366,21 @@ ASMJIT_VARAPI const CommonInfo _commonInfoTable[]; //! Instruction information (X86). struct InstInfo { - //! Index to `_nameData`. + //! Index to \ref _nameData. uint32_t _nameDataIndex : 14; - //! Index to `_commonInfoTable`. + //! Index to \ref _commonInfoTable. uint32_t _commonInfoIndex : 10; - //! Index to `InstDB::_commonInfoTableB`. + //! Index to \ref _commonInfoTableB. uint32_t _commonInfoIndexB : 8; - //! Instruction encoding, see `InstDB::EncodingId`. + //! Instruction encoding (internal encoding identifier used by \ref Assembler). uint8_t _encoding; - //! Main opcode value (0.255). + //! Main opcode value (0..255). uint8_t _mainOpcodeValue; - //! Index to `InstDB::_mainOpcodeTable` that is combined with `_mainOpcodeValue` + //! Index to \ref _mainOpcodeTable` that is combined with \ref _mainOpcodeValue //! to form the final opcode. uint8_t _mainOpcodeIndex; - //! Index to `InstDB::_altOpcodeTable` that contains a full alternative opcode. + //! Index to \ref _altOpcodeTable that contains a full alternative opcode. uint8_t _altOpcodeIndex; // -------------------------------------------------------------------------- @@ -457,7 +456,7 @@ struct InstInfo { ASMJIT_VARAPI const InstInfo _instInfoTable[]; -inline const InstInfo& infoById(uint32_t instId) noexcept { +static inline const InstInfo& infoById(uint32_t instId) noexcept { ASMJIT_ASSERT(Inst::isDefinedId(instId)); return _instInfoTable[instId]; } diff --git a/libs/asmjit/src/asmjit/x86/x86instdb_p.h b/libs/asmjit/src/asmjit/x86/x86instdb_p.h index b8ec1db..9c48bed 100644 --- a/libs/asmjit/src/asmjit/x86/x86instdb_p.h +++ b/libs/asmjit/src/asmjit/x86/x86instdb_p.h @@ -48,8 +48,8 @@ namespace InstDB { enum EncodingId : uint32_t { kEncodingNone = 0, //!< Never used. kEncodingX86Op, //!< X86 [OP]. - kEncodingX86Op_O, //!< X86 [OP] (opcode and /0-7). - kEncodingX86Op_O_I8, //!< X86 [OP] (opcode and /0-7 + 8-bit immediate). 
+ kEncodingX86Op_Mod11RM, //!< X86 [OP] (opcode with ModRM byte where MOD must be 11b). + kEncodingX86Op_Mod11RM_I8, //!< X86 [OP] (opcode with ModRM byte + 8-bit immediate). kEncodingX86Op_xAddr, //!< X86 [OP] (implicit address in the first register operand). kEncodingX86Op_xAX, //!< X86 [OP] (implicit or explicit '?AX' form). kEncodingX86Op_xDX_xAX, //!< X86 [OP] (implicit or explicit '?DX, ?AX' form). @@ -62,6 +62,8 @@ enum EncodingId : uint32_t { kEncodingX86M_Only, //!< X86 [M] (restricted to memory operand of any size). kEncodingX86M_Nop, //!< X86 [M] (special case of NOP instruction). kEncodingX86R_Native, //!< X86 [R] (register must be either 32-bit or 64-bit depending on arch). + kEncodingX86R_FromM, //!< X86 [R] - which specifies memory address. + kEncodingX86R32_EDX_EAX, //!< X86 [R32] followed by implicit EDX and EAX. kEncodingX86Rm, //!< X86 [RM] (doesn't handle single-byte size). kEncodingX86Rm_Raw66H, //!< X86 [RM] (used by LZCNT, POPCNT, and TZCNT). kEncodingX86Rm_NoSize, //!< X86 [RM] (doesn't add REX.W prefix if 64-bit reg is used). @@ -132,6 +134,7 @@ enum EncodingId : uint32_t { kEncodingExtInsertq, //!< EXT insrq (SSE4A). kEncodingExt3dNow, //!< EXT [RMI] (3DNOW specific). kEncodingVexOp, //!< VEX [OP]. + kEncodingVexOpMod, //!< VEX [OP] with MODR/M. kEncodingVexKmov, //!< VEX [RM|MR] (used by kmov[b|w|d|q]). kEncodingVexR_Wx, //!< VEX|EVEX [R] (propagatex VEX.W if GPQ used). kEncodingVexM, //!< VEX|EVEX [M]. @@ -154,6 +157,7 @@ enum EncodingId : uint32_t { kEncodingVexRvm_Wx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used). kEncodingVexRvm_ZDX_Wx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used). kEncodingVexRvm_Lx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.L if YMM used). + kEncodingVexRvm_Lx_2xK, //!< VEX|EVEX [RVM] (vp2intersectd/vp2intersectq). kEncodingVexRvmr, //!< VEX|EVEX [RVMR]. kEncodingVexRvmr_Lx, //!< VEX|EVEX [RVMR] (propagates VEX|EVEX.L if YMM used). kEncodingVexRvmi, //!< VEX|EVEX [RVMI]. @@ -187,6 +191,11 @@ enum EncodingId : uint32_t { kEncodingVexMovssMovsd, //!< VEX|EVEX vmovss, vmovsd. kEncodingFma4, //!< FMA4 [R, R, R/M, R/M]. kEncodingFma4_Lx, //!< FMA4 [R, R, R/M, R/M] (propagates AVX.L if YMM used). + kEncodingAmxCfg, //!< AMX ldtilecfg/sttilecfg. + kEncodingAmxR, //!< AMX [R] - tilezero. + kEncodingAmxRm, //!< AMX tileloadd/tileloaddt1. + kEncodingAmxMr, //!< AMX tilestored. + kEncodingAmxRmv, //!< AMX instructions that use TMM registers. kEncodingCount //!< Count of instruction encodings. }; @@ -288,8 +297,10 @@ struct RWFlagsInfoTable { uint32_t writeFlags; }; -extern const uint8_t rwInfoIndex[Inst::_kIdCount * 2]; -extern const RWInfo rwInfo[]; +extern const uint8_t rwInfoIndexA[Inst::_kIdCount]; +extern const uint8_t rwInfoIndexB[Inst::_kIdCount]; +extern const RWInfo rwInfoA[]; +extern const RWInfo rwInfoB[]; extern const RWInfoOp rwInfoOp[]; extern const RWInfoRm rwInfoRm[]; extern const RWFlagsInfoTable _rwFlagsInfoTable[]; diff --git a/libs/asmjit/src/asmjit/x86/x86internal.cpp b/libs/asmjit/src/asmjit/x86/x86internal.cpp deleted file mode 100644 index a35b0d3..0000000 --- a/libs/asmjit/src/asmjit/x86/x86internal.cpp +++ /dev/null @@ -1,1633 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. 
In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#include "../core/api-build_p.h" -#ifdef ASMJIT_BUILD_X86 - -#include "../core/logging.h" -#include "../core/string.h" -#include "../core/support.h" -#include "../core/type.h" -#include "../x86/x86internal_p.h" - -// Can be used for debugging... -// #define ASMJIT_DUMP_ARGS_ASSIGNMENT - -ASMJIT_BEGIN_SUB_NAMESPACE(x86) - -// ============================================================================ -// [asmjit::X86Internal - Helpers] -// ============================================================================ - -static ASMJIT_INLINE uint32_t x86GetXmmMovInst(const FuncFrame& frame) { - bool avx = frame.isAvxEnabled(); - bool aligned = frame.hasAlignedVecSR(); - - return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps) - : (avx ? Inst::kIdVmovups : Inst::kIdMovups); -} - -static ASMJIT_INLINE uint32_t x86VecTypeIdToRegType(uint32_t typeId) noexcept { - return typeId <= Type::_kIdVec128End ? Reg::kTypeXmm : - typeId <= Type::_kIdVec256End ? Reg::kTypeYmm : Reg::kTypeZmm; -} - -//! Converts `size` to a 'kmov?' instructio. -static inline uint32_t x86KmovFromSize(uint32_t size) noexcept { - switch (size) { - case 1: return Inst::kIdKmovb; - case 2: return Inst::kIdKmovw; - case 4: return Inst::kIdKmovd; - case 8: return Inst::kIdKmovq; - default: return Inst::kIdNone; - } -} - -// ============================================================================ -// [asmjit::X86Internal - FuncDetail] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error X86Internal::initFuncDetail(FuncDetail& func, const FuncSignature& sign, uint32_t gpSize) noexcept { - DebugUtils::unused(sign); - - const CallConv& cc = func.callConv(); - uint32_t archId = cc.archId(); - uint32_t stackOffset = cc._spillZoneSize; - - uint32_t i; - uint32_t argCount = func.argCount(); - - if (func.retCount() != 0) { - uint32_t typeId = func._rets[0].typeId(); - switch (typeId) { - case Type::kIdI64: - case Type::kIdU64: { - if (archId == ArchInfo::kIdX86) { - // Convert a 64-bit return value to two 32-bit return values. - func._retCount = 2; - typeId -= 2; - - // 64-bit value is returned in EDX:EAX on X86. - func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, typeId); - func._rets[1].initReg(Reg::kTypeGpd, Gp::kIdDx, typeId); - break; - } - else { - func._rets[0].initReg(Reg::kTypeGpq, Gp::kIdAx, typeId); - } - break; - } - - case Type::kIdI8: - case Type::kIdI16: - case Type::kIdI32: { - func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdI32); - break; - } - - case Type::kIdU8: - case Type::kIdU16: - case Type::kIdU32: { - func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdU32); - break; - } - - case Type::kIdF32: - case Type::kIdF64: { - uint32_t regType = (archId == ArchInfo::kIdX86) ? 
Reg::kTypeSt : Reg::kTypeXmm; - func._rets[0].initReg(regType, 0, typeId); - break; - } - - case Type::kIdF80: { - // 80-bit floats are always returned by FP0. - func._rets[0].initReg(Reg::kTypeSt, 0, typeId); - break; - } - - case Type::kIdMmx32: - case Type::kIdMmx64: { - // MM registers are returned through XMM or GPQ (Win64). - uint32_t regType = Reg::kTypeMm; - if (archId != ArchInfo::kIdX86) - regType = cc.strategy() == CallConv::kStrategyDefault ? Reg::kTypeXmm : Reg::kTypeGpq; - - func._rets[0].initReg(regType, 0, typeId); - break; - } - - default: { - func._rets[0].initReg(x86VecTypeIdToRegType(typeId), 0, typeId); - break; - } - } - } - - if (cc.strategy() == CallConv::kStrategyDefault) { - uint32_t gpzPos = 0; - uint32_t vecPos = 0; - - for (i = 0; i < argCount; i++) { - FuncValue& arg = func._args[i]; - uint32_t typeId = arg.typeId(); - - if (Type::isInt(typeId)) { - uint32_t regId = gpzPos < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupGp].id[gpzPos] : uint8_t(BaseReg::kIdBad); - if (regId != BaseReg::kIdBad) { - uint32_t regType = (typeId <= Type::kIdU32) ? Reg::kTypeGpd : Reg::kTypeGpq; - arg.assignRegData(regType, regId); - func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId)); - gpzPos++; - } - else { - uint32_t size = Support::max(Type::sizeOf(typeId), gpSize); - arg.assignStackOffset(int32_t(stackOffset)); - stackOffset += size; - } - continue; - } - - if (Type::isFloat(typeId) || Type::isVec(typeId)) { - uint32_t regId = vecPos < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupVec].id[vecPos] : uint8_t(BaseReg::kIdBad); - - // If this is a float, but `floatByVec` is false, we have to pass by stack. - if (Type::isFloat(typeId) && !cc.hasFlag(CallConv::kFlagPassFloatsByVec)) - regId = BaseReg::kIdBad; - - if (regId != BaseReg::kIdBad) { - arg.initTypeId(typeId); - arg.assignRegData(x86VecTypeIdToRegType(typeId), regId); - func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId)); - vecPos++; - } - else { - uint32_t size = Type::sizeOf(typeId); - arg.assignStackOffset(int32_t(stackOffset)); - stackOffset += size; - } - continue; - } - } - } - - if (cc.strategy() == CallConv::kStrategyWin64) { - for (i = 0; i < argCount; i++) { - FuncValue& arg = func._args[i]; - - uint32_t typeId = arg.typeId(); - uint32_t size = Type::sizeOf(typeId); - - if (Type::isInt(typeId) || Type::isMmx(typeId)) { - uint32_t regId = i < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupGp].id[i] : uint8_t(BaseReg::kIdBad); - if (regId != BaseReg::kIdBad) { - uint32_t regType = (size <= 4 && !Type::isMmx(typeId)) ? Reg::kTypeGpd : Reg::kTypeGpq; - arg.assignRegData(regType, regId); - func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId)); - } - else { - arg.assignStackOffset(int32_t(stackOffset)); - stackOffset += gpSize; - } - continue; - } - - if (Type::isFloat(typeId) || Type::isVec(typeId)) { - uint32_t regId = BaseReg::kIdBad; - if (i < CallConv::kMaxRegArgsPerGroup) - regId = cc._passedOrder[Reg::kGroupVec].id[i]; - - if (regId != BaseReg::kIdBad && (Type::isFloat(typeId) || cc.hasFlag(CallConv::kFlagVectorCall))) { - uint32_t regType = x86VecTypeIdToRegType(typeId); - arg.assignRegData(regType, regId); - func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId)); - } - else { - arg.assignStackOffset(int32_t(stackOffset)); - stackOffset += 8; // Always 8 bytes (float/double). 
- } - continue; - } - } - } - - func._argStackSize = stackOffset; - return kErrorOk; -} - -// ============================================================================ -// [asmjit::X86FuncArgsContext] -// ============================================================================ - -static RegInfo x86GetRegForMemToMemMove(uint32_t archId, uint32_t dstTypeId, uint32_t srcTypeId) noexcept { - uint32_t dstSize = Type::sizeOf(dstTypeId); - uint32_t srcSize = Type::sizeOf(srcTypeId); - uint32_t maxSize = Support::max(dstSize, srcSize); - uint32_t gpSize = archId == ArchInfo::kIdX86 ? 4 : 8; - - uint32_t signature = 0; - if (maxSize <= gpSize || (Type::isInt(dstTypeId) && Type::isInt(srcTypeId))) - signature = maxSize <= 4 ? Gpd::kSignature : Gpq::kSignature; - else if (maxSize <= 16) - signature = Xmm::kSignature; - else if (maxSize <= 32) - signature = Ymm::kSignature; - else if (maxSize <= 64) - signature = Zmm::kSignature; - - return RegInfo { signature }; -} - -// Used by both `argsToFuncFrame()` and `emitArgsAssignment()`. -class X86FuncArgsContext { -public: - enum VarId : uint32_t { - kVarIdNone = 0xFF - }; - - //! Contains information about a single argument or SA register that may need shuffling. - struct Var { - inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept { - cur = cur_; - out = out_; - } - - //! Reset the value to its unassigned state. - inline void reset() noexcept { - cur.reset(); - out.reset(); - } - - inline bool isDone() const noexcept { return cur.isDone(); } - inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); } - - FuncValue cur; - FuncValue out; - }; - - struct WorkData { - inline void reset() noexcept { - _archRegs = 0; - _workRegs = 0; - _usedRegs = 0; - _assignedRegs = 0; - _dstRegs = 0; - _dstShuf = 0; - _numSwaps = 0; - _numStackArgs = 0; - memset(_reserved, 0, sizeof(_reserved)); - memset(_physToVarId, kVarIdNone, 32); - } - - inline bool isAssigned(uint32_t regId) const noexcept { - ASMJIT_ASSERT(regId < 32); - return Support::bitTest(_assignedRegs, regId); - } - - inline void assign(uint32_t varId, uint32_t regId) noexcept { - ASMJIT_ASSERT(!isAssigned(regId)); - ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone); - - _physToVarId[regId] = uint8_t(varId); - _assignedRegs ^= Support::bitMask(regId); - } - - inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept { - ASMJIT_ASSERT( isAssigned(oldId)); - ASMJIT_ASSERT(!isAssigned(newId)); - ASMJIT_ASSERT(_physToVarId[oldId] == varId); - ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone); - - _physToVarId[oldId] = uint8_t(kVarIdNone); - _physToVarId[newId] = uint8_t(varId); - _assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId); - } - - inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept { - ASMJIT_ASSERT(isAssigned(aRegId)); - ASMJIT_ASSERT(isAssigned(bRegId)); - ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId); - ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId); - - _physToVarId[aRegId] = uint8_t(bVarId); - _physToVarId[bRegId] = uint8_t(aVarId); - } - - inline void unassign(uint32_t varId, uint32_t regId) noexcept { - ASMJIT_ASSERT(isAssigned(regId)); - ASMJIT_ASSERT(_physToVarId[regId] == varId); - - DebugUtils::unused(varId); - _physToVarId[regId] = uint8_t(kVarIdNone); - _assignedRegs ^= Support::bitMask(regId); - } - - inline uint32_t archRegs() const noexcept { return _archRegs; } - inline uint32_t workRegs() const noexcept { return _workRegs; } - inline uint32_t usedRegs() const noexcept { 
return _usedRegs; } - inline uint32_t assignedRegs() const noexcept { return _assignedRegs; } - inline uint32_t dstRegs() const noexcept { return _dstRegs; } - inline uint32_t availableRegs() const noexcept { return _workRegs & ~_assignedRegs; } - - uint32_t _archRegs; //!< All allocable registers provided by the architecture. - uint32_t _workRegs; //!< All registers that can be used by the shuffler. - uint32_t _usedRegs; //!< Registers used by the shuffler (all). - uint32_t _assignedRegs; //!< Assigned registers. - uint32_t _dstRegs; //!< Destination registers assigned to arguments or SA. - uint32_t _dstShuf; //!< Destination registers that require shuffling. - uint8_t _numSwaps; //!< Number of register swaps. - uint8_t _numStackArgs; //!< Number of stack loads. - uint8_t _reserved[6]; //!< Reserved (only used as padding). - uint8_t _physToVarId[32]; //!< Physical ID to variable ID mapping. - }; - - uint8_t _archId; - bool _hasStackSrc; //!< Has arguments passed via stack (SRC). - bool _hasPreservedFP; //!< Has preserved frame-pointer (FP). - uint8_t _stackDstMask; //!< Has arguments assigned to stack (DST). - uint8_t _regSwapsMask; //!< Register swap groups (bit-mask). - uint8_t _saVarId; - uint32_t _varCount; - WorkData _workData[BaseReg::kGroupVirt]; - Var _vars[kFuncArgCountLoHi + 1]; - - X86FuncArgsContext() noexcept; - - inline uint32_t archId() const noexcept { return _archId; } - inline uint32_t varCount() const noexcept { return _varCount; } - - inline Var& var(uint32_t varId) noexcept { return _vars[varId]; } - inline const Var& var(uint32_t varId) const noexcept { return _vars[varId]; } - inline uint32_t indexOf(const Var* var) const noexcept { return uint32_t((size_t)(var - _vars)); } - - Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept; - Error markScratchRegs(FuncFrame& frame) noexcept; - Error markDstRegsDirty(FuncFrame& frame) noexcept; - Error markStackArgsReg(FuncFrame& frame) noexcept; -}; - -X86FuncArgsContext::X86FuncArgsContext() noexcept { - _archId = ArchInfo::kIdNone; - _varCount = 0; - _hasStackSrc = false; - _hasPreservedFP = false; - _stackDstMask = 0; - _regSwapsMask = 0; - _saVarId = kVarIdNone; - - for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) - _workData[group].reset(); -} - -ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept { - // The code has to be updated if this changes. - ASMJIT_ASSERT(BaseReg::kGroupVirt == 4); - - uint32_t i; - const FuncDetail& func = *args.funcDetail(); - - // Initialize ArchType. - uint32_t archId = func.callConv().archId(); - uint32_t archRegCount = (archId == ArchInfo::kIdX86) ? 8 : 16; - - _archId = uint8_t(archId); - - // Initialize `_archRegs`. - _workData[Reg::kGroupGp ]._archRegs = Support::lsbMask(archRegCount) & ~Support::bitMask(Gp::kIdSp); - _workData[Reg::kGroupVec ]._archRegs = Support::lsbMask(archRegCount); - _workData[Reg::kGroupMm ]._archRegs = Support::lsbMask(8); - _workData[Reg::kGroupKReg]._archRegs = Support::lsbMask(8); - - if (frame.hasPreservedFP()) - _workData[Reg::kGroupGp]._archRegs &= ~Support::bitMask(Gp::kIdBp); - - // Extract information from all function arguments/assignments and build Var[] array. 
- uint32_t varId = 0; - for (i = 0; i < kFuncArgCountLoHi; i++) { - const FuncValue& dst_ = args.arg(i); - if (!dst_.isAssigned()) continue; - - const FuncValue& src_ = func.arg(i); - if (ASMJIT_UNLIKELY(!src_.isAssigned())) - return DebugUtils::errored(kErrorInvalidState); - - Var& var = _vars[varId]; - var.init(src_, dst_); - - FuncValue& src = var.cur; - FuncValue& dst = var.out; - - uint32_t dstGroup = 0xFFFFFFFFu; - uint32_t dstId = BaseReg::kIdBad; - WorkData* dstWd = nullptr; - - if (dst.isReg()) { - uint32_t dstType = dst.regType(); - if (ASMJIT_UNLIKELY(dstType >= Reg::kTypeCount)) - return DebugUtils::errored(kErrorInvalidRegType); - - // Copy TypeId from source if the destination doesn't have it. The RA - // used by BaseCompiler would never leave TypeId undefined, but users - // of FuncAPI can just assign phys regs without specifying the type. - if (!dst.hasTypeId()) - dst.setTypeId(Reg::typeIdOf(dst.regType())); - - dstGroup = Reg::groupOf(dstType); - if (ASMJIT_UNLIKELY(dstGroup >= BaseReg::kGroupVirt)) - return DebugUtils::errored(kErrorInvalidRegGroup); - - dstWd = &_workData[dstGroup]; - dstId = dst.regId(); - if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId))) - return DebugUtils::errored(kErrorInvalidPhysId); - - if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId))) - return DebugUtils::errored(kErrorOverlappedRegs); - - dstWd->_dstRegs |= Support::bitMask(dstId); - dstWd->_dstShuf |= Support::bitMask(dstId); - dstWd->_usedRegs |= Support::bitMask(dstId); - } - else { - if (!dst.hasTypeId()) - dst.setTypeId(src.typeId()); - - RegInfo regInfo = x86GetRegForMemToMemMove(archId, dst.typeId(), src.typeId()); - if (ASMJIT_UNLIKELY(!regInfo.isValid())) - return DebugUtils::errored(kErrorInvalidState); - _stackDstMask = uint8_t(_stackDstMask | Support::bitMask(regInfo.group())); - } - - if (src.isReg()) { - uint32_t srcId = src.regId(); - uint32_t srcGroup = Reg::groupOf(src.regType()); - - if (dstGroup == srcGroup) { - dstWd->assign(varId, srcId); - - // The best case, register is allocated where it is expected to be. - if (dstId == srcId) - var.markDone(); - } - else { - if (ASMJIT_UNLIKELY(srcGroup >= BaseReg::kGroupVirt)) - return DebugUtils::errored(kErrorInvalidState); - - WorkData& srcData = _workData[srcGroup]; - srcData.assign(varId, srcId); - } - } - else { - if (dstWd) - dstWd->_numStackArgs++; - _hasStackSrc = true; - } - - varId++; - } - - // Initialize WorkData::workRegs. - for (i = 0; i < BaseReg::kGroupVirt; i++) - _workData[i]._workRegs = (_workData[i].archRegs() & (frame.dirtyRegs(i) | ~frame.preservedRegs(i))) | _workData[i].dstRegs() | _workData[i].assignedRegs(); - - // Create a variable that represents `SARegId` if necessary. - bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP(); - - WorkData& gpRegs = _workData[BaseReg::kGroupGp]; - uint32_t saCurRegId = frame.saRegId(); - uint32_t saOutRegId = args.saRegId(); - - if (saCurRegId != BaseReg::kIdBad) { - // Check if the provided `SARegId` doesn't collide with input registers. - if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId))) - return DebugUtils::errored(kErrorOverlappedRegs); - } - - if (saOutRegId != BaseReg::kIdBad) { - // Check if the provided `SARegId` doesn't collide with argument assignments. - if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId))) - return DebugUtils::errored(kErrorOverlappedRegs); - saRegRequired = true; - } - - if (saRegRequired) { - uint32_t ptrTypeId = (archId == ArchInfo::kIdX86) ? 
Type::kIdU32 : Type::kIdU64; - uint32_t ptrRegType = (archId == ArchInfo::kIdX86) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64; - - _saVarId = uint8_t(varId); - _hasPreservedFP = frame.hasPreservedFP(); - - Var& var = _vars[varId]; - var.reset(); - - if (saCurRegId == BaseReg::kIdBad) { - if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) { - saCurRegId = saOutRegId; - } - else { - uint32_t availableRegs = gpRegs.availableRegs(); - if (!availableRegs) - availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs(); - - if (ASMJIT_UNLIKELY(!availableRegs)) - return DebugUtils::errored(kErrorNoMorePhysRegs); - - saCurRegId = Support::ctz(availableRegs); - } - } - - var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId); - gpRegs.assign(varId, saCurRegId); - gpRegs._workRegs |= Support::bitMask(saCurRegId); - - if (saOutRegId != BaseReg::kIdBad) { - var.out.initReg(ptrRegType, saOutRegId, ptrTypeId); - gpRegs._dstRegs |= Support::bitMask(saOutRegId); - gpRegs._workRegs |= Support::bitMask(saOutRegId); - } - else { - var.markDone(); - } - - varId++; - } - - _varCount = varId; - - // Detect register swaps. - for (varId = 0; varId < _varCount; varId++) { - Var& var = _vars[varId]; - if (var.cur.isReg() && var.out.isReg()) { - uint32_t srcId = var.cur.regId(); - uint32_t dstId = var.out.regId(); - - uint32_t group = Reg::groupOf(var.cur.regType()); - if (group != Reg::groupOf(var.out.regType())) - continue; - - WorkData& wd = _workData[group]; - if (wd.isAssigned(dstId)) { - Var& other = _vars[wd._physToVarId[dstId]]; - if (Reg::groupOf(other.out.regType()) == group && other.out.regId() == srcId) { - wd._numSwaps++; - _regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group)); - } - } - } - } - - return kErrorOk; -} - -ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept { - for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++) { - WorkData& wd = _workData[i]; - uint32_t regs = wd.usedRegs() | wd._dstShuf; - - wd._workRegs |= regs; - frame.addDirtyRegs(i, regs); - } - - return kErrorOk; -} - -ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept { - uint32_t groupMask = 0; - - // Handle stack to stack moves. - groupMask |= _stackDstMask; - - // Handle register swaps. - groupMask |= _regSwapsMask & ~Support::bitMask(BaseReg::kGroupGp); - - if (!groupMask) - return kErrorOk; - - // selects one dirty register per affected group that can be used as a scratch register. - for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { - if (Support::bitTest(groupMask, group)) { - WorkData& wd = _workData[group]; - - // Initially, pick some clobbered or dirty register. - uint32_t workRegs = wd.workRegs(); - uint32_t regs = workRegs & ~(wd.usedRegs() | wd._dstShuf); - - // If that didn't work out pick some register which is not in 'used'. - if (!regs) regs = workRegs & ~wd.usedRegs(); - - // If that didn't work out pick any other register that is allocable. - // This last resort case will, however, result in marking one more - // register dirty. - if (!regs) regs = wd.archRegs() & ~workRegs; - - // If that didn't work out we will have to use XORs instead of MOVs. - if (!regs) continue; - - uint32_t regMask = Support::blsi(regs); - wd._workRegs |= regMask; - frame.addDirtyRegs(group, regMask); - } - } - - return kErrorOk; -} - -ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept { - // TODO: Validate, improve... 
- if (_saVarId != kVarIdNone) { - const Var& var = _vars[_saVarId]; - frame.setSARegId(var.cur.regId()); - } - else if (frame.hasPreservedFP()) { - // Always EBP|RBP if the frame-pointer isn't omitted. - frame.setSARegId(Gp::kIdBp); - } - - return kErrorOk; -} - -// ============================================================================ -// [asmjit::X86Internal - FrameLayout] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error X86Internal::initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept { - uint32_t archId = func.callConv().archId(); - - // Initializing FuncFrame means making a copy of some properties of `func`. - // Properties like `_localStackSize` will be set by the user before the frame - // is finalized. - frame.reset(); - - frame._archId = uint8_t(archId); - frame._spRegId = Gp::kIdSp; - frame._saRegId = Gp::kIdBad; - - uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment(); - uint32_t minDynamicAlignment = Support::max(naturalStackAlignment, 16); - - if (minDynamicAlignment == naturalStackAlignment) - minDynamicAlignment <<= 1; - - frame._naturalStackAlignment = uint8_t(naturalStackAlignment); - frame._minDynamicAlignment = uint8_t(minDynamicAlignment); - frame._redZoneSize = uint8_t(func.redZoneSize()); - frame._spillZoneSize = uint8_t(func.spillZoneSize()); - frame._finalStackAlignment = uint8_t(frame._naturalStackAlignment); - - if (func.hasFlag(CallConv::kFlagCalleePopsStack)) { - frame._calleeStackCleanup = uint16_t(func.argStackSize()); - } - - // Initial masks of dirty and preserved registers. - for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) { - frame._dirtyRegs[group] = func.usedRegs(group); - frame._preservedRegs[group] = func.preservedRegs(group); - } - - // Exclude ESP/RSP - this register is never included in saved GP regs. - frame._preservedRegs[BaseReg::kGroupGp] &= ~Support::bitMask(Gp::kIdSp); - - return kErrorOk; -} - -ASMJIT_FAVOR_SIZE Error X86Internal::finalizeFuncFrame(FuncFrame& frame) noexcept { - uint32_t gpSize = frame.archId() == ArchInfo::kIdX86 ? 4 : 8; - - // The final stack alignment must be updated accordingly to call and local stack alignments. - uint32_t stackAlignment = frame._finalStackAlignment; - ASMJIT_ASSERT(stackAlignment == Support::max(frame._naturalStackAlignment, - frame._callStackAlignment, - frame._localStackAlignment)); - - // TODO: Must be configurable. - uint32_t vecSize = 16; - - bool hasFP = frame.hasPreservedFP(); - bool hasDA = frame.hasDynamicAlignment(); - - // Include EBP|RBP if the function preserves the frame-pointer. - if (hasFP) - frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(Gp::kIdBp); - - // These two are identical if the function doesn't align its stack dynamically. - uint32_t saRegId = frame.saRegId(); - if (saRegId == BaseReg::kIdBad) - saRegId = Gp::kIdSp; - - // Fix stack arguments base-register from ESP|RSP to EBP|RBP in case it was - // not picked before and the function performs dynamic stack alignment. - if (hasDA && saRegId == Gp::kIdSp) - saRegId = Gp::kIdBp; - - // Mark as dirty any register but ESP|RSP if used as SA pointer. - if (saRegId != Gp::kIdSp) - frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(saRegId); - - frame._spRegId = uint8_t(Gp::kIdSp); - frame._saRegId = uint8_t(saRegId); - - // Setup stack size used to save preserved registers. 
- frame._gpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupGp )) * gpSize); - frame._nonGpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupVec )) * vecSize + - Support::popcnt(frame.savedRegs(Reg::kGroupMm )) * 8 + - Support::popcnt(frame.savedRegs(Reg::kGroupKReg)) * 8); - - uint32_t v = 0; // The beginning of the stack frame relative to SP after prolog. - v += frame.callStackSize(); // Count 'callStackSize' <- This is used to call functions. - v = Support::alignUp(v, stackAlignment); // Align to function's stack alignment. - - frame._localStackOffset = v; // Store 'localStackOffset' <- Function's local stack starts here. - v += frame.localStackSize(); // Count 'localStackSize' <- Function's local stack ends here. - - // If the function's stack must be aligned, calculate the alignment necessary - // to store vector registers, and set `FuncFrame::kAttrAlignedVecSR` to inform - // PEI that it can use instructions that perform aligned stores/loads. - if (stackAlignment >= vecSize && frame._nonGpSaveSize) { - frame.addAttributes(FuncFrame::kAttrAlignedVecSR); - v = Support::alignUp(v, vecSize); // Align '_nonGpSaveOffset'. - } - - frame._nonGpSaveOffset = v; // Store '_nonGpSaveOffset' <- Non-GP Save/Restore starts here. - v += frame._nonGpSaveSize; // Count '_nonGpSaveSize' <- Non-GP Save/Restore ends here. - - // Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset. - if (hasDA && !hasFP) { - frame._daOffset = v; // Store 'daOffset' <- DA pointer would be stored here. - v += gpSize; // Count 'daOffset'. - } - else { - frame._daOffset = FuncFrame::kTagInvalidOffset; - } - - // The return address should be stored after GP save/restore regs. It has - // the same size as `gpSize` (basically the native register/pointer size). - // We don't adjust it now as `v` now contains the exact size that the - // function requires to adjust (call frame + stack frame, vec stack size). - // The stack (if we consider this size) is misaligned now, as it's always - // aligned before the function call - when `call()` is executed it pushes - // the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes - // (depending on the architecture). So count number of bytes needed to align - // it up to the function's CallFrame (the beginning). - if (v || frame.hasFuncCalls()) - v += Support::alignUpDiff(v + frame.gpSaveSize() + gpSize, stackAlignment); - - frame._gpSaveOffset = v; // Store 'gpSaveOffset' <- Function's GP Save/Restore starts here. - frame._stackAdjustment = v; // Store 'stackAdjustment' <- SA used by 'add zsp, SA' and 'sub zsp, SA'. - - v += frame._gpSaveSize; // Count 'gpSaveSize' <- Function's GP Save/Restore ends here. - v += gpSize; // Count 'ReturnAddress' <- As CALL pushes onto stack. - - // If the function performs dynamic stack alignment then the stack-adjustment must be aligned. - if (hasDA) - frame._stackAdjustment = Support::alignUp(frame._stackAdjustment, stackAlignment); - - uint32_t saInvOff = FuncFrame::kTagInvalidOffset; - uint32_t saTmpOff = gpSize + frame._gpSaveSize; - - // Calculate where the function arguments start relative to SP. - frame._saOffsetFromSP = hasDA ? saInvOff : v; - - // Calculate where the function arguments start relative to FP or user-provided register. - frame._saOffsetFromSA = hasFP ? gpSize * 2 // Return address + frame pointer. - : saTmpOff; // Return address + all saved GP regs. 
- - return kErrorOk; -} - -// ============================================================================ -// [asmjit::X86Internal - ArgsToFrameInfo] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error X86Internal::argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept { - X86FuncArgsContext ctx; - ASMJIT_PROPAGATE(ctx.initWorkData(frame, args)); - ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame)); - ASMJIT_PROPAGATE(ctx.markScratchRegs(frame)); - ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame)); - return kErrorOk; -} - -// ============================================================================ -// [asmjit::X86Internal - Emit Helpers] -// ============================================================================ - -ASMJIT_FAVOR_SIZE Error X86Internal::emitRegMove(Emitter* emitter, - const Operand_& dst_, - const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment) { - - // Invalid or abstract TypeIds are not allowed. - ASMJIT_ASSERT(Type::isValid(typeId) && !Type::isAbstract(typeId)); - - Operand dst(dst_); - Operand src(src_); - - uint32_t instId = Inst::kIdNone; - uint32_t memFlags = 0; - uint32_t overrideMemSize = 0; - - enum MemFlags : uint32_t { - kDstMem = 0x1, - kSrcMem = 0x2 - }; - - // Detect memory operands and patch them to have the same size as the register. - // BaseCompiler always sets memory size of allocs and spills, so it shouldn't - // be really necessary, however, after this function was separated from Compiler - // it's better to make sure that the size is always specified, as we can use - // 'movzx' and 'movsx' that rely on it. - if (dst.isMem()) { memFlags |= kDstMem; dst.as().setSize(src.size()); } - if (src.isMem()) { memFlags |= kSrcMem; src.as().setSize(dst.size()); } - - switch (typeId) { - case Type::kIdI8: - case Type::kIdU8: - case Type::kIdI16: - case Type::kIdU16: - // Special case - 'movzx' load. - if (memFlags & kSrcMem) { - instId = Inst::kIdMovzx; - dst.setSignature(Reg::signatureOfT()); - } - else if (!memFlags) { - // Change both destination and source registers to GPD (safer, no dependencies). - dst.setSignature(Reg::signatureOfT()); - src.setSignature(Reg::signatureOfT()); - } - ASMJIT_FALLTHROUGH; - - case Type::kIdI32: - case Type::kIdU32: - case Type::kIdI64: - case Type::kIdU64: - instId = Inst::kIdMov; - break; - - case Type::kIdMmx32: - instId = Inst::kIdMovd; - if (memFlags) break; - ASMJIT_FALLTHROUGH; - - case Type::kIdMmx64 : instId = Inst::kIdMovq ; break; - case Type::kIdMask8 : instId = Inst::kIdKmovb; break; - case Type::kIdMask16: instId = Inst::kIdKmovw; break; - case Type::kIdMask32: instId = Inst::kIdKmovd; break; - case Type::kIdMask64: instId = Inst::kIdKmovq; break; - - default: { - uint32_t elementTypeId = Type::baseOf(typeId); - if (Type::isVec32(typeId) && memFlags) { - overrideMemSize = 4; - if (elementTypeId == Type::kIdF32) - instId = avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss; - else - instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd; - break; - } - - if (Type::isVec64(typeId) && memFlags) { - overrideMemSize = 8; - if (elementTypeId == Type::kIdF64) - instId = avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd; - else - instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; - break; - } - - if (elementTypeId == Type::kIdF32) - instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps; - else if (elementTypeId == Type::kIdF64) - instId = avxEnabled ? 
Inst::kIdVmovapd : Inst::kIdMovapd; - else if (typeId <= Type::_kIdVec256End) - instId = avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa; - else if (elementTypeId <= Type::kIdU32) - instId = Inst::kIdVmovdqa32; - else - instId = Inst::kIdVmovdqa64; - break; - } - } - - if (!instId) - return DebugUtils::errored(kErrorInvalidState); - - if (overrideMemSize) { - if (dst.isMem()) dst.as().setSize(overrideMemSize); - if (src.isMem()) src.as().setSize(overrideMemSize); - } - - emitter->setInlineComment(comment); - return emitter->emit(instId, dst, src); -} - -ASMJIT_FAVOR_SIZE Error X86Internal::emitArgMove(Emitter* emitter, - const Reg& dst_, uint32_t dstTypeId, - const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment) { - - // Deduce optional `dstTypeId`, which may be `Type::kIdVoid` in some cases. - if (!dstTypeId) dstTypeId = opData.archRegs.regTypeToTypeId[dst_.type()]; - - // Invalid or abstract TypeIds are not allowed. - ASMJIT_ASSERT(Type::isValid(dstTypeId) && !Type::isAbstract(dstTypeId)); - ASMJIT_ASSERT(Type::isValid(srcTypeId) && !Type::isAbstract(srcTypeId)); - - Reg dst(dst_); - Operand src(src_); - - uint32_t dstSize = Type::sizeOf(dstTypeId); - uint32_t srcSize = Type::sizeOf(srcTypeId); - - uint32_t instId = Inst::kIdNone; - - // Not a real loop, just 'break' is nicer than 'goto'. - for (;;) { - if (Type::isInt(dstTypeId)) { - if (Type::isInt(srcTypeId)) { - instId = Inst::kIdMovsx; - uint32_t typeOp = (dstTypeId << 8) | srcTypeId; - - // Sign extend by using 'movsx'. - if (typeOp == ((Type::kIdI16 << 8) | Type::kIdI8 ) || - typeOp == ((Type::kIdI32 << 8) | Type::kIdI8 ) || - typeOp == ((Type::kIdI32 << 8) | Type::kIdI16) || - typeOp == ((Type::kIdI64 << 8) | Type::kIdI8 ) || - typeOp == ((Type::kIdI64 << 8) | Type::kIdI16)) break; - - // Sign extend by using 'movsxd'. - instId = Inst::kIdMovsxd; - if (typeOp == ((Type::kIdI64 << 8) | Type::kIdI32)) break; - } - - if (Type::isInt(srcTypeId) || src_.isMem()) { - // Zero extend by using 'movzx' or 'mov'. - if (dstSize <= 4 && srcSize < 4) { - instId = Inst::kIdMovzx; - dst.setSignature(Reg::signatureOfT()); - } - else { - // We should have caught all possibilities where `srcSize` is less - // than 4, so we don't have to worry about 'movzx' anymore. Minimum - // size is enough to determine if we want 32-bit or 64-bit move. - instId = Inst::kIdMov; - srcSize = Support::min(srcSize, dstSize); - - dst.setSignature(srcSize == 4 ? Reg::signatureOfT() - : Reg::signatureOfT()); - if (src.isReg()) src.setSignature(dst.signature()); - } - break; - } - - // NOTE: The previous branch caught all memory sources, from here it's - // always register to register conversion, so catch the remaining cases. - srcSize = Support::min(srcSize, dstSize); - - if (Type::isMmx(srcTypeId)) { - // 64-bit move. - instId = Inst::kIdMovq; - if (srcSize == 8) break; - - // 32-bit move. - instId = Inst::kIdMovd; - dst.setSignature(Reg::signatureOfT()); - break; - } - - if (Type::isMask(srcTypeId)) { - instId = x86KmovFromSize(srcSize); - dst.setSignature(srcSize <= 4 ? Reg::signatureOfT() - : Reg::signatureOfT()); - break; - } - - if (Type::isVec(srcTypeId)) { - // 64-bit move. - instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; - if (srcSize == 8) break; - - // 32-bit move. - instId = avxEnabled ? 
Inst::kIdVmovd : Inst::kIdMovd; - dst.setSignature(Reg::signatureOfT()); - break; - } - } - - if (Type::isMmx(dstTypeId)) { - instId = Inst::kIdMovq; - srcSize = Support::min(srcSize, dstSize); - - if (Type::isInt(srcTypeId) || src.isMem()) { - // 64-bit move. - if (srcSize == 8) break; - - // 32-bit move. - instId = Inst::kIdMovd; - if (src.isReg()) src.setSignature(Reg::signatureOfT()); - break; - } - - if (Type::isMmx(srcTypeId)) break; - - // This will hurt if `avxEnabled`. - instId = Inst::kIdMovdq2q; - if (Type::isVec(srcTypeId)) break; - } - - if (Type::isMask(dstTypeId)) { - srcSize = Support::min(srcSize, dstSize); - - if (Type::isInt(srcTypeId) || Type::isMask(srcTypeId) || src.isMem()) { - instId = x86KmovFromSize(srcSize); - if (Reg::isGp(src) && srcSize <= 4) src.setSignature(Reg::signatureOfT()); - break; - } - } - - if (Type::isVec(dstTypeId)) { - // By default set destination to XMM, will be set to YMM|ZMM if needed. - dst.setSignature(Reg::signatureOfT()); - - // This will hurt if `avxEnabled`. - if (Reg::isMm(src)) { - // 64-bit move. - instId = Inst::kIdMovq2dq; - break; - } - - // Argument conversion. - uint32_t dstElement = Type::baseOf(dstTypeId); - uint32_t srcElement = Type::baseOf(srcTypeId); - - if (dstElement == Type::kIdF32 && srcElement == Type::kIdF64) { - srcSize = Support::min(dstSize * 2, srcSize); - dstSize = srcSize / 2; - - if (srcSize <= 8) - instId = avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd; - else - instId = avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd; - - if (dstSize == 32) - dst.setSignature(Reg::signatureOfT()); - if (src.isReg()) - src.setSignature(Reg::signatureOfVecBySize(srcSize)); - break; - } - - if (dstElement == Type::kIdF64 && srcElement == Type::kIdF32) { - srcSize = Support::min(dstSize, srcSize * 2) / 2; - dstSize = srcSize * 2; - - if (srcSize <= 4) - instId = avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss; - else - instId = avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps; - - dst.setSignature(Reg::signatureOfVecBySize(dstSize)); - if (src.isReg() && srcSize >= 32) - src.setSignature(Reg::signatureOfT()); - break; - } - - srcSize = Support::min(srcSize, dstSize); - if (Reg::isGp(src) || src.isMem()) { - // 32-bit move. - if (srcSize <= 4) { - instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd; - if (src.isReg()) src.setSignature(Reg::signatureOfT()); - break; - } - - // 64-bit move. - if (srcSize == 8) { - instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq; - break; - } - } - - if (Reg::isVec(src) || src.isMem()) { - instId = avxEnabled ? 
Inst::kIdVmovaps : Inst::kIdMovaps; - uint32_t sign = Reg::signatureOfVecBySize(srcSize); - - dst.setSignature(sign); - if (src.isReg()) src.setSignature(sign); - break; - } - } - - return DebugUtils::errored(kErrorInvalidState); - } - - if (src.isMem()) - src.as().setSize(srcSize); - - emitter->setInlineComment(comment); - return emitter->emit(instId, dst, src); -} - -// ============================================================================ -// [asmjit::X86Internal - Emit Prolog & Epilog] -// ============================================================================ - -static ASMJIT_INLINE void X86Internal_setupSaveRestoreInfo(uint32_t group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept { - switch (group) { - case Reg::kGroupVec: - xReg = xmm(0); - xInst = x86GetXmmMovInst(frame); - xSize = xReg.size(); - break; - case Reg::kGroupMm: - xReg = mm(0); - xInst = Inst::kIdMovq; - xSize = xReg.size(); - break; - case Reg::kGroupKReg: - xReg = k(0); - xInst = Inst::kIdKmovq; - xSize = xReg.size(); - break; - } -} - -ASMJIT_FAVOR_SIZE Error X86Internal::emitProlog(Emitter* emitter, const FuncFrame& frame) { - uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp); - - Gp zsp = emitter->zsp(); // ESP|RSP register. - Gp zbp = emitter->zbp(); // EBP|RBP register. - Gp gpReg = zsp; // General purpose register (temporary). - Gp saReg = zsp; // Stack-arguments base pointer. - - // Emit: 'push zbp' - // 'mov zbp, zsp'. - if (frame.hasPreservedFP()) { - gpSaved &= ~Support::bitMask(Gp::kIdBp); - ASMJIT_PROPAGATE(emitter->push(zbp)); - ASMJIT_PROPAGATE(emitter->mov(zbp, zsp)); - } - - // Emit: 'push gp' sequence. - { - Support::BitWordIterator it(gpSaved); - while (it.hasNext()) { - gpReg.setId(it.next()); - ASMJIT_PROPAGATE(emitter->push(gpReg)); - } - } - - // Emit: 'mov saReg, zsp'. - uint32_t saRegId = frame.saRegId(); - if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) { - saReg.setId(saRegId); - if (frame.hasPreservedFP()) { - if (saRegId != Gp::kIdBp) - ASMJIT_PROPAGATE(emitter->mov(saReg, zbp)); - } - else { - ASMJIT_PROPAGATE(emitter->mov(saReg, zsp)); - } - } - - // Emit: 'and zsp, StackAlignment'. - if (frame.hasDynamicAlignment()) { - ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment()))); - } - - // Emit: 'sub zsp, StackAdjustment'. - if (frame.hasStackAdjustment()) { - ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment())); - } - - // Emit: 'mov [zsp + DAOffset], saReg'. - if (frame.hasDynamicAlignment() && frame.hasDAOffset()) { - Mem saMem = ptr(zsp, int32_t(frame.daOffset())); - ASMJIT_PROPAGATE(emitter->mov(saMem, saReg)); - } - - // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'. - { - Reg xReg; - Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset())); - - uint32_t xInst; - uint32_t xSize; - - for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) { - Support::BitWordIterator it(frame.savedRegs(group)); - if (it.hasNext()) { - X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize); - do { - xReg.setId(it.next()); - ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg)); - xBase.addOffsetLo32(int32_t(xSize)); - } while (it.hasNext()); - } - } - } - - return kErrorOk; -} - -ASMJIT_FAVOR_SIZE Error X86Internal::emitEpilog(Emitter* emitter, const FuncFrame& frame) { - uint32_t i; - uint32_t regId; - - uint32_t gpSize = emitter->gpSize(); - uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp); - - Gp zsp = emitter->zsp(); // ESP|RSP register. - Gp zbp = emitter->zbp(); // EBP|RBP register. 
- Gp gpReg = emitter->zsp(); // General purpose register (temporary). - - // Don't emit 'pop zbp' in the pop sequence, this case is handled separately. - if (frame.hasPreservedFP()) - gpSaved &= ~Support::bitMask(Gp::kIdBp); - - // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'. - { - Reg xReg; - Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset())); - - uint32_t xInst; - uint32_t xSize; - - for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) { - Support::BitWordIterator it(frame.savedRegs(group)); - if (it.hasNext()) { - X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize); - do { - xReg.setId(it.next()); - ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase)); - xBase.addOffsetLo32(int32_t(xSize)); - } while (it.hasNext()); - } - } - } - - // Emit 'emms' and/or 'vzeroupper'. - if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms()); - if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper()); - - if (frame.hasPreservedFP()) { - // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]' - int32_t count = int32_t(frame.gpSaveSize() - gpSize); - if (!count) - ASMJIT_PROPAGATE(emitter->mov(zsp, zbp)); - else - ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count))); - } - else { - if (frame.hasDynamicAlignment() && frame.hasDAOffset()) { - // Emit 'mov zsp, [zsp + DsaSlot]'. - Mem saMem = ptr(zsp, int32_t(frame.daOffset())); - ASMJIT_PROPAGATE(emitter->mov(zsp, saMem)); - } - else if (frame.hasStackAdjustment()) { - // Emit 'add zsp, StackAdjustment'. - ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment()))); - } - } - - // Emit 'pop gp' sequence. - if (gpSaved) { - i = gpSaved; - regId = 16; - - do { - regId--; - if (i & 0x8000) { - gpReg.setId(regId); - ASMJIT_PROPAGATE(emitter->pop(gpReg)); - } - i <<= 1; - } while (regId != 0); - } - - // Emit 'pop zbp'. - if (frame.hasPreservedFP()) - ASMJIT_PROPAGATE(emitter->pop(zbp)); - - // Emit 'ret' or 'ret x'. 
- if (frame.hasCalleeStackCleanup()) - ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup()))); - else - ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet)); - - return kErrorOk; -} - -// ============================================================================ -// [asmjit::X86Internal - Emit Arguments Assignment] -// ============================================================================ - -#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT -static void dumpFuncValue(String& sb, uint32_t archId, const FuncValue& value) noexcept { - Logging::formatTypeId(sb, value.typeId()); - sb.appendChar('@'); - if (value.isReg()) { - Logging::formatRegister(sb, 0, nullptr, archId, value.regType(), value.regId()); - } - else if (value.isStack()) { - sb.appendFormat("[%d]", value.stackOffset()); - } - else { - sb.appendString(""); - } -} - -static void dumpAssignment(String& sb, const X86FuncArgsContext& ctx) noexcept { - typedef X86FuncArgsContext::Var Var; - - uint32_t archId = ctx.archId(); - uint32_t varCount = ctx.varCount(); - - for (uint32_t i = 0; i < varCount; i++) { - const Var& var = ctx.var(i); - const FuncValue& dst = var.out; - const FuncValue& cur = var.cur; - - sb.appendFormat("Var%u: ", i); - dumpFuncValue(sb, archId, dst); - sb.appendString(" <- "); - dumpFuncValue(sb, archId, cur); - - if (var.isDone()) - sb.appendString(" {Done}"); - - sb.appendChar('\n'); - } -} -#endif - -ASMJIT_FAVOR_SIZE Error X86Internal::emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) { - typedef X86FuncArgsContext::Var Var; - typedef X86FuncArgsContext::WorkData WorkData; - - enum WorkFlags : uint32_t { - kWorkNone = 0x00, - kWorkDidSome = 0x01, - kWorkPending = 0x02, - kWorkPostponed = 0x04 - }; - - X86FuncArgsContext ctx; - ASMJIT_PROPAGATE(ctx.initWorkData(frame, args)); - -#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT - { - String sb; - dumpAssignment(sb, ctx); - printf("%s\n", sb.data()); - } -#endif - - uint32_t archId = ctx.archId(); - uint32_t varCount = ctx._varCount; - WorkData* workData = ctx._workData; - - // Use AVX if it's enabled. - bool avxEnabled = frame.isAvxEnabled(); - - uint32_t saVarId = ctx._saVarId; - uint32_t saRegId = Gp::kIdSp; - - if (frame.hasDynamicAlignment()) { - if (frame.hasPreservedFP()) - saRegId = Gp::kIdBp; - else - saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId(); - } - - // -------------------------------------------------------------------------- - // Register to stack and stack to stack moves must be first as now we have - // the biggest chance of having as many as possible unassigned registers. - // -------------------------------------------------------------------------- - - if (ctx._stackDstMask) { - // Base address of all arguments passed by stack. - Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId))); - Mem baseStackPtr = ptr(emitter->gpz(Gp::kIdSp), int32_t(0)); - - for (uint32_t varId = 0; varId < varCount; varId++) { - Var& var = ctx._vars[varId]; - if (!var.out.isStack()) continue; - - ASMJIT_ASSERT(var.cur.isReg() || var.cur.isStack()); - Reg reg; - - if (var.cur.isReg()) { - WorkData& wd = workData[Reg::groupOf(var.cur.regType())]; - uint32_t rId = var.cur.regId(); - - reg.setSignatureAndId(Reg::signatureOf(var.cur.regType()), rId); - wd.unassign(varId, rId); - } - else { - // Stack to reg move - tricky since we move stack to stack we can decide which - // register to use. 
In general we follow the rule that IntToInt moves will use - // GP regs with possibility to sign or zero extend, and all other moves will - // either use GP or VEC regs depending on the size of the move. - RegInfo rInfo = x86GetRegForMemToMemMove(archId, var.out.typeId(), var.cur.typeId()); - if (ASMJIT_UNLIKELY(!rInfo.isValid())) - return DebugUtils::errored(kErrorInvalidState); - - WorkData& wd = workData[rInfo.group()]; - uint32_t availableRegs = wd.availableRegs(); - if (ASMJIT_UNLIKELY(!availableRegs)) - return DebugUtils::errored(kErrorInvalidState); - - uint32_t rId = Support::ctz(availableRegs); - reg.setSignatureAndId(rInfo.signature(), rId); - - ASMJIT_PROPAGATE( - emitArgMove(emitter, - reg, - var.out.typeId(), - baseArgPtr.cloneAdjusted(var.cur.stackOffset()), - var.cur.typeId(), - avxEnabled)); - } - - // Register to stack move. - ASMJIT_PROPAGATE( - emitRegMove(emitter, baseStackPtr.cloneAdjusted(var.out.stackOffset()), reg, var.cur.typeId(), avxEnabled)); - - var.markDone(); - } - } - - // -------------------------------------------------------------------------- - // Shuffle all registers that are currently assigned accordingly to the assignment. - // -------------------------------------------------------------------------- - - uint32_t workFlags = kWorkNone; - for (;;) { - for (uint32_t varId = 0; varId < varCount; varId++) { - Var& var = ctx._vars[varId]; - if (var.isDone() || !var.cur.isReg()) continue; - - uint32_t curType = var.cur.regType(); - uint32_t outType = var.out.regType(); - - uint32_t curGroup = Reg::groupOf(curType); - uint32_t outGroup = Reg::groupOf(outType); - - uint32_t curId = var.cur.regId(); - uint32_t outId = var.out.regId(); - - if (curGroup != outGroup) { - ASMJIT_ASSERT(false); - - // Requires a conversion between two register groups. - if (workData[outGroup]._numSwaps) { - // TODO: Postponed - workFlags |= kWorkPending; - } - else { - // TODO: - workFlags |= kWorkPending; - } - } - else { - WorkData& wd = workData[outGroup]; - if (!wd.isAssigned(outId)) { -EmitMove: - ASMJIT_PROPAGATE( - emitArgMove(emitter, - Reg::fromTypeAndId(outType, outId), var.out.typeId(), - Reg::fromTypeAndId(curType, curId), var.cur.typeId(), avxEnabled)); - - wd.reassign(varId, outId, curId); - var.cur.initReg(outType, outId, var.out.typeId()); - - if (outId == var.out.regId()) - var.markDone(); - workFlags |= kWorkDidSome | kWorkPending; - } - else { - uint32_t altId = wd._physToVarId[outId]; - Var& altVar = ctx._vars[altId]; - - if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) { - // Swap operation is possible only between two GP registers. - if (curGroup == Reg::kGroupGp) { - uint32_t highestType = Support::max(var.cur.regType(), altVar.cur.regType()); - uint32_t signature = highestType == Reg::kTypeGpq ? Reg::signatureOfT() - : Reg::signatureOfT(); - - ASMJIT_PROPAGATE(emitter->emit(Inst::kIdXchg, Reg(signature, outId), Reg(signature, curId))); - wd.swap(varId, curId, altId, outId); - var.cur.setRegId(outId); - var.markDone(); - altVar.cur.setRegId(curId); - - if (altVar.out.isInitialized()) - altVar.markDone(); - workFlags |= kWorkDidSome; - } - else { - // If there is a scratch register it can be used to perform the swap. 
- uint32_t availableRegs = wd.availableRegs(); - if (availableRegs) { - uint32_t inOutRegs = wd.dstRegs(); - if (availableRegs & ~inOutRegs) - availableRegs &= ~inOutRegs; - outId = Support::ctz(availableRegs); - goto EmitMove; - } - else { - workFlags |= kWorkPending; - } - } - } - else { - workFlags |= kWorkPending; - } - } - } - } - - if (!(workFlags & kWorkPending)) - break; - - // If we did nothing twice it means that something is really broken. - if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed) - return DebugUtils::errored(kErrorInvalidState); - - workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed; - } - - // -------------------------------------------------------------------------- - // Load arguments passed by stack into registers. This is pretty simple and - // it never requires multiple iterations like the previous phase. - // -------------------------------------------------------------------------- - - if (ctx._hasStackSrc) { - uint32_t iterCount = 1; - if (frame.hasDynamicAlignment() && !frame.hasPreservedFP()) - saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId(); - - // Base address of all arguments passed by stack. - Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId))); - - for (uint32_t iter = 0; iter < iterCount; iter++) { - for (uint32_t varId = 0; varId < varCount; varId++) { - Var& var = ctx._vars[varId]; - if (var.isDone()) continue; - - if (var.cur.isStack()) { - ASMJIT_ASSERT(var.out.isReg()); - - uint32_t outId = var.out.regId(); - uint32_t outType = var.out.regType(); - - uint32_t group = Reg::groupOf(outType); - WorkData& wd = ctx._workData[group]; - - if (outId == saRegId && group == BaseReg::kGroupGp) { - // This register will be processed last as we still need `saRegId`. - if (iterCount == 1) { - iterCount++; - continue; - } - wd.unassign(wd._physToVarId[outId], outId); - } - - Reg dstReg = Reg::fromTypeAndId(outType, outId); - Mem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset()); - - ASMJIT_PROPAGATE( - emitArgMove(emitter, - dstReg, var.out.typeId(), - srcMem, var.cur.typeId(), avxEnabled)); - - wd.assign(varId, outId); - var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone); - } - } - } - } - - return kErrorOk; -} - -ASMJIT_END_SUB_NAMESPACE - -#endif // ASMJIT_BUILD_X86 diff --git a/libs/asmjit/src/asmjit/x86/x86internal_p.h b/libs/asmjit/src/asmjit/x86/x86internal_p.h deleted file mode 100644 index a1b7696..0000000 --- a/libs/asmjit/src/asmjit/x86/x86internal_p.h +++ /dev/null @@ -1,87 +0,0 @@ -// AsmJit - Machine code generation for C++ -// -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. 
Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. - -#ifndef ASMJIT_X86_X86INTERNAL_P_H_INCLUDED -#define ASMJIT_X86_X86INTERNAL_P_H_INCLUDED - -#include "../core/api-config.h" - -#include "../core/func.h" -#include "../x86/x86emitter.h" -#include "../x86/x86operand.h" - -ASMJIT_BEGIN_SUB_NAMESPACE(x86) - -//! \cond INTERNAL -//! \addtogroup asmjit_x86 -//! \{ - -// ============================================================================ -// [asmjit::X86Internal] -// ============================================================================ - -//! X86 utilities used at multiple places, not part of public API, not exported. -struct X86Internal { - //! Initialize `FuncDetail` (X86 specific). - static Error initFuncDetail(FuncDetail& func, const FuncSignature& sign, uint32_t gpSize) noexcept; - - //! Initialize `FuncFrame` (X86 specific). - static Error initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept; - - //! Finalize `FuncFrame` (X86 specific). - static Error finalizeFuncFrame(FuncFrame& frame) noexcept; - - static Error argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept; - - //! Emit function prolog. - static Error emitProlog(Emitter* emitter, const FuncFrame& frame); - - //! Emit function epilog. - static Error emitEpilog(Emitter* emitter, const FuncFrame& frame); - - //! Emit a pure move operation between two registers or the same type or - //! between a register and its home slot. This function does not handle - //! register conversion. - static Error emitRegMove(Emitter* emitter, - const Operand_& dst_, - const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment = nullptr); - - //! Emit move from a function argument (either register or stack) to a register. - //! - //! This function can handle the necessary conversion from one argument to - //! another, and from one register type to another, if it's possible. Any - //! attempt of conversion that requires third register of a different group - //! (for example conversion from K to MMX) will fail. - static Error emitArgMove(Emitter* emitter, - const Reg& dst_, uint32_t dstTypeId, - const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment = nullptr); - - static Error emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args); -}; - -//! \} -//! \endcond - -ASMJIT_END_SUB_NAMESPACE - -#endif // ASMJIT_X86_X86INTERNAL_P_H_INCLUDED diff --git a/libs/asmjit/src/asmjit/x86/x86opcode_p.h b/libs/asmjit/src/asmjit/x86/x86opcode_p.h index 69cafe5..5f936bf 100644 --- a/libs/asmjit/src/asmjit/x86/x86opcode_p.h +++ b/libs/asmjit/src/asmjit/x86/x86opcode_p.h @@ -24,8 +24,6 @@ #ifndef ASMJIT_X86_X86OPCODE_P_H_INCLUDED #define ASMJIT_X86_X86OPCODE_P_H_INCLUDED -#include "../core/logging.h" -#include "../core/string.h" #include "../x86/x86globals.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) @@ -232,21 +230,41 @@ struct Opcode { kCDTT_T4X = kCDTT_T1_4X, // Alias to have only 3 letters. - // `O` Field in MorR/M - // ------------------- + // `O` Field in ModR/M (??:xxx:???) 
+ // -------------------------------- + + kModO_Shift = 18, + kModO_Mask = 0x7u << kModO_Shift, - kO_Shift = 18, - kO_Mask = 0x7u << kO_Shift, + kModO__ = 0x0u, + kModO_0 = 0x0u << kModO_Shift, + kModO_1 = 0x1u << kModO_Shift, + kModO_2 = 0x2u << kModO_Shift, + kModO_3 = 0x3u << kModO_Shift, + kModO_4 = 0x4u << kModO_Shift, + kModO_5 = 0x5u << kModO_Shift, + kModO_6 = 0x6u << kModO_Shift, + kModO_7 = 0x7u << kModO_Shift, - kO__ = 0x0u, - kO_0 = 0x0u << kO_Shift, - kO_1 = 0x1u << kO_Shift, - kO_2 = 0x2u << kO_Shift, - kO_3 = 0x3u << kO_Shift, - kO_4 = 0x4u << kO_Shift, - kO_5 = 0x5u << kO_Shift, - kO_6 = 0x6u << kO_Shift, - kO_7 = 0x7u << kO_Shift, + // `RM` Field in ModR/M (??:???:xxx) + // --------------------------------- + // + // Second data field used by ModR/M byte. This is only used by few + // instructions that use OPCODE+MOD/RM where both values in Mod/RM + // are part of the opcode. + + kModRM_Shift = 10, + kModRM_Mask = 0x7u << kModRM_Shift, + + kModRM__ = 0x0u, + kModRM_0 = 0x0u << kModRM_Shift, + kModRM_1 = 0x1u << kModRM_Shift, + kModRM_2 = 0x2u << kModRM_Shift, + kModRM_3 = 0x3u << kModRM_Shift, + kModRM_4 = 0x4u << kModRM_Shift, + kModRM_5 = 0x5u << kModRM_Shift, + kModRM_6 = 0x6u << kModRM_Shift, + kModRM_7 = 0x7u << kModRM_Shift, // `PP` Field // ---------- @@ -339,14 +357,17 @@ struct Opcode { k000F3A = kPP_00 | kMM_0F3A, // '0F3A' k660000 = kPP_66 | kMM_00, // '66' k660F00 = kPP_66 | kMM_0F, // '660F' + k660F01 = kPP_66 | kMM_0F01, // '660F01' k660F38 = kPP_66 | kMM_0F38, // '660F38' k660F3A = kPP_66 | kMM_0F3A, // '660F3A' kF20000 = kPP_F2 | kMM_00, // 'F2' kF20F00 = kPP_F2 | kMM_0F, // 'F20F' + kF20F01 = kPP_F2 | kMM_0F01, // 'F20F01' kF20F38 = kPP_F2 | kMM_0F38, // 'F20F38' kF20F3A = kPP_F2 | kMM_0F3A, // 'F20F3A' kF30000 = kPP_F3 | kMM_00, // 'F3' kF30F00 = kPP_F3 | kMM_0F, // 'F30F' + kF30F01 = kPP_F3 | kMM_0F01, // 'F30F01' kF30F38 = kPP_F3 | kMM_0F38, // 'F30F38' kF30F3A = kPP_F3 | kMM_0F3A, // 'F30F3A' kFPU_00 = kPP_00 | kMM_00, // '__' (FPU) @@ -411,9 +432,14 @@ struct Opcode { return operator|=(mask[size & 0xF]); } - //! Extract `O` field from the opcode. - ASMJIT_INLINE uint32_t extractO() const noexcept { - return (v >> kO_Shift) & 0x07; + //! Extract `O` field (R) from the opcode (specified as /0..7 in instruction manuals). + ASMJIT_INLINE uint32_t extractModO() const noexcept { + return (v >> kModO_Shift) & 0x07; + } + + //! Extract `RM` field (RM) from the opcode (usually specified as another opcode value). + ASMJIT_INLINE uint32_t extractModRM() const noexcept { + return (v >> kModRM_Shift) & 0x07; } //! Extract `REX` prefix from opcode combined with `options`. 
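For readers skimming the x86opcode_p.h hunk above: the old single `O` field (kO_*) is renamed to kModO_*, and a second field, kModRM_*, is added at bit 10, so an opcode word can now carry both ModR/M selectors for the few instructions that fix both of them; extractModO() and extractModRM() read them back. Below is a minimal standalone sketch of that packing, assuming nothing beyond the two shift constants shown in the hunk (Opcode is an internal asmjit type, so the modO()/modRM() helpers and main() here are illustrative only, not asmjit API):

#include <cstdint>
#include <cstdio>

// Shift constants as introduced by the hunk above.
static constexpr uint32_t kModO_Shift  = 18;  // ModR/M 'reg' selector (/0../7).
static constexpr uint32_t kModRM_Shift = 10;  // ModR/M 'rm' selector.

// Illustrative packers; in asmjit the pre-shifted kModO_0..7 / kModRM_0..7
// constants play this role.
static constexpr uint32_t modO(uint32_t n)  noexcept { return n << kModO_Shift; }
static constexpr uint32_t modRM(uint32_t n) noexcept { return n << kModRM_Shift; }

// Mirrors Opcode::extractModO() / Opcode::extractModRM() from the hunk.
static constexpr uint32_t extractModO(uint32_t v)  noexcept { return (v >> kModO_Shift)  & 0x07u; }
static constexpr uint32_t extractModRM(uint32_t v) noexcept { return (v >> kModRM_Shift) & 0x07u; }

int main() {
  // A made-up opcode byte (0xFE) with /4 in ModR/M.reg and 0b010 in ModR/M.rm.
  uint32_t v = 0x000000FEu | modO(4) | modRM(2);
  std::printf("O=%u RM=%u\n", extractModO(v), extractModRM(v));  // prints: O=4 RM=2
  return 0;
}

As the comment in the hunk notes, the RM field only matters for instructions encoded as OPCODE+ModR/M where both ModR/M selectors are part of the opcode itself; everything else keeps using the O field alone.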
diff --git a/libs/asmjit/src/asmjit/x86/x86operand.cpp b/libs/asmjit/src/asmjit/x86/x86operand.cpp index ca7ce5a..4c81d8e 100644 --- a/libs/asmjit/src/asmjit/x86/x86operand.cpp +++ b/libs/asmjit/src/asmjit/x86/x86operand.cpp @@ -29,29 +29,6 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) -// ============================================================================ -// [asmjit::x86::OpData] -// ============================================================================ - -const OpData opData = { - { - // RegInfo[] - #define VALUE(X) { RegTraits::kSignature } - { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }, - #undef VALUE - - // RegCount[] - #define VALUE(X) RegTraits::kCount - { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }, - #undef VALUE - - // RegTypeToTypeId[] - #define VALUE(X) RegTraits::kTypeId - { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) } - #undef VALUE - } -}; - // ============================================================================ // [asmjit::x86::Operand - Unit] // ============================================================================ @@ -167,7 +144,7 @@ UNIT(x86_operand) { EXPECT(zmm7.ymm() == ymm7); EXPECT(zmm7.zmm() == zmm7); - INFO("Checking x86::FpMm register properties"); + INFO("Checking x86::Mm register properties"); EXPECT(Mm().isReg() == true); EXPECT(mm2.isReg() == true); EXPECT(mm2.id() == 2); diff --git a/libs/asmjit/src/asmjit/x86/x86operand.h b/libs/asmjit/src/asmjit/x86/x86operand.h index 7e22021..79e9ce1 100644 --- a/libs/asmjit/src/asmjit/x86/x86operand.h +++ b/libs/asmjit/src/asmjit/x86/x86operand.h @@ -24,13 +24,65 @@ #ifndef ASMJIT_X86_X86OPERAND_H_INCLUDED #define ASMJIT_X86_X86OPERAND_H_INCLUDED -#include "../core/arch.h" +#include "../core/archtraits.h" #include "../core/operand.h" #include "../core/type.h" #include "../x86/x86globals.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) +#define ASMJIT_MEM_PTR(FUNC, SIZE) \ + static constexpr Mem FUNC(const Gp& base, int32_t offset = 0) noexcept { \ + return Mem(base, offset, SIZE); \ + } \ + static constexpr Mem FUNC(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ + return Mem(base, index, shift, offset, SIZE); \ + } \ + static constexpr Mem FUNC(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ + return Mem(base, index, shift, offset, SIZE); \ + } \ + static constexpr Mem FUNC(const Label& base, int32_t offset = 0) noexcept { \ + return Mem(base, offset, SIZE); \ + } \ + static constexpr Mem FUNC(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ + return Mem(base, index, shift, offset, SIZE); \ + } \ + static constexpr Mem FUNC(const Rip& rip_, int32_t offset = 0) noexcept { \ + return Mem(rip_, offset, SIZE); \ + } \ + static constexpr Mem FUNC(uint64_t base) noexcept { \ + return Mem(base, SIZE); \ + } \ + static constexpr Mem FUNC(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE); \ + } \ + static constexpr Mem FUNC(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE); \ + } \ + \ + static constexpr Mem FUNC##_abs(uint64_t base) noexcept { \ + return Mem(base, SIZE, Mem::kSignatureMemAbs); \ + } \ + static constexpr Mem FUNC##_abs(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE, Mem::kSignatureMemAbs); \ + } \ + static constexpr Mem FUNC##_abs(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE, Mem::kSignatureMemAbs); 
\ + } \ + \ + static constexpr Mem FUNC##_rel(uint64_t base) noexcept { \ + return Mem(base, SIZE, Mem::kSignatureMemRel); \ + } \ + static constexpr Mem FUNC##_rel(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE, Mem::kSignatureMemRel); \ + } \ + static constexpr Mem FUNC##_rel(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ + return Mem(base, index, shift, SIZE, Mem::kSignatureMemRel); \ + } + +//! \addtogroup asmjit_x86 +//! \{ + // ============================================================================ // [Forward Declarations] // ============================================================================ @@ -56,13 +108,11 @@ class CReg; class DReg; class St; class Bnd; +class Tmm; class Rip; -//! \addtogroup asmjit_x86 -//! \{ - // ============================================================================ -// [asmjit::x86::RegTraits] +// [asmjit::x86::Reg] // ============================================================================ //! Register traits (X86). @@ -93,13 +143,10 @@ ASMJIT_DEFINE_REG_TRAITS(CReg , BaseReg::kTypeCustom + 1, BaseReg::kGroupVirt + ASMJIT_DEFINE_REG_TRAITS(DReg , BaseReg::kTypeCustom + 2, BaseReg::kGroupVirt + 2, 0 , 16, Type::kIdVoid ); ASMJIT_DEFINE_REG_TRAITS(St , BaseReg::kTypeCustom + 3, BaseReg::kGroupVirt + 3, 10, 8 , Type::kIdF80 ); ASMJIT_DEFINE_REG_TRAITS(Bnd , BaseReg::kTypeCustom + 4, BaseReg::kGroupVirt + 4, 16, 4 , Type::kIdVoid ); -ASMJIT_DEFINE_REG_TRAITS(Rip , BaseReg::kTypeIP , BaseReg::kGroupVirt + 5, 0 , 1 , Type::kIdVoid ); +ASMJIT_DEFINE_REG_TRAITS(Tmm , BaseReg::kTypeCustom + 5, BaseReg::kGroupVirt + 5, 0 , 8 , Type::kIdVoid ); +ASMJIT_DEFINE_REG_TRAITS(Rip , BaseReg::kTypeIP , BaseReg::kGroupVirt + 6, 0 , 1 , Type::kIdVoid ); //! \endcond -// ============================================================================ -// [asmjit::x86::Reg] -// ============================================================================ - //! Register (X86). class Reg : public BaseReg { public: @@ -107,77 +154,116 @@ class Reg : public BaseReg { //! Register type. enum RegType : uint32_t { - kTypeNone = BaseReg::kTypeNone, //!< No register type or invalid register. - kTypeGpbLo = BaseReg::kTypeGp8Lo, //!< Low GPB register (AL, BL, CL, DL, ...). - kTypeGpbHi = BaseReg::kTypeGp8Hi, //!< High GPB register (AH, BH, CH, DH only). - kTypeGpw = BaseReg::kTypeGp16, //!< GPW register. - kTypeGpd = BaseReg::kTypeGp32, //!< GPD register. - kTypeGpq = BaseReg::kTypeGp64, //!< GPQ register (64-bit). - kTypeXmm = BaseReg::kTypeVec128, //!< XMM register (SSE+). - kTypeYmm = BaseReg::kTypeVec256, //!< YMM register (AVX+). - kTypeZmm = BaseReg::kTypeVec512, //!< ZMM register (AVX512+). - kTypeMm = BaseReg::kTypeOther0, //!< MMX register. - kTypeKReg = BaseReg::kTypeOther1, //!< K register (AVX512+). - kTypeSReg = BaseReg::kTypeCustom+0, //!< Segment register (None, ES, CS, SS, DS, FS, GS). - kTypeCReg = BaseReg::kTypeCustom+1, //!< Control register (CR). - kTypeDReg = BaseReg::kTypeCustom+2, //!< Debug register (DR). - kTypeSt = BaseReg::kTypeCustom+3, //!< FPU (x87) register. - kTypeBnd = BaseReg::kTypeCustom+4, //!< Bound register (BND). - kTypeRip = BaseReg::kTypeIP, //!< Instruction pointer (EIP, RIP). - kTypeCount = BaseReg::kTypeCustom+5 //!< Count of register types. + //! No register type or invalid register. + kTypeNone = BaseReg::kTypeNone, + + //! Low GPB register (AL, BL, CL, DL, ...). + kTypeGpbLo = BaseReg::kTypeGp8Lo, + //! High GPB register (AH, BH, CH, DH only). 
+ kTypeGpbHi = BaseReg::kTypeGp8Hi, + //! GPW register. + kTypeGpw = BaseReg::kTypeGp16, + //! GPD register. + kTypeGpd = BaseReg::kTypeGp32, + //! GPQ register (64-bit). + kTypeGpq = BaseReg::kTypeGp64, + //! XMM register (SSE+). + kTypeXmm = BaseReg::kTypeVec128, + //! YMM register (AVX+). + kTypeYmm = BaseReg::kTypeVec256, + //! ZMM register (AVX512+). + kTypeZmm = BaseReg::kTypeVec512, + //! MMX register. + kTypeMm = BaseReg::kTypeOther0, + //! K register (AVX512+). + kTypeKReg = BaseReg::kTypeOther1, + //! Instruction pointer (EIP, RIP). + kTypeRip = BaseReg::kTypeIP, + //! Segment register (None, ES, CS, SS, DS, FS, GS). + kTypeSReg = BaseReg::kTypeCustom + 0, + //! Control register (CR). + kTypeCReg = BaseReg::kTypeCustom + 1, + //! Debug register (DR). + kTypeDReg = BaseReg::kTypeCustom + 2, + //! FPU (x87) register. + kTypeSt = BaseReg::kTypeCustom + 3, + //! Bound register (BND). + kTypeBnd = BaseReg::kTypeCustom + 4, + //! TMM register (AMX_TILE) + kTypeTmm = BaseReg::kTypeCustom + 5, + + //! Count of register types. + kTypeCount = BaseReg::kTypeCustom + 6 }; //! Register group. enum RegGroup : uint32_t { - kGroupGp = BaseReg::kGroupGp, //!< GP register group or none (universal). - kGroupVec = BaseReg::kGroupVec, //!< XMM|YMM|ZMM register group (universal). - kGroupMm = BaseReg::kGroupOther0, //!< MMX register group (legacy). - kGroupKReg = BaseReg::kGroupOther1, //!< K register group. - - // These are not managed by BaseCompiler nor used by Func-API: - kGroupSReg = BaseReg::kGroupVirt+0, //!< Segment register group. - kGroupCReg = BaseReg::kGroupVirt+1, //!< Control register group. - kGroupDReg = BaseReg::kGroupVirt+2, //!< Debug register group. - kGroupSt = BaseReg::kGroupVirt+3, //!< FPU register group. - kGroupBnd = BaseReg::kGroupVirt+4, //!< Bound register group. - kGroupRip = BaseReg::kGroupVirt+5, //!< Instrucion pointer (IP). - kGroupCount //!< Count of all register groups. + //! GP register group or none (universal). + kGroupGp = BaseReg::kGroupGp, + //! XMM|YMM|ZMM register group (universal). + kGroupVec = BaseReg::kGroupVec, + //! MMX register group (legacy). + kGroupMm = BaseReg::kGroupOther0, + //! K register group. + kGroupKReg = BaseReg::kGroupOther1, + + // These are not managed by Compiler nor used by Func-API: + + //! Segment register group. + kGroupSReg = BaseReg::kGroupVirt+0, + //! Control register group. + kGroupCReg = BaseReg::kGroupVirt+1, + //! Debug register group. + kGroupDReg = BaseReg::kGroupVirt+2, + //! FPU register group. + kGroupSt = BaseReg::kGroupVirt+3, + //! Bound register group. + kGroupBnd = BaseReg::kGroupVirt+4, + //! TMM register group. + kGroupTmm = BaseReg::kGroupVirt+5, + //! Instrucion pointer (IP). + kGroupRip = BaseReg::kGroupVirt+6, + + //! Count of all register groups. + kGroupCount }; //! Tests whether the register is a GPB register (8-bit). constexpr bool isGpb() const noexcept { return size() == 1; } //! Tests whether the register is a low GPB register (8-bit). - constexpr bool isGpbLo() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isGpbLo() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a high GPB register (8-bit). - constexpr bool isGpbHi() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isGpbHi() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a GPW register (16-bit). 
- constexpr bool isGpw() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isGpw() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a GPD register (32-bit). - constexpr bool isGpd() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isGpd() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a GPQ register (64-bit). - constexpr bool isGpq() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isGpq() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is an XMM register (128-bit). - constexpr bool isXmm() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isXmm() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a YMM register (256-bit). - constexpr bool isYmm() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isYmm() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a ZMM register (512-bit). - constexpr bool isZmm() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isZmm() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is an MMX register (64-bit). - constexpr bool isMm() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isMm() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a K register (64-bit). - constexpr bool isKReg() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isKReg() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a segment register. - constexpr bool isSReg() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isSReg() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a control register. - constexpr bool isCReg() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isCReg() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a debug register. - constexpr bool isDReg() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isDReg() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is an FPU register (80-bit). - constexpr bool isSt() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isSt() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is a bound register. - constexpr bool isBnd() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isBnd() const noexcept { return hasBaseSignature(RegTraits::kSignature); } + //! Tests whether the register is a TMM register. + constexpr bool isTmm() const noexcept { return hasBaseSignature(RegTraits::kSignature); } //! Tests whether the register is RIP. 
- constexpr bool isRip() const noexcept { return hasSignature(RegTraits::kSignature); } + constexpr bool isRip() const noexcept { return hasBaseSignature(RegTraits::kSignature); } template inline void setRegT(uint32_t rId) noexcept { @@ -191,15 +277,16 @@ class Reg : public BaseReg { setId(rId); } - static inline uint32_t groupOf(uint32_t rType) noexcept; + static inline uint32_t groupOf(uint32_t rType) noexcept { return _archTraits[Environment::kArchX86].regTypeToGroup(rType); } + static inline uint32_t typeIdOf(uint32_t rType) noexcept { return _archTraits[Environment::kArchX86].regTypeToTypeId(rType); } + static inline uint32_t signatureOf(uint32_t rType) noexcept { return _archTraits[Environment::kArchX86].regTypeToSignature(rType); } + template static inline uint32_t groupOfT() noexcept { return RegTraits::kGroup; } - static inline uint32_t typeIdOf(uint32_t rType) noexcept; template static inline uint32_t typeIdOfT() noexcept { return RegTraits::kTypeId; } - static inline uint32_t signatureOf(uint32_t rType) noexcept; template static inline uint32_t signatureOfT() noexcept { return RegTraits::kSignature; } @@ -216,9 +303,9 @@ class Reg : public BaseReg { //! Tests whether the `op` operand is either a low or high 8-bit GPB register. static inline bool isGpb(const Operand_& op) noexcept { // Check operand type, register group, and size. Not interested in register type. - const uint32_t kSgn = (Operand::kOpReg << kSignatureOpShift ) | - (1 << kSignatureSizeShift) ; - return (op.signature() & (kSignatureOpMask | kSignatureSizeMask)) == kSgn; + const uint32_t kSgn = (Operand::kOpReg << kSignatureOpTypeShift) | + (1 << kSignatureSizeShift ) ; + return (op.signature() & (kSignatureOpTypeMask | kSignatureSizeMask)) == kSgn; } static inline bool isGpbLo(const Operand_& op) noexcept { return op.as().isGpbLo(); } @@ -236,6 +323,7 @@ class Reg : public BaseReg { static inline bool isDReg(const Operand_& op) noexcept { return op.as().isDReg(); } static inline bool isSt(const Operand_& op) noexcept { return op.as().isSt(); } static inline bool isBnd(const Operand_& op) noexcept { return op.as().isBnd(); } + static inline bool isTmm(const Operand_& op) noexcept { return op.as().isTmm(); } static inline bool isRip(const Operand_& op) noexcept { return op.as().isRip(); } static inline bool isGpb(const Operand_& op, uint32_t rId) noexcept { return isGpb(op) & (op.id() == rId); } @@ -254,6 +342,7 @@ class Reg : public BaseReg { static inline bool isDReg(const Operand_& op, uint32_t rId) noexcept { return isDReg(op) & (op.id() == rId); } static inline bool isSt(const Operand_& op, uint32_t rId) noexcept { return isSt(op) & (op.id() == rId); } static inline bool isBnd(const Operand_& op, uint32_t rId) noexcept { return isBnd(op) & (op.id() == rId); } + static inline bool isTmm(const Operand_& op, uint32_t rId) noexcept { return isTmm(op) & (op.id() == rId); } static inline bool isRip(const Operand_& op, uint32_t rId) noexcept { return isRip(op) & (op.id() == rId); } }; @@ -313,7 +402,7 @@ class Vec : public Reg { //! Casts this register to a register that has half the size (or XMM if it's already XMM). inline Vec half() const noexcept { - return Vec(type() == kTypeZmm ? signatureOf(kTypeYmm) : signatureOf(kTypeXmm), id()); + return Vec::fromSignatureAndId(type() == kTypeZmm ? signatureOfT() : signatureOfT(), id()); } }; @@ -323,19 +412,26 @@ class SReg : public Reg { //! X86 segment id. enum Id : uint32_t { - kIdNone = 0, //!< No segment (default). - kIdEs = 1, //!< ES segment. 
- kIdCs = 2, //!< CS segment. - kIdSs = 3, //!< SS segment. - kIdDs = 4, //!< DS segment. - kIdFs = 5, //!< FS segment. - kIdGs = 6, //!< GS segment. - - //! Count of segment registers supported by AsmJit. + //! No segment (default). + kIdNone = 0, + //! ES segment. + kIdEs = 1, + //! CS segment. + kIdCs = 2, + //! SS segment. + kIdSs = 3, + //! DS segment. + kIdDs = 4, + //! FS segment. + kIdFs = 5, + //! GS segment. + kIdGs = 6, + + //! Count of X86 segment registers supported by AsmJit. //! //! \note X86 architecture has 6 segment registers - ES, CS, SS, DS, FS, GS. //! X64 architecture lowers them down to just FS and GS. AsmJit supports 7 - //! segment registers - all addressable in both and X64 modes and one + //! segment registers - all addressable in both X86 and X64 modes and one //! extra called `SReg::kIdNone`, which is AsmJit specific and means that //! there is no segment register specified. kIdCount = 7 @@ -388,6 +484,8 @@ class DReg : public Reg { ASMJIT_DEFINE_FINAL_REG(DReg, Reg, RegTraits) }; //! 128-bit BND register (BND+). class Bnd : public Reg { ASMJIT_DEFINE_FINAL_REG(Bnd, Reg, RegTraits) }; +//! 8192-bit TMM register (AMX). +class Tmm : public Reg { ASMJIT_DEFINE_FINAL_REG(Tmm, Reg, RegTraits) }; //! RIP register (X86). class Rip : public Reg { ASMJIT_DEFINE_FINAL_REG(Rip, Reg, RegTraits) }; @@ -403,6 +501,314 @@ inline Ymm Vec::ymm() const noexcept { return Ymm(id()); } inline Zmm Vec::zmm() const noexcept { return Zmm(id()); } //! \endcond +//! \namespace asmjit::x86::regs +//! +//! Registers provided by X86 and X64 ISAs are in both `asmjit::x86` and +//! `asmjit::x86::regs` namespaces so they can be included with using directive. +//! For example `using namespace asmjit::x86::regs` would include all registers, +//! but not other X86-specific API, whereas `using namespace asmjit::x86` would +//! include everything X86-specific. +#ifndef _DOXYGEN +namespace regs { +#endif + +//! Creates an 8-bit low GPB register operand. +static constexpr GpbLo gpb(uint32_t rId) noexcept { return GpbLo(rId); } +//! Creates an 8-bit low GPB register operand. +static constexpr GpbLo gpb_lo(uint32_t rId) noexcept { return GpbLo(rId); } +//! Creates an 8-bit high GPB register operand. +static constexpr GpbHi gpb_hi(uint32_t rId) noexcept { return GpbHi(rId); } +//! Creates a 16-bit GPW register operand. +static constexpr Gpw gpw(uint32_t rId) noexcept { return Gpw(rId); } +//! Creates a 32-bit GPD register operand. +static constexpr Gpd gpd(uint32_t rId) noexcept { return Gpd(rId); } +//! Creates a 64-bit GPQ register operand (64-bit). +static constexpr Gpq gpq(uint32_t rId) noexcept { return Gpq(rId); } +//! Creates a 128-bit XMM register operand. +static constexpr Xmm xmm(uint32_t rId) noexcept { return Xmm(rId); } +//! Creates a 256-bit YMM register operand. +static constexpr Ymm ymm(uint32_t rId) noexcept { return Ymm(rId); } +//! Creates a 512-bit ZMM register operand. +static constexpr Zmm zmm(uint32_t rId) noexcept { return Zmm(rId); } +//! Creates a 64-bit Mm register operand. +static constexpr Mm mm(uint32_t rId) noexcept { return Mm(rId); } +//! Creates a 64-bit K register operand. +static constexpr KReg k(uint32_t rId) noexcept { return KReg(rId); } +//! Creates a 32-bit or 64-bit control register operand. +static constexpr CReg cr(uint32_t rId) noexcept { return CReg(rId); } +//! Creates a 32-bit or 64-bit debug register operand. +static constexpr DReg dr(uint32_t rId) noexcept { return DReg(rId); } +//! Creates an 80-bit st register operand. 
+static constexpr St st(uint32_t rId) noexcept { return St(rId); } +//! Creates a 128-bit bound register operand. +static constexpr Bnd bnd(uint32_t rId) noexcept { return Bnd(rId); } +//! Creates a TMM register operand. +static constexpr Tmm tmm(uint32_t rId) noexcept { return Tmm(rId); } + +static constexpr Gp al = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdAx)); +static constexpr Gp bl = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdBx)); +static constexpr Gp cl = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdCx)); +static constexpr Gp dl = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdDx)); +static constexpr Gp spl = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdSp)); +static constexpr Gp bpl = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdBp)); +static constexpr Gp sil = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdSi)); +static constexpr Gp dil = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdDi)); +static constexpr Gp r8b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR8)); +static constexpr Gp r9b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR9)); +static constexpr Gp r10b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR10)); +static constexpr Gp r11b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR11)); +static constexpr Gp r12b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR12)); +static constexpr Gp r13b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR13)); +static constexpr Gp r14b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR14)); +static constexpr Gp r15b = Gp(Gp::SignatureAndId(GpbLo::kSignature, Gp::kIdR15)); + +static constexpr Gp ah = Gp(Gp::SignatureAndId(GpbHi::kSignature, Gp::kIdAx)); +static constexpr Gp bh = Gp(Gp::SignatureAndId(GpbHi::kSignature, Gp::kIdBx)); +static constexpr Gp ch = Gp(Gp::SignatureAndId(GpbHi::kSignature, Gp::kIdCx)); +static constexpr Gp dh = Gp(Gp::SignatureAndId(GpbHi::kSignature, Gp::kIdDx)); + +static constexpr Gp ax = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdAx)); +static constexpr Gp bx = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdBx)); +static constexpr Gp cx = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdCx)); +static constexpr Gp dx = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdDx)); +static constexpr Gp sp = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdSp)); +static constexpr Gp bp = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdBp)); +static constexpr Gp si = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdSi)); +static constexpr Gp di = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdDi)); +static constexpr Gp r8w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR8)); +static constexpr Gp r9w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR9)); +static constexpr Gp r10w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR10)); +static constexpr Gp r11w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR11)); +static constexpr Gp r12w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR12)); +static constexpr Gp r13w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR13)); +static constexpr Gp r14w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR14)); +static constexpr Gp r15w = Gp(Gp::SignatureAndId(Gpw::kSignature, Gp::kIdR15)); + +static constexpr Gp eax = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdAx)); +static constexpr Gp ebx = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdBx)); +static constexpr Gp ecx = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdCx)); +static constexpr Gp edx = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdDx)); +static constexpr Gp esp = 
Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdSp)); +static constexpr Gp ebp = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdBp)); +static constexpr Gp esi = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdSi)); +static constexpr Gp edi = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdDi)); +static constexpr Gp r8d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR8)); +static constexpr Gp r9d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR9)); +static constexpr Gp r10d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR10)); +static constexpr Gp r11d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR11)); +static constexpr Gp r12d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR12)); +static constexpr Gp r13d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR13)); +static constexpr Gp r14d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR14)); +static constexpr Gp r15d = Gp(Gp::SignatureAndId(Gpd::kSignature, Gp::kIdR15)); + +static constexpr Gp rax = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdAx)); +static constexpr Gp rbx = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdBx)); +static constexpr Gp rcx = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdCx)); +static constexpr Gp rdx = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdDx)); +static constexpr Gp rsp = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdSp)); +static constexpr Gp rbp = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdBp)); +static constexpr Gp rsi = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdSi)); +static constexpr Gp rdi = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdDi)); +static constexpr Gp r8 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR8)); +static constexpr Gp r9 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR9)); +static constexpr Gp r10 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR10)); +static constexpr Gp r11 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR11)); +static constexpr Gp r12 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR12)); +static constexpr Gp r13 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR13)); +static constexpr Gp r14 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR14)); +static constexpr Gp r15 = Gp(Gp::SignatureAndId(Gpq::kSignature, Gp::kIdR15)); + +static constexpr Xmm xmm0 = Xmm(0); +static constexpr Xmm xmm1 = Xmm(1); +static constexpr Xmm xmm2 = Xmm(2); +static constexpr Xmm xmm3 = Xmm(3); +static constexpr Xmm xmm4 = Xmm(4); +static constexpr Xmm xmm5 = Xmm(5); +static constexpr Xmm xmm6 = Xmm(6); +static constexpr Xmm xmm7 = Xmm(7); +static constexpr Xmm xmm8 = Xmm(8); +static constexpr Xmm xmm9 = Xmm(9); +static constexpr Xmm xmm10 = Xmm(10); +static constexpr Xmm xmm11 = Xmm(11); +static constexpr Xmm xmm12 = Xmm(12); +static constexpr Xmm xmm13 = Xmm(13); +static constexpr Xmm xmm14 = Xmm(14); +static constexpr Xmm xmm15 = Xmm(15); +static constexpr Xmm xmm16 = Xmm(16); +static constexpr Xmm xmm17 = Xmm(17); +static constexpr Xmm xmm18 = Xmm(18); +static constexpr Xmm xmm19 = Xmm(19); +static constexpr Xmm xmm20 = Xmm(20); +static constexpr Xmm xmm21 = Xmm(21); +static constexpr Xmm xmm22 = Xmm(22); +static constexpr Xmm xmm23 = Xmm(23); +static constexpr Xmm xmm24 = Xmm(24); +static constexpr Xmm xmm25 = Xmm(25); +static constexpr Xmm xmm26 = Xmm(26); +static constexpr Xmm xmm27 = Xmm(27); +static constexpr Xmm xmm28 = Xmm(28); +static constexpr Xmm xmm29 = Xmm(29); +static constexpr Xmm xmm30 = Xmm(30); +static constexpr Xmm xmm31 = Xmm(31); + +static constexpr Ymm ymm0 = Ymm(0); +static constexpr Ymm ymm1 = Ymm(1); +static constexpr Ymm ymm2 = Ymm(2); +static 
constexpr Ymm ymm3 = Ymm(3); +static constexpr Ymm ymm4 = Ymm(4); +static constexpr Ymm ymm5 = Ymm(5); +static constexpr Ymm ymm6 = Ymm(6); +static constexpr Ymm ymm7 = Ymm(7); +static constexpr Ymm ymm8 = Ymm(8); +static constexpr Ymm ymm9 = Ymm(9); +static constexpr Ymm ymm10 = Ymm(10); +static constexpr Ymm ymm11 = Ymm(11); +static constexpr Ymm ymm12 = Ymm(12); +static constexpr Ymm ymm13 = Ymm(13); +static constexpr Ymm ymm14 = Ymm(14); +static constexpr Ymm ymm15 = Ymm(15); +static constexpr Ymm ymm16 = Ymm(16); +static constexpr Ymm ymm17 = Ymm(17); +static constexpr Ymm ymm18 = Ymm(18); +static constexpr Ymm ymm19 = Ymm(19); +static constexpr Ymm ymm20 = Ymm(20); +static constexpr Ymm ymm21 = Ymm(21); +static constexpr Ymm ymm22 = Ymm(22); +static constexpr Ymm ymm23 = Ymm(23); +static constexpr Ymm ymm24 = Ymm(24); +static constexpr Ymm ymm25 = Ymm(25); +static constexpr Ymm ymm26 = Ymm(26); +static constexpr Ymm ymm27 = Ymm(27); +static constexpr Ymm ymm28 = Ymm(28); +static constexpr Ymm ymm29 = Ymm(29); +static constexpr Ymm ymm30 = Ymm(30); +static constexpr Ymm ymm31 = Ymm(31); + +static constexpr Zmm zmm0 = Zmm(0); +static constexpr Zmm zmm1 = Zmm(1); +static constexpr Zmm zmm2 = Zmm(2); +static constexpr Zmm zmm3 = Zmm(3); +static constexpr Zmm zmm4 = Zmm(4); +static constexpr Zmm zmm5 = Zmm(5); +static constexpr Zmm zmm6 = Zmm(6); +static constexpr Zmm zmm7 = Zmm(7); +static constexpr Zmm zmm8 = Zmm(8); +static constexpr Zmm zmm9 = Zmm(9); +static constexpr Zmm zmm10 = Zmm(10); +static constexpr Zmm zmm11 = Zmm(11); +static constexpr Zmm zmm12 = Zmm(12); +static constexpr Zmm zmm13 = Zmm(13); +static constexpr Zmm zmm14 = Zmm(14); +static constexpr Zmm zmm15 = Zmm(15); +static constexpr Zmm zmm16 = Zmm(16); +static constexpr Zmm zmm17 = Zmm(17); +static constexpr Zmm zmm18 = Zmm(18); +static constexpr Zmm zmm19 = Zmm(19); +static constexpr Zmm zmm20 = Zmm(20); +static constexpr Zmm zmm21 = Zmm(21); +static constexpr Zmm zmm22 = Zmm(22); +static constexpr Zmm zmm23 = Zmm(23); +static constexpr Zmm zmm24 = Zmm(24); +static constexpr Zmm zmm25 = Zmm(25); +static constexpr Zmm zmm26 = Zmm(26); +static constexpr Zmm zmm27 = Zmm(27); +static constexpr Zmm zmm28 = Zmm(28); +static constexpr Zmm zmm29 = Zmm(29); +static constexpr Zmm zmm30 = Zmm(30); +static constexpr Zmm zmm31 = Zmm(31); + +static constexpr Mm mm0 = Mm(0); +static constexpr Mm mm1 = Mm(1); +static constexpr Mm mm2 = Mm(2); +static constexpr Mm mm3 = Mm(3); +static constexpr Mm mm4 = Mm(4); +static constexpr Mm mm5 = Mm(5); +static constexpr Mm mm6 = Mm(6); +static constexpr Mm mm7 = Mm(7); + +static constexpr KReg k0 = KReg(0); +static constexpr KReg k1 = KReg(1); +static constexpr KReg k2 = KReg(2); +static constexpr KReg k3 = KReg(3); +static constexpr KReg k4 = KReg(4); +static constexpr KReg k5 = KReg(5); +static constexpr KReg k6 = KReg(6); +static constexpr KReg k7 = KReg(7); + +static constexpr SReg no_seg = SReg(SReg::kIdNone); +static constexpr SReg es = SReg(SReg::kIdEs); +static constexpr SReg cs = SReg(SReg::kIdCs); +static constexpr SReg ss = SReg(SReg::kIdSs); +static constexpr SReg ds = SReg(SReg::kIdDs); +static constexpr SReg fs = SReg(SReg::kIdFs); +static constexpr SReg gs = SReg(SReg::kIdGs); + +static constexpr CReg cr0 = CReg(0); +static constexpr CReg cr1 = CReg(1); +static constexpr CReg cr2 = CReg(2); +static constexpr CReg cr3 = CReg(3); +static constexpr CReg cr4 = CReg(4); +static constexpr CReg cr5 = CReg(5); +static constexpr CReg cr6 = CReg(6); +static constexpr CReg cr7 = CReg(7); 
+static constexpr CReg cr8 = CReg(8); +static constexpr CReg cr9 = CReg(9); +static constexpr CReg cr10 = CReg(10); +static constexpr CReg cr11 = CReg(11); +static constexpr CReg cr12 = CReg(12); +static constexpr CReg cr13 = CReg(13); +static constexpr CReg cr14 = CReg(14); +static constexpr CReg cr15 = CReg(15); + +static constexpr DReg dr0 = DReg(0); +static constexpr DReg dr1 = DReg(1); +static constexpr DReg dr2 = DReg(2); +static constexpr DReg dr3 = DReg(3); +static constexpr DReg dr4 = DReg(4); +static constexpr DReg dr5 = DReg(5); +static constexpr DReg dr6 = DReg(6); +static constexpr DReg dr7 = DReg(7); +static constexpr DReg dr8 = DReg(8); +static constexpr DReg dr9 = DReg(9); +static constexpr DReg dr10 = DReg(10); +static constexpr DReg dr11 = DReg(11); +static constexpr DReg dr12 = DReg(12); +static constexpr DReg dr13 = DReg(13); +static constexpr DReg dr14 = DReg(14); +static constexpr DReg dr15 = DReg(15); + +static constexpr St st0 = St(0); +static constexpr St st1 = St(1); +static constexpr St st2 = St(2); +static constexpr St st3 = St(3); +static constexpr St st4 = St(4); +static constexpr St st5 = St(5); +static constexpr St st6 = St(6); +static constexpr St st7 = St(7); + +static constexpr Bnd bnd0 = Bnd(0); +static constexpr Bnd bnd1 = Bnd(1); +static constexpr Bnd bnd2 = Bnd(2); +static constexpr Bnd bnd3 = Bnd(3); + +static constexpr Tmm tmm0 = Tmm(0); +static constexpr Tmm tmm1 = Tmm(1); +static constexpr Tmm tmm2 = Tmm(2); +static constexpr Tmm tmm3 = Tmm(3); +static constexpr Tmm tmm4 = Tmm(4); +static constexpr Tmm tmm5 = Tmm(5); +static constexpr Tmm tmm6 = Tmm(6); +static constexpr Tmm tmm7 = Tmm(7); + +static constexpr Rip rip = Rip(0); + +#ifndef _DOXYGEN +} // {regs} + +// Make `x86::regs` accessible through `x86` namespace as well. +using namespace regs; +#endif + // ============================================================================ // [asmjit::x86::Mem] // ============================================================================ @@ -410,28 +816,65 @@ inline Zmm Vec::zmm() const noexcept { return Zmm(id()); } //! Memory operand. class Mem : public BaseMem { public: - //! Additional bits of operand's signature used by `Mem`. + //! Additional bits of operand's signature used by `x86::Mem`. enum AdditionalBits : uint32_t { - kSignatureMemSegmentShift = 16, - kSignatureMemSegmentMask = 0x07u << kSignatureMemSegmentShift, - - kSignatureMemShiftShift = 19, - kSignatureMemShiftMask = 0x03u << kSignatureMemShiftShift, - + // Memory address type (2 bits). + // |........|........|XX......|........| + kSignatureMemAddrTypeShift = 14, + kSignatureMemAddrTypeMask = 0x03u << kSignatureMemAddrTypeShift, + + // Memory shift amount (2 bits). + // |........|......XX|........|........| + kSignatureMemShiftValueShift = 16, + kSignatureMemShiftValueMask = 0x03u << kSignatureMemShiftValueShift, + + // Memory segment reg (3 bits). + // |........|...XXX..|........|........| + kSignatureMemSegmentShift = 18, + kSignatureMemSegmentMask = 0x07u << kSignatureMemSegmentShift, + + // Memory broadcast type (3 bits). + // |........|XXX.....|........|........| kSignatureMemBroadcastShift = 21, - kSignatureMemBroadcastMask = 0x7u << kSignatureMemBroadcastShift + kSignatureMemBroadcastMask = 0x7u << kSignatureMemBroadcastShift }; + //! Address type. + enum AddrType : uint32_t { + //! Default address type, Assembler will select the best type when necessary. + kAddrTypeDefault = 0, + //! Absolute address type. + kAddrTypeAbs = 1, + //! Relative address type. 
+ kAddrTypeRel = 2 + }; + + //! Memory broadcast type. enum Broadcast : uint32_t { + //! Broadcast {1to1}. kBroadcast1To1 = 0, + //! Broadcast {1to2}. kBroadcast1To2 = 1, + //! Broadcast {1to4}. kBroadcast1To4 = 2, + //! Broadcast {1to8}. kBroadcast1To8 = 3, + //! Broadcast {1to16}. kBroadcast1To16 = 4, + //! Broadcast {1to32}. kBroadcast1To32 = 5, + //! Broadcast {1to64}. kBroadcast1To64 = 6 }; + //! \cond + //! Shortcuts. + enum SignatureMem : uint32_t { + kSignatureMemAbs = kAddrTypeAbs << kSignatureMemAddrTypeShift, + kSignatureMemRel = kAddrTypeRel << kSignatureMemAddrTypeShift + }; + //! \endcond + // -------------------------------------------------------------------------- // [Construction / Destruction] // -------------------------------------------------------------------------- @@ -454,19 +897,19 @@ class Mem : public BaseMem { : BaseMem(Decomposed { Label::kLabelTag, base.id(), 0, 0, off, size, flags }) {} constexpr Mem(const Label& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept - : BaseMem(Decomposed { Label::kLabelTag, base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftShift) }) {} + : BaseMem(Decomposed { Label::kLabelTag, base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftValueShift) }) {} constexpr Mem(const BaseReg& base, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept : BaseMem(Decomposed { base.type(), base.id(), 0, 0, off, size, flags }) {} constexpr Mem(const BaseReg& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept - : BaseMem(Decomposed { base.type(), base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftShift) }) {} + : BaseMem(Decomposed { base.type(), base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftValueShift) }) {} constexpr explicit Mem(uint64_t base, uint32_t size = 0, uint32_t flags = 0) noexcept : BaseMem(Decomposed { 0, uint32_t(base >> 32), 0, 0, int32_t(uint32_t(base & 0xFFFFFFFFu)), size, flags }) {} constexpr Mem(uint64_t base, const BaseReg& index, uint32_t shift = 0, uint32_t size = 0, uint32_t flags = 0) noexcept - : BaseMem(Decomposed { 0, uint32_t(base >> 32), index.type(), index.id(), int32_t(uint32_t(base & 0xFFFFFFFFu)), size, flags | (shift << kSignatureMemShiftShift) }) {} + : BaseMem(Decomposed { 0, uint32_t(base >> 32), index.type(), index.id(), int32_t(uint32_t(base & 0xFFFFFFFFu)), size, flags | (shift << kSignatureMemShiftValueShift) }) {} constexpr Mem(Globals::Init_, uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) noexcept : BaseMem(Globals::Init, u0, u1, u2, u3) {} @@ -513,6 +956,25 @@ class Mem : public BaseMem { setShift(shift); } + //! Returns the address type (see \ref AddrType) of the memory operand. + //! + //! By default, address type of newly created memory operands is always \ref kAddrTypeDefault. + constexpr uint32_t addrType() const noexcept { return _getSignaturePart(); } + //! Sets the address type to `addrType`, see \ref AddrType. + inline void setAddrType(uint32_t addrType) noexcept { _setSignaturePart(addrType); } + //! Resets the address type to \ref kAddrTypeDefault. + inline void resetAddrType() noexcept { _setSignaturePart(0); } + + //! Tests whether the address type is \ref kAddrTypeAbs. + constexpr bool isAbs() const noexcept { return addrType() == kAddrTypeAbs; } + //! Sets the address type to \ref kAddrTypeAbs. 
+ inline void setAbs() noexcept { setAddrType(kAddrTypeAbs); } + + //! Tests whether the address type is \ref kAddrTypeRel. + constexpr bool isRel() const noexcept { return addrType() == kAddrTypeRel; } + //! Sets the address type to \ref kAddrTypeRel. + inline void setRel() noexcept { setAddrType(kAddrTypeRel); } + //! Tests whether the memory operand has a segment override. constexpr bool hasSegment() const noexcept { return _hasSignaturePart(); } //! Returns the associated segment override as `SReg` operand. @@ -528,13 +990,13 @@ class Mem : public BaseMem { inline void resetSegment() noexcept { _setSignaturePart(0); } //! Tests whether the memory operand has shift (aka scale) value. - constexpr bool hasShift() const noexcept { return _hasSignaturePart(); } + constexpr bool hasShift() const noexcept { return _hasSignaturePart(); } //! Returns the memory operand's shift (aka scale) value. - constexpr uint32_t shift() const noexcept { return _getSignaturePart(); } + constexpr uint32_t shift() const noexcept { return _getSignaturePart(); } //! Sets the memory operand's shift (aka scale) value. - inline void setShift(uint32_t shift) noexcept { _setSignaturePart(shift); } + inline void setShift(uint32_t shift) noexcept { _setSignaturePart(shift); } //! Resets the memory operand's shift (aka scale) value to zero. - inline void resetShift() noexcept { _setSignaturePart(0); } + inline void resetShift() noexcept { _setSignaturePart(0); } //! Tests whether the memory operand has broadcast {1tox}. constexpr bool hasBroadcast() const noexcept { return _hasSignaturePart(); } @@ -552,328 +1014,6 @@ class Mem : public BaseMem { inline Mem& operator=(const Mem& other) noexcept = default; }; -// ============================================================================ -// [asmjit::x86::OpData] -// ============================================================================ - -struct OpData { - //! Information about all architecture registers. - ArchRegs archRegs; -}; -ASMJIT_VARAPI const OpData opData; - -//! \cond -// ... Reg methods that require `opData`. -inline uint32_t Reg::groupOf(uint32_t rType) noexcept { - ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); - return opData.archRegs.regInfo[rType].group(); -} - -inline uint32_t Reg::typeIdOf(uint32_t rType) noexcept { - ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); - return opData.archRegs.regTypeToTypeId[rType]; -} - -inline uint32_t Reg::signatureOf(uint32_t rType) noexcept { - ASMJIT_ASSERT(rType <= BaseReg::kTypeMax); - return opData.archRegs.regInfo[rType].signature(); -} -//! \endcond - -// ============================================================================ -// [asmjit::x86::regs] -// ============================================================================ - -namespace regs { - -//! Creates an 8-bit low GPB register operand. -static constexpr GpbLo gpb(uint32_t rId) noexcept { return GpbLo(rId); } -//! Creates an 8-bit low GPB register operand. -static constexpr GpbLo gpb_lo(uint32_t rId) noexcept { return GpbLo(rId); } -//! Creates an 8-bit high GPB register operand. -static constexpr GpbHi gpb_hi(uint32_t rId) noexcept { return GpbHi(rId); } -//! Creates a 16-bit GPW register operand. -static constexpr Gpw gpw(uint32_t rId) noexcept { return Gpw(rId); } -//! Creates a 32-bit GPD register operand. -static constexpr Gpd gpd(uint32_t rId) noexcept { return Gpd(rId); } -//! Creates a 64-bit GPQ register operand (64-bit). -static constexpr Gpq gpq(uint32_t rId) noexcept { return Gpq(rId); } -//! Creates a 128-bit XMM register operand. 
-static constexpr Xmm xmm(uint32_t rId) noexcept { return Xmm(rId); } -//! Creates a 256-bit YMM register operand. -static constexpr Ymm ymm(uint32_t rId) noexcept { return Ymm(rId); } -//! Creates a 512-bit ZMM register operand. -static constexpr Zmm zmm(uint32_t rId) noexcept { return Zmm(rId); } -//! Creates a 64-bit Mm register operand. -static constexpr Mm mm(uint32_t rId) noexcept { return Mm(rId); } -//! Creates a 64-bit K register operand. -static constexpr KReg k(uint32_t rId) noexcept { return KReg(rId); } -//! Creates a 32-bit or 64-bit control register operand. -static constexpr CReg cr(uint32_t rId) noexcept { return CReg(rId); } -//! Creates a 32-bit or 64-bit debug register operand. -static constexpr DReg dr(uint32_t rId) noexcept { return DReg(rId); } -//! Creates an 80-bit st register operand. -static constexpr St st(uint32_t rId) noexcept { return St(rId); } -//! Creates a 128-bit bound register operand. -static constexpr Bnd bnd(uint32_t rId) noexcept { return Bnd(rId); } - -static constexpr Gp al(GpbLo::kSignature, Gp::kIdAx); -static constexpr Gp bl(GpbLo::kSignature, Gp::kIdBx); -static constexpr Gp cl(GpbLo::kSignature, Gp::kIdCx); -static constexpr Gp dl(GpbLo::kSignature, Gp::kIdDx); -static constexpr Gp spl(GpbLo::kSignature, Gp::kIdSp); -static constexpr Gp bpl(GpbLo::kSignature, Gp::kIdBp); -static constexpr Gp sil(GpbLo::kSignature, Gp::kIdSi); -static constexpr Gp dil(GpbLo::kSignature, Gp::kIdDi); -static constexpr Gp r8b(GpbLo::kSignature, Gp::kIdR8); -static constexpr Gp r9b(GpbLo::kSignature, Gp::kIdR9); -static constexpr Gp r10b(GpbLo::kSignature, Gp::kIdR10); -static constexpr Gp r11b(GpbLo::kSignature, Gp::kIdR11); -static constexpr Gp r12b(GpbLo::kSignature, Gp::kIdR12); -static constexpr Gp r13b(GpbLo::kSignature, Gp::kIdR13); -static constexpr Gp r14b(GpbLo::kSignature, Gp::kIdR14); -static constexpr Gp r15b(GpbLo::kSignature, Gp::kIdR15); - -static constexpr Gp ah(GpbHi::kSignature, Gp::kIdAx); -static constexpr Gp bh(GpbHi::kSignature, Gp::kIdBx); -static constexpr Gp ch(GpbHi::kSignature, Gp::kIdCx); -static constexpr Gp dh(GpbHi::kSignature, Gp::kIdDx); - -static constexpr Gp ax(Gpw::kSignature, Gp::kIdAx); -static constexpr Gp bx(Gpw::kSignature, Gp::kIdBx); -static constexpr Gp cx(Gpw::kSignature, Gp::kIdCx); -static constexpr Gp dx(Gpw::kSignature, Gp::kIdDx); -static constexpr Gp sp(Gpw::kSignature, Gp::kIdSp); -static constexpr Gp bp(Gpw::kSignature, Gp::kIdBp); -static constexpr Gp si(Gpw::kSignature, Gp::kIdSi); -static constexpr Gp di(Gpw::kSignature, Gp::kIdDi); -static constexpr Gp r8w(Gpw::kSignature, Gp::kIdR8); -static constexpr Gp r9w(Gpw::kSignature, Gp::kIdR9); -static constexpr Gp r10w(Gpw::kSignature, Gp::kIdR10); -static constexpr Gp r11w(Gpw::kSignature, Gp::kIdR11); -static constexpr Gp r12w(Gpw::kSignature, Gp::kIdR12); -static constexpr Gp r13w(Gpw::kSignature, Gp::kIdR13); -static constexpr Gp r14w(Gpw::kSignature, Gp::kIdR14); -static constexpr Gp r15w(Gpw::kSignature, Gp::kIdR15); - -static constexpr Gp eax(Gpd::kSignature, Gp::kIdAx); -static constexpr Gp ebx(Gpd::kSignature, Gp::kIdBx); -static constexpr Gp ecx(Gpd::kSignature, Gp::kIdCx); -static constexpr Gp edx(Gpd::kSignature, Gp::kIdDx); -static constexpr Gp esp(Gpd::kSignature, Gp::kIdSp); -static constexpr Gp ebp(Gpd::kSignature, Gp::kIdBp); -static constexpr Gp esi(Gpd::kSignature, Gp::kIdSi); -static constexpr Gp edi(Gpd::kSignature, Gp::kIdDi); -static constexpr Gp r8d(Gpd::kSignature, Gp::kIdR8); -static constexpr Gp r9d(Gpd::kSignature, Gp::kIdR9); -static 
constexpr Gp r10d(Gpd::kSignature, Gp::kIdR10); -static constexpr Gp r11d(Gpd::kSignature, Gp::kIdR11); -static constexpr Gp r12d(Gpd::kSignature, Gp::kIdR12); -static constexpr Gp r13d(Gpd::kSignature, Gp::kIdR13); -static constexpr Gp r14d(Gpd::kSignature, Gp::kIdR14); -static constexpr Gp r15d(Gpd::kSignature, Gp::kIdR15); - -static constexpr Gp rax(Gpq::kSignature, Gp::kIdAx); -static constexpr Gp rbx(Gpq::kSignature, Gp::kIdBx); -static constexpr Gp rcx(Gpq::kSignature, Gp::kIdCx); -static constexpr Gp rdx(Gpq::kSignature, Gp::kIdDx); -static constexpr Gp rsp(Gpq::kSignature, Gp::kIdSp); -static constexpr Gp rbp(Gpq::kSignature, Gp::kIdBp); -static constexpr Gp rsi(Gpq::kSignature, Gp::kIdSi); -static constexpr Gp rdi(Gpq::kSignature, Gp::kIdDi); -static constexpr Gp r8(Gpq::kSignature, Gp::kIdR8); -static constexpr Gp r9(Gpq::kSignature, Gp::kIdR9); -static constexpr Gp r10(Gpq::kSignature, Gp::kIdR10); -static constexpr Gp r11(Gpq::kSignature, Gp::kIdR11); -static constexpr Gp r12(Gpq::kSignature, Gp::kIdR12); -static constexpr Gp r13(Gpq::kSignature, Gp::kIdR13); -static constexpr Gp r14(Gpq::kSignature, Gp::kIdR14); -static constexpr Gp r15(Gpq::kSignature, Gp::kIdR15); - -static constexpr Xmm xmm0(0); -static constexpr Xmm xmm1(1); -static constexpr Xmm xmm2(2); -static constexpr Xmm xmm3(3); -static constexpr Xmm xmm4(4); -static constexpr Xmm xmm5(5); -static constexpr Xmm xmm6(6); -static constexpr Xmm xmm7(7); -static constexpr Xmm xmm8(8); -static constexpr Xmm xmm9(9); -static constexpr Xmm xmm10(10); -static constexpr Xmm xmm11(11); -static constexpr Xmm xmm12(12); -static constexpr Xmm xmm13(13); -static constexpr Xmm xmm14(14); -static constexpr Xmm xmm15(15); -static constexpr Xmm xmm16(16); -static constexpr Xmm xmm17(17); -static constexpr Xmm xmm18(18); -static constexpr Xmm xmm19(19); -static constexpr Xmm xmm20(20); -static constexpr Xmm xmm21(21); -static constexpr Xmm xmm22(22); -static constexpr Xmm xmm23(23); -static constexpr Xmm xmm24(24); -static constexpr Xmm xmm25(25); -static constexpr Xmm xmm26(26); -static constexpr Xmm xmm27(27); -static constexpr Xmm xmm28(28); -static constexpr Xmm xmm29(29); -static constexpr Xmm xmm30(30); -static constexpr Xmm xmm31(31); - -static constexpr Ymm ymm0(0); -static constexpr Ymm ymm1(1); -static constexpr Ymm ymm2(2); -static constexpr Ymm ymm3(3); -static constexpr Ymm ymm4(4); -static constexpr Ymm ymm5(5); -static constexpr Ymm ymm6(6); -static constexpr Ymm ymm7(7); -static constexpr Ymm ymm8(8); -static constexpr Ymm ymm9(9); -static constexpr Ymm ymm10(10); -static constexpr Ymm ymm11(11); -static constexpr Ymm ymm12(12); -static constexpr Ymm ymm13(13); -static constexpr Ymm ymm14(14); -static constexpr Ymm ymm15(15); -static constexpr Ymm ymm16(16); -static constexpr Ymm ymm17(17); -static constexpr Ymm ymm18(18); -static constexpr Ymm ymm19(19); -static constexpr Ymm ymm20(20); -static constexpr Ymm ymm21(21); -static constexpr Ymm ymm22(22); -static constexpr Ymm ymm23(23); -static constexpr Ymm ymm24(24); -static constexpr Ymm ymm25(25); -static constexpr Ymm ymm26(26); -static constexpr Ymm ymm27(27); -static constexpr Ymm ymm28(28); -static constexpr Ymm ymm29(29); -static constexpr Ymm ymm30(30); -static constexpr Ymm ymm31(31); - -static constexpr Zmm zmm0(0); -static constexpr Zmm zmm1(1); -static constexpr Zmm zmm2(2); -static constexpr Zmm zmm3(3); -static constexpr Zmm zmm4(4); -static constexpr Zmm zmm5(5); -static constexpr Zmm zmm6(6); -static constexpr Zmm zmm7(7); -static constexpr Zmm zmm8(8); 
-static constexpr Zmm zmm9(9); -static constexpr Zmm zmm10(10); -static constexpr Zmm zmm11(11); -static constexpr Zmm zmm12(12); -static constexpr Zmm zmm13(13); -static constexpr Zmm zmm14(14); -static constexpr Zmm zmm15(15); -static constexpr Zmm zmm16(16); -static constexpr Zmm zmm17(17); -static constexpr Zmm zmm18(18); -static constexpr Zmm zmm19(19); -static constexpr Zmm zmm20(20); -static constexpr Zmm zmm21(21); -static constexpr Zmm zmm22(22); -static constexpr Zmm zmm23(23); -static constexpr Zmm zmm24(24); -static constexpr Zmm zmm25(25); -static constexpr Zmm zmm26(26); -static constexpr Zmm zmm27(27); -static constexpr Zmm zmm28(28); -static constexpr Zmm zmm29(29); -static constexpr Zmm zmm30(30); -static constexpr Zmm zmm31(31); - -static constexpr Mm mm0(0); -static constexpr Mm mm1(1); -static constexpr Mm mm2(2); -static constexpr Mm mm3(3); -static constexpr Mm mm4(4); -static constexpr Mm mm5(5); -static constexpr Mm mm6(6); -static constexpr Mm mm7(7); - -static constexpr KReg k0(0); -static constexpr KReg k1(1); -static constexpr KReg k2(2); -static constexpr KReg k3(3); -static constexpr KReg k4(4); -static constexpr KReg k5(5); -static constexpr KReg k6(6); -static constexpr KReg k7(7); - -static constexpr SReg no_seg(SReg::kIdNone); -static constexpr SReg es(SReg::kIdEs); -static constexpr SReg cs(SReg::kIdCs); -static constexpr SReg ss(SReg::kIdSs); -static constexpr SReg ds(SReg::kIdDs); -static constexpr SReg fs(SReg::kIdFs); -static constexpr SReg gs(SReg::kIdGs); - -static constexpr CReg cr0(0); -static constexpr CReg cr1(1); -static constexpr CReg cr2(2); -static constexpr CReg cr3(3); -static constexpr CReg cr4(4); -static constexpr CReg cr5(5); -static constexpr CReg cr6(6); -static constexpr CReg cr7(7); -static constexpr CReg cr8(8); -static constexpr CReg cr9(9); -static constexpr CReg cr10(10); -static constexpr CReg cr11(11); -static constexpr CReg cr12(12); -static constexpr CReg cr13(13); -static constexpr CReg cr14(14); -static constexpr CReg cr15(15); - -static constexpr DReg dr0(0); -static constexpr DReg dr1(1); -static constexpr DReg dr2(2); -static constexpr DReg dr3(3); -static constexpr DReg dr4(4); -static constexpr DReg dr5(5); -static constexpr DReg dr6(6); -static constexpr DReg dr7(7); -static constexpr DReg dr8(8); -static constexpr DReg dr9(9); -static constexpr DReg dr10(10); -static constexpr DReg dr11(11); -static constexpr DReg dr12(12); -static constexpr DReg dr13(13); -static constexpr DReg dr14(14); -static constexpr DReg dr15(15); - -static constexpr St st0(0); -static constexpr St st1(1); -static constexpr St st2(2); -static constexpr St st3(3); -static constexpr St st4(4); -static constexpr St st5(5); -static constexpr St st6(6); -static constexpr St st7(7); - -static constexpr Bnd bnd0(0); -static constexpr Bnd bnd1(1); -static constexpr Bnd bnd2(2); -static constexpr Bnd bnd3(3); - -static constexpr Rip rip(0); - -} // {regs} - -// Make `x86::regs` accessible through `x86` namespace as well. -using namespace regs; - -// ============================================================================ -// [asmjit::x86::ptr] -// ============================================================================ - //! Creates `[base.reg + offset]` memory operand. static constexpr Mem ptr(const Gp& base, int32_t offset = 0, uint32_t size = 0) noexcept { return Mem(base, offset, size); @@ -920,94 +1060,30 @@ static constexpr Mem ptr(uint64_t base, const Vec& index, uint32_t shift = 0, ui //! 
Creates `[base]` absolute memory operand (absolute). static constexpr Mem ptr_abs(uint64_t base, uint32_t size = 0) noexcept { - return Mem(base, size, BaseMem::kSignatureMemAbs); + return Mem(base, size, Mem::kSignatureMemAbs); } //! Creates `[base + (index.reg << shift)]` absolute memory operand (absolute). static constexpr Mem ptr_abs(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept { - return Mem(base, index, shift, size, BaseMem::kSignatureMemAbs); + return Mem(base, index, shift, size, Mem::kSignatureMemAbs); } //! Creates `[base + (index.reg << shift)]` absolute memory operand (absolute). static constexpr Mem ptr_abs(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept { - return Mem(base, index, shift, size, BaseMem::kSignatureMemAbs); + return Mem(base, index, shift, size, Mem::kSignatureMemAbs); } //! Creates `[base]` relative memory operand (relative). static constexpr Mem ptr_rel(uint64_t base, uint32_t size = 0) noexcept { - return Mem(base, size, BaseMem::kSignatureMemRel); + return Mem(base, size, Mem::kSignatureMemRel); } //! Creates `[base + (index.reg << shift)]` relative memory operand (relative). static constexpr Mem ptr_rel(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept { - return Mem(base, index, shift, size, BaseMem::kSignatureMemRel); + return Mem(base, index, shift, size, Mem::kSignatureMemRel); } //! Creates `[base + (index.reg << shift)]` relative memory operand (relative). static constexpr Mem ptr_rel(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept { - return Mem(base, index, shift, size, BaseMem::kSignatureMemRel); + return Mem(base, index, shift, size, Mem::kSignatureMemRel); } -#define ASMJIT_MEM_PTR(FUNC, SIZE) \ - /*! Creates `[base + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Gp& base, int32_t offset = 0) noexcept { \ - return Mem(base, offset, SIZE); \ - } \ - /*! Creates `[base + (index << shift) + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ - return Mem(base, index, shift, offset, SIZE); \ - } \ - /*! Creates `[base + (vec_index << shift) + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ - return Mem(base, index, shift, offset, SIZE); \ - } \ - /*! Creates `[base + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Label& base, int32_t offset = 0) noexcept { \ - return Mem(base, offset, SIZE); \ - } \ - /*! Creates `[base + (index << shift) + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \ - return Mem(base, index, shift, offset, SIZE); \ - } \ - /*! Creates `[rip + offset]` memory operand. */ \ - static constexpr Mem FUNC(const Rip& rip_, int32_t offset = 0) noexcept { \ - return Mem(rip_, offset, SIZE); \ - } \ - /*! Creates `[ptr]` memory operand. */ \ - static constexpr Mem FUNC(uint64_t base) noexcept { \ - return Mem(base, SIZE); \ - } \ - /*! Creates `[base + (index << shift) + offset]` memory operand. */ \ - static constexpr Mem FUNC(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE); \ - } \ - /*! Creates `[base + (vec_index << shift) + offset]` memory operand. 
*/ \ - static constexpr Mem FUNC(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE); \ - } \ - \ - /*! Creates `[base + offset]` memory operand (absolute). */ \ - static constexpr Mem FUNC##_abs(uint64_t base) noexcept { \ - return Mem(base, SIZE, BaseMem::kSignatureMemAbs); \ - } \ - /*! Creates `[base + (index << shift) + offset]` memory operand (absolute). */ \ - static constexpr Mem FUNC##_abs(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemAbs); \ - } \ - /*! Creates `[base + (vec_index << shift) + offset]` memory operand (absolute). */ \ - static constexpr Mem FUNC##_abs(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemAbs); \ - } \ - \ - /*! Creates `[base + offset]` memory operand (relative). */ \ - static constexpr Mem FUNC##_rel(uint64_t base) noexcept { \ - return Mem(base, SIZE, BaseMem::kSignatureMemRel); \ - } \ - /*! Creates `[base + (index << shift) + offset]` memory operand (relative). */ \ - static constexpr Mem FUNC##_rel(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemRel); \ - } \ - /*! Creates `[base + (vec_index << shift) + offset]` memory operand (relative). */ \ - static constexpr Mem FUNC##_rel(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \ - return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemRel); \ - } - // Definition of memory operand constructors that use platform independent naming. ASMJIT_MEM_PTR(ptr_8, 1) ASMJIT_MEM_PTR(ptr_16, 2) @@ -1032,8 +1108,6 @@ ASMJIT_MEM_PTR(xmmword_ptr, 16) ASMJIT_MEM_PTR(ymmword_ptr, 32) ASMJIT_MEM_PTR(zmmword_ptr, 64) -#undef ASMJIT_MEM_PTR - //! \} ASMJIT_END_SUB_NAMESPACE @@ -1043,7 +1117,6 @@ ASMJIT_END_SUB_NAMESPACE // ============================================================================ //! \cond INTERNAL - ASMJIT_BEGIN_NAMESPACE ASMJIT_DEFINE_TYPE_ID(x86::Gpb, kIdI8); ASMJIT_DEFINE_TYPE_ID(x86::Gpw, kIdI16); @@ -1054,7 +1127,8 @@ ASMJIT_DEFINE_TYPE_ID(x86::Xmm, kIdI32x4); ASMJIT_DEFINE_TYPE_ID(x86::Ymm, kIdI32x8); ASMJIT_DEFINE_TYPE_ID(x86::Zmm, kIdI32x16); ASMJIT_END_NAMESPACE - //! 
\endcond
+#undef ASMJIT_MEM_PTR
+
 #endif // ASMJIT_X86_X86OPERAND_H_INCLUDED
diff --git a/libs/asmjit/src/asmjit/x86/x86rapass.cpp b/libs/asmjit/src/asmjit/x86/x86rapass.cpp
index cd6ebb5..faf05dc 100644
--- a/libs/asmjit/src/asmjit/x86/x86rapass.cpp
+++ b/libs/asmjit/src/asmjit/x86/x86rapass.cpp
@@ -31,7 +31,7 @@
 #include "../x86/x86compiler.h"
 #include "../x86/x86instapi_p.h"
 #include "../x86/x86instdb_p.h"
-#include "../x86/x86internal_p.h"
+#include "../x86/x86emithelper_p.h"
 #include "../x86/x86rapass_p.h"
 
 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
@@ -86,20 +86,20 @@ static ASMJIT_INLINE uint32_t raMemIndexRwFlags(uint32_t flags) noexcept {
 }
 
 // ============================================================================
-// [asmjit::x86::X86RACFGBuilder]
+// [asmjit::x86::RACFGBuilder]
 // ============================================================================
 
-class X86RACFGBuilder : public RACFGBuilder<X86RACFGBuilder> {
+class RACFGBuilder : public RACFGBuilderT<RACFGBuilder> {
 public:
-  uint32_t _archId;
+  uint32_t _arch;
   bool _is64Bit;
   bool _avxEnabled;
 
-  inline X86RACFGBuilder(X86RAPass* pass) noexcept
-    : RACFGBuilder<X86RACFGBuilder>(pass),
-      _archId(pass->cc()->archId()),
-      _is64Bit(pass->gpSize() == 8),
-      _avxEnabled(pass->_avxEnabled) {
+  inline RACFGBuilder(X86RAPass* pass) noexcept
+    : RACFGBuilderT<RACFGBuilder>(pass),
+      _arch(pass->cc()->arch()),
+      _is64Bit(pass->registerSize() == 8),
+      _avxEnabled(pass->avxEnabled()) {
   }
 
   inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
@@ -110,29 +110,30 @@ class X86RACFGBuilder : public RACFGBuilder<X86RACFGBuilder> {
   Error onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& ib) noexcept;
 
-  Error onBeforeCall(FuncCallNode* call) noexcept;
-  Error onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept;
+  Error onBeforeInvoke(InvokeNode* invokeNode) noexcept;
+  Error onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept;
 
-  Error moveImmToRegArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept;
-  Error moveImmToStackArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_) noexcept;
-  Error moveRegToStackArg(FuncCallNode* call, const FuncValue& arg, const BaseReg& reg) noexcept;
+  Error moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept;
+  Error moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept;
+  Error moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept;
+  Error moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept;
 
   Error onBeforeRet(FuncRetNode* funcRet) noexcept;
   Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept;
 };
 
 // ============================================================================
-// [asmjit::x86::X86RACFGBuilder - OnInst]
+// [asmjit::x86::RACFGBuilder - OnInst]
 // ============================================================================
 
-Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& ib) noexcept {
+Error RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& ib) noexcept {
   InstRWInfo rwInfo;
 
   uint32_t instId = inst->id();
   if (Inst::isDefinedId(instId)) {
     uint32_t opCount = inst->opCount();
     const Operand* opArray = inst->operands();
-    ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_archId, inst->baseInst(), opArray, opCount, rwInfo));
+    ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
 
     const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
bool hasGpbHiConstraint = false; @@ -343,10 +344,10 @@ Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuild if (singleRegOps == opCount) { singleRegCase = instInfo.singleRegCase(); } - else if (opCount == 2 && inst->opType(1).isImm()) { + else if (opCount == 2 && inst->op(1).isImm()) { // Handle some tricks used by X86 asm. - const BaseReg& reg = inst->opType(0).as(); - const Imm& imm = inst->opType(1).as(); + const BaseReg& reg = inst->op(0).as(); + const Imm& imm = inst->op(1).as(); const RAWorkReg* workReg = _pass->workRegById(ib[0]->workId()); uint32_t workRegSize = workReg->info().size(); @@ -355,7 +356,7 @@ Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuild case Inst::kIdOr: { // Sets the value of the destination register to -1, previous content unused. if (reg.size() >= 4 || reg.size() >= workRegSize) { - if (imm.i64() == -1 || imm.u64() == raImmMaskFromSize(reg.size())) + if (imm.value() == -1 || imm.valueAs() == raImmMaskFromSize(reg.size())) singleRegCase = InstDB::kSingleRegWO; } ASMJIT_FALLTHROUGH; @@ -372,7 +373,7 @@ Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuild case Inst::kIdXor: { // Updates [E|R]FLAGS without changing the content. if (reg.size() != 4 || reg.size() >= workRegSize) { - if (imm.u64() == 0) + if (imm.value() == 0) singleRegCase = InstDB::kSingleRegRO; } break; @@ -399,23 +400,24 @@ Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuild } // ============================================================================ -// [asmjit::x86::X86RACFGBuilder - OnCall] +// [asmjit::x86::RACFGBuilder - OnInvoke] // ============================================================================ -Error X86RACFGBuilder::onBeforeCall(FuncCallNode* call) noexcept { - uint32_t argCount = call->argCount(); - uint32_t retCount = call->retCount(); - const FuncDetail& fd = call->detail(); +Error RACFGBuilder::onBeforeInvoke(InvokeNode* invokeNode) noexcept { + const FuncDetail& fd = invokeNode->detail(); + uint32_t argCount = invokeNode->argCount(); - cc()->_setCursor(call->prev()); + cc()->_setCursor(invokeNode->prev()); + uint32_t nativeRegType = cc()->_gpRegInfo.type(); for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { - for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) { - if (!fd.hasArg(argIndex + argHi)) - continue; + const FuncValuePack& argPack = fd.argPack(argIndex); + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + if (!argPack[valueIndex]) + break; - const FuncValue& arg = fd.arg(argIndex + argHi); - const Operand& op = call->arg(argIndex + argHi); + const FuncValue& arg = argPack[valueIndex]; + const Operand& op = invokeNode->arg(argIndex, valueIndex); if (op.isNone()) continue; @@ -429,79 +431,112 @@ Error X86RACFGBuilder::onBeforeCall(FuncCallNode* call) noexcept { uint32_t regGroup = workReg->group(); uint32_t argGroup = Reg::groupOf(arg.regType()); - if (regGroup != argGroup) { - // TODO: - ASMJIT_ASSERT(false); + if (arg.isIndirect()) { + if (reg.isGp()) { + if (reg.type() != nativeRegType) + return DebugUtils::errored(kErrorInvalidAssignment); + // It's considered allocated if this is an indirect argument and the user used GP. + continue; + } + + BaseReg indirectReg; + moveVecToPtr(invokeNode, arg, reg.as(), &indirectReg); + invokeNode->_args[argIndex][valueIndex] = indirectReg; + } + else { + if (regGroup != argGroup) { + // TODO: Conversion is not supported. 
+ return DebugUtils::errored(kErrorInvalidAssignment); + } } } else { - ASMJIT_PROPAGATE(moveRegToStackArg(call, arg, op.as())); + if (arg.isIndirect()) { + if (reg.isGp()) { + if (reg.type() != nativeRegType) + return DebugUtils::errored(kErrorInvalidAssignment); + + ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg)); + continue; + } + + BaseReg indirectReg; + moveVecToPtr(invokeNode, arg, reg.as(), &indirectReg); + ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, indirectReg)); + } + else { + ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg)); + } } } else if (op.isImm()) { if (arg.isReg()) { BaseReg reg; - ASMJIT_PROPAGATE(moveImmToRegArg(call, arg, op.as(), ®)); - call->_args[argIndex + argHi] = reg; + ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, op.as(), ®)); + invokeNode->_args[argIndex][valueIndex] = reg; } else { - ASMJIT_PROPAGATE(moveImmToStackArg(call, arg, op.as())); + ASMJIT_PROPAGATE(moveImmToStackArg(invokeNode, arg, op.as())); } } } } - cc()->_setCursor(call); + cc()->_setCursor(invokeNode); if (fd.hasFlag(CallConv::kFlagCalleePopsStack)) ASMJIT_PROPAGATE(cc()->sub(cc()->zsp(), fd.argStackSize())); - for (uint32_t retIndex = 0; retIndex < retCount; retIndex++) { - const FuncValue& ret = fd.ret(retIndex); - const Operand& op = call->ret(retIndex); - - if (op.isReg()) { - const Reg& reg = op.as(); - RAWorkReg* workReg; - ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg)); - - if (ret.isReg()) { - if (ret.regType() == Reg::kTypeSt) { - if (workReg->group() != Reg::kGroupVec) - return DebugUtils::errored(kErrorInvalidAssignment); - - Reg dst = Reg(workReg->signature(), workReg->virtId()); - Mem mem; - - uint32_t typeId = Type::baseOf(workReg->typeId()); - if (ret.hasTypeId()) - typeId = ret.typeId(); - - switch (typeId) { - case Type::kIdF32: - ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4)); - mem.setSize(4); - ASMJIT_PROPAGATE(cc()->fstp(mem)); - ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), dst.as(), mem)); - break; + if (fd.hasRet()) { + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + const FuncValue& ret = fd.ret(valueIndex); + if (!ret) + break; - case Type::kIdF64: - ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4)); - mem.setSize(8); - ASMJIT_PROPAGATE(cc()->fstp(mem)); - ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), dst.as(), mem)); - break; + const Operand& op = invokeNode->ret(valueIndex); + if (op.isReg()) { + const Reg& reg = op.as(); + RAWorkReg* workReg; + ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg)); - default: + if (ret.isReg()) { + if (ret.regType() == Reg::kTypeSt) { + if (workReg->group() != Reg::kGroupVec) return DebugUtils::errored(kErrorInvalidAssignment); + + Reg dst = Reg::fromSignatureAndId(workReg->signature(), workReg->virtId()); + Mem mem; + + uint32_t typeId = Type::baseOf(workReg->typeId()); + if (ret.hasTypeId()) + typeId = ret.typeId(); + + switch (typeId) { + case Type::kIdF32: + ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4)); + mem.setSize(4); + ASMJIT_PROPAGATE(cc()->fstp(mem)); + ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), dst.as(), mem)); + break; + + case Type::kIdF64: + ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4)); + mem.setSize(8); + ASMJIT_PROPAGATE(cc()->fstp(mem)); + ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), dst.as(), mem)); + break; + + default: + return 
DebugUtils::errored(kErrorInvalidAssignment); + } } - } - else { - uint32_t regGroup = workReg->group(); - uint32_t retGroup = Reg::groupOf(ret.regType()); + else { + uint32_t regGroup = workReg->group(); + uint32_t retGroup = Reg::groupOf(ret.regType()); - if (regGroup != retGroup) { - // TODO: - ASMJIT_ASSERT(false); + if (regGroup != retGroup) { + // TODO: Conversion is not supported. + return DebugUtils::errored(kErrorInvalidAssignment); + } } } } @@ -516,18 +551,18 @@ Error X86RACFGBuilder::onBeforeCall(FuncCallNode* call) noexcept { return kErrorOk; } -Error X86RACFGBuilder::onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept { - uint32_t argCount = call->argCount(); - uint32_t retCount = call->retCount(); - const FuncDetail& fd = call->detail(); +Error RACFGBuilder::onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept { + uint32_t argCount = invokeNode->argCount(); + const FuncDetail& fd = invokeNode->detail(); for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { - for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) { - if (!fd.hasArg(argIndex + argHi)) + const FuncValuePack& argPack = fd.argPack(argIndex); + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + if (!argPack[valueIndex]) continue; - const FuncValue& arg = fd.arg(argIndex + argHi); - const Operand& op = call->arg(argIndex + argHi); + const FuncValue& arg = argPack[valueIndex]; + const Operand& op = invokeNode->arg(argIndex, valueIndex); if (op.isNone()) continue; @@ -537,7 +572,13 @@ Error X86RACFGBuilder::onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept { RAWorkReg* workReg; ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg)); - if (arg.isReg()) { + if (arg.isIndirect()) { + uint32_t regGroup = workReg->group(); + if (regGroup != BaseReg::kGroupGp) + return DebugUtils::errored(kErrorInvalidState); + ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId())); + } + else if (arg.isReg()) { uint32_t regGroup = workReg->group(); uint32_t argGroup = Reg::groupOf(arg.regType()); @@ -549,11 +590,13 @@ Error X86RACFGBuilder::onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept { } } - for (uint32_t retIndex = 0; retIndex < retCount; retIndex++) { + for (uint32_t retIndex = 0; retIndex < Globals::kMaxValuePack; retIndex++) { const FuncValue& ret = fd.ret(retIndex); - const Operand& op = call->ret(retIndex); + if (!ret) + break; // Not handled here... 
+ const Operand& op = invokeNode->ret(retIndex); if (ret.regType() == Reg::kTypeSt) continue; @@ -586,11 +629,62 @@ Error X86RACFGBuilder::onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept { } // ============================================================================ -// [asmjit::x86::X86RACFGBuilder - MoveImmToRegArg] +// [asmjit::x86::RACFGBuilder - MoveVecToPtr] // ============================================================================ -Error X86RACFGBuilder::moveImmToRegArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept { - DebugUtils::unused(call); +static uint32_t x86VecRegSignatureBySize(uint32_t size) noexcept { + if (size >= 64) + return Zmm::kSignature; + else if (size >= 32) + return Ymm::kSignature; + else + return Xmm::kSignature; +} + +Error RACFGBuilder::moveVecToPtr(InvokeNode* invokeNode, const FuncValue& arg, const Vec& src, BaseReg* out) noexcept { + DebugUtils::unused(invokeNode); + ASMJIT_ASSERT(arg.isReg()); + + uint32_t argSize = Type::sizeOf(arg.typeId()); + if (argSize == 0) + return DebugUtils::errored(kErrorInvalidState); + + if (argSize < 16) + argSize = 16; + + uint32_t argStackOffset = Support::alignUp(invokeNode->detail()._argStackSize, argSize); + _funcNode->frame().updateCallStackAlignment(argSize); + invokeNode->detail()._argStackSize = argStackOffset + argSize; + + Vec vecReg = Vec::fromSignatureAndId(x86VecRegSignatureBySize(argSize), src.id()); + Mem vecPtr = ptr(_pass->_sp.as(), int32_t(argStackOffset)); + + uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps); + if (argSize > 16) + vMovInstId = Inst::kIdVmovaps; + + ASMJIT_PROPAGATE(cc()->_newReg(out, cc()->_gpRegInfo.type(), nullptr)); + + VirtReg* vReg = cc()->virtRegById(out->id()); + vReg->setWeight(BaseRAPass::kCallArgWeight); + + ASMJIT_PROPAGATE(cc()->lea(out->as(), vecPtr)); + ASMJIT_PROPAGATE(cc()->emit(vMovInstId, ptr(out->as()), vecReg)); + + if (arg.isStack()) { + Mem stackPtr = ptr(_pass->_sp.as(), arg.stackOffset()); + ASMJIT_PROPAGATE(cc()->mov(stackPtr, out->as())); + } + + return kErrorOk; +} + +// ============================================================================ +// [asmjit::x86::RACFGBuilder - MoveImmToRegArg] +// ============================================================================ + +Error RACFGBuilder::moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept { + DebugUtils::unused(invokeNode); ASMJIT_ASSERT(arg.isReg()); Imm imm(imm_); @@ -620,27 +714,27 @@ Error X86RACFGBuilder::moveImmToRegArg(FuncCallNode* call, const FuncValue& arg, break; default: - return DebugUtils::errored(kErrorInvalidState); + return DebugUtils::errored(kErrorInvalidAssignment); } - ASMJIT_PROPAGATE(cc()->_newReg(*out, rTypeId, nullptr)); - cc()->virtRegById(out->id())->setWeight(RAPass::kCallArgWeight); + ASMJIT_PROPAGATE(cc()->_newReg(out, rTypeId, nullptr)); + cc()->virtRegById(out->id())->setWeight(BaseRAPass::kCallArgWeight); return cc()->mov(out->as(), imm); } // ============================================================================ -// [asmjit::x86::X86RACFGBuilder - MoveImmToStackArg] +// [asmjit::x86::RACFGBuilder - MoveImmToStackArg] // ============================================================================ -Error X86RACFGBuilder::moveImmToStackArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_) noexcept { - DebugUtils::unused(call); +Error RACFGBuilder::moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept { + 
DebugUtils::unused(invokeNode); ASMJIT_ASSERT(arg.isStack()); - Mem mem = ptr(_pass->_sp.as(), arg.stackOffset()); + Mem stackPtr = ptr(_pass->_sp.as(), arg.stackOffset()); Imm imm[2]; - mem.setSize(4); + stackPtr.setSize(4); imm[0] = imm_; uint32_t nMovs = 0; @@ -669,41 +763,41 @@ Error X86RACFGBuilder::moveImmToStackArg(FuncCallNode* call, const FuncValue& ar case Type::kIdMmx32: case Type::kIdMmx64: if (_is64Bit && imm[0].isInt32()) { - mem.setSize(8); + stackPtr.setSize(8); nMovs = 1; break; } - imm[1].setU32(imm[0].u32Hi()); + imm[1].setValue(imm[0].uint32Hi()); imm[0].zeroExtend32Bits(); nMovs = 2; break; default: - return DebugUtils::errored(kErrorInvalidState); + return DebugUtils::errored(kErrorInvalidAssignment); } for (uint32_t i = 0; i < nMovs; i++) { - ASMJIT_PROPAGATE(cc()->mov(mem, imm[i])); - mem.addOffsetLo32(int32_t(mem.size())); + ASMJIT_PROPAGATE(cc()->mov(stackPtr, imm[i])); + stackPtr.addOffsetLo32(int32_t(stackPtr.size())); } return kErrorOk; } // ============================================================================ -// [asmjit::x86::X86RACFGBuilder - MoveRegToStackArg] +// [asmjit::x86::RACFGBuilder - MoveRegToStackArg] // ============================================================================ -Error X86RACFGBuilder::moveRegToStackArg(FuncCallNode* call, const FuncValue& arg, const BaseReg& reg) noexcept { - DebugUtils::unused(call); +Error RACFGBuilder::moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept { + DebugUtils::unused(invokeNode); ASMJIT_ASSERT(arg.isStack()); - Mem mem = ptr(_pass->_sp.as(), arg.stackOffset()); + Mem stackPtr = ptr(_pass->_sp.as(), arg.stackOffset()); Reg r0, r1; VirtReg* vr = cc()->virtRegById(reg.id()); - uint32_t gpSize = cc()->gpSize(); + uint32_t registerSize = cc()->registerSize(); uint32_t instId = 0; uint32_t dstTypeId = arg.typeId(); @@ -812,82 +906,96 @@ Error X86RACFGBuilder::moveRegToStackArg(FuncCallNode* call, const FuncValue& ar break; default: - // TODO: Vector types by stack. + if (Type::isVec(dstTypeId) && reg.as().isVec()) { + stackPtr.setSize(Type::sizeOf(dstTypeId)); + uint32_t vMovInstId = choose(Inst::kIdMovaps, Inst::kIdVmovaps); + + if (Type::isVec128(dstTypeId)) + r0.setRegT(reg.id()); + else if (Type::isVec256(dstTypeId)) + r0.setRegT(reg.id()); + else if (Type::isVec512(dstTypeId)) + r0.setRegT(reg.id()); + else + break; + + return cc()->emit(vMovInstId, stackPtr, r0); + } break; } - return DebugUtils::errored(kErrorInvalidState); + return DebugUtils::errored(kErrorInvalidAssignment); // Extend+Move Gp. 
ExtendMovGpD: - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1)); - ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0)); + ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0)); return kErrorOk; ExtendMovGpXQ: - if (gpSize == 8) { - mem.setSize(8); + if (registerSize == 8) { + stackPtr.setSize(8); r0.setRegT(reg.id()); ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1)); - ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0)); + ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0)); } else { - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1)); ExtendMovGpDQ: - ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0)); - mem.addOffsetLo32(4); - ASMJIT_PROPAGATE(cc()->emit(Inst::kIdAnd, mem, 0)); + ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, stackPtr, r0)); + stackPtr.addOffsetLo32(4); + ASMJIT_PROPAGATE(cc()->emit(Inst::kIdAnd, stackPtr, 0)); } return kErrorOk; ZeroExtendGpDQ: - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); goto ExtendMovGpDQ; MovGpD: - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); - return cc()->emit(Inst::kIdMov, mem, r0); + return cc()->emit(Inst::kIdMov, stackPtr, r0); MovGpQ: - mem.setSize(8); + stackPtr.setSize(8); r0.setRegT(reg.id()); - return cc()->emit(Inst::kIdMov, mem, r0); + return cc()->emit(Inst::kIdMov, stackPtr, r0); MovMmD: - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); - return cc()->emit(choose(Inst::kIdMovd, Inst::kIdVmovd), mem, r0); + return cc()->emit(choose(Inst::kIdMovd, Inst::kIdVmovd), stackPtr, r0); MovMmQ: - mem.setSize(8); + stackPtr.setSize(8); r0.setRegT(reg.id()); - return cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), mem, r0); + return cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), stackPtr, r0); MovXmmD: - mem.setSize(4); + stackPtr.setSize(4); r0.setRegT(reg.id()); - return cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), mem, r0); + return cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), stackPtr, r0); MovXmmQ: - mem.setSize(8); + stackPtr.setSize(8); r0.setRegT(reg.id()); - return cc()->emit(choose(Inst::kIdMovlps, Inst::kIdVmovlps), mem, r0); + return cc()->emit(choose(Inst::kIdMovlps, Inst::kIdVmovlps), stackPtr, r0); } // ============================================================================ -// [asmjit::x86::X86RACFGBuilder - OnReg] +// [asmjit::x86::RACFGBuilder - OnReg] // ============================================================================ -Error X86RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept { +Error RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept { const FuncDetail& funcDetail = _pass->func()->detail(); const Operand* opArray = funcRet->operands(); uint32_t opCount = funcRet->opCount(); @@ -912,7 +1020,7 @@ Error X86RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept { if (workReg->group() != Reg::kGroupVec) return DebugUtils::errored(kErrorInvalidAssignment); - Reg src = Reg(workReg->signature(), workReg->virtId()); + Reg src = Reg::fromSignatureAndId(workReg->signature(), workReg->virtId()); Mem mem; uint32_t typeId = Type::baseOf(workReg->typeId()); @@ -944,7 +1052,7 @@ Error X86RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept { return kErrorOk; } -Error X86RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept { +Error RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept { const FuncDetail& funcDetail = _pass->func()->detail(); const Operand* opArray = funcRet->operands(); uint32_t 
opCount = funcRet->opCount(); @@ -988,8 +1096,7 @@ Error X86RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept { // ============================================================================ X86RAPass::X86RAPass() noexcept - : RAPass(), - _avxEnabled(false) {} + : BaseRAPass() { _iEmitHelper = &_emitHelper; } X86RAPass::~X86RAPass() noexcept {} // ============================================================================ @@ -997,12 +1104,13 @@ X86RAPass::~X86RAPass() noexcept {} // ============================================================================ void X86RAPass::onInit() noexcept { - uint32_t archId = cc()->archId(); - uint32_t baseRegCount = archId == ArchInfo::kIdX86 ? 8u : 16u; + uint32_t arch = cc()->arch(); + uint32_t baseRegCount = Environment::is32Bit(arch) ? 8u : 16u; - _archRegsInfo = &opData.archRegs; - _archTraits[Reg::kGroupGp] |= RAArchTraits::kHasSwap; + _emitHelper._emitter = _cb; + _emitHelper._avxEnabled = _func->frame().isAvxEnabled(); + _archTraits = &ArchTraits::byArch(arch); _physRegCount.set(Reg::kGroupGp , baseRegCount); _physRegCount.set(Reg::kGroupVec , baseRegCount); _physRegCount.set(Reg::kGroupMm , 8); @@ -1027,7 +1135,6 @@ void X86RAPass::onInit() noexcept { _sp = cc()->zsp(); _fp = cc()->zbp(); - _avxEnabled = _func->frame().isAvxEnabled(); } void X86RAPass::onDone() noexcept {} @@ -1037,17 +1144,17 @@ void X86RAPass::onDone() noexcept {} // ============================================================================ Error X86RAPass::buildCFG() noexcept { - return X86RACFGBuilder(this).run(); + return RACFGBuilder(this).run(); } // ============================================================================ // [asmjit::x86::X86RAPass - OnEmit] // ============================================================================ -Error X86RAPass::onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept { +Error X86RAPass::emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept { RAWorkReg* wReg = workRegById(workId); - BaseReg dst(wReg->info().signature(), dstPhysId); - BaseReg src(wReg->info().signature(), srcPhysId); + BaseReg dst = BaseReg::fromSignatureAndId(wReg->info().signature(), dstPhysId); + BaseReg src = BaseReg::fromSignatureAndId(wReg->info().signature(), srcPhysId); const char* comment = nullptr; @@ -1058,10 +1165,10 @@ Error X86RAPass::onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhy } #endif - return X86Internal::emitRegMove(cc()->as(), dst, src, wReg->typeId(), _avxEnabled, comment); + return _emitHelper.emitRegMove(dst, src, wReg->typeId(), comment); } -Error X86RAPass::onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept { +Error X86RAPass::emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept { RAWorkReg* waReg = workRegById(aWorkId); RAWorkReg* wbReg = workRegById(bWorkId); @@ -1076,13 +1183,15 @@ Error X86RAPass::onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId } #endif - return cc()->emit(Inst::kIdXchg, Reg(sign, aPhysId), Reg(sign, bPhysId)); + return cc()->emit(Inst::kIdXchg, + Reg::fromSignatureAndId(sign, aPhysId), + Reg::fromSignatureAndId(sign, bPhysId)); } -Error X86RAPass::onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept { +Error X86RAPass::emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept { RAWorkReg* wReg = workRegById(workId); - BaseReg dstReg(wReg->info().signature(), dstPhysId); - BaseMem srcMem(workRegAsMem(wReg)); + BaseReg dstReg = 
BaseReg::fromSignatureAndId(wReg->info().signature(), dstPhysId); + BaseMem srcMem = BaseMem(workRegAsMem(wReg)); const char* comment = nullptr; @@ -1093,13 +1202,13 @@ Error X86RAPass::onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept { } #endif - return X86Internal::emitRegMove(cc()->as(), dstReg, srcMem, wReg->typeId(), _avxEnabled, comment); + return _emitHelper.emitRegMove(dstReg, srcMem, wReg->typeId(), comment); } -Error X86RAPass::onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept { +Error X86RAPass::emitSave(uint32_t workId, uint32_t srcPhysId) noexcept { RAWorkReg* wReg = workRegById(workId); - BaseMem dstMem(workRegAsMem(wReg)); - BaseReg srcReg(wReg->info().signature(), srcPhysId); + BaseMem dstMem = BaseMem(workRegAsMem(wReg)); + BaseReg srcReg = BaseReg::fromSignatureAndId(wReg->info().signature(), srcPhysId); const char* comment = nullptr; @@ -1110,28 +1219,29 @@ Error X86RAPass::onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept { } #endif - return X86Internal::emitRegMove(cc()->as(), dstMem, srcReg, wReg->typeId(), _avxEnabled, comment); + return _emitHelper.emitRegMove(dstMem, srcReg, wReg->typeId(), comment); } -Error X86RAPass::onEmitJump(const Label& label) noexcept { +Error X86RAPass::emitJump(const Label& label) noexcept { return cc()->jmp(label); } -Error X86RAPass::onEmitPreCall(FuncCallNode* call) noexcept { - if (call->detail().hasVarArgs()) { - uint32_t argCount = call->argCount(); - const FuncDetail& fd = call->detail(); +Error X86RAPass::emitPreCall(InvokeNode* invokeNode) noexcept { + if (invokeNode->detail().hasVarArgs() && cc()->is64Bit()) { + const FuncDetail& fd = invokeNode->detail(); + uint32_t argCount = invokeNode->argCount(); - switch (call->detail().callConv().id()) { - case CallConv::kIdX86SysV64: { + switch (invokeNode->detail().callConv().id()) { + case CallConv::kIdX64SystemV: { // AL register contains the number of arguments passed in XMM register(s). uint32_t n = 0; for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { - for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) { - if (!fd.hasArg(argIndex + argHi)) - continue; + const FuncValuePack& argPack = fd.argPack(argIndex); + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + const FuncValue& arg = argPack[valueIndex]; + if (!arg) + break; - const FuncValue& arg = fd.arg(argIndex + argHi); if (arg.isReg() && Reg::groupOf(arg.regType()) == Reg::kGroupVec) n++; } @@ -1144,14 +1254,15 @@ Error X86RAPass::onEmitPreCall(FuncCallNode* call) noexcept { break; } - case CallConv::kIdX86Win64: { + case CallConv::kIdX64Windows: { // Each double-precision argument passed in XMM must be also passed in GP. 
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) { - for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) { - if (!fd.hasArg(argIndex + argHi)) - continue; + const FuncValuePack& argPack = fd.argPack(argIndex); + for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) { + const FuncValue& arg = argPack[valueIndex]; + if (!arg) + break; - const FuncValue& arg = fd.arg(argIndex + argHi); if (arg.isReg() && Reg::groupOf(arg.regType()) == Reg::kGroupVec) { Gp dst = gpq(fd.callConv().passedOrder(Reg::kGroupGp)[argIndex]); Xmm src = xmm(arg.regId()); @@ -1161,6 +1272,9 @@ Error X86RAPass::onEmitPreCall(FuncCallNode* call) noexcept { } break; } + + default: + return DebugUtils::errored(kErrorInvalidState); } } diff --git a/libs/asmjit/src/asmjit/x86/x86rapass_p.h b/libs/asmjit/src/asmjit/x86/x86rapass_p.h index 4fa688b..efcfd3c 100644 --- a/libs/asmjit/src/asmjit/x86/x86rapass_p.h +++ b/libs/asmjit/src/asmjit/x86/x86rapass_p.h @@ -32,17 +32,12 @@ #include "../core/rapass_p.h" #include "../x86/x86assembler.h" #include "../x86/x86compiler.h" +#include "../x86/x86emithelper_p.h" ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! \cond INTERNAL - -//! \defgroup asmjit_x86_ra X86 RA -//! \ingroup asmjit_x86 -//! -//! \brief X86/X64 register allocation. - -//! \addtogroup asmjit_x86_ra +//! \addtogroup asmjit_x86 //! \{ // ============================================================================ @@ -53,12 +48,12 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! Takes care of generating function prologs and epilogs, and also performs //! register allocation. -class X86RAPass : public RAPass { +class X86RAPass : public BaseRAPass { public: ASMJIT_NONCOPYABLE(X86RAPass) - typedef RAPass Base; + typedef BaseRAPass Base; - bool _avxEnabled; + EmitHelper _emitHelper; // -------------------------------------------------------------------------- // [Construction / Destruction] @@ -74,12 +69,17 @@ class X86RAPass : public RAPass { //! Returns the compiler casted to `x86::Compiler`. inline Compiler* cc() const noexcept { return static_cast(_cb); } + //! Returns emit helper. + inline EmitHelper* emitHelper() noexcept { return &_emitHelper; } + // -------------------------------------------------------------------------- // [Utilities] // -------------------------------------------------------------------------- + inline bool avxEnabled() const noexcept { return _emitHelper._avxEnabled; } + inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept { - return _avxEnabled ? avxInstId : sseInstId; + return avxEnabled() ? 
avxInstId : sseInstId; } // -------------------------------------------------------------------------- @@ -99,14 +99,14 @@ class X86RAPass : public RAPass { // [Emit] // -------------------------------------------------------------------------- - Error onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override; - Error onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override; + Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override; + Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override; - Error onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override; - Error onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept override; + Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override; + Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override; - Error onEmitJump(const Label& label) noexcept override; - Error onEmitPreCall(FuncCallNode* node) noexcept override; + Error emitJump(const Label& label) noexcept override; + Error emitPreCall(InvokeNode* invokeNode) noexcept override; }; //! \} diff --git a/libs/asmjit/test/asmjit_bench_x86.cpp b/libs/asmjit/test/asmjit_bench_x86.cpp index 95f792d..590419c 100644 --- a/libs/asmjit/test/asmjit_bench_x86.cpp +++ b/libs/asmjit/test/asmjit_bench_x86.cpp @@ -21,7 +21,12 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. +#include + +#ifdef ASMJIT_BUILD_X86 #include +#endif + #include #include @@ -37,7 +42,7 @@ using namespace asmjit; // [Configuration] // ============================================================================ -static constexpr uint32_t kNumRepeats = 25; +static constexpr uint32_t kNumRepeats = 20; static constexpr uint32_t kNumIterations = 1000; // ============================================================================ @@ -80,12 +85,12 @@ namespace BenchUtils { } template - static void bench(CodeHolder& code, uint32_t archId, const char* testName, const FuncT& func) noexcept { + static void bench(CodeHolder& code, uint32_t arch, const char* testName, const FuncT& func) noexcept { EmitterT emitter; const char* archName = - archId == ArchInfo::kIdX86 ? "X86" : - archId == ArchInfo::kIdX64 ? "X64" : "???"; + arch == Environment::kArchX86 ? "X86" : + arch == Environment::kArchX64 ? "X64" : "???"; const char* emitterName = emitter.isAssembler() ? "Assembler" : @@ -95,14 +100,13 @@ namespace BenchUtils { Performance perf; uint64_t codeSize = 0; - CodeInfo codeInfo(archId); - codeInfo.setCdeclCallConv(archId == ArchInfo::kIdX86 ? 
CallConv::kIdX86CDecl : CallConv::kIdX86SysV64); + Environment env(arch); for (uint32_t r = 0; r < kNumRepeats; r++) { perf.start(); codeSize = 0; for (uint32_t i = 0; i < kNumIterations; i++) { - code.init(codeInfo); + code.init(env); code.attach(&emitter); func(emitter); @@ -113,7 +117,7 @@ namespace BenchUtils { perf.end(); } - printf("[%s] %-9s %-8s | Time:%6u [ms] | ", archName, emitterName, testName, perf.best); + printf("[%s] %-9s %-10s | Time:%6u [ms] | ", archName, emitterName, testName, perf.best); if (codeSize) printf("Speed: %7.3f [MB/s]", mbps(perf.best, codeSize)); else @@ -127,30 +131,35 @@ namespace BenchUtils { // ============================================================================ #ifdef ASMJIT_BUILD_X86 -static void benchX86(uint32_t archId) noexcept { +static void benchX86(uint32_t arch) noexcept { CodeHolder code; - BenchUtils::bench(code, archId, "[raw]", [](x86::Assembler& a) { + BenchUtils::bench(code, arch, "[fast]", [](x86::Assembler& a) { + asmtest::generateOpcodes(a.as()); + }); + + BenchUtils::bench(code, arch, "[validate]", [](x86::Assembler& a) { + a.addValidationOptions(BaseEmitter::kValidationOptionAssembler); asmtest::generateOpcodes(a.as()); }); #ifndef ASMJIT_NO_BUILDER - BenchUtils::bench(code, archId, "[raw]", [](x86::Builder& cb) { + BenchUtils::bench(code, arch, "[no-asm]", [](x86::Builder& cb) { asmtest::generateOpcodes(cb.as()); }); - BenchUtils::bench(code, archId, "[final]", [](x86::Builder& cb) { + BenchUtils::bench(code, arch, "[asm]", [](x86::Builder& cb) { asmtest::generateOpcodes(cb.as()); cb.finalize(); }); #endif #ifndef ASMJIT_NO_COMPILER - BenchUtils::bench(code, archId, "[raw]", [](x86::Compiler& cc) { + BenchUtils::bench(code, arch, "[no-asm]", [](x86::Compiler& cc) { asmtest::generateAlphaBlend(cc); }); - BenchUtils::bench(code, archId, "[final]", [](x86::Compiler& cc) { + BenchUtils::bench(code, arch, "[asm]", [](x86::Compiler& cc) { asmtest::generateAlphaBlend(cc); cc.finalize(); }); @@ -160,8 +169,8 @@ static void benchX86(uint32_t archId) noexcept { int main() { #ifdef ASMJIT_BUILD_X86 - benchX86(ArchInfo::kIdX86); - benchX86(ArchInfo::kIdX64); + benchX86(Environment::kArchX86); + benchX86(Environment::kArchX64); #endif return 0; diff --git a/libs/asmjit/test/asmjit_test_compiler.cpp b/libs/asmjit/test/asmjit_test_compiler.cpp new file mode 100644 index 0000000..6a83e7c --- /dev/null +++ b/libs/asmjit/test/asmjit_test_compiler.cpp @@ -0,0 +1,247 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
+ +#include + +#include +#include +#include + +#include +#include + +#include "./cmdline.h" +#include "./asmjit_test_compiler.h" + +#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86 +#include +void compiler_add_x86_tests(TestApp& app); +#endif + +#if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM == 64 +#include +void compiler_add_a64_tests(TestApp& app); +#endif + +#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86 + #define ASMJIT_HAVE_WORKING_JIT +#endif + +#if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM == 64 + #define ASMJIT_HAVE_WORKING_JIT +#endif + +using namespace asmjit; + +// ============================================================================ +// [TestApp] +// ============================================================================ + +static const char* archAsString(uint32_t arch) { + switch (arch) { + case Environment::kArchX86: return "X86"; + case Environment::kArchX64: return "X64"; + case Environment::kArchARM: return "A32"; + case Environment::kArchThumb: return "T32"; + case Environment::kArchAArch64: return "A64"; + default: return "Unknown"; + } +} + +int TestApp::handleArgs(int argc, const char* const* argv) { + CmdLine cmd(argc, argv); + + if (cmd.hasArg("--verbose")) _verbose = true; + if (cmd.hasArg("--dump-asm")) _dumpAsm = true; + if (cmd.hasArg("--dump-hex")) _dumpHex = true; + + return 0; +} + +void TestApp::showInfo() { + printf("AsmJit Compiler Test-Suite v%u.%u.%u [Arch=%s]:\n", + unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), + unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF), + unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF), + archAsString(Environment::kArchHost)); + printf(" [%s] Verbose (use --verbose to turn verbose output ON)\n", _verbose ? "x" : " "); + printf(" [%s] DumpAsm (use --dump-asm to turn assembler dumps ON)\n", _dumpAsm ? "x" : " "); + printf(" [%s] DumpHex (use --dump-hex to dump binary in hexadecimal)\n", _dumpHex ? 
"x" : " "); + printf("\n"); +} + +int TestApp::run() { +#ifndef ASMJIT_HAVE_WORKING_JIT + return 0; +#else +#ifndef ASMJIT_NO_LOGGING + uint32_t kFormatFlags = FormatOptions::kFlagMachineCode | + FormatOptions::kFlagExplainImms | + FormatOptions::kFlagRegCasts | + FormatOptions::kFlagAnnotations | + FormatOptions::kFlagDebugPasses | + FormatOptions::kFlagDebugRA ; + + FileLogger fileLogger(stdout); + fileLogger.addFlags(kFormatFlags); + + StringLogger stringLogger; + stringLogger.addFlags(kFormatFlags); +#endif + + for (std::unique_ptr& test : _tests) { + JitRuntime runtime; + CodeHolder code; + SimpleErrorHandler errorHandler; + + code.init(runtime.environment()); + code.setErrorHandler(&errorHandler); + +#ifndef ASMJIT_NO_LOGGING + if (_verbose) { + code.setLogger(&fileLogger); + } + else { + stringLogger.clear(); + code.setLogger(&stringLogger); + } +#endif + + printf("[Test] %s", test->name()); + +#ifndef ASMJIT_NO_LOGGING + if (_verbose) printf("\n"); +#endif + +#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86 + x86::Compiler cc(&code); +#endif + +#if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM == 64 + arm::Compiler cc(&code); +#endif + + test->compile(cc); + + void* func = nullptr; + Error err = errorHandler._err; + + if (!err) + err = cc.finalize(); + +#ifndef ASMJIT_NO_LOGGING + if (_dumpAsm) { + if (!_verbose) printf("\n"); + + String sb; + Formatter::formatNodeList(sb, kFormatFlags, &cc); + printf("%s", sb.data()); + } +#endif + + if (err == kErrorOk) + err = runtime.add(&func, &code); + + if (err == kErrorOk && _dumpHex) { + String sb; + sb.appendHex((void*)func, code.codeSize()); + printf("\n (HEX: %s)\n", sb.data()); + } + + if (_verbose) + fflush(stdout); + + if (err == kErrorOk) { + _outputSize += code.codeSize(); + + StringTmp<128> result; + StringTmp<128> expect; + + if (test->run(func, result, expect)) { + if (!_verbose) printf(" [OK]\n"); + } + else { + if (!_verbose) printf(" [FAILED]\n"); + +#ifndef ASMJIT_NO_LOGGING + if (!_verbose) printf("%s", stringLogger.data()); +#endif + + printf("[Status]\n"); + printf(" Returned: %s\n", result.data()); + printf(" Expected: %s\n", expect.data()); + + _nFailed++; + } + + if (_dumpAsm) + printf("\n"); + + runtime.release(func); + } + else { + if (!_verbose) printf(" [FAILED]\n"); + +#ifndef ASMJIT_NO_LOGGING + if (!_verbose) printf("%s", stringLogger.data()); +#endif + + printf("[Status]\n"); + printf(" ERROR 0x%08X: %s\n", unsigned(err), errorHandler._message.data()); + + _nFailed++; + } + } + + if (_nFailed == 0) + printf("\nSuccess:\n All %u tests passed\n", unsigned(_tests.size())); + else + printf("\nFailure:\n %u %s of %u failed\n", _nFailed, _nFailed == 1 ? "test" : "tests", unsigned(_tests.size())); + + printf(" OutputSize=%zu\n", _outputSize); + printf("\n"); + + return _nFailed == 0 ? 
+#endif
+}
+
+// ============================================================================
+// [Main]
+// ============================================================================
+
+int main(int argc, char* argv[]) {
+  TestApp app;
+
+  app.handleArgs(argc, argv);
+  app.showInfo();
+
+#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
+  compiler_add_x86_tests(app);
+#endif
+
+#if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM == 64
+  compiler_add_a64_tests(app);
+#endif
+
+  return app.run();
+}
diff --git a/libs/asmjit/test/asmjit_test_compiler.h b/libs/asmjit/test/asmjit_test_compiler.h
new file mode 100644
index 0000000..5933e4a
--- /dev/null
+++ b/libs/asmjit/test/asmjit_test_compiler.h
@@ -0,0 +1,103 @@
+// AsmJit - Machine code generation for C++
+//
+//  * Official AsmJit Home Page: https://asmjit.com
+//  * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef ASMJIT_TEST_COMPILER_H_INCLUDED
+#define ASMJIT_TEST_COMPILER_H_INCLUDED
+
+#include <asmjit/core.h>
+
+#include <memory>
+#include <vector>
+
+// ============================================================================
+// [SimpleErrorHandler]
+// ============================================================================
+
+class SimpleErrorHandler : public asmjit::ErrorHandler {
+public:
+  SimpleErrorHandler()
+    : _err(asmjit::kErrorOk) {}
+
+  virtual void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) {
+    asmjit::DebugUtils::unused(origin);
+    _err = err;
+    _message.assign(message);
+  }
+
+  asmjit::Error _err;
+  asmjit::String _message;
+};
+
+// ============================================================================
+// [TestCase]
+// ============================================================================
+
+//! A test case interface for testing AsmJit's Compiler.
+class TestCase {
+public:
+  TestCase(const char* name = nullptr) {
+    if (name)
+      _name.assign(name);
+  }
+
+  virtual ~TestCase() {}
+
+  inline const char* name() const { return _name.data(); }
+
+  virtual void compile(asmjit::BaseCompiler& cc) = 0;
+  virtual bool run(void* func, asmjit::String& result, asmjit::String& expect) = 0;
+
+  asmjit::String _name;
+};
+
+// ============================================================================
+// [TestApp]
+// ============================================================================
+
+class TestApp {
+public:
+  std::vector<std::unique_ptr<TestCase>> _tests;
+
+  unsigned _nFailed = 0;
+  size_t _outputSize = 0;
+
+  bool _verbose = false;
+  bool _dumpAsm = false;
+  bool _dumpHex = false;
+
+  TestApp() noexcept {}
+  ~TestApp() noexcept {}
+
+  void add(TestCase* test) noexcept {
+    _tests.push_back(std::unique_ptr<TestCase>(test));
+  }
+
+  template<class T>
+  inline void addT() { T::add(*this); }
+
+  int handleArgs(int argc, const char* const* argv);
+  void showInfo();
+  int run();
+};
+
+#endif // ASMJIT_TEST_COMPILER_H_INCLUDED
diff --git a/libs/asmjit/test/asmjit_test_compiler_x86.cpp b/libs/asmjit/test/asmjit_test_compiler_x86.cpp
new file mode 100644
index 0000000..4caa7d8
--- /dev/null
+++ b/libs/asmjit/test/asmjit_test_compiler_x86.cpp
@@ -0,0 +1,4238 @@
+// AsmJit - Machine code generation for C++
+//
+//  * Official AsmJit Home Page: https://asmjit.com
+//  * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#include <asmjit/core.h>
+#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
+
+#include <asmjit/x86.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Required for function tests that pass / return XMM registers.
+#include <emmintrin.h>
+
+#include "./asmjit_test_misc.h"
+#include "./asmjit_test_compiler.h"
+
+#ifdef _MSC_VER
+// Interaction between '_setjmp' and C++ object destruction is non-portable.
+#pragma warning(disable: 4611) +#endif + +using namespace asmjit; + +// ============================================================================ +// [X86TestCase] +// ============================================================================ + +class X86TestCase : public TestCase { +public: + X86TestCase(const char* name = nullptr) + : TestCase(name) {} + + virtual void compile(BaseCompiler& cc) override { + compile(static_cast(cc)); + } + + virtual void compile(x86::Compiler& cc) = 0; +}; + +// ============================================================================ +// [X86Test_AlignBase] +// ============================================================================ + +class X86Test_AlignBase : public X86TestCase { +public: + X86Test_AlignBase(uint32_t argCount, uint32_t alignment, bool preserveFP) + : _argCount(argCount), + _alignment(alignment), + _preserveFP(preserveFP) { + _name.assignFormat("AlignBase {NumArgs=%u Alignment=%u PreserveFP=%c}", argCount, alignment, preserveFP ? 'Y' : 'N'); + } + + static void add(TestApp& app) { + for (uint32_t i = 0; i <= 16; i++) { + for (uint32_t a = 16; a <= 32; a += 16) { + app.add(new X86Test_AlignBase(i, a, true)); + app.add(new X86Test_AlignBase(i, a, false)); + } + } + } + + virtual void compile(x86::Compiler& cc) { + uint32_t i; + uint32_t argCount = _argCount; + + FuncSignatureBuilder signature(CallConv::kIdHost); + signature.setRetT(); + for (i = 0; i < argCount; i++) + signature.addArgT(); + + cc.addFunc(signature); + if (_preserveFP) + cc.func()->frame().setPreservedFP(); + + x86::Gp gpVar = cc.newIntPtr("gpVar"); + x86::Gp gpSum; + x86::Mem stack = cc.newStack(_alignment, _alignment); + + // Do a sum of arguments to verify a possible relocation when misaligned. + if (argCount) { + for (i = 0; i < argCount; i++) { + x86::Gp gpArg = cc.newInt32("gpArg%u", i); + cc.setArg(i, gpArg); + + if (i == 0) + gpSum = gpArg; + else + cc.add(gpSum, gpArg); + } + } + + // Check alignment of xmmVar (has to be 16). + cc.lea(gpVar, stack); + cc.and_(gpVar, _alignment - 1); + + // Add a sum of all arguments to check if they are correct. 
+ if (argCount) + cc.or_(gpVar.r32(), gpSum); + + cc.ret(gpVar); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef unsigned int U; + + typedef U (*Func0)(); + typedef U (*Func1)(U); + typedef U (*Func2)(U, U); + typedef U (*Func3)(U, U, U); + typedef U (*Func4)(U, U, U, U); + typedef U (*Func5)(U, U, U, U, U); + typedef U (*Func6)(U, U, U, U, U, U); + typedef U (*Func7)(U, U, U, U, U, U, U); + typedef U (*Func8)(U, U, U, U, U, U, U, U); + typedef U (*Func9)(U, U, U, U, U, U, U, U, U); + typedef U (*Func10)(U, U, U, U, U, U, U, U, U, U); + typedef U (*Func11)(U, U, U, U, U, U, U, U, U, U, U); + typedef U (*Func12)(U, U, U, U, U, U, U, U, U, U, U, U); + typedef U (*Func13)(U, U, U, U, U, U, U, U, U, U, U, U, U); + typedef U (*Func14)(U, U, U, U, U, U, U, U, U, U, U, U, U, U); + typedef U (*Func15)(U, U, U, U, U, U, U, U, U, U, U, U, U, U, U); + typedef U (*Func16)(U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U); + + unsigned int resultRet = 0; + unsigned int expectRet = 0; + + switch (_argCount) { + case 0: + resultRet = ptr_as_func(_func)(); + expectRet = 0; + break; + case 1: + resultRet = ptr_as_func(_func)(1); + expectRet = 1; + break; + case 2: + resultRet = ptr_as_func(_func)(1, 2); + expectRet = 1 + 2; + break; + case 3: + resultRet = ptr_as_func(_func)(1, 2, 3); + expectRet = 1 + 2 + 3; + break; + case 4: + resultRet = ptr_as_func(_func)(1, 2, 3, 4); + expectRet = 1 + 2 + 3 + 4; + break; + case 5: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5); + expectRet = 1 + 2 + 3 + 4 + 5; + break; + case 6: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6); + expectRet = 1 + 2 + 3 + 4 + 5 + 6; + break; + case 7: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7; + break; + case 8: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8; + break; + case 9: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9; + break; + case 10: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10; + break; + case 11: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11; + break; + case 12: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12; + break; + case 13: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13; + break; + case 14: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14; + break; + case 15: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15; + break; + case 16: + resultRet = ptr_as_func(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16; + break; + } + + result.assignFormat("ret={%u, %u}", resultRet >> 28, resultRet & 0x0FFFFFFFu); + expect.assignFormat("ret={%u, %u}", expectRet >> 28, expectRet & 0x0FFFFFFFu); + + return resultRet == expectRet; + } + + uint32_t _argCount; + uint32_t _alignment; + bool _preserveFP; +}; + +// 
============================================================================ +// [X86Test_NoCode] +// ============================================================================ + +class X86Test_NoCode : public X86TestCase { +public: + X86Test_NoCode() : X86TestCase("NoCode") {} + + static void add(TestApp& app) { + app.add(new X86Test_NoCode()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + DebugUtils::unused(result, expect); + + typedef void(*Func)(void); + Func func = ptr_as_func(_func); + + func(); + return true; + } +}; + +// ============================================================================ +// [X86Test_AlignNone] +// ============================================================================ + +class X86Test_NoAlign : public X86TestCase { +public: + X86Test_NoAlign() : X86TestCase("NoAlign") {} + + static void add(TestApp& app) { + app.add(new X86Test_NoAlign()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.align(kAlignCode, 0); + cc.align(kAlignCode, 1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + DebugUtils::unused(result, expect); + + typedef void (*Func)(void); + Func func = ptr_as_func(_func); + + func(); + return true; + } +}; + +// ============================================================================ +// [X86Test_JumpMerge] +// ============================================================================ + +class X86Test_JumpMerge : public X86TestCase { +public: + X86Test_JumpMerge() : X86TestCase("JumpMerge") {} + + static void add(TestApp& app) { + app.add(new X86Test_JumpMerge()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + Label L0 = cc.newLabel(); + Label L1 = cc.newLabel(); + Label L2 = cc.newLabel(); + Label LEnd = cc.newLabel(); + + x86::Gp dst = cc.newIntPtr("dst"); + x86::Gp val = cc.newInt32("val"); + + cc.setArg(0, dst); + cc.setArg(1, val); + + cc.cmp(val, 0); + cc.je(L2); + + cc.cmp(val, 1); + cc.je(L1); + + cc.cmp(val, 2); + cc.je(L0); + + cc.mov(x86::dword_ptr(dst), val); + cc.jmp(LEnd); + + // On purpose. This tests whether the CFG constructs a single basic-block + // from multiple labels next to each other. 
+ cc.bind(L0); + cc.bind(L1); + cc.bind(L2); + cc.mov(x86::dword_ptr(dst), 0); + + cc.bind(LEnd); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void(*Func)(int*, int); + Func func = ptr_as_func(_func); + + int arr[5] = { -1, -1, -1, -1, -1 }; + int exp[5] = { 0, 0, 0, 3, 4 }; + + for (int i = 0; i < 5; i++) + func(&arr[i], i); + + result.assignFormat("ret={%d, %d, %d, %d, %d}", arr[0], arr[1], arr[2], arr[3], arr[4]); + expect.assignFormat("ret={%d, %d, %d, %d, %d}", exp[0], exp[1], exp[2], exp[3], exp[4]); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_JumpCross] +// ============================================================================ + +class X86Test_JumpCross : public X86TestCase { +public: + X86Test_JumpCross() : X86TestCase("JumpCross") {} + + static void add(TestApp& app) { + app.add(new X86Test_JumpCross()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + Label L1 = cc.newLabel(); + Label L2 = cc.newLabel(); + Label L3 = cc.newLabel(); + + cc.jmp(L2); + + cc.bind(L1); + cc.jmp(L3); + + cc.bind(L2); + cc.jmp(L1); + + cc.bind(L3); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + DebugUtils::unused(result, expect); + + typedef void (*Func)(void); + Func func = ptr_as_func(_func); + + func(); + return true; + } +}; + +// ============================================================================ +// [X86Test_JumpMany] +// ============================================================================ + +class X86Test_JumpMany : public X86TestCase { +public: + X86Test_JumpMany() : X86TestCase("JumpMany") {} + + static void add(TestApp& app) { + app.add(new X86Test_JumpMany()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + for (uint32_t i = 0; i < 1000; i++) { + Label L = cc.newLabel(); + cc.jmp(L); + cc.bind(L); + } + + x86::Gp ret = cc.newInt32("ret"); + cc.xor_(ret, ret); + cc.ret(ret); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 0; + + result.assignFormat("ret={%d}", resultRet); + expect.assignFormat("ret={%d}", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_JumpUnreachable1] +// ============================================================================ + +class X86Test_JumpUnreachable1 : public X86TestCase { +public: + X86Test_JumpUnreachable1() : X86TestCase("JumpUnreachable1") {} + + static void add(TestApp& app) { + app.add(new X86Test_JumpUnreachable1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + Label L_3 = cc.newLabel(); + Label L_4 = cc.newLabel(); + Label L_5 = cc.newLabel(); + Label L_6 = cc.newLabel(); + Label L_7 = cc.newLabel(); + + x86::Gp v0 = cc.newUInt32("v0"); + x86::Gp v1 = cc.newUInt32("v1"); + + cc.bind(L_2); + cc.bind(L_3); + + cc.jmp(L_1); + + cc.bind(L_5); + cc.mov(v0, 0); + + cc.bind(L_6); + cc.jmp(L_3); + cc.mov(v1, 1); + cc.jmp(L_1); + + cc.bind(L_4); + cc.jmp(L_2); + cc.bind(L_7); + cc.add(v0, v1); + + cc.align(kAlignCode, 16); + cc.bind(L_1); + cc.ret(); + cc.endFunc(); + } + + virtual bool 
run(void* _func, String& result, String& expect) { + typedef void (*Func)(void); + Func func = ptr_as_func(_func); + + func(); + + result.append("ret={}"); + expect.append("ret={}"); + + return true; + } +}; + +// ============================================================================ +// [X86Test_JumpUnreachable2] +// ============================================================================ + +class X86Test_JumpUnreachable2 : public X86TestCase { +public: + X86Test_JumpUnreachable2() : X86TestCase("JumpUnreachable2") {} + + static void add(TestApp& app) { + app.add(new X86Test_JumpUnreachable2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + + x86::Gp v0 = cc.newUInt32("v0"); + x86::Gp v1 = cc.newUInt32("v1"); + + cc.jmp(L_1); + cc.bind(L_2); + cc.mov(v0, 1); + cc.mov(v1, 2); + cc.cmp(v0, v1); + cc.jz(L_2); + cc.jmp(L_1); + + cc.bind(L_1); + cc.ret(); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void); + Func func = ptr_as_func(_func); + + func(); + + result.append("ret={}"); + expect.append("ret={}"); + + return true; + } +}; + +// ============================================================================ +// [X86Test_JumpTable] +// ============================================================================ + +class X86Test_JumpTable : public X86TestCase { +public: + bool _annotated; + + X86Test_JumpTable(bool annotated) + : X86TestCase("X86Test_JumpTable"), + _annotated(annotated) { + _name.assignFormat("JumpTable {%s}", annotated ? "Annotated" : "Unknown Reg/Mem"); + } + + enum Operator { + kOperatorAdd = 0, + kOperatorSub = 1, + kOperatorMul = 2, + kOperatorDiv = 3 + }; + + static void add(TestApp& app) { + app.add(new X86Test_JumpTable(false)); + app.add(new X86Test_JumpTable(true)); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a = cc.newXmmSs("a"); + x86::Xmm b = cc.newXmmSs("b"); + x86::Gp op = cc.newUInt32("op"); + x86::Gp target = cc.newIntPtr("target"); + x86::Gp offset = cc.newIntPtr("offset"); + + Label L_End = cc.newLabel(); + + Label L_Table = cc.newLabel(); + Label L_Add = cc.newLabel(); + Label L_Sub = cc.newLabel(); + Label L_Mul = cc.newLabel(); + Label L_Div = cc.newLabel(); + + cc.setArg(0, a); + cc.setArg(1, b); + cc.setArg(2, op); + + cc.lea(offset, x86::ptr(L_Table)); + if (cc.is64Bit()) + cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); + else + cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2)); + cc.add(target, offset); + + // JumpAnnotation allows to annotate all possible jump targets of + // instructions where it cannot be deduced from operands. 
+ if (_annotated) { + JumpAnnotation* annotation = cc.newJumpAnnotation(); + annotation->addLabel(L_Add); + annotation->addLabel(L_Sub); + annotation->addLabel(L_Mul); + annotation->addLabel(L_Div); + cc.jmp(target, annotation); + } + else { + cc.jmp(target); + } + + cc.bind(L_Add); + cc.addss(a, b); + cc.jmp(L_End); + + cc.bind(L_Sub); + cc.subss(a, b); + cc.jmp(L_End); + + cc.bind(L_Mul); + cc.mulss(a, b); + cc.jmp(L_End); + + cc.bind(L_Div); + cc.divss(a, b); + + cc.bind(L_End); + cc.ret(a); + + cc.endFunc(); + + cc.bind(L_Table); + cc.embedLabelDelta(L_Add, L_Table, 4); + cc.embedLabelDelta(L_Sub, L_Table, 4); + cc.embedLabelDelta(L_Mul, L_Table, 4); + cc.embedLabelDelta(L_Div, L_Table, 4); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef float (*Func)(float, float, uint32_t); + Func func = ptr_as_func(_func); + + float results[4]; + float expected[4]; + + results[0] = func(33.0f, 14.0f, kOperatorAdd); + results[1] = func(33.0f, 14.0f, kOperatorSub); + results[2] = func(10.0f, 6.0f, kOperatorMul); + results[3] = func(80.0f, 8.0f, kOperatorDiv); + + expected[0] = 47.0f; + expected[1] = 19.0f; + expected[2] = 60.0f; + expected[3] = 10.0f; + + result.assignFormat("ret={%f, %f, %f, %f}", results[0], results[1], results[2], results[3]); + expect.assignFormat("ret={%f, %f, %f, %f}", expected[0], expected[1], expected[2], expected[3]); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocBase] +// ============================================================================ + +class X86Test_AllocBase : public X86TestCase { +public: + X86Test_AllocBase() : X86TestCase("AllocBase") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocBase()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v0 = cc.newInt32("v0"); + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + x86::Gp v3 = cc.newInt32("v3"); + x86::Gp v4 = cc.newInt32("v4"); + + cc.xor_(v0, v0); + + cc.mov(v1, 1); + cc.mov(v2, 2); + cc.mov(v3, 3); + cc.mov(v4, 4); + + cc.add(v0, v1); + cc.add(v0, v2); + cc.add(v0, v3); + cc.add(v0, v4); + + cc.ret(v0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 1 + 2 + 3 + 4; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocMany1] +// ============================================================================ + +class X86Test_AllocMany1 : public X86TestCase { +public: + X86Test_AllocMany1() : X86TestCase("AllocMany1") {} + + enum { kCount = 8 }; + + static void add(TestApp& app) { + app.add(new X86Test_AllocMany1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp a0 = cc.newIntPtr("a0"); + x86::Gp a1 = cc.newIntPtr("a1"); + + cc.setArg(0, a0); + cc.setArg(1, a1); + + // Create some variables. + x86::Gp t = cc.newInt32("t"); + x86::Gp x[kCount]; + + uint32_t i; + + // Setup variables (use mov with reg/imm to se if register allocator works). + for (i = 0; i < kCount; i++) x[i] = cc.newInt32("x%u", i); + for (i = 0; i < kCount; i++) cc.mov(x[i], int(i + 1)); + + // Make sum (addition). 
+ cc.xor_(t, t); + for (i = 0; i < kCount; i++) cc.add(t, x[i]); + + // Store result to a given pointer in first argument. + cc.mov(x86::dword_ptr(a0), t); + + // Clear t. + cc.xor_(t, t); + + // Make sum (subtraction). + for (i = 0; i < kCount; i++) cc.sub(t, x[i]); + + // Store result to a given pointer in second argument. + cc.mov(x86::dword_ptr(a1), t); + + // End of function. + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(int*, int*); + Func func = ptr_as_func(_func); + + int resultX; + int resultY; + + int expectX = 36; + int expectY = -36; + + func(&resultX, &resultY); + + result.assignFormat("ret={x=%d, y=%d}", resultX, resultY); + expect.assignFormat("ret={x=%d, y=%d}", expectX, expectY); + + return resultX == expectX && resultY == expectY; + } +}; + +// ============================================================================ +// [X86Test_AllocMany2] +// ============================================================================ + +class X86Test_AllocMany2 : public X86TestCase { +public: + X86Test_AllocMany2() : X86TestCase("AllocMany2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocMany2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp a = cc.newIntPtr("a"); + x86::Gp v[32]; + + uint32_t i; + cc.setArg(0, a); + + for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) v[i] = cc.newInt32("v%d", i); + for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.xor_(v[i], v[i]); + + x86::Gp x = cc.newInt32("x"); + Label L = cc.newLabel(); + + cc.mov(x, 32); + cc.bind(L); + for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.add(v[i], i); + + cc.dec(x); + cc.jnz(L); + for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.mov(x86::dword_ptr(a, int(i * 4)), v[i]); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(uint32_t*); + Func func = ptr_as_func(_func); + + uint32_t i; + uint32_t resultBuf[32]; + uint32_t expectBuf[32]; + + for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) + expectBuf[i] = i * 32; + func(resultBuf); + + for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) { + if (i != 0) { + result.append(','); + expect.append(','); + } + + result.appendFormat("%u", resultBuf[i]); + expect.appendFormat("%u", expectBuf[i]); + } + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocImul1] +// ============================================================================ + +class X86Test_AllocImul1 : public X86TestCase { +public: + X86Test_AllocImul1() : X86TestCase("AllocImul1") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocImul1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp dstHi = cc.newIntPtr("dstHi"); + x86::Gp dstLo = cc.newIntPtr("dstLo"); + + x86::Gp vHi = cc.newInt32("vHi"); + x86::Gp vLo = cc.newInt32("vLo"); + x86::Gp src = cc.newInt32("src"); + + cc.setArg(0, dstHi); + cc.setArg(1, dstLo); + cc.setArg(2, vLo); + cc.setArg(3, src); + + cc.imul(vHi, vLo, src); + + cc.mov(x86::dword_ptr(dstHi), vHi); + cc.mov(x86::dword_ptr(dstLo), vLo); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(int*, int*, int, int); + Func func = ptr_as_func(_func); + + int v0 = 4; + int v1 = 4; + + int resultHi; + int resultLo; + + int expectHi = 0; + int expectLo = v0 * v1; + + func(&resultHi, 
&resultLo, v0, v1); + + result.assignFormat("hi=%d, lo=%d", resultHi, resultLo); + expect.assignFormat("hi=%d, lo=%d", expectHi, expectLo); + + return resultHi == expectHi && resultLo == expectLo; + } +}; + +// ============================================================================ +// [X86Test_AllocImul2] +// ============================================================================ + +class X86Test_AllocImul2 : public X86TestCase { +public: + X86Test_AllocImul2() : X86TestCase("AllocImul2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocImul2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp dst = cc.newIntPtr("dst"); + x86::Gp src = cc.newIntPtr("src"); + + cc.setArg(0, dst); + cc.setArg(1, src); + + for (unsigned int i = 0; i < 4; i++) { + x86::Gp x = cc.newInt32("x"); + x86::Gp y = cc.newInt32("y"); + x86::Gp hi = cc.newInt32("hi"); + + cc.mov(x, x86::dword_ptr(src, 0)); + cc.mov(y, x86::dword_ptr(src, 4)); + + cc.imul(hi, x, y); + cc.add(x86::dword_ptr(dst, 0), hi); + cc.add(x86::dword_ptr(dst, 4), x); + } + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(int*, const int*); + Func func = ptr_as_func(_func); + + int src[2] = { 4, 9 }; + int resultRet[2] = { 0, 0 }; + int expectRet[2] = { 0, (4 * 9) * 4 }; + + func(resultRet, src); + + result.assignFormat("ret={%d, %d}", resultRet[0], resultRet[1]); + expect.assignFormat("ret={%d, %d}", expectRet[0], expectRet[1]); + + return resultRet[0] == expectRet[0] && resultRet[1] == expectRet[1]; + } +}; + +// ============================================================================ +// [X86Test_AllocIdiv1] +// ============================================================================ + +class X86Test_AllocIdiv1 : public X86TestCase { +public: + X86Test_AllocIdiv1() : X86TestCase("AllocIdiv1") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocIdiv1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + x86::Gp dummy = cc.newInt32("dummy"); + + cc.setArg(0, a); + cc.setArg(1, b); + + cc.xor_(dummy, dummy); + cc.idiv(dummy, a, b); + + cc.ret(a); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int v0 = 2999; + int v1 = 245; + + int resultRet = func(v0, v1); + int expectRet = 2999 / 245; + + result.assignFormat("result=%d", resultRet); + expect.assignFormat("result=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocSetz] +// ============================================================================ + +class X86Test_AllocSetz : public X86TestCase { +public: + X86Test_AllocSetz() : X86TestCase("AllocSetz") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocSetz()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp src0 = cc.newInt32("src0"); + x86::Gp src1 = cc.newInt32("src1"); + x86::Gp dst0 = cc.newIntPtr("dst0"); + + cc.setArg(0, src0); + cc.setArg(1, src1); + cc.setArg(2, dst0); + + cc.cmp(src0, src1); + cc.setz(x86::byte_ptr(dst0)); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(int, int, char*); + 
Func func = ptr_as_func(_func); + + char resultBuf[4]; + char expectBuf[4] = { 1, 0, 0, 1 }; + + func(0, 0, &resultBuf[0]); // We are expecting 1 (0 == 0). + func(0, 1, &resultBuf[1]); // We are expecting 0 (0 != 1). + func(1, 0, &resultBuf[2]); // We are expecting 0 (1 != 0). + func(1, 1, &resultBuf[3]); // We are expecting 1 (1 == 1). + + result.assignFormat("out={%d, %d, %d, %d}", resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3]); + expect.assignFormat("out={%d, %d, %d, %d}", expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3]); + + return resultBuf[0] == expectBuf[0] && + resultBuf[1] == expectBuf[1] && + resultBuf[2] == expectBuf[2] && + resultBuf[3] == expectBuf[3] ; + } +}; + +// ============================================================================ +// [X86Test_AllocShlRor] +// ============================================================================ + +class X86Test_AllocShlRor : public X86TestCase { +public: + X86Test_AllocShlRor() : X86TestCase("AllocShlRor") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocShlRor()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp dst = cc.newIntPtr("dst"); + x86::Gp var = cc.newInt32("var"); + x86::Gp vShlParam = cc.newInt32("vShlParam"); + x86::Gp vRorParam = cc.newInt32("vRorParam"); + + cc.setArg(0, dst); + cc.setArg(1, var); + cc.setArg(2, vShlParam); + cc.setArg(3, vRorParam); + + cc.shl(var, vShlParam); + cc.ror(var, vRorParam); + + cc.mov(x86::dword_ptr(dst), var); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(int*, int, int, int); + Func func = ptr_as_func(_func); + + int v0 = 0x000000FF; + + int resultRet; + int expectRet = 0x0000FF00; + + func(&resultRet, v0, 16, 8); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocGpbLo] +// ============================================================================ + +class X86Test_AllocGpbLo1 : public X86TestCase { +public: + X86Test_AllocGpbLo1() : X86TestCase("AllocGpbLo1") {} + + enum { kCount = 32 }; + + static void add(TestApp& app) { + app.add(new X86Test_AllocGpbLo1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp rPtr = cc.newUIntPtr("rPtr"); + x86::Gp rSum = cc.newUInt32("rSum"); + + cc.setArg(0, rPtr); + + x86::Gp x[kCount]; + uint32_t i; + + for (i = 0; i < kCount; i++) { + x[i] = cc.newUInt32("x%u", i); + } + + // Init pseudo-regs with values from our array. + for (i = 0; i < kCount; i++) { + cc.mov(x[i], x86::dword_ptr(rPtr, int(i * 4))); + } + + for (i = 2; i < kCount; i++) { + // Add and truncate to 8 bit; no purpose, just mess with jit. + cc.add (x[i ], x[i-1]); + cc.movzx(x[i ], x[i ].r8()); + cc.movzx(x[i-2], x[i-1].r8()); + cc.movzx(x[i-1], x[i-2].r8()); + } + + // Sum up all computed values. + cc.mov(rSum, 0); + for (i = 0; i < kCount; i++) { + cc.add(rSum, x[i]); + } + + // Return the sum. 
+ cc.ret(rSum); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint32_t (*Func)(uint32_t*); + Func func = ptr_as_func(_func); + + uint32_t i; + uint32_t buf[kCount]; + uint32_t resultRet; + uint32_t expectRet; + + expectRet = 0; + for (i = 0; i < kCount; i++) { + buf[i] = 1; + } + + for (i = 2; i < kCount; i++) { + buf[i ]+= buf[i-1]; + buf[i ] = buf[i ] & 0xFF; + buf[i-2] = buf[i-1] & 0xFF; + buf[i-1] = buf[i-2] & 0xFF; + } + + for (i = 0; i < kCount; i++) { + expectRet += buf[i]; + } + + for (i = 0; i < kCount; i++) { + buf[i] = 1; + } + resultRet = func(buf); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocGpbLo2] +// ============================================================================ + +class X86Test_AllocGpbLo2 : public X86TestCase { +public: + X86Test_AllocGpbLo2() : X86TestCase("AllocGpbLo2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocGpbLo2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v = cc.newUInt32("v"); + cc.setArg(0, v); + cc.mov(v.r8(), 0xFF); + cc.ret(v); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint32_t (*Func)(uint32_t); + Func func = ptr_as_func(_func); + + uint32_t resultRet = func(0x12345678u); + uint32_t expectRet = 0x123456FFu; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocRepMovsb] +// ============================================================================ + +class X86Test_AllocRepMovsb : public X86TestCase { +public: + X86Test_AllocRepMovsb() : X86TestCase("AllocRepMovsb") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocRepMovsb()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp dst = cc.newIntPtr("dst"); + x86::Gp src = cc.newIntPtr("src"); + x86::Gp cnt = cc.newIntPtr("cnt"); + + cc.setArg(0, dst); + cc.setArg(1, src); + cc.setArg(2, cnt); + + cc.rep(cnt).movs(x86::byte_ptr(dst), x86::byte_ptr(src)); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*, void*, size_t); + Func func = ptr_as_func(_func); + + char dst[20] = { 0 }; + char src[20] = "Hello AsmJit!"; + func(dst, src, strlen(src) + 1); + + result.assignFormat("ret=\"%s\"", dst); + expect.assignFormat("ret=\"%s\"", src); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocIfElse1] +// ============================================================================ + +class X86Test_AllocIfElse1 : public X86TestCase { +public: + X86Test_AllocIfElse1() : X86TestCase("AllocIfElse1") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocIfElse1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + + cc.setArg(0, v1); + cc.setArg(1, v2); + + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.mov(v1, 1); + cc.jmp(L_2); + + 
cc.bind(L_1); + cc.mov(v1, 2); + + cc.bind(L_2); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocIfElse2] +// ============================================================================ + +class X86Test_AllocIfElse2 : public X86TestCase { +public: + X86Test_AllocIfElse2() : X86TestCase("AllocIfElse2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocIfElse2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + Label L_3 = cc.newLabel(); + Label L_4 = cc.newLabel(); + + cc.setArg(0, v1); + cc.setArg(1, v2); + + cc.jmp(L_1); + cc.bind(L_2); + cc.jmp(L_4); + cc.bind(L_1); + + cc.cmp(v1, v2); + cc.jg(L_3); + + cc.mov(v1, 1); + cc.jmp(L_2); + + cc.bind(L_3); + cc.mov(v1, 2); + cc.jmp(L_2); + + cc.bind(L_4); + + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocIfElse3] +// ============================================================================ + +class X86Test_AllocIfElse3 : public X86TestCase { +public: + X86Test_AllocIfElse3() : X86TestCase("AllocIfElse3") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocIfElse3()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + x86::Gp counter = cc.newInt32("counter"); + + Label L_1 = cc.newLabel(); + Label L_Loop = cc.newLabel(); + Label L_Exit = cc.newLabel(); + + cc.setArg(0, v1); + cc.setArg(1, v2); + + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.mov(counter, 0); + + cc.bind(L_Loop); + cc.mov(v1, counter); + + cc.inc(counter); + cc.cmp(counter, 1); + cc.jle(L_Loop); + cc.jmp(L_Exit); + + cc.bind(L_1); + cc.mov(v1, 2); + + cc.bind(L_Exit); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocIfElse4] +// ============================================================================ + +class X86Test_AllocIfElse4 : public X86TestCase { +public: + X86Test_AllocIfElse4() : X86TestCase("AllocIfElse4") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocIfElse4()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + x86::Gp counter = 
cc.newInt32("counter"); + + Label L_1 = cc.newLabel(); + Label L_Loop1 = cc.newLabel(); + Label L_Loop2 = cc.newLabel(); + Label L_Exit = cc.newLabel(); + + cc.mov(counter, 0); + + cc.setArg(0, v1); + cc.setArg(1, v2); + + cc.cmp(v1, v2); + cc.jg(L_1); + + cc.bind(L_Loop1); + cc.mov(v1, counter); + + cc.inc(counter); + cc.cmp(counter, 1); + cc.jle(L_Loop1); + cc.jmp(L_Exit); + + cc.bind(L_1); + cc.bind(L_Loop2); + cc.mov(v1, counter); + cc.inc(counter); + cc.cmp(counter, 2); + cc.jle(L_Loop2); + + cc.bind(L_Exit); + cc.ret(v1); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int a = func(0, 1); + int b = func(1, 0); + + result.appendFormat("ret={%d, %d}", a, b); + expect.appendFormat("ret={%d, %d}", 1, 2); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocInt8] +// ============================================================================ + +class X86Test_AllocInt8 : public X86TestCase { +public: + X86Test_AllocInt8() : X86TestCase("AllocInt8") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocInt8()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp x = cc.newInt8("x"); + x86::Gp y = cc.newInt32("y"); + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, x); + + cc.movsx(y, x); + + cc.ret(y); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(char); + Func func = ptr_as_func(_func); + + int resultRet = func(-13); + int expectRet = -13; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocUnhandledArg] +// ============================================================================ + +class X86Test_AllocUnhandledArg : public X86TestCase { +public: + X86Test_AllocUnhandledArg() : X86TestCase("AllocUnhandledArg") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocUnhandledArg()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp x = cc.newInt32("x"); + cc.setArg(2, x); + cc.ret(x); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + Func func = ptr_as_func(_func); + + int resultRet = func(42, 155, 199); + int expectRet = 199; + + result.assignFormat("ret={%d}", resultRet); + expect.assignFormat("ret={%d}", expectRet); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocArgsIntPtr] +// ============================================================================ + +class X86Test_AllocArgsIntPtr : public X86TestCase { +public: + X86Test_AllocArgsIntPtr() : X86TestCase("AllocArgsIntPtr") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocArgsIntPtr()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + uint32_t i; + x86::Gp var[8]; + + for (i = 0; i < 8; i++) { + var[i] = cc.newIntPtr("var%u", i); + cc.setArg(i, var[i]); + } + + for (i = 0; i < 8; i++) { + cc.add(var[i], int(i + 1)); + } + + // Move some data into buffer provided by arguments so we can verify if it + // really works without looking into assembler output. 
+ for (i = 0; i < 8; i++) { + cc.add(x86::byte_ptr(var[i]), int(i + 1)); + } + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*, void*, void*, void*, void*, void*, void*, void*); + Func func = ptr_as_func(_func); + + uint8_t resultBuf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint8_t expectBuf[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + + func(resultBuf, resultBuf, resultBuf, resultBuf, + resultBuf, resultBuf, resultBuf, resultBuf); + + result.assignFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}", + resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3], + resultBuf[4], resultBuf[5], resultBuf[6], resultBuf[7], + resultBuf[8]); + expect.assignFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}", + expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3], + expectBuf[4], expectBuf[5], expectBuf[6], expectBuf[7], + expectBuf[8]); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocArgsFloat] +// ============================================================================ + +class X86Test_AllocArgsFloat : public X86TestCase { +public: + X86Test_AllocArgsFloat() : X86TestCase("AllocArgsFloat") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocArgsFloat()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + uint32_t i; + + x86::Gp p = cc.newIntPtr("p"); + x86::Xmm xv[7]; + + for (i = 0; i < 7; i++) { + xv[i] = cc.newXmmSs("xv%u", i); + cc.setArg(i, xv[i]); + } + + cc.setArg(7, p); + + cc.addss(xv[0], xv[1]); + cc.addss(xv[0], xv[2]); + cc.addss(xv[0], xv[3]); + cc.addss(xv[0], xv[4]); + cc.addss(xv[0], xv[5]); + cc.addss(xv[0], xv[6]); + + cc.movss(x86::ptr(p), xv[0]); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(float, float, float, float, float, float, float, float*); + Func func = ptr_as_func(_func); + + float resultRet; + float expectRet = 1.0f + 2.0f + 3.0f + 4.0f + 5.0f + 6.0f + 7.0f; + + func(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, &resultRet); + + result.assignFormat("ret={%g}", resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocArgsDouble] +// ============================================================================ + +class X86Test_AllocArgsDouble : public X86TestCase { +public: + X86Test_AllocArgsDouble() : X86TestCase("AllocArgsDouble") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocArgsDouble()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + uint32_t i; + + x86::Gp p = cc.newIntPtr("p"); + x86::Xmm xv[7]; + + for (i = 0; i < 7; i++) { + xv[i] = cc.newXmmSd("xv%u", i); + cc.setArg(i, xv[i]); + } + + cc.setArg(7, p); + + cc.addsd(xv[0], xv[1]); + cc.addsd(xv[0], xv[2]); + cc.addsd(xv[0], xv[3]); + cc.addsd(xv[0], xv[4]); + cc.addsd(xv[0], xv[5]); + cc.addsd(xv[0], xv[6]); + + cc.movsd(x86::ptr(p), xv[0]); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(double, double, double, double, double, double, double, double*); + Func func = ptr_as_func(_func); + + double resultRet; + double expectRet = 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 + 7.0; + + func(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, &resultRet); + + result.assignFormat("ret={%g}", 
resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocArgsVec] +// ============================================================================ + +class X86Test_AllocArgsVec : public X86TestCase { +public: + X86Test_AllocArgsVec() : X86TestCase("AllocArgsVec") {} + + static void add(TestApp& app) { + // Not supported on Windows. +#ifndef _WIN32 + app.add(new X86Test_AllocArgsVec()); +#endif + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a = cc.newXmm("aXmm"); + x86::Xmm b = cc.newXmm("bXmm"); + + cc.setArg(0, a); + cc.setArg(1, b); + + cc.paddb(a, b); + cc.ret(a); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef __m128i (*Func)(__m128i, __m128i); + Func func = ptr_as_func(_func); + + uint8_t aData[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + uint8_t bData[16] = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + uint8_t rData[16]; + uint8_t eData[16] = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; + + __m128i aVec = _mm_loadu_si128(reinterpret_cast(aData)); + __m128i bVec = _mm_loadu_si128(reinterpret_cast(bData)); + + __m128i rVec = func(aVec, bVec); + _mm_storeu_si128(reinterpret_cast<__m128i*>(rData), rVec); + + result.appendHex(rData, 16); + expect.appendHex(eData, 16); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocRetFloat1] +// ============================================================================ + +class X86Test_AllocRetFloat1 : public X86TestCase { +public: + X86Test_AllocRetFloat1() : X86TestCase("AllocRetFloat1") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocRetFloat1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm x = cc.newXmmSs("x"); + cc.setArg(0, x); + cc.ret(x); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef float (*Func)(float); + Func func = ptr_as_func(_func); + + float resultRet = func(42.0f); + float expectRet = 42.0f; + + result.assignFormat("ret={%g}", resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocRetFloat2] +// ============================================================================ + +class X86Test_AllocRetFloat2 : public X86TestCase { +public: + X86Test_AllocRetFloat2() : X86TestCase("AllocRetFloat2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocRetFloat2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm x = cc.newXmmSs("x"); + x86::Xmm y = cc.newXmmSs("y"); + + cc.setArg(0, x); + cc.setArg(1, y); + + cc.addss(x, y); + cc.ret(x); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef float (*Func)(float, float); + Func func = ptr_as_func(_func); + + float resultRet = func(1.0f, 2.0f); + float expectRet = 1.0f + 2.0f; + + result.assignFormat("ret={%g}", resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; + +// 
============================================================================ +// [X86Test_AllocRetDouble1] +// ============================================================================ + +class X86Test_AllocRetDouble1 : public X86TestCase { +public: + X86Test_AllocRetDouble1() : X86TestCase("AllocRetDouble1") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocRetDouble1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm x = cc.newXmmSd("x"); + cc.setArg(0, x); + cc.ret(x); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(double); + Func func = ptr_as_func(_func); + + double resultRet = func(42.0); + double expectRet = 42.0; + + result.assignFormat("ret={%g}", resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; +// ============================================================================ +// [X86Test_AllocRetDouble2] +// ============================================================================ + +class X86Test_AllocRetDouble2 : public X86TestCase { +public: + X86Test_AllocRetDouble2() : X86TestCase("AllocRetDouble2") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocRetDouble2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm x = cc.newXmmSd("x"); + x86::Xmm y = cc.newXmmSd("y"); + + cc.setArg(0, x); + cc.setArg(1, y); + + cc.addsd(x, y); + cc.ret(x); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(double, double); + Func func = ptr_as_func(_func); + + double resultRet = func(1.0, 2.0); + double expectRet = 1.0 + 2.0; + + result.assignFormat("ret={%g}", resultRet); + expect.assignFormat("ret={%g}", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocStack] +// ============================================================================ + +class X86Test_AllocStack : public X86TestCase { +public: + X86Test_AllocStack() : X86TestCase("AllocStack") {} + + enum { kSize = 256 }; + + static void add(TestApp& app) { + app.add(new X86Test_AllocStack()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Mem stack = cc.newStack(kSize, 1); + stack.setSize(1); + + x86::Gp i = cc.newIntPtr("i"); + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + + Label L_1 = cc.newLabel(); + Label L_2 = cc.newLabel(); + + // Fill stack by sequence [0, 1, 2, 3 ... 255]. + cc.xor_(i, i); + + x86::Mem stackWithIndex = stack.clone(); + stackWithIndex.setIndex(i, 0); + + cc.bind(L_1); + cc.mov(stackWithIndex, i.r8()); + cc.inc(i); + cc.cmp(i, 255); + cc.jle(L_1); + + // Sum sequence in stack. 
+ cc.xor_(i, i); + cc.xor_(a, a); + + cc.bind(L_2); + cc.movzx(b, stackWithIndex); + cc.add(a, b); + cc.inc(i); + cc.cmp(i, 255); + cc.jle(L_2); + + cc.ret(a); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 32640; + + result.assignInt(resultRet); + expect.assignInt(expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_AllocMemcpy] +// ============================================================================ + +class X86Test_AllocMemcpy : public X86TestCase { +public: + X86Test_AllocMemcpy() : X86TestCase("AllocMemcpy") {} + + enum { kCount = 32 }; + + static void add(TestApp& app) { + app.add(new X86Test_AllocMemcpy()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp dst = cc.newIntPtr("dst"); + x86::Gp src = cc.newIntPtr("src"); + x86::Gp cnt = cc.newUIntPtr("cnt"); + + Label L_Loop = cc.newLabel(); // Create base labels we use + Label L_Exit = cc.newLabel(); // in our function. + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, dst); + cc.setArg(1, src); + cc.setArg(2, cnt); + + cc.test(cnt, cnt); // Exit if the size is zero. + cc.jz(L_Exit); + + cc.bind(L_Loop); // Bind the loop label here. + + x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes). + cc.mov(tmp, x86::dword_ptr(src)); + cc.mov(x86::dword_ptr(dst), tmp); + + cc.add(src, 4); // Increment dst/src pointers. + cc.add(dst, 4); + + cc.dec(cnt); // Loop until cnt isn't zero. + cc.jnz(L_Loop); + + cc.bind(L_Exit); // Bind the exit label here. + cc.endFunc(); // End of function. + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(uint32_t*, const uint32_t*, size_t); + Func func = ptr_as_func(_func); + + uint32_t i; + + uint32_t dstBuffer[kCount]; + uint32_t srcBuffer[kCount]; + + for (i = 0; i < kCount; i++) { + dstBuffer[i] = 0; + srcBuffer[i] = i; + } + + func(dstBuffer, srcBuffer, kCount); + + result.assign("buf={"); + expect.assign("buf={"); + + for (i = 0; i < kCount; i++) { + if (i != 0) { + result.append(", "); + expect.append(", "); + } + + result.appendFormat("%u", unsigned(dstBuffer[i])); + expect.appendFormat("%u", unsigned(srcBuffer[i])); + } + + result.append("}"); + expect.append("}"); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocExtraBlock] +// ============================================================================ + +class X86Test_AllocExtraBlock : public X86TestCase { +public: + X86Test_AllocExtraBlock() : X86TestCase("AllocExtraBlock") {} + + static void add(TestApp& app) { + app.add(new X86Test_AllocExtraBlock()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp cond = cc.newInt32("cond"); + x86::Gp ret = cc.newInt32("ret"); + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, cond); + cc.setArg(1, a); + cc.setArg(2, b); + + Label L_Ret = cc.newLabel(); + Label L_Extra = cc.newLabel(); + + cc.test(cond, cond); + cc.jnz(L_Extra); + + cc.mov(ret, a); + cc.add(ret, b); + + cc.bind(L_Ret); + cc.ret(ret); + + // Emit code sequence at the end of the function. 
+ BaseNode* prevCursor = cc.setCursor(cc.func()->endNode()->prev()); + cc.bind(L_Extra); + cc.mov(ret, a); + cc.sub(ret, b); + cc.jmp(L_Ret); + cc.setCursor(prevCursor); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + Func func = ptr_as_func(_func); + + int ret1 = func(0, 4, 5); + int ret2 = func(1, 4, 5); + + int exp1 = 4 + 5; + int exp2 = 4 - 5; + + result.assignFormat("ret={%d, %d}", ret1, ret2); + expect.assignFormat("ret={%d, %d}", exp1, exp2); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_AllocAlphaBlend] +// ============================================================================ + +class X86Test_AllocAlphaBlend : public X86TestCase { +public: + X86Test_AllocAlphaBlend() : X86TestCase("AllocAlphaBlend") {} + + enum { kCount = 17 }; + + static void add(TestApp& app) { + app.add(new X86Test_AllocAlphaBlend()); + } + + static uint32_t blendSrcOver(uint32_t d, uint32_t s) { + uint32_t saInv = ~s >> 24; + + uint32_t d_20 = (d ) & 0x00FF00FF; + uint32_t d_31 = (d >> 8) & 0x00FF00FF; + + d_20 *= saInv; + d_31 *= saInv; + + d_20 = ((d_20 + ((d_20 >> 8) & 0x00FF00FFu) + 0x00800080u) & 0xFF00FF00u) >> 8; + d_31 = ((d_31 + ((d_31 >> 8) & 0x00FF00FFu) + 0x00800080u) & 0xFF00FF00u); + + return d_20 + d_31 + s; + } + + virtual void compile(x86::Compiler& cc) { + asmtest::generateAlphaBlend(cc); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*, const void*, size_t); + Func func = ptr_as_func(_func); + + static const uint32_t dstConstData[] = { 0x00000000, 0x10101010, 0x20100804, 0x30200003, 0x40204040, 0x5000004D, 0x60302E2C, 0x706F6E6D, 0x807F4F2F, 0x90349001, 0xA0010203, 0xB03204AB, 0xC023AFBD, 0xD0D0D0C0, 0xE0AABBCC, 0xFFFFFFFF, 0xF8F4F2F1 }; + static const uint32_t srcConstData[] = { 0xE0E0E0E0, 0xA0008080, 0x341F1E1A, 0xFEFEFEFE, 0x80302010, 0x49490A0B, 0x998F7798, 0x00000000, 0x01010101, 0xA0264733, 0xBAB0B1B9, 0xFF000000, 0xDAB0A0C1, 0xE0BACFDA, 0x99887766, 0xFFFFFF80, 0xEE0A5FEC }; + + uint32_t _dstBuffer[kCount + 3]; + uint32_t _srcBuffer[kCount + 3]; + + // Has to be aligned. 
+ uint32_t* dstBuffer = (uint32_t*)Support::alignUp((intptr_t)_dstBuffer, 16); + uint32_t* srcBuffer = (uint32_t*)Support::alignUp((intptr_t)_srcBuffer, 16); + + memcpy(dstBuffer, dstConstData, sizeof(dstConstData)); + memcpy(srcBuffer, srcConstData, sizeof(srcConstData)); + + uint32_t i; + uint32_t expBuffer[kCount]; + + for (i = 0; i < kCount; i++) { + expBuffer[i] = blendSrcOver(dstBuffer[i], srcBuffer[i]); + } + + func(dstBuffer, srcBuffer, kCount); + + result.assign("buf={"); + expect.assign("buf={"); + + for (i = 0; i < kCount; i++) { + if (i != 0) { + result.append(", "); + expect.append(", "); + } + + result.appendFormat("%08X", unsigned(dstBuffer[i])); + expect.appendFormat("%08X", unsigned(expBuffer[i])); + } + + result.append("}"); + expect.append("}"); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_FuncCallBase1] +// ============================================================================ + +class X86Test_FuncCallBase1 : public X86TestCase { +public: + X86Test_FuncCallBase1() : X86TestCase("FuncCallBase1") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallBase1()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp v0 = cc.newInt32("v0"); + x86::Gp v1 = cc.newInt32("v1"); + x86::Gp v2 = cc.newInt32("v2"); + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, v0); + cc.setArg(1, v1); + cc.setArg(2, v2); + + // Just do something. + cc.shl(v0, 1); + cc.shl(v1, 1); + cc.shl(v2, 1); + + // Call a function. + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, v2); + invokeNode->setArg(1, v1); + invokeNode->setArg(2, v0); + invokeNode->setRet(0, v0); + + cc.ret(v0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + Func func = ptr_as_func(_func); + + int resultRet = func(3, 2, 1); + int expectRet = 36; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + static int calledFunc(int a, int b, int c) { return (a + b) * c; } +}; + +// ============================================================================ +// [X86Test_FuncCallBase2] +// ============================================================================ + +class X86Test_FuncCallBase2 : public X86TestCase { +public: + X86Test_FuncCallBase2() : X86TestCase("FuncCallBase2") {} + + enum { kSize = 256 }; + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallBase2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + const int kTokenSize = 32; + + x86::Mem s1 = cc.newStack(kTokenSize, 32); + x86::Mem s2 = cc.newStack(kTokenSize, 32); + + x86::Gp p1 = cc.newIntPtr("p1"); + x86::Gp p2 = cc.newIntPtr("p2"); + + x86::Gp ret = cc.newInt32("ret"); + Label L_Exit = cc.newLabel(); + + static const char token[kTokenSize] = "-+:|abcdefghijklmnopqrstuvwxyz|"; + InvokeNode* invokeNode; + + cc.lea(p1, s1); + cc.lea(p2, s2); + + // Try to corrupt the stack if wrongly allocated. 
+ cc.invoke(&invokeNode, imm((void*)memcpy), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p1); + invokeNode->setArg(1, imm(token)); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, p1); + + cc.invoke(&invokeNode, imm((void*)memcpy), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p2); + invokeNode->setArg(1, imm(token)); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, p2); + + cc.invoke(&invokeNode, imm((void*)memcmp), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p1); + invokeNode->setArg(1, p2); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, ret); + + // This should be 0 on success, however, if both `p1` and `p2` were + // allocated in the same address this check will still pass. + cc.cmp(ret, 0); + cc.jnz(L_Exit); + + // Checks whether `p1` and `p2` are different (must be). + cc.xor_(ret, ret); + cc.cmp(p1, p2); + cc.setz(ret.r8()); + + cc.bind(L_Exit); + cc.ret(ret); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 0; // Must be zero, stack addresses must be different. + + result.assignInt(resultRet); + expect.assignInt(expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallStd] +// ============================================================================ + +class X86Test_FuncCallStd : public X86TestCase { +public: + X86Test_FuncCallStd() : X86TestCase("FuncCallStd") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallStd()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp x = cc.newInt32("x"); + x86::Gp y = cc.newInt32("y"); + x86::Gp z = cc.newInt32("z"); + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, x); + cc.setArg(1, y); + cc.setArg(2, z); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdStdCall)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setArg(2, z); + invokeNode->setRet(0, x); + + cc.ret(x); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + Func func = ptr_as_func(_func); + + int resultRet = func(1, 42, 3); + int expectRet = calledFunc(1, 42, 3); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + // STDCALL function that is called inside the generated one. 
+  static int ASMJIT_STDCALL calledFunc(int a, int b, int c) noexcept {
+    return (a + b) * c;
+  }
+};
+
+// ============================================================================
+// [X86Test_FuncCallFast]
+// ============================================================================
+
+class X86Test_FuncCallFast : public X86TestCase {
+public:
+  X86Test_FuncCallFast() : X86TestCase("FuncCallFast") {}
+
+  static void add(TestApp& app) {
+    app.add(new X86Test_FuncCallFast());
+  }
+
+  virtual void compile(x86::Compiler& cc) {
+    x86::Gp var = cc.newInt32("var");
+
+    cc.addFunc(FuncSignatureT<int, int>(CallConv::kIdHost));
+    cc.setArg(0, var);
+
+    InvokeNode* invokeNode;
+
+    cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT<int, int>(CallConv::kIdFastCall));
+    invokeNode->setArg(0, var);
+    invokeNode->setRet(0, var);
+
+    cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT<int, int>(CallConv::kIdFastCall));
+    invokeNode->setArg(0, var);
+    invokeNode->setRet(0, var);
+
+    cc.ret(var);
+    cc.endFunc();
+  }
+
+  virtual bool run(void* _func, String& result, String& expect) {
+    typedef int (*Func)(int);
+    Func func = ptr_as_func<Func>(_func);
+
+    int resultRet = func(9);
+    int expectRet = (9 * 9) * (9 * 9);
+
+    result.assignFormat("ret=%d", resultRet);
+    expect.assignFormat("ret=%d", expectRet);
+
+    return resultRet == expectRet;
+  }
+
+  // FASTCALL function that is called inside the generated one.
+  static int ASMJIT_FASTCALL calledFunc(int a) noexcept {
+    return a * a;
+  }
+};
+
+// ============================================================================
+// [X86Test_FuncCallSIMD]
+// ============================================================================
+
+class X86Test_FuncCallSIMD : public X86TestCase {
+public:
+  bool _useVectorCall;
+
+  X86Test_FuncCallSIMD(bool useVectorCall)
+    : X86TestCase(),
+      _useVectorCall(useVectorCall) {
+    _name.assignFormat("FuncCallSIMD {%s}", _useVectorCall ?
"__vectorcall" : "__cdecl"); + } + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallSIMD(false)); +#ifdef _MSC_VER + app.add(new X86Test_FuncCallSIMD(true)); +#endif + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp resultPtr = cc.newIntPtr("resultPtr"); + x86::Gp aPtr = cc.newIntPtr("aPtr"); + x86::Gp bPtr = cc.newIntPtr("bPtr"); + x86::Gp pFn = cc.newIntPtr("pFn"); + + x86::Xmm aXmm = cc.newXmm("aXmm"); + x86::Xmm bXmm = cc.newXmm("bXmm"); + + cc.setArg(0, resultPtr); + cc.setArg(1, aPtr); + cc.setArg(2, bPtr); + + uint32_t ccId = CallConv::kIdCDecl; + Imm pFnImm = imm((void*)calledFunc_cdecl); + +#ifdef _MSC_VER + if (_useVectorCall) { + ccId = CallConv::kIdVectorCall; + pFnImm = imm((void*)calledFunc_vcall); + } +#endif + + cc.mov(pFn, pFnImm); + cc.movdqu(aXmm, x86::ptr(aPtr)); + cc.movdqu(bXmm, x86::ptr(bPtr)); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, pFn, FuncSignatureT(ccId)); + + invokeNode->setArg(0, aXmm); + invokeNode->setArg(1, bXmm); + invokeNode->setRet(0, aXmm); + + cc.movdqu(x86::ptr(resultPtr), aXmm); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*, const void*, const void*); + Func func = ptr_as_func(_func); + + uint8_t aData[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + uint8_t bData[16] = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + uint8_t rData[16]; + uint8_t eData[16] = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; + + func(rData, aData, bData); + + result.appendHex(rData, 16); + expect.appendHex(eData, 16); + + return result == expect; + } + + static __m128i calledFunc_cdecl(__m128i a, __m128i b) { + return _mm_add_epi8(a, b); + } + +#ifdef _MSC_VER + static __m128i __vectorcall calledFunc_vcall(__m128i a, __m128i b) { + return _mm_add_epi8(a, b); + } +#endif +}; + +// ============================================================================ +// [X86Test_FuncCallLight] +// ============================================================================ + +class X86Test_FuncCallLight : public X86TestCase { +public: + X86Test_FuncCallLight() : X86TestCase("FuncCallLight") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallLight()); + } + + virtual void compile(x86::Compiler& cc) { + FuncSignatureT funcSig(CallConv::kIdCDecl); + FuncSignatureT fastSig(CallConv::kIdLightCall2); + + FuncNode* func = cc.newFunc(funcSig); + FuncNode* fast = cc.newFunc(fastSig); + + { + x86::Gp aPtr = cc.newIntPtr("aPtr"); + x86::Gp bPtr = cc.newIntPtr("bPtr"); + x86::Gp cPtr = cc.newIntPtr("cPtr"); + x86::Gp dPtr = cc.newIntPtr("dPtr"); + x86::Gp pOut = cc.newIntPtr("pOut"); + + x86::Xmm aXmm = cc.newXmm("aXmm"); + x86::Xmm bXmm = cc.newXmm("bXmm"); + x86::Xmm cXmm = cc.newXmm("cXmm"); + x86::Xmm dXmm = cc.newXmm("dXmm"); + + cc.addFunc(func); + + cc.setArg(0, aPtr); + cc.setArg(1, bPtr); + cc.setArg(2, cPtr); + cc.setArg(3, dPtr); + cc.setArg(4, pOut); + + cc.movups(aXmm, x86::ptr(aPtr)); + cc.movups(bXmm, x86::ptr(bPtr)); + cc.movups(cXmm, x86::ptr(cPtr)); + cc.movups(dXmm, x86::ptr(dPtr)); + + x86::Xmm xXmm = cc.newXmm("xXmm"); + x86::Xmm yXmm = cc.newXmm("yXmm"); + + InvokeNode* invokeNode; + + cc.invoke(&invokeNode, fast->label(), fastSig); + invokeNode->setArg(0, aXmm); + invokeNode->setArg(1, bXmm); + invokeNode->setRet(0, xXmm); + + cc.invoke(&invokeNode, fast->label(), fastSig); + invokeNode->setArg(0, cXmm); + invokeNode->setArg(1, dXmm); + 
invokeNode->setRet(0, yXmm); + + cc.pmullw(xXmm, yXmm); + cc.movups(x86::ptr(pOut), xXmm); + + cc.endFunc(); + } + + { + x86::Xmm aXmm = cc.newXmm("aXmm"); + x86::Xmm bXmm = cc.newXmm("bXmm"); + + cc.addFunc(fast); + cc.setArg(0, aXmm); + cc.setArg(1, bXmm); + cc.paddw(aXmm, bXmm); + cc.ret(aXmm); + cc.endFunc(); + } + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(const void*, const void*, const void*, const void*, void*); + + Func func = ptr_as_func(_func); + + int16_t a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + int16_t b[8] = { 7, 6, 5, 4, 3, 2, 1, 0 }; + int16_t c[8] = { 1, 3, 9, 7, 5, 4, 2, 1 }; + int16_t d[8] = { 2, 0,-6,-4,-2,-1, 1, 2 }; + + int16_t o[8]; + int oExp = 7 * 3; + + func(a, b, c, d, o); + + result.assignFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", o[0], o[1], o[2], o[3], o[4], o[5], o[6], o[7]); + expect.assignFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", oExp, oExp, oExp, oExp, oExp, oExp, oExp, oExp); + + return result == expect; + } +}; + +// ============================================================================ +// [X86Test_FuncCallManyArgs] +// ============================================================================ + +class X86Test_FuncCallManyArgs : public X86TestCase { +public: + X86Test_FuncCallManyArgs() : X86TestCase("FuncCallManyArgs") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallManyArgs()); + } + + static int calledFunc(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) { + return (a * b * c * d * e) + (f * g * h * i * j); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + // Prepare. + x86::Gp va = cc.newInt32("va"); + x86::Gp vb = cc.newInt32("vb"); + x86::Gp vc = cc.newInt32("vc"); + x86::Gp vd = cc.newInt32("vd"); + x86::Gp ve = cc.newInt32("ve"); + x86::Gp vf = cc.newInt32("vf"); + x86::Gp vg = cc.newInt32("vg"); + x86::Gp vh = cc.newInt32("vh"); + x86::Gp vi = cc.newInt32("vi"); + x86::Gp vj = cc.newInt32("vj"); + + cc.mov(va, 0x03); + cc.mov(vb, 0x12); + cc.mov(vc, 0xA0); + cc.mov(vd, 0x0B); + cc.mov(ve, 0x2F); + cc.mov(vf, 0x02); + cc.mov(vg, 0x0C); + cc.mov(vh, 0x12); + cc.mov(vi, 0x18); + cc.mov(vj, 0x1E); + + // Function call. 
+ InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, va); + invokeNode->setArg(1, vb); + invokeNode->setArg(2, vc); + invokeNode->setArg(3, vd); + invokeNode->setArg(4, ve); + invokeNode->setArg(5, vf); + invokeNode->setArg(6, vg); + invokeNode->setArg(7, vh); + invokeNode->setArg(8, vi); + invokeNode->setArg(9, vj); + invokeNode->setRet(0, va); + + cc.ret(va); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallDuplicateArgs] +// ============================================================================ + +class X86Test_FuncCallDuplicateArgs : public X86TestCase { +public: + X86Test_FuncCallDuplicateArgs() : X86TestCase("FuncCallDuplicateArgs") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallDuplicateArgs()); + } + + static int calledFunc(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) { + return (a * b * c * d * e) + (f * g * h * i * j); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + // Prepare. + x86::Gp a = cc.newInt32("a"); + cc.mov(a, 3); + + // Call function. + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, a); + invokeNode->setArg(2, a); + invokeNode->setArg(3, a); + invokeNode->setArg(4, a); + invokeNode->setArg(5, a); + invokeNode->setArg(6, a); + invokeNode->setArg(7, a); + invokeNode->setArg(8, a); + invokeNode->setArg(9, a); + invokeNode->setRet(0, a); + + cc.ret(a); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = calledFunc(3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallImmArgs] +// ============================================================================ + +class X86Test_FuncCallImmArgs : public X86TestCase { +public: + X86Test_FuncCallImmArgs() : X86TestCase("FuncCallImmArgs") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallImmArgs()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + // Prepare. + x86::Gp rv = cc.newInt32("rv"); + + // Call function. 
+ InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)X86Test_FuncCallManyArgs::calledFunc), + FuncSignatureT(CallConv::kIdHost)); + + invokeNode->setArg(0, imm(0x03)); + invokeNode->setArg(1, imm(0x12)); + invokeNode->setArg(2, imm(0xA0)); + invokeNode->setArg(3, imm(0x0B)); + invokeNode->setArg(4, imm(0x2F)); + invokeNode->setArg(5, imm(0x02)); + invokeNode->setArg(6, imm(0x0C)); + invokeNode->setArg(7, imm(0x12)); + invokeNode->setArg(8, imm(0x18)); + invokeNode->setArg(9, imm(0x1E)); + invokeNode->setRet(0, rv); + + cc.ret(rv); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = X86Test_FuncCallManyArgs::calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E); + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallPtrArgs] +// ============================================================================ + +class X86Test_FuncCallPtrArgs : public X86TestCase { +public: + X86Test_FuncCallPtrArgs() : X86TestCase("FuncCallPtrArgs") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallPtrArgs()); + } + + static int calledFunc(void* a, void* b, void* c, void* d, void* e, void* f, void* g, void* h, void* i, void* j) { + return int((intptr_t)a) + + int((intptr_t)b) + + int((intptr_t)c) + + int((intptr_t)d) + + int((intptr_t)e) + + int((intptr_t)f) + + int((intptr_t)g) + + int((intptr_t)h) + + int((intptr_t)i) + + int((intptr_t)j) ; + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + // Prepare. + x86::Gp rv = cc.newInt32("rv"); + + // Call function. + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost)); + + invokeNode->setArg(0, imm(0x01)); + invokeNode->setArg(1, imm(0x02)); + invokeNode->setArg(2, imm(0x03)); + invokeNode->setArg(3, imm(0x04)); + invokeNode->setArg(4, imm(0x05)); + invokeNode->setArg(5, imm(0x06)); + invokeNode->setArg(6, imm(0x07)); + invokeNode->setArg(7, imm(0x08)); + invokeNode->setArg(8, imm(0x09)); + invokeNode->setArg(9, imm(0x0A)); + invokeNode->setRet(0, rv); + + cc.ret(rv); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 55; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallRefArgs] +// ============================================================================ + +class X86Test_FuncCallRefArgs : public X86TestCase { +public: + X86Test_FuncCallRefArgs() : X86TestCase("FuncCallRefArgs") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallRefArgs()); + } + + static int calledFunc(int& a, int& b, int& c, int& d) { + a += a; + b += b; + c += c; + d += d; + return a + b + c + d; + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + // Prepare. 
+ x86::Gp arg1 = cc.newInt32(); + x86::Gp arg2 = cc.newInt32(); + x86::Gp arg3 = cc.newInt32(); + x86::Gp arg4 = cc.newInt32(); + x86::Gp rv = cc.newInt32("rv"); + + cc.setArg(0, arg1); + cc.setArg(1, arg2); + cc.setArg(2, arg3); + cc.setArg(3, arg4); + + // Call function. + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost)); + + invokeNode->setArg(0, arg1); + invokeNode->setArg(1, arg2); + invokeNode->setArg(2, arg3); + invokeNode->setArg(3, arg4); + invokeNode->setRet(0, rv); + + cc.ret(rv); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int&, int&, int&, int&); + Func func = ptr_as_func(_func); + + int inputs[4] = { 1, 2, 3, 4 }; + int outputs[4] = { 2, 4, 6, 8 }; + int resultRet = func(inputs[0], inputs[1], inputs[2], inputs[3]); + int expectRet = 20; + + result.assignFormat("ret={%08X %08X %08X %08X %08X}", resultRet, inputs[0], inputs[1], inputs[2], inputs[3]); + expect.assignFormat("ret={%08X %08X %08X %08X %08X}", expectRet, outputs[0], outputs[1], outputs[2], outputs[3]); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallFloatAsXmmRet] +// ============================================================================ + +class X86Test_FuncCallFloatAsXmmRet : public X86TestCase { +public: + X86Test_FuncCallFloatAsXmmRet() : X86TestCase("FuncCallFloatAsXmmRet") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallFloatAsXmmRet()); + } + + static float calledFunc(float a, float b) { + return a * b; + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a = cc.newXmmSs("a"); + x86::Xmm b = cc.newXmmSs("b"); + x86::Xmm ret = cc.newXmmSs("ret"); + + cc.setArg(0, a); + cc.setArg(1, b); + + // Call function. 
+ InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, ret); + + cc.ret(ret); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef float (*Func)(float, float); + Func func = ptr_as_func(_func); + + float resultRet = func(15.5f, 2.0f); + float expectRet = calledFunc(15.5f, 2.0f); + + result.assignFormat("ret=%g", resultRet); + expect.assignFormat("ret=%g", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallDoubleAsXmmRet] +// ============================================================================ + +class X86Test_FuncCallDoubleAsXmmRet : public X86TestCase { +public: + X86Test_FuncCallDoubleAsXmmRet() : X86TestCase("FuncCallDoubleAsXmmRet") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallDoubleAsXmmRet()); + } + + static double calledFunc(double a, double b) { + return a * b; + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a = cc.newXmmSd("a"); + x86::Xmm b = cc.newXmmSd("b"); + x86::Xmm ret = cc.newXmmSd("ret"); + + cc.setArg(0, a); + cc.setArg(1, b); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, ret); + + cc.ret(ret); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(double, double); + Func func = ptr_as_func(_func); + + double resultRet = func(15.5, 2.0); + double expectRet = calledFunc(15.5, 2.0); + + result.assignFormat("ret=%g", resultRet); + expect.assignFormat("ret=%g", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallConditional] +// ============================================================================ + +class X86Test_FuncCallConditional : public X86TestCase { +public: + X86Test_FuncCallConditional() : X86TestCase("FuncCallConditional") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallConditional()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp x = cc.newInt32("x"); + x86::Gp y = cc.newInt32("y"); + x86::Gp op = cc.newInt32("op"); + + InvokeNode* invokeNode; + x86::Gp result; + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, x); + cc.setArg(1, y); + cc.setArg(2, op); + + Label opAdd = cc.newLabel(); + Label opMul = cc.newLabel(); + + cc.cmp(op, 0); + cc.jz(opAdd); + cc.cmp(op, 1); + cc.jz(opMul); + + result = cc.newInt32("result_0"); + cc.mov(result, 0); + cc.ret(result); + + cc.bind(opAdd); + result = cc.newInt32("result_1"); + + cc.invoke(&invokeNode, (uint64_t)calledFuncAdd, FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setRet(0, result); + cc.ret(result); + + cc.bind(opMul); + result = cc.newInt32("result_2"); + + cc.invoke(&invokeNode, (uint64_t)calledFuncMul, FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setRet(0, result); + + cc.ret(result); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + Func func = ptr_as_func(_func); + + int arg1 = 
4; + int arg2 = 8; + + int resultAdd = func(arg1, arg2, 0); + int expectAdd = calledFuncAdd(arg1, arg2); + + int resultMul = func(arg1, arg2, 1); + int expectMul = calledFuncMul(arg1, arg2); + + result.assignFormat("ret={add=%d, mul=%d}", resultAdd, resultMul); + expect.assignFormat("ret={add=%d, mul=%d}", expectAdd, expectMul); + + return (resultAdd == expectAdd) && (resultMul == expectMul); + } + + static int calledFuncAdd(int x, int y) { return x + y; } + static int calledFuncMul(int x, int y) { return x * y; } +}; + +// ============================================================================ +// [X86Test_FuncCallMultiple] +// ============================================================================ + +class X86Test_FuncCallMultiple : public X86TestCase { +public: + X86Test_FuncCallMultiple() : X86TestCase("FuncCallMultiple") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMultiple()); + } + + static int ASMJIT_FASTCALL calledFunc(int* pInt, int index) { + return pInt[index]; + } + + virtual void compile(x86::Compiler& cc) { + unsigned int i; + + x86::Gp buf = cc.newIntPtr("buf"); + x86::Gp acc0 = cc.newInt32("acc0"); + x86::Gp acc1 = cc.newInt32("acc1"); + + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, buf); + + cc.mov(acc0, 0); + cc.mov(acc1, 0); + + for (i = 0; i < 4; i++) { + x86::Gp ret = cc.newInt32("ret"); + x86::Gp ptr = cc.newIntPtr("ptr"); + x86::Gp idx = cc.newInt32("idx"); + InvokeNode* invokeNode; + + cc.mov(ptr, buf); + cc.mov(idx, int(i)); + + cc.invoke(&invokeNode, (uint64_t)calledFunc, FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, ptr); + invokeNode->setArg(1, idx); + invokeNode->setRet(0, ret); + + cc.add(acc0, ret); + + cc.mov(ptr, buf); + cc.mov(idx, int(i)); + + cc.invoke(&invokeNode, (uint64_t)calledFunc, FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, ptr); + invokeNode->setArg(1, idx); + invokeNode->setRet(0, ret); + + cc.sub(acc1, ret); + } + + cc.add(acc0, acc1); + cc.ret(acc0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int*); + Func func = ptr_as_func(_func); + + int buffer[4] = { 127, 87, 23, 17 }; + + int resultRet = func(buffer); + int expectRet = 0; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallRecursive] +// ============================================================================ + +class X86Test_FuncCallRecursive : public X86TestCase { +public: + X86Test_FuncCallRecursive() : X86TestCase("FuncCallRecursive") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallRecursive()); + } + + virtual void compile(x86::Compiler& cc) { + x86::Gp val = cc.newInt32("val"); + Label skip = cc.newLabel(); + + FuncNode* func = cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + cc.setArg(0, val); + + cc.cmp(val, 1); + cc.jle(skip); + + x86::Gp tmp = cc.newInt32("tmp"); + cc.mov(tmp, val); + cc.dec(tmp); + + InvokeNode* invokeNode; + + cc.invoke(&invokeNode, func->label(), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, tmp); + invokeNode->setRet(0, tmp); + cc.mul(cc.newInt32(), val, tmp); + + cc.bind(skip); + cc.ret(val); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int); + Func func = ptr_as_func(_func); + + int resultRet = func(5); + int expectRet 
= 1 * 2 * 3 * 4 * 5; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallVarArg1] +// ============================================================================ + +class X86Test_FuncCallVarArg1 : public X86TestCase { +public: + X86Test_FuncCallVarArg1() : X86TestCase("FuncCallVarArg1") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallVarArg1()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp a0 = cc.newInt32("a0"); + x86::Gp a1 = cc.newInt32("a1"); + x86::Gp a2 = cc.newInt32("a2"); + x86::Gp a3 = cc.newInt32("a3"); + + cc.setArg(0, a0); + cc.setArg(1, a1); + cc.setArg(2, a2); + cc.setArg(3, a3); + + // We call `int func(size_t, ...)` + // - The `vaIndex` must be 1 (first argument after size_t). + // - The full signature of varargs (int, int, int, int) must follow. + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost, 1)); + invokeNode->setArg(0, imm(4)); + invokeNode->setArg(1, a0); + invokeNode->setArg(2, a1); + invokeNode->setArg(3, a2); + invokeNode->setArg(4, a3); + invokeNode->setRet(0, a0); + + cc.ret(a0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int, int); + Func func = ptr_as_func(_func); + + int resultRet = func(1, 2, 3, 4); + int expectRet = 1 + 2 + 3 + 4; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + static int calledFunc(size_t n, ...) { + int sum = 0; + va_list ap; + va_start(ap, n); + for (size_t i = 0; i < n; i++) { + int arg = va_arg(ap, int); + sum += arg; + } + va_end(ap); + return sum; + } +}; + +// ============================================================================ +// [X86Test_FuncCallVarArg2] +// ============================================================================ + +class X86Test_FuncCallVarArg2 : public X86TestCase { +public: + X86Test_FuncCallVarArg2() : X86TestCase("FuncCallVarArg2") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallVarArg2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a0 = cc.newXmmSd("a0"); + x86::Xmm a1 = cc.newXmmSd("a1"); + x86::Xmm a2 = cc.newXmmSd("a2"); + x86::Xmm a3 = cc.newXmmSd("a3"); + + cc.setArg(0, a0); + cc.setArg(1, a1); + cc.setArg(2, a2); + cc.setArg(3, a3); + + // We call `double func(size_t, ...)` + // - The `vaIndex` must be 1 (first argument after size_t). + // - The full signature of varargs (double, double, double, double) must follow. 
+ InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)calledFunc), + FuncSignatureT(CallConv::kIdHost, 1)); + invokeNode->setArg(0, imm(4)); + invokeNode->setArg(1, a0); + invokeNode->setArg(2, a1); + invokeNode->setArg(3, a2); + invokeNode->setArg(4, a3); + invokeNode->setRet(0, a0); + + cc.ret(a0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(double, double, double, double); + Func func = ptr_as_func(_func); + + double resultRet = func(1.0, 2.0, 3.0, 4.0); + double expectRet = 1.0 + 2.0 + 3.0 + 4.0; + + result.assignFormat("ret=%f", resultRet); + expect.assignFormat("ret=%f", expectRet); + + return resultRet == expectRet; + } + + static double calledFunc(size_t n, ...) { + double sum = 0; + va_list ap; + va_start(ap, n); + for (size_t i = 0; i < n; i++) { + double arg = va_arg(ap, double); + sum += arg; + } + va_end(ap); + return sum; + } +}; + +// ============================================================================ +// [X86Test_FuncCallInt64Arg] +// ============================================================================ + +class X86Test_FuncCallInt64Arg : public X86TestCase { +public: + X86Test_FuncCallInt64Arg() : X86TestCase("FuncCallInt64Arg") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallInt64Arg()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + if (cc.is64Bit()) { + x86::Gp reg = cc.newUInt64(); + cc.setArg(0, reg); + cc.add(reg, 1); + cc.ret(reg); + } + else { + x86::Gp hi = cc.newUInt32("hi"); + x86::Gp lo = cc.newUInt32("lo"); + + cc.setArg(0, 0, lo); + cc.setArg(0, 1, hi); + + cc.add(lo, 1); + cc.adc(hi, 0); + cc.ret(lo, hi); + } + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint64_t (*Func)(uint64_t); + Func func = ptr_as_func(_func); + + uint64_t resultRet = func(uint64_t(0xFFFFFFFF)); + uint64_t expectRet = 0x100000000; + + result.assignFormat("ret=%llu", (unsigned long long)resultRet); + expect.assignFormat("ret=%llu", (unsigned long long)expectRet); + + return resultRet == expectRet; + } + + static double calledFunc(size_t n, ...) 
{ + double sum = 0; + va_list ap; + va_start(ap, n); + for (size_t i = 0; i < n; i++) { + double arg = va_arg(ap, double); + sum += arg; + } + va_end(ap); + return sum; + } +}; + +// ============================================================================ +// [X86Test_FuncCallMisc1] +// ============================================================================ + +class X86Test_FuncCallMisc1 : public X86TestCase { +public: + X86Test_FuncCallMisc1() : X86TestCase("FuncCallMisc1") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc1()); + } + + static void dummy(int, int) {} + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + x86::Gp r = cc.newInt32("r"); + + cc.setArg(0, a); + cc.setArg(1, b); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)dummy), + FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + + cc.lea(r, x86::ptr(a, b)); + cc.ret(r); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + Func func = ptr_as_func(_func); + + int resultRet = func(44, 199); + int expectRet = 243; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_FuncCallMisc2] +// ============================================================================ + +class X86Test_FuncCallMisc2 : public X86TestCase { +public: + X86Test_FuncCallMisc2() : X86TestCase("FuncCallMisc2") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc2()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp p = cc.newIntPtr("p"); + x86::Xmm arg = cc.newXmmSd("arg"); + x86::Xmm ret = cc.newXmmSd("ret"); + + cc.setArg(0, p); + cc.movsd(arg, x86::ptr(p)); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)op), + FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, arg); + invokeNode->setRet(0, ret); + + cc.ret(ret); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(const double*); + Func func = ptr_as_func(_func); + + double arg = 2; + + double resultRet = func(&arg); + double expectRet = op(arg); + + result.assignFormat("ret=%g", resultRet); + expect.assignFormat("ret=%g", expectRet); + + return resultRet == expectRet; + } + + static double op(double a) { return a * a; } +}; + +// ============================================================================ +// [X86Test_FuncCallMisc3] +// ============================================================================ + +class X86Test_FuncCallMisc3 : public X86TestCase { +public: + X86Test_FuncCallMisc3() : X86TestCase("FuncCallMisc3") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc3()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp p = cc.newIntPtr("p"); + x86::Xmm arg = cc.newXmmSd("arg"); + x86::Xmm ret = cc.newXmmSd("ret"); + + cc.setArg(0, p); + cc.movsd(arg, x86::ptr(p)); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, + imm((void*)op), + FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, arg); + invokeNode->setRet(0, ret); + + cc.xorps(arg, arg); + cc.subsd(arg, ret); + + cc.ret(arg); + 
cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(const double*); + Func func = ptr_as_func(_func); + + double arg = 2; + + double resultRet = func(&arg); + double expectRet = -op(arg); + + result.assignFormat("ret=%g", resultRet); + expect.assignFormat("ret=%g", expectRet); + + return resultRet == expectRet; + } + + static double op(double a) { return a * a; } +}; + +// ============================================================================ +// [X86Test_FuncCallMisc4] +// ============================================================================ + +class X86Test_FuncCallMisc4 : public X86TestCase { +public: + X86Test_FuncCallMisc4() : X86TestCase("FuncCallMisc4") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc4()); + } + + virtual void compile(x86::Compiler& cc) { + InvokeNode* invokeNode; + + FuncSignatureBuilder funcSignature; + funcSignature.setCallConv(CallConv::kIdHost); + funcSignature.setRet(Type::kIdF64); + cc.addFunc(funcSignature); + + FuncSignatureBuilder invokeSignature; + invokeSignature.setCallConv(CallConv::kIdHost); + invokeSignature.setRet(Type::kIdF64); + + cc.invoke(&invokeNode, imm((void*)calledFunc), invokeSignature); + x86::Xmm ret = cc.newXmmSd("ret"); + invokeNode->setRet(0, ret); + cc.ret(ret); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef double (*Func)(void); + Func func = ptr_as_func(_func); + + double resultRet = func(); + double expectRet = 3.14; + + result.assignFormat("ret=%g", resultRet); + expect.assignFormat("ret=%g", expectRet); + + return resultRet == expectRet; + } + + static double calledFunc() { return 3.14; } +}; + +// ============================================================================ +// [X86Test_FuncCallMisc5] +// ============================================================================ + +// The register allocator should clobber the register used by the `call` itself. +class X86Test_FuncCallMisc5 : public X86TestCase { +public: + X86Test_FuncCallMisc5() : X86TestCase("FuncCallMisc5") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc5()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp pFn = cc.newIntPtr("pFn"); + x86::Gp vars[16]; + + uint32_t i, regCount = cc.arch() == Environment::kArchX86 ? 8 : 16; + ASMJIT_ASSERT(regCount <= ASMJIT_ARRAY_SIZE(vars)); + + cc.mov(pFn, imm((void*)calledFunc)); + + for (i = 0; i < regCount; i++) { + if (i == x86::Gp::kIdBp || i == x86::Gp::kIdSp) + continue; + + vars[i] = cc.newInt32("%%%u", unsigned(i)); + cc.mov(vars[i], 1); + } + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, pFn, FuncSignatureT(CallConv::kIdHost)); + + for (i = 1; i < regCount; i++) + if (vars[i].isValid()) + cc.add(vars[0], vars[i]); + cc.ret(vars[0]); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = sizeof(void*) == 4 ? 
6 : 14; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } + + static void calledFunc() {} +}; + +// ============================================================================ +// [X86Test_FuncCallMisc6] +// ============================================================================ + +class X86Test_FuncCallMisc6 : public X86TestCase { +public: + X86Test_FuncCallMisc6() : X86TestCase("FuncCallMisc6") {} + + static void add(TestApp& app) { + app.add(new X86Test_FuncCallMisc6()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + constexpr uint32_t kCount = 16; + + x86::Gp v[kCount]; + x86::Gp argVal = cc.newUInt32("argVal"); + x86::Gp retVal = cc.newUInt32("retVal"); + uint32_t i; + + cc.setArg(0, argVal); + cc.add(argVal, 1); + + for (i = 0; i < kCount; i++) + v[i] = cc.newUInt32("v%u", i); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, argVal); + invokeNode->setRet(0, retVal); + + for (i = 0; i < kCount; i++) + cc.mov(v[i], i + 1); + + for (i = 0; i < kCount; i++) + cc.add(argVal, v[i]); + + cc.add(retVal, argVal); + cc.ret(retVal); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint32_t (*Func)(uint32_t x); + Func func = ptr_as_func(_func); + + uint32_t resultRet = func(111); + uint32_t expectRet = 111 + 112 + 2 + (1 + 16) * 8; + + result.assignFormat("ret=%u", resultRet); + expect.assignFormat("ret=%u", expectRet); + + return resultRet == expectRet; + } + + static uint32_t calledFunc(uint32_t x) { return x + 1; } +}; + +// ============================================================================ +// [X86Test_MiscLocalConstPool] +// ============================================================================ + +class X86Test_MiscLocalConstPool : public X86TestCase { +public: + X86Test_MiscLocalConstPool() : X86TestCase("MiscLocalConstPool") {} + + static void add(TestApp& app) { + app.add(new X86Test_MiscLocalConstPool()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v0 = cc.newInt32("v0"); + x86::Gp v1 = cc.newInt32("v1"); + + x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200); + x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeLocal, 33); + + cc.mov(v0, c0); + cc.mov(v1, c1); + cc.add(v0, v1); + + cc.ret(v0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 233; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_MiscGlobalConstPool] +// ============================================================================ + +class X86Test_MiscGlobalConstPool : public X86TestCase { +public: + X86Test_MiscGlobalConstPool() : X86TestCase("MiscGlobalConstPool") {} + + static void add(TestApp& app) { + app.add(new X86Test_MiscGlobalConstPool()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp v0 = cc.newInt32("v0"); + x86::Gp v1 = cc.newInt32("v1"); + + x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeGlobal, 200); + x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeGlobal, 
33); + + cc.mov(v0, c0); + cc.mov(v1, c1); + cc.add(v0, v1); + + cc.ret(v0); + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(void); + Func func = ptr_as_func(_func); + + int resultRet = func(); + int expectRet = 233; + + result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return resultRet == expectRet; + } +}; + +// ============================================================================ +// [X86Test_MiscMultiRet] +// ============================================================================ + +struct X86Test_MiscMultiRet : public X86TestCase { + X86Test_MiscMultiRet() : X86TestCase("MiscMultiRet") {} + + static void add(TestApp& app) { + app.add(new X86Test_MiscMultiRet()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp op = cc.newInt32("op"); + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + + Label L_Zero = cc.newLabel(); + Label L_Add = cc.newLabel(); + Label L_Sub = cc.newLabel(); + Label L_Mul = cc.newLabel(); + Label L_Div = cc.newLabel(); + + cc.setArg(0, op); + cc.setArg(1, a); + cc.setArg(2, b); + + cc.cmp(op, 0); + cc.jz(L_Add); + + cc.cmp(op, 1); + cc.jz(L_Sub); + + cc.cmp(op, 2); + cc.jz(L_Mul); + + cc.cmp(op, 3); + cc.jz(L_Div); + + cc.bind(L_Zero); + cc.xor_(a, a); + cc.ret(a); + + cc.bind(L_Add); + cc.add(a, b); + cc.ret(a); + + cc.bind(L_Sub); + cc.sub(a, b); + cc.ret(a); + + cc.bind(L_Mul); + cc.imul(a, b); + cc.ret(a); + + cc.bind(L_Div); + cc.cmp(b, 0); + cc.jz(L_Zero); + + x86::Gp zero = cc.newInt32("zero"); + cc.xor_(zero, zero); + cc.idiv(zero, a, b); + cc.ret(a); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int, int); + + Func func = ptr_as_func(_func); + + int a = 44; + int b = 3; + + int r0 = func(0, a, b); + int r1 = func(1, a, b); + int r2 = func(2, a, b); + int r3 = func(3, a, b); + int e0 = a + b; + int e1 = a - b; + int e2 = a * b; + int e3 = a / b; + + result.assignFormat("ret={%d %d %d %d}", r0, r1, r2, r3); + expect.assignFormat("ret={%d %d %d %d}", e0, e1, e2, e3); + + return result.eq(expect); + } +}; + +// ============================================================================ +// [X86Test_MiscMultiFunc] +// ============================================================================ + +class X86Test_MiscMultiFunc : public X86TestCase { +public: + X86Test_MiscMultiFunc() : X86TestCase("MiscMultiFunc") {} + + static void add(TestApp& app) { + app.add(new X86Test_MiscMultiFunc()); + } + + virtual void compile(x86::Compiler& cc) { + FuncNode* f1 = cc.newFunc(FuncSignatureT(CallConv::kIdHost)); + FuncNode* f2 = cc.newFunc(FuncSignatureT(CallConv::kIdHost)); + + { + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + + cc.addFunc(f1); + cc.setArg(0, a); + cc.setArg(1, b); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, f2->label(), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, a); + + cc.ret(a); + cc.endFunc(); + } + + { + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newInt32("b"); + + cc.addFunc(f2); + cc.setArg(0, a); + cc.setArg(1, b); + + cc.add(a, b); + cc.ret(a); + cc.endFunc(); + } + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (*Func)(int, int); + + Func func = ptr_as_func(_func); + + int resultRet = func(56, 22); + int expectRet = 56 + 22; + + 
result.assignFormat("ret=%d", resultRet); + expect.assignFormat("ret=%d", expectRet); + + return result.eq(expect); + } +}; + +// ============================================================================ +// [X86Test_MiscUnfollow] +// ============================================================================ + +// Global (I didn't find a better way to test this). +static jmp_buf globalJmpBuf; + +class X86Test_MiscUnfollow : public X86TestCase { +public: + X86Test_MiscUnfollow() : X86TestCase("MiscUnfollow") {} + + static void add(TestApp& app) { + app.add(new X86Test_MiscUnfollow()); + } + + virtual void compile(x86::Compiler& cc) { + // NOTE: Fastcall calling convention is the most appropriate here, as all + // arguments will be passed by registers and there won't be any stack + // misalignment when we call the `handler()`. This was failing on OSX + // when targeting 32-bit. + cc.addFunc(FuncSignatureT(CallConv::kIdFastCall)); + + x86::Gp a = cc.newInt32("a"); + x86::Gp b = cc.newIntPtr("b"); + Label tramp = cc.newLabel(); + + cc.setArg(0, a); + cc.setArg(1, b); + + cc.cmp(a, 0); + cc.jz(tramp); + + cc.ret(a); + + cc.bind(tramp); + cc.unfollow().jmp(b); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef int (ASMJIT_FASTCALL *Func)(int, void*); + + Func func = ptr_as_func(_func); + + int resultRet = 0; + int expectRet = 1; + + if (!setjmp(globalJmpBuf)) + resultRet = func(0, (void*)handler); + else + resultRet = 1; + + result.assignFormat("ret={%d}", resultRet); + expect.assignFormat("ret={%d}", expectRet); + + return resultRet == expectRet; + } + + static void ASMJIT_FASTCALL handler() { longjmp(globalJmpBuf, 1); } +}; + +// ============================================================================ +// [Export] +// ============================================================================ + +void compiler_add_x86_tests(TestApp& app) { + // Base tests. + app.addT(); + app.addT(); + app.addT(); + + // Jump tests. + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + + // Alloc tests. + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + + // Function call tests. + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); + + // Miscellaneous tests. 
+ app.addT(); + app.addT(); + app.addT(); + app.addT(); + app.addT(); +} + +#endif diff --git a/libs/asmjit/test/asmjit_test_misc.h b/libs/asmjit/test/asmjit_test_misc.h index be1757a..0327327 100644 --- a/libs/asmjit/test/asmjit_test_misc.h +++ b/libs/asmjit/test/asmjit_test_misc.h @@ -52,7 +52,7 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) { Label L_LargeEnd = cc.newLabel(); Label L_DataPool = cc.newLabel(); - cc.addFunc(FuncSignatureT(cc.codeInfo().cdeclCallConv())); + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); cc.setArg(0, dst); cc.setArg(1, src); @@ -177,8 +177,8 @@ static void generateAlphaBlend(asmjit::x86::Compiler& cc) { // Data. cc.align(kAlignData, 16); cc.bind(L_DataPool); - cc.dxmm(Data128::fromI16(0x0080)); - cc.dxmm(Data128::fromI16(0x0101)); + cc.embedUInt16(uint16_t(0x0080u), 8); + cc.embedUInt16(uint16_t(0x0101u), 8); } } // {asmtest} diff --git a/libs/asmjit/test/asmjit_test_opcode.cpp b/libs/asmjit/test/asmjit_test_opcode.cpp index 908435b..939cb78 100644 --- a/libs/asmjit/test/asmjit_test_opcode.cpp +++ b/libs/asmjit/test/asmjit_test_opcode.cpp @@ -21,9 +21,11 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. +// ---------------------------------------------------------------------------- // This file is used to test opcodes generated by AsmJit. Output can be // disassembled in your IDE or by your favorite disassembler. Instructions // are grouped by category and then sorted alphabetically. +// ---------------------------------------------------------------------------- #include #include @@ -34,21 +36,21 @@ using namespace asmjit; struct OpcodeDumpInfo { - uint32_t archId; + uint32_t arch; bool useRex1; bool useRex2; }; -static const char* archIdToString(uint32_t archId) { - switch (archId) { - case ArchInfo::kIdNone: return "None"; - case ArchInfo::kIdX86 : return "X86"; - case ArchInfo::kIdX64 : return "X64"; - case ArchInfo::kIdA32 : return "A32"; - case ArchInfo::kIdA64 : return "A64"; - - default: - return ""; +static const char* archToString(uint32_t arch) noexcept { + switch (arch & ~Environment::kArchBigEndianMask) { + case Environment::kArchX86 : return "X86"; + case Environment::kArchX64 : return "X64"; + case Environment::kArchARM : return "ARM"; + case Environment::kArchThumb : return "Thumb"; + case Environment::kArchAArch64 : return "AArch64"; + case Environment::kArchMIPS32_LE: return "MIPS32"; + case Environment::kArchMIPS64_LE: return "MIPS64"; + default: return "Unknown"; } } @@ -65,23 +67,23 @@ int main() { TestErrorHandler eh; OpcodeDumpInfo infoList[] = { - { ArchInfo::kIdX86, false, false }, - { ArchInfo::kIdX64, false, false }, - { ArchInfo::kIdX64, false, true }, - { ArchInfo::kIdX64, true , false }, - { ArchInfo::kIdX64, true , true } + { Environment::kArchX86, false, false }, + { Environment::kArchX64, false, false }, + { Environment::kArchX64, false, true }, + { Environment::kArchX64, true , false }, + { Environment::kArchX64, true , true } }; for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) { const OpcodeDumpInfo& info = infoList[i]; printf("Opcodes [ARCH=%s REX1=%s REX2=%s]\n", - archIdToString(info.archId), + archToString(info.arch), info.useRex1 ? "true" : "false", info.useRex2 ? 
"true" : "false"); CodeHolder code; - code.init(CodeInfo(info.archId)); + code.init(Environment(info.arch)); code.setErrorHandler(&eh); #ifndef ASMJIT_NO_LOGGING @@ -95,7 +97,7 @@ int main() { // If this is the host architecture the code generated can be executed // for debugging purposes (the first instruction is ret anyway). - if (code.archId() == ArchInfo::kIdHost) { + if (code.arch() == Environment::kArchHost) { JitRuntime runtime; VoidFunc p; diff --git a/libs/asmjit/test/asmjit_test_opcode.h b/libs/asmjit/test/asmjit_test_opcode.h index e89db42..47a3800 100644 --- a/libs/asmjit/test/asmjit_test_opcode.h +++ b/libs/asmjit/test/asmjit_test_opcode.h @@ -91,7 +91,6 @@ static void generateOpcodes(asmjit::x86::Emitter* e, bool useRex1 = false, bool Zmm zmmA = useRex1 ? zmm8 : zmm0; Zmm zmmB = useRex2 ? zmm9 : zmm1; Zmm zmmC = useRex2 ? zmm10 : zmm2; - Zmm zmmD = useRex2 ? zmm11 : zmm3; Mem vx_ptr = ptr(gzB, xmmB); Mem vy_ptr = ptr(gzB, ymmB); @@ -1067,10 +1066,6 @@ static void generateOpcodes(asmjit::x86::Emitter* e, bool useRex1 = false, bool e->mwait(); // Implicit , e->mwaitx(); // Implicit , , - // PCOMMIT. - e->nop(); - e->pcommit(); - // PREFETCH / PREFETCHW / PREFETCHWT1. e->nop(); e->prefetch(anyptr_gpA); // 3DNOW. diff --git a/libs/asmjit/test/asmjit_test_unit.cpp b/libs/asmjit/test/asmjit_test_unit.cpp index f88018a..14b93cf 100644 --- a/libs/asmjit/test/asmjit_test_unit.cpp +++ b/libs/asmjit/test/asmjit_test_unit.cpp @@ -35,25 +35,26 @@ struct DumpCpuFeature { const char* name; }; -static const char* hostArch() noexcept { - switch (ArchInfo::kIdHost) { - case ArchInfo::kIdX86: return "X86"; - case ArchInfo::kIdX64: return "X64"; - case ArchInfo::kIdA32: return "ARM32"; - case ArchInfo::kIdA64: return "ARM64"; +static const char* archToString(uint32_t arch) noexcept { + switch (arch & ~Environment::kArchBigEndianMask) { + case Environment::kArchX86 : return "X86"; + case Environment::kArchX64 : return "X64"; + case Environment::kArchARM : return "ARM"; + case Environment::kArchThumb : return "Thumb"; + case Environment::kArchAArch64 : return "AArch64"; + case Environment::kArchMIPS32_LE: return "MIPS"; + case Environment::kArchMIPS64_LE: return "MIPS64"; default: return "Unknown"; } } -static void dumpFeatures(const CpuInfo& cpu, const DumpCpuFeature* data, size_t count) noexcept { - for (size_t i = 0; i < count; i++) - if (cpu.hasFeature(data[i].feature)) - INFO(" %s", data[i].name); -} - static void dumpCpu(void) noexcept { const CpuInfo& cpu = CpuInfo::host(); + // -------------------------------------------------------------------------- + // [CPU Information] + // -------------------------------------------------------------------------- + INFO("Host CPU:"); INFO(" Vendor : %s", cpu.vendor()); INFO(" Brand : %s", cpu.brand()); @@ -68,137 +69,20 @@ static void dumpCpu(void) noexcept { INFO(""); // -------------------------------------------------------------------------- - // [X86] + // [CPU Features] // -------------------------------------------------------------------------- -#if ASMJIT_ARCH_X86 - static const DumpCpuFeature x86FeaturesList[] = { - { x86::Features::kNX , "NX" }, - { x86::Features::kMT , "MT" }, - { x86::Features::k3DNOW , "3DNOW" }, - { x86::Features::k3DNOW2 , "3DNOW2" }, - { x86::Features::kADX , "ADX" }, - { x86::Features::kAESNI , "AESNI" }, - { x86::Features::kALTMOVCR8 , "ALTMOVCR8" }, - { x86::Features::kAVX , "AVX" }, - { x86::Features::kAVX2 , "AVX2" }, - { x86::Features::kAVX512_4FMAPS , "AVX512_4FMAPS" }, - { x86::Features::kAVX512_4VNNIW , 
"AVX512_4VNNIW" }, - { x86::Features::kAVX512_BITALG , "AVX512_BITALG" }, - { x86::Features::kAVX512_BW , "AVX512_BW" }, - { x86::Features::kAVX512_CDI , "AVX512_CDI" }, - { x86::Features::kAVX512_DQ , "AVX512_DQ" }, - { x86::Features::kAVX512_ERI , "AVX512_ERI" }, - { x86::Features::kAVX512_F , "AVX512_F" }, - { x86::Features::kAVX512_IFMA , "AVX512_IFMA" }, - { x86::Features::kAVX512_PFI , "AVX512_PFI" }, - { x86::Features::kAVX512_VBMI , "AVX512_VBMI" }, - { x86::Features::kAVX512_VBMI2 , "AVX512_VBMI2" }, - { x86::Features::kAVX512_VL , "AVX512_VL" }, - { x86::Features::kAVX512_VNNI , "AVX512_VNNI" }, - { x86::Features::kAVX512_VPOPCNTDQ, "AVX512_VPOPCNTDQ" }, - { x86::Features::kBMI , "BMI" }, - { x86::Features::kBMI2 , "BMI2" }, - { x86::Features::kCLFLUSH , "CLFLUSH" }, - { x86::Features::kCLFLUSHOPT , "CLFLUSHOPT" }, - { x86::Features::kCLWB , "CLWB" }, - { x86::Features::kCLZERO , "CLZERO" }, - { x86::Features::kCMOV , "CMOV" }, - { x86::Features::kCMPXCHG16B , "CMPXCHG16B" }, - { x86::Features::kCMPXCHG8B , "CMPXCHG8B" }, - { x86::Features::kERMS , "ERMS" }, - { x86::Features::kF16C , "F16C" }, - { x86::Features::kFMA , "FMA" }, - { x86::Features::kFMA4 , "FMA4" }, - { x86::Features::kFPU , "FPU" }, - { x86::Features::kFSGSBASE , "FSGSBASE" }, - { x86::Features::kFXSR , "FXSR" }, - { x86::Features::kFXSROPT , "FXSROPT" }, - { x86::Features::kGEODE , "GEODE" }, - { x86::Features::kGFNI , "GFNI" }, - { x86::Features::kHLE , "HLE" }, - { x86::Features::kI486 , "I486" }, - { x86::Features::kLAHFSAHF , "LAHFSAHF" }, - { x86::Features::kLWP , "LWP" }, - { x86::Features::kLZCNT , "LZCNT" }, - { x86::Features::kMMX , "MMX" }, - { x86::Features::kMMX2 , "MMX2" }, - { x86::Features::kMONITOR , "MONITOR" }, - { x86::Features::kMONITORX , "MONITORX" }, - { x86::Features::kMOVBE , "MOVBE" }, - { x86::Features::kMPX , "MPX" }, - { x86::Features::kMSR , "MSR" }, - { x86::Features::kMSSE , "MSSE" }, - { x86::Features::kOSXSAVE , "OSXSAVE" }, - { x86::Features::kPCLMULQDQ , "PCLMULQDQ" }, - { x86::Features::kPCOMMIT , "PCOMMIT" }, - { x86::Features::kPOPCNT , "POPCNT" }, - { x86::Features::kPREFETCHW , "PREFETCHW" }, - { x86::Features::kPREFETCHWT1 , "PREFETCHWT1" }, - { x86::Features::kRDRAND , "RDRAND" }, - { x86::Features::kRDSEED , "RDSEED" }, - { x86::Features::kRDTSC , "RDTSC" }, - { x86::Features::kRDTSCP , "RDTSCP" }, - { x86::Features::kRTM , "RTM" }, - { x86::Features::kSHA , "SHA" }, - { x86::Features::kSKINIT , "SKINIT" }, - { x86::Features::kSMAP , "SMAP" }, - { x86::Features::kSMEP , "SMEP" }, - { x86::Features::kSMX , "SMX" }, - { x86::Features::kSSE , "SSE" }, - { x86::Features::kSSE2 , "SSE2" }, - { x86::Features::kSSE3 , "SSE3" }, - { x86::Features::kSSE4_1 , "SSE4.1" }, - { x86::Features::kSSE4_2 , "SSE4.2" }, - { x86::Features::kSSE4A , "SSE4A" }, - { x86::Features::kSSSE3 , "SSSE3" }, - { x86::Features::kSVM , "SVM" }, - { x86::Features::kTBM , "TBM" }, - { x86::Features::kTSX , "TSX" }, - { x86::Features::kVAES , "VAES" }, - { x86::Features::kVMX , "VMX" }, - { x86::Features::kVPCLMULQDQ , "VPCLMULQDQ" }, - { x86::Features::kXOP , "XOP" }, - { x86::Features::kXSAVE , "XSAVE" }, - { x86::Features::kXSAVEC , "XSAVEC" }, - { x86::Features::kXSAVEOPT , "XSAVEOPT" }, - { x86::Features::kXSAVES , "XSAVES" } +#ifndef ASMJIT_NO_LOGGING + INFO("CPU Features:"); + BaseFeatures::Iterator it(cpu.features().iterator()); + while (it.hasNext()) { + uint32_t featureId = uint32_t(it.next()); + StringTmp<64> featureString; + Formatter::formatFeature(featureString, cpu.arch(), featureId); + 
INFO(" %s\n", featureString.data()); }; - - INFO("X86 Features:"); - dumpFeatures(cpu, x86FeaturesList, ASMJIT_ARRAY_SIZE(x86FeaturesList)); INFO(""); -#endif - - // -------------------------------------------------------------------------- - // [ARM] - // -------------------------------------------------------------------------- - -#if ASMJIT_ARCH_ARM - static const DumpCpuFeature armFeaturesList[] = { - { arm::Features::kARMv6 , "ARMv6" }, - { arm::Features::kARMv7 , "ARMv7" }, - { arm::Features::kARMv8 , "ARMv8" }, - { arm::Features::kTHUMB , "THUMB" }, - { arm::Features::kTHUMBv2 , "THUMBv2" }, - { arm::Features::kVFP2 , "VFPv2" }, - { arm::Features::kVFP3 , "VFPv3" }, - { arm::Features::kVFP4 , "VFPv4" }, - { arm::Features::kVFP_D32 , "VFP D32" }, - { arm::Features::kNEON , "NEON" }, - { arm::Features::kDSP , "DSP" }, - { arm::Features::kIDIV , "IDIV" }, - { arm::Features::kAES , "AES" }, - { arm::Features::kCRC32 , "CRC32" }, - { arm::Features::kSHA1 , "SHA1" }, - { arm::Features::kSHA256 , "SHA256" }, - { arm::Features::kATOMIC64 , "ATOMIC64" } - }; - - INFO("ARM Features:"); - dumpFeatures(cpu, armFeaturesList, ASMJIT_ARRAY_SIZE(armFeaturesList)); - INFO(""); -#endif +#endif // !ASMJIT_NO_LOGGING } // ============================================================================ @@ -281,7 +165,7 @@ static void dumpSizeOf(void) noexcept { DUMP_TYPE(BaseCompiler); DUMP_TYPE(FuncNode); DUMP_TYPE(FuncRetNode); - DUMP_TYPE(FuncCallNode); + DUMP_TYPE(InvokeNode); INFO(""); #endif @@ -324,7 +208,7 @@ int main(int argc, const char* argv[]) { unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF), unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF), - hostArch(), + archToString(Environment::kArchHost), buildType ); diff --git a/libs/asmjit/test/asmjit_test_x86_asm.cpp b/libs/asmjit/test/asmjit_test_x86_asm.cpp index a01185d..5000c1e 100644 --- a/libs/asmjit/test/asmjit_test_x86_asm.cpp +++ b/libs/asmjit/test/asmjit_test_x86_asm.cpp @@ -21,7 +21,11 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. +#include + +#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86 #include + #include #include #include @@ -48,7 +52,7 @@ static void makeRawFunc(x86::Emitter* emitter) noexcept { // Create and initialize `FuncDetail` and `FuncFrame`. FuncDetail func; - func.init(FuncSignatureT(CallConv::kIdHost)); + func.init(FuncSignatureT(CallConv::kIdHost), emitter->environment()); FuncFrame frame; frame.init(func); @@ -101,10 +105,11 @@ static void makeCompiledFunc(x86::Compiler* cc) noexcept { static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept { #ifndef ASMJIT_NO_LOGGING FileLogger logger(stdout); + logger.setIndentation(FormatOptions::kIndentationCode, 2); #endif CodeHolder code; - code.init(rt.codeInfo()); + code.init(rt.environment()); #ifndef ASMJIT_NO_LOGGING code.setLogger(&logger); @@ -173,8 +178,10 @@ static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept { } int main() { - unsigned nFailed = 0; + printf("AsmJit X86 Emitter Test\n\n"); + JitRuntime rt; + unsigned nFailed = 0; nFailed += testFunc(rt, BaseEmitter::kTypeAssembler); @@ -187,9 +194,15 @@ int main() { #endif if (!nFailed) - printf("[PASSED] All tests passed\n"); + printf("Success:\n All tests passed\n"); else - printf("[FAILED] %u %s failed\n", nFailed, nFailed == 1 ? "test" : "tests"); + printf("Failure:\n %u %s failed\n", nFailed, nFailed == 1 ? "test" : "tests"); return nFailed ? 
1 : 0; } +#else +int main() { + printf("AsmJit X86 Emitter Test is disabled on non-x86 host\n\n"); + return 0; +} +#endif diff --git a/libs/asmjit/test/asmjit_test_x86_cc.cpp b/libs/asmjit/test/asmjit_test_x86_cc.cpp index a0e50e9..9aec487 100644 --- a/libs/asmjit/test/asmjit_test_x86_cc.cpp +++ b/libs/asmjit/test/asmjit_test_x86_cc.cpp @@ -21,12 +21,21 @@ // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. +#include +#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86 + #include #include #include #include #include +#include +#include + +// Required for function tests that pass / return XMM registers. +#include + #include "./asmjit_test_misc.h" #ifdef _MSC_VER @@ -67,7 +76,7 @@ class SimpleErrorHandler : public ErrorHandler { virtual void handleError(Error err, const char* message, BaseEmitter* origin) { DebugUtils::unused(origin); _err = err; - _message.assignString(message); + _message.assign(message); } Error _err; @@ -81,7 +90,7 @@ class SimpleErrorHandler : public ErrorHandler { //! Base test interface for testing `x86::Compiler`. class X86Test { public: - X86Test(const char* name = nullptr) { _name.assignString(name); } + X86Test(const char* name = nullptr) { _name.assign(name); } virtual ~X86Test() {} inline const char* name() const { return _name.data(); } @@ -98,9 +107,7 @@ class X86Test { class X86TestApp { public: - Zone _zone; - ZoneAllocator _allocator; - ZoneVector _tests; + std::vector> _tests; unsigned _nFailed; size_t _outputSize; @@ -109,20 +116,15 @@ class X86TestApp { bool _dumpAsm; X86TestApp() noexcept - : _zone(8096 - Zone::kBlockOverhead), - _allocator(&_zone), - _nFailed(0), + : _nFailed(0), _outputSize(0), _verbose(false), _dumpAsm(false) {} - ~X86TestApp() noexcept { - for (X86Test* test : _tests) - delete test; - } + ~X86TestApp() noexcept {} - Error add(X86Test* test) noexcept{ - return _tests.append(&_allocator, test); + void add(X86Test* test) noexcept { + _tests.push_back(std::unique_ptr(test)); } template @@ -169,12 +171,12 @@ int X86TestApp::run() { stringLogger.addFlags(kFormatFlags); #endif - for (X86Test* test : _tests) { + for (std::unique_ptr& test : _tests) { JitRuntime runtime; CodeHolder code; SimpleErrorHandler errorHandler; - code.init(runtime.codeInfo()); + code.init(runtime.environment()); code.setErrorHandler(&errorHandler); #ifndef ASMJIT_NO_LOGGING @@ -207,7 +209,7 @@ int X86TestApp::run() { if (!_verbose) printf("\n"); String sb; - cc.dump(sb, kFormatFlags); + Formatter::formatNodeList(sb, kFormatFlags, &cc); printf("%s", sb.data()); } #endif @@ -261,11 +263,12 @@ int X86TestApp::run() { } if (_nFailed == 0) - printf("\n[PASSED] All %u tests passed\n", unsigned(_tests.size())); + printf("\nSuccess:\n All %u tests passed\n", unsigned(_tests.size())); else - printf("\n[FAILED] %u %s of %u failed\n", _nFailed, _nFailed == 1 ? "test" : "tests", unsigned(_tests.size())); + printf("\nFailure:\n %u %s of %u failed\n", _nFailed, _nFailed == 1 ? "test" : "tests", unsigned(_tests.size())); printf(" OutputSize=%zu\n", _outputSize); + printf("\n"); return _nFailed == 0 ? 
0 : 1; } @@ -703,8 +706,8 @@ class X86Test_JumpUnreachable1 : public X86Test { func(); - result.appendString("ret={}"); - expect.appendString("ret={}"); + result.append("ret={}"); + expect.append("ret={}"); return true; } @@ -750,8 +753,8 @@ class X86Test_JumpUnreachable2 : public X86Test { func(); - result.appendString("ret={}"); - expect.appendString("ret={}"); + result.append("ret={}"); + expect.append("ret={}"); return true; } @@ -1051,8 +1054,8 @@ class X86Test_AllocMany2 : public X86Test { for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) { if (i != 0) { - result.appendChar(','); - expect.appendChar(','); + result.append(','); + expect.append(','); } result.appendFormat("%u", resultBuf[i]); @@ -1970,6 +1973,59 @@ class X86Test_AllocArgsDouble : public X86Test { } }; +// ============================================================================ +// [X86Test_AllocArgsVec] +// ============================================================================ + +class X86Test_AllocArgsVec : public X86Test { +public: + X86Test_AllocArgsVec() : X86Test("AllocArgsVec") {} + + static void add(X86TestApp& app) { + // Not supported on Windows. +#ifndef _WIN32 + app.add(new X86Test_AllocArgsVec()); +#endif + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Xmm a = cc.newXmm("aXmm"); + x86::Xmm b = cc.newXmm("bXmm"); + + cc.setArg(0, a); + cc.setArg(1, b); + + cc.paddb(a, b); + cc.ret(a); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef __m128i (*Func)(__m128i, __m128i); + Func func = ptr_as_func(_func); + + uint8_t aData[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + uint8_t bData[16] = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + uint8_t rData[16]; + uint8_t eData[16] = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; + + __m128i aVec = _mm_loadu_si128(reinterpret_cast(aData)); + __m128i bVec = _mm_loadu_si128(reinterpret_cast(bData)); + + __m128i rVec = func(aVec, bVec); + _mm_storeu_si128(reinterpret_cast<__m128i*>(rData), rVec); + + result.appendHex(rData, 16); + expect.appendHex(eData, 16); + + return result == expect; + } +}; + // ============================================================================ // [X86Test_AllocRetFloat1] // ============================================================================ @@ -2253,21 +2309,21 @@ class X86Test_AllocMemcpy : public X86Test { func(dstBuffer, srcBuffer, kCount); - result.assignString("buf={"); - expect.assignString("buf={"); + result.assign("buf={"); + expect.assign("buf={"); for (i = 0; i < kCount; i++) { if (i != 0) { - result.appendString(", "); - expect.appendString(", "); + result.append(", "); + expect.append(", "); } result.appendFormat("%u", unsigned(dstBuffer[i])); expect.appendFormat("%u", unsigned(srcBuffer[i])); } - result.appendString("}"); - expect.appendString("}"); + result.append("}"); + expect.append("}"); return result == expect; } @@ -2395,21 +2451,21 @@ class X86Test_AllocAlphaBlend : public X86Test { func(dstBuffer, srcBuffer, kCount); - result.assignString("buf={"); - expect.assignString("buf={"); + result.assign("buf={"); + expect.assign("buf={"); for (i = 0; i < kCount; i++) { if (i != 0) { - result.appendString(", "); - expect.appendString(", "); + result.append(", "); + expect.append(", "); } result.appendFormat("%08X", unsigned(dstBuffer[i])); expect.appendFormat("%08X", unsigned(expBuffer[i])); } - result.appendString("}"); - 
expect.appendString("}"); + result.append("}"); + expect.append("}"); return result == expect; } @@ -2443,11 +2499,12 @@ class X86Test_FuncCallBase1 : public X86Test { cc.shl(v2, 1); // Call a function. - FuncCallNode* call = cc.call(imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, v2); - call->setArg(1, v1); - call->setArg(2, v0); - call->setRet(0, v0); + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, v2); + invokeNode->setArg(1, v1); + invokeNode->setArg(2, v0); + invokeNode->setRet(0, v0); cc.ret(v0); cc.endFunc(); @@ -2498,29 +2555,29 @@ class X86Test_FuncCallBase2 : public X86Test { Label L_Exit = cc.newLabel(); static const char token[kTokenSize] = "-+:|abcdefghijklmnopqrstuvwxyz|"; - FuncCallNode* call; + InvokeNode* invokeNode; cc.lea(p1, s1); cc.lea(p2, s2); // Try to corrupt the stack if wrongly allocated. - call = cc.call(imm((void*)memcpy), FuncSignatureT(CallConv::kIdHostCDecl)); - call->setArg(0, p1); - call->setArg(1, imm(token)); - call->setArg(2, imm(kTokenSize)); - call->setRet(0, p1); - - call = cc.call(imm((void*)memcpy), FuncSignatureT(CallConv::kIdHostCDecl)); - call->setArg(0, p2); - call->setArg(1, imm(token)); - call->setArg(2, imm(kTokenSize)); - call->setRet(0, p2); - - call = cc.call(imm((void*)memcmp), FuncSignatureT(CallConv::kIdHostCDecl)); - call->setArg(0, p1); - call->setArg(1, p2); - call->setArg(2, imm(kTokenSize)); - call->setRet(0, ret); + cc.invoke(&invokeNode, imm((void*)memcpy), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p1); + invokeNode->setArg(1, imm(token)); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, p1); + + cc.invoke(&invokeNode, imm((void*)memcpy), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p2); + invokeNode->setArg(1, imm(token)); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, p2); + + cc.invoke(&invokeNode, imm((void*)memcmp), FuncSignatureT(CallConv::kIdCDecl)); + invokeNode->setArg(0, p1); + invokeNode->setArg(1, p2); + invokeNode->setArg(2, imm(kTokenSize)); + invokeNode->setRet(0, ret); // This should be 0 on success, however, if both `p1` and `p2` were // allocated in the same address this check will still pass. 
@@ -2573,13 +2630,14 @@ class X86Test_FuncCallStd : public X86Test { cc.setArg(1, y); cc.setArg(2, z); - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), - FuncSignatureT(CallConv::kIdHostStdCall)); - call->setArg(0, x); - call->setArg(1, y); - call->setArg(2, z); - call->setRet(0, x); + FuncSignatureT(CallConv::kIdStdCall)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setArg(2, z); + invokeNode->setRet(0, x); cc.ret(x); cc.endFunc(); @@ -2622,18 +2680,15 @@ class X86Test_FuncCallFast : public X86Test { cc.addFunc(FuncSignatureT(CallConv::kIdHost)); cc.setArg(0, var); - FuncCallNode* call; - call = cc.call( - imm((void*)calledFunc), - FuncSignatureT(CallConv::kIdHostFastCall)); - call->setArg(0, var); - call->setRet(0, var); + InvokeNode* invokeNode; - call = cc.call( - imm((void*)calledFunc), - FuncSignatureT(CallConv::kIdHostFastCall)); - call->setArg(0, var); - call->setRet(0, var); + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, var); + invokeNode->setRet(0, var); + + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, var); + invokeNode->setRet(0, var); cc.ret(var); cc.endFunc(); @@ -2658,6 +2713,97 @@ class X86Test_FuncCallFast : public X86Test { } }; +// ============================================================================ +// [X86Test_FuncCallSIMD] +// ============================================================================ + +class X86Test_FuncCallSIMD : public X86Test { +public: + bool _useVectorCall; + + X86Test_FuncCallSIMD(bool useVectorCall) + : X86Test(), + _useVectorCall(useVectorCall) { + _name.assignFormat("FuncCallSIMD {%s}", _useVectorCall ? 
"__vectorcall" : "__cdecl"); + } + + static void add(X86TestApp& app) { + app.add(new X86Test_FuncCallSIMD(false)); +#ifdef _MSC_VER + app.add(new X86Test_FuncCallSIMD(true)); +#endif + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + x86::Gp resultPtr = cc.newIntPtr("resultPtr"); + x86::Gp aPtr = cc.newIntPtr("aPtr"); + x86::Gp bPtr = cc.newIntPtr("bPtr"); + x86::Gp pFn = cc.newIntPtr("pFn"); + + x86::Xmm aXmm = cc.newXmm("aXmm"); + x86::Xmm bXmm = cc.newXmm("bXmm"); + + cc.setArg(0, resultPtr); + cc.setArg(1, aPtr); + cc.setArg(2, bPtr); + + uint32_t ccId = CallConv::kIdCDecl; + Imm pFnImm = imm((void*)calledFunc_cdecl); + +#ifdef _MSC_VER + if (_useVectorCall) { + ccId = CallConv::kIdVectorCall; + pFnImm = imm((void*)calledFunc_vcall); + } +#endif + + cc.mov(pFn, pFnImm); + cc.movdqu(aXmm, x86::ptr(aPtr)); + cc.movdqu(bXmm, x86::ptr(bPtr)); + + InvokeNode* invokeNode; + cc.invoke(&invokeNode, pFn, FuncSignatureT(ccId)); + + invokeNode->setArg(0, aXmm); + invokeNode->setArg(1, bXmm); + invokeNode->setRet(0, aXmm); + + cc.movdqu(x86::ptr(resultPtr), aXmm); + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef void (*Func)(void*, const void*, const void*); + Func func = ptr_as_func(_func); + + uint8_t aData[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + uint8_t bData[16] = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + uint8_t rData[16]; + uint8_t eData[16] = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; + + func(rData, aData, bData); + + result.appendHex(rData, 16); + expect.appendHex(eData, 16); + + return result == expect; + } + + static __m128i calledFunc_cdecl(__m128i a, __m128i b) { + return _mm_add_epi8(a, b); + } + +#ifdef _MSC_VER + static __m128i __vectorcall calledFunc_vcall(__m128i a, __m128i b) { + return _mm_add_epi8(a, b); + } +#endif +}; + // ============================================================================ // [X86Test_FuncCallLight] // ============================================================================ @@ -2671,8 +2817,8 @@ class X86Test_FuncCallLight : public X86Test { } virtual void compile(x86::Compiler& cc) { - FuncSignatureT funcSig(CallConv::kIdHostCDecl); - FuncSignatureT fastSig(CallConv::kIdHostLightCall2); + FuncSignatureT funcSig(CallConv::kIdCDecl); + FuncSignatureT fastSig(CallConv::kIdLightCall2); FuncNode* func = cc.newFunc(funcSig); FuncNode* fast = cc.newFunc(fastSig); @@ -2705,15 +2851,17 @@ class X86Test_FuncCallLight : public X86Test { x86::Xmm xXmm = cc.newXmm("xXmm"); x86::Xmm yXmm = cc.newXmm("yXmm"); - FuncCallNode* call1 = cc.call(fast->label(), fastSig); - call1->setArg(0, aXmm); - call1->setArg(1, bXmm); - call1->setRet(0, xXmm); + InvokeNode* invokeNode; + + cc.invoke(&invokeNode, fast->label(), fastSig); + invokeNode->setArg(0, aXmm); + invokeNode->setArg(1, bXmm); + invokeNode->setRet(0, xXmm); - FuncCallNode* call2 = cc.call(fast->label(), fastSig); - call2->setArg(0, cXmm); - call2->setArg(1, dXmm); - call2->setRet(0, yXmm); + cc.invoke(&invokeNode, fast->label(), fastSig); + invokeNode->setArg(0, cXmm); + invokeNode->setArg(1, dXmm); + invokeNode->setRet(0, yXmm); cc.pmullw(xXmm, yXmm); cc.movups(x86::ptr(pOut), xXmm); @@ -2798,21 +2946,22 @@ class X86Test_FuncCallManyArgs : public X86Test { cc.mov(vi, 0x18); cc.mov(vj, 0x1E); - // Call function. - FuncCallNode* call = cc.call( + // Function call. 
+ InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, va); - call->setArg(1, vb); - call->setArg(2, vc); - call->setArg(3, vd); - call->setArg(4, ve); - call->setArg(5, vf); - call->setArg(6, vg); - call->setArg(7, vh); - call->setArg(8, vi); - call->setArg(9, vj); - call->setRet(0, va); + invokeNode->setArg(0, va); + invokeNode->setArg(1, vb); + invokeNode->setArg(2, vc); + invokeNode->setArg(3, vd); + invokeNode->setArg(4, ve); + invokeNode->setArg(5, vf); + invokeNode->setArg(6, vg); + invokeNode->setArg(7, vh); + invokeNode->setArg(8, vi); + invokeNode->setArg(9, vj); + invokeNode->setRet(0, va); cc.ret(va); cc.endFunc(); @@ -2856,20 +3005,21 @@ class X86Test_FuncCallDuplicateArgs : public X86Test { cc.mov(a, 3); // Call function. - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, a); - call->setArg(1, a); - call->setArg(2, a); - call->setArg(3, a); - call->setArg(4, a); - call->setArg(5, a); - call->setArg(6, a); - call->setArg(7, a); - call->setArg(8, a); - call->setArg(9, a); - call->setRet(0, a); + invokeNode->setArg(0, a); + invokeNode->setArg(1, a); + invokeNode->setArg(2, a); + invokeNode->setArg(3, a); + invokeNode->setArg(4, a); + invokeNode->setArg(5, a); + invokeNode->setArg(6, a); + invokeNode->setArg(7, a); + invokeNode->setArg(8, a); + invokeNode->setArg(9, a); + invokeNode->setRet(0, a); cc.ret(a); cc.endFunc(); @@ -2908,21 +3058,22 @@ class X86Test_FuncCallImmArgs : public X86Test { x86::Gp rv = cc.newInt32("rv"); // Call function. - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)X86Test_FuncCallManyArgs::calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, imm(0x03)); - call->setArg(1, imm(0x12)); - call->setArg(2, imm(0xA0)); - call->setArg(3, imm(0x0B)); - call->setArg(4, imm(0x2F)); - call->setArg(5, imm(0x02)); - call->setArg(6, imm(0x0C)); - call->setArg(7, imm(0x12)); - call->setArg(8, imm(0x18)); - call->setArg(9, imm(0x1E)); - call->setRet(0, rv); + invokeNode->setArg(0, imm(0x03)); + invokeNode->setArg(1, imm(0x12)); + invokeNode->setArg(2, imm(0xA0)); + invokeNode->setArg(3, imm(0x0B)); + invokeNode->setArg(4, imm(0x2F)); + invokeNode->setArg(5, imm(0x02)); + invokeNode->setArg(6, imm(0x0C)); + invokeNode->setArg(7, imm(0x12)); + invokeNode->setArg(8, imm(0x18)); + invokeNode->setArg(9, imm(0x1E)); + invokeNode->setRet(0, rv); cc.ret(rv); cc.endFunc(); @@ -2974,21 +3125,22 @@ class X86Test_FuncCallPtrArgs : public X86Test { x86::Gp rv = cc.newInt32("rv"); // Call function. 
- FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, imm(0x01)); - call->setArg(1, imm(0x02)); - call->setArg(2, imm(0x03)); - call->setArg(3, imm(0x04)); - call->setArg(4, imm(0x05)); - call->setArg(5, imm(0x06)); - call->setArg(6, imm(0x07)); - call->setArg(7, imm(0x08)); - call->setArg(8, imm(0x09)); - call->setArg(9, imm(0x0A)); - call->setRet(0, rv); + invokeNode->setArg(0, imm(0x01)); + invokeNode->setArg(1, imm(0x02)); + invokeNode->setArg(2, imm(0x03)); + invokeNode->setArg(3, imm(0x04)); + invokeNode->setArg(4, imm(0x05)); + invokeNode->setArg(5, imm(0x06)); + invokeNode->setArg(6, imm(0x07)); + invokeNode->setArg(7, imm(0x08)); + invokeNode->setArg(8, imm(0x09)); + invokeNode->setArg(9, imm(0x0A)); + invokeNode->setRet(0, rv); cc.ret(rv); cc.endFunc(); @@ -3044,15 +3196,16 @@ class X86Test_FuncCallRefArgs : public X86Test { cc.setArg(3, arg4); // Call function. - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, arg1); - call->setArg(1, arg2); - call->setArg(2, arg3); - call->setArg(3, arg4); - call->setRet(0, rv); + invokeNode->setArg(0, arg1); + invokeNode->setArg(1, arg2); + invokeNode->setArg(2, arg3); + invokeNode->setArg(3, arg4); + invokeNode->setRet(0, rv); cc.ret(rv); cc.endFunc(); @@ -3101,12 +3254,11 @@ class X86Test_FuncCallFloatAsXmmRet : public X86Test { cc.setArg(1, b); // Call function. - FuncCallNode* call = cc.call( - imm((void*)calledFunc), - FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, a); - call->setArg(1, b); - call->setRet(0, ret); + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, ret); cc.ret(ret); cc.endFunc(); @@ -3152,12 +3304,11 @@ class X86Test_FuncCallDoubleAsXmmRet : public X86Test { cc.setArg(0, a); cc.setArg(1, b); - FuncCallNode* call = cc.call( - imm((void*)calledFunc), - FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, a); - call->setArg(1, b); - call->setRet(0, ret); + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, ret); cc.ret(ret); cc.endFunc(); @@ -3194,7 +3345,7 @@ class X86Test_FuncCallConditional : public X86Test { x86::Gp y = cc.newInt32("y"); x86::Gp op = cc.newInt32("op"); - FuncCallNode* call; + InvokeNode* invokeNode; x86::Gp result; cc.addFunc(FuncSignatureT(CallConv::kIdHost)); @@ -3217,19 +3368,19 @@ class X86Test_FuncCallConditional : public X86Test { cc.bind(opAdd); result = cc.newInt32("result_1"); - call = cc.call((uint64_t)calledFuncAdd, FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, x); - call->setArg(1, y); - call->setRet(0, result); + cc.invoke(&invokeNode, (uint64_t)calledFuncAdd, FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setRet(0, result); cc.ret(result); cc.bind(opMul); result = cc.newInt32("result_2"); - call = cc.call((uint64_t)calledFuncMul, FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, x); - call->setArg(1, y); - call->setRet(0, result); + cc.invoke(&invokeNode, (uint64_t)calledFuncMul, FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, x); + invokeNode->setArg(1, y); + invokeNode->setRet(0, result); cc.ret(result); 
cc.endFunc(); @@ -3291,25 +3442,25 @@ class X86Test_FuncCallMultiple : public X86Test { x86::Gp ret = cc.newInt32("ret"); x86::Gp ptr = cc.newIntPtr("ptr"); x86::Gp idx = cc.newInt32("idx"); - FuncCallNode* call; + InvokeNode* invokeNode; cc.mov(ptr, buf); cc.mov(idx, int(i)); - call = cc.call((uint64_t)calledFunc, FuncSignatureT(CallConv::kIdHostFastCall)); - call->setArg(0, ptr); - call->setArg(1, idx); - call->setRet(0, ret); + cc.invoke(&invokeNode, (uint64_t)calledFunc, FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, ptr); + invokeNode->setArg(1, idx); + invokeNode->setRet(0, ret); cc.add(acc0, ret); cc.mov(ptr, buf); cc.mov(idx, int(i)); - call = cc.call((uint64_t)calledFunc, FuncSignatureT(CallConv::kIdHostFastCall)); - call->setArg(0, ptr); - call->setArg(1, idx); - call->setRet(0, ret); + cc.invoke(&invokeNode, (uint64_t)calledFunc, FuncSignatureT(CallConv::kIdFastCall)); + invokeNode->setArg(0, ptr); + invokeNode->setArg(1, idx); + invokeNode->setRet(0, ret); cc.sub(acc1, ret); } @@ -3361,9 +3512,11 @@ class X86Test_FuncCallRecursive : public X86Test { cc.mov(tmp, val); cc.dec(tmp); - FuncCallNode* call = cc.call(func->label(), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, tmp); - call->setRet(0, tmp); + InvokeNode* invokeNode; + + cc.invoke(&invokeNode, func->label(), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, tmp); + invokeNode->setRet(0, tmp); cc.mul(cc.newInt32(), val, tmp); cc.bind(skip); @@ -3413,15 +3566,16 @@ class X86Test_FuncCallVarArg1 : public X86Test { // We call `int func(size_t, ...)` // - The `vaIndex` must be 1 (first argument after size_t). // - The full signature of varargs (int, int, int, int) must follow. - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost, 1)); - call->setArg(0, imm(4)); - call->setArg(1, a0); - call->setArg(2, a1); - call->setArg(3, a2); - call->setArg(4, a3); - call->setRet(0, a0); + invokeNode->setArg(0, imm(4)); + invokeNode->setArg(1, a0); + invokeNode->setArg(2, a1); + invokeNode->setArg(3, a2); + invokeNode->setArg(4, a3); + invokeNode->setRet(0, a0); cc.ret(a0); cc.endFunc(); @@ -3481,15 +3635,16 @@ class X86Test_FuncCallVarArg2 : public X86Test { // We call `double func(size_t, ...)` // - The `vaIndex` must be 1 (first argument after size_t). // - The full signature of varargs (double, double, double, double) must follow. 
- FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)calledFunc), FuncSignatureT(CallConv::kIdHost, 1)); - call->setArg(0, imm(4)); - call->setArg(1, a0); - call->setArg(2, a1); - call->setArg(3, a2); - call->setArg(4, a3); - call->setRet(0, a0); + invokeNode->setArg(0, imm(4)); + invokeNode->setArg(1, a0); + invokeNode->setArg(2, a1); + invokeNode->setArg(3, a2); + invokeNode->setArg(4, a3); + invokeNode->setRet(0, a0); cc.ret(a0); cc.endFunc(); @@ -3521,6 +3676,68 @@ class X86Test_FuncCallVarArg2 : public X86Test { } }; +// ============================================================================ +// [X86Test_FuncCallInt64Arg] +// ============================================================================ + +class X86Test_FuncCallInt64Arg : public X86Test { +public: + X86Test_FuncCallInt64Arg() : X86Test("FuncCallInt64Arg") {} + + static void add(X86TestApp& app) { + app.add(new X86Test_FuncCallInt64Arg()); + } + + virtual void compile(x86::Compiler& cc) { + cc.addFunc(FuncSignatureT(CallConv::kIdHost)); + + if (cc.is64Bit()) { + x86::Gp reg = cc.newUInt64(); + cc.setArg(0, reg); + cc.add(reg, 1); + cc.ret(reg); + } + else { + x86::Gp hi = cc.newUInt32("hi"); + x86::Gp lo = cc.newUInt32("lo"); + + cc.setArg(0, 0, lo); + cc.setArg(0, 1, hi); + + cc.add(lo, 1); + cc.adc(hi, 0); + cc.ret(lo, hi); + } + + cc.endFunc(); + } + + virtual bool run(void* _func, String& result, String& expect) { + typedef uint64_t (*Func)(uint64_t); + Func func = ptr_as_func(_func); + + uint64_t resultRet = func(uint64_t(0xFFFFFFFF)); + uint64_t expectRet = 0x100000000; + + result.assignFormat("ret=%llu", (unsigned long long)resultRet); + expect.assignFormat("ret=%llu", (unsigned long long)expectRet); + + return resultRet == expectRet; + } + + static double calledFunc(size_t n, ...) 
{ + double sum = 0; + va_list ap; + va_start(ap, n); + for (size_t i = 0; i < n; i++) { + double arg = va_arg(ap, double); + sum += arg; + } + va_end(ap); + return sum; + } +}; + // ============================================================================ // [X86Test_FuncCallMisc1] // ============================================================================ @@ -3545,11 +3762,12 @@ class X86Test_FuncCallMisc1 : public X86Test { cc.setArg(0, a); cc.setArg(1, b); - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)dummy), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, a); - call->setArg(1, b); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); cc.lea(r, x86::ptr(a, b)); cc.ret(r); @@ -3593,11 +3811,12 @@ class X86Test_FuncCallMisc2 : public X86Test { cc.setArg(0, p); cc.movsd(arg, x86::ptr(p)); - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)op), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, arg); - call->setRet(0, ret); + invokeNode->setArg(0, arg); + invokeNode->setRet(0, ret); cc.ret(ret); cc.endFunc(); @@ -3643,11 +3862,12 @@ class X86Test_FuncCallMisc3 : public X86Test { cc.setArg(0, p); cc.movsd(arg, x86::ptr(p)); - FuncCallNode* call = cc.call( + InvokeNode* invokeNode; + cc.invoke(&invokeNode, imm((void*)op), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, arg); - call->setRet(0, ret); + invokeNode->setArg(0, arg); + invokeNode->setRet(0, ret); cc.xorps(arg, arg); cc.subsd(arg, ret); @@ -3687,18 +3907,20 @@ class X86Test_FuncCallMisc4 : public X86Test { } virtual void compile(x86::Compiler& cc) { - FuncSignatureBuilder funcPrototype; - funcPrototype.setCallConv(CallConv::kIdHost); - funcPrototype.setRet(Type::kIdF64); - cc.addFunc(funcPrototype); + InvokeNode* invokeNode; - FuncSignatureBuilder callPrototype; - callPrototype.setCallConv(CallConv::kIdHost); - callPrototype.setRet(Type::kIdF64); - FuncCallNode* call = cc.call(imm((void*)calledFunc), callPrototype); + FuncSignatureBuilder funcSignature; + funcSignature.setCallConv(CallConv::kIdHost); + funcSignature.setRet(Type::kIdF64); + cc.addFunc(funcSignature); + FuncSignatureBuilder invokeSignature; + invokeSignature.setCallConv(CallConv::kIdHost); + invokeSignature.setRet(Type::kIdF64); + + cc.invoke(&invokeNode, imm((void*)calledFunc), invokeSignature); x86::Xmm ret = cc.newXmmSd("ret"); - call->setRet(0, ret); + invokeNode->setRet(0, ret); cc.ret(ret); cc.endFunc(); @@ -3739,7 +3961,7 @@ class X86Test_FuncCallMisc5 : public X86Test { x86::Gp pFn = cc.newIntPtr("pFn"); x86::Gp vars[16]; - uint32_t i, regCount = cc.gpCount(); + uint32_t i, regCount = cc.arch() == Environment::kArchX86 ? 
8 : 16; ASMJIT_ASSERT(regCount <= ASMJIT_ARRAY_SIZE(vars)); cc.mov(pFn, imm((void*)calledFunc)); @@ -3752,7 +3974,9 @@ class X86Test_FuncCallMisc5 : public X86Test { cc.mov(vars[i], 1); } - cc.call(pFn, FuncSignatureT(CallConv::kIdHost)); + InvokeNode* invokeNode; + cc.invoke(&invokeNode, pFn, FuncSignatureT(CallConv::kIdHost)); + for (i = 1; i < regCount; i++) if (vars[i].isValid()) cc.add(vars[0], vars[i]); @@ -3979,10 +4203,11 @@ class X86Test_MiscMultiFunc : public X86Test { cc.setArg(0, a); cc.setArg(1, b); - FuncCallNode* call = cc.call(f2->label(), FuncSignatureT(CallConv::kIdHost)); - call->setArg(0, a); - call->setArg(1, b); - call->setRet(0, a); + InvokeNode* invokeNode; + cc.invoke(&invokeNode, f2->label(), FuncSignatureT(CallConv::kIdHost)); + invokeNode->setArg(0, a); + invokeNode->setArg(1, b); + invokeNode->setRet(0, a); cc.ret(a); cc.endFunc(); @@ -4037,7 +4262,7 @@ class X86Test_MiscUnfollow : public X86Test { // arguments will be passed by registers and there won't be any stack // misalignment when we call the `handler()`. This was failing on OSX // when targeting 32-bit. - cc.addFunc(FuncSignatureT(CallConv::kIdHostFastCall)); + cc.addFunc(FuncSignatureT(CallConv::kIdFastCall)); x86::Gp a = cc.newInt32("a"); x86::Gp b = cc.newIntPtr("b"); @@ -4123,6 +4348,7 @@ int main(int argc, char* argv[]) { app.addT(); app.addT(); app.addT(); + app.addT(); app.addT(); app.addT(); app.addT(); @@ -4137,6 +4363,7 @@ int main(int argc, char* argv[]) { app.addT(); app.addT(); app.addT(); + app.addT(); app.addT(); app.addT(); app.addT(); @@ -4150,6 +4377,7 @@ int main(int argc, char* argv[]) { app.addT(); app.addT(); app.addT(); + app.addT(); app.addT(); app.addT(); app.addT(); @@ -4165,3 +4393,13 @@ int main(int argc, char* argv[]) { return app.run(); } + +#else +int main() { + printf("AsmJit Compiler Test-Suite v%u.%u.%u is disabled on non-x86 host:\n", + unsigned((ASMJIT_LIBRARY_VERSION >> 16) ), + unsigned((ASMJIT_LIBRARY_VERSION >> 8) & 0xFF), + unsigned((ASMJIT_LIBRARY_VERSION ) & 0xFF)); + return 0; +} +#endif diff --git a/libs/asmjit/test/asmjit_test_x86_instinfo.cpp b/libs/asmjit/test/asmjit_test_x86_instinfo.cpp new file mode 100644 index 0000000..8f58263 --- /dev/null +++ b/libs/asmjit/test/asmjit_test_x86_instinfo.cpp @@ -0,0 +1,195 @@ +// AsmJit - Machine code generation for C++ +// +// * Official AsmJit Home Page: https://asmjit.com +// * Official Github Repository: https://github.com/asmjit/asmjit +// +// Copyright (c) 2008-2020 The AsmJit Authors +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. + +#include + +#if defined(ASMJIT_BUILD_X86) +#include +#endif + +#include + +using namespace asmjit; + +static char accessLetter(bool r, bool w) noexcept { + return r && w ? 
'X' : r ? 'R' : w ? 'W' : '_'; +} + +static void printInfo(uint32_t arch, const BaseInst& inst, const Operand_* operands, size_t opCount) { + StringTmp<512> sb; + + // Read & Write Information + // ------------------------ + + InstRWInfo rw; + InstAPI::queryRWInfo(arch, inst, operands, opCount, &rw); + + sb.append("Instruction:\n"); + sb.append(" "); +#ifndef ASMJIT_NO_LOGGING + Formatter::formatInstruction(sb, 0, nullptr, arch, inst, operands, opCount); +#else + sb.append(""); +#endif + sb.append("\n"); + + sb.append("Operands:\n"); + for (uint32_t i = 0; i < rw.opCount(); i++) { + const OpRWInfo& op = rw.operand(i); + + sb.appendFormat(" [%u] Op=%c Read=%016llX Write=%016llX Extend=%016llX", + i, + accessLetter(op.isRead(), op.isWrite()), + op.readByteMask(), + op.writeByteMask(), + op.extendByteMask()); + + if (op.isMemBaseUsed()) { + sb.appendFormat(" Base=%c", accessLetter(op.isMemBaseRead(), op.isMemBaseWrite())); + if (op.isMemBasePreModify()) + sb.appendFormat("
");
+      if (op.isMemBasePostModify())
+        sb.appendFormat(" ");
+    }
+
+    if (op.isMemIndexUsed()) {
+      sb.appendFormat(" Index=%c", accessLetter(op.isMemIndexRead(), op.isMemIndexWrite()));
+    }
+
+    sb.append("\n");
+  }
+
+  if (rw.readFlags() | rw.writeFlags()) {
+    sb.append("Flags: \n");
+
+    struct FlagMap {
+      uint32_t flag;
+      char name[4];
+    };
+
+    static const FlagMap flagMap[] = {
+      { x86::Status::kCF, "CF" },
+      { x86::Status::kOF, "OF" },
+      { x86::Status::kSF, "SF" },
+      { x86::Status::kZF, "ZF" },
+      { x86::Status::kAF, "AF" },
+      { x86::Status::kPF, "PF" },
+      { x86::Status::kDF, "DF" },
+      { x86::Status::kIF, "IF" },
+      { x86::Status::kAC, "AC" },
+      { x86::Status::kC0, "C0" },
+      { x86::Status::kC1, "C1" },
+      { x86::Status::kC2, "C2" },
+      { x86::Status::kC3, "C3" }
+    };
+
+    sb.append("  ");
+    for (uint32_t f = 0; f < ASMJIT_ARRAY_SIZE(flagMap); f++) {
+      char c = accessLetter((rw.readFlags() & flagMap[f].flag) != 0,
+                            (rw.writeFlags() & flagMap[f].flag) != 0);
+      if (c != '_')
+        sb.appendFormat("%s=%c ", flagMap[f].name, c);
+    }
+
+    sb.append("\n");
+  }
+
+  // CPU Features
+  // ------------
+
+  BaseFeatures features;
+  InstAPI::queryFeatures(arch, inst, operands, opCount, &features);
+
+#ifndef ASMJIT_NO_LOGGING
+  if (!features.empty()) {
+    sb.append("Features:\n");
+    sb.append("  ");
+
+    bool first = true;
+    BaseFeatures::Iterator it(features.iterator());
+    while (it.hasNext()) {
+      uint32_t featureId = uint32_t(it.next());
+      if (!first)
+        sb.append(" & ");
+      Formatter::formatFeature(sb, arch, featureId);
+      first = false;
+    }
+    sb.append("\n");
+  }
+#endif
+
+  printf("%s\n", sb.data());
+}
+
+template<typename... Args>
+static void printInfoSimple(uint32_t arch, uint32_t instId, Args&&... args) {
+  BaseInst inst(instId);
+  Operand_ opArray[] = { std::forward<Args>(args)... };
+  printInfo(arch, inst, opArray, sizeof...(args));
+}
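+
+// Note (illustrative, not part of the test output): printInfoSimple() only
+// packs its variadic operands into a plain Operand_ array for printInfo(),
+// so the first call in testX86Arch() below is equivalent to:
+//
+//   Operand_ ops[] = { x86::eax, x86::ebx };
+//   printInfo(Environment::kArchX64, BaseInst(x86::Inst::kIdAdd), ops, 2);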
+
+static void testX86Arch() {
+#if defined(ASMJIT_BUILD_X86)
+  uint32_t arch = Environment::kArchX64;
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdAdd,
+                  x86::eax, x86::ebx);
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdLods,
+                  x86::eax, x86::dword_ptr(x86::rsi));
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdPshufd,
+                  x86::xmm0, x86::xmm1, imm(0));
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdPextrw,
+                  x86::eax, x86::xmm1, imm(0));
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdPextrw,
+                  x86::ptr(x86::rax), x86::xmm1, imm(0));
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdVaddpd,
+                  x86::ymm0, x86::ymm1, x86::ymm2);
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdVaddpd,
+                  x86::ymm0, x86::ymm30, x86::ymm31);
+
+  printInfoSimple(arch,
+                  x86::Inst::kIdVaddpd,
+                  x86::zmm0, x86::zmm1, x86::zmm2);
+#endif
+}
+
+int main() {
+  printf("AsmJit Instruction Information Test\n\n");
+
+  testX86Arch();
+
+  return 0;
+}
diff --git a/libs/asmjit/test/asmjit_test_x86_sections.cpp b/libs/asmjit/test/asmjit_test_x86_sections.cpp
index a9645b3..599fa57 100644
--- a/libs/asmjit/test/asmjit_test_x86_sections.cpp
+++ b/libs/asmjit/test/asmjit_test_x86_sections.cpp
@@ -34,6 +34,9 @@
 //   - Copy the code to the destination address.
 // ----------------------------------------------------------------------------
 
+#include 
+#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
+
 #include 
 #include 
 #include 
@@ -53,7 +56,9 @@ static void fail(const char* message, Error err) {
 }
 
 int main() {
-  CodeInfo codeInfo(ArchInfo::kIdHost);
+  printf("AsmJit X86 Sections Test\n\n");
+
+  Environment env = hostEnvironment();
   JitAllocator allocator;
 
 #ifndef ASMJIT_NO_LOGGING
@@ -62,7 +67,7 @@ int main() {
 #endif
 
   CodeHolder code;
-  code.init(codeInfo);
+  code.init(env);
 
 #ifndef ASMJIT_NO_LOGGING
   code.setLogger(&logger);
@@ -83,7 +88,7 @@ int main() {
     Label data = a.newLabel();
 
     FuncDetail func;
-    func.init(FuncSignatureT(CallConv::kIdHost));
+    func.init(FuncSignatureT(CallConv::kIdHost), code.environment());
 
     FuncFrame frame;
     frame.init(func);
@@ -113,7 +118,7 @@ int main() {
   // how to do it explicitly.
   printf("\nCalculating section offsets:\n");
   uint64_t offset = 0;
-  for (Section* section : code.sections()) {
+  for (Section* section : code.sectionsByOrder()) {
     offset = Support::alignUp(offset, section->alignment());
     section->setOffset(offset);
     offset += section->realSize();
@@ -152,25 +157,29 @@ int main() {
   // Copy the flattened code into `mem.rw`. There are two ways. You can either copy
   // everything manually by iterating over all sections or use `copyFlattenedData`.
   // This code is similar to what `copyFlattenedData(p, codeSize, 0)` would do:
-  for (Section* section : code.sections())
+  for (Section* section : code.sectionsByOrder())
     memcpy(static_cast<uint8_t*>(rwPtr) + size_t(section->offset()), section->data(), section->bufferSize());
 
   // Execute the function and test whether it works.
   typedef size_t (*Func)(size_t idx);
   Func fn = (Func)roPtr;
 
-  printf("\nTesting the generated function:\n");
+  printf("\n");
   if (fn(0) != dataArray[0] ||
       fn(3) != dataArray[3] ||
       fn(6) != dataArray[6] ||
       fn(9) != dataArray[9] ) {
-    printf("  [FAILED] The generated function returned incorrect result(s)\n");
+    printf("Failure:\n  The generated function returned incorrect result(s)\n");
     return 1;
   }
-  else {
-    printf("  [PASSED] The generated function returned expected results\n");
-  }
 
-  allocator.release((void*)fn);
+  printf("Success:\n  The generated function returned expected results\n");
   return 0;
 }
+
+#else
+int main() {
+  printf("AsmJit X86 Sections Test is disabled on non-x86 host\n\n");
+  return 0;
+}
+#endif
diff --git a/libs/asmjit/test/cmdline.h b/libs/asmjit/test/cmdline.h
new file mode 100644
index 0000000..caafa6c
--- /dev/null
+++ b/libs/asmjit/test/cmdline.h
@@ -0,0 +1,83 @@
+// AsmJit - Machine code generation for C++
+//
+//  * Official AsmJit Home Page: https://asmjit.com
+//  * Official Github Repository: https://github.com/asmjit/asmjit
+//
+// Copyright (c) 2008-2020 The AsmJit Authors
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef ASMJIT_TEST_CMDLINE_H_INCLUDED
+#define ASMJIT_TEST_CMDLINE_H_INCLUDED
+
+#include 
+#include 
+#include 
+
+// ============================================================================
+// [CmdLine]
+// ============================================================================
+
+class CmdLine {
+public:
+  int _argc;
+  const char* const* _argv;
+
+  CmdLine(int argc, const char* const* argv)
+    : _argc(argc),
+      _argv(argv) {}
+
+  bool hasArg(const char* key) const {
+    for (int i = 1; i < _argc; i++)
+      if (strcmp(key, _argv[i]) == 0)
+        return true;
+    return false;
+  }
+
+  const char* valueOf(const char* key, const char* defaultValue) const {
+    size_t keySize = strlen(key);
+    for (int i = 1; i < _argc; i++) {
+      const char* val = _argv[i];
+      if (strlen(val) >= keySize + 1 && val[keySize] == '=' && memcmp(val, key, keySize) == 0)
+        return val + keySize + 1;
+    }
+
+    return defaultValue;
+  }
+
+  int valueAsInt(const char* key, int defaultValue) const {
+    const char* val = valueOf(key, nullptr);
+    if (val == nullptr || val[0] == '\0')
+      return defaultValue;
+
+    return atoi(val);
+  }
+
+  unsigned valueAsUInt(const char* key, unsigned defaultValue) const {
+    const char* val = valueOf(key, nullptr);
+    if (val == nullptr || val[0] == '\0')
+      return defaultValue;
+
+    int v = atoi(val);
+    if (v < 0)
+      return defaultValue;
+    else
+      return unsigned(v);
+  }
+};
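+
+// Usage sketch (illustrative; the test binaries may parse different options):
+//
+//   int main(int argc, char* argv[]) {
+//     CmdLine cmd(argc, argv);
+//     bool verbose  = cmd.hasArg("--verbose");         // exact-match flag
+//     const char* a = cmd.valueOf("--arch", "host");   // "--arch=x86" -> "x86"
+//     unsigned runs = cmd.valueAsUInt("--count", 1);   // "--count=8"  -> 8
+//     printf("verbose=%d arch=%s runs=%u\n", verbose, a, runs);
+//     return 0;
+//   }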
+
+#endif // ASMJIT_TEST_CMDLINE_H_INCLUDED
diff --git a/libs/asmjit/tools/ci-run.sh b/libs/asmjit/tools/ci-run.sh
new file mode 100644
index 0000000..e7d9278
--- /dev/null
+++ b/libs/asmjit/tools/ci-run.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env sh
+
+set -e
+
+BUILD_DIR=$1
+
+if [ "$USE_VALGRIND" = "1" ]; then
+  RUN_CMD="valgrind --leak-check=full --show-reachable=yes --track-origins=yes"
+fi
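+
+# Usage sketch (illustrative): pass the directory that contains the test
+# binaries, optionally running everything under Valgrind:
+#
+#   ./tools/ci-run.sh build
+#   USE_VALGRIND=1 ./tools/ci-run.sh build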
+
+echo ""
+echo "=== Starting Tests ==="
+
+echo ""
+eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_unit --quick"
+
+echo ""
+echo "AsmJit Opcode Test"
+eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_opcode > /dev/null"
+
+if [ -f ${BUILD_DIR}/asmjit_test_x86_asm ]; then
+  echo ""
+  eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_x86_asm"
+fi
+
+if [ -f ${BUILD_DIR}/asmjit_test_x86_sections ]; then
+  echo ""
+  eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_x86_sections"
+fi
+
+if [ -f ${BUILD_DIR}/asmjit_test_x86_instinfo ]; then
+  echo ""
+  eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_x86_instinfo"
+fi
+
+if [ -f ${BUILD_DIR}/asmjit_test_compiler ]; then
+  echo ""
+  eval "$RUN_CMD ${BUILD_DIR}/asmjit_test_compiler"
+fi
diff --git a/libs/asmjit/tools/configure-makefiles.sh b/libs/asmjit/tools/configure-makefiles.sh
index 1c2729a..8bf7ca9 100644
--- a/libs/asmjit/tools/configure-makefiles.sh
+++ b/libs/asmjit/tools/configure-makefiles.sh
@@ -7,7 +7,7 @@ BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"
 echo "** Configuring ${BUILD_DIR}_dbg [Debug Build] **"
 mkdir -p ../${BUILD_DIR}_dbg
 cd ../${BUILD_DIR}_dbg
-eval cmake .. -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=1
+eval cmake .. -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS}
 cd ${CURRENT_DIR}
 
 echo "** Configuring ${BUILD_DIR}_rel [Release Build] **"
diff --git a/libs/asmjit/tools/configure-ninja.sh b/libs/asmjit/tools/configure-ninja.sh
index a3da417..0ff2779 100644
--- a/libs/asmjit/tools/configure-ninja.sh
+++ b/libs/asmjit/tools/configure-ninja.sh
@@ -7,7 +7,7 @@ BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"
 echo "** Configuring ${BUILD_DIR}_dbg [Debug Build] **"
 mkdir -p ../${BUILD_DIR}_dbg
 cd ../${BUILD_DIR}_dbg
-eval cmake .. -G"Ninja" -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=1
+eval cmake .. -G"Ninja" -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS}
 cd ${CURRENT_DIR}
 
 echo "** Configuring ${BUILD_DIR}_rel [Release Build] **"
diff --git a/libs/asmjit/tools/configure-sanitizers.sh b/libs/asmjit/tools/configure-sanitizers.sh
index 46e332c..14a16b4 100644
--- a/libs/asmjit/tools/configure-sanitizers.sh
+++ b/libs/asmjit/tools/configure-sanitizers.sh
@@ -5,13 +5,13 @@ BUILD_DIR="build"
 BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"
 
 echo "** Configuring '${BUILD_DIR}_rel_asan' [Sanitize=Address] **"
-mkdir -p ../${BUILD_DIR}_rel_asan
-cd ../${BUILD_DIR}_rel_asan
-eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS} -DASMJIT_SANITIZE=address
+mkdir -p ../${BUILD_DIR}_dbg_asan
+cd ../${BUILD_DIR}_dbg_asan
+eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=address
 cd ${CURRENT_DIR}
 
 echo "** Configuring '${BUILD_DIR}_rel_ubsan' [Sanitize=Undefined] **"
-mkdir -p ../${BUILD_DIR}_rel_ubsan
-cd ../${BUILD_DIR}_rel_ubsan
-eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS} -DASMJIT_SANITIZE=undefined
+mkdir -p ../${BUILD_DIR}_dbg_ubsan
+cd ../${BUILD_DIR}_dbg_ubsan
+eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=undefined
 cd ${CURRENT_DIR}
diff --git a/libs/asmjit/tools/enumgen.js b/libs/asmjit/tools/enumgen.js
new file mode 100644
index 0000000..b9d57e3
--- /dev/null
+++ b/libs/asmjit/tools/enumgen.js
@@ -0,0 +1,417 @@
+"use strict";
+
+const fs = require("fs");
+const path = require("path");
+
+const hasOwn = Object.prototype.hasOwnProperty;
+
+// ============================================================================
+// [Tokenizer]
+// ============================================================================
+
+// The list of "token types" which our lexer understands:
+const tokenizerPatterns = [
+  { type: "space"   , re: /^\s+/ },
+  { type: "comment" , re: /^(\/\/.*(\n|$)|\/\*.*\*\/)/ },
+  { type: "symbol"  , re: /^[a-zA-Z_]\w*/ },
+  { type: "integer" , re: /^(-?\d+|0[x|X][0-9A-Fa-f]+)(l)?(l)?(u)?\b/ },
+  { type: "comma"   , re: /^,/ },
+  { type: "operator", re: /(\+|\+\+|-|--|\/|\*|<<|>>|=|==|<|<=|>|>=|&|&&|\||\|\||\^|~|!)/ },
+  { type: "paren"   , re: /^[\(\)\{\}\[\]]/ }
+];
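+
+// Example (illustrative): for the input "kIdAdd = 0x01, // first id" the
+// patterns above yield symbol("kIdAdd"), operator("="), integer("0x01") and
+// comma(","); whitespace and the trailing comment are skipped by Tokenizer.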
+
+function nextToken(input, from, patterns) {
+  if (from >= input.length) {
+    return {
+      type: "end",
+      begin: from,
+      end: from,
+      content: ""
+    }
+  }
+
+  const s = input.slice(from);
+  for (var i = 0; i < patterns.length; i++) {
+    const pattern = patterns[i];
+    const result = s.match(pattern.re);
+
+    if (result !== null) {
+      const content = result[0];
+      return {
+        type: pattern.type,
+        begin: from,
+        end: from + content.length,
+        content: content
+      };
+    }
+  }
+
+  return {
+    type: "invalid",
+    begin: from,
+    end: from + 1,
+    content: input[from]
+  };
+}
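+
+// Example (illustrative): raw tokens include whitespace, which the Tokenizer
+// below filters out:
+//
+//   nextToken("kIdAdd = 1", 6, tokenizerPatterns)
+//   // -> { type: "space", begin: 6, end: 7, content: " " }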
+
+class Tokenizer {
+  constructor(input, patterns) {
+    this.input = input;
+    this.index = 0;
+    this.patterns = patterns;
+  }
+
+  next() {
+    for (;;) {
+      const token = nextToken(this.input, this.index, this.patterns);
+      this.index = token.end;
+      if (token.type === "space" || token.type === "comment")
+        continue;
+      return token;
+    }
+  }
+
+  revert(token) {
+    this.index = token.begin;
+  }
+}
+
+// ============================================================================
+// [Parser]
+// ============================================================================
+
+function parseEnum(input) {
+  const map = Object.create(null);
+  const hasOwn = Object.prototype.hasOwnProperty;
+  const tokenizer = new Tokenizer(input, tokenizerPatterns);
+
+  var value = -1;
+
+  for (;;) {
+    var token = tokenizer.next();
+    if (token.type === "end")
+      break;
+
+    if (token.type === "symbol") {
+      const symbol = token.content;
+      token = tokenizer.next();
+      if (token.content === "=") {
+        token = tokenizer.next();
+        if (token.type !== "integer")
+          throw Error(`Expected an integer after symbol '${symbol} = '`);
+        value = parseInt(token.content);
+      }
+      else {
+        value++;
+      }
+
+      if (!hasOwn.call(map, symbol))
+        map[symbol] = value;
+      else
+        console.log(`${symbol} already defined, skipping...`);
+
+      token = tokenizer.next();
+      if (token.type !== "comma")
+        tokenizer.revert(token);
+      continue;
+    }
+
+    throw Error(`Unexpected token ${token.type} (${token.content})`);
+  }
+
+  return map;
+}
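+
+// Example (illustrative):
+//
+//   parseEnum("kIdNone = 0, kIdAdd, kIdSub")
+//   // -> { kIdNone: 0, kIdAdd: 1, kIdSub: 2 }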
+
+// ============================================================================
+// [Stringify]
+// ============================================================================
+
+function compare(a, b) {
+  return a < b ? -1 : a == b ? 0 : 1;
+}
+
+function compactedSize(table) {
+  var size = 0;
+  for (var i = 0; i < table.length; i++)
+    size += table[i].name.length + 1;
+  return size;
+}
+
+function indexTypeFromSize(size) {
+  if (size <= 256)
+    return 'uint8_t';
+  else if (size <= 65536)
+    return 'uint16_t';
+  else
+    return 'uint32_t';
+}
+
+function indent(s, indentation) {
+  var lines = s.split(/\r?\n/g);
+  if (indentation) {
+    for (var i = 0; i < lines.length; i++) {
+      var line = lines[i];
+      if (line) lines[i] = indentation + line;
+    }
+  }
+
+  return lines.join("\n");
+}
+
+function stringifyEnum(map, options) {
+  var output = "";
+
+  const stripPrefix = options.strip;
+  const outputPrefix = options.output;
+
+  var max = -1;
+  var table = [];
+
+  for (var k in map) {
+    var name = k;
+    if (stripPrefix) {
+      if (name.startsWith(stripPrefix))
+        name = name.substring(stripPrefix.length);
+      else
+        throw Error(`Cannot strip prefix '${stripPrefix}' in '${k}'`);
+    }
+
+    table.push({ name: name, value: map[k] });
+    max = Math.max(max, map[k]);
+  }
+
+  table.sort(function(a, b) { return compare(a.value, b.value); });
+
+  const unknownIndex = compactedSize(table);
+  table.push({ name: "", value: max + 1 });
+
+  const indexType = indexTypeFromSize(compactedSize(table));
+
+  function buildStringData() {
+    var s = "";
+    for (var i = 0; i < table.length; i++) {
+      s += `  "${table[i].name}\\0"`;
+      if (i == table.length - 1)
+        s += `;`;
+      s += `\n`;
+    }
+    return s;
+  }
+
+  function buildIndexData() {
+    var index = 0;
+    var indexArray = [];
+
+    for (var i = 0; i < table.length; i++) {
+      while (indexArray.length < table[i].value)
+        indexArray.push(unknownIndex);
+
+      indexArray.push(index);
+      index += table[i].name.length + 1;
+    }
+
+    var s = "";
+    var line = "";
+    var pos = 0;
+
+    for (var i = 0; i < indexArray.length; i++) {
+      if (line)
+        line += " ";
+
+      line += `${indexArray[i]}`;
+      if (i != indexArray.length - 1)
+        line += `,`;
+
+      if (i == indexArray.length - 1 || line.length >= 72) {
+        s += `  ${line}\n`;
+        line = "";
+      }
+    }
+
+    return s;
+  }
+
+  output += `static const char ${outputPrefix}String[] =\n` + buildStringData() + `\n`;
+  output += `static const ${indexType} ${outputPrefix}Index[] = {\n` + buildIndexData() + `};\n`;
+
+  return output;
+}
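+
+// Sketch of the generated C++ tables (illustrative, assuming options.output is
+// "sample" and the enum maps A=0, B=1):
+//   static const char sampleString[] =
+//     "A\0"
+//     "B\0"
+//     "\0";
+//   static const uint8_t sampleIndex[] = {
+//     0, 2, 4
+//   };
+// Gaps in the value range and the trailing sentinel both point at the empty
+// string at the end of sampleString.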
+
+// ============================================================================
+// [FileSystem]
+// ============================================================================
+
+function walkDir(baseDir) {
+  function walk(baseDir, nestedPath, out) {
+    fs.readdirSync(baseDir).forEach((file) => {
+      const stat = fs.statSync(path.join(baseDir, file));
+      if (stat.isDirectory()) {
+        if (!stat.isSymbolicLink())
+          walk(path.join(baseDir, file), path.join(nestedPath, file), out)
+      }
+      else {
+        out.push(path.join(nestedPath, file));
+      }
+    });
+    return out;
+  }
+
+  return walk(baseDir, "", []);
+}
+
+// ============================================================================
+// [Generator]
+// ============================================================================
+
+class Generator {
+  constructor(options) {
+    this.enumMap = Object.create(null);
+    this.outputs = [];
+
+    this.verify = options.verify;
+    this.baseDir = options.baseDir;
+    this.noBackup = options.noBackup;
+  }
+
+  readEnums() {
+    console.log(`Scanning: ${this.baseDir}`);
+    walkDir(this.baseDir).forEach((fileName) => {
+      if (/\.(cc|cpp|h|hpp)$/.test(fileName)) {
+        const content = fs.readFileSync(path.join(this.baseDir, fileName), "utf8");
+        this.addEnumsFromSource(fileName, content);
+
+        if (/@EnumStringBegin(\{.*\})@/.test(content))
+          this.outputs.push(fileName);
+      }
+    });
+  }
+
+  writeEnums() {
+    this.outputs.forEach((fileName) => {
+      console.log(`Output: ${fileName}`);
+
+      const oldContent = fs.readFileSync(path.join(this.baseDir, fileName), "utf8");
+      const newContent = this.injectEnumsToSource(oldContent);
+
+      if (oldContent != newContent) {
+        if (this.verify) {
+          console.log(`  FAILED: File is not up to date.`);
+          process.exit(1);
+        }
+        else {
+          if (!this.noBackup) {
+            fs.writeFileSync(path.join(this.baseDir, fileName + ".backup"), oldContent, "utf8");
+            console.log(`  Created ${fileName}.backup`);
+          }
+          fs.writeFileSync(path.join(this.baseDir, fileName), newContent, "utf8");
+          console.log(`  Updated ${fileName}`);
+        }
+      }
+      else {
+        console.log(`  File is up to date.`);
+      }
+    });
+  }
+
+  addEnumsFromSource(fileName, src) {
+    var found = false;
+    const matches = [...src.matchAll(/(?:@EnumValuesBegin(\{.*\})@|@EnumValuesEnd@)/g)];
+
+    for (var i = 0; i < matches.length; i += 2) {
+      const def = matches[i];
+      const end = matches[i + 1];
+
+      if (!def[0].startsWith("@EnumValuesBegin"))
+        throw new Error(`Cannot start with '${def[0]}'`);
+
+      if (!end)
+        throw new Error(`Missing @EnumValuesEnd for '${def[0]}'`);
+
+      if (!end[0].startsWith("@EnumValuesEnd@"))
+        throw new Error(`Expected @EnumValuesEnd@ for '${def[0]}' and not '${end[0]}'`);
+
+      const options = JSON.parse(def[1]);
+      const enumName = options.enum;
+
+      if (!enumName)
+        throw Error(`Missing 'enum' in '${def[0]}'`);
+
+      if (hasOwn.call(this.enumMap, enumName))
+        throw new Error(`Enumeration '${enumName}' is already defined`);
+
+      const startIndex = src.lastIndexOf("\n", def.index) + 1;
+      const endIndex = end.index + end[0].length;
+
+      if (startIndex === -1 || startIndex > endIndex)
+        throw new Error(`Internal Error, indexes have unexpected values: startIndex=${startIndex} endIndex=${endIndex}`);
+
+      if (!found) {
+        found = true;
+        console.log(`Found: ${fileName}`);
+      }
+
+      console.log(`  Parsing Enum: ${enumName}`);
+      this.enumMap[enumName] = parseEnum(src.substring(startIndex, endIndex));
+    }
+  }
+
+  injectEnumsToSource(src) {
+    const matches = [...src.matchAll(/(?:@EnumStringBegin(\{.*\})@|@EnumStringEnd@)/g)];
+    var delta = 0;
+
+    for (var i = 0; i < matches.length; i += 2) {
+      const def = matches[i];
+      const end = matches[i + 1];
+
+      if (!def[0].startsWith("@EnumStringBegin"))
+        throw new Error(`Cannot start with '${def[0]}'`);
+
+      if (!end)
+        throw new Error(`Missing @EnumStringEnd@ for '${def[0]}'`);
+
+      if (!end[0].startsWith("@EnumStringEnd@"))
+        throw new Error(`Expected @EnumStringEnd@ for '${def[0]}' and not '${end[0]}'`);
+
+      const options = JSON.parse(def[1]);
+      const enumName = options.enum;
+
+      if (!enumName)
+        throw Error(`Missing 'enum' in '${def[0]}'`);
+
+      if (!hasOwn.call(this.enumMap, enumName))
+        throw new Error(`Enumeration '${enumName}' not found`);
+
+      console.log(`  Injecting Enum: ${enumName}`);
+
+      const startIndex = src.indexOf("\n", def.index + delta) + 1;
+      const endIndex = src.lastIndexOf("\n", end.index + delta) + 1;
+
+      if (startIndex === -1 || endIndex === -1 || startIndex > endIndex)
+        throw new Error(`Internal Error, indexes have unexpected values: startIndex=${startIndex} endIndex=${endIndex}`);
+
+      // Calculate the indentation.
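+      // (everything on the marker's line before the '/' that starts the
+      // @EnumStringBegin@ comment, so injected lines share its indentation).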
+      const indentation = (function() {
+        const begin = src.lastIndexOf("\n", def.index + delta) + 1;
+        const end = src.indexOf("/", begin);
+        return src.substring(begin, end);
+      })();
+
+      const newContent = indent(stringifyEnum(this.enumMap[enumName], options), indentation);
+      src = src.substring(0, startIndex) + newContent + src.substring(endIndex);
+
+      delta -= endIndex - startIndex;
+      delta += newContent.length;
+    }
+
+    return src;
+  }
+}
+
+const generator = new Generator({
+  baseDir : path.resolve(__dirname, "../src"),
+  verify  : process.argv.indexOf("--verify") !== -1,
+  noBackup: process.argv.indexOf("--no-backup") !== -1
+});
+
+generator.readEnums();
+generator.writeEnums();
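+
+// Typical invocation (illustrative): run from the tools directory, e.g.
+//   node ./enumgen.js              # regenerate stale files (writes a .backup)
+//   node ./enumgen.js --verify     # fail if any generated section is stale
+//   node ./enumgen.js --no-backup  # regenerate without .backup files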
diff --git a/libs/asmjit/tools/enumgen.sh b/libs/asmjit/tools/enumgen.sh
new file mode 100644
index 0000000..4783db2
--- /dev/null
+++ b/libs/asmjit/tools/enumgen.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+set -e
+node ./enumgen.js $@
diff --git a/libs/asmjit/tools/tablegen-x86.js b/libs/asmjit/tools/tablegen-x86.js
index dfd0f40..35d8b24 100644
--- a/libs/asmjit/tools/tablegen-x86.js
+++ b/libs/asmjit/tools/tablegen-x86.js
@@ -221,6 +221,9 @@ class GenUtils {
         }
       }
 
+      if (dbInst.attributes.Tsib)
+        f.Tsib = true;
+
       if (dbInst.vsibReg)
         f.Vsib = true;
 
@@ -279,6 +282,20 @@ class GenUtils {
     }
   }
 
+  // Prevent some instructions from having an implicit memory size if that
+  // would make them ambiguous. For some instructions the ambiguity is
+  // acceptable, but for others, like 'push' and 'pop', it is not.
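+  //
+  // For example, 'push [mem]' has no register operand to infer the operand
+  // size from, so it keeps requiring an explicit memory size.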
+  static canUseImplicitMemSize(name) {
+    switch (name) {
+      case "pop":
+      case "push":
+        return false;
+
+      default:
+        return true;
+    }
+  }
+
   static singleRegCase(name) {
     switch (name) {
       case "xchg"    :
@@ -509,7 +526,9 @@ class X86TableGen extends core.TableGen {
             String(inst.encoding  ).padEnd(19) + ", " +
             String(inst.opcode0   ).padEnd(26) + ", " +
             String(inst.opcode1   ).padEnd(26) + ", " +
-            String("0"            ).padEnd( 4) + ", " +
+            String("0"            ).padEnd( 3) + ", " +
+            String("0"            ).padEnd( 3) + ", " +
+            String("0"            ).padEnd( 5) + ", " +
             String("0"            ).padEnd( 3) + ", " +
             String("0"            ).padEnd( 3) + "),\n";
         }
@@ -547,7 +566,7 @@ class X86TableGen extends core.TableGen {
     var enum_    = name[0].toUpperCase() + name.substr(1);
 
     var opcode   = dbi.opcodeHex;
-    var rm       = dbi.rm;
+    var modR     = dbi.modR;
     var mm       = dbi.mm;
     var pp       = dbi.pp;
     var encoding = dbi.encoding;
@@ -585,7 +604,7 @@ class X86TableGen extends core.TableGen {
       }
 
       if (opcode   !== dbi.opcodeHex ) { console.log(`ISSUE: Opcode ${opcode} != ${dbi.opcodeHex}`); return null; }
-      if (rm       !== dbi.rm        ) { console.log(`ISSUE: RM ${rm} != ${dbi.rm}`); return null; }
+      if (modR     !== dbi.modR      ) { console.log(`ISSUE: ModR ${modR} != ${dbi.modR}`); return null; }
       if (mm       !== dbi.mm        ) { console.log(`ISSUE: MM ${mm} != ${dbi.mm}`); return null; }
       if (pp       !== dbi.pp        ) { console.log(`ISSUE: PP ${pp} != ${dbi.pp}`); return null; }
       if (encoding !== dbi.encoding  ) { console.log(`ISSUE: Enc ${encoding} != ${dbi.encoding}`); return null; }
@@ -600,12 +619,12 @@ class X86TableGen extends core.TableGen {
       type  : isVec ? "V" : "O",
       prefix: ppmm,
       opcode: opcode,
-      o     : rm === "r" ? "_" : (rm ? rm : "_"),
+      o     : modR === "r" ? "_" : (modR ? modR : "_"),
       l     : vexL !== undefined ? vexL : "_",
       w     : vexW !== undefined ? vexW : "_",
       ew    : evexW !== undefined ? evexW : "_",
       en    : "_",
-      tt    : "_  "
+      tt    : dbi.modRM ? dbi.modRM + "  " : "_  "
     });
 
     return {
@@ -731,7 +750,7 @@ class AltOpcodeTable extends core.Task {
       }
 
       // X(______,OP,_,_,_,_,_,_  )
-      if (opcode.startsWith("O_FPU(") || opcode.startsWith("O(") || opcode.startsWith("V(") || opcode.startsWith("E(")) {
+      if (opcode.startsWith("O(") || opcode.startsWith("V(") || opcode.startsWith("E(")) {
         var value = opcode.substring(9, 11);
         var remaining = opcode.substring(0, 9) + "00" + opcode.substring(11);
 
@@ -930,6 +949,7 @@ const OpToAsmJitOp = {
   "dreg"    : "F(DReg)",
   "st"      : "F(St)",
   "bnd"     : "F(Bnd)",
+  "tmm"     : "F(Tmm)",
 
   "mem"     : "F(Mem)",
   "vm"      : "F(Vm)",
@@ -1096,7 +1116,8 @@ class OSignature {
         case "mm"      :
         case "xmm"     :
         case "ymm"     :
-        case "zmm"     : mFlags[k] = true; break;
+        case "zmm"     :
+        case "tmm"     : mFlags[k] = true; break;
 
         case "m8"      :
         case "m16"     :
@@ -1108,8 +1129,9 @@ class OSignature {
         case "m256"    :
         case "m512"    :
         case "m1024"   : mFlags.mem = true; mMemFlags[k] = true; break;
-        case "mib"     : mFlags.mem = true; mMemFlags.mib   = true; break;
-        case "mem"     : mFlags.mem = true; mMemFlags.mAny  = true; break;
+        case "mib"     : mFlags.mem = true; mMemFlags.mib = true; break;
+        case "mem"     : mFlags.mem = true; mMemFlags.mAny = true; break;
+        case "tmem"    : mFlags.mem = true; mMemFlags.mAny = true; break;
 
         case "memBase" : mFlags.mem = true; mMemFlags.memBase = true; break;
         case "memDS"   : mFlags.mem = true; mMemFlags.memDS = true; break;
@@ -1398,7 +1420,8 @@ class SignatureArray extends Array {
       // Patch all instructions to accept implicit-size memory operand.
       for (bIndex = 0; bIndex < sameSizeSet.length; bIndex++) {
         const bInst = sameSizeSet[bIndex];
-        if (implicit) bInst[memPos].flags.mem = true;
+        if (implicit)
+          bInst[memPos].flags.mem = true;
 
         if (!implicit)
           DEBUG(`${this.name}: Explicit: ${bInst}`);
@@ -1689,7 +1712,9 @@ class InstSignatureTable extends core.Task {
       }
     }
 
-    signatures.calcImplicitMemSize();
+    if (signatures.length && GenUtils.canUseImplicitMemSize(dbInsts[0].name))
+      signatures.calcImplicitMemSize();
+
     signatures.simplify();
     signatures.compact();
 
@@ -1814,12 +1839,14 @@ class InstRWInfoTable extends core.Task {
   constructor() {
     super("InstRWInfoTable");
 
-    this.rwInfoIndex = [];
-    this.rwInfoTable = new IndexedArray();
+    this.rwInfoIndexA = [];
+    this.rwInfoIndexB = [];
+    this.rwInfoTableA = new IndexedArray();
+    this.rwInfoTableB = new IndexedArray();
+
     this.rmInfoTable = new IndexedArray();
     this.opInfoTable = new IndexedArray();
 
-    const _ = null;
     this.rwCategoryByName = {
       "imul"      : "Imul",
       "mov"       : "Mov",
@@ -1833,6 +1860,8 @@ class InstRWInfoTable extends core.Task {
       "vpmaskmovd": "Vmaskmov",
       "vpmaskmovq": "Vmaskmov"
     };
+
+    const _ = null;
     this.rwCategoryByData = {
       Vmov1_8: [
         [{access: "W", flags: {}, fixed: -1, index: 0, width:  8}, {access: "R", flags: {}, fixed: -1, index: 0, width: 64},_,_,_,_],
@@ -1900,8 +1929,8 @@ class InstRWInfoTable extends core.Task {
       const o2Insts = dbInsts.filter((inst) => { return inst.operands.length === 2; });
       const oxInsts = dbInsts.filter((inst) => { return inst.operands.length !== 2; });
 
-      const rwInfoArray = [this.rwInfo(o2Insts), this.rwInfo(oxInsts)];
-      const rmInfoArray = [this.rmInfo(o2Insts), this.rmInfo(oxInsts)];
+      const rwInfoArray = [this.rwInfo(inst, o2Insts), this.rwInfo(inst, oxInsts)];
+      const rmInfoArray = [this.rmInfo(inst, o2Insts), this.rmInfo(inst, oxInsts)];
 
       for (var i = 0; i < 2; i++) {
         const rwInfo = rwInfoArray[i];
@@ -1954,21 +1983,30 @@ class InstRWInfoTable extends core.Task {
           CxxUtils.struct(...(rwOpsIndex.map(function(item) { return String(item).padEnd(2); })))
         );
 
-        this.rwInfoIndex.push(this.rwInfoTable.addIndexed(rwData));
+        if (i == 0)
+          this.rwInfoIndexA.push(this.rwInfoTableA.addIndexed(rwData));
+        else
+          this.rwInfoIndexB.push(this.rwInfoTableB.addIndexed(rwData));
       }
     });
 
     var s = "";
-    s += "const uint8_t InstDB::rwInfoIndex[Inst::_kIdCount * 2] = {\n" + StringUtils.format(this.rwInfoIndex, kIndent, -1) + "\n};\n";
+    s += "const uint8_t InstDB::rwInfoIndexA[Inst::_kIdCount] = {\n" + StringUtils.format(this.rwInfoIndexA, kIndent, -1) + "\n};\n";
+    s += "\n";
+    s += "const uint8_t InstDB::rwInfoIndexB[Inst::_kIdCount] = {\n" + StringUtils.format(this.rwInfoIndexB, kIndent, -1) + "\n};\n";
     s += "\n";
-    s += "const InstDB::RWInfo InstDB::rwInfo[] = {\n" + StringUtils.format(this.rwInfoTable, kIndent, true) + "\n};\n";
+    s += "const InstDB::RWInfo InstDB::rwInfoA[] = {\n" + StringUtils.format(this.rwInfoTableA, kIndent, true) + "\n};\n";
+    s += "\n";
+    s += "const InstDB::RWInfo InstDB::rwInfoB[] = {\n" + StringUtils.format(this.rwInfoTableB, kIndent, true) + "\n};\n";
     s += "\n";
     s += "const InstDB::RWInfoOp InstDB::rwInfoOp[] = {\n" + StringUtils.format(this.opInfoTable, kIndent, true) + "\n};\n";
     s += "\n";
     s += "const InstDB::RWInfoRm InstDB::rwInfoRm[] = {\n" + StringUtils.format(this.rmInfoTable, kIndent, true) + "\n};\n";
 
-    const size = this.rwInfoIndex.length +
-                 this.rwInfoTable.length * 8 +
+    const size = this.rwInfoIndexA.length +
+                 this.rwInfoIndexB.length +
+                 this.rwInfoTableA.length * 8 +
+                 this.rwInfoTableB.length * 8 +
                  this.rmInfoTable.length * 4 +
                  this.opInfoTable.length * 24;
 
@@ -2006,7 +2044,9 @@ class InstRWInfoTable extends core.Task {
   // Read/Write Info
   // ---------------
 
-  rwInfo(dbInsts) {
+  rwInfo(asmInst, dbInsts) {
+    const self = this;
+
     function nullOps() {
       return [null, null, null, null, null, null];
     }
@@ -2052,6 +2092,9 @@ class InstRWInfoTable extends core.Task {
           if (op.zext)
             d.flags.ZExt = true;
 
+          for (var k in self.rwOpFlagsForInstruction(asmInst.name, j))
+            d.flags[k] = true;
+
           if ((step === -1 || step === j) || op.rwxIndex !== 0 || op.rwxWidth !== opSize) {
             d.index = op.rwxIndex;
             d.width = op.rwxWidth;
@@ -2147,10 +2190,29 @@ class InstRWInfoTable extends core.Task {
     return null;
   }
 
+  rwOpFlagsForInstruction(instName, opIndex) {
+    const toMap = MapUtils.arrayToMap;
+
+    // TODO: We should be able to get this information from asmdb.
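+    // Keys are "<instruction>@<operand index>"; e.g. "movs@0" marks the
+    // destination of 'movs' as a read/write, post-modified memory base.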
+    switch (instName + "@" + opIndex) {
+      case "cmps@0": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "cmps@1": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "movs@0": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "movs@1": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "lods@1": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "stos@0": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "scas@1": return toMap(['MemBaseRW', 'MemBasePostModify']);
+      case "bndstx@0": return toMap(['MemBaseWrite', 'MemIndexWrite']);
+
+      default:
+        return {};
+    }
+  }
+
   // Reg/Mem Info
   // ------------
 
-  rmInfo(dbInsts) {
+  rmInfo(asmInst, dbInsts) {
     const info = {
       category: "None",
       rmIndexes: this.rmReplaceableIndexes(dbInsts),
diff --git a/libs/asmjit/tools/tablegen.sh b/libs/asmjit/tools/tablegen.sh
index dbb37f1..40facf3 100644
--- a/libs/asmjit/tools/tablegen.sh
+++ b/libs/asmjit/tools/tablegen.sh
@@ -1,3 +1,3 @@
-#!/bin/sh
-
-node ./tablegen-x86.js
+#!/usr/bin/env sh
+set -e
+node ./tablegen-x86.js $@
diff --git a/libs/cpptcl/SCons_cpptcl.py b/libs/cpptcl/SCons_cpptcl.py
deleted file mode 100644
index 778ba07..0000000
--- a/libs/cpptcl/SCons_cpptcl.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import os
-
-Import('*')
-
-lenv = pyenv.Clone()
-
-cpptcl_cpp = File('cpptcl/cpptcl.cpp')
-cpptcl_includes = cpptcl_cpp.srcnode().get_abspath()
-cpptcl_includes = os.path.split(cpptcl_includes)[0]
- 
-lenv.Append(CPPPATH=[cpptcl_includes,
-                    Dir('#/libs/pybind11')],
-)
-
-static_cpptcl = lenv.StaticLibrary('cpptcl', cpptcl_cpp)
-
-cpptclnostubs_obj = lenv.Object(target='cpptclnostubs', source=cpptcl_cpp, 
-             CPPDEFINES = {'CPPTCL_NO_TCL_STUBS' : ''},
-)
-static_cpptcl_no_stubs = lenv.StaticLibrary(cpptclnostubs_obj)
-
-
-Return('static_cpptcl static_cpptcl_no_stubs cpptcl_includes')
diff --git a/libs/cpptcl/cpptcl.SConscript b/libs/cpptcl/cpptcl.SConscript
new file mode 100644
index 0000000..a56144d
--- /dev/null
+++ b/libs/cpptcl/cpptcl.SConscript
@@ -0,0 +1,29 @@
+import os
+
+Import('pyenv env')
+
+env_cpptcl = pyenv.Clone()
+
+sources = File('cpptcl/cpptcl.cpp')
+
+cpptcl_includes = sources.srcnode().get_abspath()
+cpptcl_includes = os.path.split(cpptcl_includes)[0]
+env['cpptcl_includes'] = [ cpptcl_includes ]
+ 
+env_cpptcl.Append(
+    CPPPATH=env['cpptcl_includes'] + 
+             env['pybind11_includes'],
+)
+
+static_cpptcl = env_cpptcl.StaticLibrary(target = 'cpptcl',
+            source = sources,
+)
+
+cpptclnostubs_obj = env_cpptcl.Object(target='cpptclnostubs',
+            source = sources, 
+            CPPDEFINES = {'CPPTCL_NO_TCL_STUBS' : ''},
+)
+static_cpptcl_no_stubs = env_cpptcl.StaticLibrary(cpptclnostubs_obj)
+
+
+Return('static_cpptcl static_cpptcl_no_stubs')
diff --git a/libs/cpptcl/cpptcl/cpptcl.cpp b/libs/cpptcl/cpptcl/cpptcl.cpp
index d97916e..f75e885 100644
--- a/libs/cpptcl/cpptcl/cpptcl.cpp
+++ b/libs/cpptcl/cpptcl/cpptcl.cpp
@@ -774,6 +774,11 @@ result interpreter::setVar(string const &variableName, object const &scalarTclVa
 	return result(interp_);
 }
 
+void interpreter::setResult(object const &o)
+{
+    details::set_result(interp_, o);
+}
+
 bool interpreter::exists(string const &variableName, string const &indexName) {
     object n = object(variableName.c_str());
     object i = object(indexName.c_str());
diff --git a/libs/cpptcl/cpptcl/cpptcl.h b/libs/cpptcl/cpptcl/cpptcl.h
index 7f597d6..8a86048 100644
--- a/libs/cpptcl/cpptcl/cpptcl.h
+++ b/libs/cpptcl/cpptcl/cpptcl.h
@@ -334,91 +334,34 @@ template
 struct callbacka : public details::callback_base  {
 
 private:
-	Func f_;
+	Func _f;
 
-	using Traits = traits;
+	using Traits = FunctionTraits;
 	using R = typename Traits::return_type;
 
 	static constexpr std::size_t nargs = Traits::arity;
 
   public:
-	callbacka(Func f) : f_(f) {}
-
-	virtual void invoke(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[] , policies const &pol) {
-		object variadic_arguments{};
-		if (!pol.variadic_)
-		{
-			details::check_params_no(objc, nargs, pol.usage_);
-		} else {
-			if constexpr (nargs > 0) {
-				// first element in objv[0] is procedure name
-				variadic_arguments = details::get_var_params(interp, objc, objv, nargs, pol);
-			}
-		}
-
-		if constexpr (nargs <= 0) {
-			details::dispatch::do_dispatch(interp, f_);
-			return;
-		} else {
-			using T1 = typename Traits::template argument<0>::type;
-			details::tcl_cast_by_reference byRef1;
-
-			if constexpr (nargs <= 1) {
-				if (pol.variadic_)
-				{
-					details::dispatch::template do_dispatch(interp, f_,
-							variadic_arguments);
-				} else {
-					details::dispatch::template do_dispatch(interp, f_,
-							details::tcl_cast::from(interp, objv[1], byRef1.value));
-				}
-				return;
-			} else {
-				using T2 = typename Traits::template argument<1>::type;
-				details::tcl_cast_by_reference byRef2;
-
-				if constexpr (nargs <= 2) {
-					if (pol.variadic_)
-					{
-						details::dispatch::template do_dispatch(interp, f_,
-							details::tcl_cast::from(interp, objv[1], byRef1.value),
-							variadic_arguments
-						);
-					} else {
-						details::dispatch::template do_dispatch(interp, f_,
-							details::tcl_cast::from(interp, objv[1], byRef1.value),
-							details::tcl_cast::from(interp, objv[2], byRef2.value)
-						);
-					}
-					return;
-				} else {
-					using T3 = typename Traits::template argument<2>::type;
-					details::tcl_cast_by_reference byRef3;
-
-					if constexpr (nargs <= 3) {
-						if (pol.variadic_) {
-							details::dispatch::template do_dispatch(interp, f_,
-									details::tcl_cast::from(interp, objv[1], byRef1.value),
-									details::tcl_cast::from(interp, objv[2], byRef2.value),
-									variadic_arguments
-							);
-						}
-						else
-						{
-							details::dispatch::template do_dispatch(interp, f_,
-								details::tcl_cast::from(interp, objv[1], byRef1.value),
-								details::tcl_cast::from(interp, objv[2], byRef2.value),
-								details::tcl_cast::from(interp, objv[3], byRef3.value)
-								);
-						}
-						return;
-					} else {
-						static_assert(nargs > 3, "Argument count exceed implemented count");
-					}
-				}
-			}
-		}
-	}
+	callbacka(Func f) : _f(f) {}
+
+    virtual void invoke(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[], policies const &pol)
+    {
+        if (pol.variadic_)
+        {
+            object variadic_arguments{};
+            if constexpr (nargs > 0)
+            {
+                // first element in objv[0] is procedure name
+                variadic_arguments = details::get_var_params(interp, objc, objv, nargs, pol);
+            }
+            details::nextgen::Dispatcher::dispatch(interp, objv, _f, variadic_arguments);
+        }
+        else
+        {
+            details::check_params_no(objc, nargs, pol.usage_);
+            details::nextgen::Dispatcher::dispatch(interp, objv, _f);
+        }
+    }
 };
 
 
@@ -449,32 +392,10 @@ class interpreter {
 	Tcl_Interp *get() const { return interp_; }
 
     template 
-    void def2(std::string const &name, Func f, policies const &p = policies()) {
-    	add_function(name, std::shared_ptr(new callbacka(f)), p);
+    void def(std::string const &name, Func f, policies const &p = policies()) {
+        add_function(name, std::shared_ptr(new callbacka(f)), p);
     }
 
-	// free function definitions
-
-	template  void def(std::string const &name, R (*f)(), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback0(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback1(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback2(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback3(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback4(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4, T5), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback5(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4, T5, T6), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback6(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4, T5, T6, T7), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback7(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4, T5, T6, T7, T8), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback8(f)), p); }
-
-	template  void def(std::string const &name, R (*f)(T1, T2, T3, T4, T5, T6, T7, T8, T9), policies const &p = policies()) { add_function(name, std::shared_ptr(new details::callback9(f)), p); }
-
 	// class definitions
 
 	template  details::class_definer class_(std::string const &name) {
@@ -522,6 +443,8 @@ class interpreter {
 
 	details::result setVar(std::string const &variableName, object const &scalarTclVariable, int flags = TCL_LEAVE_ERR_MSG);
 
+    void setResult(object const &o);
+
     // check if variables exist
     bool exists(std::string const &scalarTclVariable);
     bool exists(std::string const &arrayTclVariable, std::string const &arrayIndex);
@@ -589,9 +512,15 @@ using details::result;
 // macro for defining loadable module entry point
 // - used for extending Tcl interpreter
 
+#ifdef _MSC_VER
+#define CPPTCL_EXPORT __declspec( dllexport )
+#else
+#define CPPTCL_EXPORT __attribute__((dllexport))
+#endif
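+
+// Illustrative usage of the module macro (sketch, not part of this header):
+//   CPPTCL_MODULE(Mymodule, i) {
+//       i.def("hello", hello);
+//   }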
+
 #define CPPTCL_MODULE(name, i)                       \
 	void name##_cpptcl_Init(Tcl::interpreter &i);    \
-	extern "C" __attribute__((dllexport)) int name##_Init(Tcl_Interp *interpreter) { \
+	extern "C" CPPTCL_EXPORT int name##_Init(Tcl_Interp *interpreter) { \
 		Tcl_InitStubs(interpreter, const_cast("8.3"), 0);             \
 		Tcl::interpreter i(interpreter, false);           \
 		name##_cpptcl_Init(i);                       \
diff --git a/libs/cpptcl/cpptcl/details/dispatchers.h b/libs/cpptcl/cpptcl/details/dispatchers.h
index 0b80b42..2d280c9 100644
--- a/libs/cpptcl/cpptcl/details/dispatchers.h
+++ b/libs/cpptcl/cpptcl/details/dispatchers.h
@@ -14,12 +14,44 @@
 // capture its return value
 // further dispatch specialization ignores the res
 
-template  struct dispatch {
-	template  static void do_dispatch2(Tcl_Interp *interp, Functor f) {
-		R res = f();
-		set_result(interp, res);
-	}
+namespace nextgen
+{
+	// Template recursion
+template 
+struct Dispatcher {
+    template 
+    static void dispatch(Tcl_Interp *interp, Tcl_Obj *const objv[], FuncPtr func_ptr, Args... args) {
+        constexpr size_t idx = i - 1;
+        using ArgType = typename Traits::template argument::type;
+
+        details::tcl_cast_by_reference is_by_ref;
+        // first element in objv[0] is procedure name
+        ArgType casted = details::tcl_cast::from(interp, objv[idx+1], is_by_ref.value);
+
+        Dispatcher::dispatch(interp, objv, func_ptr, casted, args...);
+    }
+};
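+// Each step converts one Tcl argument (objv[idx+1], idx = i-1) to the matching
+// native parameter type and recurses with it prepended to the argument pack;
+// the terminating specialisation below invokes the wrapped function and passes
+// its return value to set_result().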
 
+// Terminating template specialisation
+template 
+struct Dispatcher<0, Traits, Args...> {
+    template 
+    static void dispatch(Tcl_Interp *interp, Tcl_Obj *const objv[], FuncPtr func_ptr, Args... args) {
+        constexpr size_t idx = 0;
+        using ArgType = typename Traits::template argument::type;
+
+        details::tcl_cast_by_reference is_by_ref;
+        ArgType casted = details::tcl_cast::from(interp, objv[idx+1], is_by_ref.value);
+
+        using Result = typename Traits::return_type;
+        Result res = func_ptr(args...);
+        set_result(interp, res);
+    }
+};
+
+}
+
+template  struct dispatch {
 	template  static void do_dispatch(Tcl_Interp *interp, Functor f) {
 		R res = f();
 		set_result(interp, res);
diff --git a/libs/cpptcl/cpptcl/function_traits.h b/libs/cpptcl/cpptcl/function_traits.h
index 6053507..f94d930 100644
--- a/libs/cpptcl/cpptcl/function_traits.h
+++ b/libs/cpptcl/cpptcl/function_traits.h
@@ -1,76 +1,83 @@
-/*
- * function_traits.h
- */
-
-#ifndef CPPTCL_FUNCTION_TRAITS_H_
-#define CPPTCL_FUNCTION_TRAITS_H_
+#ifndef CPPTCL_FUNCTION_TRAITS_H
+#define CPPTCL_FUNCTION_TRAITS_H
 
 #include 
 
 template
-struct function_traits;
+struct FunctionTraits;
 
 // function pointer
 template
-struct function_traits : public function_traits
+struct FunctionTraits : public FunctionTraits
 {};
 
 template
-struct function_traits
+struct FunctionTraits
 {
     using return_type = R;
 
     static constexpr std::size_t arity = sizeof...(Args);
+    using ArgsTypeList = std::tuple;
 
     template 
     struct argument
     {
         static_assert(N < arity, "error: invalid parameter index.");
-        using type = typename std::tuple_element>::type;
+        using type = typename std::tuple_element::type;
     };
 };
 
 /// member function pointer
 template
-struct function_traits : public function_traits
+struct FunctionTraits : public FunctionTraits
 {};
 
 /// const member function pointer
 template
-struct function_traits : public function_traits
+struct FunctionTraits : public FunctionTraits
 {};
 
 /// member object pointer
 template
-struct function_traits : public function_traits
+struct FunctionTraits : public FunctionTraits
 {};
 
+namespace {
+    template
+    std::tuple>...>
+        sub(std::index_sequence);
 
-template
-struct traits
+    template
+    using TupleSubpack = decltype(sub(std::make_index_sequence{}));
+}
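+
+// TupleSubpack<Offset, Tuple> names the std::tuple made of Tuple's elements
+// starting at Offset; the specialisation below uses it to drop the first
+// argument type, matching its 'arity - 1' adjustment.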
+
+template
+struct FunctionTraits
 {
-    private:
-        using call_type = function_traits;
-    public:
-        using return_type = typename call_type::return_type;
-
-        static constexpr std::size_t arity = call_type::arity - 1;
-
-        template 
-        struct argument
-        {
-            static_assert(N < arity, "error: invalid parameter index.");
-            using type = typename call_type::template argument::type;
-        };
+private:
+    using call_type = FunctionTraits;
+
+public:
+    using return_type = typename call_type::return_type;
+
+    static constexpr std::size_t arity = call_type::arity - 1;
+    using ArgsTypeList = TupleSubpack<1, typename call_type::ArgsTypeList>;
+
+    template 
+    struct argument
+    {
+        static_assert(N < arity, "error: invalid parameter index.");
+        using type = typename call_type::template argument::type;
+    };
 };
 
 template
-struct traits : public traits
+struct FunctionTraits : public FunctionTraits
 {};
 
 template
-struct traits : public traits
+struct FunctionTraits : public FunctionTraits
 {};
 
 
-#endif /* CPPTCL_FUNCTION_TRAITS_H_ */
+#endif // CPPTCL_FUNCTION_TRAITS_H
diff --git a/libs/libs.SConscript b/libs/libs.SConscript
index 6dc2340..103a842 100644
--- a/libs/libs.SConscript
+++ b/libs/libs.SConscript
@@ -3,19 +3,22 @@ import os
 Import('env pyenv tclStubLib_obj')
 
 # Build asmjit dll library
-env.SConscript('asmjit/src/SConstruct')
+env.SConscript('asmjit/src/asmjit.SConscript')
 
 # Build polyhook static library
 env.SConscript('polyhook2.0/polyhook.SConscript')
 
+# Header only
+env.SConscript('pybind11/pybind11.SConscript')
+
 # Build cpptcl static library
-(static_cpptcl, static_cpptcl_no_stubs, cpptcl_includes) = pyenv.SConscript('cpptcl/SCons_cpptcl.py')
+(static_cpptcl, static_cpptcl_no_stubs) = pyenv.SConscript('cpptcl/cpptcl.SConscript')
 
 # Build tcl extensions
-tclpython_dll, py3_obj, pybind11_includes = pyenv.SConscript('tcl2python/tcl2python.SConscript',
-                exports='pyenv static_cpptcl cpptcl_includes tclStubLib_obj',
+tclpython_dll, py3_static = pyenv.SConscript('tcl2python/tcl2python.SConscript',
+                exports='pyenv static_cpptcl tclStubLib_obj',
                 duplicate=0,
 )
 
-Return('tclpython_dll static_cpptcl static_cpptcl_no_stubs py3_obj cpptcl_includes pybind11_includes')
+Return('tclpython_dll static_cpptcl static_cpptcl_no_stubs py3_static')
 
diff --git a/libs/polyhook2.0/.gitignore b/libs/polyhook2.0/.gitignore
index 8a54b30..8c23945 100644
--- a/libs/polyhook2.0/.gitignore
+++ b/libs/polyhook2.0/.gitignore
@@ -330,3 +330,9 @@ ASALocalRun/
 .mfractor/
 build32/
 build64/
+
+CMakeCache.txt
+cmake-*
+_build/
+_install/
+out/
\ No newline at end of file
diff --git a/libs/polyhook2.0/CMakeLists.txt b/libs/polyhook2.0/CMakeLists.txt
index 1d7da3f..781bf7c 100644
--- a/libs/polyhook2.0/CMakeLists.txt
+++ b/libs/polyhook2.0/CMakeLists.txt
@@ -1,6 +1,8 @@
 cmake_minimum_required(VERSION 3.15)
 
 project(PolyHook_2)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 include(CMakePackageConfigHelpers)
 
@@ -14,6 +16,10 @@ option(POLYHOOK_BUILD_SHARED_ASMJIT "Build polyhook as shared libary" OFF)
 option(POLYHOOK_BUILD_SHARED_CAPSTONE "Build capstone as shared libary" OFF)
 option(POLYHOOK_BUILD_SHARED_ZYDIS "Build polyhook as shared libary" OFF)
 
+option(POLYHOOK_USE_EXTERNAL_ASMJIT "Use external asmjit library" OFF)
+option(POLYHOOK_USE_EXTERNAL_CAPSTONE "Use external capstone library" OFF)
+option(POLYHOOK_USE_EXTERNAL_ZYDIS "Use external zydis library" OFF)
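+# When one of these is ON, the dependency is resolved with find_library()/
+# find_path() instead of being built from the bundled sources, e.g.
+# (illustrative): cmake -DPOLYHOOK_USE_EXTERNAL_ASMJIT=ON ..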
+
 if(MSVC)
 	option(POLYHOOK_BUILD_STATIC_RUNTIME "Use static runtime" ON)
 endif()
@@ -38,7 +44,7 @@ endif()
 # ASMJIT
 #
 
-if(POLYHOOK_FEATURE_INLINENTD)
+if(POLYHOOK_FEATURE_INLINENTD AND NOT POLYHOOK_USE_EXTERNAL_ASMJIT)
 
 	if(POLYHOOK_BUILD_SHARED_ASMJIT)
 		set(ASMJIT_STATIC OFF CACHE BOOL "")
@@ -60,7 +66,7 @@ endif()
 # Capstone
 #
 
-if(POLYHOOK_DISASM_CAPSTONE)
+if(POLYHOOK_DISASM_CAPSTONE AND NOT POLYHOOK_USE_EXTERNAL_CAPSTONE)
 	set(CAPSTONE_BUILD_STATIC_RUNTIME ${POLYHOOK_BUILD_STATIC_RUNTIME} CACHE BOOL "")
 	if(POLYHOOK_BUILD_SHARED_CAPSTONE)
 		set(CAPSTONE_BUILD_SHARED ON CACHE BOOL "")
@@ -106,7 +112,7 @@ endif()
 # Zydis
 #
 
-if(POLYHOOK_DISASM_ZYDIS)
+if(POLYHOOK_DISASM_ZYDIS AND NOT POLYHOOK_USE_EXTERNAL_ZYDIS)
     set(ZYDIS_BUILD_SHARED_LIB ${POLYHOOK_BUILD_SHARED_ZYDIS} CACHE BOOL "")
 	set(ZYCORE_BUILD_SHARED_LIB ${POLYHOOK_BUILD_SHARED_ZYDIS} CACHE BOOL "")
 	set(ZYDIS_BUILD_TOOLS OFF CACHE BOOL "")
@@ -179,37 +185,61 @@ set(POLYHOOK_CORE_HEADERS ${PROJECT_SOURCE_DIR}/polyhook2/ADisassembler.hpp
 		${PROJECT_SOURCE_DIR}/polyhook2/UID.hpp
 		${PROJECT_SOURCE_DIR}/polyhook2/ErrorLog.hpp
 		${PROJECT_SOURCE_DIR}/polyhook2/MemProtector.hpp
-		${PROJECT_SOURCE_DIR}/polyhook2/PageAllocator.hpp)
+		${PROJECT_SOURCE_DIR}/polyhook2/MemAccessor.hpp
+		${PROJECT_SOURCE_DIR}/polyhook2/PageAllocator.hpp
+		${PROJECT_SOURCE_DIR}/polyhook2/Tests/TestEffectTracker.hpp
+		${PROJECT_SOURCE_DIR}/polyhook2/Tests/StackCanary.hpp
+		${PROJECT_SOURCE_DIR}/polyhook2/EventDispatcher.hpp
+		)
 install(FILES ${POLYHOOK_CORE_HEADERS} DESTINATION include/polyhook2)
 
 
 target_sources(${PROJECT_NAME} PRIVATE
 	${PROJECT_SOURCE_DIR}/sources/MemProtector.cpp
+	${PROJECT_SOURCE_DIR}/sources/MemAccessor.cpp
 	${PROJECT_SOURCE_DIR}/sources/TestEffectTracker.cpp
-	${PROJECT_SOURCE_DIR}/sources/PageAllocator.cpp)
+	${PROJECT_SOURCE_DIR}/sources/StackCanary.cpp
+	${PROJECT_SOURCE_DIR}/sources/PageAllocator.cpp
+	${PROJECT_SOURCE_DIR}/sources/ErrorLog.cpp
+	${PROJECT_SOURCE_DIR}/sources/UID.cpp
+	${PROJECT_SOURCE_DIR}/sources/Misc.cpp
+	)
 
 #DisAsm/Capstone
 if(POLYHOOK_DISASM_CAPSTONE)
-	target_link_libraries(${PROJECT_NAME} PRIVATE $)
-	install(TARGETS ${CAPSTONE_LIBRARY_NAME} EXPORT exporthack)
-
+	if (POLYHOOK_USE_EXTERNAL_CAPSTONE)
+		find_library(CAPSTONE_LIBRARY NAMES capstone_dll capstone)
+		find_path(CAPSTONE_INCLUDE_DIR NAMES capstone/capstone.h)
+		target_link_libraries(${PROJECT_NAME} PRIVATE ${CAPSTONE_LIBRARY})
+		target_include_directories(${PROJECT_NAME} PRIVATE ${CAPSTONE_INCLUDE_DIR})
+	else()
+		target_link_libraries(${PROJECT_NAME} PRIVATE $)
+		target_include_directories(${PROJECT_NAME} PRIVATE $)
+		install(TARGETS ${CAPSTONE_LIBRARY_NAME} EXPORT exporthack)
+	endif()
 	target_sources(${PROJECT_NAME} PRIVATE "${PROJECT_SOURCE_DIR}/sources/CapstoneDisassembler.cpp")
-
-	target_include_directories(${PROJECT_NAME} PRIVATE $)
-
 	install(FILES ${PROJECT_SOURCE_DIR}/polyhook2/CapstoneDisassembler.hpp DESTINATION include/polyhook2)
 endif()
 
 #DisAsm/Zydis
 if(POLYHOOK_DISASM_ZYDIS)
-	target_link_libraries(${PROJECT_NAME} PRIVATE $)
-
-	target_sources(${PROJECT_NAME} PRIVATE "${PROJECT_SOURCE_DIR}/sources/ZydisDisassembler.cpp")
-
-	target_include_directories(${PROJECT_NAME} PRIVATE $)
-	target_include_directories(${PROJECT_NAME} PRIVATE $)
-	target_include_directories(${PROJECT_NAME} PRIVATE $)
+	if (POLYHOOK_USE_EXTERNAL_ZYDIS)
+		find_library(ZYDIS_LIBRARY NAMES zydis)
+		find_library(ZYCORE_LIBRARY NAMES zycore)
+		find_path(ZYDIS_INCLUDE_DIR NAMES zydis/zydis.h)
+		find_path(ZYCORE_INCLUDE_DIR NAMES zycore/zycore.h)
+		target_link_libraries(${PROJECT_NAME} PRIVATE ${ZYDIS_LIBRARY})
+		target_link_libraries(${PROJECT_NAME} PRIVATE ${ZYCORE_LIBRARY})
+		target_include_directories(${PROJECT_NAME} PRIVATE ${ZYDIS_INCLUDE_DIR})
+		target_include_directories(${PROJECT_NAME} PRIVATE ${ZYCORE_INCLUDE_DIR})
+	else()
+		target_link_libraries(${PROJECT_NAME} PRIVATE $)
+		target_include_directories(${PROJECT_NAME} PRIVATE $)
+		target_include_directories(${PROJECT_NAME} PRIVATE $)
+		target_include_directories(${PROJECT_NAME} PRIVATE $)
+	endif()
 	
+	target_sources(${PROJECT_NAME} PRIVATE "${PROJECT_SOURCE_DIR}/sources/ZydisDisassembler.cpp")
 	install(FILES ${PROJECT_SOURCE_DIR}/polyhook2/ZydisDisassembler.hpp DESTINATION include/polyhook2)
 endif()
 
@@ -260,7 +290,14 @@ endif()
 
 #Feature/Inlinentd
 if(POLYHOOK_FEATURE_INLINENTD)
-	target_link_libraries(${PROJECT_NAME} PRIVATE $)
+	if (POLYHOOK_USE_EXTERNAL_ASMJIT)
+		find_library(ASMJIT_LIBRARY NAMES asmjit)
+		find_path(ASMJIT_INCLUDE_DIR NAMES asmjit/asmjit.h)
+		target_link_libraries(${PROJECT_NAME} PRIVATE ${ASMJIT_LIBRARY})
+		target_include_directories(${PROJECT_NAME} PRIVATE ${ASMJIT_INCLUDE_DIR})
+	else()
+		target_link_libraries(${PROJECT_NAME} PRIVATE $)
+	endif()
 
 	install(FILES ${PROJECT_SOURCE_DIR}/polyhook2/Detour/ILCallback.hpp DESTINATION include/polyhook2/Detour)
 
@@ -313,6 +350,7 @@ if(POLYHOOK_FEATURE_VIRTUALS)
 	if(NOT POLYHOOK_BUILD_DLL)
 		target_sources(${PROJECT_NAME} PRIVATE
 			${PROJECT_SOURCE_DIR}/UnitTests/TestVTableSwapHook.cpp
+			${PROJECT_SOURCE_DIR}/UnitTests/TestVTableSwapHook2.cpp
 			${PROJECT_SOURCE_DIR}/UnitTests/TestVFuncSwapHook.cpp)
 	endif()
 endif()
diff --git a/libs/polyhook2.0/CMakeSettings.json b/libs/polyhook2.0/CMakeSettings.json
index 7d8df26..758390b 100644
--- a/libs/polyhook2.0/CMakeSettings.json
+++ b/libs/polyhook2.0/CMakeSettings.json
@@ -1,5 +1,5 @@
 {
-    // See https://go.microsoft.com//fwlink//?linkid=834763 for more information about this file.
+  // See https://go.microsoft.com//fwlink//?linkid=834763 for more information about this file.
   "configurations": [
     {
       "name": "x86-Debug",
diff --git a/libs/polyhook2.0/COMMIT.md b/libs/polyhook2.0/COMMIT.md
index 1ee1965..6f9b12b 100644
--- a/libs/polyhook2.0/COMMIT.md
+++ b/libs/polyhook2.0/COMMIT.md
@@ -1,2 +1,2 @@
 Source:
-https://github.com/stevemk14ebr/PolyHook_2_0/commit/5a6b0cde0b746a7951e02f811873f399f0e6dcd1
+https://github.com/stevemk14ebr/PolyHook_2_0/commit/84d6be2a208fa7ab9d14e27c497413d598632770
diff --git a/libs/polyhook2.0/MainTests.cpp b/libs/polyhook2.0/MainTests.cpp
index 2788e51..8f07a06 100644
--- a/libs/polyhook2.0/MainTests.cpp
+++ b/libs/polyhook2.0/MainTests.cpp
@@ -2,8 +2,13 @@
 #include "Catch.hpp"
 #include 
 
+#include "polyhook2/ErrorLog.hpp"
 int main(int argc, char* const argv[]) {
+	_CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF | _CRTDBG_CHECK_ALWAYS_DF );
 	std::cout << "Welcome to PolyHook -By- Stevemk14ebr" << std::endl;
+	auto logger = std::make_shared();
+	logger->setLogLevel(PLH::ErrorLevel::INFO);
+	PLH::Log::registerLogger(logger);
 	int result = Catch::Session().run(argc, argv);
 
 	getchar();
diff --git a/libs/polyhook2.0/Polyhook_2-config.cmake.in b/libs/polyhook2.0/Polyhook_2-config.cmake.in
index 7ddd490..b919912 100644
--- a/libs/polyhook2.0/Polyhook_2-config.cmake.in
+++ b/libs/polyhook2.0/Polyhook_2-config.cmake.in
@@ -14,4 +14,5 @@ set(POLYHOOK_FEATURE_INLINENTD @POLYHOOK_FEATURE_INLINENTD@)
 set(POLYHOOK_FEATURE_PE @POLYHOOK_FEATURE_PE@)
 set(POLYHOOK_FEATURE_VIRTUALS @POLYHOOK_FEATURE_VIRTUALS@)
 
-include("${PACKAGE_PREFIX_DIR}/lib/Polyhook_2/Polyhook_2-targets.cmake")
+get_filename_component(POLYHOOK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
+include("${POLYHOOK_CMAKE_DIR}/PolyHook_2-targets.cmake")
diff --git a/libs/polyhook2.0/README.md b/libs/polyhook2.0/README.md
index adaf938..cb6489f 100644
--- a/libs/polyhook2.0/README.md
+++ b/libs/polyhook2.0/README.md
@@ -5,6 +5,8 @@ Article 1: https://www.codeproject.com/articles/1100579/polyhook-the-cplusplus-x
 
 Article 2: https://www.codeproject.com/Articles/1252212/PolyHook-2-Cplusplus17-x86-x64-Hooking-Library
 
+# Please consider sponsoring my work by clicking the Sponsor button in the top right
+
 # Community
 Ask for help, chat with others, talk to me here
 * [Official Gitter Chat](https://gitter.im/PolyHook/Lobby)
@@ -34,19 +36,17 @@ See: https://github.com/stevemk14ebr/PolyHook_2_0/pull/59#issuecomment-619223616
 I provide directions below for how to setup the visual studio cmake environment only. If you don't want to use visual studio that's fine, this is a standard cmake project and will build from command line just fine. 
 
 ### Visual Studio 2017/2019
-clone and init with given commands
-
-Open VS 2017, go to file->open->cmake.. this will load the project and start cmake generation. Next (optional step) go to tools->options->cmake->general->CMakeSettings.json path needs to be set to the polyhook2_0 directory that directly contains CMakeSettings.json, this will tell visual studio the build paths and also setup the build types (if it doesn't look right clear all the cmake cache stuff by cmake->clean all & cmake->cache->delete all & cmake->cache->generate. After all the stuff is done finally goto cmake->build all or cmake->build only or if you are in exe mode you can also set a startup item and release mode and use the play button. Capstone and asmjit are both set to automatically build and link, you DO NOT need to build them seperately.
+clone the project and perform submodule init as above. Do not run the cmake commands, instead:
 
-![CMakeSettings.json](https://i.imgur.com/RpHQ5Km.png)
+Open VS 2017, go to file->open->cmake.. this will load the project and start cmake generation. Next go to cmake->build all or cmake->build; you can also set a startup item and release mode to use the play button (do not use the install target). Capstone, Zydis, and asmjit are set to automatically build and link, you DO NOT need to build them separately.
 
 ### Documentation
-I've setup an example project to show how to use this as a static library. You should clear your cmake cache between changing these options. The dll is built with the cmake option to export all symbols. This is different from the typical windows DLL where things are manually exported via declspec(dllexport), instead it behaves how linux dlls do with all symbols exported by default. This style should make it easier to maintain the code, the downside is there are many exports but i don't care.
+https://stevemk14ebr.github.io/PolyHook_2_0/ & Read the Tests!
 
-Read the tests for docs for now until i write some. They are extensive
+I've setup an example project to show how to use this as a static library. You should clear your cmake cache between changing these options. The dll is built with the cmake option to export all symbols. This is different from the typical windows DLL where things are manually exported via declspec(dllexport), instead it behaves how linux dlls do with all symbols exported by default. This style should make it easier to maintain the code, the downside is there are many exports but i don't care.
 
 # Features
-0) Both capstone and zydis are supported as disassembly backends and are fully abstracted
+0) Both capstone and zydis are supported as disassembly backends and are fully abstracted.
 1) Inline hook (x86/x64 Detour)
     - Places a jmp to a callback at the prologue, and then allocates a trampoline to continue execution of the original function
     - Operates entirely on an intermediate instruction object, disassembler engine is swappable, capstone included by default
@@ -57,8 +57,10 @@ Read the tests for docs for now until i write some. They are extensive
       - Branches into overwritten section are resolved to the new moved location
       - Jmps from moved prologue back to original section are resolved through a jmp table
       - Relocations inside the moved section are resolved (not using relocation table, disassembles using engine)
-    - x64 trampoline is not restricted to +- 2GB, can be anywhere, avoids shadow space + no registers spoiled
+    - x64 trampoline is not restricted to +- 2GB, can be anywhere, avoids shadow space + no registers spoiled.
     - If inline hook fails at an intermediate step the original function will not be malformed. All writes are batched until after we know later steps succeed.
+    - Cross-Architecture hooking is _fully_ supported, including the overriding of memory access routines to allow read/write of 64bit memory from a 32bit process. You can hook 64bit from a 32bit process if you're clever enough to write the shellcode required for the callbacks.
+    - Efficient reHook-ing logic is implemented. This can be used to combat third parties overwriting prologues back to original bytes. This is optimized into a few simple memcpy's rather than re-executing the entire logic in hook().
     
  2) Runtime Inline Hook
     - All the goodness of normal inline hooks, but JIT's a translation stub compatible with the given typedef and ABI. The translation stub will move arguments into a small struct, which is passed as pointer to a callback and allow the spoofing of return value. This allows tools to generate hook translation stubs at runtime, allowing for the full inline hooking of functions where the typedef is not known until runtime.
diff --git a/libs/polyhook2.0/UnitTests/TestDetourNoTDx64.cpp b/libs/polyhook2.0/UnitTests/TestDetourNoTDx64.cpp
index d6db140..2c90527 100644
--- a/libs/polyhook2.0/UnitTests/TestDetourNoTDx64.cpp
+++ b/libs/polyhook2.0/UnitTests/TestDetourNoTDx64.cpp
@@ -3,6 +3,7 @@
 #include "polyhook2/Detour/ILCallback.hpp"
 #pragma warning( disable : 4244)
 
+#include "polyhook2/Tests/StackCanary.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 /**These tests can spontaneously fail if the compiler desides to optimize away
@@ -12,38 +13,11 @@ printf inside the body can mitigate this significantly. Do serious checking in d
 or releasewithdebinfo mode (relwithdebinfo optimizes sliiiightly less)**/
 
 EffectTracker effectsNTD64;
-
-typedef int(*Func)(void);
-TEST_CASE("Minimal Example", "[AsmJit]") {
-	asmjit::JitRuntime rt;                          // Runtime specialized for JIT code execution.
-
-	asmjit::CodeHolder code;                        // Holds code and relocation information.
-	code.init(rt.codeInfo());					// Initialize to the same arch as JIT runtime.
-
-	asmjit::x86::Assembler a(&code);                  // Create and attach X86Assembler to `code`.
-	a.mov(asmjit::x86::eax, 1);                     // Move one to 'eax' register.
-	a.ret();										// Return from function.
-	// ----> X86Assembler is no longer needed from here and can be destroyed <----
-	
-	Func fn;
-	asmjit::Error err = rt.add(&fn, &code);         // Add the generated code to the runtime.
-	if (err) {
-		REQUIRE(false);
-	}
-	
-	int result = fn();                      // Execute the generated code.
-	REQUIRE(result == 1);
-
-	// All classes use RAII, all resources will be released before `main()` returns,
-	// the generated function can be, however, released explicitly if you intend to
-	// reuse or keep the runtime alive, which you should in a production-ready code.
-	rt.release(fn);
-}
-
 #include "polyhook2/Detour/x64Detour.hpp"
 #include "polyhook2/CapstoneDisassembler.hpp"
 
 NOINLINE void hookMeInt(int a) {
+	PLH::StackCanary canary;
 	volatile int var = 1;
 	int var2 = var + a;
 
@@ -59,12 +33,14 @@ NOINLINE void hookMeInt(int a) {
 }
 
 NOINLINE void hookMeFloat(float a) {
+	PLH::StackCanary canary;
 	float ans = 1.0f;
 	ans += a;
 	printf("%f %f\n", ans, a); 
 }
 
 NOINLINE void hookMeIntFloatDouble(int a, float b, double c) {
+	PLH::StackCanary canary;
 	volatile float ans = 0.0f;
 	ans += (float)a;
 	ans += c;
@@ -74,6 +50,7 @@ NOINLINE void hookMeIntFloatDouble(int a, float b, double c) {
 
 NOINLINE void myCallback(const PLH::ILCallback::Parameters* p, const uint8_t count, const PLH::ILCallback::ReturnValue* retVal) {
 	PH_UNUSED(retVal);
+	PLH::StackCanary canary;
 
 	printf("Argument Count: %d\n", count);
 	for (int i = 0; i < count; i++) {
@@ -90,11 +67,11 @@ NOINLINE void myCallback(const PLH::ILCallback::Parameters* p, const uint8_t cou
 
 TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	PLH::ILCallback callback;
-
 	SECTION("Integer argument") {
+		PLH::StackCanary canary;
 		asmjit::FuncSignatureT sig;
-		sig.setCallConv(asmjit::CallConv::kIdX86Win64);
-		uint64_t JIT = callback.getJitFunc(sig, &myCallback);
+		sig.setCallConv(asmjit::CallConv::kIdX64Windows);
+		uint64_t JIT = callback.getJitFunc(sig, asmjit::Environment::kArchHost, &myCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
@@ -108,7 +85,8 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Floating argument") {
-		uint64_t JIT = callback.getJitFunc("void", {"float"}, &myCallback);
+		PLH::StackCanary canary;
+		uint64_t JIT = callback.getJitFunc("void", {"float"}, asmjit::Environment::kArchHost, &myCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
@@ -122,7 +100,8 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments, string parsing types") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &myCallback);
+		PLH::StackCanary canary;
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &myCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
@@ -139,6 +118,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 
 NOINLINE void rw(int a, float b, double c, int type) {
 	PH_UNUSED(type);
+	PLH::StackCanary canary;
 	volatile float ans = 0.0f;
 	ans += (float)a;
 	ans += c;
@@ -151,6 +131,7 @@ NOINLINE void rw(int a, float b, double c, int type) {
 
 NOINLINE float rw_float(int a, float b, double c, int type) {
 	PH_UNUSED(type);
+	PLH::StackCanary canary;
 	volatile float ans = 0.0f;
 	ans += (float)a;
 	ans += c;
@@ -164,6 +145,7 @@ NOINLINE float rw_float(int a, float b, double c, int type) {
 
 NOINLINE double rw_double(int a, float b, double c, int type) {
 	PH_UNUSED(type);
+	PLH::StackCanary canary;
 	volatile float ans = 0.0f;
 	ans += (float)a;
 	ans += c;
@@ -177,6 +159,7 @@ NOINLINE double rw_double(int a, float b, double c, int type) {
 
 NOINLINE int rw_int(int a, float b, double c, int type) {
 	PH_UNUSED(type);
+	PLH::StackCanary canary;
 	volatile float ans = 0.0f;
 	ans += (float)a;
 	ans += c;
@@ -189,6 +172,7 @@ NOINLINE int rw_int(int a, float b, double c, int type) {
 }
 
 NOINLINE void mySecondCallback(const PLH::ILCallback::Parameters* p, const uint8_t count, const PLH::ILCallback::ReturnValue* retVal) {
+	PLH::StackCanary canary;
 	printf("Argument Count: %d\n", count);
 	for (int i = 0; i < count; i++) {
 		printf("Arg: %d asInt:%d asFloat:%f asDouble:%f\n", i, p->getArg(i), p->getArg(i), p->getArg(i));
@@ -227,7 +211,8 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	PLH::ILCallback callback;
 
 	SECTION("Int, float, double arguments host") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double", "int" }, &mySecondCallback);
+		PLH::StackCanary canary;
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &mySecondCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
@@ -241,7 +226,8 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments, float ret, host") {
-		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, &mySecondCallback);
+		PLH::StackCanary canary;
+		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &mySecondCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
@@ -256,7 +242,8 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments, double ret, host") {
-		uint64_t JIT = callback.getJitFunc("double", { "int", "float", "double", "int" }, &mySecondCallback);
+		PLH::StackCanary canary;
+		uint64_t JIT = callback.getJitFunc("double", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &mySecondCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x64);
diff --git a/libs/polyhook2.0/UnitTests/TestDetourNoTDx86.cpp b/libs/polyhook2.0/UnitTests/TestDetourNoTDx86.cpp
index 0bac5ee..d59cc1c 100644
--- a/libs/polyhook2.0/UnitTests/TestDetourNoTDx86.cpp
+++ b/libs/polyhook2.0/UnitTests/TestDetourNoTDx86.cpp
@@ -14,31 +14,6 @@ or releasewithdebinfo mode (relwithdebinfo optimizes sliiiightly less)**/
 EffectTracker effectsNTD;
 
 typedef int(*Func)(void);
-TEST_CASE("Minimal Asmjit Example", "[AsmJit]") {
-	asmjit::JitRuntime rt;                          // Runtime specialized for JIT code execution.
-
-	asmjit::CodeHolder code;                        // Holds code and relocation information.
-	code.init(rt.codeInfo());					// Initialize to the same arch as JIT runtime.
-
-	asmjit::x86::Assembler a(&code);                  // Create and attach X86Assembler to `code`.
-	a.mov(asmjit::x86::eax, 1);                     // Move one to 'eax' register.
-	a.ret();										// Return from function.
-	// ----> X86Assembler is no longer needed from here and can be destroyed <----
-
-	Func fn;
-	asmjit::Error err = rt.add(&fn, &code);         // Add the generated code to the runtime.
-	if (err) {
-		REQUIRE(false);
-	}
-
-	int result = fn();                      // Execute the generated code.
-	REQUIRE(result == 1);
-
-	// All classes use RAII, all resources will be released before `main()` returns,
-	// the generated function can be, however, released explicitly if you intend to
-	// reuse or keep the runtime alive, which you should in a production-ready code.
-	rt.release(fn);
-}
 
 #include "polyhook2/Detour/x86Detour.hpp"
 #include "polyhook2/CapstoneDisassembler.hpp"
@@ -107,7 +82,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	PLH::ILCallback callback;
 
 	SECTION("Integer argument") {
-		uint64_t JIT = callback.getJitFunc("void", { "int" }, &myCallback);
+		uint64_t JIT = callback.getJitFunc("void", { "int" }, asmjit::Environment::kArchHost, &myCallback);
 		REQUIRE(JIT != 0);
 		
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -121,7 +96,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Floating argument") {
-		uint64_t JIT = callback.getJitFunc("void", { "float" }, &myCallback);
+		uint64_t JIT = callback.getJitFunc("void", { "float" }, asmjit::Environment::kArchHost, &myCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -135,7 +110,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments standard") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &myCallback, "stdcall");
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &myCallback, "stdcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -149,7 +124,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments cdecl") {
-		uint64_t JIT = callback.getJitFunc("void", {"int", "float", "double"}, &myCallback, "cdecl");
+		uint64_t JIT = callback.getJitFunc("void", {"int", "float", "double"}, asmjit::Environment::kArchHost, &myCallback, "cdecl");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -163,7 +138,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments fastcall") {
-		uint64_t JIT = callback.getJitFunc("void", {"int", "float", "double"}, &myCallback, "fastcall");
+		uint64_t JIT = callback.getJitFunc("void", {"int", "float", "double"}, asmjit::Environment::kArchHost, &myCallback, "fastcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -177,7 +152,7 @@ TEST_CASE("Minimal ILCallback", "[AsmJit][ILCallback]") {
 	}
 
 	SECTION("Verify return address spoofing doesn't crash") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &myCallback, "fastcall");
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &myCallback, "fastcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -259,7 +234,7 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	PLH::ILCallback callback;
 
 	SECTION("Int, float, double arguments host") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &mySecondCallback);
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &mySecondCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -273,7 +248,7 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments fastcall") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &mySecondCallback, "fastcall");
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &mySecondCallback, "fastcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -287,7 +262,7 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments cdecl") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &mySecondCallback, "cdecl");
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &mySecondCallback, "cdecl");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -301,7 +276,7 @@ TEST_CASE("ILCallback Argument re-writing", "[ILCallback]") {
 	}
 
 	SECTION("Int, float, double arguments stdcall") {
-		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, &mySecondCallback, "stdcall");
+		uint64_t JIT = callback.getJitFunc("void", { "int", "float", "double" }, asmjit::Environment::kArchHost, &mySecondCallback, "stdcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -472,7 +447,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	PLH::ILCallback callback;
 
 	SECTION("Minimal host, int, float, double, int return") {
-		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, &myThirdCallback);
+		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -488,7 +463,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("Minimal host, int, float, double, float return") {
-		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, &myThirdCallback);
+		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -504,7 +479,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("Minimal host, int, float, double, double return") {
-		uint64_t JIT = callback.getJitFunc("double", { "int", "float", "double", "int" }, &myThirdCallback);
+		uint64_t JIT = callback.getJitFunc("double", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback);
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -521,7 +496,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 
 
 	SECTION("int, float, double, int return, stdcall") {
-		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, &myThirdCallback, "stdcall");
+		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "stdcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -538,7 +513,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 
 
 	SECTION("int, float, double, int return, cdecl") {
-		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, &myThirdCallback, "cdecl");
+		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "cdecl");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -554,7 +529,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("int, float, double, int return, fastcall") {
-		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, &myThirdCallback, "fastcall");
+		uint64_t JIT = callback.getJitFunc("int", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "fastcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -570,7 +545,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("int, float, double, float return, fastcall") {
-		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, &myThirdCallback, "fastcall");
+		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "fastcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -587,7 +562,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("int, float, double, float return, cdecl") {
-		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, &myThirdCallback, "cdecl");
+		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "cdecl");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
@@ -604,7 +579,7 @@ TEST_CASE("ILCallback Return and Argument Re-Writing", "[ILCallback]") {
 	}
 
 	SECTION("int, float, double, float return, std") {
-		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, &myThirdCallback, "stdcall");
+		uint64_t JIT = callback.getJitFunc("float", { "int", "float", "double", "int" }, asmjit::Environment::kArchHost, &myThirdCallback, "stdcall");
 		REQUIRE(JIT != 0);
 
 		PLH::CapstoneDisassembler dis(PLH::Mode::x86);
diff --git a/libs/polyhook2.0/UnitTests/TestDetourx64.cpp b/libs/polyhook2.0/UnitTests/TestDetourx64.cpp
index 091e5c9..dbc6148 100644
--- a/libs/polyhook2.0/UnitTests/TestDetourx64.cpp
+++ b/libs/polyhook2.0/UnitTests/TestDetourx64.cpp
@@ -6,6 +6,7 @@
 #include "polyhook2/CapstoneDisassembler.hpp"
 #include "polyhook2/ZydisDisassembler.hpp"
 
+#include "polyhook2/Tests/StackCanary.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 EffectTracker effects;
@@ -17,6 +18,7 @@ printf inside the body can mitigate this significantly. Do serious checking in d
 or releasewithdebinfo mode (relwithdebinfo optimizes sliiiightly less)**/
 
 NOINLINE void hookMe1() {
+	PLH::StackCanary canary;
 	volatile int var = 1;
 	volatile int var2 = 0;
 	var2 += 3;
@@ -28,25 +30,26 @@ NOINLINE void hookMe1() {
 	REQUIRE(var2 == 40);
 }
 uint64_t hookMe1Tramp = NULL;
-
-NOINLINE void h_hookMe1() {
+HOOK_CALLBACK(&hookMe1, h_hookMe1, {
+	PLH::StackCanary canary;
 	std::cout << "Hook 1 Called!" << std::endl;
 	effects.PeakEffect().trigger();
 	return PLH::FnCast(hookMe1Tramp, &hookMe1)();
-}
+});
 
 NOINLINE void hookMe2() {
+	PLH::StackCanary canary;
 	for (int i = 0; i < 10; i++) {
 		printf("%d\n", i);
 	}
 }
 uint64_t hookMe2Tramp = NULL;
-
-NOINLINE void h_hookMe2() {
+HOOK_CALLBACK(&hookMe2, h_hookMe2, {
+	PLH::StackCanary canary;
 	std::cout << "Hook 2 Called!" << std::endl;
 	effects.PeakEffect().trigger();
 	return PLH::FnCast(hookMe2Tramp, &hookMe2)();
-}
+});
 
 unsigned char hookMe3[] = {
 0x57, // push rdi 
@@ -70,25 +73,37 @@ unsigned char hookMe4[] = {
 
 uint64_t nullTramp = NULL;
 NOINLINE void h_nullstub() {
+	PLH::StackCanary canary;
 	volatile int i = 0;
 	PH_UNUSED(i);
 }
 
 #include 
 uint64_t hookMallocTramp = NULL;
-NOINLINE void* h_hookMalloc(size_t size) {
+HOOK_CALLBACK(&malloc, h_hookMalloc, {
+	PLH::StackCanary canary;
 	volatile int i = 0;
 	PH_UNUSED(i);
 	effects.PeakEffect().trigger();
 
-	return PLH::FnCast(hookMallocTramp, &malloc)(size);
-}
+	return PLH::FnCast(hookMallocTramp, &malloc)(_args...);
+});
+
+uint64_t oCreateMutexExA = 0;
+HOOK_CALLBACK(&CreateMutexExA, hCreateMutexExA, {
+	PLH::StackCanary canary;
+	LPCSTR lpName = GET_ARG(1);
+	printf("kernel32!CreateMutexExA  Name:%s",  lpName);
+	return PLH::FnCast(oCreateMutexExA, &CreateMutexExA)(_args...);
+});
 
 TEMPLATE_TEST_CASE("Testing 64 detours", "[x64Detour],[ADetour]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
 	TestType dis(PLH::Mode::x64);
 
+
 	SECTION("Normal function") {
-		PLH::x64Detour detour((char*)&hookMe1, (char*)&h_hookMe1, &hookMe1Tramp, dis);
+		PLH::StackCanary canary;
+		PLH::x64Detour detour((char*)&hookMe1, (char*)h_hookMe1, &hookMe1Tramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -97,8 +112,40 @@ TEMPLATE_TEST_CASE("Testing 64 detours", "[x64Detour],[ADetour]", PLH::CapstoneD
 		REQUIRE(detour.unHook() == true);
 	}
 
+	SECTION("Normal function rehook")
+	{
+		PLH::StackCanary canary;
+		PLH::x64Detour detour((char*)&hookMe1, (char*)h_hookMe1, &hookMe1Tramp, dis);
+		REQUIRE(detour.hook() == true);
+		
+		effects.PushEffect();
+		REQUIRE(detour.reHook() == true); // can only really test this doesn't cause memory corruption easily
+		hookMe1();
+		REQUIRE(effects.PopEffect().didExecute());
+		REQUIRE(detour.unHook() == true);
+	}
+
+	// In release mode win apis usually go through two levels of jmps 
+	/*
+	0xe9 ... jmp iat_thunk
+
+	iat_thunk:
+	0xff 25 ... jmp [api_implementation]
+
+	api_implementation:
+	    sub rsp, ...
+		... the goods ...
+	*/
+	SECTION("WinApi Indirection") {
+		PLH::StackCanary canary;
+		PLH::x64Detour detour((char*)&CreateMutexExA, (char*)hCreateMutexExA, &oCreateMutexExA, dis);
+		REQUIRE(detour.hook() == true);
+		REQUIRE(detour.unHook() == true);
+	}
+
 	SECTION("Loop function") {
-		PLH::x64Detour detour((char*)&hookMe2, (char*)&h_hookMe2, &hookMe2Tramp, dis);
+		PLH::StackCanary canary;
+		PLH::x64Detour detour((char*)&hookMe2, (char*)h_hookMe2, &hookMe2Tramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -108,19 +155,23 @@ TEMPLATE_TEST_CASE("Testing 64 detours", "[x64Detour],[ADetour]", PLH::CapstoneD
 	}
 
 	SECTION("Jmp into prol w/src in range") {
+		PLH::StackCanary canary;
 		PLH::x64Detour detour((char*)&hookMe3, (char*)&h_nullstub, &nullTramp, dis);
 		REQUIRE(detour.hook() == true);
 		REQUIRE(detour.unHook() == true);
 	}
 
 	SECTION("Jmp into prol w/src out of range") {
+		PLH::StackCanary canary;
 		PLH::x64Detour detour((char*)&hookMe4, (char*)&h_nullstub, &nullTramp, dis);
+
 		REQUIRE(detour.hook() == true);
 		REQUIRE(detour.unHook() == true);
 	}
 
 	SECTION("hook malloc") {
-		PLH::x64Detour detour((char*)&malloc, (char*)&h_hookMalloc, &hookMallocTramp, dis);
+		PLH::StackCanary canary;
+		PLH::x64Detour detour((char*)&malloc, (char*)h_hookMalloc, &hookMallocTramp, dis);
 		effects.PushEffect(); // catch does some allocations, push effect first so peak works
 		bool result = detour.hook();
 
diff --git a/libs/polyhook2.0/UnitTests/TestDetourx86.cpp b/libs/polyhook2.0/UnitTests/TestDetourx86.cpp
index f30772e..04c8cb2 100644
--- a/libs/polyhook2.0/UnitTests/TestDetourx86.cpp
+++ b/libs/polyhook2.0/UnitTests/TestDetourx86.cpp
@@ -28,12 +28,12 @@ NOINLINE int __cdecl hookMe1() {
 }
 
 uint64_t hookMe1Tramp = NULL;
-NOINLINE int __cdecl h_hookMe1() {
+HOOK_CALLBACK(&hookMe1, h_hookMe1, {
 	std::cout << "Hook 1 Called!" << std::endl;
 
 	effects.PeakEffect().trigger();
 	return PLH::FnCast(hookMe1Tramp, &hookMe1)();
-}
+});
 
 /*  55                      push   ebp
 1:  8b ec                   mov    ebp,esp
@@ -90,12 +90,12 @@ NOINLINE void PH_ATTR_NAKED hookMeLoop() {
 }
 
 uint64_t hookMeLoopTramp = NULL;
-NOINLINE void __stdcall h_hookMeLoop() {
+HOOK_CALLBACK(&hookMeLoop, h_hookMeLoop, {
 	std::cout << "Hook loop Called!" << std::endl;
 
 	effects.PeakEffect().trigger();
 	PLH::FnCast(hookMeLoopTramp, &hookMeLoop)();
-}
+});
 
 #include 
 uint64_t hookPrintfTramp = NULL;
@@ -115,24 +115,30 @@ NOINLINE int __cdecl h_hookPrintf(const char* format, ...) {
 double(*pFnPowDouble)(double, double) = &std::pow;
 
 uint64_t hookPowTramp = NULL;
-NOINLINE double __cdecl h_hookPow(double X, double Y) {
+HOOK_CALLBACK(pFnPowDouble, h_hookPow, {
 	effects.PeakEffect().trigger();
-
-	return PLH::FnCast(hookPowTramp, pFnPowDouble)(X, Y);
-}
+	return PLH::FnCast(hookPowTramp, pFnPowDouble)(_args...);
+});
 
 #include 
 uint64_t hookMallocTramp = NULL;
-NOINLINE void* h_hookMalloc(size_t size) {
+HOOK_CALLBACK(&malloc, h_hookMalloc, {
 	effects.PeakEffect().trigger();
-	return PLH::FnCast(hookMallocTramp, &malloc)(size);
+	return PLH::FnCast(hookMallocTramp, &malloc)(_args...);
+});
+
+#include 
+uint64_t g_hook_recv_tramp = NULL;
+void hkRecv(SOCKET s, char* buf, int len, int flags)
+{
+	PLH::FnCast(g_hook_recv_tramp, &hkRecv)(s, buf, len, flags);
 }
 
 TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
 	TestType dis(PLH::Mode::x86);
 
 	SECTION("Normal function") {
-		PLH::x86Detour detour((char*)&hookMe1, (char*)&h_hookMe1, &hookMe1Tramp, dis);
+		PLH::x86Detour detour((char*)&hookMe1, (char*)h_hookMe1, &hookMe1Tramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -142,6 +148,18 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 		REQUIRE(detour.unHook() == true);
 	}
 
+	SECTION("Normal function rehook") {
+		PLH::x86Detour detour((char*)&hookMe1, (char*)h_hookMe1, &hookMe1Tramp, dis);
+		REQUIRE(detour.hook() == true);
+
+		effects.PushEffect();
+		REQUIRE(detour.reHook() == true); // can only really test this doesn't cause memory corruption easily
+		volatile auto result = hookMe1();
+		PH_UNUSED(result);
+		REQUIRE(effects.PopEffect().didExecute());
+		REQUIRE(detour.unHook() == true);
+	}
+
 	SECTION("Jmp into prologue w/ src in range") {
 		PLH::x86Detour detour((char*)&hookMe2, (char*)&h_nullstub, &nullTramp, dis);
 
@@ -157,7 +175,7 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 	}
 
 	SECTION("Loop") {
-		PLH::x86Detour detour((char*)&hookMeLoop, (char*)&h_hookMeLoop, &hookMeLoopTramp, dis);
+		PLH::x86Detour detour((char*)&hookMeLoop, (char*)h_hookMeLoop, &hookMeLoopTramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -167,7 +185,7 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 	}
 
 	SECTION("hook printf") {
-		PLH::x86Detour detour((char*)&printf, (char*)&h_hookPrintf, &hookPrintfTramp, dis);
+		PLH::x86Detour detour((char*)&printf, (char*)h_hookPrintf, &hookPrintfTramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -178,7 +196,7 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 
 	// it's a pun...
 	SECTION("hook pow") {
-		PLH::x86Detour detour((char*)pFnPowDouble, (char*)&h_hookPow, &hookPowTramp, dis);
+		PLH::x86Detour detour((char*)pFnPowDouble, (char*)h_hookPow, &hookPowTramp, dis);
 		REQUIRE(detour.hook() == true);
 
 		effects.PushEffect();
@@ -189,7 +207,7 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 	}
 
 	SECTION("hook malloc") {
-		PLH::x86Detour detour((char*)&malloc, (char*)&h_hookMalloc, &hookMallocTramp, dis);
+		PLH::x86Detour detour((char*)&malloc, (char*)h_hookMalloc, &hookMallocTramp, dis);
 		effects.PushEffect(); // catch does some allocations, push effect first so peak works
 		REQUIRE(detour.hook() == true);
 
@@ -198,4 +216,11 @@ TEMPLATE_TEST_CASE("Testing x86 detours", "[x86Detour],[ADetour]", PLH::Capstone
 		detour.unHook(); // unhook so we can popeffect safely w/o catch allocation happening again
 		REQUIRE(effects.PopEffect().didExecute());
 	}
+
+	SECTION("hook recv") {
+		auto recv_addr = reinterpret_cast<uint64_t>(GetProcAddress(GetModuleHandleA("ws2_32.dll"), "recv"));
+		PLH::x86Detour detour((char*)&malloc, (char*)h_hookMalloc, &recv_addr, dis);
+		effects.PushEffect(); // catch does some allocations, push effect first so peak works
+		REQUIRE(detour.hook() == true);
+	}
 }
diff --git a/libs/polyhook2.0/UnitTests/TestDisassembler.cpp b/libs/polyhook2.0/UnitTests/TestDisassembler.cpp
index 92b42ca..4a39a5c 100644
--- a/libs/polyhook2.0/UnitTests/TestDisassembler.cpp
+++ b/libs/polyhook2.0/UnitTests/TestDisassembler.cpp
@@ -4,6 +4,8 @@
 #include "Catch.hpp"
 #include "polyhook2/CapstoneDisassembler.hpp"
 #include "polyhook2/ZydisDisassembler.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
+#include "polyhook2/MemAccessor.hpp"
 
 #include 
 #include 
@@ -20,7 +22,13 @@ std::vector x64ASM = {
 	0x83, 0xFA, 0x01,                       //7) cmp edx, 1
 	0x75, 0xE4,                             //8) jne  0x1800182B0   when @0x1800182CA (base + 0xE4(neg) + 0x2)
 	0xE8, 0xCB, 0x57, 0x01, 0x00,           //9) call 0x18002DA9C   when @0x1800182CC (base + 0x157CB + 0x5)
-	0xFF, 0x25, 0xCB, 0x57, 0x01, 0x00,     //10)jmp qword ptr [rip + 0x157cb]  when @0x1800182d1FF
+	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,     //10)jmp qword ptr [rip + 0x00] (relative in x64)
+	0xAB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xAA
+};
+
+std::vector<uint8_t> x64ASM2 = { 
+	0x48, 0x8B, 0x05, 0x10, 0x00, 0x00, 0x00,  // mov    rax,QWORD PTR[rip + 0x10]  
+    0x48, 0x8B, 0x90, 0x55, 0x02, 0x00, 0x00  // mov    rdx,QWORD PTR[rax + 0x255]
 };
 
 // page 590 for jmp types, page 40 for mod/rm table:
@@ -35,7 +43,54 @@ std::vector x86ASM = {
 	0x74, 0x00,                         //4) 57b8edcb je  0x57b8edcd
 	0x8d, 0x87, 0x89, 0x67, 0x00, 0x00, //5) 57b8edcd lea eax, [edi+0x6789]child@4
 	0xeb, 0xf0,                         //6) 57b8edd3 jmp 0x57b8edc5       child@3
-	0xe9, 0x00, 0xff, 0x00, 0x00,        //7) 57b8edd5 jmp 57b9ecda
+};
+
+std::vector<uint8_t> x86ASM_FF25 = {
+	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // this displacement is re-written at test time since it's absolute in x86
+	0xAB, 0x00, 0x00, 0xAA
+};
+
+std::vector<uint8_t> x86x64Nops = {
+	0x90,
+	0x66, 0x90,
+	0x0f, 0x1f, 0x00,
+	0x0f, 0x1f, 0x40, 0x00,
+	0x0f, 0x1f, 0x44, 0x00, 0x00,
+	0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+	0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+	0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 
+
+	/*
+	* 
+	x64/x86 capstone
+    [1]: 90                            nop
+    [2]: 66 90                         nop
+    [3]: 0f 1f 00                      nop dword ptr [rax]
+    [4]: 0f 1f 40 00                   nop dword ptr [rax]
+    [5]: 0f 1f 44 00 00                nop dword ptr [rax + rax]
+    [6]: 66 0f 1f 44 00 00             nop word ptr [rax + rax]
+    [7]: 0f 1f 80 00 00 00 00          nop dword ptr [rax]
+    [8]: 0f 1f 84 00 00 00 00 00       nop dword ptr [rax + rax]
+    [9]: 66 0f 1f 84 00 00 00 00 00    nop word ptr [rax + rax]
+    [a]: 66 66 0f 1f 84 00 00 00 00 00 nop word ptr [rax + rax]
+    [b]: 66 66 66 0f 1f 84 00 00 00 00 00 nop word ptr [rax + rax]
+
+	x64/x86 zydis
+    [1]: 90                            nop nop
+    [2]: 66 90                         nop nop
+    [3]: 0f 1f 00                      nop dword ptr ds:[rax], eax
+    [4]: 0f 1f 40 00                   nop dword ptr ds:[rax], eax
+    [5]: 0f 1f 44 00 00                nop dword ptr ds:[rax+rax*1], eax
+    [6]: 66 0f 1f 44 00 00             nop word ptr ds:[rax+rax*1], ax
+    [7]: 0f 1f 80 00 00 00 00          nop dword ptr ds:[rax], eax
+    [8]: 0f 1f 84 00 00 00 00 00       nop dword ptr ds:[rax+rax*1], eax
+    [9]: 66 0f 1f 84 00 00 00 00 00    nop word ptr ds:[rax+rax*1], ax
+    [a]: 66 66 0f 1f 84 00 00 00 00 00 nop word ptr ds:[rax+rax*1], ax
+    [b]: 66 66 66 0f 1f 84 00 00 00 00 00 nop word ptr ds:[rax+rax*1], ax
+	*/
 };
 
 std::string filterJXX(const std::string& lhs) {
@@ -63,6 +118,7 @@ TEST_CASE("Test Instruction UUID generator", "[Instruction],[UID]") {
 									 displacement,
 									 0,
 									 false,
+			                         false,
 									 {},
 									 0,
 									 "nothing",
@@ -78,17 +134,21 @@ TEST_CASE("Test Instruction UUID generator", "[Instruction],[UID]") {
 }
 
 TEMPLATE_TEST_CASE("Test Disassemblers x64", "[ADisassembler],[CapstoneDisassembler],[ZydisDisassembler]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
+	PLH::StackCanary canaryg;
 	TestType disasm(PLH::Mode::x64);
 	auto                      Instructions = disasm.disassemble((uint64_t)&x64ASM.front(), (uint64_t)&x64ASM.front(),
-		(uint64_t)&x64ASM.front() + x64ASM.size());
+		(uint64_t)&x64ASM.front() + x64ASM.size(), PLH::MemAccessor());
+
+	Instructions.erase(Instructions.begin() + 0xB, Instructions.end());
 
 	uint64_t PrevInstAddress = (uint64_t)&x64ASM.front();
 	size_t   PrevInstSize = 0;
 
-	const char* CorrectMnemonic[] = {"mov", "mov", "push", "sub", "mov", "mov", "mov", "cmp", "jne", "call", "jmp"};
-	const uint8_t CorrectSizes[] = {5, 5, 1, 4, 3, 2, 3, 3, 2, 5, 6};
+	std::vector CorrectMnemonic = {"mov", "mov", "push", "sub", "mov", "mov", "mov", "cmp", "jne", "call", "jmp"};
+	std::vector CorrectSizes = {5, 5, 1, 4, 3, 2, 3, 3, 2, 5, 6};
 
 	SECTION("Check disassembler integrity") {
+		PLH::StackCanary canary;
 		REQUIRE(Instructions.size() == 11);
 
 		std::cout << Instructions << std::endl;
@@ -96,21 +156,42 @@ TEMPLATE_TEST_CASE("Test Disassemblers x64", "[ADisassembler],[CapstoneDisassemb
 		for (const auto &p : disasm.getBranchMap()) {
 			std::cout << std::hex << "dest: " << p.first << " " << std::dec << p.second << std::endl;
 		}
+
+		for (size_t i = 0; i < Instructions.size(); i++) {
+			INFO("Index: " << i
+				<< " Correct Mnemonic:"
+				<< CorrectMnemonic[i]
+				<< " Mnemonic:"
+				<< filterJXX(Instructions[i].getMnemonic()));
+
+			REQUIRE(filterJXX(Instructions[i].getMnemonic()).compare(CorrectMnemonic[i]) == 0);
+
+			REQUIRE(Instructions[i].size() == CorrectSizes[i]);
+
+			REQUIRE(Instructions[i].getAddress() == (PrevInstAddress + PrevInstSize));
+			PrevInstAddress = Instructions[i].getAddress();
+			PrevInstSize = Instructions[i].size();
+		}
+
+		// special little indirect ff25 jmp
+		REQUIRE(Instructions.back().getDestination() == 0xaa000000000000ab);
 	}
 
 	SECTION("Check branch map") {
+		PLH::StackCanary canary;
 		auto brMap = disasm.getBranchMap();
 		REQUIRE(brMap.size() == 1);
 		REQUIRE(brMap.find(Instructions[0].getAddress()) != brMap.end());
 	}
 
 	SECTION("Check instruction re-encoding integrity") {
+		PLH::StackCanary canary;
 		auto vecCopy = x64ASM;
 		Instructions[8].setRelativeDisplacement(0x00);
-		disasm.writeEncoding(Instructions[8]);
+		disasm.writeEncoding(Instructions[8], PLH::MemAccessor());
 
 		Instructions[9].setRelativeDisplacement(0x00);
-		disasm.writeEncoding(Instructions[9]);
+		disasm.writeEncoding(Instructions[9], PLH::MemAccessor());
 
 		REQUIRE(Instructions[8].getDestination() == Instructions[8].getAddress() + Instructions[8].size());
 		REQUIRE(Instructions[9].getDestination() == Instructions[9].getAddress() + Instructions[9].size());
@@ -118,36 +199,22 @@ TEMPLATE_TEST_CASE("Test Disassemblers x64", "[ADisassembler],[CapstoneDisassemb
 		// undo writes
 		x64ASM = vecCopy;
 		Instructions = disasm.disassemble((uint64_t)&x64ASM.front(), (uint64_t)&x64ASM.front(),
-			(uint64_t)&x64ASM.front() + x64ASM.size());
-	}
-
-	for (size_t i = 0; i < Instructions.size(); i++) {
-		INFO("Index: " << i
-			 << " Correct Mnemonic:"
-			 << CorrectMnemonic[i]
-			 << " Mnemonic:"
-			 << filterJXX(Instructions[i].getMnemonic()));
-
-		REQUIRE(filterJXX(Instructions[i].getMnemonic()).compare(CorrectMnemonic[i]) == 0);
-
-		REQUIRE(Instructions[i].size() == CorrectSizes[i]);
-
-		REQUIRE(Instructions[i].getAddress() == (PrevInstAddress + PrevInstSize));
-		PrevInstAddress = Instructions[i].getAddress();
-		PrevInstSize = Instructions[i].size();
+			(uint64_t)&x64ASM.front() + x64ASM.size(), PLH::MemAccessor());
 	}
 
 	SECTION("Check multiple calls") {
+		PLH::StackCanary canary;
 		PLH::insts_t insts;
 		for (int i = 0; i < 100; i++) {
 			insts = disasm.disassemble((uint64_t)&x64ASM.front(), (uint64_t)&x64ASM.front(),
-				(uint64_t)&x64ASM.front() + x64ASM.size());
+				(uint64_t)&x64ASM.front() + x64ASM.size(), PLH::MemAccessor());
 		}
 	}
 
 	SECTION("Verify branching, relative fields") {
+		PLH::StackCanary canary;
 		PLH::insts_t insts = disasm.disassemble((uint64_t)&x64ASM.front(), (uint64_t)&x64ASM.front(),
-			(uint64_t)&x64ASM.front() + x64ASM.size());
+			(uint64_t)&x64ASM.front() + x64ASM.size(), PLH::MemAccessor());
 
 		REQUIRE(insts.at(0).hasDisplacement() == false);
 		REQUIRE(insts.at(0).isBranching() == false);
@@ -166,26 +233,84 @@ TEMPLATE_TEST_CASE("Test Disassemblers x64", "[ADisassembler],[CapstoneDisassemb
 	}
 
 	SECTION("Test garbage instructions") {
+		PLH::StackCanary canary;
 		char randomBuf[500];
 		for (int i = 0; i < 500; i++)
 			randomBuf[i] = randByte();
 
 		auto insts = disasm.disassemble((uint64_t)randomBuf, (uint64_t)0x0,
-										500);
+										500, PLH::MemAccessor());
 		std::cout << insts << std::endl;
 	}
 }
 
+TEMPLATE_TEST_CASE("Test Disassemblers x86 FF25", "[ADisassembler],[CapstoneDisassembler],[ZydisDisassembler]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
+	// re-write ff 25 displacement to point to data (absolute)
+#ifndef _WIN64
+	*(uint32_t*)(x86ASM_FF25.data() + 2) = (uint32_t)(x86ASM_FF25.data() + 6); // 0xFF25  = &mem; (just fyi *mem == 0xAA0000AB)
+#else
+	// this test is not suitable for x64 due to ff 25 not being re-written
+	return;
+#endif
+
+	PLH::StackCanary canaryg;
+	TestType disasm(PLH::Mode::x86);
+	auto                      Instructions = disasm.disassemble((uint64_t)&x86ASM_FF25.front(), (uint64_t)&x86ASM_FF25.front(),
+		(uint64_t)&x86ASM_FF25.front() + x86ASM_FF25.size(), PLH::MemAccessor());
+
+	SECTION("Check disassembler integrity") {
+		PLH::StackCanary canary;
+		REQUIRE(Instructions.size() == 1);
+		std::cout << Instructions << std::endl;
+
+		for (const auto& p : disasm.getBranchMap()) {
+			std::cout << std::hex << "dest: " << p.first << " -> " << std::dec << p.second << std::endl;
+		}
+
+		// special little indirect ff25 jmp
+#ifndef _WIN64
+		REQUIRE(Instructions.back().getDestination() == 0xaa0000ab);
+#endif
+	}
+
+	REQUIRE(Instructions.at(0).isBranching());
+	REQUIRE(Instructions.at(0).hasDisplacement());
+}
+
 TEMPLATE_TEST_CASE("Test Disassemblers x86", "[ADisassembler],[CapstoneDisassembler],[ZydisDisassembler]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
+	PLH::StackCanary canaryg;
 	TestType disasm(PLH::Mode::x86);
 	auto                      Instructions = disasm.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-		(uint64_t)&x86ASM.front() + x86ASM.size());
+		(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
+
+	// TODO: full buffer isn't disassembled
+	//Instructions.erase(Instructions.begin() + 0x9, Instructions.end());
+	std::vector<uint8_t> CorrectSizes = {2, 6, 5, 6, 2, 6, 2};
+	std::vector<std::string> CorrectMnemonic = {"add", "add", "add", "jne", "je", "lea", "jmp"};
+
+	uint64_t PrevInstAddress = (uint64_t)&x86ASM.front();
+	size_t   PrevInstSize = 0;
+
+	for (size_t i = 0; i < Instructions.size(); i++) {
+		INFO("Index: " << i);
+		INFO("Correct Mnemonic:"
+			<< CorrectMnemonic[i]
+			<< " Mnemonic:"
+			<< Instructions[i].getMnemonic());
+
+		REQUIRE(filterJXX(Instructions.at(i).getMnemonic()).compare(CorrectMnemonic.at(i)) == 0);
 
-	const uint8_t CorrectSizes[] = {2, 6, 5, 6, 2, 6, 2, 5};
-	const char* CorrectMnemonic[] = {"add", "add", "add", "jne", "je", "lea", "jmp", "jmp"};
+		REQUIRE(Instructions.at(i).size() == CorrectSizes.at(i));
+
+		REQUIRE(Instructions.at(i).getAddress() == (PrevInstAddress + PrevInstSize));
+		PrevInstAddress = Instructions[i].getAddress();
+		PrevInstSize = Instructions[i].size();
+	}
+	REQUIRE(Instructions.size() == 7);
 
 	SECTION("Check disassembler integrity") {
-		REQUIRE(Instructions.size() == 8);
+		PLH::StackCanary canary;
+		REQUIRE(Instructions.size() == 7);
 		std::cout << Instructions << std::endl;
 
 		for (const auto &p : disasm.getBranchMap()) {
@@ -194,63 +319,46 @@ TEMPLATE_TEST_CASE("Test Disassemblers x86", "[ADisassembler],[CapstoneDisassemb
 	}
 
 	SECTION("Check branch map") {
+		PLH::StackCanary canary;
 		auto brMap = disasm.getBranchMap();
 		REQUIRE(brMap.size() == 3);
-		REQUIRE(brMap.find(Instructions[3].getAddress()) != brMap.end());
-		REQUIRE(brMap.find(Instructions[5].getAddress()) != brMap.end());
-		REQUIRE(brMap.find(Instructions[6].getAddress()) != brMap.end());
+		REQUIRE(brMap.find(Instructions.at(3).getAddress()) != brMap.end());
+		REQUIRE(brMap.find(Instructions.at(5).getAddress()) != brMap.end());
+		REQUIRE(brMap.find(Instructions.at(6).getAddress()) != brMap.end());
 	}
 
 	SECTION("Check instruction re-encoding integrity") {
+		PLH::StackCanary canary;
 		auto vecCopy = x86ASM;
-		Instructions[3].setRelativeDisplacement(0x00);
-		disasm.writeEncoding(Instructions[3]);
+		Instructions.at(3).setRelativeDisplacement(0x00);
+		disasm.writeEncoding(Instructions.at(3), PLH::MemAccessor());
 
-		Instructions[6].setRelativeDisplacement(0x00);
-		disasm.writeEncoding(Instructions[6]);
+		Instructions.at(6).setRelativeDisplacement(0x00);
+		disasm.writeEncoding(Instructions.at(6), PLH::MemAccessor());
 
-		REQUIRE(Instructions[3].getDestination() == Instructions[3].getAddress() + Instructions[3].size());
-		REQUIRE(Instructions[6].getDestination() == Instructions[6].getAddress() + Instructions[6].size());
+		REQUIRE(Instructions.at(3).getDestination() == Instructions.at(3).getAddress() + Instructions.at(3).size());
+		REQUIRE(Instructions.at(6).getDestination() == Instructions.at(6).getAddress() + Instructions.at(6).size());
 
 		// undo writes
 		x86ASM = vecCopy;
 		Instructions =
 			disasm.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-			(uint64_t)&x86ASM.front() + x86ASM.size());
-	}
-
-	uint64_t PrevInstAddress = (uint64_t)&x86ASM.front();
-	size_t   PrevInstSize = 0;
-
-
-	for (size_t i = 0; i < Instructions.size(); i++) {
-		INFO("Index: " << i);
-		INFO("Correct Mnemonic:"
-			 << CorrectMnemonic[i]
-			 << " Mnemonic:"
-			 << Instructions[i].getMnemonic());
-
-		REQUIRE(filterJXX(Instructions[i].getMnemonic()).compare(CorrectMnemonic[i]) == 0);
-
-		REQUIRE(Instructions[i].size() == CorrectSizes[i]);
-
-		REQUIRE(Instructions[i].getAddress() == (PrevInstAddress + PrevInstSize));
-		PrevInstAddress = Instructions[i].getAddress();
-		PrevInstSize = Instructions[i].size();
+			(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
 	}
-	REQUIRE(Instructions.size() == 8);
 
 	SECTION("Check multiple calls") {
+		PLH::StackCanary canary;
 		PLH::insts_t insts;
 		for (int i = 0; i < 100; i++) {
 			insts = disasm.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-				(uint64_t)&x86ASM.front() + x86ASM.size());
+				(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
 		}
 	}
 
 	SECTION("Verify branching, relative fields") {
+		PLH::StackCanary canary;
 		PLH::insts_t insts = disasm.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-			(uint64_t)&x86ASM.front() + x86ASM.size());
+			(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
 
 		REQUIRE(insts.at(4).isBranching());
 		REQUIRE(insts.at(4).hasDisplacement());
@@ -260,34 +368,78 @@ TEMPLATE_TEST_CASE("Test Disassemblers x86", "[ADisassembler],[CapstoneDisassemb
 
 		REQUIRE(insts.at(6).isBranching());
 		REQUIRE(insts.at(6).hasDisplacement());
-
-		REQUIRE(insts.at(7).isBranching());
-		REQUIRE(insts.at(7).hasDisplacement());
 	}
 
 	SECTION("Test garbage instructions") {
+		PLH::StackCanary canary;
 		char randomBuf[500];
 		for (int i = 0; i < 500; i++)
 			randomBuf[i] = randByte();
 
 		auto insts = disasm.disassemble((uint64_t)randomBuf, (uint64_t)0x0,
-										500);
+										500, PLH::MemAccessor());
 		std::cout << insts << std::endl;
 	}
 }
 
+TEMPLATE_TEST_CASE("Test Disassemblers x64 Two", "[ADisassembler],[CapstoneDisassembler],[ZydisDisassembler]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
+	PLH::StackCanary canaryg;
+	TestType disasm(PLH::Mode::x64);
+	PLH::insts_t Instructions = disasm.disassemble((uint64_t)&x64ASM2.front(), (uint64_t)&x64ASM2.front(),
+		(uint64_t)&x64ASM2.front() + x64ASM2.size(), PLH::MemAccessor());
+
+	SECTION("Verify relative displacements") {
+		REQUIRE(Instructions.at(0).m_isRelative);
+		REQUIRE(Instructions.at(0).m_hasDisplacement);
+		REQUIRE(Instructions.at(0).m_isIndirect == false);
+		REQUIRE(Instructions.at(0).getDestination() == Instructions.at(0).getAddress() + Instructions.at(0).size() + 0x10);
+
+		REQUIRE(Instructions.at(1).m_isRelative == false);
+		REQUIRE(Instructions.at(1).m_isIndirect == false);
+	}
+}
+
+TEMPLATE_TEST_CASE("Test Disassemblers NOPS", "[ADisassembler],[CapstoneDisassembler],[ZydisDisassembler]", PLH::CapstoneDisassembler, PLH::ZydisDisassembler) {
+	PLH::StackCanary canaryg;
+	TestType disasm(PLH::Mode::x64);
+	PLH::insts_t Instructions = disasm.disassemble((uint64_t)&x86x64Nops.front(), (uint64_t)&x86x64Nops.front(),
+		(uint64_t)&x86x64Nops.front() + x86x64Nops.size(), PLH::MemAccessor());
+
+	TestType disasmx86(PLH::Mode::x86);
+	PLH::insts_t Instructionsx86 = disasmx86.disassemble((uint64_t)&x86x64Nops.front(), (uint64_t)&x86x64Nops.front(),
+		(uint64_t)&x86x64Nops.front() + x86x64Nops.size(), PLH::MemAccessor());
+
+	SECTION("Verify multi-byte nops decodings x64") {
+		for (auto& ins : Instructions) {
+			REQUIRE(ins.getMnemonic() == "nop");
+			REQUIRE(TestType::isPadBytes(ins));
+		}
+	}
+
+	SECTION("Verify multi-byte nops decodings x86") {
+		for (auto& ins : Instructionsx86) {
+			REQUIRE(ins.getMnemonic() == "nop");
+			REQUIRE(TestType::isPadBytes(ins));			
+		}
+	}
+}
+
+// unreachable code
+#pragma warning(disable: 4702)
 TEST_CASE("Compare x86 Decompilers", "[ADisassembler],[ZydisDisassembler][CapstoneDisassembler]") {
+	PLH::StackCanary canaryg;
 	// Use capstone as reference
 	PLH::CapstoneDisassembler disasmRef(PLH::Mode::x86);
 	auto                      InstructionsRef = disasmRef.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-		(uint64_t)&x86ASM.front() + x86ASM.size());
+		(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
 
 	PLH::ZydisDisassembler disasm(PLH::Mode::x86);
 	auto                      Instructions = disasm.disassemble((uint64_t)&x86ASM.front(), (uint64_t)&x86ASM.front(),
-		(uint64_t)&x86ASM.front() + x86ASM.size());
+		(uint64_t)&x86ASM.front() + x86ASM.size(), PLH::MemAccessor());
 
 	SECTION("Check Integrity") {
-		REQUIRE(Instructions.size() == 8);
+		PLH::StackCanary canary;
+		REQUIRE(Instructions.size() == 7);
 		std::cout << Instructions << std::endl;
 
 		for (const auto &p : disasm.getBranchMap()) {
diff --git a/libs/polyhook2.0/UnitTests/TestEatHook.cpp b/libs/polyhook2.0/UnitTests/TestEatHook.cpp
index 0817d12..6b6e355 100644
--- a/libs/polyhook2.0/UnitTests/TestEatHook.cpp
+++ b/libs/polyhook2.0/UnitTests/TestEatHook.cpp
@@ -1,24 +1,28 @@
 #include 
 
 #include "polyhook2/PE/EatHook.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 EffectTracker eatEffectTracker;
 
 typedef void(* tEatTestExport)();
-tEatTestExport oEatTestExport;
+uint64_t oEatTestExport;
 
 extern "C" __declspec(dllexport) NOINLINE void EatTestExport()
 {
+	PLH::StackCanary canary;
 }
 
 NOINLINE void hkEatTestExport()
 {	
+	PLH::StackCanary canary;
 	eatEffectTracker.PeakEffect().trigger();
 }
 
 TEST_CASE("Eat Hook Tests", "[EatHook]") {
 	SECTION("Verify if export is found and hooked") {
+		PLH::StackCanary canary;
 		PLH::EatHook hook("EatTestExport", L"", (char*)&hkEatTestExport, (uint64_t*)&oEatTestExport);
 		REQUIRE(hook.hook());
 
@@ -32,6 +36,7 @@ TEST_CASE("Eat Hook Tests", "[EatHook]") {
 	}
 
 	SECTION("Verify if export is found and hooked when module explicitly named") {
+		PLH::StackCanary canary;
 		PLH::EatHook hook("EatTestExport", L"Polyhook_2.exe", (char*)&hkEatTestExport, (uint64_t*)&oEatTestExport);
 		REQUIRE(hook.hook());
 
@@ -49,7 +54,7 @@ typedef  int(__stdcall* tEatMessageBox)(HWND    hWnd,
 	LPCTSTR lpText,
 	LPCTSTR lpCaption,
 	UINT    uType);
-tEatMessageBox  oEatMessageBox;
+uint64_t  oEatMessageBox;
 
 int __stdcall hkEatMessageBox(HWND    hWnd,
 	LPCTSTR lpText,
@@ -60,7 +65,7 @@ int __stdcall hkEatMessageBox(HWND    hWnd,
 	UNREFERENCED_PARAMETER(lpCaption);
 	UNREFERENCED_PARAMETER(uType);
 	UNREFERENCED_PARAMETER(hWnd);
-
+	PLH::StackCanary canary;
 	tEatMessageBox MsgBox = (tEatMessageBox)oEatMessageBox;
 	MsgBox(0, "My Hook", "text", 0);
 	eatEffectTracker.PeakEffect().trigger();
@@ -68,6 +73,7 @@ int __stdcall hkEatMessageBox(HWND    hWnd,
 }
 
 TEST_CASE("Eat winapi tests", "[EatHook]") {
+	PLH::StackCanary canary;
 	LoadLibrary("User32.dll");
 
 	PLH::EatHook hook("MessageBoxA", L"User32.dll", (char*)&hkEatMessageBox, (uint64_t*)&oEatMessageBox);
@@ -83,26 +89,28 @@ TEST_CASE("Eat winapi tests", "[EatHook]") {
 }
 
 typedef  void(__stdcall* tEatGetSystemTime)(PSYSTEMTIME systemTime);
-tEatGetSystemTime oEatGetSystemTime;
+uint64_t oEatGetSystemTime;
 void WINAPI hkGetSystemTime(PSYSTEMTIME systemTime)
 {
+	PLH::StackCanary canary;
 	eatEffectTracker.PeakEffect().trigger();
-	oEatGetSystemTime(systemTime);
+	((tEatGetSystemTime)oEatGetSystemTime)(systemTime);
 }
 
 typedef void(__stdcall* tEatGetLocalTime)(PSYSTEMTIME systemTime);
-tEatGetLocalTime oEatGetLocalTime;
+uint64_t oEatGetLocalTime;
 void WINAPI hkGetLocalTime(PSYSTEMTIME systemTime)
 {
+	PLH::StackCanary canary;
 	eatEffectTracker.PeakEffect().trigger();
-	oEatGetLocalTime(systemTime);
+	((tEatGetLocalTime)oEatGetLocalTime)(systemTime);
 }
 
 TEST_CASE("Eat winapi multiple hook", "[EatHook]") {
 	// These are out of module hooks that require a trampoline stub.
 	// Multiple hooks can fail if the trampoline region isn't re-used 
 	// across multiple calls. Or if no free block is found at all
-
+	PLH::StackCanary canary;
 	PLH::EatHook hook_GST("GetSystemTime", L"kernel32.dll", (char*)&hkGetSystemTime, (uint64_t*)&oEatGetSystemTime);
 	REQUIRE(hook_GST.hook());
 	eatEffectTracker.PushEffect();
diff --git a/libs/polyhook2.0/UnitTests/TestIatHook.cpp b/libs/polyhook2.0/UnitTests/TestIatHook.cpp
index 3999103..b2049fb 100644
--- a/libs/polyhook2.0/UnitTests/TestIatHook.cpp
+++ b/libs/polyhook2.0/UnitTests/TestIatHook.cpp
@@ -1,24 +1,28 @@
 #include 
-
 #include "polyhook2/PE/IatHook.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
 
 EffectTracker iatEffectTracker;
 
 typedef DWORD(__stdcall* tGetCurrentThreadId)();
-tGetCurrentThreadId oGetCurrentThreadID;
+uint64_t oGetCurrentThreadID;
 
 NOINLINE DWORD __stdcall hkGetCurrentThreadId() {
 	iatEffectTracker.PeakEffect().trigger();
-	return oGetCurrentThreadID();
+	return ((tGetCurrentThreadId)oGetCurrentThreadID)();
 }
 
 TEST_CASE("Iat Hook Tests", "[IatHook]") {
 	SECTION("Verify api thunk is found and hooked") {
+		PLH::StackCanary canary;
+		volatile DWORD thrdId2 = GetCurrentThreadId();
+		UNREFERENCED_PARAMETER(thrdId2);
 		PLH::IatHook hook("kernel32.dll", "GetCurrentThreadId", (char*)&hkGetCurrentThreadId, (uint64_t*)&oGetCurrentThreadID, L"");
 		REQUIRE(hook.hook());
 		
 		iatEffectTracker.PushEffect();
+		REQUIRE(canary.isStackGood());
 		volatile DWORD thrdId = GetCurrentThreadId();
 		thrdId++;
 		REQUIRE(iatEffectTracker.PopEffect().didExecute());
@@ -26,13 +30,13 @@ TEST_CASE("Iat Hook Tests", "[IatHook]") {
 	}
 
 	SECTION("Verify api thunk is found and hooked when module explicitly named") {
+		PLH::StackCanary canary;
 		PLH::IatHook hook("kernel32.dll", "GetCurrentThreadId", (char*)&hkGetCurrentThreadId, (uint64_t*)&oGetCurrentThreadID, L"polyhook_2.exe");
 		REQUIRE(hook.hook());
 
 		iatEffectTracker.PushEffect();
 		volatile DWORD thrdId = GetCurrentThreadId();
 		thrdId++;
-		REQUIRE(iatEffectTracker.PopEffect().didExecute());
 		REQUIRE(hook.unHook());
 	}
 }
\ No newline at end of file
diff --git a/libs/polyhook2.0/UnitTests/TestMemProtector.cpp b/libs/polyhook2.0/UnitTests/TestMemProtector.cpp
index fdca3bd..56d68b7 100644
--- a/libs/polyhook2.0/UnitTests/TestMemProtector.cpp
+++ b/libs/polyhook2.0/UnitTests/TestMemProtector.cpp
@@ -1,8 +1,10 @@
 #include "Catch.hpp"
 #include "polyhook2/MemProtector.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
 
 TEST_CASE("Test protflag translation", "[MemProtector],[Enums]") {
 	SECTION("flags to native") {
+		PLH::StackCanary canary;
 		REQUIRE(PLH::TranslateProtection(PLH::ProtFlag::X) == PAGE_EXECUTE);
 		REQUIRE(PLH::TranslateProtection(PLH::ProtFlag::R) == PAGE_READONLY);
 		REQUIRE(PLH::TranslateProtection(PLH::ProtFlag::W) == PAGE_READWRITE);
@@ -14,6 +16,7 @@ TEST_CASE("Test protflag translation", "[MemProtector],[Enums]") {
 	}
 
 	SECTION("native to flags") {
+		PLH::StackCanary canary;
 		REQUIRE(PLH::TranslateProtection(PAGE_EXECUTE) == PLH::ProtFlag::X);
 		REQUIRE(PLH::TranslateProtection(PAGE_READONLY) == PLH::ProtFlag::R);
 		REQUIRE(PLH::TranslateProtection(PAGE_READWRITE) == (PLH::ProtFlag::W | PLH::ProtFlag::R));
@@ -24,35 +27,37 @@ TEST_CASE("Test protflag translation", "[MemProtector],[Enums]") {
 }
 
 TEST_CASE("Test setting page protections", "[MemProtector]") {
+	PLH::StackCanary canary;
 	char* page = (char*)VirtualAlloc(0, 4 * 1024, MEM_COMMIT, PAGE_NOACCESS);
 	bool isGood = page != nullptr; // indirection because catch reads var, causing access violation
 	REQUIRE(isGood);
+	PLH::MemAccessor accessor;
 
 	{
-		PLH::MemoryProtector prot((uint64_t)page, 4 * 1024, PLH::ProtFlag::R);
+		PLH::MemoryProtector prot((uint64_t)page, 4 * 1024, PLH::ProtFlag::R, accessor);
 		REQUIRE(prot.isGood());
 		REQUIRE(prot.originalProt() == PLH::ProtFlag::NONE);
 
-		PLH::MemoryProtector prot1((uint64_t)page, 4 * 1024, PLH::ProtFlag::W);
+		PLH::MemoryProtector prot1((uint64_t)page, 4 * 1024, PLH::ProtFlag::W, accessor);
 		REQUIRE(prot1.isGood());
 		REQUIRE(prot1.originalProt() == PLH::ProtFlag::R);
 
-		PLH::MemoryProtector prot2((uint64_t)page, 4 * 1024, PLH::ProtFlag::X);
+		PLH::MemoryProtector prot2((uint64_t)page, 4 * 1024, PLH::ProtFlag::X, accessor);
 		REQUIRE(prot2.isGood());
 		REQUIRE((prot2.originalProt() & PLH::ProtFlag::W));
 	}
 
 	// protection should now be NOACCESS if destructors worked
 	{
-		PLH::MemoryProtector prot((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::R);
+		PLH::MemoryProtector prot((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::R, accessor);
 		REQUIRE(prot.isGood());
 		REQUIRE(prot.originalProt() == PLH::ProtFlag::NONE);
 
-		PLH::MemoryProtector prot1((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::W);
+		PLH::MemoryProtector prot1((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::W, accessor);
 		REQUIRE(prot.isGood());
 		REQUIRE((prot1.originalProt() == (PLH::ProtFlag::X | PLH::ProtFlag::R)));
 
-		PLH::MemoryProtector prot2((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::R | PLH::ProtFlag::W);
+		PLH::MemoryProtector prot2((uint64_t)page, 4 * 1024, PLH::ProtFlag::X | PLH::ProtFlag::R | PLH::ProtFlag::W, accessor);
 		REQUIRE(prot.isGood());
 		REQUIRE(prot2.originalProt() == (PLH::ProtFlag::X | PLH::ProtFlag::R | PLH::ProtFlag::W));
 	}
diff --git a/libs/polyhook2.0/UnitTests/TestVFuncSwapHook.cpp b/libs/polyhook2.0/UnitTests/TestVFuncSwapHook.cpp
index 4c719d2..118108e 100644
--- a/libs/polyhook2.0/UnitTests/TestVFuncSwapHook.cpp
+++ b/libs/polyhook2.0/UnitTests/TestVFuncSwapHook.cpp
@@ -3,6 +3,7 @@
 #include 
 
 #include "polyhook2/Virtuals/VFuncSwapHook.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 EffectTracker vFuncSwapEffects;
@@ -11,35 +12,36 @@ class VirtualTest2 {
 public:
 	virtual ~VirtualTest2() {}
 
-	virtual int NoParamVirt() {
+	virtual int __stdcall NoParamVirt() {
 		return 4;
 	}
 
-	virtual int NoParamVirt2() {
+	virtual int __stdcall NoParamVirt2() {
 		return 7;
 	}
 };
 
 #pragma warning(disable: 4100)
 
-typedef int(__thiscall* tVirtNoParams)(uintptr_t pThis);
 PLH::VFuncMap origVFuncs2;
-
-NOINLINE int __fastcall hkVirtNoParams2(uintptr_t pThis) {
+HOOK_CALLBACK(&VirtualTest2::NoParamVirt, hkVirtNoParams2, {
+	PLH::StackCanary canary;
 	vFuncSwapEffects.PeakEffect().trigger();
-	return ((tVirtNoParams)origVFuncs2.at(1))(pThis);
-}
+	return ((hkVirtNoParams2_t)origVFuncs2.at(1))(_args...);
+});
 
-NOINLINE int __fastcall hkVirt2NoParams2(uintptr_t pThis) {
+HOOK_CALLBACK(&VirtualTest2::NoParamVirt2, hkVirt2NoParams2, {
+	PLH::StackCanary canary;
 	vFuncSwapEffects.PeakEffect().trigger();
-	return ((tVirtNoParams)origVFuncs2.at(2))(pThis);
-}
+	return ((hkVirtNoParams2_t)origVFuncs2.at(2))(_args...);
+});
 
 TEST_CASE("VFuncSwap tests", "[VFuncSwap]") {
 	std::shared_ptr<VirtualTest2> ClassToHook(new VirtualTest2);
 
 	SECTION("Verify vfunc redirected") {
-		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)&hkVirtNoParams2}};
+		PLH::StackCanary canary;
+		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)hkVirtNoParams2}};
 		PLH::VFuncSwapHook hook((char*)ClassToHook.get(), redirect, &origVFuncs2);
 		REQUIRE(hook.hook());
 		REQUIRE(origVFuncs2.size() == 1);
@@ -51,7 +53,8 @@ TEST_CASE("VFuncSwap tests", "[VFuncSwap]") {
 	}
 
 	SECTION("Verify multiple vfunc redirected") {
-		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)&hkVirtNoParams2},{(uint16_t)2, (uint64_t)&hkVirt2NoParams2}};
+		PLH::StackCanary canary;
+		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)hkVirtNoParams2},{(uint16_t)2, (uint64_t)hkVirt2NoParams2}};
 		PLH::VFuncSwapHook hook((char*)ClassToHook.get(), redirect, &origVFuncs2);
 		REQUIRE(hook.hook());
 		REQUIRE(origVFuncs2.size() == 2);
diff --git a/libs/polyhook2.0/UnitTests/TestVTableSwapHook.cpp b/libs/polyhook2.0/UnitTests/TestVTableSwapHook.cpp
index a202093..499e96c 100644
--- a/libs/polyhook2.0/UnitTests/TestVTableSwapHook.cpp
+++ b/libs/polyhook2.0/UnitTests/TestVTableSwapHook.cpp
@@ -3,6 +3,7 @@
 #include 
 
 #include "polyhook2/Virtuals/VTableSwapHook.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 EffectTracker vTblSwapEffects;
@@ -11,35 +12,33 @@ class VirtualTest {
 public:
 	virtual ~VirtualTest() {}
 
-	virtual int NoParamVirt() {
+	virtual int __stdcall NoParamVirt() {
 		return 4;
 	}
 
-	virtual int NoParamVirt2() {
+	virtual int __stdcall NoParamVirt2() {
 		return 7;
 	}
 };
 
 #pragma warning(disable: 4100)
-
-typedef int(__thiscall* tVirtNoParams)(uintptr_t pThis);
 PLH::VFuncMap origVFuncs;
-
-NOINLINE int __fastcall hkVirtNoParams(uintptr_t pThis) {
+HOOK_CALLBACK(&VirtualTest::NoParamVirt, hkVirtNoParams, {
 	vTblSwapEffects.PeakEffect().trigger();
-	return ((tVirtNoParams)origVFuncs.at(1))(pThis);
-}
+	return ((hkVirtNoParams_t)origVFuncs.at(1))(_args...);
+});
 
-NOINLINE int __fastcall hkVirt2NoParams(uintptr_t pThis) {
+HOOK_CALLBACK(&VirtualTest::NoParamVirt2, hkVirt2NoParams, {
 	vTblSwapEffects.PeakEffect().trigger();
-	return ((tVirtNoParams)origVFuncs.at(2))(pThis);
-}
+	return ((hkVirt2NoParams_t)origVFuncs.at(2))(_args...);
+});
 
 TEST_CASE("VTableSwap tests", "[VTableSwap]") {
 	std::shared_ptr<VirtualTest> ClassToHook(new VirtualTest);
 
 	SECTION("Verify vtable redirected") {
-		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)&hkVirtNoParams}};
+		PLH::StackCanary canary;
+		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)hkVirtNoParams}};
 		PLH::VTableSwapHook hook((char*)ClassToHook.get(), redirect);
 		REQUIRE(hook.hook());
 		origVFuncs = hook.getOriginals();
@@ -52,7 +51,8 @@ TEST_CASE("VTableSwap tests", "[VTableSwap]") {
 	}
 
 	SECTION("Verify multiple vtable redirected") {
-		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)&hkVirtNoParams},{(uint16_t)2, (uint64_t)&hkVirtNoParams}};
+		PLH::StackCanary canary;
+		PLH::VFuncMap redirect = {{(uint16_t)1, (uint64_t)hkVirtNoParams},{(uint16_t)2, (uint64_t)hkVirtNoParams}};
 		PLH::VTableSwapHook hook((char*)ClassToHook.get(), redirect);
 		REQUIRE(hook.hook());
 		origVFuncs = hook.getOriginals();
diff --git a/libs/polyhook2.0/UnitTests/TestVTableSwapHook2.cpp b/libs/polyhook2.0/UnitTests/TestVTableSwapHook2.cpp
new file mode 100644
index 0000000..29edaf3
--- /dev/null
+++ b/libs/polyhook2.0/UnitTests/TestVTableSwapHook2.cpp
@@ -0,0 +1,79 @@
+#include 
+#include 
+
+#include 
+
+#include "polyhook2/Virtuals/VTableSwapHook.hpp"
+#include "polyhook2/Tests/StackCanary.hpp"
+#include "polyhook2/Tests/TestEffectTracker.hpp"
+
+EffectTracker vTblSwapEffects2;
+
+// original class
+
+class MyClass {
+public:
+	virtual ~MyClass() {}
+
+	virtual int __stdcall method1(int x) {
+		return 2 * x;
+	}
+
+	virtual int __stdcall method2(int x, int y) {
+		return x + y;
+	}
+};
+
+// helper typedefs, and unique_ptr for storing the hook
+
+template<typename T>
+using VMethod1 = PLH::VFunc<1, T>;
+
+template<typename T>
+using VMethod2 = PLH::VFunc<2, T>;
+
+std::unique_ptr hook = nullptr;
+
+// hook implementations
+HOOK_CALLBACK(&MyClass::method1, myclass_method1, {
+	vTblSwapEffects2.PeakEffect().trigger();
+	return hook->origFunc>(_args...) + 1;
+});
+
+HOOK_CALLBACK(&MyClass::method2, myclass_method2, {
+	vTblSwapEffects2.PeakEffect().trigger();
+	return hook->origFunc>(_args...) + 2;
+});
+
+TEST_CASE("VTableSwap2 tests", "[VTableSwap2]") {
+	auto ClassToHook = std::make_shared<MyClass>();
+
+	SECTION("Verify vtable redirected") {
+		PLH::StackCanary canary;
+		REQUIRE(ClassToHook->method1(3) == 6);
+		REQUIRE(ClassToHook->method2(13, 9) == 22);
+		hook = std::make_unique(
+			reinterpret_cast(ClassToHook.get()),
+			VMethod1(myclass_method1),
+			VMethod2(myclass_method2));
+		REQUIRE(hook->hook());
+
+		vTblSwapEffects2.PushEffect();
+		REQUIRE(ClassToHook->method1(3) == 7);
+		REQUIRE(vTblSwapEffects2.PopEffect().didExecute());
+
+		vTblSwapEffects2.PushEffect();
+		REQUIRE(ClassToHook->method2(13, 9) == 24);
+		REQUIRE(vTblSwapEffects2.PopEffect().didExecute());
+
+		REQUIRE(hook->unHook());
+
+		vTblSwapEffects2.PushEffect();
+		REQUIRE(ClassToHook->method1(3) == 6);
+		REQUIRE(!vTblSwapEffects2.PopEffect().didExecute());
+
+		vTblSwapEffects2.PushEffect();
+		REQUIRE(ClassToHook->method2(13, 9) == 22);
+		REQUIRE(!vTblSwapEffects2.PopEffect().didExecute());
+	}
+}
\ No newline at end of file
diff --git a/libs/polyhook2.0/_config.yml b/libs/polyhook2.0/_config.yml
new file mode 100644
index 0000000..c741881
--- /dev/null
+++ b/libs/polyhook2.0/_config.yml
@@ -0,0 +1 @@
+theme: jekyll-theme-slate
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook.SConscript b/libs/polyhook2.0/polyhook.SConscript
index 126f980..17daa3a 100644
--- a/libs/polyhook2.0/polyhook.SConscript
+++ b/libs/polyhook2.0/polyhook.SConscript
@@ -6,16 +6,19 @@ sources = Glob('polyhook2/**/*.cpp', source=True)
 sources = [
     "sources/ADetour.cpp",
     "sources/CapstoneDisassembler.cpp",
+    "sources/ErrorLog.cpp",
     "sources/ILCallback.cpp",
+    "sources/MemAccessor.cpp",
     "sources/MemProtector.cpp",
     "sources/PageAllocator.cpp",
     "sources/PyCallback.cpp",
+    "sources/UID.cpp",
     "sources/x86Detour.cpp",
 ]
 
 polyhook_includes = File('CMakeLists.txt').srcnode().get_abspath()
 polyhook_includes = os.path.split(polyhook_includes)[0]
-env['polyhook_includes'] = polyhook_includes
+env['polyhook_includes'] = [ polyhook_includes ]
 
 polyhook = env.StaticLibrary(target = 'polyhook',
     source = sources,
diff --git a/libs/polyhook2.0/polyhook2/ADisassembler.hpp b/libs/polyhook2.0/polyhook2/ADisassembler.hpp
index 1e8fa8b..d71c6e1 100644
--- a/libs/polyhook2.0/polyhook2/ADisassembler.hpp
+++ b/libs/polyhook2.0/polyhook2/ADisassembler.hpp
@@ -8,6 +8,7 @@
 #include "polyhook2/ErrorLog.hpp"
 #include "polyhook2/Instruction.hpp"
 #include "polyhook2/Enums.hpp"
+#include "polyhook2/MemAccessor.hpp"
 
 #include 
 #include 
@@ -31,11 +32,11 @@ class ADisassembler {
 	 * @param Start: The address of the code buffer
 	 * @param End: The address of the end of the code buffer
 	 * **/
-	virtual insts_t disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end) = 0;
+	virtual insts_t disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end, const MemAccessor& accessor) = 0;
 
-	static void writeEncoding(const PLH::insts_t& instructions) {
+	static void writeEncoding(const PLH::insts_t& instructions, const MemAccessor& accessor) {
 		for (const auto& inst : instructions)
-			writeEncoding(inst);
+			writeEncoding(inst, accessor);
 	}
 
 	/**Write the raw bytes of the given instruction into the memory specified by the
@@ -45,8 +46,9 @@ class ADisassembler {
 	* first modify the byte array, and then call write encoding, proper order to relocate
 	* an instruction should be disasm instructions -> set relative/absolute displacement() ->
 	**/
-	static void writeEncoding(const Instruction& instruction) {
-		memcpy((void*)instruction.getAddress(), &instruction.getBytes()[0], instruction.size());
+	static void writeEncoding(const Instruction& instruction, const MemAccessor& accessor) {
+		assert(instruction.size() <= instruction.getBytes().size());
+		accessor.mem_copy(instruction.getAddress(), (uint64_t)&instruction.getBytes()[0], instruction.size());
 	}
 
 	static bool isConditionalJump(const PLH::Instruction& instruction) {
@@ -84,7 +86,16 @@ class ADisassembler {
 		* 0xFDFDFDFD : Used by Microsoft's C++ debugging heap to mark "no man's land" guard bytes before and after allocated heap memory
 		* 0xFEEEFEEE : Used by Microsoft's HeapFree() to mark freed heap memory
 		*/
-		return instruction.getMnemonic() == "ret";
+		std::string mnemonic = instruction.getMnemonic();
+		auto byts = instruction.getBytes();
+		return (instruction.size() == 1 && byts[0] == 0xCC) || 
+			(instruction.size() >= 2 && byts[0] == 0xf3 && byts[1] == 0xc3) ||
+			mnemonic == "ret" || mnemonic == "jmp" || mnemonic.find("iret") == 0;
+	}
+
+	static bool isPadBytes(const PLH::Instruction& instruction) {
+		// supports multi-byte nops
+		return instruction.getMnemonic() == "nop";
 	}
 
 	branch_map_t getBranchMap() {
@@ -95,7 +106,7 @@ class ADisassembler {
 	{
 		if (inst.isBranching()) {
 			// search back, check if new instruction points to older ones (one to one)
-			auto destInst = std::find_if(insVec.begin(), insVec.end(), [=] (const Instruction& oldIns) {
+			auto destInst = std::find_if(insVec.begin(), insVec.end(), [&] (const Instruction& oldIns) {
 				return oldIns.getAddress() == inst.getDestination();
 			});
 
diff --git a/libs/polyhook2.0/polyhook2/CapstoneDisassembler.hpp b/libs/polyhook2.0/polyhook2/CapstoneDisassembler.hpp
index 13ea511..a4e92ec 100644
--- a/libs/polyhook2.0/polyhook2/CapstoneDisassembler.hpp
+++ b/libs/polyhook2.0/polyhook2/CapstoneDisassembler.hpp
@@ -23,7 +23,7 @@ class CapstoneDisassembler : public ADisassembler {
 	virtual ~CapstoneDisassembler();
 
 	virtual std::vector<PLH::Instruction>
-		disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end) override;
+		disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end, const MemAccessor& accessor) override;
 private:
 	x86_reg getIpReg() const {
 		if (m_mode == PLH::Mode::x64)
diff --git a/libs/polyhook2.0/polyhook2/Detour/ADetour.hpp b/libs/polyhook2.0/polyhook2/Detour/ADetour.hpp
index 033bb72..bb76494 100644
--- a/libs/polyhook2.0/polyhook2/Detour/ADetour.hpp
+++ b/libs/polyhook2.0/polyhook2/Detour/ADetour.hpp
@@ -49,6 +49,8 @@ class Detour : public PLH::IHook {
 public:
 	Detour(const uint64_t fnAddress, const uint64_t fnCallback, uint64_t* userTrampVar, PLH::ADisassembler& dis) : m_disasm(dis) {
 		assert(fnAddress != 0 && fnCallback != 0);
+		assert(sizeof(*userTrampVar) == sizeof(uint64_t) && "Given trampoline holder too small");
+
 		m_fnAddress = fnAddress;
 		m_fnCallback = fnCallback;
 		m_trampoline = NULL;
@@ -59,6 +61,8 @@ class Detour : public PLH::IHook {
 
 	Detour(const char* fnAddress, const char* fnCallback, uint64_t* userTrampVar, PLH::ADisassembler& dis) : m_disasm(dis) {
 		assert(fnAddress != nullptr && fnCallback != nullptr);
+		assert(sizeof(*userTrampVar) == sizeof(uint64_t) && "Given trampoline holder too small");
+
 		m_fnAddress = (uint64_t)fnAddress;
 		m_fnCallback = (uint64_t)fnCallback;
 		m_trampoline = NULL;
@@ -67,10 +71,21 @@ class Detour : public PLH::IHook {
 		m_userTrampVar = userTrampVar;
 	}
 
-	virtual ~Detour() = default;
+	virtual ~Detour() {
+		if (m_hooked) {
+			unHook();
+		}
+	}
 
 	virtual bool unHook() override;
 
+	/**
+	This restores the hook bytes if a 3rd party uninstalled them.
+	DO NOT call this after unHook(); it may only be called after hook()
+	and before unHook().
+	**/
+	virtual bool reHook() override;
+
 	virtual HookType getType() const override {
 		return HookType::Detour;
 	}
@@ -86,6 +101,14 @@ class Detour : public PLH::IHook {
 
 	PLH::insts_t			m_originalInsts;
 
+	/*Save the instructions used for the hook so that we can re-write them in reHook().
+	Note: we also store the nop range so that it doesn't need to be re-calculated.
+	*/
+	PLH::insts_t            m_hookInsts;
+	uint16_t                m_nopProlOffset;
+	uint16_t                m_nopSize;
+	uint32_t                m_hookSize;
+
 	/**Walks the given vector of instructions and sets roundedSz to the lowest size possible that doesn't split any instructions and is greater than minSz.
 	If end of function is encountered before this condition an empty optional is returned. Returns instructions in the range start to adjusted end**/
 	std::optional calcNearestSz(const insts_t& functionInsts, const uint64_t minSz,
@@ -94,7 +117,7 @@ class Detour : public PLH::IHook {
 	/**If function starts with a jump follow it until the first non-jump instruction, recursively. This handles already hooked functions
 	and also compilers that emit jump tables on function call. Returns true if resolution was successful (nothing to resolve, or resolution worked),
 	false if resolution failed.**/
-	bool followJmp(insts_t& functionInsts, const uint8_t curDepth = 0, const uint8_t depth = 3);
+	bool followJmp(insts_t& functionInsts, const uint8_t curDepth = 0, const uint8_t depth = 5);
 
 	/**Expand the prologue up to the address of the last jmp that points back into the prologue. This
 	is necessary because we modify the location of things in the prologue, so re-entrant jmps point
@@ -110,7 +133,11 @@ class Detour : public PLH::IHook {
 	template
 	PLH::insts_t relocateTrampoline(insts_t& prologue, uint64_t jmpTblStart, const int64_t delta, const uint8_t jmpSz, MakeJmpFn makeJmp, const PLH::insts_t& instsNeedingReloc, const PLH::insts_t& instsNeedingEntry);
 
-	bool                    m_hooked;
+	/**
+	Insert nops from [Base, Base+size). We _MUST_ insert multi-byte nops so we don't accidentally
+	confuse our code cave finder for x64
+	**/
+	void writeNop(uint64_t base, uint32_t size);
 };
 
 template
@@ -123,13 +150,13 @@ PLH::insts_t PLH::Detour::relocateTrampoline(insts_t& prologue, uint64_t jmpTblS
 			assert(inst.hasDisplacement());
 			// make an entry pointing to where inst did point to
 			auto entry = makeJmp(jmpTblCurAddr, inst.getDestination());
-
+			
 			// move inst to trampoline and point instruction to entry
 			inst.setAddress(inst.getAddress() + delta);
 			inst.setDestination(jmpTblCurAddr);
 			jmpTblCurAddr += jmpSz;
 
-			m_disasm.writeEncoding(entry);
+			m_disasm.writeEncoding(entry, *this);
 			jmpTblEntries.insert(jmpTblEntries.end(), entry.begin(), entry.end());
 		} else if (std::find(instsNeedingReloc.begin(), instsNeedingReloc.end(), inst) != instsNeedingReloc.end()) {
 			assert(inst.hasDisplacement());
@@ -141,7 +168,7 @@ PLH::insts_t PLH::Detour::relocateTrampoline(insts_t& prologue, uint64_t jmpTblS
 			inst.setAddress(inst.getAddress() + delta);
 		}
 
-		m_disasm.writeEncoding(inst);
+		m_disasm.writeEncoding(inst, *this);
 	}
 	return jmpTblEntries;
 }
diff --git a/libs/polyhook2.0/polyhook2/Detour/ILCallback.hpp b/libs/polyhook2.0/polyhook2/Detour/ILCallback.hpp
index 1431524..300cb13 100644
--- a/libs/polyhook2.0/polyhook2/Detour/ILCallback.hpp
+++ b/libs/polyhook2.0/polyhook2/Detour/ILCallback.hpp
@@ -1,8 +1,7 @@
 #ifndef POLYHOOK_2_0_ILCALLBACK_HPP
 #define POLYHOOK_2_0_ILCALLBACK_HPP
 
-#pragma warning( push )
-#pragma warning( disable : 4245)
+#pragma warning(push, 0)  
 #include 
 #pragma warning( pop )
 
@@ -53,12 +52,12 @@ namespace PLH {
 		/* Construct a callback given the raw signature at runtime. 'Callback' param is the C stub to transfer to,
 		where parameters can be modified through a structure which is written back to the parameter slots depending
 		on calling convention.*/
-		uint64_t getJitFunc(const asmjit::FuncSignature& sig, const tUserCallback callback);
+		uint64_t getJitFunc(const asmjit::FuncSignature& sig, const asmjit::Environment::Arch arch, const tUserCallback callback);
 
 		/* Construct a callback given the typedef as a string. Types are any valid C/C++ data type (basic types), and pointers to
 		anything are just a uintptr_t. Calling convention is defaulted to whatever is typical for the compiler you use, you can override with
 		stdcall, fastcall, or cdecl (cdecl is default on x86). On x64 those map to the same thing.*/
-		uint64_t getJitFunc(const std::string& retType, const std::vector& paramTypes, const tUserCallback callback, std::string callConv = "");
+		uint64_t getJitFunc(const std::string& retType, const std::vector& paramTypes, const asmjit::Environment::Arch arch, const tUserCallback callback, std::string callConv = "");
 		uint64_t* getTrampolineHolder();
 	protected:
 		// does a given type fit in a general purpose register (i.e. is it integer type)
@@ -66,13 +65,16 @@ namespace PLH {
 		// float, double, simd128
 		bool isXmmReg(const uint8_t typeId) const;
 
-		asmjit::CallConv::Id getCallConv(const std::string& conv);
-		uint8_t getTypeId(const std::string& type);
+    public:
+		static asmjit::CallConv::Id getCallConv(const std::string& conv);
+		static uint8_t getTypeId(const std::string& type);
 
+    protected:
 		PageAllocator m_mem;
 		uint64_t m_callbackBuf;
 		asmjit::x86::Mem argsStack;
 
+    private:
 		// ptr to trampoline allocated by hook, we hold this so user doesn't need to.
 		uint64_t m_trampolinePtr;
 	};
diff --git a/libs/polyhook2.0/polyhook2/Detour/PyCallback.hpp b/libs/polyhook2.0/polyhook2/Detour/PyCallback.hpp
index 19a8933..d01d9b4 100644
--- a/libs/polyhook2.0/polyhook2/Detour/PyCallback.hpp
+++ b/libs/polyhook2.0/polyhook2/Detour/PyCallback.hpp
@@ -6,29 +6,31 @@
 #include 
 #include 
 
-namespace PLH {
+namespace PLH
+{
 
-class PyCallback : public ILCallback {
-public:
-	using tUserCallback = void(*)(const uint32_t unique_id, const Parameters* params, const uint8_t count, const ReturnValue* ret);
+    class PyCallback : public ILCallback
+    {
+    public:
+        using tUserCallback = void (*)(const uint32_t unique_id, const Parameters *params, const uint8_t count, const ReturnValue *ret);
 
-	PyCallback();
-	~PyCallback();
+        PyCallback() = default;
+        ~PyCallback() = default;
 
-	/* Construct a callback given the raw signature at runtime. 'Callback' param is the C stub to transfer to,
-	where parameters can be modified through a structure which is written back to the parameter slots depending
-	on calling convention.*/
-	uint64_t getJitFunc(const uint32_t unique_id, const asmjit::FuncSignature& sig, const tUserCallback callback, bool use_trampoline=false);
+        uint64_t getJitFunc(const uint32_t unique_id, const std::string &retType,
+                            const std::vector ¶mTypes, const tUserCallback callback,
+                            std::string callConv /* = ""*/);
 
-	/* Construct a callback given the typedef as a string. Types are any valid C/C++ data type (basic types), and pointers to
-	anything are just a uintptr_t. Calling convention is defaulted to whatever is typical for the compiler you use, you can override with
-	stdcall, fastcall, or cdecl (cdecl is default on x86). On x64 those map to the same thing.*/
-	uint64_t getJitFunc(const uint32_t unique_id, const std::string& retType, const std::vector& paramTypes, const tUserCallback callback, std::string callConv = "", bool use_trampoline=false);
+        // Construct a callback given the typedef as a string. Types are any valid C/C++ data type (basic types), and pointers to
+        // anything are just a uintptr_t. Calling convention is defaulted to whatever is typical for the compiler you use, you can override with
+        // stdcall, fastcall, or cdecl (cdecl is default on x86). On x64 those map to the same thing.
+        uint64_t getJitFunc(const uint32_t unique_id, const asmjit::FuncSignature &sig, const asmjit::Environment::Arch arch,
+                            const tUserCallback callback, bool use_trampoline = false);
 
-protected:
-	asmjit::CallConv::Id getCallConv(const std::string& conv);
+        uint64_t getJitFunc(const uint32_t native_address, const asmjit::FuncSignature &sig, const asmjit::Environment::Arch arch);
 
-};
+        static asmjit::CallConv::Id getCallConv(const std::string &conv);
+    };
 
 } // namespace PLH
 
diff --git a/libs/polyhook2.0/polyhook2/Detour/x64Detour.hpp b/libs/polyhook2.0/polyhook2/Detour/x64Detour.hpp
index fe22050..c5bac7e 100644
--- a/libs/polyhook2.0/polyhook2/Detour/x64Detour.hpp
+++ b/libs/polyhook2.0/polyhook2/Detour/x64Detour.hpp
@@ -5,9 +5,8 @@
 #ifndef POLYHOOK_2_X64DETOUR_HPP
 #define POLYHOOK_2_X64DETOUR_HPP
 
-#include 
-#include 
 #include 
+#include 
 using namespace std::placeholders;
 
 #include "polyhook2/Detour/ADetour.hpp"
@@ -15,7 +14,6 @@ using namespace std::placeholders;
 #include "polyhook2/Instruction.hpp"
 #include "polyhook2/ADisassembler.hpp"
 #include "polyhook2/ErrorLog.hpp"
-#include "polyhook2/MemProtector.hpp"
 
 namespace PLH {
 
@@ -34,6 +32,10 @@ class x64Detour : public Detour {
 	uint8_t getPrefJmpSize() const;
 private:
 	bool makeTrampoline(insts_t& prologue, insts_t& trampolineOut);
+
+	// assumes we are looking within a +-2GB window
+	template
+	std::optional findNearestCodeCave(uint64_t addr);
 };
 }
 #endif //POLYHOOK_2_X64DETOUR_HPP
diff --git a/libs/polyhook2.0/polyhook2/Enums.hpp b/libs/polyhook2.0/polyhook2/Enums.hpp
index 7b1ea96..d1532e3 100644
--- a/libs/polyhook2.0/polyhook2/Enums.hpp
+++ b/libs/polyhook2.0/polyhook2/Enums.hpp
@@ -19,6 +19,7 @@ enum class HookType {
 	UNKNOWN
 };
 
+
 //unsafe enum by design to allow binary OR
 enum ProtFlag : std::uint8_t {
 	UNSET = 0, // Value means this give no information about protection state (un-read)
diff --git a/libs/polyhook2.0/polyhook2/ErrorLog.hpp b/libs/polyhook2.0/polyhook2/ErrorLog.hpp
index 87578c2..63ea5fe 100644
--- a/libs/polyhook2.0/polyhook2/ErrorLog.hpp
+++ b/libs/polyhook2.0/polyhook2/ErrorLog.hpp
@@ -4,64 +4,49 @@
 #include 
 #include 
 #include 
+#include <memory>
 #include "polyhook2/Enums.hpp"
 
 namespace PLH {
 
-struct Error {
-	std::string msg;
-	ErrorLevel lvl;
+// abstract base class for logging, clients should subclass this to intercept log messages
+class Logger
+{
+public:
+	virtual void log(std::string msg, ErrorLevel level) = 0;
+	virtual ~Logger() {};
 };
 
-class ErrorLog {
+// class for registering client loggers
+class Log
+{
+private:
+	static std::shared_ptr<Logger> m_logger;
 public:
-	void setLogLevel(ErrorLevel level) {
-		m_logLevel = level;
-	}
-	
-	void push(std::string msg, ErrorLevel level) {
-			push({ std::move(msg), level });
-	}
+	static void registerLogger(std::shared_ptr<Logger> logger);
+	static void log(std::string msg, ErrorLevel level);
+};
 
-	void push(Error err) {
-		if (err.lvl >= m_logLevel) { 
-			switch (err.lvl) {
-			case ErrorLevel::INFO:
-				std::cout << "[+] Info: " << err.msg << std::endl;
-				break;
-			case ErrorLevel::WARN:
-				std::cout << "[!] Warn: " << err.msg << std::endl;
-				break;
-			case ErrorLevel::SEV:
-				std::cout << "[!] Error: " << err.msg << std::endl;
-				break;
-			default:
-				std::cout << "Unsupported error message logged " << err.msg << std::endl;
-			}
-		}
-		
-		m_log.push_back(std::move(err));
-	}
+// simple logger implementation
 
-	Error pop() {
-		Error err{};
-		if (!m_log.empty()) {
-			err = std::move(m_log.back());
-			m_log.pop_back();
-		}
-		return err;
-	}
+struct Error {
+	std::string msg;
+	ErrorLevel lvl;
+};
 
-	static ErrorLog& singleton() {
-		static ErrorLog log;
-		return log;
-	}
+class ErrorLog : public Logger {
+public:
+	void setLogLevel(ErrorLevel level);
+	void log(std::string msg, ErrorLevel level);
+	void push(std::string msg, ErrorLevel level);
+	void push(Error err);
+	Error pop();
+	static ErrorLog& singleton();
 private:
 	std::vector m_log;
 	ErrorLevel m_logLevel = ErrorLevel::INFO;
 };
 
-
 }
 
 #endif
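The Logger/Log split above lets client code intercept library messages instead of polling a queue. A minimal sketch of a client-side logger, assuming only the interface declared in this hunk (FileLogger and the log path are illustrative):

#include "polyhook2/ErrorLog.hpp"
#include <fstream>
#include <memory>

class FileLogger : public PLH::Logger {
public:
	explicit FileLogger(const char* path) : m_out(path, std::ios::app) {}
	void log(std::string msg, PLH::ErrorLevel level) override {
		// mirror the severity prefixes ErrorLog prints to the console
		switch (level) {
		case PLH::ErrorLevel::INFO: m_out << "[+] Info: ";  break;
		case PLH::ErrorLevel::WARN: m_out << "[!] Warn: ";  break;
		case PLH::ErrorLevel::SEV:  m_out << "[!] Error: "; break;
		default:                    m_out << "[?] ";        break;
		}
		m_out << msg << std::endl;
	}
private:
	std::ofstream m_out;
};

// during startup:
// PLH::Log::registerLogger(std::make_shared<FileLogger>("polyhook.log"));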
diff --git a/libs/polyhook2.0/polyhook2/EventDispatcher.hpp b/libs/polyhook2.0/polyhook2/EventDispatcher.hpp
new file mode 100644
index 0000000..e0d31a0
--- /dev/null
+++ b/libs/polyhook2.0/polyhook2/EventDispatcher.hpp
@@ -0,0 +1,27 @@
+#pragma once
+#include 
+#include 
+
+namespace PLH {
+template<typename T>
+class EventDispatcher
+{
+public:
+	typedef std::function<T> Event;
+	void operator+=(const Event& event);
+
+	template<typename... Args>
+	typename Event::result_type Invoke(Args&& ...Params)
+	{
+		return m_Event(std::forward<Args>(Params)...);
+	}
+private:
+	Event m_Event;
+};
+
+template<typename T>
+void EventDispatcher<T>::operator+=(const Event& event)
+{
+	m_Event = event;
+}
+}
\ No newline at end of file
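EventDispatcher wraps a single std::function: operator+= replaces whatever handler was registered before, and Invoke forwards its arguments and returns the callable's result. A small usage sketch, assuming the stripped template parameter is the callable signature:

#include "polyhook2/EventDispatcher.hpp"
#include <iostream>

void eventDispatcherExample() {
	PLH::EventDispatcher<bool(int)> onEvent;
	onEvent += [](int code) {
		std::cout << "event " << code << std::endl;
		return true;
	};
	bool handled = onEvent.Invoke(42); // returns the handler's result
	(void)handled;
}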
diff --git a/libs/polyhook2.0/polyhook2/Exceptions/AVehHook.hpp b/libs/polyhook2.0/polyhook2/Exceptions/AVehHook.hpp
index 94ecdd1..7353072 100644
--- a/libs/polyhook2.0/polyhook2/Exceptions/AVehHook.hpp
+++ b/libs/polyhook2.0/polyhook2/Exceptions/AVehHook.hpp
@@ -2,12 +2,13 @@
 #define POLYHOOK_2_0_VEHHOOK_HPP
 
 #include 
-#include <map>
+#include <unordered_set>
 
 #include "polyhook2/MemProtector.hpp"
 #include "polyhook2/ErrorLog.hpp"
 #include "polyhook2/IHook.hpp"
 #include "polyhook2/Enums.hpp"
+#include "polyhook2/EventDispatcher.hpp"
 
 namespace PLH {
 
@@ -22,6 +23,42 @@ class RefCounter {
 	uint16_t m_count = 0;
 };
 
+enum class AVehHookImpType {
+	SINGLE, // the exception is expected at exactly one address (end address ignored)
+	RANGE // the exception may occur anywhere within [startAddress, endAddress)
+};
+
+class AVehHook;
+struct AVehHookImpEntry {
+	uint64_t startAddress; // start address impl applies to
+	uint64_t endAddress; // end address impl applies to
+	AVehHook* impl; // the instance to forward to
+	AVehHookImpType type;
+
+	AVehHookImpEntry(uint64_t start, AVehHook* imp) {
+		startAddress = start;
+		endAddress = 0;
+		impl = imp;
+		type = AVehHookImpType::SINGLE;
+	}
+
+	AVehHookImpEntry(uint64_t start, uint64_t end, AVehHook* imp) {
+		startAddress = start;
+		endAddress = end;
+		impl = imp;
+		type = AVehHookImpType::RANGE;
+	}
+};
+
+inline bool operator==(const AVehHookImpEntry& lhs, const AVehHookImpEntry& rhs)
+{
+	return lhs.type == rhs.type && lhs.startAddress == rhs.startAddress && lhs.endAddress == rhs.endAddress;
+}
+
+
+
+
+typedef EventDispatcher<bool(EXCEPTION_POINTERS*, DWORD*)> eException;
 class AVehHook : public IHook {
 public:
 	AVehHook();
@@ -30,15 +67,33 @@ class AVehHook : public IHook {
 	virtual HookType getType() const {
 		return HookType::VEHHOOK;
 	}
+
+	/**If the callback returns true, the VEH handler immediately returns the code the callback wrote and skips the built-in dispatch**/
+	static eException& EventException();
+	static eException& EventUnhandledException();
 protected:
 	// May not allocate or acquire synchonization objects in this
 	virtual LONG OnException(EXCEPTION_POINTERS* ExceptionInfo) = 0;
 
 	static RefCounter m_refCount;
 	static void* m_hHandler;
-	static std::map<uint64_t, AVehHook*> m_impls;
+	static std::unordered_set<AVehHookImpEntry> m_impls;
 	static LONG CALLBACK Handler(EXCEPTION_POINTERS* ExceptionInfo);
+	static eException m_onException;
+	static eException m_onUnhandledException;
 };
 }
 
+namespace std {
+	template<> struct hash<PLH::AVehHookImpEntry>
+	{
+		std::size_t operator()(const PLH::AVehHookImpEntry& e) const noexcept
+		{
+			auto h1 = std::hash<uint64_t>{}(e.startAddress);
+			auto h2 = std::hash<uint64_t>{}(e.endAddress);
+			return h1 ^ (h2 << 1);
+		}
+	};
+}
+
 #endif
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/Exceptions/BreakPointHook.hpp b/libs/polyhook2.0/polyhook2/Exceptions/BreakPointHook.hpp
index 431c7ee..b36fafa 100644
--- a/libs/polyhook2.0/polyhook2/Exceptions/BreakPointHook.hpp
+++ b/libs/polyhook2.0/polyhook2/Exceptions/BreakPointHook.hpp
@@ -12,12 +12,17 @@ class BreakPointHook : public AVehHook {
 public:
 	BreakPointHook(const uint64_t fnAddress, const uint64_t fnCallback);
 	BreakPointHook(const char* fnAddress, const char* fnCallback);
-	~BreakPointHook();
+	~BreakPointHook() {
+		m_impls.erase(AVehHookImpEntry(m_fnAddress, this));
+		if (m_hooked) {
+			unHook();
+		}
+	}
 
 	virtual bool hook() override;
 	virtual bool unHook() override;
 	auto getProtectionObject() {
-		return finally([=] () {
+		return finally([&] () {
 			hook();
 		});
 	}
diff --git a/libs/polyhook2.0/polyhook2/Exceptions/HWBreakPointHook.hpp b/libs/polyhook2.0/polyhook2/Exceptions/HWBreakPointHook.hpp
index 4e7228c..7c0e0cc 100644
--- a/libs/polyhook2.0/polyhook2/Exceptions/HWBreakPointHook.hpp
+++ b/libs/polyhook2.0/polyhook2/Exceptions/HWBreakPointHook.hpp
@@ -12,12 +12,17 @@ class HWBreakPointHook : public AVehHook {
 public:
 	HWBreakPointHook(const uint64_t fnAddress, const uint64_t fnCallback, HANDLE hThread);
 	HWBreakPointHook(const char* fnAddress, const char* fnCallback, HANDLE hThread);
-	~HWBreakPointHook();
+	~HWBreakPointHook() {
+		m_impls.erase(AVehHookImpEntry(m_fnAddress, this));
+		if (m_hooked) {
+			unHook();
+		}
+	}
 
 	virtual bool hook() override;
 	virtual bool unHook() override;
 	auto getProtectionObject() {
-		return finally([=] () {
+		return finally([&] () {
 			hook();
 		});
 	}
diff --git a/libs/polyhook2.0/polyhook2/IHook.hpp b/libs/polyhook2.0/polyhook2/IHook.hpp
index b7c0c48..4b61420 100644
--- a/libs/polyhook2.0/polyhook2/IHook.hpp
+++ b/libs/polyhook2.0/polyhook2/IHook.hpp
@@ -8,6 +8,12 @@
 
 #include "polyhook2/ADisassembler.hpp"
 #include "polyhook2/Enums.hpp"
+#include "polyhook2/MemAccessor.hpp"
+
+#include 
+#include 
+#include 
+
 
 #if defined(__clang__)
 #define NOINLINE __attribute__((noinline))
@@ -28,10 +34,11 @@ _Pragma("GCC optimize (\"O0\")")
 #define PH_UNUSED(a) (void)a
 
 namespace PLH {
-class IHook {
+class IHook : public MemAccessor {
 public:
 	IHook() {
 		m_debugSet = false;
+		m_hooked = false;
 	}
 
 	IHook(IHook&& other) = default; //move
@@ -44,6 +51,11 @@ class IHook {
 
 	virtual bool unHook() = 0;
 
+	// this is allowed to be nothing by default
+	virtual bool reHook() {
+		return true;
+	}
+
 	virtual HookType getType() const = 0;
 
 	virtual void setDebug(const bool state) {
@@ -52,6 +64,80 @@ class IHook {
 
 protected:
 	bool m_debugSet;
+	bool m_hooked;
+};
+
+//Thanks @_can1357 for help with this.
+template
+struct callback_type { using type = T; };
+
+template
+using callback_type_t = typename callback_type::type;
+
+template
+using callback_type_v = typename callback_type::type;
+
+#define MAKE_CALLBACK_IMPL(CCFROM, CCTO) template \
+auto make_callback(Ret(CCFROM*)(Args...), F&& f) \
+{ \
+    Ret(CCTO * fn)(Args...) = f; \
+    return fn; \
+} \
+template \
+struct callback_type \
+{ \
+    using type = Ret(CCTO*)(Args...); \
+};
+
+// switch to __VA_OPT__ once C++20 is available. MSVC removes comma before empty __VA_ARGS__ as is.
+// https://devblogs.microsoft.com/cppblog/msvc-preprocessor-progress-towards-conformance/
+#define MAKE_CALLBACK_CLASS_IMPL(CCFROM, CCTO, ...) template \
+auto make_callback(Ret(CCFROM Class::*)(Args...), F&& f) \
+{ \
+    Ret(CCTO * fn)(Class*, ## __VA_ARGS__, Args...) = f; \
+    return fn; \
+} \
+template \
+struct callback_type \
+{ \
+    using type = Ret(CCTO*)(Class*, ## __VA_ARGS__, Args...); \
 };
+
+#ifndef _WIN64 
+MAKE_CALLBACK_IMPL(__stdcall, __stdcall)
+MAKE_CALLBACK_CLASS_IMPL(__stdcall, __stdcall)
+
+MAKE_CALLBACK_IMPL(__cdecl, __cdecl)
+MAKE_CALLBACK_CLASS_IMPL(__cdecl, __cdecl)
+
+MAKE_CALLBACK_IMPL(__thiscall, __thiscall)
+MAKE_CALLBACK_CLASS_IMPL(__thiscall, __fastcall, char*)
+#endif
+
+#ifndef __GNUC__
+MAKE_CALLBACK_IMPL(__fastcall, __fastcall)
+MAKE_CALLBACK_CLASS_IMPL(_fastcall, __fastcall)
+#endif
+
+template <size_t N, typename... Ts>
+decltype(auto) get_pack_idx(Ts&&... ts) {
+	return std::get<N>(std::forward_as_tuple(ts...));
+}
 }
+
+/**
+Creates a hook callback function pointer that matches the type of a given function definition. The name variable
+will be a pointer to the function, and the variables _args... and name_t will be created to represent the original
+arguments of the function and the type of the callback respectively.
+**/
+#define HOOK_CALLBACK(pType, name, body) typedef PLH::callback_type_t<decltype(pType)> name##_t; \
+PLH::callback_type_t<decltype(pType)> name = PLH::make_callback(pType, [](auto... _args) body )
+
+/**
+When using the HOOK_CALLBACK macro this helper utility can be used to retrieve one of the original
+arguments by index. The type and value will exactly match that of the original function at that index.
+For member functions this is essentially 1-indexed because the first param is this*.
+**/
+#define GET_ARG(idx) PLH::get_pack_idx<idx>(_args...)
+
 #endif //POLYHOOK_2_0_IHOOK_HPP
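A sketch of how the HOOK_CALLBACK and GET_ARG helpers are meant to be used; the target function and the names below are made up for illustration and assume an x64 MSVC build, where the calling-convention overloads collapse to a single function-pointer type:

#include "polyhook2/IHook.hpp"
#include <cstdio>

int add(int a, int b) { return a + b; } // hypothetical hook target

// declares hkAdd_t (the callback's type) and hkAdd (a function pointer built
// from the generic lambda), both matching add's signature
HOOK_CALLBACK(&add, hkAdd, {
	printf("add(%d, %d) intercepted\n", GET_ARG(0), GET_ARG(1));
	return GET_ARG(0) + GET_ARG(1);
});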
diff --git a/libs/polyhook2.0/polyhook2/Instruction.hpp b/libs/polyhook2.0/polyhook2/Instruction.hpp
index d3719aa..b9fa6e3 100644
--- a/libs/polyhook2.0/polyhook2/Instruction.hpp
+++ b/libs/polyhook2.0/polyhook2/Instruction.hpp
@@ -28,30 +28,32 @@ class Instruction {
 				const Displacement& displacement,
 				const uint8_t displacementOffset,
 				const bool isRelative,
+		        const bool isIndirect,
 				const std::vector& bytes,
 				const std::string& mnemonic,
 				const std::string& opStr,
 				Mode mode) : m_uid(UID::singleton()) {
 
-		Init(address, displacement, displacementOffset, isRelative, bytes, mnemonic, opStr, false, m_uid, mode);
+		Init(address, displacement, displacementOffset, isRelative, isIndirect, bytes, mnemonic, opStr, false, m_uid, mode);
 	}
 
 	Instruction(uint64_t address,
 				const Displacement& displacement,
 				const uint8_t displacementOffset,
-				bool isRelative,
+				const bool isRelative,
+		        const bool isIndirect,
 				uint8_t bytes[],
-				size_t arrLen,
+				const size_t arrLen,
 				const std::string& mnemonic,
 				const std::string& opStr,
 				Mode mode) : m_uid(UID::singleton()) {
 
 		std::vector Arr(bytes, bytes + arrLen);
-		Init(address, displacement, displacementOffset, isRelative, Arr, mnemonic, opStr, false, m_uid, mode);
+		Init(address, displacement, displacementOffset, isRelative, isIndirect, Arr, mnemonic, opStr, false, m_uid, mode);
 	}
 
 	Instruction& operator=(const Instruction& rhs) {
-		Init(rhs.m_address, rhs.m_displacement, rhs.m_dispOffset, rhs.m_isRelative,
+		Init(rhs.m_address, rhs.m_displacement, rhs.m_dispOffset, rhs.m_isRelative, rhs.m_isIndirect,
 			 rhs.m_bytes, rhs.m_mnemonic, rhs.m_opStr, rhs.m_hasDisplacement, rhs.m_uid, rhs.m_mode);
 		return *this;
 	}
@@ -60,11 +62,22 @@ class Instruction {
 	* @Notes: Handles eip/rip & immediate branches correctly
 	* **/
 	uint64_t getDestination() const {
+		uint64_t dest = 0;
 		if (isDisplacementRelative()) {
-			uint64_t dest = m_address + m_displacement.Relative + size();
-			return dest;
+			dest = m_address + m_displacement.Relative + size();
+		} else {
+			dest = m_displacement.Absolute;
+		}
+
+		// ff 25 00 00 00 00 decodes as jmp qword ptr [rip + 0] on x64 but jmp dword ptr [disp32] on x86
+		if (m_isIndirect) {
+			if (m_mode == Mode::x64) {
+				dest = *(uint64_t*)dest;
+			} else {
+				dest = *(uint32_t*)dest;
+			}
 		}
-		return m_displacement.Absolute;
+		return dest;
 	}
 
 	void setDestination(const uint64_t dest) {
@@ -165,12 +178,12 @@ class Instruction {
 		m_hasDisplacement = true;
 
 		const uint32_t dispSz = (uint32_t)(size() - getDisplacementOffset());
-		if (getDisplacementOffset() + dispSz > m_bytes.size() || dispSz > sizeof(m_displacement.Relative)) {
+		if (((uint32_t)getDisplacementOffset()) + dispSz > m_bytes.size() || dispSz > sizeof(m_displacement.Relative)) {
 			__debugbreak();
 			return;
 		}
 
-		assert(getDisplacementOffset() + dispSz <= m_bytes.size() && dispSz <= sizeof(m_displacement.Relative));
+		assert((uint32_t)getDisplacementOffset() + dispSz <= m_bytes.size() && dispSz <= sizeof(m_displacement.Relative));
 		std::memcpy(&m_bytes[getDisplacementOffset()], &m_displacement.Relative, dispSz);
 	}
 
@@ -183,12 +196,12 @@ class Instruction {
 		m_hasDisplacement = true;
 
 		const uint32_t dispSz = (uint32_t)(size() - getDisplacementOffset());
-		if (getDisplacementOffset() + dispSz > m_bytes.size() || dispSz > sizeof(m_displacement.Absolute)) {
+		if (((uint32_t)getDisplacementOffset()) + dispSz > m_bytes.size() || dispSz > sizeof(m_displacement.Absolute)) {
 			__debugbreak();
 			return;
 		}
 
-		assert(getDisplacementOffset() + dispSz <= m_bytes.size() && dispSz <= sizeof(m_displacement.Absolute));
+		assert(((uint32_t)getDisplacementOffset()) + dispSz <= m_bytes.size() && dispSz <= sizeof(m_displacement.Absolute));
 		std::memcpy(&m_bytes[getDisplacementOffset()], &m_displacement.Absolute, dispSz);
 	}
 
@@ -202,11 +215,21 @@ class Instruction {
 			return (T)(0 - (from - to) - insSize);
 		return (T)(to - (from + insSize));
 	}
+
+	void setIndirect(const bool isIndirect) {
+		m_isIndirect = isIndirect;
+	}
+
+	bool         m_isRelative;      // Does the displacement need to be added to the address to retrieve where it points to?
+	bool         m_hasDisplacement; // Does this instruction have the displacement fields filled (only rip/eip relative types are filled)
+	bool         m_isBranching;     // Does this instruction jmp/call or otherwise change control flow
+	bool         m_isIndirect;      // Does this instruction get its destination via an indirect mem read (ff 25 ... jmp [jmp_dest]) (only filled for jmps / calls)
 private:
 	void Init(const uint64_t address,
 			  const Displacement& displacement,
 			  const uint8_t displacementOffset,
 			  const bool isRelative,
+		      const bool isIndirect,
 			  const std::vector& bytes,
 			  const std::string& mnemonic,
 			  const std::string& opStr,
@@ -217,6 +240,7 @@ class Instruction {
 		m_displacement = displacement;
 		m_dispOffset = displacementOffset;
 		m_isRelative = isRelative;
+		m_isIndirect = isIndirect;
 		m_hasDisplacement = hasDisp;
 
 		m_bytes = bytes;
@@ -227,12 +251,9 @@ class Instruction {
 		m_mode = mode;
 	}
 
-	uint64_t     m_address;       //Address the instruction is at
-	Displacement m_displacement;  //Where an instruction points too (valid for jmp + call types)
-	uint8_t      m_dispOffset;    //Offset into the byte array where displacement is encoded
-	bool         m_isRelative;    //Does the displacement need to be added to the address to retrieve where it points too?
-	bool         m_hasDisplacement; //Does this instruction have the displacement fields filled (only rip/eip relative types are filled)
-	bool		 m_isBranching; //Does this instrunction jmp/call or otherwise change control flow
+	uint64_t     m_address;         // Address the instruction is at
+	Displacement m_displacement;    // Where an instruction points to (valid for jmp + call types)
+	uint8_t      m_dispOffset;      // Offset into the byte array where displacement is encoded
 
 	std::vector m_bytes; //All the raw bytes of this instruction
 	std::string          m_mnemonic; //If you don't know what these two are then gtfo of this source code :)
@@ -305,6 +326,7 @@ inline PLH::insts_t makex64PreferredJump(const uint64_t address, const uint64_t
 		zeroDisp,
 		0,
 		false,
+		false,
 		raxBytes,
 		"push",
 		"rax", Mode::x64);
@@ -319,17 +341,17 @@ inline PLH::insts_t makex64PreferredJump(const uint64_t address, const uint64_t
 	movRaxBytes[1] = 0xB8;
 	memcpy(&movRaxBytes[2], &destination, 8);
 
-	Instruction movRax(curInstAddress, zeroDisp, 0, false,
+	Instruction movRax(curInstAddress, zeroDisp, 0, false, false,
 		movRaxBytes, "mov", "rax, " + ss.str(), Mode::x64);
 	curInstAddress += movRax.size();
 
 	std::vector xchgBytes = { 0x48, 0x87, 0x04, 0x24 };
-	Instruction xchgRspRax(curInstAddress, zeroDisp, 0, false,
+	Instruction xchgRspRax(curInstAddress, zeroDisp, 0, false, false,
 		xchgBytes, "xchg", "QWORD PTR [rsp],rax", Mode::x64);
 	curInstAddress += xchgRspRax.size();
 
 	std::vector retBytes = { 0xC3 };
-	Instruction ret(curInstAddress, zeroDisp, 0, false,
+	Instruction ret(curInstAddress, zeroDisp, 0, false, false,
 		retBytes, "ret", "", Mode::x64);
 	curInstAddress += ret.size();
 
@@ -347,7 +369,7 @@ inline PLH::insts_t makex64MinimumJump(const uint64_t address, const uint64_t de
 	std::vector destBytes;
 	destBytes.resize(8);
 	memcpy(destBytes.data(), &destination, 8);
-	Instruction specialDest(destHolder, disp, 0, false, destBytes, "dest holder", "", Mode::x64);
+	Instruction specialDest(destHolder, disp, 0, false, false, destBytes, "dest holder", "", Mode::x64);
 
 	std::vector bytes;
 	bytes.resize(6);
@@ -358,7 +380,7 @@ inline PLH::insts_t makex64MinimumJump(const uint64_t address, const uint64_t de
 	std::stringstream ss;
 	ss << std::hex << "[" << destHolder << "] ->" << destination;
 
-	return { Instruction(address, disp, 2, true, bytes, "jmp", ss.str(), Mode::x64),  specialDest };
+	return { Instruction(address, disp, 2, true, true, bytes, "jmp", ss.str(), Mode::x64),  specialDest };
 }
 
 inline PLH::insts_t makex86Jmp(const uint64_t address, const uint64_t destination) {
@@ -372,7 +394,7 @@ inline PLH::insts_t makex86Jmp(const uint64_t address, const uint64_t destinatio
 	std::stringstream ss;
 	ss << std::hex << destination;
 
-	return { Instruction(address, disp, 1, true, bytes, "jmp", ss.str(), Mode::x86) };
+	return { Instruction(address, disp, 1, true, false, bytes, "jmp", ss.str(), Mode::x86) };
 }
 
 
diff --git a/libs/polyhook2.0/polyhook2/MemAccessor.hpp b/libs/polyhook2.0/polyhook2/MemAccessor.hpp
new file mode 100644
index 0000000..f64098c
--- /dev/null
+++ b/libs/polyhook2.0/polyhook2/MemAccessor.hpp
@@ -0,0 +1,39 @@
+
+#ifndef POLYHOOK_2_MEMORYACCESSOR_HPP
+#define POLYHOOK_2_MEMORYACCESSOR_HPP
+#include 
+#include 
+#include "polyhook2/Enums.hpp"
+
+namespace PLH {
+	/**
+	Overriding these routines can allow cross-process/cross-arch hooks
+	**/
+	class MemAccessor {
+	public:
+		virtual ~MemAccessor() = default;
+
+		/**
+		Defines a memory read/write routine that may fail ungracefully. It's expected
+		this library will only ever use this routine in cases that are expected to succeed.
+		**/
+		virtual bool mem_copy(uint64_t dest, uint64_t src, uint64_t size) const;
+
+		/**
+		Defines a memory write routine that will not throw exceptions, and can handle potential
+		writes to NO_ACCESS or otherwise inaccessible memory pages. Defaults to WriteProcessMemory.
+		Must fail gracefully.
+		**/
+		virtual bool safe_mem_write(uint64_t dest, uint64_t src, uint64_t size, size_t& written) const noexcept;
+
+		/**
+		Defines a memory read routine that will not throw exceptions, and can handle potential
+		reads from NO_ACCESS or otherwise inaccessible memory pages. Defaults to ReadProcessMemory.
+		Must fail gracefully.
+		**/
+		virtual bool safe_mem_read(uint64_t src, uint64_t dest, uint64_t size, size_t& read) const noexcept;
+	
+		virtual PLH::ProtFlag mem_protect(uint64_t dest, uint64_t size, PLH::ProtFlag newProtection, bool& status) const;
+	};
+}
+#endif
\ No newline at end of file
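The header above only declares the accessor routines; their default definitions live in a source file that is not part of this hunk. A minimal sketch of what the in-process default for mem_copy could look like, under that assumption:

#include "polyhook2/MemAccessor.hpp"
#include <cstring>

bool PLH::MemAccessor::mem_copy(uint64_t dest, uint64_t src, uint64_t size) const {
	// both addresses are in the current process for the default accessor
	std::memcpy((void*)dest, (void*)src, (size_t)size);
	return true;
}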
diff --git a/libs/polyhook2.0/polyhook2/MemProtector.hpp b/libs/polyhook2.0/polyhook2/MemProtector.hpp
index 76c5db0..c044bb6 100644
--- a/libs/polyhook2.0/polyhook2/MemProtector.hpp
+++ b/libs/polyhook2.0/polyhook2/MemProtector.hpp
@@ -5,6 +5,7 @@
 #ifndef POLYHOOK_2_MEMORYPROTECTOR_HPP
 #define POLYHOOK_2_MEMORYPROTECTOR_HPP
 
+#include "polyhook2/MemAccessor.hpp"
 #include "polyhook2/Enums.hpp"
 #ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
@@ -19,19 +20,22 @@ PLH::ProtFlag operator|(PLH::ProtFlag lhs, PLH::ProtFlag rhs);
 bool operator&(PLH::ProtFlag lhs, PLH::ProtFlag rhs);
 std::ostream& operator<<(std::ostream& os, const PLH::ProtFlag v);
 
+// prefer enum class over enum
+#pragma warning( disable : 26812)
+
 namespace PLH {
 int	TranslateProtection(const PLH::ProtFlag flags);
 ProtFlag TranslateProtection(const int prot);
 
 class MemoryProtector {
 public:
-	MemoryProtector(const uint64_t address, const uint64_t length, const PLH::ProtFlag prot, bool unsetOnDestroy = true) {
+	MemoryProtector(const uint64_t address, const uint64_t length, const PLH::ProtFlag prot, MemAccessor& accessor, bool unsetOnDestroy = true) : m_accessor(accessor) {
 		m_address = address;
 		m_length = length;
 		unsetLater = unsetOnDestroy;
 
 		m_origProtection = PLH::ProtFlag::UNSET;
-		m_origProtection = protect(address, length, TranslateProtection(prot));
+		m_origProtection = m_accessor.mem_protect(address, length, prot, status);
 	}
 
 	PLH::ProtFlag originalProt() {
@@ -46,17 +50,11 @@ class MemoryProtector {
 		if (m_origProtection == PLH::ProtFlag::UNSET || !unsetLater)
 			return;
 
-		protect(m_address, m_length, TranslateProtection(m_origProtection));
+		m_accessor.mem_protect(m_address, m_length, m_origProtection, status);
 	}
 private:
-	PLH::ProtFlag protect(const uint64_t address, const uint64_t length, int prot) {
-		DWORD orig;
-		DWORD dwProt = prot;
-		status = VirtualProtect((char*)address, (SIZE_T)length, dwProt, &orig) != 0;
-		return TranslateProtection(orig);
-	}
-
 	PLH::ProtFlag m_origProtection;
+	MemAccessor& m_accessor;
 
 	uint64_t m_address;
 	uint64_t m_length;
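MemoryProtector remains a scope guard after this change: the constructor switches protection through the given accessor and the destructor restores the original flags. A short usage sketch (the address and accessor are illustrative; note that every IHook is now itself a MemAccessor):

#include "polyhook2/MemProtector.hpp"

void patchByte(PLH::MemAccessor& accessor, uint64_t addr, uint8_t value) {
	PLH::MemoryProtector prot(addr, 1, PLH::ProtFlag::R | PLH::ProtFlag::W | PLH::ProtFlag::X, accessor);
	accessor.mem_copy(addr, (uint64_t)&value, sizeof(value));
	// original protection is restored when 'prot' leaves scope
}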
diff --git a/libs/polyhook2.0/polyhook2/Misc.hpp b/libs/polyhook2.0/polyhook2/Misc.hpp
index dd0ba88..8c175ab 100644
--- a/libs/polyhook2.0/polyhook2/Misc.hpp
+++ b/libs/polyhook2.0/polyhook2/Misc.hpp
@@ -8,8 +8,9 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
+#include 
 
 namespace PLH {
 
@@ -133,6 +134,37 @@ struct ci_wchar_traits : public std::char_traits {
     }
 };
 
+inline bool isMatch(const char* addr, const char* pat, const char* msk)
+{
+	size_t n = 0;
+	while (addr[n] == pat[n] || msk[n] == (uint8_t)'?') {
+		if (!msk[++n]) {
+			return true;
+		}
+	}
+	return false;
+}
+
+#define INRANGE(x,a,b)		(x >= a && x <= b) 
+#define getBits( x )		(INRANGE(x,'0','9') ? (x - '0') : ((x&(~0x20)) - 'A' + 0xa))
+#define getByte( x )		(getBits(x[0]) << 4 | getBits(x[1]))
+
+// https://github.com/learn-more/findpattern-bench/blob/master/patterns/learn_more.h
+// must use space between bytes and ?? for wildcards. Do not add 0x prefix
+uint64_t findPattern(const uint64_t rangeStart, size_t len, const char* pattern);
+uint64_t findPattern_rev(const uint64_t rangeStart, size_t len, const char* pattern);
+
+inline std::string repeat_n(std::string s, size_t n, std::string delim = "") {
+	std::string out = "";
+	for (size_t i = 0; i < n; i++) {
+		out += s;
+		if (i != n - 1) {
+			out += delim;
+		}
+	}
+	return out;
+}
+
 using ci_wstring = std::basic_string;
 using ci_wstring_view = std::basic_string_view;
 
diff --git a/libs/polyhook2.0/polyhook2/PE/EatHook.hpp b/libs/polyhook2.0/polyhook2/PE/EatHook.hpp
index 118ed49..b06624c 100644
--- a/libs/polyhook2.0/polyhook2/PE/EatHook.hpp
+++ b/libs/polyhook2.0/polyhook2/PE/EatHook.hpp
@@ -19,7 +19,14 @@ class EatHook : public IHook {
 public:
 	EatHook(const std::string& apiName, const std::wstring& moduleName, const char* fnCallback, uint64_t* userOrigVar);
 	EatHook(const std::string& apiName, const std::wstring& moduleName, const uint64_t fnCallback, uint64_t* userOrigVar);
-	virtual ~EatHook();
+	virtual ~EatHook() {
+		// trampoline freed by pageallocator dtor
+		if (m_allocator != nullptr) {
+			delete m_allocator;
+			m_allocator = nullptr;
+		}
+	}
+
 	virtual bool hook() override;
 	virtual bool unHook() override;
 	
@@ -42,7 +49,6 @@ class EatHook : public IHook {
 	PageAllocator* m_allocator;
 	uint64_t m_trampoline;
 
-	bool m_hooked;
 	uint64_t m_moduleBase;
 };
 }
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/PE/IatHook.hpp b/libs/polyhook2.0/polyhook2/PE/IatHook.hpp
index d6f02cc..d914dee 100644
--- a/libs/polyhook2.0/polyhook2/PE/IatHook.hpp
+++ b/libs/polyhook2.0/polyhook2/PE/IatHook.hpp
@@ -17,7 +17,12 @@ class IatHook : public IHook {
 public:
 	IatHook(const std::string& dllName, const std::string& apiName, const char* fnCallback, uint64_t* userOrigVar, const std::wstring& moduleName);
 	IatHook(const std::string& dllName, const std::string& apiName, const uint64_t fnCallback, uint64_t* userOrigVar, const std::wstring& moduleName);
-	virtual ~IatHook() = default;
+	virtual ~IatHook() {
+		if (m_hooked) {
+			unHook();
+		}
+	}
+
 	virtual bool hook() override;
 	virtual bool unHook() override;
 	virtual HookType getType() const override {
@@ -34,7 +39,5 @@ class IatHook : public IHook {
 	uint64_t m_fnCallback;
 	uint64_t m_origFunc;
 	uint64_t* m_userOrigVar;
-
-	bool m_hooked;
 };
 }
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/PageAllocator.hpp b/libs/polyhook2.0/polyhook2/PageAllocator.hpp
index bd97942..7dbe21b 100644
--- a/libs/polyhook2.0/polyhook2/PageAllocator.hpp
+++ b/libs/polyhook2.0/polyhook2/PageAllocator.hpp
@@ -61,6 +61,19 @@ namespace PLH {
 }
 
 inline uint64_t PLH::AllocateWithinRange(const uint64_t pStart, const int64_t Delta) {
+	/**
+	If WIN >= 2004 this can be simplified by using:
+	MEM_ADDRESS_REQUIREMENTS addressReqs = { 0 };
+	MEM_EXTENDED_PARAMETER extendedParams = { 0 };
+	extendedParams.Type = MemExtendedParameterAddressRequirements;
+	extendedParams.Pointer = &addressReqs;
+
+	addressReqs.LowestStartingAddress =
+	addressReqs.HighestEndingAddress =
+
+	VirtualAlloc2(GetCurrentProcess(), NULL, m_trampolineSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE, &extendedParams, 1);
+	**/
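	// Rough completion of the sketch above, for illustration only: the two
	// requirements would bound the +-Delta window around pStart, e.g.
	//   addressReqs.LowestStartingAddress = (PVOID)min(pStart, pStart + Delta);
	//   addressReqs.HighestEndingAddress  = (PVOID)max(pStart, pStart + Delta);
	// with both values rounded to the system allocation granularity before use.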
+
 	/*These lambda's let us use a single for loop for both the forward and backward loop conditions.
 	I passed delta variable as a parameter instead of capturing it because it is faster, it allows
 	the compiler to optimize the lambda into a function pointer rather than constructing
diff --git a/libs/polyhook2.0/polyhook2/Tests/StackCanary.hpp b/libs/polyhook2.0/polyhook2/Tests/StackCanary.hpp
new file mode 100644
index 0000000..261d65c
--- /dev/null
+++ b/libs/polyhook2.0/polyhook2/Tests/StackCanary.hpp
@@ -0,0 +1,12 @@
+#pragma once
+
+namespace PLH {
+	class StackCanary {
+	public:
+		StackCanary();
+		bool isStackGood();
+		~StackCanary() noexcept(false);
+	private:
+		unsigned char buf[50];
+	};
+}
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/Tests/TestEffectTracker.hpp b/libs/polyhook2.0/polyhook2/Tests/TestEffectTracker.hpp
index 90b873b..3af668b 100644
--- a/libs/polyhook2.0/polyhook2/Tests/TestEffectTracker.hpp
+++ b/libs/polyhook2.0/polyhook2/Tests/TestEffectTracker.hpp
@@ -16,7 +16,7 @@ class Effect {
 	bool didExecute();
 private:
 	bool m_executed;
-	UID m_uid;
+	PLH::UID m_uid;
 };
 
 /**Track if some side effect happened.**/
diff --git a/libs/polyhook2.0/polyhook2/UID.hpp b/libs/polyhook2.0/polyhook2/UID.hpp
index aaa574a..7d80d23 100644
--- a/libs/polyhook2.0/polyhook2/UID.hpp
+++ b/libs/polyhook2.0/polyhook2/UID.hpp
@@ -6,19 +6,13 @@
 #define POLYHOOK_2_UID_HPP
 
 #include 
+namespace PLH {
+	class UID {
+	public:
+		UID(long val);
+		static std::atomic_long& singleton();
 
-class UID {
-public:
-	UID(long val) {
-		this->val = val;
-	}
-
-	static std::atomic_long& singleton() {
-		static std::atomic_long base = {-1};
-		base++;
-		return base;
-	}
-
-	long	val;
-};
+		long val;
+	};
+}
 #endif //POLYHOOK_2_UID_HPP
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/Virtuals/VFuncSwapHook.hpp b/libs/polyhook2.0/polyhook2/Virtuals/VFuncSwapHook.hpp
index 100fe64..ededb6d 100644
--- a/libs/polyhook2.0/polyhook2/Virtuals/VFuncSwapHook.hpp
+++ b/libs/polyhook2.0/polyhook2/Virtuals/VFuncSwapHook.hpp
@@ -15,7 +15,11 @@ class VFuncSwapHook : public PLH::IHook {
 public:
 	VFuncSwapHook(const uint64_t Class, const VFuncMap& redirectMap, VFuncMap* origVFuncs);
 	VFuncSwapHook(const char* Class, const VFuncMap& redirectMap, VFuncMap* origVFuncs);
-	virtual ~VFuncSwapHook() = default;
+	virtual ~VFuncSwapHook() {
+		if (m_hooked) {
+			unHook();
+		}
+	}
 
 	virtual bool hook() override;
 	virtual bool unHook() override;
@@ -33,7 +37,6 @@ class VFuncSwapHook : public PLH::IHook {
 	VFuncMap m_redirectMap;
 	VFuncMap m_origVFuncs;
 	VFuncMap* m_userOrigMap;
-	bool m_Hooked;
 };
 }
 #endif
\ No newline at end of file
diff --git a/libs/polyhook2.0/polyhook2/Virtuals/VTableSwapHook.hpp b/libs/polyhook2.0/polyhook2/Virtuals/VTableSwapHook.hpp
index ba7d5cb..0d5a68e 100644
--- a/libs/polyhook2.0/polyhook2/Virtuals/VTableSwapHook.hpp
+++ b/libs/polyhook2.0/polyhook2/Virtuals/VTableSwapHook.hpp
@@ -10,16 +10,52 @@
 #include "polyhook2/Misc.hpp"
 
 namespace PLH {
+
 typedef std::map VFuncMap;
 
+// storage class for address of a virtual function
+// also stores the function pointer type and index number on the class level
+template<uint16_t I, typename FuncPtr>
+struct VFunc {
+	VFunc() : func(nullptr) {};
+	VFunc(FuncPtr f) : func(f) {};
+	const FuncPtr func;
+	static const uint16_t func_index;
+	typedef FuncPtr func_type;
+};
+
+// definition of constant must reside outside class declaration
+template<uint16_t I, typename FuncPtr> const uint16_t VFunc<I, FuncPtr>::func_index = I;
+
 class VTableSwapHook : public PLH::IHook {
 public:
+	VTableSwapHook(const uint64_t Class);
 	VTableSwapHook(const uint64_t Class, const VFuncMap& redirectMap);
 	VTableSwapHook(const char* Class, const VFuncMap& redirectMap);
-	virtual ~VTableSwapHook() = default;
+
+	template<uint16_t I, typename FuncPtr, typename ... VFuncTypes>
+	VTableSwapHook(const uint64_t Class, VFunc<I, FuncPtr> vfunc, VFuncTypes ... vfuncs)
+		: VTableSwapHook(Class, vfuncs ...)
+	{
+		m_redirectMap[I] = reinterpret_cast<uint64_t>(vfunc.func);
+	};
+
+	virtual ~VTableSwapHook() {
+		if (m_hooked) {
+			unHook();
+		}
+	}
 
 	const VFuncMap& getOriginals() const;
 
+	template<typename VFuncType, typename ... Args>
+	auto origFunc(Args&& ... args) {
+		// NOTE: could do extra type check if VFuncTypes were a template argument of the class
+		// static_assert(std::disjunction_v ...>);
+		auto func = reinterpret_cast<typename VFuncType::func_type>(m_origVFuncs.at(VFuncType::func_index));
+		return func(std::forward<Args>(args) ...);
+	};
+
 	virtual bool hook() override;
 	virtual bool unHook() override;
 	virtual HookType getType() const override {
@@ -38,7 +74,8 @@ class VTableSwapHook : public PLH::IHook {
 	// index -> ptr val 
 	VFuncMap m_redirectMap;
 	VFuncMap m_origVFuncs;
-	bool m_Hooked;
 };
+
 }
+
 #endif
\ No newline at end of file
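A sketch of the new VFunc-based interface on x64, where calling conventions collapse; the Victim class, the slot index and hkDamage are assumptions made up for the example:

#include "polyhook2/Virtuals/VTableSwapHook.hpp"

struct Victim {
	virtual int damage(int amount) { return amount; }
};

using tDamage = int(*)(Victim* self, int amount); // explicit 'this' parameter
using vDamage = PLH::VFunc<0, tDamage>;           // virtual slot 0

static PLH::VTableSwapHook* g_vtblHook = nullptr;

static int hkDamage(Victim* self, int amount) {
	// forward to the original via the func_index/func_type carried by the VFunc
	return g_vtblHook->origFunc<vDamage>(self, amount) / 2;
}

// setup, e.g. in a test:
// Victim v;
// static PLH::VTableSwapHook hook((uint64_t)&v, vDamage(&hkDamage));
// g_vtblHook = &hook;
// hook.hook();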
diff --git a/libs/polyhook2.0/polyhook2/ZydisDisassembler.hpp b/libs/polyhook2.0/polyhook2/ZydisDisassembler.hpp
index 4a70806..8270f95 100644
--- a/libs/polyhook2.0/polyhook2/ZydisDisassembler.hpp
+++ b/libs/polyhook2.0/polyhook2/ZydisDisassembler.hpp
@@ -19,7 +19,7 @@ class ZydisDisassembler : public ADisassembler {
 	virtual ~ZydisDisassembler();
 
 	virtual std::vector
-		disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end) override;
+		disassemble(uint64_t firstInstruction, uint64_t start, uint64_t end, const MemAccessor& accessor) override;
 private:
 
 	bool getOpStr(ZydisDecodedInstruction* pInstruction, uint64_t addr, std::string* pOpStrOut);
diff --git a/libs/polyhook2.0/sources/ADetour.cpp b/libs/polyhook2.0/sources/ADetour.cpp
index a0fa5d7..c8f6a9c 100644
--- a/libs/polyhook2.0/sources/ADetour.cpp
+++ b/libs/polyhook2.0/sources/ADetour.cpp
@@ -9,27 +9,38 @@ std::optional PLH::Detour::calcNearestSz(const PLH::insts_t& funct
 	PLH::insts_t instructionsInRange;
 
 	// count instructions until at least length needed or func end
+	bool endHit = false;
 	for (auto inst : functionInsts) {
-		if (prolLen >= prolOvrwStartOffset)
+		prolLen += inst.size();
+		instructionsInRange.push_back(inst);
+
+		// only safe to overwrite pad bytes once end is hit
+		if (endHit && !m_disasm.isPadBytes(inst))
 			break;
 
 		if (m_disasm.isFuncEnd(inst))
-			break;
+			endHit = true;
 
-		prolLen += inst.size();
-		instructionsInRange.push_back(inst);
+		if (prolLen >= prolOvrwStartOffset)
+			break;
 	}
 
+	prolOvrwEndOffset = prolLen;
 	if (prolLen >= prolOvrwStartOffset) {
-		prolOvrwEndOffset = prolLen;
 		return instructionsInRange;
 	}
+
 	return std::nullopt;
 }
 
 bool PLH::Detour::followJmp(PLH::insts_t& functionInsts, const uint8_t curDepth, const uint8_t depth) {
-	if (functionInsts.size() <= 0 || curDepth >= depth) {
-		ErrorLog::singleton().push("Couldn't decompile instructions at followed jmp", ErrorLevel::WARN);
+	if (functionInsts.size() <= 0) {
+		Log::log("Couldn't decompile instructions at followed jmp", ErrorLevel::WARN);
+		return false;
+	}
+
+	if (curDepth >= depth) {
+		Log::log("Prologue jmp resolution hit max depth, prologue too deep", ErrorLevel::WARN);
 		return false;
 	}
 
@@ -40,22 +51,57 @@ bool PLH::Detour::followJmp(PLH::insts_t& functionInsts, const uint8_t curDepth,
 
 	// might be a mem type like jmp rax, not supported
 	if (!functionInsts.front().hasDisplacement()) {
-		ErrorLog::singleton().push("Branching instruction without displacement encountered", ErrorLevel::WARN);
+		Log::log("Branching instruction without displacement encountered", ErrorLevel::WARN);
 		return false;
 	}
 
 	uint64_t dest = functionInsts.front().getDestination();
-	functionInsts = m_disasm.disassemble(dest, dest, dest + 100);
+	functionInsts = m_disasm.disassemble(dest, dest, dest + 100, *this);
 	return followJmp(functionInsts, curDepth + 1); // recurse
 }
 
+void PLH::Detour::writeNop(uint64_t base, uint32_t size) {
+	// we absolutely MUST never emit more than 8 single-byte 0x90 nops in a row
+	/**
+	https://stackoverflow.com/questions/25545470/long-multi-byte-nops-commonly-understood-macros-or-other-notation
+	90                              NOP
+    6690                            66 NOP
+    0f1f00                          NOP DWORD ptr [EAX]
+    0f1f4000                        NOP DWORD ptr [EAX + 00H]
+    0f1f440000                      NOP DWORD ptr [EAX + EAX*1 + 00H]
+    660f1f440000                    66 NOP DWORD ptr [EAX + EAX*1 + 00H]
+    0f1f8000000000                  NOP DWORD ptr [EAX + 00000000H]
+    0f1f840000000000                NOP DWORD ptr [EAX + EAX*1 + 00000000H]
+    660f1f840000000000              66 NOP DWORD ptr [EAX + EAX*1 + 00000000H]
+	**/
+	if (size >= 2) {
+		uint64_t fat = size / 2;
+		bool leftOver = size % 2;
+		for (uint64_t i = 0; i < fat; i++) {
+			uint16_t multi_nop = 0x9066;
+			mem_copy(base + i * 2, (uint64_t)&multi_nop, sizeof(multi_nop));
+		}
+
+		if (leftOver) {
+			uint8_t nop = 0x90;
+			mem_copy(base + fat * 2, (uint64_t)&nop, sizeof(nop));
+		}
+	} else if(size == 1) {
+		uint8_t nop = 0x90;
+		mem_copy(base, (uint64_t)&nop, sizeof(nop));
+	} else {
+		// this case is a nop for the nop routine :p
+	}
+}
+
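// Worked example of the split above: writeNop(base, 5) emits the two-byte nop
// 66 90 at base+0 and base+2, then a single 90 at base+4, so no long runs of
// single-byte 0x90 nops are produced.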
 bool PLH::Detour::expandProlSelfJmps(insts_t& prol,
 									 const insts_t& func,
 									 uint64_t& minProlSz,
 									 uint64_t& roundProlSz) {
+	
+	uint64_t maxAddr = 0;
 	const uint64_t prolStart = prol.front().getAddress();
 	branch_map_t branchMap = m_disasm.getBranchMap();
-
 	for (size_t i = 0; i < prol.size(); i++) {
 		auto inst = prol.at(i);
 
@@ -64,7 +110,6 @@ bool PLH::Detour::expandProlSelfJmps(insts_t& prol,
 			continue;
 
 		insts_t srcs = branchMap.at(inst.getAddress());
-		uint64_t maxAddr = 0;
 		for (const auto& src : srcs) {
 			const uint64_t srcEndAddr = src.getAddress() + src.size();
 			if (srcEndAddr > maxAddr)
@@ -73,7 +118,7 @@ bool PLH::Detour::expandProlSelfJmps(insts_t& prol,
 
 		minProlSz = maxAddr - prolStart;
 
-		// expand prol by one entry size, may fail if prol to small
+		// expand prol by one entry size, may fail if prol too small
 		auto prolOpt = calcNearestSz(func, minProlSz, roundProlSz);
 		if (!prolOpt)
 			return false;
@@ -118,7 +163,7 @@ bool PLH::Detour::buildRelocationList(insts_t& prologue, const uint64_t roundPro
 				*/
 				std::string err = "Cannot fixup IP relative data operation, needed disp. beyond max disp range: " + inst.getFullName() +
 					" needed: " + int_to_hex((uint64_t)std::llabs(delta)) + " raw: " + int_to_hex(delta) +  " max: " + int_to_hex(maxInstDisp);
-				ErrorLog::singleton().push(err, ErrorLevel::SEV);
+				Log::log(err, ErrorLevel::SEV);
 				return false;
 			}else {
 				instsNeedingReloc.push_back(inst);
@@ -130,9 +175,13 @@ bool PLH::Detour::buildRelocationList(insts_t& prologue, const uint64_t roundPro
 
 bool PLH::Detour::unHook() {
 	assert(m_hooked);
+	if (!m_hooked) {
+		Log::log("Detour unhook failed: no hook present", ErrorLevel::SEV);
+		return false;
+	}
 
-	MemoryProtector prot(m_fnAddress, PLH::calcInstsSz(m_originalInsts), ProtFlag::R | ProtFlag::W | ProtFlag::X);
-	m_disasm.writeEncoding(m_originalInsts);
+	MemoryProtector prot(m_fnAddress, PLH::calcInstsSz(m_originalInsts), ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
+	m_disasm.writeEncoding(m_originalInsts, *this);
 	
 	if (m_trampoline != NULL) {
 		delete[](char*)m_trampoline;
@@ -147,3 +196,14 @@ bool PLH::Detour::unHook() {
 	m_hooked = false;
 	return true;
 }
+
+bool PLH::Detour::reHook()
+{
+	MemoryProtector prot(m_fnAddress, m_hookSize, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
+	m_disasm.writeEncoding(m_hookInsts, *this);
+
+	// Nop the space between jmp and end of prologue
+	assert(m_hookSize >= m_nopProlOffset);
+	writeNop(m_fnAddress + m_nopProlOffset, m_nopSize);
+	return true;
+}
\ No newline at end of file
diff --git a/libs/polyhook2.0/sources/AVehHook.cpp b/libs/polyhook2.0/sources/AVehHook.cpp
index 52d03d9..d52f474 100644
--- a/libs/polyhook2.0/sources/AVehHook.cpp
+++ b/libs/polyhook2.0/sources/AVehHook.cpp
@@ -2,14 +2,16 @@
 
 PLH::RefCounter PLH::AVehHook::m_refCount;
 void* PLH::AVehHook::m_hHandler;
-std::map PLH::AVehHook::m_impls;
+std::unordered_set PLH::AVehHook::m_impls;
+PLH::eException PLH::AVehHook::m_onException;
+PLH::eException PLH::AVehHook::m_onUnhandledException;
 
 // https://reverseengineering.stackexchange.com/questions/14992/what-are-the-vectored-continue-handlers
 PLH::AVehHook::AVehHook() {
 	if (m_refCount.m_count == 0) {
 		m_hHandler = AddVectoredExceptionHandler(1, &AVehHook::Handler);
 		if (m_hHandler == NULL) {
-			ErrorLog::singleton().push("Failed to add VEH", ErrorLevel::SEV);
+			Log::log("Failed to add VEH", ErrorLevel::SEV);
 		}
 	}
 
@@ -25,28 +27,57 @@ PLH::AVehHook::~AVehHook() {
 		ULONG status = RemoveVectoredExceptionHandler(m_hHandler);
 		m_hHandler = nullptr;
 		if (status == 0) {
-			ErrorLog::singleton().push("Failed to remove VEH", ErrorLevel::SEV);
+			Log::log("Failed to remove VEH", ErrorLevel::SEV);
 		}
 	}
 }
 
+PLH::eException& PLH::AVehHook::EventException() {
+	return m_onException;
+}
+
+PLH::eException& PLH::AVehHook::EventUnhandledException() {
+	return m_onUnhandledException;
+}
+
 LONG CALLBACK PLH::AVehHook::Handler(EXCEPTION_POINTERS* ExceptionInfo) {
 	DWORD ExceptionCode = ExceptionInfo->ExceptionRecord->ExceptionCode;
 	uint64_t ip = ExceptionInfo->ContextRecord->XIP;
-	
+
+	// invoke callback (let users filter)
+	DWORD code = EXCEPTION_CONTINUE_SEARCH;
+	if (m_onException.Invoke(ExceptionInfo, &code))
+		return code;
+
 	switch (ExceptionCode) {
 	case 0xE06D7363: // oooh aaahh a magic value
         std::cout << "C++ exception thrown" << std::endl;
 		break;
+	// these could all reasonably be hooked by someone
+	case EXCEPTION_GUARD_PAGE:
+	case EXCEPTION_ACCESS_VIOLATION:
 	case EXCEPTION_BREAKPOINT:
 	case EXCEPTION_SINGLE_STEP:
 		// lookup which instance to forward exception to
-        const auto it = m_impls.find(ip);
-
-		if (it != m_impls.end()) {
-			return it->second->OnException(ExceptionInfo);
+		for (const auto& hk : m_impls) {
+			switch (hk.type) {
+			case AVehHookImpType::SINGLE:
+				if (hk.startAddress == ip) {
+					return hk.impl->OnException(ExceptionInfo);
+				}
+				break;
+			case AVehHookImpType::RANGE:
+				if (ip >= hk.startAddress && ip < hk.endAddress) {
+					return hk.impl->OnException(ExceptionInfo);
+				}
+				break;
+			}
 		}
 		break;
+	default:
+		// let users extend manually
+		if (m_onUnhandledException.Invoke(ExceptionInfo, &code))
+			return code;
 	}
 	return EXCEPTION_CONTINUE_SEARCH;
 }
\ No newline at end of file
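The handler now gives registered callbacks first look at every exception. A sketch of wiring one up through EventException(); the callback signature is inferred from the Invoke call above and may differ from the real typedef:

#include "polyhook2/Exceptions/AVehHook.hpp"

void installVehObserver() {
	PLH::AVehHook::EventException() += [](EXCEPTION_POINTERS* info, DWORD* code) -> bool {
		// observe only: never claim the exception, let the library's own dispatch run
		PLH::Log::log("VEH saw exception", PLH::ErrorLevel::INFO);
		(void)info; (void)code;
		return false;
	};
}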
diff --git a/libs/polyhook2.0/sources/BreakPointHook.cpp b/libs/polyhook2.0/sources/BreakPointHook.cpp
index 0917659..d5ecfe3 100644
--- a/libs/polyhook2.0/sources/BreakPointHook.cpp
+++ b/libs/polyhook2.0/sources/BreakPointHook.cpp
@@ -3,31 +3,39 @@
 PLH::BreakPointHook::BreakPointHook(const uint64_t fnAddress, const uint64_t fnCallback) : AVehHook() {
 	m_fnCallback = fnCallback;
 	m_fnAddress = fnAddress;
-	assert(m_impls.find(m_fnAddress) == m_impls.end());
-	m_impls[fnAddress] = this;
+
+	auto entry = AVehHookImpEntry(fnAddress, this);
+	assert(m_impls.find(entry) == m_impls.end());
+	m_impls.insert(entry);
 }
 
 PLH::BreakPointHook::BreakPointHook(const char* fnAddress, const char* fnCallback) : AVehHook() {
 	m_fnCallback = (uint64_t)fnCallback;
 	m_fnAddress = (uint64_t)fnAddress;
-	assert(m_impls.find(m_fnAddress) == m_impls.end());
-	m_impls[(uint64_t)fnAddress] = this;
-}
 
-PLH::BreakPointHook::~BreakPointHook() {
-	m_impls.erase(m_fnAddress);
+	auto entry = AVehHookImpEntry((uint64_t)fnAddress, this);
+	assert(m_impls.find(entry) == m_impls.end());
+	m_impls.insert(entry);
 }
 
 bool PLH::BreakPointHook::hook() {
-	MemoryProtector prot(m_fnAddress, 1, ProtFlag::R | ProtFlag::W | ProtFlag::X);
+	MemoryProtector prot(m_fnAddress, 1, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
 	m_origByte = *(uint8_t*)m_fnAddress;
 	*(uint8_t*)m_fnAddress = 0xCC;
+	m_hooked = true;
 	return true;
 }
 
 bool PLH::BreakPointHook::unHook() {
-	MemoryProtector prot(m_fnAddress, 1, ProtFlag::R | ProtFlag::W | ProtFlag::X);
+	assert(m_hooked);
+	if (!m_hooked) {
+		Log::log("BPHook unhook failed: no hook present", ErrorLevel::SEV);
+		return false;
+	}
+
+	MemoryProtector prot(m_fnAddress, 1, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
 	*(uint8_t*)m_fnAddress = m_origByte;
+	m_hooked = false;
 	return true;
 }
 
@@ -35,6 +43,7 @@ LONG PLH::BreakPointHook::OnException(EXCEPTION_POINTERS* ExceptionInfo) {
 	if (ExceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_BREAKPOINT)
 		return EXCEPTION_CONTINUE_SEARCH;
 
+	// restored via getProtectionObject()
 	unHook();
 	ExceptionInfo->ContextRecord->XIP = (decltype(ExceptionInfo->ContextRecord->XIP))m_fnCallback;
 	return EXCEPTION_CONTINUE_EXECUTION;
diff --git a/libs/polyhook2.0/sources/CapstoneDisassembler.cpp b/libs/polyhook2.0/sources/CapstoneDisassembler.cpp
index 16413f0..80a6b75 100644
--- a/libs/polyhook2.0/sources/CapstoneDisassembler.cpp
+++ b/libs/polyhook2.0/sources/CapstoneDisassembler.cpp
@@ -7,7 +7,7 @@ PLH::CapstoneDisassembler::CapstoneDisassembler(const PLH::Mode mode) : ADisasse
 	const cs_mode csMode = (mode == PLH::Mode::x64 ? CS_MODE_64 : CS_MODE_32);
 	if (cs_open(CS_ARCH_X86, csMode, &m_capHandle) != CS_ERR_OK) {
 		m_capHandle = NULL;
-		ErrorLog::singleton().push("Failed to initialize capstone", ErrorLevel::SEV);
+		Log::log("Failed to initialize capstone", ErrorLevel::SEV);
 	}
 
 	cs_option(m_capHandle, CS_OPT_DETAIL, CS_OPT_ON);
@@ -20,13 +20,25 @@ PLH::CapstoneDisassembler::~CapstoneDisassembler() {
 }
 
 PLH::insts_t
-PLH::CapstoneDisassembler::disassemble(uint64_t firstInstruction, uint64_t start, uint64_t End) {
+PLH::CapstoneDisassembler::disassemble(uint64_t firstInstruction, uint64_t start, uint64_t End, const MemAccessor& accessor) {
 	cs_insn* insInfo = cs_malloc(m_capHandle);
 	insts_t insVec;
 	m_branchMap.clear();
 
 	uint64_t size = End - start;
-	while (cs_disasm_iter(m_capHandle, (const uint8_t**)&firstInstruction, (size_t*)&size, &start, insInfo)) {
+	assert(size > 0);
+	if (size <= 0)
+		return insVec;
+
+	// copy potentially remote memory to local buffer
+	uint8_t* buf = new uint8_t[(uint32_t)size];
+
+	// bufAddr updated by cs_disasm_iter
+	uint64_t bufAddr = (uint64_t)buf;
+	accessor.mem_copy((uint64_t)buf, firstInstruction, size);
+
+	bool endHit = false;
+	while (cs_disasm_iter(m_capHandle, (const uint8_t**)&bufAddr, (size_t*)&size, &start, insInfo)) {
 		// Set later by 'SetDisplacementFields'
 		Instruction::Displacement displacement = {};
 		displacement.Absolute = 0;
@@ -35,6 +47,7 @@ PLH::CapstoneDisassembler::disassemble(uint64_t firstInstruction, uint64_t start
 						 displacement,
 						 0,
 						 false,
+			             false,
 						 insInfo->bytes,
 						 insInfo->size,
 						 insInfo->mnemonic,
@@ -42,11 +55,18 @@ PLH::CapstoneDisassembler::disassemble(uint64_t firstInstruction, uint64_t start
 						 m_mode);
 
 		setDisplacementFields(inst, insInfo);
+		if (endHit && !isPadBytes(inst))
+			break;
+
 		insVec.push_back(inst);
 
 		// searches instruction vector and updates references
 		addToBranchMap(insVec, inst);
+
+		if (isFuncEnd(inst))
+			endHit = true;
 	}
+	delete[] buf;
 	cs_free(insInfo, 1);
 	return insVec;
 }
@@ -66,20 +86,40 @@ void PLH::CapstoneDisassembler::setDisplacementFields(PLH::Instruction& inst, co
 		if (op.type == X86_OP_MEM) {
 			// Are we relative to instruction pointer?
 			// mem are types like jmp [rip + 0x4] where location is dereference-d
-			if (op.mem.base != getIpReg()) {
-				if (hasGroup(capInst, x86_insn_group::X86_GRP_JUMP) && inst.getBytes().at(0) == 0xff && inst.getBytes().at(1) == 0x25) {
-					// far jmp 0xff, 0x25, holder jmp [0xdeadbeef]
-					inst.setAbsoluteDisplacement(*(uint32_t*)op.mem.disp);
+
+			bool needsDisplacement = false;
+			if ((hasGroup(capInst, x86_insn_group::X86_GRP_JUMP) && inst.size() >= 2 && inst.getBytes().at(0) == 0xff && inst.getBytes().at(1) == 0x25) ||
+				(hasGroup(capInst, x86_insn_group::X86_GRP_CALL) && inst.size() >= 2 && inst.getBytes().at(0) == 0xff && inst.getBytes().at(1) == 0x15) ||
+
+				// skip rex prefix
+			    (hasGroup(capInst, x86_insn_group::X86_GRP_JUMP) && inst.size() >= 3 && inst.getBytes().at(1) == 0xff && inst.getBytes().at(2) == 0x25) ||
+				(hasGroup(capInst, x86_insn_group::X86_GRP_CALL) && inst.size() >= 3 && inst.getBytes().at(1) == 0xff && inst.getBytes().at(2) == 0x25)
+				)
+			{
+				// far jmp 0xff, 0x25, holder jmp [0xdeadbeef]
+				inst.setIndirect(true);
+
+				if (m_mode == Mode::x86) {
+					needsDisplacement = true;
 				}
-				continue;
+			} 
+
+			if (op.mem.base == getIpReg()) {
+				const uint8_t offset = x86.encoding.disp_offset;
+				const uint8_t size = std::min(x86.encoding.disp_size,
+					std::min(sizeof(uint64_t), (uint8_t)(capInst->size - x86.encoding.disp_offset)));
+
+				// it's relative, set immDest to max to trigger later check
+				copyDispSx(inst, offset, size, std::numeric_limits<uint64_t>::max());
+			} else if (needsDisplacement) {
+				const uint8_t offset = x86.encoding.disp_offset;
+				const uint8_t size = std::min(x86.encoding.disp_size,
+					std::min(sizeof(uint64_t), (uint8_t)(capInst->size - x86.encoding.disp_offset)));
+
+				// it's absolute
+				copyDispSx(inst, offset, size, op.mem.disp);
 			}
 
-			const uint8_t offset = x86.encoding.disp_offset;
-			const uint8_t size = std::min(x86.encoding.disp_size,
-												   std::min(sizeof(uint64_t), (uint8_t)(capInst->size - x86.encoding.disp_offset)));
-
-			// it's relative, set immDest to max to trigger later check
-			copyDispSx(inst, offset, size, std::numeric_limits<uint64_t>::max());
 			break;
 		} else if (op.type == X86_OP_IMM) {
 			// IMM types are like call 0xdeadbeef where they jmp straight to some location
diff --git a/libs/polyhook2.0/sources/EatHook.cpp b/libs/polyhook2.0/sources/EatHook.cpp
index 5986309..8a7d33b 100644
--- a/libs/polyhook2.0/sources/EatHook.cpp
+++ b/libs/polyhook2.0/sources/EatHook.cpp
@@ -13,14 +13,6 @@ PLH::EatHook::EatHook(const std::string& apiName, const std::wstring& moduleName
 	, m_trampoline(0)
 {}
 
-PLH::EatHook::~EatHook() {
-	// trampoline freed by pageallocator dtor
-	if (m_allocator != nullptr) {
-		delete m_allocator;
-		m_allocator = nullptr;
-	}
-}
-
 bool PLH::EatHook::hook() {
 	assert(m_userOrigVar != nullptr);
 	uint32_t* pExport = FindEatFunction(m_apiName, m_moduleName);
@@ -36,19 +28,19 @@ bool PLH::EatHook::hook() {
 		m_allocator = new PageAllocator(m_moduleBase, 0x80000000);
 		m_trampoline = m_allocator->getBlock(m_trampolineSize);
 		if (m_trampoline == 0) {
-			ErrorLog::singleton().push("EAT hook offset is > 32bit's. Allocation of trampoline necessary and failed to find free page within range", ErrorLevel::INFO);
+			Log::log("EAT hook offset is > 32bit's. Allocation of trampoline necessary and failed to find free page within range", ErrorLevel::INFO);
 			return false;
 		}
 
-		PLH::ADisassembler::writeEncoding(makeAgnosticJmp(m_trampoline, m_fnCallback));
+		PLH::ADisassembler::writeEncoding(makeAgnosticJmp(m_trampoline, m_fnCallback), *this);
 		offset = m_trampoline - m_moduleBase;
 
-		ErrorLog::singleton().push("EAT hook offset is > 32bit's. Allocation of trampoline necessary", ErrorLevel::INFO);
+		Log::log("EAT hook offset is > 32bit's. Allocation of trampoline necessary", ErrorLevel::INFO);
 	}
 
 	// Just like IAT, EAT is by default a writeable section
 	// any EAT entry must be an offset
-	MemoryProtector prot((uint64_t)pExport, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot((uint64_t)pExport, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W, *this);
 	m_origFunc = *pExport; // original offset
 	*pExport = (uint32_t)offset;
 	m_hooked = true;
@@ -59,14 +51,16 @@ bool PLH::EatHook::hook() {
 bool PLH::EatHook::unHook() {
 	assert(m_userOrigVar != nullptr);
 	assert(m_hooked);
-	if (!m_hooked)
+	if (!m_hooked) {
+		Log::log("EatHook unhook failed: no hook present", ErrorLevel::SEV);
 		return false;
+	}
 
 	uint32_t* pExport = FindEatFunction(m_apiName, m_moduleName);
 	if (pExport == nullptr)
 		return false;
 
-	MemoryProtector prot((uint64_t)pExport, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot((uint64_t)pExport, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W, *this);
 	*pExport = (uint32_t)m_origFunc;
 	m_hooked = false;
 	*m_userOrigVar = NULL;
@@ -103,7 +97,7 @@ uint32_t* PLH::EatHook::FindEatFunction(const std::string& apiName, const std::w
 	}
 
 	if (pExportAddress == nullptr) {
-		ErrorLog::singleton().push("Failed to find export address from requested dll", ErrorLevel::SEV);
+		Log::log("Failed to find export address from requested dll", ErrorLevel::SEV);
 	}
 	return pExportAddress;
 }
@@ -118,7 +112,7 @@ uint32_t* PLH::EatHook::FindEatFunctionInModule(const std::string& apiName) {
 	auto* pDataDir = (IMAGE_DATA_DIRECTORY*)pNT->OptionalHeader.DataDirectory;
 
 	if (pDataDir[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress == NULL) {
-		ErrorLog::singleton().push("PEs without export tables are unsupported", ErrorLevel::SEV);
+		Log::log("PEs without export tables are unsupported", ErrorLevel::SEV);
 		return NULL;
 	}
 
@@ -141,6 +135,6 @@ uint32_t* PLH::EatHook::FindEatFunctionInModule(const std::string& apiName) {
 		return pExportAddress;
 	}
 
-	ErrorLog::singleton().push("API not found before end of EAT", ErrorLevel::SEV);
+	Log::log("API not found before end of EAT", ErrorLevel::SEV);
 	return nullptr;
 }
diff --git a/libs/polyhook2.0/sources/ErrorLog.cpp b/libs/polyhook2.0/sources/ErrorLog.cpp
new file mode 100644
index 0000000..d4cd96c
--- /dev/null
+++ b/libs/polyhook2.0/sources/ErrorLog.cpp
@@ -0,0 +1,59 @@
+#include "polyhook2/ErrorLog.hpp"
+
+std::shared_ptr<PLH::Logger> PLH::Log::m_logger = nullptr;
+
+void PLH::Log::registerLogger(std::shared_ptr<Logger> logger) {
+	m_logger = logger;
+}
+
+void PLH::Log::log(std::string msg, ErrorLevel level) {
+	if (m_logger) m_logger->log(std::move(msg), level);
+}
+
+void PLH::ErrorLog::setLogLevel(PLH::ErrorLevel level) {
+	m_logLevel = level;
+}
+
+void PLH::ErrorLog::log(std::string msg, ErrorLevel level)
+{
+	push({ std::move(msg), level });
+}
+
+void PLH::ErrorLog::push(std::string msg, ErrorLevel level)
+{
+	push({ std::move(msg), level });
+}
+
+void PLH::ErrorLog::push(PLH::Error err) {
+	if (err.lvl >= m_logLevel) {
+		switch (err.lvl) {
+		case ErrorLevel::INFO:
+			std::cout << "[+] Info: " << err.msg << std::endl;
+			break;
+		case ErrorLevel::WARN:
+			std::cout << "[!] Warn: " << err.msg << std::endl;
+			break;
+		case ErrorLevel::SEV:
+			std::cout << "[!] Error: " << err.msg << std::endl;
+			break;
+		default:
+			std::cout << "Unsupported error message logged " << err.msg << std::endl;
+		}
+	}
+
+	m_log.push_back(std::move(err));
+}
+
+PLH::Error PLH::ErrorLog::pop() {
+	Error err{};
+	if (!m_log.empty()) {
+		err = m_log.back();
+		m_log.pop_back();
+	}
+	return err;
+}
+
+PLH::ErrorLog& PLH::ErrorLog::singleton() {
+	static ErrorLog log;
+	return log;
+}
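
The new PLH::Log facade forwards each message to whichever logger has been registered, while ErrorLog remains available as one such implementation (and as the queue-backed singleton). A minimal sketch of plugging in a custom sink, assuming the Logger base class declared in ErrorLog.hpp exposes a virtual log(std::string, ErrorLevel):

    #include <iostream>
    #include <memory>
    #include "polyhook2/ErrorLog.hpp"

    // Hypothetical sink that mirrors PolyHook messages to stderr.
    class StderrLogger : public PLH::Logger {
    public:
        void log(std::string msg, PLH::ErrorLevel level) override {
            (void)level; // severity ignored in this sketch
            std::cerr << msg << std::endl;
        }
    };

    // during startup:
    // PLH::Log::registerLogger(std::make_shared<StderrLogger>());
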
diff --git a/libs/polyhook2.0/sources/HWBreakPointHook.cpp b/libs/polyhook2.0/sources/HWBreakPointHook.cpp
index 2e0478f..a6ac44b 100644
--- a/libs/polyhook2.0/sources/HWBreakPointHook.cpp
+++ b/libs/polyhook2.0/sources/HWBreakPointHook.cpp
@@ -3,8 +3,10 @@
 PLH::HWBreakPointHook::HWBreakPointHook(const uint64_t fnAddress, const uint64_t fnCallback, HANDLE hThread) : AVehHook() {
 	m_fnCallback = fnCallback;
 	m_fnAddress = fnAddress;
-	assert(m_impls.find(m_fnAddress) == m_impls.end());
-	m_impls[fnAddress] = this;
+
+	auto entry = AVehHookImpEntry(fnAddress, this);
+	assert(m_impls.find(entry) == m_impls.end());
+	m_impls.insert(entry);
 
 	m_hThread = hThread;
 }
@@ -12,14 +14,12 @@ PLH::HWBreakPointHook::HWBreakPointHook(const uint64_t fnAddress, const uint64_t
 PLH::HWBreakPointHook::HWBreakPointHook(const char* fnAddress, const char* fnCallback, HANDLE hThread) : AVehHook() {
 	m_fnCallback = (uint64_t)fnCallback;
 	m_fnAddress = (uint64_t)fnAddress;
-	assert(m_impls.find(m_fnAddress) == m_impls.end());
-	m_impls[(uint64_t)fnAddress] = this;
 
-	m_hThread = hThread;
-}
+	auto entry = AVehHookImpEntry((uint64_t)fnAddress, this);
+	assert(m_impls.find(entry) == m_impls.end());
+	m_impls.insert(entry);
 
-PLH::HWBreakPointHook::~HWBreakPointHook() {
-	m_impls.erase(m_fnAddress);
+	m_hThread = hThread;
 }
 
 bool PLH::HWBreakPointHook::hook()
@@ -28,7 +28,7 @@ bool PLH::HWBreakPointHook::hook()
 	ZeroMemory(&ctx, sizeof(ctx));
 	ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS;
 	if (!GetThreadContext(m_hThread, &ctx)) {
-		ErrorLog::singleton().push("Failed to get thread context", ErrorLevel::SEV);
+		Log::log("Failed to get thread context", ErrorLevel::SEV);
 		return false;
 	}
 
@@ -41,7 +41,7 @@ bool PLH::HWBreakPointHook::hook()
 	}
 
 	if (!freeReg) {
-		ErrorLog::singleton().push("All HW BP's are used", ErrorLevel::SEV);
+		Log::log("All HW BP's are used", ErrorLevel::SEV);
 		return false;
 	}
 
@@ -68,18 +68,25 @@ bool PLH::HWBreakPointHook::hook()
 
 	// undefined, suspendthread needed
 	if (!SetThreadContext(m_hThread, &ctx)) {
-		ErrorLog::singleton().push("Failed to set thread context", ErrorLevel::SEV);
+		Log::log("Failed to set thread context", ErrorLevel::SEV);
 	}
 
+	m_hooked = true;
 	return true;
 }
 
 bool PLH::HWBreakPointHook::unHook() {
+	assert(m_hooked);
+	if (!m_hooked) {
+		Log::log("HWBPHook unhook failed: no hook present", ErrorLevel::SEV);
+		return false;
+	}
+
 	CONTEXT ctx;
 	ZeroMemory(&ctx, sizeof(ctx));
 	ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS;
 	if (!GetThreadContext(m_hThread, &ctx)) {
-		ErrorLog::singleton().push("Failed to get thread context", ErrorLevel::SEV);
+		Log::log("Failed to get thread context", ErrorLevel::SEV);
 		return false;
 	}
 
@@ -87,9 +94,10 @@ bool PLH::HWBreakPointHook::unHook() {
 
 	//Still need to call suspend thread
 	if (!SetThreadContext(m_hThread, &ctx)) {
-		ErrorLog::singleton().push("Failed to set thread context", ErrorLevel::SEV);
+		Log::log("Failed to set thread context", ErrorLevel::SEV);
 		return false;
 	}
+	m_hooked = false;
 	return true;
 }
 
diff --git a/libs/polyhook2.0/sources/ILCallback.cpp b/libs/polyhook2.0/sources/ILCallback.cpp
index 217cf2d..de15f45 100644
--- a/libs/polyhook2.0/sources/ILCallback.cpp
+++ b/libs/polyhook2.0/sources/ILCallback.cpp
@@ -2,11 +2,13 @@
 
 asmjit::CallConv::Id PLH::ILCallback::getCallConv(const std::string& conv) {
 	if (conv == "cdecl") {
-		return asmjit::CallConv::kIdHostCDecl;
+		return asmjit::CallConv::kIdCDecl;
 	}else if (conv == "stdcall") {
-		return asmjit::CallConv::kIdHostStdCall;
+		return asmjit::CallConv::kIdStdCall;
 	}else if (conv == "fastcall") {
-		return asmjit::CallConv::kIdHostFastCall;
+		return asmjit::CallConv::kIdFastCall;
+	}else if (conv == "thiscall") {
+		return asmjit::CallConv::kIdThisCall;
 	} 
 	return asmjit::CallConv::kIdHost;
 }
@@ -56,7 +58,7 @@ uint8_t PLH::ILCallback::getTypeId(const std::string& type) {
 	return asmjit::Type::kIdVoid;
 }
 
-uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH::ILCallback::tUserCallback callback) {;
+uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const asmjit::Environment::Arch arch, const PLH::ILCallback::tUserCallback callback) {;
 	/*AsmJit is smart enough to track register allocations and will forward
 	  the proper registers the right values and fixup any it dirtied earlier.
 	  This can only be done if it knows the signature, and ABI, so we give it 
@@ -73,8 +75,10 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 	  be spoiled and must be manually marked dirty. After endFunc ONLY concrete
 	  physical registers may be inserted as nodes.
 	*/
-	asmjit::CodeHolder code;                      
-	code.init(asmjit::CodeInfo(asmjit::ArchInfo::kIdHost));			
+	asmjit::CodeHolder code;        
+	auto env = asmjit::hostEnvironment();
+	env.setArch(arch);
+	code.init(env);
 	
 	// initialize function
 	asmjit::x86::Compiler cc(&code);            
@@ -102,7 +106,7 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 		} else if (isXmmReg(argType)) {
 			arg = cc.newXmm();
 		} else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+			Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
 			return 0;
 		}
 
@@ -136,7 +140,7 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 		} else if(isXmmReg(argType)) {
 			cc.movq(argsStackIdx, argRegisters.at(argIdx).as());
 		} else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+			Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
 			return 0;
 		}
 
@@ -157,11 +161,16 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 	asmjit::x86::Gp retStruct = cc.newUIntPtr("retStruct");
 	cc.lea(retStruct, retStack);
 
+	asmjit::InvokeNode* invokeNode;
+	cc.invoke(&invokeNode,
+		(uint64_t)callback,
+		asmjit::FuncSignatureT()
+	);
+
 	// call to user provided function (use ABI of host compiler)
-	auto call = cc.call(asmjit::Imm(static_cast((intptr_t)callback)), asmjit::FuncSignatureT(asmjit::CallConv::kIdHost));
-	call->setArg(0, argStruct);
-	call->setArg(1, argCountParam);
-	call->setArg(2, retStruct);
+	invokeNode->setArg(0, argStruct);
+	invokeNode->setArg(1, argCountParam);
+	invokeNode->setArg(2, retStruct);
 
 	// mov from arguments stack structure into regs
 	cc.mov(i, 0); // reset idx
@@ -173,7 +182,7 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 		}else if (isXmmReg(argType)) {
 			cc.movq(argRegisters.at(arg_idx).as(), argsStackIdx);
 		}else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+			Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
 			return 0;
 		}
 
@@ -186,9 +195,10 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 	cc.mov(origPtr, (uintptr_t)getTrampolineHolder());
 	cc.mov(origPtr, asmjit::x86::ptr(origPtr));
 
-	auto origCall = cc.call(origPtr, sig);
+	asmjit::InvokeNode* origInvokeNode;
+	cc.invoke(&origInvokeNode, origPtr, sig);
 	for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++) {
-		origCall->setArg(argIdx, argRegisters.at(argIdx));
+		origInvokeNode->setArg(argIdx, argRegisters.at(argIdx));
 	}
 	
 	if (sig.hasRet()) {
@@ -207,26 +217,10 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 
 	cc.func()->frame().addDirtyRegs(origPtr);
 	
-	
-
 	cc.endFunc();
-	
-	/*
-		finalize() Manually so we can mutate node list (for future use). In asmjit the compiler inserts implicit calculated 
-		nodes around some instructions, such as call where it will emit implicit movs for params and stack stuff.
-		Asmjit finalize applies optimization and reg assignment 'passes', then serializes via assembler (we do these steps manually).
-	*/
-	cc.runPasses();
-
-	/* 
-		Passes will also do virtual register allocations, which may be assigned multiple concrete
-		registers throughout the lifetime of the function. So we must only emit raw assembly with
-		concrete registers from this point on (after runPasses call).
-	*/
 
 	// write to buffer
-	asmjit::x86::Assembler assembler(&code);
-	cc.serialize(&assembler);
+	cc.finalize();
 
 	// worst case, overestimates for case trampolines needed
 	code.flatten();
@@ -248,18 +242,18 @@ uint64_t PLH::ILCallback::getJitFunc(const asmjit::FuncSignature& sig, const PLH
 	code.relocateToBase(m_callbackBuf);
 	code.copyFlattenedData((unsigned char*)m_callbackBuf, size);
 
-	ErrorLog::singleton().push("JIT Stub:\n" + std::string(log.data()), ErrorLevel::INFO);
+	Log::log("JIT Stub:\n" + std::string(log.data()), ErrorLevel::INFO);
 	return m_callbackBuf;
 }
 
-uint64_t PLH::ILCallback::getJitFunc(const std::string& retType, const std::vector& paramTypes, const tUserCallback callback, std::string callConv/* = ""*/) {
+uint64_t PLH::ILCallback::getJitFunc(const std::string& retType, const std::vector<std::string>& paramTypes, const asmjit::Environment::Arch arch, const tUserCallback callback, std::string callConv/* = ""*/) {
 	asmjit::FuncSignature sig = {};
 	std::vector<uint8_t> args;
 	for (const std::string& s : paramTypes) {
 		args.push_back(getTypeId(s));
 	}
 	sig.init(getCallConv(callConv),asmjit::FuncSignature::kNoVarArgs, getTypeId(retType), args.data(), (uint32_t)args.size());
-	return getJitFunc(sig, callback);
+	return getJitFunc(sig, arch, callback);
 }
 
 uint64_t* PLH::ILCallback::getTrampolineHolder() {
diff --git a/libs/polyhook2.0/sources/IatHook.cpp b/libs/polyhook2.0/sources/IatHook.cpp
index a885248..7ad7f4f 100644
--- a/libs/polyhook2.0/sources/IatHook.cpp
+++ b/libs/polyhook2.0/sources/IatHook.cpp
@@ -10,6 +10,7 @@ PLH::IatHook::IatHook(const std::string& dllName, const std::string& apiName, co
     , m_moduleName(moduleName)
     , m_fnCallback(fnCallback)
     , m_userOrigVar(userOrigVar)
+	, m_origFunc(0)
 {}
 
 bool PLH::IatHook::hook() {
@@ -19,7 +20,7 @@ bool PLH::IatHook::hook() {
 		return false;
 
 	// IAT is by default a writeable section
-	MemoryProtector prot((uint64_t)&pThunk->u1.Function, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot((uint64_t)&pThunk->u1.Function, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W, *this);
 	m_origFunc = (uint64_t)pThunk->u1.Function;
 	pThunk->u1.Function = (uintptr_t)m_fnCallback;
 	m_hooked = true;
@@ -37,7 +38,7 @@ bool PLH::IatHook::unHook() {
 	if (pThunk == nullptr)
 		return false;
 
-	MemoryProtector prot((uint64_t)&pThunk->u1.Function, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot((uint64_t)&pThunk->u1.Function, sizeof(uintptr_t), ProtFlag::R | ProtFlag::W, *this);
 	pThunk->u1.Function = (uintptr_t)m_origFunc;
 	m_hooked = false;
 	*m_userOrigVar = NULL;
@@ -72,7 +73,7 @@ IMAGE_THUNK_DATA* PLH::IatHook::FindIatThunk(const std::string& dllName, const s
 	}
 
 	if (pThunk == nullptr) {
-		ErrorLog::singleton().push("Failed to find thunk for api from requested dll", ErrorLevel::SEV);
+		Log::log("Failed to find thunk for api from requested dll", ErrorLevel::SEV);
 	}
 	return pThunk;
 }
@@ -87,7 +88,7 @@ IMAGE_THUNK_DATA* PLH::IatHook::FindIatThunkInModule(void* moduleBase, const std
 	auto* pDataDir = (IMAGE_DATA_DIRECTORY*)pNT->OptionalHeader.DataDirectory;
 
 	if (pDataDir[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress == NULL) {
-		ErrorLog::singleton().push("PEs without import tables are unsupported", ErrorLevel::SEV);
+		Log::log("PEs without import tables are unsupported", ErrorLevel::SEV);
 		return nullptr;
 	}
 
@@ -108,7 +109,7 @@ IMAGE_THUNK_DATA* PLH::IatHook::FindIatThunkInModule(void* moduleBase, const std
 			RVA2VA(uintptr_t, moduleBase, pImports[i].FirstThunk);
 
 		if (!pOriginalThunk) {
-			ErrorLog::singleton().push("IAT's without valid original thunk are un-supported", ErrorLevel::SEV);
+			Log::log("IAT's without valid original thunk are un-supported", ErrorLevel::SEV);
 			return nullptr;
 		}
 
@@ -129,6 +130,6 @@ IMAGE_THUNK_DATA* PLH::IatHook::FindIatThunkInModule(void* moduleBase, const std
 		}
 	}
 
-	ErrorLog::singleton().push("Thunk not found before end of IAT", ErrorLevel::SEV);
+	Log::log("Thunk not found before end of IAT", ErrorLevel::SEV);
 	return nullptr;
 }
\ No newline at end of file
diff --git a/libs/polyhook2.0/sources/MemAccessor.cpp b/libs/polyhook2.0/sources/MemAccessor.cpp
new file mode 100644
index 0000000..e59bca3
--- /dev/null
+++ b/libs/polyhook2.0/sources/MemAccessor.cpp
@@ -0,0 +1,26 @@
+#include "polyhook2/MemAccessor.hpp"
+#include "polyhook2/MemProtector.hpp"
+
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+
+bool PLH::MemAccessor::mem_copy(uint64_t dest, uint64_t src, uint64_t size) const {
+	memcpy((char*)dest, (char*)src, (SIZE_T)size);
+	return true;
+}
+
+bool PLH::MemAccessor::safe_mem_write(uint64_t dest, uint64_t src, uint64_t size, size_t& written) const noexcept {
+	written = 0;
+	return WriteProcessMemory(GetCurrentProcess(), (char*)dest, (char*)src, (SIZE_T)size, (PSIZE_T)&written);
+}
+
+bool PLH::MemAccessor::safe_mem_read(uint64_t src, uint64_t dest, uint64_t size, size_t& read) const noexcept {
+	read = 0;
+	return ReadProcessMemory(GetCurrentProcess(), (char*)src, (char*)dest, (SIZE_T)size, (PSIZE_T)&read) || (GetLastError() == ERROR_PARTIAL_COPY);
+}
+
+PLH::ProtFlag PLH::MemAccessor::mem_protect(uint64_t dest, uint64_t size, PLH::ProtFlag prot, bool& status) const {
+	DWORD orig;
+	status = VirtualProtect((char*)dest, (SIZE_T)size, TranslateProtection(prot), &orig) != 0;
+	return TranslateProtection(orig);
+}
\ No newline at end of file
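
These defaults keep everything in-process: mem_copy is a plain memcpy, while the safe_* variants go through Read/WriteProcessMemory on the current process so reads near unmapped pages fail gracefully instead of faulting. A sketch of overriding them for a remote target, assuming these members are declared virtual in MemAccessor.hpp:

    // Hypothetical accessor bound to another process handle.
    class RemoteAccessor : public PLH::MemAccessor {
    public:
        explicit RemoteAccessor(HANDLE process) : m_process(process) {}

        bool safe_mem_read(uint64_t src, uint64_t dest, uint64_t size, size_t& read) const noexcept override {
            read = 0;
            return ReadProcessMemory(m_process, (LPCVOID)src, (LPVOID)dest, (SIZE_T)size, (PSIZE_T)&read) != 0;
        }

        bool safe_mem_write(uint64_t dest, uint64_t src, uint64_t size, size_t& written) const noexcept override {
            written = 0;
            return WriteProcessMemory(m_process, (LPVOID)dest, (LPCVOID)src, (SIZE_T)size, (PSIZE_T)&written) != 0;
        }

    private:
        HANDLE m_process;
    };
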
diff --git a/libs/polyhook2.0/sources/Misc.cpp b/libs/polyhook2.0/sources/Misc.cpp
new file mode 100644
index 0000000..1f1bf95
--- /dev/null
+++ b/libs/polyhook2.0/sources/Misc.cpp
@@ -0,0 +1,75 @@
+#include "polyhook2/Misc.hpp"
+
+uint64_t PLH::findPattern(const uint64_t rangeStart, size_t len, const char* pattern)
+{
+	const size_t l = strlen(pattern);
+
+	// l = 2 * b + (b - 1): 2 chars per byte plus b - 1 spaces between
+	const size_t patSize = (l + 1) / 3;
+	auto patt_base = (char*)_alloca(patSize + 1);
+	auto msk_base = (char*)_alloca(patSize + 1);
+	char* pat = patt_base;
+	char* msk = msk_base;
+
+	if (patSize + 1 > len)
+		return NULL;
+
+	size_t counter = patSize;
+	while (counter) {
+		if (*(uint8_t*)pattern == (uint8_t)'\?') {
+			*pat++ = 0;
+			*msk++ = '?';
+		} else {
+			*pat++ = getByte(pattern);
+			*msk++ = 'x';
+		}
+		pattern += 3;
+		counter--;
+	}
+
+	*msk = 0;
+	for (size_t n = 0; n < (len - (patSize + 1)); ++n)
+	{
+		if (isMatch((char*)(rangeStart + n), patt_base, msk_base)) {
+			return rangeStart + n;
+		}
+	}
+	return NULL;
+}
+
+uint64_t PLH::findPattern_rev(const uint64_t rangeStart, size_t len, const char* pattern)
+{
+	const size_t l = strlen(pattern);
+
+	// l = 2 * b + (b - 1): 2 chars per byte plus b - 1 spaces between
+	const size_t patSize = (l + 1) / 3;
+	auto patt_base = (char*)_alloca(patSize + 1);
+	auto msk_base = (char*)_alloca(patSize + 1);
+	char* pat = patt_base;
+	char* msk = msk_base;
+
+	if (patSize + 1 > len)
+		return NULL;
+
+	size_t counter = patSize;
+	while (counter) {
+		if (*(uint8_t*)pattern == (uint8_t)'\?') {
+			*pat++ = 0;
+			*msk++ = '?';
+		} else {
+			*pat++ = getByte(pattern);
+			*msk++ = 'x';
+		}
+		pattern += 3;
+		counter--;
+	}
+
+	*msk = 0;
+	for (size_t n = len - (patSize + 1); n > 0; n--)
+	{
+		if (isMatch((char*)(rangeStart + n), patt_base, msk_base)) {
+			return rangeStart + n;
+		}
+	}
+	return NULL;
+}
\ No newline at end of file
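
Both scanners expect two hex characters per byte, bytes separated by single spaces, with "??" matching any byte; findPattern_rev walks the range from the end toward the start. A small usage sketch (pattern and range are purely illustrative):

    // Hypothetical scan of the first page of the main module.
    uint64_t base = (uint64_t)GetModuleHandleA(nullptr);
    uint64_t hit = PLH::findPattern(base, 0x1000, "48 8b ?? 48 85 c0");
    if (hit != 0) {
        // hit holds the absolute address of the first match in the range
    }
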
diff --git a/libs/polyhook2.0/sources/PyCallback.cpp b/libs/polyhook2.0/sources/PyCallback.cpp
index 301c590..361dd70 100644
--- a/libs/polyhook2.0/sources/PyCallback.cpp
+++ b/libs/polyhook2.0/sources/PyCallback.cpp
@@ -1,236 +1,388 @@
 #include "polyhook2/Detour/PyCallback.hpp"
 
-asmjit::CallConv::Id PLH::PyCallback::getCallConv(const std::string& conv) {
-	if (conv == "cdecl") {
-		return asmjit::CallConv::kIdHostCDecl;
-	}else if (conv == "stdcall") {
-		return asmjit::CallConv::kIdHostStdCall;
-	}else if (conv == "fastcall") {
-		return asmjit::CallConv::kIdHostFastCall;
-	}else if (conv == "thiscall") {
-		return asmjit::CallConv::kIdX86MsThisCall;
-	} 
-	return asmjit::CallConv::kIdHost;
-}
-
-uint64_t PLH::PyCallback::getJitFunc(const uint32_t unique_id, const asmjit::FuncSignature& sig, const PLH::PyCallback::tUserCallback callback, bool use_trampoline) {;
-	/*AsmJit is smart enough to track register allocations and will forward
-	  the proper registers the right values and fixup any it dirtied earlier.
-	  This can only be done if it knows the signature, and ABI, so we give it 
-	  them. It also only does this mapping for calls, so we need to generate 
-	  calls on our boundaries of transfers when we want argument order correct
-	  (ABI stuff is managed for us when calling C code within this project via host mode).
-	  It also does stack operations for us including alignment, shadow space, and
-	  arguments, everything really. Manual stack push/pop is not supported using
-	  the AsmJit compiler, so we must create those nodes, and insert them into
-	  the Node list manually to not corrupt the compiler's tracking of things.
-
-	  Inside the compiler, before endFunc only virtual registers may be used. Any
-	  concrete physical registers will not have their liveness tracked, so will
-	  be spoiled and must be manually marked dirty. After endFunc ONLY concrete
-	  physical registers may be inserted as nodes.
-	*/
-	asmjit::CodeHolder code;                      
-	code.init(asmjit::CodeInfo(asmjit::ArchInfo::kIdHost));			
-	
-	// initialize function
-	asmjit::x86::Compiler cc(&code);            
-	asmjit::FuncNode* func = cc.addFunc(sig);              
-
-	asmjit::StringLogger log;
-	uint32_t kFormatFlags = asmjit::FormatOptions::kFlagMachineCode | asmjit::FormatOptions::kFlagExplainImms | asmjit::FormatOptions::kFlagRegCasts 
-		| asmjit::FormatOptions::kFlagAnnotations | asmjit::FormatOptions::kFlagDebugPasses | asmjit::FormatOptions::kFlagDebugRA
-		| asmjit::FormatOptions::kFlagHexImms | asmjit::FormatOptions::kFlagHexOffsets | asmjit::FormatOptions::kFlagPositions;
-	
-	log.addFlags(kFormatFlags);
-	code.setLogger(&log);
-	
-	// too small to really need it
-	func->frame().resetPreservedFP();
-	
-	// map argument slots to registers, following abi.
-	std::vector argRegisters;
-	for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++) {
-		const uint8_t argType = sig.args()[argIdx];
-
-		asmjit::x86::Reg arg;
-		if (isGeneralReg(argType)) {
-			arg = cc.newUIntPtr();
-		} else if (isXmmReg(argType)) {
-			arg = cc.newXmm();
-		} else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
-			return 0;
-		}
-
-		cc.setArg(argIdx, arg);
-		argRegisters.push_back(arg);
-	}
-  
-	// setup the stack structure to hold arguments for user callback
-	uint32_t stackSize = (uint32_t)(sizeof(uint64_t) * sig.argCount());
-	argsStack = cc.newStack(stackSize, 16);
-	asmjit::x86::Mem argsStackIdx(argsStack);               
-
-	// assigns some register as index reg 
-	asmjit::x86::Gp i = cc.newUIntPtr();
-
-	// stackIdx <- stack[i].
-	argsStackIdx.setIndex(i);                   
-
-	// r/w are sizeof(uint64_t) width now
-	argsStackIdx.setSize(sizeof(uint64_t));
-	
-	// set i = 0
-	cc.mov(i, 0);
-	//// mov from arguments registers into the stack structure
-	for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++) {
-		const uint8_t argType = sig.args()[argIdx];
-
-		// have to cast back to explicit register types to gen right mov type
-		if (isGeneralReg(argType)) {
-			cc.mov(argsStackIdx, argRegisters.at(argIdx).as());
-		} else if(isXmmReg(argType)) {
-			cc.movq(argsStackIdx, argRegisters.at(argIdx).as());
-		} else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
-			return 0;
-		}
-
-		// next structure slot (+= sizeof(uint64_t))
-		cc.add(i, sizeof(uint64_t));
-	}
-
-	// get pointer to stack structure and pass it to the user callback
-	asmjit::x86::Gp argStruct = cc.newUIntPtr("argStruct");
-	cc.lea(argStruct, argsStack);
-
-	// fill reg to pass struct arg count to callback
-	asmjit::x86::Gp argCountParam = cc.newU8();
-	cc.mov(argCountParam, (uint8_t)sig.argCount());
-
-	// create buffer for ret val
-	asmjit::x86::Mem retStack = cc.newStack(sizeof(uint64_t), 16);
-	asmjit::x86::Gp retStruct = cc.newUIntPtr("retStruct");
-	cc.lea(retStruct, retStack);
-
-	asmjit::x86::Gp param0 = cc.newInt32("tmp");
-	cc.mov(param0, unique_id);
-
-	// call to user provided function (use ABI of host compiler)
-	auto call = cc.call(asmjit::Imm(static_cast((intptr_t)callback)), asmjit::FuncSignatureT(asmjit::CallConv::kIdHost));
-	call->setArg(0, param0);
-	call->setArg(1, argStruct);
-	call->setArg(2, argCountParam);
-	call->setArg(3, retStruct);
-
-	// mov from arguments stack structure into regs
-	cc.mov(i, 0); // reset idx
-	for (uint8_t arg_idx = 0; arg_idx < sig.argCount(); arg_idx++) {
-		const uint8_t argType = sig.args()[arg_idx];
-
-		if (isGeneralReg(argType)) {
-			cc.mov(argRegisters.at(arg_idx).as(), argsStackIdx);
-		}else if (isXmmReg(argType)) {
-			cc.movq(argRegisters.at(arg_idx).as(), argsStackIdx);
-		}else {
-			ErrorLog::singleton().push("Parameters wider than 64bits not supported", ErrorLevel::SEV);
-			return 0;
-		}
-
-		// next structure slot (+= sizeof(uint64_t))
-		cc.add(i, sizeof(uint64_t));
-	}
-
-	asmjit::x86::Gp origPtr;
-	if (use_trampoline) {
-		// deref the trampoline ptr (holder must live longer, must be concrete reg since push later)
-		origPtr = cc.zbx();
-		cc.mov(origPtr, (uintptr_t)getTrampolineHolder());
-		cc.mov(origPtr, asmjit::x86::ptr(origPtr));
-
-		auto origCall = cc.call(origPtr, sig);
-		for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++) {
-			origCall->setArg(argIdx, argRegisters.at(argIdx));
-		}
-	}
-	
-	if (sig.hasRet()) {
-		asmjit::x86::Mem retStackIdx(retStack);
-		retStackIdx.setSize(sizeof(uint64_t));
-		if (isGeneralReg((uint8_t)sig.ret())) {
-			asmjit::x86::Gp tmp2 = cc.newUIntPtr();
-			cc.mov(tmp2, retStackIdx);
-			cc.ret(tmp2);
-		} else {
-			asmjit::x86::Xmm tmp2 = cc.newXmm();
-			cc.movq(tmp2, retStackIdx);
-			cc.ret(tmp2);
-		}
-	}
-
-	if (use_trampoline) {
-		cc.func()->frame().addDirtyRegs(origPtr);
-	}
-	
-
-	cc.endFunc();
-	
-	/*
-		finalize() Manually so we can mutate node list (for future use). In asmjit the compiler inserts implicit calculated 
-		nodes around some instructions, such as call where it will emit implicit movs for params and stack stuff.
-		Asmjit finalize applies optimization and reg assignment 'passes', then serializes via assembler (we do these steps manually).
-	*/
-	cc.runPasses();
-
-	/* 
-		Passes will also do virtual register allocations, which may be assigned multiple concrete
-		registers throughout the lifetime of the function. So we must only emit raw assembly with
-		concrete registers from this point on (after runPasses call).
-	*/
-
-	// write to buffer
-	asmjit::x86::Assembler assembler(&code);
-	cc.serialize(&assembler);
-
-	// worst case, overestimates for case trampolines needed
-	code.flatten();
-	size_t size = code.codeSize();
-
-	// Allocate a virtual memory (executable).
-	m_callbackBuf = (uint64_t)m_mem.getBlock(size);
-	if (!m_callbackBuf) {
-		__debugbreak();
-		return 0;
-	}
-
-	// if multiple sections, resolve linkage (1 atm)
-	if (code.hasUnresolvedLinks()) {
-		code.resolveUnresolvedLinks();
-	}
-
-	 // Relocate to the base-address of the allocated memory.
-	code.relocateToBase(m_callbackBuf);
-	code.copyFlattenedData((unsigned char*)m_callbackBuf, size);
-
-	ErrorLog::singleton().push("JIT Stub:\n" + std::string(log.data()), ErrorLevel::INFO);
-	return m_callbackBuf;
+asmjit::CallConv::Id PLH::PyCallback::getCallConv(const std::string &conv)
+{
+    if (conv == "cdecl")
+    {
+        return asmjit::CallConv::kIdCDecl;
+    }
+    else if (conv == "stdcall")
+    {
+        return asmjit::CallConv::kIdStdCall;
+    }
+    else if (conv == "fastcall")
+    {
+        return asmjit::CallConv::kIdFastCall;
+    }
+    else if (conv == "thiscall")
+    {
+        return asmjit::CallConv::kIdThisCall;
+    }
+
+    throw std::invalid_argument("Unknown calling convention: " + conv);
 }
 
-uint64_t PLH::PyCallback::getJitFunc(const uint32_t unique_id, const std::string& retType, const std::vector& paramTypes, const tUserCallback callback, std::string callConv/* = ""*/, bool use_trampoline) {
-	asmjit::FuncSignature sig = {};
-	std::vector args;
-	for (const std::string& s : paramTypes) {
-		args.push_back(getTypeId(s));
-	}
-	sig.init(getCallConv(callConv),asmjit::FuncSignature::kNoVarArgs, getTypeId(retType), args.data(), (uint32_t)args.size());
-	return getJitFunc(unique_id, sig, callback);
+uint64_t PLH::PyCallback::getJitFunc(const uint32_t native_address, const asmjit::FuncSignature &sig, const asmjit::Environment::Arch arch)
+{
+    asmjit::CodeHolder code;
+    auto env = asmjit::hostEnvironment();
+    env.setArch(arch);
+    code.init(env);
+
+    // initialize function
+    asmjit::x86::Compiler cc(&code);
+
+    asmjit::FuncNode *func = cc.addFunc(asmjit::FuncSignatureT());
+
+    asmjit::StringLogger log;
+    uint32_t kFormatFlags = asmjit::FormatOptions::kFlagMachineCode | asmjit::FormatOptions::kFlagExplainImms | asmjit::FormatOptions::kFlagRegCasts | asmjit::FormatOptions::kFlagAnnotations | asmjit::FormatOptions::kFlagDebugPasses | asmjit::FormatOptions::kFlagDebugRA | asmjit::FormatOptions::kFlagHexImms | asmjit::FormatOptions::kFlagHexOffsets | asmjit::FormatOptions::kFlagPositions;
+    log.addFlags(kFormatFlags);
+    code.setLogger(&log);
+
+    // too small to really need it
+    func->frame().resetPreservedFP();
+
+    constexpr uint32_t QWORD_SIZE = sizeof(uint64_t);
+
+    asmjit::x86::Gp argStruct = cc.newUIntPtr("argStruct");
+    cc.setArg(0, argStruct);
+
+    // asmjit::x86::Mem argsStack = cc.newStack(stackSize, 16);
+    asmjit::x86::Mem argsStack = asmjit::x86::ptr(argStruct);
+
+    asmjit::x86::Gp retStruct = cc.newUIntPtr("retStruct");
+    cc.setArg(1, retStruct);
+
+    std::vector<asmjit::x86::Reg> argRegisters;
+    for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+    {
+        const uint8_t argType = sig.args()[argIdx];
+
+        asmjit::x86::Reg arg;
+        if (isGeneralReg(argType))
+        {
+            arg = cc.newUIntPtr();
+        }
+        else if (isXmmReg(argType))
+        {
+            arg = cc.newXmm();
+        }
+        else
+        {
+            Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+
+        argRegisters.push_back(arg);
+    }
+
+    // mov from stack to arguments
+    for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+    {
+        const uint8_t argType = sig.args()[argIdx];
+
+        // have to cast back to explicit register types to gen right mov type
+        if (isGeneralReg(argType))
+        {
+            cc.mov(argRegisters[argIdx].as<asmjit::x86::Gp>(), argsStack);
+        }
+        else if (isXmmReg(argType))
+        {
+            cc.movq(argRegisters[argIdx].as<asmjit::x86::Xmm>(), argsStack);
+        }
+        else
+        {
+            Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+
+        argsStack.addOffset(QWORD_SIZE);
+    }
+
+    asmjit::InvokeNode *invokeNode;
+    cc.invoke(&invokeNode, native_address, sig);
+    for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+    {
+        invokeNode->setArg(argIdx, argRegisters[argIdx]);
+    }
+
+    asmjit::x86::Reg trampoline_ret;
+    if (sig.hasRet())
+    {
+        if (asmjit::Type::isFloat(sig.ret()))
+        {
+            trampoline_ret = cc.newXmm();
+        }
+        else
+        {
+            trampoline_ret = cc.newUInt32();
+        }
+
+        // float and double do not need extra code because return value is in FPU's ST0
+        invokeNode->setRet(0, trampoline_ret);
+
+        asmjit::x86::Mem ret_memory = asmjit::x86::ptr(retStruct);
+
+        const uint8_t retType = sig.ret();
+
+        // have to cast back to explicit register types to gen right mov type
+        if (isGeneralReg(retType))
+        {
+            cc.mov(ret_memory, trampoline_ret.as<asmjit::x86::Gp>());
+        }
+        else if (isXmmReg(retType))
+        {
+            cc.movq(ret_memory, trampoline_ret.as<asmjit::x86::Xmm>());
+        }
+        else
+        {
+            Log::log("Return type wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+    }
+
+    cc.endFunc();
+    // write to buffer
+    cc.finalize();
+
+    // worst case, overestimates for case trampolines needed
+    code.flatten();
+    size_t size = code.codeSize();
+
+    // Allocate a virtual memory (executable).
+    m_callbackBuf = (uint64_t)m_mem.getBlock(size);
+    if (!m_callbackBuf)
+    {
+        __debugbreak();
+        return 0;
+    }
+
+    // if multiple sections, resolve linkage (1 atm)
+    if (code.hasUnresolvedLinks())
+    {
+        code.resolveUnresolvedLinks();
+    }
+
+    // Relocate to the base-address of the allocated memory.
+    code.relocateToBase(m_callbackBuf);
+    code.copyFlattenedData((unsigned char *)m_callbackBuf, size);
+
+    Log::log("JIT Stub:\n" + std::string(log.data()), ErrorLevel::INFO);
+    return m_callbackBuf;
 }
 
-PLH::PyCallback::PyCallback()
+uint64_t PLH::PyCallback::getJitFunc(const uint32_t unique_id, const asmjit::FuncSignature &sig,
+                                     const asmjit::Environment::Arch arch, const PLH::PyCallback::tUserCallback callback, bool use_trampoline)
 {
+    asmjit::CodeHolder code;
+    auto env = asmjit::hostEnvironment();
+    env.setArch(arch);
+    code.init(env);
+
+    // initialize function
+    asmjit::x86::Compiler cc(&code);
+    asmjit::FuncNode *func = cc.addFunc(sig);
+
+    asmjit::StringLogger log;
+    uint32_t kFormatFlags = asmjit::FormatOptions::kFlagMachineCode | asmjit::FormatOptions::kFlagExplainImms | asmjit::FormatOptions::kFlagRegCasts | asmjit::FormatOptions::kFlagAnnotations | asmjit::FormatOptions::kFlagDebugPasses | asmjit::FormatOptions::kFlagDebugRA | asmjit::FormatOptions::kFlagHexImms | asmjit::FormatOptions::kFlagHexOffsets | asmjit::FormatOptions::kFlagPositions;
+    log.addFlags(kFormatFlags);
+    code.setLogger(&log);
+
+    // too small to really need it
+    func->frame().resetPreservedFP();
+
+    constexpr uint32_t QWORD_SIZE = sizeof(uint64_t);
+
+    // map argument slots to registers, following abi.
+    std::vector<asmjit::x86::Reg> argRegisters;
+    for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+    {
+        const uint8_t argType = sig.args()[argIdx];
+
+        asmjit::x86::Reg arg;
+        if (isGeneralReg(argType))
+        {
+            arg = cc.newUIntPtr();
+        }
+        else if (isXmmReg(argType))
+        {
+            arg = cc.newXmm();
+        }
+        else
+        {
+            Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+
+        cc.setArg(argIdx, arg);
+        argRegisters.push_back(arg);
+    }
+
+    // setup the stack structure to hold arguments for user callback
+    uint32_t stackSize = (uint32_t)(QWORD_SIZE * sig.argCount());
+    asmjit::x86::Mem argsStack = cc.newStack(stackSize, 16);
+
+    //// mov from arguments registers into the stack structure
+    for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+    {
+        const uint8_t argType = sig.args()[argIdx];
+
+        // have to cast back to explicit register types to gen right mov type
+        if (isGeneralReg(argType))
+        {
+            cc.mov(argsStack, argRegisters[argIdx].as<asmjit::x86::Gp>());
+        }
+        else if (isXmmReg(argType))
+        {
+            cc.movq(argsStack, argRegisters[argIdx].as<asmjit::x86::Xmm>());
+        }
+        else
+        {
+            Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+
+        argsStack.addOffset(QWORD_SIZE);
+    }
+
+    // get pointer to stack structure and pass it to the user callback
+    asmjit::x86::Gp argStruct = cc.newUIntPtr("argStruct");
+    argsStack.resetOffset();
+    cc.lea(argStruct, argsStack);
+
+    // fill reg to pass struct arg count to callback
+    asmjit::x86::Gp argCountParam = cc.newU8();
+    cc.mov(argCountParam, (uint8_t)sig.argCount());
+
+    // create buffer for ret val
+    asmjit::x86::Mem retStack = cc.newStack(1 * QWORD_SIZE, 16);
+    asmjit::x86::Gp retStruct = cc.newUIntPtr("retStruct");
+    cc.lea(retStruct, retStack);
+
+    asmjit::x86::Gp param0 = cc.newInt32("tmp");
+    cc.mov(param0, unique_id);
+
+    asmjit::InvokeNode *invokeNode;
+    cc.invoke(&invokeNode,
+              reinterpret_cast(callback),
+              asmjit::FuncSignatureT());
+
+    // call to user provided function (use ABI of host compiler)
+    invokeNode->setArg(0, param0);
+    invokeNode->setArg(1, argStruct);
+    invokeNode->setArg(2, argCountParam);
+    invokeNode->setArg(3, retStruct);
+
+    argsStack.resetOffset();
+    for (uint8_t arg_idx = 0; arg_idx < sig.argCount(); arg_idx++)
+    {
+        const uint8_t argType = sig.args()[arg_idx];
+
+        if (isGeneralReg(argType))
+        {
+            cc.mov(argRegisters[arg_idx].as<asmjit::x86::Gp>(), argsStack);
+        }
+        else if (isXmmReg(argType))
+        {
+            cc.movq(argRegisters[arg_idx].as<asmjit::x86::Xmm>(), argsStack);
+        }
+        else
+        {
+            Log::log("Parameters wider than 64bits not supported", ErrorLevel::SEV);
+            return 0;
+        }
+
+        // next structure slot (+= sizeof(uint64_t))
+        argsStack.addOffset(QWORD_SIZE);
+    }
+
+    if (use_trampoline)
+    {
+        asmjit::x86::Reg trampoline_ret;
+        if (sig.hasRet() && asmjit::Type::isFloat(sig.ret()))
+        {
+            trampoline_ret = cc.newXmm();
+        }
+        else
+        {
+            trampoline_ret = cc.newUInt32();
+        }
+
+        asmjit::x86::Gp origPtr = cc.newUIntPtr("trampoline_holder");
+
+        cc.mov(origPtr, (uintptr_t)getTrampolineHolder());
+        cc.mov(origPtr, asmjit::x86::ptr(origPtr));
+
+        asmjit::InvokeNode *origInvokeNode;
+        cc.invoke(&origInvokeNode, origPtr, sig);
+        for (uint8_t argIdx = 0; argIdx < sig.argCount(); argIdx++)
+        {
+            origInvokeNode->setArg(argIdx, argRegisters[argIdx]);
+        }
+
+        if (sig.hasRet() && !asmjit::Type::isFloat(sig.ret()))
+        {
+            // float and double do not need extra code because return value is in FPU's ST0
+            origInvokeNode->setRet(0, trampoline_ret);
+            cc.ret(trampoline_ret);
+        }
+    }
+    else if (sig.hasRet())
+    {
+        asmjit::x86::Mem retStackIdx(retStack);
+        retStackIdx.setSize(QWORD_SIZE);
+        if (isGeneralReg((uint8_t)sig.ret()))
+        {
+            asmjit::x86::Gp tmp2 = cc.newUIntPtr();
+            cc.mov(tmp2, retStackIdx);
+            cc.ret(tmp2);
+        }
+        else
+        {
+            asmjit::x86::Xmm tmp2 = cc.newXmm();
+            cc.movq(tmp2, retStackIdx);
+            cc.ret(tmp2);
+        }
+    }
+
+    cc.endFunc();
+    // write to buffer
+    cc.finalize();
+
+    // worst case, overestimates for case trampolines needed
+    code.flatten();
+    size_t size = code.codeSize();
+
+    // Allocate a virtual memory (executable).
+    m_callbackBuf = (uint64_t)m_mem.getBlock(size);
+    if (!m_callbackBuf)
+    {
+        __debugbreak();
+        return 0;
+    }
+
+    // if multiple sections, resolve linkage (1 atm)
+    if (code.hasUnresolvedLinks())
+    {
+        code.resolveUnresolvedLinks();
+    }
+
+    // Relocate to the base-address of the allocated memory.
+    code.relocateToBase(m_callbackBuf);
+    code.copyFlattenedData((unsigned char *)m_callbackBuf, size);
+
+    Log::log("JIT Stub:\n" + std::string(log.data()), ErrorLevel::INFO);
+    return m_callbackBuf;
 }
 
-PLH::PyCallback::~PyCallback() {
-	
+uint64_t PLH::PyCallback::getJitFunc(const uint32_t unique_id, const std::string &retType, const std::vector<std::string> &paramTypes, const tUserCallback callback, std::string callConv)
+{
+    asmjit::FuncSignature sig = {};
+    std::vector<uint8_t> args;
+    for (const std::string &s : paramTypes)
+    {
+        args.push_back(getTypeId(s));
+    }
+    sig.init(getCallConv(callConv), asmjit::FuncSignature::kNoVarArgs, getTypeId(retType), args.data(), (uint32_t)args.size());
+    return getJitFunc(unique_id, sig, asmjit::Environment::kArchHost, callback);
 }
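
PyCallback now JITs both directions: the native_address overload emits a small caller that unpacks a packed argument array and invokes a native function (python-to-native), while the unique_id overload emits a detour stub that packs the hooked call's arguments, dispatches them to the registered tUserCallback, and optionally chains to the trampoline (native-to-python). A sketch of the string-typed entry point; the callback, id, and types are purely illustrative, and the trampoline-holder detail is an assumption read off the code above:

    // Hypothetical: build a cdecl stub for "int target(int, float)" dispatched as id 42.
    PLH::PyCallback cb;
    uint64_t stub = cb.getJitFunc(42, "int", { "int", "float" }, &myDispatcher, "cdecl");
    // Before the hooked function runs, *cb.getTrampolineHolder() is expected to
    // hold the trampoline address so the generated stub can chain to it.
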
diff --git a/libs/polyhook2.0/sources/StackCanary.cpp b/libs/polyhook2.0/sources/StackCanary.cpp
new file mode 100644
index 0000000..3659cc6
--- /dev/null
+++ b/libs/polyhook2.0/sources/StackCanary.cpp
@@ -0,0 +1,20 @@
+#include "polyhook2/Tests/StackCanary.hpp"
+
+PLH::StackCanary::StackCanary() {
+	for (int i = 0; i < 50; i++) {
+		buf[i] = 0xCE;
+	}
+}
+
+bool PLH::StackCanary::isStackGood() {
+	for (int i = 0; i < 50; i++) {
+		if (buf[i] != 0xCE)
+			return false;
+	}
+	return true;
+}
+
+PLH::StackCanary::~StackCanary() noexcept(false) {
+	if (!isStackGood())
+		throw "Stack corruption detected";
+}
\ No newline at end of file
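
StackCanary is a small test helper: the constructor fills a 50-byte buffer with 0xCE and the (deliberately throwing) destructor re-checks the pattern, so stack corruption caused by a generated stub surfaces as an exception at scope exit. A sketch of how a test might use it, with hookedTarget standing in for any detoured function:

    // Hypothetical test body.
    void exercise_hook() {
        PLH::StackCanary canary;   // buffer filled with 0xCE
        hookedTarget(1, 2, 3);     // call through the installed hook
    }                              // destructor throws here if the pattern was clobbered
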
diff --git a/libs/polyhook2.0/sources/TestEffectTracker.cpp b/libs/polyhook2.0/sources/TestEffectTracker.cpp
index 1006eca..198fb1f 100644
--- a/libs/polyhook2.0/sources/TestEffectTracker.cpp
+++ b/libs/polyhook2.0/sources/TestEffectTracker.cpp
@@ -1,7 +1,7 @@
 #include "polyhook2/Tests/TestEffectTracker.hpp"
 
 
-Effect::Effect() : m_uid(UID::singleton()) {
+Effect::Effect() : m_uid(PLH::UID::singleton()) {
 	m_executed = false;
 }
 
diff --git a/libs/polyhook2.0/sources/UID.cpp b/libs/polyhook2.0/sources/UID.cpp
new file mode 100644
index 0000000..c7a4344
--- /dev/null
+++ b/libs/polyhook2.0/sources/UID.cpp
@@ -0,0 +1,11 @@
+#include "polyhook2/UID.hpp"
+
+PLH::UID::UID(long val) {
+	this->val = val;
+}
+
+std::atomic_long& PLH::UID::singleton() {
+	static std::atomic_long base = { -1 };
+	base++;
+	return base;
+}
diff --git a/libs/polyhook2.0/sources/VFuncSwapHook.cpp b/libs/polyhook2.0/sources/VFuncSwapHook.cpp
index c69d13b..a244717 100644
--- a/libs/polyhook2.0/sources/VFuncSwapHook.cpp
+++ b/libs/polyhook2.0/sources/VFuncSwapHook.cpp
@@ -6,19 +6,22 @@ PLH::VFuncSwapHook::VFuncSwapHook(const char* Class, const VFuncMap& redirectMap
 
 PLH::VFuncSwapHook::VFuncSwapHook(const uint64_t Class, const VFuncMap& redirectMap, VFuncMap* userOrigMap) 
 	: m_class(Class)
+	, m_vtable(nullptr)
+	, m_vFuncCount(0)
 	, m_redirectMap(redirectMap)
+	, m_origVFuncs()
 	, m_userOrigMap(userOrigMap)
 {}
 
 bool PLH::VFuncSwapHook::hook() {
 	assert(m_userOrigMap != nullptr);
-	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W, *this);
 	m_vtable = *(uintptr_t**)m_class;
 	m_vFuncCount = countVFuncs();
 	if (m_vFuncCount <= 0)
 		return false;
 
-	MemoryProtector prot2((uint64_t)&m_vtable[0], sizeof(uintptr_t) * m_vFuncCount, ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot2((uint64_t)&m_vtable[0], sizeof(uintptr_t) * m_vFuncCount, ProtFlag::R | ProtFlag::W, *this);
 	for (const auto& p : m_redirectMap) {
 		assert(p.first < m_vFuncCount);
 		if (p.first >= m_vFuncCount)
@@ -30,17 +33,19 @@ bool PLH::VFuncSwapHook::hook() {
 		m_vtable[p.first] = (uintptr_t)p.second;
 	}
 
-	m_Hooked = true;
+	m_hooked = true;
 	return true;
 }
 
 bool PLH::VFuncSwapHook::unHook() {
 	assert(m_userOrigMap != nullptr);
-	assert(m_Hooked);
-	if (!m_Hooked)
+	assert(m_hooked);
+	if (!m_hooked) {
+		Log::log("vfuncswap unhook failed: no hook present", ErrorLevel::SEV);
 		return false;
+	}
 
-	MemoryProtector prot2((uint64_t)&m_vtable[0], sizeof(uintptr_t) * m_vFuncCount, ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot2((uint64_t)&m_vtable[0], sizeof(uintptr_t) * m_vFuncCount, ProtFlag::R | ProtFlag::W, *this);
 	for (const auto& p : m_origVFuncs) {
 		assert(p.first < m_vFuncCount);
 		if (p.first >= m_vFuncCount)
@@ -50,6 +55,7 @@ bool PLH::VFuncSwapHook::unHook() {
 	}
 
 	m_userOrigMap = nullptr;
+	m_hooked = false;
 	return true;
 }
 
diff --git a/libs/polyhook2.0/sources/VTableSwapHook.cpp b/libs/polyhook2.0/sources/VTableSwapHook.cpp
index 95402c3..31e08af 100644
--- a/libs/polyhook2.0/sources/VTableSwapHook.cpp
+++ b/libs/polyhook2.0/sources/VTableSwapHook.cpp
@@ -4,17 +4,35 @@ PLH::VTableSwapHook::VTableSwapHook(const char* Class, const VFuncMap& redirectM
 	: VTableSwapHook((uint64_t)Class, redirectMap)
 {}
 
+PLH::VTableSwapHook::VTableSwapHook(const uint64_t Class)
+	: VTableSwapHook(Class, PLH::VFuncMap{ })
+{}
+
 PLH::VTableSwapHook::VTableSwapHook(const uint64_t Class, const VFuncMap& redirectMap) 
-	: m_class(Class)
+	: m_newVtable(nullptr)
+	, m_origVtable(nullptr)
+	, m_class(Class)
+	, m_vFuncCount(0)
 	, m_redirectMap(redirectMap)
+	, m_origVFuncs()
 {}
 
 bool PLH::VTableSwapHook::hook() {
-	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W);
+	assert(!m_hooked);
+	if (m_hooked) {
+		Log::log("vtable hook failed: hook already present", ErrorLevel::SEV);
+		return false;
+	}
+
+	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W, *this);
 	m_origVtable = *(uintptr_t**)m_class;
 	m_vFuncCount = countVFuncs();
+	assert(m_vFuncCount > 0);
 	if (m_vFuncCount <= 0)
+	{
+		Log::log("vtable hook failed: class has no virtual functions", ErrorLevel::SEV);
 		return false;
+	}
 
 	m_newVtable.reset(new uintptr_t[m_vFuncCount]);
 
@@ -23,8 +41,12 @@ bool PLH::VTableSwapHook::hook() {
 
 	for (const auto& p : m_redirectMap) {
 		assert(p.first < m_vFuncCount);
-		if (p.first >= m_vFuncCount)
+		if (p.first >= m_vFuncCount) {
+			Log::log("vtable hook failed: index exceeds virtual function count", ErrorLevel::SEV);
+			m_newVtable = nullptr;
+			m_origVFuncs.clear();
 			return false;
+		}
 
 		// redirect ptr at VTable[i]
 		m_origVFuncs[p.first] = (uint64_t)m_newVtable[p.first];
@@ -32,22 +54,26 @@ bool PLH::VTableSwapHook::hook() {
 	}
 
 	*(uint64_t**)m_class = (uint64_t*)m_newVtable.get();
-	m_Hooked = true;
+	m_hooked = true;
+	Log::log("vtable hooked", ErrorLevel::INFO);
 	return true;
 }
 
 bool PLH::VTableSwapHook::unHook() {
-	assert(m_Hooked);
-	if (!m_Hooked)
+	assert(m_hooked);
+	if (!m_hooked) {
+		Log::log("vtable unhook failed: no hook present", ErrorLevel::SEV);
 		return false;
+	}
 
-	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W);
+	MemoryProtector prot(m_class, sizeof(void*), ProtFlag::R | ProtFlag::W, *this);
 	*(uint64_t**)m_class = (uint64_t*)m_origVtable;
 	
 	m_newVtable.reset();
 
-	m_Hooked = false;
+	m_hooked = false;
 	m_origVtable = nullptr;
+	Log::log("vtable unhooked", ErrorLevel::INFO);
 	return true;
 }
 
diff --git a/libs/polyhook2.0/sources/ZydisDisassembler.cpp b/libs/polyhook2.0/sources/ZydisDisassembler.cpp
index 095ca6a..39a3e28 100644
--- a/libs/polyhook2.0/sources/ZydisDisassembler.cpp
+++ b/libs/polyhook2.0/sources/ZydisDisassembler.cpp
@@ -7,13 +7,13 @@ PLH::ZydisDisassembler::ZydisDisassembler(PLH::Mode mode) : ADisassembler(mode),
 		(mode == PLH::Mode::x64) ? ZYDIS_MACHINE_MODE_LONG_64 : ZYDIS_MACHINE_MODE_LONG_COMPAT_32,
 		(mode == PLH::Mode::x64) ? ZYDIS_ADDRESS_WIDTH_64 : ZYDIS_ADDRESS_WIDTH_32)))
 	{
-		ErrorLog::singleton().push("Failed to initialize zydis decoder", ErrorLevel::SEV);
+		Log::log("Failed to initialize zydis decoder", ErrorLevel::SEV);
 		return;
 	}
 
 	if (ZYAN_FAILED(ZydisFormatterInit(m_formatter, ZYDIS_FORMATTER_STYLE_INTEL)))
 	{
-		ErrorLog::singleton().push("Failed to initialize zydis formatter", ErrorLevel::SEV);
+		Log::log("Failed to initialize zydis formatter", ErrorLevel::SEV);
 		return;
 	}
 
@@ -27,13 +27,24 @@ PLH::ZydisDisassembler::~ZydisDisassembler() {
 }
 
 PLH::insts_t
-PLH::ZydisDisassembler::disassemble(uint64_t firstInstruction, uint64_t start, uint64_t End) {
+PLH::ZydisDisassembler::disassemble(uint64_t firstInstruction, uint64_t start, uint64_t End, const MemAccessor& accessor) {
 	insts_t insVec;
 	m_branchMap.clear();
 
+	uint64_t size = End - start;
+	assert(size > 0);
+	if (size <= 0) {
+		return insVec;
+	}
+
+	// copy potentially remote memory to local buffer
+	uint8_t* buf = new uint8_t[(uint32_t)size];
+	accessor.mem_copy((uint64_t)buf, firstInstruction, size);
+
 	ZydisDecodedInstruction insInfo;
 	uint64_t offset = 0;
-	while(ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(m_decoder, (char*)(firstInstruction + offset), (ZyanUSize)(End - start - offset), &insInfo)))
+	bool endHit = false;
+	while(ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(m_decoder, (char*)(buf + offset), (ZyanUSize)(size - offset), &insInfo)))
 	{
 		Instruction::Displacement displacement = {};
 		displacement.Absolute = 0;
@@ -48,20 +59,27 @@ PLH::ZydisDisassembler::disassemble(uint64_t firstInstruction, uint64_t start, u
 						 displacement,
 						 0,
 						 false,
-						 (uint8_t*)((unsigned char*)firstInstruction + offset),
+			             false,
+						 (uint8_t*)((unsigned char*)buf + offset),
 						 insInfo.length,
 						 ZydisMnemonicGetString(insInfo.mnemonic),
 						 opstr,
 						 m_mode);
 
 		setDisplacementFields(inst, &insInfo);
+		if (endHit && !isPadBytes(inst))
+			break;
+
 		insVec.push_back(inst);
 
 		// searches instruction vector and updates references
 		addToBranchMap(insVec, inst);
+		if (isFuncEnd(inst))
+			endHit = true;
 
 		offset += insInfo.length;
 	}
+	delete[] buf;
 	return insVec;
 }
 
@@ -97,12 +115,30 @@ void PLH::ZydisDisassembler::setDisplacementFields(PLH::Instruction& inst, const
             break;
         case ZYDIS_OPERAND_TYPE_MEMORY:
 			// Relative to RIP/EIP
-			if(zydisInst->attributes & ZYDIS_ATTRIB_IS_RELATIVE)
+		
+		{
+			bool set = false;
+			if (zydisInst->attributes & ZYDIS_ATTRIB_IS_RELATIVE)
 			{
 				inst.setDisplacementOffset(zydisInst->raw.disp.offset);
 				inst.setRelativeDisplacement(operand->mem.disp.value);
-				return;
+				set = true;
 			}
+
+			if ((zydisInst->mnemonic == ZydisMnemonic::ZYDIS_MNEMONIC_JMP && inst.size() >= 2 && inst.getBytes().at(0) == 0xff && inst.getBytes().at(1) == 0x25) ||
+				(zydisInst->mnemonic == ZydisMnemonic::ZYDIS_MNEMONIC_CALL && inst.size() >= 2 && inst.getBytes().at(0) == 0xff && inst.getBytes().at(1) == 0x15) ||
+				(zydisInst->mnemonic == ZydisMnemonic::ZYDIS_MNEMONIC_CALL && inst.size() >= 3 && inst.getBytes().at(1) == 0xff && inst.getBytes().at(2) == 0x15) ||
+				(zydisInst->mnemonic == ZydisMnemonic::ZYDIS_MNEMONIC_JMP && inst.size() >= 3 && inst.getBytes().at(1) == 0xff && inst.getBytes().at(2) == 0x15)
+				) {
+
+				if (!set) {
+					// displacement is absolute on x86 mode
+					inst.setDisplacementOffset(zydisInst->raw.disp.offset);
+					inst.setAbsoluteDisplacement(zydisInst->raw.disp.value);
+				}
+				inst.setIndirect(true);
+			}
+		}
             break;
         case ZYDIS_OPERAND_TYPE_POINTER:
 			
diff --git a/libs/polyhook2.0/sources/x64Detour.cpp b/libs/polyhook2.0/sources/x64Detour.cpp
index f1a2984..107c06d 100644
--- a/libs/polyhook2.0/sources/x64Detour.cpp
+++ b/libs/polyhook2.0/sources/x64Detour.cpp
@@ -1,7 +1,13 @@
 //
 // Created by steve on 7/5/17.
 //
+#include 
+#include 
+#include 
+
 #include "polyhook2/Detour/x64Detour.hpp"
+#include "polyhook2/Misc.hpp"
+#include "polyhook2/MemProtector.hpp"
 
 PLH::x64Detour::x64Detour(const uint64_t fnAddress, const uint64_t fnCallback, uint64_t* userTrampVar, PLH::ADisassembler& dis) : PLH::Detour(fnAddress, fnCallback, userTrampVar, dis) {
 
@@ -23,30 +29,169 @@ uint8_t PLH::x64Detour::getPrefJmpSize() const {
 	return 16;
 }
 
+template<uint16_t SIZE>
+std::optional<uint64_t> PLH::x64Detour::findNearestCodeCave(uint64_t addr) {
+	const uint64_t chunkSize = 64000;
+	unsigned char* data = new unsigned char[chunkSize];
+	auto delete_data = finally([=]() {
+		delete[] data;
+	});
+
+	// RPM so we don't pagefault, careful to check for partial reads
+	auto calc_2gb_below = [](uint64_t address) -> uint64_t
+	{
+		return (address > (uint64_t)0x7ff80000) ? address - 0x7ff80000 : 0x80000;
+	};
+
+	auto calc2gb_above = [](uint64_t address) -> uint64_t
+	{
+		return (address < (uint64_t)0xffffffff80000000) ? address + 0x7ff80000 : (uint64_t)0xfffffffffff80000;
+	};
+	
+	// these patterns are listed in order of most accurate to least accurate with size taken into account
+	// simple c3 ret is more accurate than c2 ?? ?? and series of CC or 90 is more accurate than complex multi-byte nop
+	std::string CC_PATTERN_RET = "c3 " + repeat_n("cc", SIZE, " ");
+	std::string NOP1_PATTERN_RET = "c3 " + repeat_n("90", SIZE, " ");
+
+	std::string CC_PATTERN_RETN = "c2 ?? ?? " + repeat_n("cc", SIZE, " ");
+	std::string NOP1_PATTERN_RETN = "c2 ?? ?? " + repeat_n("90", SIZE, " ");
+
+	const char* NOP2_RET = "c3 0f 1f 44 00 00";
+	const char* NOP3_RET = "c3 0f 1f 84 00 00 00 00 00";
+	const char* NOP4_RET = "c3 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP5_RET = "c3 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP6_RET = "c3 cc cc cc cc cc cc 66 0f 1f 44 00 00";
+	const char* NOP7_RET = "c3 66 66 66 66 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP8_RET = "c3 cc cc cc cc cc cc 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP9_RET = "c3 cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP10_RET = "c3 cc cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP11_RET = "c3 cc cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+	
+	const char* NOP2_RETN = "c2 ?? ?? 0f 1f 44 00 00";
+	const char* NOP3_RETN = "c2 ?? ?? 0f 1f 84 00 00 00 00 00";
+	const char* NOP4_RETN = "c2 ?? ?? 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP5_RETN = "c2 ?? ?? 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP6_RETN = "c2 ?? ?? cc cc cc cc cc cc 66 0f 1f 44 00 00";
+	const char* NOP7_RETN = "c2 ?? ?? 66 66 66 66 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP8_RETN = "c2 ?? ?? cc cc cc cc cc cc 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP9_RETN = "c2 ?? ?? cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP10_RETN = "c2 ?? ?? cc cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+	const char* NOP11_RETN = "c2 ?? ?? cc cc cc cc cc cc cc 66 66 0f 1f 84 00 00 00 00 00";
+
+	// Scan in same order as listing above
+	const char* PATTERNS_OFF1[] = {
+		CC_PATTERN_RET.c_str(), NOP1_PATTERN_RET.c_str(),
+		NOP2_RET, NOP3_RET, NOP4_RET, NOP5_RET,NOP6_RET,
+		NOP7_RET, NOP8_RET, NOP9_RET, NOP10_RET, NOP11_RET
+	};
+
+	const char* PATTERNS_OFF3[] = {
+		CC_PATTERN_RETN.c_str(), NOP1_PATTERN_RETN.c_str(),
+		NOP2_RETN, NOP3_RETN, NOP4_RETN, NOP5_RETN,NOP6_RETN,
+		NOP7_RETN, NOP8_RETN, NOP9_RETN, NOP10_RETN, NOP11_RETN,
+	};
+
+	// Most common:
+	// https://gist.github.com/stevemk14ebr/d117e8d0fd1432fb2a92354a034ce5b9
+	// We check for rets to verify it's not something like a mid-function or jmp table pad
+	// [0xc3 | 0xC2 ? ? ? ? ] & 6666666666660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & 0f1f440000
+	// [0xc3 | 0xC2 ? ? ? ? ] & 0f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & cccccccccccc660f1f440000
+	// [0xc3 | 0xC2 ? ? ? ? ] & cccccccccccc660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & cccccccccccccc66660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & cccccccccccccccccccccccccccc66660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & cccccccccccc66660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & 66660f1f840000000000
+	// [0xc3 | 0xC2 ? ? ? ? ] & 660f1f840000000000
+
+	// Search 2GB below
+	for (uint64_t search = addr - chunkSize; (search + chunkSize) >= calc_2gb_below(addr); search -= chunkSize) {
+		size_t read = 0;
+		if (safe_mem_read(search, (uint64_t)data, chunkSize, read)) {
+			assert(read <= chunkSize);
+			if (read == 0 || read < SIZE)
+				continue;
+
+			auto finder = [&](const char* pattern, const uint64_t offset) -> std::optional {
+				if (auto found = (uint64_t)findPattern_rev((uint64_t)data, read, pattern)) {
+					return search + (found + offset - (uint64_t)data);
+				}
+				return {};
+			};
+
+			for (const char* pat : PATTERNS_OFF1) {
+				if (auto found = finder(pat, 1)) {
+					return found;
+				}
+			}
+
+			for (const char* pat : PATTERNS_OFF3) {
+				if (auto found = finder(pat, 3)) {
+					return found;
+				}
+			}
+		}
+	}
+
+	// Search 2GB above
+	for (uint64_t search = addr; (search + chunkSize) < calc2gb_above(addr); search += chunkSize) {
+		size_t read = 0;
+		if (safe_mem_read(search, (uint64_t)data, chunkSize, read)) {
+			uint32_t contiguousInt3 = 0;
+			uint32_t contiguousNop = 0;
+
+			assert(read <= chunkSize);
+			if (read == 0 || read < SIZE)
+				continue;
+
+			auto finder = [&](const char* pattern, const uint64_t offset) -> std::optional {
+				if (auto found = (uint64_t)findPattern((uint64_t)data, read, pattern)) {
+					return search + (found + offset - (uint64_t)data);
+				}
+				return {};
+			};
+
+			for (const char* pat : PATTERNS_OFF1) {
+				if (auto found = finder(pat, 1)) {
+					return found;
+				}
+			}
+
+			for (const char* pat : PATTERNS_OFF3) {
+				if (auto found = finder(pat, 3)) {
+					return found;
+				}
+			}
+		}
+	}
+	return {};
+}
+
 bool PLH::x64Detour::hook() {
 	// ------- Must resolve callback first, so that m_disasm branchmap is filled for prologue stuff
-	insts_t callbackInsts = m_disasm.disassemble(m_fnCallback, m_fnCallback, m_fnCallback + 100);
+	insts_t callbackInsts = m_disasm.disassemble(m_fnCallback, m_fnCallback, m_fnCallback + 100, *this);
 	if (callbackInsts.empty()) {
-		ErrorLog::singleton().push("Disassembler unable to decode any valid callback instructions", ErrorLevel::SEV);
+		Log::log("Disassembler unable to decode any valid callback instructions", ErrorLevel::SEV);
 		return false;
 	}
 
 	if (!followJmp(callbackInsts)) {
-		ErrorLog::singleton().push("Callback jmp resolution failed", ErrorLevel::SEV);
+		Log::log("Callback jmp resolution failed", ErrorLevel::SEV);
 		return false;
 	}
 
 	// update given fn callback address to resolved one
 	m_fnCallback = callbackInsts.front().getAddress();
 
-	insts_t insts = m_disasm.disassemble(m_fnAddress, m_fnAddress, m_fnAddress + 100);
+	insts_t insts = m_disasm.disassemble(m_fnAddress, m_fnAddress, m_fnAddress + 100, *this);
 	if (insts.empty()) {
-		ErrorLog::singleton().push("Disassembler unable to decode any valid instructions", ErrorLevel::SEV);
+		Log::log("Disassembler unable to decode any valid instructions", ErrorLevel::SEV);
 		return false;
 	}
 
 	if (!followJmp(insts)) {
-		ErrorLog::singleton().push("Prologue jmp resolution failed", ErrorLevel::SEV);
+		Log::log("Prologue jmp resolution failed", ErrorLevel::SEV);
 		return false;
 	}
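
hook() further down calls findNearestCodeCave<8>(m_fnAddress), so SIZE is 8 in the two dynamically built patterns above. Assuming repeat_n(s, n, sep) joins n copies of s with sep (the helper body is not part of this hunk), a quick check of what the scanner looks for and where the reported cave starts:

    #include <cassert>
    #include <string>

    // Hypothetical stand-in for the repeat_n helper used above.
    static std::string repeat_n_sketch(const std::string& s, unsigned n, const std::string& sep) {
        std::string out;
        for (unsigned i = 0; i < n; ++i) out += (i ? sep : "") + s;
        return out;
    }

    int main() {
        // SIZE == 8, matching the findNearestCodeCave<8>(m_fnAddress) call in hook().
        assert(("c3 " + repeat_n_sketch("cc", 8, " ")) == "c3 cc cc cc cc cc cc cc cc");
        // A hit on a PATTERNS_OFF1 entry at byte offset (found - data) reports the
        // cave at search + (found - data) + 1: the first padding byte after the ret.
        // PATTERNS_OFF3 uses +3 to skip the 3-byte "c2 ?? ??" retn encoding instead.
        return 0;
    }
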
 
@@ -54,17 +199,18 @@ bool PLH::x64Detour::hook() {
 	m_fnAddress = insts.front().getAddress();
 
 	// --------------- END RECURSIVE JMP RESOLUTION ---------------------
-	ErrorLog::singleton().push("Original function:\n" + instsToStr(insts) + "\n", ErrorLevel::INFO);
+	Log::log("Original function:\n" + instsToStr(insts) + "\n", ErrorLevel::INFO);
 
-	uint64_t minProlSz = getPrefJmpSize(); // min size of patches that may split instructions
+	uint64_t minProlSz = getMinJmpSize(); // min size of patches that may split instructions
 	uint64_t roundProlSz = minProlSz; // nearest size to min that doesn't split any instructions
 
+	std::optional prologueOpt;
 	insts_t prologue;
 	{
 		// find the prologue section we will overwrite with jmp + zero or more nops
-		auto prologueOpt = calcNearestSz(insts, minProlSz, roundProlSz);
+		prologueOpt = calcNearestSz(insts, minProlSz, roundProlSz);
 		if (!prologueOpt) {
-			ErrorLog::singleton().push("Function too small to hook safely!", ErrorLevel::SEV);
+			Log::log("Function too small to hook safely!", ErrorLevel::SEV);
 			return false;
 		}
 
@@ -72,33 +218,45 @@ bool PLH::x64Detour::hook() {
 		prologue = *prologueOpt;
 
 		if (!expandProlSelfJmps(prologue, insts, minProlSz, roundProlSz)) {
-			ErrorLog::singleton().push("Function needs a prologue jmp table but it's too small to insert one", ErrorLevel::SEV);
+			Log::log("Function needs a prologue jmp table but it's too small to insert one", ErrorLevel::SEV);
 			return false;
 		}
 	}
 
 	m_originalInsts = prologue;
-	ErrorLog::singleton().push("Prologue to overwrite:\n" + instsToStr(prologue) + "\n", ErrorLevel::INFO);
+	Log::log("Prologue to overwrite:\n" + instsToStr(prologue) + "\n", ErrorLevel::INFO);
 	
 	{   // copy all the prologue stuff to trampoline
 		insts_t jmpTblOpt;
-		if (!makeTrampoline(prologue, jmpTblOpt))
+		if (!makeTrampoline(prologue, jmpTblOpt)) {
 			return false;
+		}
 
-		ErrorLog::singleton().push("Trampoline:\n" + instsToStr(m_disasm.disassemble(m_trampoline, m_trampoline, m_trampoline + m_trampolineSz)) + "\n", ErrorLevel::INFO);
-		if (jmpTblOpt.empty())
-			ErrorLog::singleton().push("Trampoline Jmp Tbl:\n" + instsToStr(jmpTblOpt) + "\n", ErrorLevel::INFO);
+		Log::log("Trampoline:\n" + instsToStr(m_disasm.disassemble(m_trampoline, m_trampoline, m_trampoline + m_trampolineSz, *this)) + "\n", ErrorLevel::INFO);
+		if (!jmpTblOpt.empty())
+			Log::log("Trampoline Jmp Tbl:\n" + instsToStr(jmpTblOpt) + "\n", ErrorLevel::INFO);
 	}
 
 	*m_userTrampVar = m_trampoline;
+	m_hookSize = (uint32_t)roundProlSz;
+	m_nopProlOffset = (uint16_t)minProlSz;
+
+	MemoryProtector prot(m_fnAddress, m_hookSize, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
+	// we're really space constrained, try to do some stupid hacks like checking for 0xCC's near us
+	auto cave = findNearestCodeCave<8>(m_fnAddress);
+	if (!cave) {
+		Log::log("Function too small to hook safely, no code caves found near function", ErrorLevel::SEV);
+		return false;
+	}
 
-	MemoryProtector prot(m_fnAddress, roundProlSz, ProtFlag::R | ProtFlag::W | ProtFlag::X);
-	const auto prolJmp = makex64PreferredJump(m_fnAddress, m_fnCallback);
-	m_disasm.writeEncoding(prolJmp);
+	MemoryProtector holderProt(*cave, 8, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this, false);
+	m_hookInsts = makex64MinimumJump(m_fnAddress, m_fnCallback, *cave);
+	m_disasm.writeEncoding(m_hookInsts, *this);
 
 	// Nop the space between jmp and end of prologue
-	const uint8_t nopSz = (uint8_t)(roundProlSz - minProlSz);
-	std::memset((char*)(m_fnAddress + minProlSz), 0x90, (size_t)nopSz);
+	assert(m_hookSize >= m_nopProlOffset);
+	m_nopSize = (uint16_t)(m_hookSize - m_nopProlOffset);
+	writeNop(m_fnAddress + m_nopProlOffset, m_nopSize);
 
 	m_hooked = true;
 	return true;
@@ -106,6 +264,8 @@ bool PLH::x64Detour::hook() {
 
 bool PLH::x64Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) {
 	assert(!prologue.empty());
+	assert(m_trampoline == NULL);
+
 	const uint64_t prolStart = prologue.front().getAddress();
 	const uint16_t prolSz = calcInstsSz(prologue);
 	const uint8_t destHldrSz = 8;
@@ -116,35 +276,40 @@ bool PLH::x64Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) {
 	
 	The relocation could also because of data operations too. But that's specific to the function and can't
 	work again on a retry (same function, duh). Return immediately in that case.**/
-	uint8_t neededEntryCount = 5;
+	uint8_t neededEntryCount = 0;
 	PLH::insts_t instsNeedingEntry;
 	PLH::insts_t instsNeedingReloc;
-
 	uint8_t retries = 0;
+
+	bool good = false;
 	do {
-		if (retries++ > 4) {
-			ErrorLog::singleton().push("Failed to calculate trampoline information", ErrorLevel::SEV);
-			return false;
-		}
+		neededEntryCount = std::max((uint8_t)instsNeedingEntry.size(), (uint8_t)5);
+		
+		// prol + jmp back to prol + N * jmpEntries
+		m_trampolineSz = (uint16_t)(prolSz + (getMinJmpSize() + destHldrSz) +
+			(getMinJmpSize() + destHldrSz)* neededEntryCount);
 
+		// allocate new trampoline before deleting old to increase odds of new mem address
+		uint64_t tmpTrampoline = (uint64_t)new unsigned char[m_trampolineSz];
 		if (m_trampoline != NULL) {
 			delete[](unsigned char*)m_trampoline;
-			neededEntryCount = (uint8_t)instsNeedingEntry.size();
 		}
 
-		// prol + jmp back to prol + N * jmpEntries
-		m_trampolineSz = (uint16_t)(prolSz + (getMinJmpSize() + destHldrSz) +
-			(getMinJmpSize() + destHldrSz)* neededEntryCount);
-		m_trampoline = (uint64_t) new unsigned char[m_trampolineSz];
-
+		m_trampoline = tmpTrampoline;
 		const int64_t delta = m_trampoline - prolStart;
 
 		if (!buildRelocationList(prologue, prolSz, delta, instsNeedingEntry, instsNeedingReloc))
-			return false;
-	} while (instsNeedingEntry.size() > neededEntryCount);
+			continue;
+
+		good = true;
+	} while (retries++ < 5 && !good);
+
+	if (!good) {
+		return false;
+	}
 
 	const int64_t delta = m_trampoline - prolStart;
-	MemoryProtector prot(m_trampoline, m_trampolineSz, ProtFlag::R | ProtFlag::W | ProtFlag::X, false);
+	MemoryProtector prot(m_trampoline, m_trampolineSz, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this, false);
 
 	// Insert jmp from trampoline -> prologue after overwritten section
 	const uint64_t jmpToProlAddr = m_trampoline + prolSz;
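
Plugging assumed numbers into the sizing formula above (getMinJmpSize() is not shown in this hunk; 6 bytes for a `jmp qword ptr [rip+rel32]` is assumed, destHldrSz is the 8 declared earlier, and 5 is the initial neededEntryCount before any retry):

    #include <cassert>
    #include <cstdint>

    // Worked instance of: prolSz + (minJmp + destHldrSz) + (minJmp + destHldrSz) * entries
    int main() {
        const uint16_t prolSz = 16, minJmp = 6, destHldrSz = 8, entries = 5;
        const uint16_t trampolineSz = prolSz + (minJmp + destHldrSz) + (minJmp + destHldrSz) * entries;
        assert(trampolineSz == 100);   // 16 + 14 + 70
        return 0;
    }
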
@@ -152,22 +317,20 @@ bool PLH::x64Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) {
 	{
 		const auto jmpToProl = makex64MinimumJump(jmpToProlAddr, prologue.front().getAddress() + prolSz, jmpHolderCurAddr);
 
-		ErrorLog::singleton().push("Jmp To Prol:\n" + instsToStr(jmpToProl) + "\n", ErrorLevel::INFO);
-		m_disasm.writeEncoding(jmpToProl);
+		Log::log("Jmp To Prol:\n" + instsToStr(jmpToProl) + "\n", ErrorLevel::INFO);
+		m_disasm.writeEncoding(jmpToProl, *this);
 	}
 
-	// each jmp tbl entries holder is one slot down from the previous
-	auto calcJmpHolder = [=] () -> uint64_t {
-		static uint64_t captureAddr = jmpHolderCurAddr;
-		captureAddr -= destHldrSz;
-		return captureAddr;
+	// each jmp tbl entries holder is one slot down from the previous (lambda holds state)
+	const auto makeJmpFn = [=, captureAddress = jmpHolderCurAddr](uint64_t a, uint64_t b) mutable {
+		captureAddress -= destHldrSz;
+		assert(captureAddress > (uint64_t)m_trampoline && (captureAddress + destHldrSz) < (m_trampoline + m_trampolineSz));
+		return makex64MinimumJump(a, b, captureAddress);
 	};
 
-	const auto makeJmpFn = std::bind(makex64MinimumJump, _1, _2, std::bind(calcJmpHolder));
-
 	const uint64_t jmpTblStart = jmpToProlAddr + getMinJmpSize();
 	trampolineOut = relocateTrampoline(prologue, jmpTblStart, delta, getMinJmpSize(),
 													makeJmpFn, instsNeedingReloc, instsNeedingEntry);
 
 	return true;
-}
\ No newline at end of file
+}
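
The std::bind chain around calcJmpHolder and its function-local static is replaced above by an init-capturing mutable lambda, so the "hand out the next 8-byte holder slot, growing downwards" state lives in the closure itself. The same pattern in isolation, with made-up addresses:

    #include <cstdint>

    int main() {
        uint64_t top = 0x1000;              // pretend end-of-trampoline address
        const uint64_t slot = 8;            // destHldrSz
        auto nextHolder = [slot, cur = top]() mutable {
            cur -= slot;                    // each call yields the next lower slot
            return cur;                     // 0xff8, 0xff0, ... on successive calls
        };
        return (nextHolder() == 0xff8 && nextHolder() == 0xff0) ? 0 : 1;
    }
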
diff --git a/libs/polyhook2.0/sources/x86Detour.cpp b/libs/polyhook2.0/sources/x86Detour.cpp
index 02dba48..5a68902 100644
--- a/libs/polyhook2.0/sources/x86Detour.cpp
+++ b/libs/polyhook2.0/sources/x86Detour.cpp
@@ -21,28 +21,28 @@ uint8_t PLH::x86Detour::getJmpSize() const {
 
 bool PLH::x86Detour::hook() {
 	// ------- Must resolve callback first, so that m_disasm branchmap is filled for prologue stuff
-	insts_t callbackInsts = m_disasm.disassemble(m_fnCallback, m_fnCallback, m_fnCallback + 100);
+	insts_t callbackInsts = m_disasm.disassemble(m_fnCallback, m_fnCallback, m_fnCallback + 100, *this);
 	if (callbackInsts.empty()) {
-		ErrorLog::singleton().push("Disassembler unable to decode any valid callback instructions", ErrorLevel::SEV);
+		Log::log("Disassembler unable to decode any valid callback instructions", ErrorLevel::SEV);
 		return false;
 	}
 
 	if (!followJmp(callbackInsts)) {
-		ErrorLog::singleton().push("Callback jmp resolution failed", ErrorLevel::SEV);
+		Log::log("Callback jmp resolution failed", ErrorLevel::SEV);
 		return false;
 	}
 
 	// update given fn callback address to resolved one
 	m_fnCallback = callbackInsts.front().getAddress();
 
-	insts_t insts = m_disasm.disassemble(m_fnAddress, m_fnAddress, m_fnAddress + 100);
+	insts_t insts = m_disasm.disassemble(m_fnAddress, m_fnAddress, m_fnAddress + 100, *this);
 	if (insts.size() <= 0) {
-		ErrorLog::singleton().push("Disassembler unable to decode any valid instructions", ErrorLevel::SEV);
+		Log::log("Disassembler unable to decode any valid instructions", ErrorLevel::SEV);
 		return false;
 	}
 
 	if (!followJmp(insts)) {
-		ErrorLog::singleton().push("Prologue jmp resolution failed", ErrorLevel::SEV);
+		Log::log("Prologue jmp resolution failed", ErrorLevel::SEV);
 		return false;
 	}
 
@@ -51,7 +51,7 @@ bool PLH::x86Detour::hook() {
 
 	// --------------- END RECURSIVE JMP RESOLUTION ---------------------
 
-	ErrorLog::singleton().push("Original function:\n" + instsToStr(insts) + "\n", ErrorLevel::INFO);
+	Log::log("Original function:\n" + instsToStr(insts) + "\n", ErrorLevel::INFO);
 
 	uint64_t minProlSz = getJmpSize(); // min size of patches that may split instructions
 	uint64_t roundProlSz = minProlSz; // nearest size to min that doesn't split any instructions
@@ -61,7 +61,7 @@ bool PLH::x86Detour::hook() {
 		// find the prologue section we will overwrite with jmp + zero or more nops
 		auto prologueOpt = calcNearestSz(insts, minProlSz, roundProlSz);
 		if (!prologueOpt) {
-			ErrorLog::singleton().push("Function too small to hook safely!", ErrorLevel::SEV);
+			Log::log("Function too small to hook safely!", ErrorLevel::SEV);
 			return false;
 		}
 
@@ -69,33 +69,36 @@ bool PLH::x86Detour::hook() {
 		prologue = *prologueOpt;
 
 		if (!expandProlSelfJmps(prologue, insts, minProlSz, roundProlSz)) {
-			ErrorLog::singleton().push("Function needs a prologue jmp table but it's too small to insert one", ErrorLevel::SEV);
+			Log::log("Function needs a prologue jmp table but it's too small to insert one", ErrorLevel::SEV);
 			return false;
 		}
 	}
 
 	m_originalInsts = prologue;
-	ErrorLog::singleton().push("Prologue to overwrite:\n" + instsToStr(prologue) + "\n", ErrorLevel::INFO);
+	Log::log("Prologue to overwrite:\n" + instsToStr(prologue) + "\n", ErrorLevel::INFO);
 
 	{   // copy all the prologue stuff to trampoline
 		insts_t jmpTblOpt;
 		if (!makeTrampoline(prologue, jmpTblOpt))
 			return false;
 
-		ErrorLog::singleton().push("Trampoline:\n" + instsToStr(m_disasm.disassemble(m_trampoline, m_trampoline, m_trampoline + m_trampolineSz)) + "\n", ErrorLevel::INFO);
+		Log::log("Trampoline:\n" + instsToStr(m_disasm.disassemble(m_trampoline, m_trampoline, m_trampoline + m_trampolineSz, *this)) + "\n", ErrorLevel::INFO);
 		if (!jmpTblOpt.empty())
-			ErrorLog::singleton().push("Trampoline Jmp Tbl:\n" + instsToStr(jmpTblOpt) + "\n", ErrorLevel::INFO);
+			Log::log("Trampoline Jmp Tbl:\n" + instsToStr(jmpTblOpt) + "\n", ErrorLevel::INFO);
 	}
 
 	*m_userTrampVar = m_trampoline;
+	m_hookSize = (uint32_t)roundProlSz;
+	m_nopProlOffset = (uint16_t)minProlSz;
 
-	MemoryProtector prot(m_fnAddress, roundProlSz, ProtFlag::R | ProtFlag::W | ProtFlag::X);
-	const auto prolJmp = makex86Jmp(m_fnAddress, m_fnCallback);
-	m_disasm.writeEncoding(prolJmp);
+	MemoryProtector prot(m_fnAddress, m_hookSize, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this);
+	m_hookInsts = makex86Jmp(m_fnAddress, m_fnCallback);
+	m_disasm.writeEncoding(m_hookInsts, *this);
 
 	// Nop the space between jmp and end of prologue
-	const uint8_t nopSz = (uint8_t)(roundProlSz - minProlSz);
-	std::memset((char*)(m_fnAddress + minProlSz), 0x90, (size_t)nopSz);
+	assert(m_hookSize >= m_nopProlOffset);
+	m_nopSize = (uint16_t)(m_hookSize - m_nopProlOffset);
+	writeNop(m_fnAddress + m_nopProlOffset, m_nopSize);
 
 	m_hooked = true;
 	return true;
@@ -120,7 +123,7 @@ bool PLH::x86Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) {
 	uint8_t retries = 0;
 	do {
 		if (retries++ > 4) {
-			ErrorLog::singleton().push("Failed to calculate trampoline information", ErrorLevel::SEV);
+			Log::log("Failed to calculate trampoline information", ErrorLevel::SEV);
 			return false;
 		}
 
@@ -140,13 +143,13 @@ bool PLH::x86Detour::makeTrampoline(insts_t& prologue, insts_t& trampolineOut) {
 	} while (instsNeedingEntry.size() > neededEntryCount);
 
 	const int64_t delta = m_trampoline - prolStart;
-	MemoryProtector prot(m_trampoline, m_trampolineSz, ProtFlag::R | ProtFlag::W | ProtFlag::X, false);
+	MemoryProtector prot(m_trampoline, m_trampolineSz, ProtFlag::R | ProtFlag::W | ProtFlag::X, *this, false);
 
 	// Insert jmp from trampoline -> prologue after overwritten section
 	const uint64_t jmpToProlAddr = m_trampoline + prolSz;
 	{
 		const auto jmpToProl = makex86Jmp(jmpToProlAddr, prologue.front().getAddress() + prolSz);
-		m_disasm.writeEncoding(jmpToProl);
+		m_disasm.writeEncoding(jmpToProl, *this);
 	}
 
 	const uint64_t jmpTblStart = jmpToProlAddr + getJmpSize();
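
For the x86 path the prologue overwrite amounts to a 5-byte relative jmp plus nop padding up to roundProlSz. A sketch of that write, assuming makex86Jmp emits a plain `E9 rel32` and writeNop fills with 0x90 (both are assumptions; neither function body is part of this hunk, and the buffer stands in for the protected function memory):

    #include <cstdint>
    #include <cstring>

    // Hypothetical illustration of the patch written by hook() above.
    void patchPrologue(uint8_t* fn, uint64_t fnAddr, uint64_t callback,
                       uint32_t hookSize /* roundProlSz, >= 5 */) {
        const uint32_t rel = (uint32_t)(callback - (fnAddr + 5)); // rel32 is relative to the next instruction
        fn[0] = 0xE9;
        std::memcpy(fn + 1, &rel, 4);                             // x86 is little-endian
        std::memset(fn + 5, 0x90, hookSize - 5);                  // nop the rest of the overwritten prologue
    }
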
diff --git a/libs/pybind11/LICENSE b/libs/pybind11/LICENSE
index 6f15578..e466b0d 100644
--- a/libs/pybind11/LICENSE
+++ b/libs/pybind11/LICENSE
@@ -25,5 +25,5 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-Please also refer to the file CONTRIBUTING.md, which clarifies licensing of
+Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
 external contributions to this project including patches, pull requests, etc.
diff --git a/libs/pybind11/attr.h b/libs/pybind11/attr.h
index 6962d6f..50efdc7 100644
--- a/libs/pybind11/attr.h
+++ b/libs/pybind11/attr.h
@@ -12,7 +12,7 @@
 
 #include "cast.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /// \addtogroup annotations
 /// @{
@@ -23,6 +23,9 @@ struct is_method { handle class_; is_method(const handle &c) : class_(c) { } };
 /// Annotation for operators
 struct is_operator { };
 
+/// Annotation for classes that cannot be subclassed
+struct is_final { };
+
 /// Annotation for parent scope
 struct scope { handle value; scope(const handle &s) : value(s) { } };
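
The new py::is_final annotation (wired to type_record::is_final further down) blocks Python-side subclassing of a bound type. A minimal usage sketch with invented module and class names:

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    struct Sealed { int v = 0; };

    PYBIND11_MODULE(example, m) {
        // Attempting "class Sub(Sealed): ..." in Python raises a TypeError.
        py::class_<Sealed>(m, "Sealed", py::is_final())
            .def(py::init<>())
            .def_readwrite("v", &Sealed::v);
    }
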
 
@@ -37,8 +40,9 @@ struct sibling { handle value; sibling(const handle &value) : value(value.ptr())
 
 /// Annotation indicating that a class derives from another given type
 template  struct base {
+
     PYBIND11_DEPRECATED("base() was deprecated in favor of specifying 'T' as a template argument to class_")
-    base() { }
+    base() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
 };
 
 /// Keep patient alive while nurse lives
@@ -58,7 +62,7 @@ struct metaclass {
     handle value;
 
     PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.")
-    metaclass() {}
+    metaclass() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
 
     /// Override pybind11's default metaclass
     explicit metaclass(handle value) : value(value) { }
@@ -70,6 +74,9 @@ struct module_local { const bool value; constexpr module_local(bool v = true) :
 /// Annotation to mark enums as an arithmetic type
 struct arithmetic { };
 
+/// Mark a function for addition at the beginning of the existing overload chain instead of the end
+struct prepend { };
+
 /** \rst
     A call policy which places one or more guard variables (``Ts...``) around the function call.
 
@@ -110,7 +117,7 @@ struct call_guard {
 
 /// @} annotations
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /* Forward declarations */
 enum op_id : int;
 enum op_type : int;
@@ -134,7 +141,8 @@ struct argument_record {
 struct function_record {
     function_record()
         : is_constructor(false), is_new_style_constructor(false), is_stateless(false),
-          is_operator(false), has_args(false), has_kwargs(false), is_method(false) { }
+          is_operator(false), is_method(false), has_args(false),
+          has_kwargs(false), has_kw_only_args(false), prepend(false) { }
 
     /// Function name
     char *name = nullptr; /* why no C++ strings? They generate heavier code.. */
@@ -172,18 +180,30 @@ struct function_record {
     /// True if this is an operator (__add__), etc.
     bool is_operator : 1;
 
+    /// True if this is a method
+    bool is_method : 1;
+
     /// True if the function has a '*args' argument
     bool has_args : 1;
 
     /// True if the function has a '**kwargs' argument
     bool has_kwargs : 1;
 
-    /// True if this is a method
-    bool is_method : 1;
+    /// True once a 'py::kw_only' is encountered (any following args are keyword-only)
+    bool has_kw_only_args : 1;
+
+    /// True if this function is to be inserted at the beginning of the overload resolution chain
+    bool prepend : 1;
 
     /// Number of arguments (including py::args and/or py::kwargs, if present)
     std::uint16_t nargs;
 
+    /// Number of trailing arguments (counted in `nargs`) that are keyword-only
+    std::uint16_t nargs_kw_only = 0;
+
+    /// Number of leading arguments (counted in `nargs`) that are positional-only
+    std::uint16_t nargs_pos_only = 0;
+
     /// Python method object
     PyMethodDef *def = nullptr;
 
@@ -201,7 +221,7 @@ struct function_record {
 struct type_record {
     PYBIND11_NOINLINE type_record()
         : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false),
-          default_holder(true), module_local(false) { }
+          default_holder(true), module_local(false), is_final(false) { }
 
     /// Handle to the parent scope
     handle scope;
@@ -254,6 +274,9 @@ struct type_record {
     /// Is the class definition local to the module shared object?
     bool module_local : 1;
 
+    /// Is the class inheritable from python classes?
+    bool is_final : 1;
+
     PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) {
         auto base_info = detail::get_type_info(base, false);
         if (!base_info) {
@@ -353,12 +376,20 @@ template <> struct process_attribute : process_attribu
     static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; }
 };
 
+inline void process_kw_only_arg(const arg &a, function_record *r) {
+    if (!a.name || strlen(a.name) == 0)
+        pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation");
+    ++r->nargs_kw_only;
+}
+
 /// Process a keyword argument attribute (*without* a default value)
 template <> struct process_attribute : process_attribute_default {
     static void init(const arg &a, function_record *r) {
         if (r->is_method && r->args.empty())
             r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/);
         r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none);
+
+        if (r->has_kw_only_args) process_kw_only_arg(a, r);
     }
 };
 
@@ -390,6 +421,22 @@ template <> struct process_attribute : process_attribute_default {
 #endif
         }
         r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none);
+
+        if (r->has_kw_only_args) process_kw_only_arg(a, r);
+    }
+};
+
+/// Process a keyword-only-arguments-follow pseudo argument
+template <> struct process_attribute : process_attribute_default {
+    static void init(const kw_only &, function_record *r) {
+        r->has_kw_only_args = true;
+    }
+};
+
+/// Process a positional-only-argument maker
+template <> struct process_attribute : process_attribute_default {
+    static void init(const pos_only &, function_record *r) {
+        r->nargs_pos_only = static_cast(r->args.size());
     }
 };
 
@@ -416,6 +463,11 @@ struct process_attribute : process_attribute_default
     static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; }
 };
 
+template <>
+struct process_attribute : process_attribute_default {
+    static void init(const is_final &, type_record *r) { r->is_final = true; }
+};
+
 template <>
 struct process_attribute : process_attribute_default {
     static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; }
@@ -431,6 +483,12 @@ struct process_attribute : process_attribute_default
     static void init(const module_local &l, type_record *r) { r->module_local = l.value; }
 };
 
+/// Process a 'prepend' attribute, putting this at the beginning of the overload chain
+template <>
+struct process_attribute : process_attribute_default {
+    static void init(const prepend &, function_record *r) { r->prepend = true; }
+};
+
 /// Process an 'arithmetic' attribute for enums (does nothing here)
 template <>
 struct process_attribute : process_attribute_default {};
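
py::prepend() (the function_record::prepend bit and its process_attribute specialization above) puts a def at the front of an existing overload chain instead of the end, so a later-registered overload can be tried first. A small sketch with invented names:

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    PYBIND11_MODULE(example, m) {
        m.def("resolve", [](int)    { return "int"; });
        // Without py::prepend() this overload would only be reached after the
        // int overload had been tried; with it, it is tried first.
        m.def("resolve", [](double) { return "double"; }, py::prepend());
    }
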
@@ -486,8 +544,8 @@ template ::value...),
           size_t self  = constexpr_sum(std::is_same::value...)>
 constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) {
-    return named == 0 || (self + named + has_args + has_kwargs) == nargs;
+    return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs;
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/buffer_info.h b/libs/pybind11/buffer_info.h
index 1f4115a..d803004 100644
--- a/libs/pybind11/buffer_info.h
+++ b/libs/pybind11/buffer_info.h
@@ -11,7 +11,30 @@
 
 #include "detail/common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+// Default, C-style strides
+inline std::vector c_strides(const std::vector &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector strides(ndim, itemsize);
+    if (ndim > 0)
+        for (size_t i = ndim - 1; i > 0; --i)
+            strides[i - 1] = strides[i] * shape[i];
+    return strides;
+}
+
+// F-style strides; default when constructing an array_t with `ExtraFlags & f_style`
+inline std::vector f_strides(const std::vector &shape, ssize_t itemsize) {
+    auto ndim = shape.size();
+    std::vector strides(ndim, itemsize);
+    for (size_t i = 1; i < ndim; ++i)
+        strides[i] = strides[i - 1] * shape[i - 1];
+    return strides;
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 /// Information record describing a Python buffer object
 struct buffer_info {
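
The two stride helpers above just accumulate products of the shape: for a 2x3 array of 4-byte items that gives {12, 4} in C (row-major) order and {4, 8} in F (column-major) order. The same arithmetic spelled out, using a local alias rather than pybind11's ssize_t:

    #include <cassert>
    #include <vector>
    using ssz = long long;

    int main() {
        std::vector<ssz> shape{2, 3};
        ssz itemsize = 4;
        // C order: last dimension is contiguous -> strides {3*4, 4}
        std::vector<ssz> c(shape.size(), itemsize);
        for (size_t i = shape.size() - 1; i > 0; --i) c[i - 1] = c[i] * shape[i];
        // F order: first dimension is contiguous -> strides {4, 2*4}
        std::vector<ssz> f(shape.size(), itemsize);
        for (size_t i = 1; i < shape.size(); ++i) f[i] = f[i - 1] * shape[i - 1];
        assert(c[0] == 12 && c[1] == 4 && f[0] == 4 && f[1] == 8);
        return 0;
    }
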
@@ -24,7 +47,7 @@ struct buffer_info {
     std::vector strides; // Number of bytes between adjacent entries (for each per dimension)
     bool readonly = false;        // flag to indicate if the underlying storage may be written to
 
-    buffer_info() { }
+    buffer_info() = default;
 
     buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
                 detail::any_container shape_in, detail::any_container strides_in, bool readonly=false)
@@ -53,8 +76,15 @@ struct buffer_info {
 
     explicit buffer_info(Py_buffer *view, bool ownview = true)
     : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
-            {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}, view->readonly) {
-        this->view = view;
+            {view->shape, view->shape + view->ndim},
+            /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects
+             * ignore this flag and return a view with NULL strides.
+             * When strides are NULL, build them manually.  */
+            view->strides
+            ? std::vector(view->strides, view->strides + view->ndim)
+            : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize),
+            view->readonly) {
+        this->m_view = view;
         this->ownview = ownview;
     }
 
@@ -73,16 +103,18 @@ struct buffer_info {
         ndim = rhs.ndim;
         shape = std::move(rhs.shape);
         strides = std::move(rhs.strides);
-        std::swap(view, rhs.view);
+        std::swap(m_view, rhs.m_view);
         std::swap(ownview, rhs.ownview);
         readonly = rhs.readonly;
         return *this;
     }
 
     ~buffer_info() {
-        if (view && ownview) { PyBuffer_Release(view); delete view; }
+        if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; }
     }
 
+    Py_buffer *view() const { return m_view; }
+    Py_buffer *&view() { return m_view; }
 private:
     struct private_ctr_tag { };
 
@@ -90,11 +122,11 @@ struct buffer_info {
                 detail::any_container &&shape_in, detail::any_container &&strides_in, bool readonly)
     : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { }
 
-    Py_buffer *view = nullptr;
+    Py_buffer *m_view = nullptr;
     bool ownview = false;
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template  struct compare_buffer_info {
     static bool compare(const buffer_info& b) {
@@ -110,5 +142,5 @@ template  struct compare_buffer_info 16 && stack.size() != 0 && stack.capacity() / stack.size() > 2)
+        if (stack.capacity() > 16 && !stack.empty() && stack.capacity() / stack.size() > 2)
             stack.shrink_to_fit();
     }
 
@@ -163,7 +163,7 @@ inline const std::vector &all_type_info(PyTypeObject *type)
  */
 PYBIND11_NOINLINE inline detail::type_info* get_type_info(PyTypeObject *type) {
     auto &bases = all_type_info(type);
-    if (bases.size() == 0)
+    if (bases.empty())
         return nullptr;
     if (bases.size() > 1)
         pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases");
@@ -220,7 +220,7 @@ struct value_and_holder {
     {}
 
     // Default constructor (used to signal a value-and-holder not found by get_value_and_holder())
-    value_and_holder() {}
+    value_and_holder() = default;
 
     // Used for past-the-end iterator
     value_and_holder(size_t index) : index{index} {}
@@ -288,8 +288,8 @@ struct values_and_holders {
         // Past-the-end iterator:
         iterator(size_t end) : curr(end) {}
     public:
-        bool operator==(const iterator &other) { return curr.index == other.curr.index; }
-        bool operator!=(const iterator &other) { return curr.index != other.curr.index; }
+        bool operator==(const iterator &other) const { return curr.index == other.curr.index; }
+        bool operator!=(const iterator &other) const { return curr.index != other.curr.index; }
         iterator &operator++() {
             if (!inst->simple_layout)
                 curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs;
@@ -342,8 +342,8 @@ PYBIND11_NOINLINE inline value_and_holder instance::get_value_and_holder(const t
             "(compile in debug mode for type details)");
 #else
     pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" +
-            std::string(find_type->type->tp_name) + "' is not a pybind11 base of the given `" +
-            std::string(Py_TYPE(this)->tp_name) + "' instance");
+            get_fully_qualified_tp_name(find_type->type) + "' is not a pybind11 base of the given `" +
+            get_fully_qualified_tp_name(Py_TYPE(this)) + "' instance");
 #endif
 }
 
@@ -432,7 +432,7 @@ PYBIND11_NOINLINE inline std::string error_string() {
 
 #if !defined(PYPY_VERSION)
     if (scope.trace) {
-        PyTracebackObject *trace = (PyTracebackObject *) scope.trace;
+        auto *trace = (PyTracebackObject *) scope.trace;
 
         /* Get the deepest trace possible */
         while (trace->tb_next)
@@ -458,7 +458,7 @@ PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr, const detail:
     auto &instances = get_internals().registered_instances;
     auto range = instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
-        for (auto vh : values_and_holders(it->second)) {
+        for (const auto &vh : values_and_holders(it->second)) {
             if (vh.type == type)
                 return handle((PyObject *) it->second);
         }
@@ -636,7 +636,7 @@ class type_caster_generic {
     /// native typeinfo, or when the native one wasn't able to produce a value.
     PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) {
         constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID;
-        const auto pytype = src.get_type();
+        const auto pytype = type::handle_of(src);
         if (!hasattr(pytype, local_key))
             return false;
 
@@ -816,7 +816,7 @@ template  struct is_copy_assignable struct is_copy_assignable>
     : all_of, is_copy_assignable> {};
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // polymorphic_type_hook::get(src, tinfo) determines whether the object pointed
 // to by `src` actually is an instance of some class derived from `itype`.
@@ -835,21 +835,27 @@ NAMESPACE_END(detail)
 // You may specialize polymorphic_type_hook yourself for types that want to appear
 // polymorphic to Python but do not use C++ RTTI. (This is a not uncommon pattern
 // in performance-sensitive applications, used most notably in LLVM.)
+//
+// polymorphic_type_hook_base allows users to specialize polymorphic_type_hook with
+// std::enable_if. User provided specializations will always have higher priority than
+// the default implementation and specialization provided in polymorphic_type_hook_base.
 template 
-struct polymorphic_type_hook
+struct polymorphic_type_hook_base
 {
     static const void *get(const itype *src, const std::type_info*&) { return src; }
 };
 template 
-struct polymorphic_type_hook::value>>
+struct polymorphic_type_hook_base::value>>
 {
     static const void *get(const itype *src, const std::type_info*& type) {
         type = src ? &typeid(*src) : nullptr;
         return dynamic_cast(src);
     }
 };
+template 
+struct polymorphic_type_hook : public polymorphic_type_hook_base {};
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Generic type caster for objects stored on the heap
 template  class type_caster_base : public type_caster_generic {
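
With polymorphic_type_hook now forwarding to polymorphic_type_hook_base, a user specialization constrained with std::enable_if no longer competes with the library's std::is_polymorphic default. A sketch for a hypothetical non-RTTI hierarchy (Shape, Circle, and the kind field are invented for illustration):

    #include <typeinfo>
    #include <type_traits>
    #include <pybind11/pybind11.h>

    struct Shape  { int kind = 0; /* 0 = Shape, 1 = Circle */ };
    struct Circle : Shape { Circle() { kind = 1; } };

    namespace pybind11 {
    // Reports the dynamic type of a Shape* without C++ RTTI, using the kind tag.
    template <typename T>
    struct polymorphic_type_hook<T, std::enable_if_t<std::is_base_of<Shape, T>::value>> {
        static const void *get(const T *src, const std::type_info *&type) {
            if (src && src->kind == 1) { type = &typeid(Circle); return static_cast<const Circle *>(src); }
            type = src ? &typeid(Shape) : nullptr;
            return src;
        }
    };
    } // namespace pybind11
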
@@ -954,9 +960,14 @@ template  class type_caster> {
 private:
     using caster_t = make_caster;
     caster_t subcaster;
-    using subcaster_cast_op_type = typename caster_t::template cast_op_type;
-    static_assert(std::is_same::type &, subcaster_cast_op_type>::value,
-            "std::reference_wrapper caster requires T to have a caster with an `T &` operator");
+    using reference_t = type&;
+    using subcaster_cast_op_type =
+        typename caster_t::template cast_op_type;
+
+    static_assert(std::is_same::type &, subcaster_cast_op_type>::value ||
+                  std::is_same::value,
+                  "std::reference_wrapper caster requires T to have a caster with an "
+                  "`operator T &()` or `operator const T &()`");
 public:
     bool load(handle src, bool convert) { return subcaster.load(src, convert); }
     static constexpr auto name = caster_t::name;
@@ -967,7 +978,7 @@ template  class type_caster> {
         return caster_t::cast(&src.get(), policy, parent);
     }
     template  using cast_op_type = std::reference_wrapper;
-    operator std::reference_wrapper() { return subcaster.operator subcaster_cast_op_type&(); }
+    operator std::reference_wrapper() { return cast_op(subcaster); }
 };
 
 #define PYBIND11_TYPE_CASTER(type, py_name) \
@@ -1000,6 +1011,7 @@ template  using is_std_char_type = any_of<
     std::is_same /* std::wstring */
 >;
 
+
 template 
 struct type_caster::value && !is_std_char_type::value>> {
     using _py_type_0 = conditional_t;
@@ -1013,6 +1025,14 @@ struct type_caster::value && !is_std_char_t
         if (!src)
             return false;
 
+#if !defined(PYPY_VERSION)
+        auto index_check = [](PyObject *o) { return PyIndex_Check(o); };
+#else
+        // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`,
+        // while CPython only considers the existence of `nb_index`/`__index__`.
+        auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); };
+#endif
+
         if (std::is_floating_point::value) {
             if (convert || PyFloat_Check(src.ptr()))
                 py_value = (py_type) PyFloat_AsDouble(src.ptr());
@@ -1020,29 +1040,41 @@ struct type_caster::value && !is_std_char_t
                 return false;
         } else if (PyFloat_Check(src.ptr())) {
             return false;
-        } else if (std::is_unsigned::value) {
-            py_value = as_unsigned(src.ptr());
-        } else { // signed integer:
-            py_value = sizeof(T) <= sizeof(long)
-                ? (py_type) PyLong_AsLong(src.ptr())
-                : (py_type) PYBIND11_LONG_AS_LONGLONG(src.ptr());
+        } else if (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr())) {
+            return false;
+        } else {
+            handle src_or_index = src;
+#if PY_VERSION_HEX < 0x03080000
+            object index;
+            if (!PYBIND11_LONG_CHECK(src.ptr())) {  // So: index_check(src.ptr())
+                index = reinterpret_steal(PyNumber_Index(src.ptr()));
+                if (!index) {
+                    PyErr_Clear();
+                    if (!convert)
+                        return false;
+                }
+                else {
+                    src_or_index = index;
+                }
+            }
+#endif
+            if (std::is_unsigned::value) {
+                py_value = as_unsigned(src_or_index.ptr());
+            } else { // signed integer:
+                py_value = sizeof(T) <= sizeof(long)
+                    ? (py_type) PyLong_AsLong(src_or_index.ptr())
+                    : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
+            }
         }
 
+        // Python API reported an error
         bool py_err = py_value == (py_type) -1 && PyErr_Occurred();
 
-        // Protect std::numeric_limits::min/max with parentheses
-        if (py_err || (std::is_integral::value && sizeof(py_type) != sizeof(T) &&
-                       (py_value < (py_type) (std::numeric_limits::min)() ||
-                        py_value > (py_type) (std::numeric_limits::max)()))) {
-            bool type_error = py_err && PyErr_ExceptionMatches(
-#if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION)
-                PyExc_SystemError
-#else
-                PyExc_TypeError
-#endif
-            );
+        // Check to see if the conversion is valid (integers should match exactly)
+        // Signed/unsigned checks happen elsewhere
+        if (py_err || (std::is_integral::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) {
             PyErr_Clear();
-            if (type_error && convert && PyNumber_Check(src.ptr())) {
+            if (py_err && convert && PyNumber_Check(src.ptr())) {
                 auto tmp = reinterpret_steal(std::is_floating_point::value
                                                      ? PyNumber_Float(src.ptr())
                                                      : PyNumber_Long(src.ptr()));
@@ -1123,7 +1155,7 @@ template <> class type_caster : public type_caster {
         }
 
         /* Check if this is a C++ type */
-        auto &bases = all_type_info((PyTypeObject *) h.get_type().ptr());
+        auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
         if (bases.size() == 1) { // Only allowing loading from a single-value type
             value = values_and_holders(reinterpret_cast(h.ptr())).begin()->value_ptr();
             return true;
@@ -1233,11 +1265,11 @@ template  struct string_caster {
 #endif
         }
 
-        object utfNbytes = reinterpret_steal(PyUnicode_AsEncodedString(
+        auto utfNbytes = reinterpret_steal(PyUnicode_AsEncodedString(
             load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
         if (!utfNbytes) { PyErr_Clear(); return false; }
 
-        const CharT *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
+        const auto *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
         size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT);
         if (UTF_N > 8) { buffer++; length--; } // Skip BOM for UTF-16/32
         value = StringType(buffer, length);
@@ -1251,7 +1283,7 @@ template  struct string_caster {
 
     static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
         const char *buffer = reinterpret_cast(src.data());
-        ssize_t nbytes = ssize_t(src.size() * sizeof(CharT));
+        auto nbytes = ssize_t(src.size() * sizeof(CharT));
         handle s = decode_utfN(buffer, nbytes);
         if (!s) throw error_already_set();
         return s;
@@ -1267,10 +1299,8 @@ template  struct string_caster {
             UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) :
                           PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
 #else
-        // PyPy seems to have multiple problems related to PyUnicode_UTF*: the UTF8 version
-        // sometimes segfaults for unknown reasons, while the UTF16 and 32 versions require a
-        // non-const char * arguments, which is also a nuisance, so bypass the whole thing by just
-        // passing the encoding as a string value, which works properly:
+        // PyPy segfaults on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as well),
+        // so bypass the whole thing by just passing the encoding as a string value, which works properly:
         return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr);
 #endif
     }
@@ -1357,7 +1387,7 @@ template  struct type_caster 1 && str_len <= 4) {
-            unsigned char v0 = static_cast(value[0]);
+            auto v0 = static_cast(value[0]);
             size_t char0_bytes = !(v0 & 0x80) ? 1 : // low bits only: 0-127
                 (v0 & 0xE0) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence
                 (v0 & 0xF0) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence
@@ -1415,6 +1445,17 @@ template  class Tuple, typename... Ts> class tuple_caster
         return cast_impl(std::forward(src), policy, parent, indices{});
     }
 
+    // copied from the PYBIND11_TYPE_CASTER macro
+    template 
+    static handle cast(T *src, return_value_policy policy, handle parent) {
+        if (!src) return none().release();
+        if (policy == return_value_policy::take_ownership) {
+            auto h = cast(std::move(*src), policy, parent); delete src; return h;
+        } else {
+            return cast(*src, policy, parent);
+        }
+    }
+
     static constexpr auto name = _("Tuple[") + concat(make_caster::name...) + _("]");
 
     template  using cast_op_type = type;
@@ -1492,16 +1533,11 @@ struct copyable_holder_caster : public type_caster_base {
     }
 
     explicit operator type*() { return this->value; }
-    explicit operator type&() { return *(this->value); }
+    // static_cast works around compiler error with MSVC 17 and CUDA 10.2
+    // see issue #2180
+    explicit operator type&() { return *(static_cast(this->value)); }
     explicit operator holder_type*() { return std::addressof(holder); }
-
-    // Workaround for Intel compiler bug
-    // see pybind11 issue 94
-    #if defined(__ICC) || defined(__INTEL_COMPILER)
-    operator holder_type&() { return holder; }
-    #else
     explicit operator holder_type&() { return holder; }
-    #endif
 
     static handle cast(const holder_type &src, return_value_policy, handle) {
         const auto *ptr = holder_helper::get(src);
@@ -1598,6 +1634,10 @@ template  struct is_holder_type struct handle_type_name { static constexpr auto name = _(); };
 template <> struct handle_type_name { static constexpr auto name = _(PYBIND11_BYTES_NAME); };
+template <> struct handle_type_name { static constexpr auto name = _("int"); };
+template <> struct handle_type_name { static constexpr auto name = _("Iterable"); };
+template <> struct handle_type_name { static constexpr auto name = _("Iterator"); };
+template <> struct handle_type_name { static constexpr auto name = _("None"); };
 template <> struct handle_type_name { static constexpr auto name = _("*args"); };
 template <> struct handle_type_name { static constexpr auto name = _("**kwargs"); };
 
@@ -1684,7 +1724,7 @@ template  type_caster &load_type(type_ca
         throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)");
 #else
         throw cast_error("Unable to cast Python instance of type " +
-            (std::string) str(handle.get_type()) + " to C++ type '" + type_id() + "'");
+            (std::string) str(type::handle_of(handle)) + " to C++ type '" + type_id() + "'");
 #endif
     }
     return conv;
@@ -1696,7 +1736,7 @@ template  make_caster load_type(const handle &handle) {
     return conv;
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // pytype -> C++ type
 template ::value, int> = 0>
@@ -1713,13 +1753,16 @@ T cast(const handle &handle) { return T(reinterpret_borrow(handle)); }
 
 // C++ type -> py::object
 template ::value, int> = 0>
-object cast(const T &value, return_value_policy policy = return_value_policy::automatic_reference,
+object cast(T &&value, return_value_policy policy = return_value_policy::automatic_reference,
             handle parent = handle()) {
+    using no_ref_T = typename std::remove_reference::type;
     if (policy == return_value_policy::automatic)
-        policy = std::is_pointer::value ? return_value_policy::take_ownership : return_value_policy::copy;
+        policy = std::is_pointer::value ? return_value_policy::take_ownership :
+                 std::is_lvalue_reference::value ? return_value_policy::copy : return_value_policy::move;
     else if (policy == return_value_policy::automatic_reference)
-        policy = std::is_pointer::value ? return_value_policy::reference : return_value_policy::copy;
-    return reinterpret_steal(detail::make_caster::cast(value, policy, parent));
+        policy = std::is_pointer::value ? return_value_policy::reference :
+                 std::is_lvalue_reference::value ? return_value_policy::copy : return_value_policy::move;
+    return reinterpret_steal(detail::make_caster::cast(std::forward(value), policy, parent));
 }
 
 template  T handle::cast() const { return pybind11::cast(*this); }
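
Because py::cast now takes a forwarding reference, passing an rvalue selects return_value_policy::move under the automatic policies, where it previously always copied. A small self-contained check (the embedded interpreter is only there to make the sketch runnable):

    #include <string>
    #include <vector>
    #include <pybind11/embed.h>
    #include <pybind11/stl.h>
    namespace py = pybind11;

    int main() {
        py::scoped_interpreter guard{};
        std::vector<std::string> big(3, "x");
        py::object copied = py::cast(big);             // lvalue -> return_value_policy::copy
        py::object moved  = py::cast(std::move(big));  // rvalue -> return_value_policy::move (new with this change)
        return (py::len(copied) == 3 && py::len(moved) == 3) ? 0 : 1;
    }
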
@@ -1732,7 +1775,7 @@ detail::enable_if_t::value, T> move(object &&obj) {
         throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references"
             " (compile in debug mode for details)");
 #else
-        throw cast_error("Unable to move from Python " + (std::string) str(obj.get_type()) +
+        throw cast_error("Unable to move from Python " + (std::string) str(type::handle_of(obj)) +
                 " instance to C++ " + type_id() + " instance: instance has multiple references");
 #endif
 
@@ -1741,7 +1784,7 @@ detail::enable_if_t::value, T> move(object &&obj) {
     return ret;
 }
 
-// Calling cast() on an rvalue calls pybind::cast with the object rvalue, which does:
+// Calling cast() on an rvalue calls pybind11::cast with the object rvalue, which does:
 // - If we have to move (because T has no copy constructor), do it.  This will fail if the moved
 //   object has multiple references, but trying to copy will fail to compile.
 // - If both movable and copyable, check ref count: if 1, move; otherwise copy
@@ -1764,22 +1807,22 @@ template  T object::cast() && { return pybind11::cast(std::move(*
 template <> inline void object::cast() const & { return; }
 template <> inline void object::cast() && { return; }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Declared in pytypes.h:
 template ::value, int>>
 object object_or_cast(T &&o) { return pybind11::cast(std::forward(o)); }
 
-struct overload_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the OVERLOAD_INT macro
-template  using overload_caster_t = conditional_t<
-    cast_is_temporary_value_reference::value, make_caster, overload_unused>;
+struct override_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the PYBIND11_OVERRIDE_OVERRIDE macro
+template  using override_caster_t = conditional_t<
+    cast_is_temporary_value_reference::value, make_caster, override_unused>;
 
 // Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then
 // store the result in the given variable.  For other types, this is a no-op.
 template  enable_if_t::value, T> cast_ref(object &&o, make_caster &caster) {
     return cast_op(load_type(caster, o));
 }
-template  enable_if_t::value, T> cast_ref(object &&, overload_unused &) {
+template  enable_if_t::value, T> cast_ref(object &&, override_unused &) {
     pybind11_fail("Internal error: cast_ref fallback invoked"); }
 
 // Trampoline use: Having a pybind11::cast with an invalid reference type is going to static_assert, even
@@ -1791,7 +1834,7 @@ template  enable_if_t::value, T
     pybind11_fail("Internal error: cast_safe fallback invoked"); }
 template <> inline void cast_safe(object &&) {}
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template 
 tuple make_tuple() { return tuple(0); }
@@ -1852,7 +1895,14 @@ struct arg_v : arg {
 #if !defined(NDEBUG)
         , type(type_id())
 #endif
-    { }
+    {
+        // Workaround! See:
+        // https://github.com/pybind/pybind11/issues/2336
+        // https://github.com/pybind/pybind11/pull/2685#issuecomment-731286700
+        if (PyErr_Occurred()) {
+            PyErr_Clear();
+        }
+    }
 
 public:
     /// Direct construction with name, default, and description
@@ -1881,6 +1931,16 @@ struct arg_v : arg {
 #endif
 };
 
+/// \ingroup annotations
+/// Annotation indicating that all following arguments are keyword-only; this is the equivalent of an
+/// unnamed '*' argument (in Python 3)
+struct kw_only {};
+
+/// \ingroup annotations
+/// Annotation indicating that all previous arguments are positional-only; this is the equivalent of an
+/// unnamed '/' argument (in Python 3.8)
+struct pos_only {};
+
 template 
 arg_v arg::operator=(T &&value) const { return {std::move(*this), std::forward(value)}; }
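
py::kw_only and py::pos_only above are the user-facing pseudo-arguments behind the has_kw_only_args / nargs_pos_only bookkeeping added in attr.h. A usage sketch with invented names, equivalent to the Python signature f(x, /, y, *, z):

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    PYBIND11_MODULE(example, m) {
        // x is positional-only, y is positional-or-keyword, z is keyword-only.
        m.def("f", [](int x, int y, int z) { return x + y + z; },
              py::arg("x"), py::pos_only(), py::arg("y"), py::kw_only(), py::arg("z"));
    }
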
 
@@ -1892,9 +1952,9 @@ inline namespace literals {
     String literal version of `arg`
  \endrst */
 constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
-}
+} // namespace literals
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // forward declaration (definition in attr.h)
 struct function_record;
@@ -2055,7 +2115,7 @@ class unpacking_collector {
     }
 
     void process(list &args_list, detail::args_proxy ap) {
-        for (const auto &a : ap)
+        for (auto a : ap)
             args_list.append(a);
     }
 
@@ -2087,7 +2147,7 @@ class unpacking_collector {
     void process(list &/*args_list*/, detail::kwargs_proxy kp) {
         if (!kp)
             return;
-        for (const auto &k : reinterpret_borrow(kp)) {
+        for (auto k : reinterpret_borrow(kp)) {
             if (m_kwargs.contains(k.first)) {
 #if defined(NDEBUG)
                 multiple_values_error();
@@ -2132,16 +2192,26 @@ class unpacking_collector {
     dict m_kwargs;
 };
 
+// [workaround(intel)] Separate function required here
+// We need to put this into a separate function because the Intel compiler
+// fails to compile enable_if_t...>::value>
+// (tested with ICC 2021.1 Beta 20200827).
+template 
+constexpr bool args_are_all_positional()
+{
+  return all_of...>::value;
+}
+
 /// Collect only positional arguments for a Python function call
 template ...>::value>>
+          typename = enable_if_t()>>
 simple_collector collect_arguments(Args &&...args) {
     return simple_collector(std::forward(args)...);
 }
 
 /// Collect all arguments, including keywords and unpacking (only instantiated when needed)
 template ...>::value>>
+          typename = enable_if_t()>>
 unpacking_collector collect_arguments(Args &&...args) {
     // Following argument order rules for generalized unpacking according to PEP 448
     static_assert(
@@ -2165,7 +2235,19 @@ object object_api::call(Args &&...args) const {
     return operator()(std::forward(args)...);
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
+
+
+template
+handle type::handle_of() {
+   static_assert(
+      std::is_base_of>::value,
+      "py::type::of only supports the case where T is a registered C++ types."
+    );
+
+    return detail::get_type_handle(typeid(T), true);
+}
+
 
 #define PYBIND11_MAKE_OPAQUE(...) \
     namespace pybind11 { namespace detail { \
@@ -2173,7 +2255,7 @@ NAMESPACE_END(detail)
     }}
 
 /// Lets you pass a type containing a `,` through a macro parameter without needing a separate
-/// typedef, e.g.: `PYBIND11_OVERLOAD(PYBIND11_TYPE(ReturnType), PYBIND11_TYPE(Parent), f, arg)`
+/// typedef, e.g.: `PYBIND11_OVERRIDE(PYBIND11_TYPE(ReturnType), PYBIND11_TYPE(Parent), f, arg)`
 #define PYBIND11_TYPE(...) __VA_ARGS__
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/chrono.h b/libs/pybind11/chrono.h
index ea777e6..c368110 100644
--- a/libs/pybind11/chrono.h
+++ b/libs/pybind11/chrono.h
@@ -27,15 +27,15 @@
 #define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds)
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template  class duration_caster {
 public:
-    typedef typename type::rep rep;
-    typedef typename type::period period;
+    using rep = typename type::rep;
+    using period = typename type::period;
 
-    typedef std::chrono::duration> days;
+    using days = std::chrono::duration>;
 
     bool load(handle src, bool) {
         using namespace std::chrono;
@@ -98,7 +98,7 @@ template  class duration_caster {
 // This is for casting times on the system clock into datetime.datetime instances
 template <typename Duration> class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> {
 public:
-    typedef std::chrono::time_point<std::chrono::system_clock, Duration> type;
+    using type = std::chrono::time_point<std::chrono::system_clock, Duration>;
     bool load(handle src, bool) {
         using namespace std::chrono;
 
@@ -140,7 +140,7 @@ template  class type_caster(system_clock::from_time_t(std::mktime(&cal)) + msecs);
         return true;
     }
 
@@ -150,21 +150,28 @@ template  class type_caster(src));
+        // Get out microseconds, and make sure they are positive, to avoid bug in eastern hemisphere time zones
+        // (cfr. https://github.com/pybind/pybind11/issues/2417)
+        using us_t = duration<int, std::micro>;
+        auto us = duration_cast<us_t>(src.time_since_epoch() % seconds(1));
+        if (us.count() < 0)
+            us += seconds(1);
+
+        // Subtract microseconds BEFORE `system_clock::to_time_t`, because:
+        // > If std::time_t has lower precision, it is implementation-defined whether the value is rounded or truncated.
+        // (https://en.cppreference.com/w/cpp/chrono/system_clock/to_time_t)
+        std::time_t tt = system_clock::to_time_t(time_point_cast<seconds>(src - us));
         // this function uses static memory so it's best to copy it out asap just in case
         // otherwise other code that is using localtime may break this (not just python code)
         std::tm localtime = *std::localtime(&tt);
 
-        // Declare these special duration types so the conversions happen with the correct primitive types (int)
-        using us_t = duration<int, std::micro>;
-
         return PyDateTime_FromDateAndTime(localtime.tm_year + 1900,
                                           localtime.tm_mon + 1,
                                           localtime.tm_mday,
                                           localtime.tm_hour,
                                           localtime.tm_min,
                                           localtime.tm_sec,
-                                          (duration_cast<us_t>(src.time_since_epoch() % seconds(1))).count());
+                                          us.count());
     }
     PYBIND11_TYPE_CASTER(type, _("datetime.datetime"));
 };
@@ -180,5 +187,5 @@ template  class type_caster> {
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
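
Standalone sketch of the rounding fix above (an assumed helper, not part of the patch): keep the sub-second part non-negative and subtract it before to_time_t, so time zones east of UTC do not lose a second.

    #include <chrono>
    #include <ctime>

    // Hypothetical helper mirroring the cast() logic above.
    std::tm to_local_tm(std::chrono::system_clock::time_point tp, int &out_usec) {
        using namespace std::chrono;
        using us_t = duration<int, std::micro>;
        auto us = duration_cast<us_t>(tp.time_since_epoch() % seconds(1));
        if (us.count() < 0)
            us += seconds(1);                 // keep microseconds in [0, 1s)
        // Subtract the sub-second part BEFORE to_time_t to avoid truncation issues.
        std::time_t tt = system_clock::to_time_t(time_point_cast<seconds>(tp - us));
        out_usec = us.count();
        return *std::localtime(&tt);          // copy out: localtime uses static storage
    }
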
diff --git a/libs/pybind11/complex.h b/libs/pybind11/complex.h
index 3f89638..f8327eb 100644
--- a/libs/pybind11/complex.h
+++ b/libs/pybind11/complex.h
@@ -17,7 +17,7 @@
 #  undef I
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 template <typename T> struct format_descriptor<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr const char c = format_descriptor<T>::c;
@@ -32,7 +32,7 @@ template  constexpr const char format_descriptor<
 
 #endif
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename T> struct is_fmt_numeric<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr bool value = true;
@@ -61,5 +61,5 @@ template  class type_caster> {
 
     PYBIND11_TYPE_CASTER(std::complex<T>, _("complex"));
 };
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/detail/class.h b/libs/pybind11/detail/class.h
index edfa7de..2f414e5 100644
--- a/libs/pybind11/detail/class.h
+++ b/libs/pybind11/detail/class.h
@@ -12,10 +12,10 @@
 #include "../attr.h"
 #include "../options.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
-#if PY_VERSION_HEX >= 0x03030000
+#if PY_VERSION_HEX >= 0x03030000 && !defined(PYPY_VERSION)
 #  define PYBIND11_BUILTIN_QUALNAME
 #  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj)
 #else
@@ -24,6 +24,18 @@ NAMESPACE_BEGIN(detail)
 #  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) setattr((PyObject *) obj, "__qualname__", nameobj)
 #endif
 
+inline std::string get_fully_qualified_tp_name(PyTypeObject *type) {
+#if !defined(PYPY_VERSION)
+    return type->tp_name;
+#else
+    auto module_name = handle((PyObject *) type).attr("__module__").cast<std::string>();
+    if (module_name == PYBIND11_BUILTINS_MODULE)
+        return type->tp_name;
+    else
+        return std::move(module_name) + "." + type->tp_name;
+#endif
+}
+
 inline PyTypeObject *type_incref(PyTypeObject *type) {
     Py_INCREF(type);
     return type;
@@ -117,7 +129,7 @@ extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyOb
     //   2. `Type.static_prop = other_static_prop` --> setattro:  replace existing `static_prop`
     //   3. `Type.regular_attribute = value`       --> setattro:  regular attribute assignment
     const auto static_prop = (PyObject *) get_internals().static_property_type;
-    const auto call_descr_set = descr && PyObject_IsInstance(descr, static_prop)
+    const auto call_descr_set = descr && value && PyObject_IsInstance(descr, static_prop)
                                 && !PyObject_IsInstance(value, static_prop);
     if (call_descr_set) {
         // Call `static_property.__set__()` instead of replacing the `static_property`.
@@ -156,6 +168,69 @@ extern "C" inline PyObject *pybind11_meta_getattro(PyObject *obj, PyObject *name
 }
 #endif
 
+/// metaclass `__call__` function that is used to create all pybind11 objects.
+extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, PyObject *kwargs) {
+
+    // use the default metaclass call to create/initialize the object
+    PyObject *self = PyType_Type.tp_call(type, args, kwargs);
+    if (self == nullptr) {
+        return nullptr;
+    }
+
+    // This must be a pybind11 instance
+    auto instance = reinterpret_cast<detail::instance *>(self);
+
+    // Ensure that the base __init__ function(s) were called
+    for (const auto &vh : values_and_holders(instance)) {
+        if (!vh.holder_constructed()) {
+            PyErr_Format(PyExc_TypeError, "%.200s.__init__() must be called when overriding __init__",
+                         get_fully_qualified_tp_name(vh.type->type).c_str());
+            Py_DECREF(self);
+            return nullptr;
+        }
+    }
+
+    return self;
+}
+
+/// Cleanup the type-info for a pybind11-registered type.
+extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
+    auto *type = (PyTypeObject *) obj;
+    auto &internals = get_internals();
+
+    // A pybind11-registered type will:
+    // 1) be found in internals.registered_types_py
+    // 2) have exactly one associated `detail::type_info`
+    auto found_type = internals.registered_types_py.find(type);
+    if (found_type != internals.registered_types_py.end() &&
+        found_type->second.size() == 1 &&
+        found_type->second[0]->type == type) {
+
+        auto *tinfo = found_type->second[0];
+        auto tindex = std::type_index(*tinfo->cpptype);
+        internals.direct_conversions.erase(tindex);
+
+        if (tinfo->module_local)
+            registered_local_types_cpp().erase(tindex);
+        else
+            internals.registered_types_cpp.erase(tindex);
+        internals.registered_types_py.erase(tinfo->type);
+
+        // Actually just `std::erase_if`, but that's only available in C++20
+        auto &cache = internals.inactive_override_cache;
+        for (auto it = cache.begin(), last = cache.end(); it != last; ) {
+            if (it->first == (PyObject *) tinfo->type)
+                it = cache.erase(it);
+            else
+                ++it;
+        }
+
+        delete tinfo;
+    }
+
+    PyType_Type.tp_dealloc(obj);
+}
+
 /** This metaclass is assigned by default to all pybind11 types and is required in order
     for static properties to function correctly. Users may override this using `py::metaclass`.
     Return value: New reference. */
@@ -181,11 +256,15 @@ inline PyTypeObject* make_default_metaclass() {
     type->tp_base = type_incref(&PyType_Type);
     type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
 
+    type->tp_call = pybind11_meta_call;
+
     type->tp_setattro = pybind11_meta_setattro;
 #if PY_MAJOR_VERSION >= 3
     type->tp_getattro = pybind11_meta_getattro;
 #endif
 
+    type->tp_dealloc = pybind11_meta_dealloc;
+
     if (PyType_Ready(type) < 0)
         pybind11_fail("make_default_metaclass(): failure in PyType_Ready()!");
 
@@ -223,7 +302,7 @@ inline bool deregister_instance_impl(void *ptr, instance *self) {
     auto &registered_instances = get_internals().registered_instances;
     auto range = registered_instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
-        if (Py_TYPE(self) == Py_TYPE(it->second)) {
+        if (self == it->second) {
             registered_instances.erase(it);
             return true;
         }
@@ -261,8 +340,6 @@ inline PyObject *make_new_instance(PyTypeObject *type) {
     // Allocate the value/holder internals:
     inst->allocate_layout();
 
-    inst->owned = true;
-
     return self;
 }
 
@@ -277,12 +354,7 @@ extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *,
 /// following default function will be used which simply throws an exception.
 extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject *) {
     PyTypeObject *type = Py_TYPE(self);
-    std::string msg;
-#if defined(PYPY_VERSION)
-    msg += handle((PyObject *) type).attr("__module__").cast() + ".";
-#endif
-    msg += type->tp_name;
-    msg += ": No constructor defined!";
+    std::string msg = get_fully_qualified_tp_name(type) + ": No constructor defined!";
     PyErr_SetString(PyExc_TypeError, msg.c_str());
     return -1;
 }
@@ -421,7 +493,7 @@ extern "C" inline PyObject *pybind11_get_dict(PyObject *self, void *) {
 extern "C" inline int pybind11_set_dict(PyObject *self, PyObject *new_dict, void *) {
     if (!PyDict_Check(new_dict)) {
         PyErr_Format(PyExc_TypeError, "__dict__ must be set to a dictionary, not a '%.200s'",
-                     Py_TYPE(new_dict)->tp_name);
+                     get_fully_qualified_tp_name(Py_TYPE(new_dict)).c_str());
         return -1;
     }
     PyObject *&dict = *_PyObject_GetDictPtr(self);
@@ -448,11 +520,6 @@ extern "C" inline int pybind11_clear(PyObject *self) {
 /// Give instances of this type a `__dict__` and opt into garbage collection.
 inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type) {
     auto type = &heap_type->ht_type;
-#if defined(PYPY_VERSION)
-    pybind11_fail(std::string(type->tp_name) + ": dynamic attributes are "
-                                               "currently not supported in "
-                                               "conjunction with PyPy!");
-#endif
     type->tp_flags |= Py_TPFLAGS_HAVE_GC;
     type->tp_dictoffset = type->tp_basicsize; // place dict at the end
     type->tp_basicsize += (ssize_t)sizeof(PyObject *); // and allocate enough space for it
@@ -483,6 +550,12 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
     }
     std::memset(view, 0, sizeof(Py_buffer));
     buffer_info *info = tinfo->get_buffer(obj, tinfo->get_buffer_data);
+    if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
+        delete info;
+        // view->obj = nullptr;  // Was just memset to 0, so not necessary
+        PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
+        return -1;
+    }
     view->obj = obj;
     view->ndim = 1;
     view->internal = info;
@@ -492,12 +565,6 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
     for (auto s : info->shape)
         view->len *= s;
     view->readonly = info->readonly;
-    if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) {
-        if (view)
-            view->obj = nullptr;
-        PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage");
-        return -1;
-    }
     if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
         view->format = const_cast<char *>(info->format.c_str());
     if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
@@ -540,17 +607,17 @@ inline PyObject* make_new_python_type(const type_record &rec) {
 #endif
     }
 
-    object module;
+    object module_;
     if (rec.scope) {
         if (hasattr(rec.scope, "__module__"))
-            module = rec.scope.attr("__module__");
+            module_ = rec.scope.attr("__module__");
         else if (hasattr(rec.scope, "__name__"))
-            module = rec.scope.attr("__name__");
+            module_ = rec.scope.attr("__name__");
     }
 
     auto full_name = c_str(
 #if !defined(PYPY_VERSION)
-        module ? str(module).cast<std::string>() + "." + rec.name :
+        module_ ? str(module_).cast<std::string>() + "." + rec.name :
 #endif
         rec.name);
 
@@ -565,7 +632,7 @@ inline PyObject* make_new_python_type(const type_record &rec) {
 
     auto &internals = get_internals();
     auto bases = tuple(rec.bases);
-    auto base = (bases.size() == 0) ? internals.instance_base
+    auto base = (bases.empty()) ? internals.instance_base
                                     : bases[0].ptr();
 
     /* Danger zone: from now (and until PyType_Ready), make sure to
@@ -589,7 +656,7 @@ inline PyObject* make_new_python_type(const type_record &rec) {
     type->tp_doc = tp_doc;
     type->tp_base = type_incref((PyTypeObject *)base);
     type->tp_basicsize = static_cast<ssize_t>(sizeof(instance));
-    if (bases.size() > 0)
+    if (!bases.empty())
         type->tp_bases = bases.release().ptr();
 
     /* Don't inherit base __init__ */
@@ -604,10 +671,12 @@ inline PyObject* make_new_python_type(const type_record &rec) {
 #endif
 
     /* Flags */
-    type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
+    type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE;
 #if PY_MAJOR_VERSION < 3
     type->tp_flags |= Py_TPFLAGS_CHECKTYPES;
 #endif
+    if (!rec.is_final)
+        type->tp_flags |= Py_TPFLAGS_BASETYPE;
 
     if (rec.dynamic_attr)
         enable_dynamic_attributes(heap_type);
@@ -627,13 +696,13 @@ inline PyObject* make_new_python_type(const type_record &rec) {
     else
         Py_INCREF(type); // Keep it alive forever (reference leak)
 
-    if (module) // Needed by pydoc
-        setattr((PyObject *) type, "__module__", module);
+    if (module_) // Needed by pydoc
+        setattr((PyObject *) type, "__module__", module_);
 
     PYBIND11_SET_OLDPY_QUALNAME(type, qualname);
 
     return (PyObject *) type;
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
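
A small binding sketch showing the effect of the reordered read-only check in pybind11_getbuffer above: a writable view of a read-only buffer now fails cleanly with BufferError before view->obj is set. ConstBlob and the module name are hypothetical.

    #include <pybind11/pybind11.h>
    #include <vector>
    namespace py = pybind11;

    struct ConstBlob {                          // hypothetical type
        std::vector<double> data{1.0, 2.0, 3.0};
    };

    PYBIND11_MODULE(blob_demo, m) {             // hypothetical module name
        py::class_<ConstBlob>(m, "ConstBlob", py::buffer_protocol())
            .def(py::init<>())
            .def_buffer([](ConstBlob &b) {
                // readonly=true: memoryview(blob) works, but any request for a
                // writable buffer now raises BufferError with the check above.
                return py::buffer_info(b.data.data(), sizeof(double),
                                       py::format_descriptor<double>::format(), 1,
                                       { (py::ssize_t) b.data.size() },
                                       { (py::ssize_t) sizeof(double) },
                                       /*readonly=*/true);
            });
    }
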
diff --git a/libs/pybind11/detail/common.h b/libs/pybind11/detail/common.h
index 362421d..de495e4 100644
--- a/libs/pybind11/detail/common.h
+++ b/libs/pybind11/detail/common.h
@@ -9,12 +9,12 @@
 
 #pragma once
 
-#if !defined(NAMESPACE_BEGIN)
-#  define NAMESPACE_BEGIN(name) namespace name {
-#endif
-#if !defined(NAMESPACE_END)
-#  define NAMESPACE_END(name) }
-#endif
+#define PYBIND11_VERSION_MAJOR 2
+#define PYBIND11_VERSION_MINOR 6
+#define PYBIND11_VERSION_PATCH 2
+
+#define PYBIND11_NAMESPACE_BEGIN(name) namespace name {
+#define PYBIND11_NAMESPACE_END(name) }
 
 // Robust support for some features and loading modules compiled against different pybind versions
 // requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on
@@ -27,7 +27,7 @@
 #  endif
 #endif
 
-#if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER)
+#if !(defined(_MSC_VER) && __cplusplus == 199711L)
 #  if __cplusplus >= 201402L
 #    define PYBIND11_CPP14
 #    if __cplusplus >= 201703L
@@ -47,8 +47,10 @@
 
 // Compiler version assertions
 #if defined(__INTEL_COMPILER)
-#  if __INTEL_COMPILER < 1700
-#    error pybind11 requires Intel C++ compiler v17 or newer
+#  if __INTEL_COMPILER < 1800
+#    error pybind11 requires Intel C++ compiler v18 or newer
+#  elif __INTEL_COMPILER < 1900 && defined(PYBIND11_CPP14)
+#    error pybind11 supports only C++11 with Intel C++ compiler v18. Use v19 or newer for C++14.
 #  endif
 #elif defined(__clang__) && !defined(__apple_build_version__)
 #  if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
@@ -92,9 +94,19 @@
 #  define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason)))
 #endif
 
-#define PYBIND11_VERSION_MAJOR 2
-#define PYBIND11_VERSION_MINOR 4
-#define PYBIND11_VERSION_PATCH dev4
+#if defined(PYBIND11_CPP17)
+#  define PYBIND11_MAYBE_UNUSED [[maybe_unused]]
+#elif defined(_MSC_VER) && !defined(__clang__)
+#  define PYBIND11_MAYBE_UNUSED
+#else
+#  define PYBIND11_MAYBE_UNUSED __attribute__ ((__unused__))
+#endif
+
+/* Don't let Python.h #define (v)snprintf as macro because they are implemented
+   properly in Visual Studio since 2015. */
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#  define HAVE_SNPRINTF 1
+#endif
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -144,6 +156,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -171,9 +184,11 @@
 #define PYBIND11_STR_TYPE ::pybind11::str
 #define PYBIND11_BOOL_ATTR "__bool__"
 #define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool)
-// Providing a separate declaration to make Clang's -Wmissing-prototypes happy
+#define PYBIND11_BUILTINS_MODULE "builtins"
+// Providing a separate declaration to make Clang's -Wmissing-prototypes happy.
+// See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
 #define PYBIND11_PLUGIN_IMPL(name) \
-    extern "C" PYBIND11_EXPORT PyObject *PyInit_##name();   \
+    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT PyObject *PyInit_##name(); \
     extern "C" PYBIND11_EXPORT PyObject *PyInit_##name()
 
 #else
@@ -197,13 +212,15 @@
 #define PYBIND11_STR_TYPE ::pybind11::bytes
 #define PYBIND11_BOOL_ATTR "__nonzero__"
 #define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero)
-// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy
+#define PYBIND11_BUILTINS_MODULE "__builtin__"
+// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy.
+// See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
 #define PYBIND11_PLUGIN_IMPL(name) \
-    static PyObject *pybind11_init_wrapper();               \
-    extern "C" PYBIND11_EXPORT void init##name();           \
-    extern "C" PYBIND11_EXPORT void init##name() {          \
-        (void)pybind11_init_wrapper();                      \
-    }                                                       \
+    static PyObject *pybind11_init_wrapper();                           \
+    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT void init##name(); \
+    extern "C" PYBIND11_EXPORT void init##name() {                      \
+        (void)pybind11_init_wrapper();                                  \
+    }                                                                   \
     PyObject *pybind11_init_wrapper()
 #endif
 
@@ -250,13 +267,13 @@ extern "C" {
     ***Deprecated in favor of PYBIND11_MODULE***
 
     This macro creates the entry point that will be invoked when the Python interpreter
-    imports a plugin library. Please create a `module` in the function body and return
+    imports a plugin library. Please create a `module_` in the function body and return
     the pointer to its underlying Python object at the end.
 
     .. code-block:: cpp
 
         PYBIND11_PLUGIN(example) {
-            pybind11::module m("example", "pybind11 example plugin");
+            pybind11::module_ m("example", "pybind11 example plugin");
             /// Set up bindings here
             return m.ptr();
         }
@@ -277,7 +294,11 @@ extern "C" {
     This macro creates the entry point that will be invoked when the Python interpreter
     imports an extension module. The module name is given as the first argument and it
     should not be in quotes. The second macro argument defines a variable of type
-    `py::module` which can be used to initialize the module.
+    `py::module_` which can be used to initialize the module.
+
+    The entry point is marked as "maybe unused" to aid dead-code detection analysis:
+    since the entry point is typically only looked up at runtime and not referenced
+    during translation, it would otherwise appear as unused ("dead") code.
 
     .. code-block:: cpp
 
@@ -291,20 +312,25 @@ extern "C" {
         }
 \endrst */
 #define PYBIND11_MODULE(name, variable)                                        \
-    static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &);     \
+    static ::pybind11::module_::module_def                                     \
+        PYBIND11_CONCAT(pybind11_module_def_, name) PYBIND11_MAYBE_UNUSED;     \
+    PYBIND11_MAYBE_UNUSED                                                      \
+    static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &);  \
     PYBIND11_PLUGIN_IMPL(name) {                                               \
         PYBIND11_CHECK_PYTHON_VERSION                                          \
         PYBIND11_ENSURE_INTERNALS_READY                                        \
-        auto m = pybind11::module(PYBIND11_TOSTRING(name));                    \
+        auto m = ::pybind11::module_::create_extension_module(                 \
+            PYBIND11_TOSTRING(name), nullptr,                                  \
+            &PYBIND11_CONCAT(pybind11_module_def_, name));                     \
         try {                                                                  \
             PYBIND11_CONCAT(pybind11_init_, name)(m);                          \
             return m.ptr();                                                    \
         } PYBIND11_CATCH_INIT_EXCEPTIONS                                       \
     }                                                                          \
-    void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable)
+    void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &variable)
 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 using ssize_t = Py_ssize_t;
 using size_t  = std::size_t;
@@ -361,7 +387,7 @@ enum class return_value_policy : uint8_t {
     reference_internal
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 inline static constexpr int log2(size_t n, int k = 0) { return (n <= 1) ? k : log2(n >> 1, k + 1); }
 
@@ -470,7 +496,7 @@ using std::make_index_sequence;
 #else
 template<size_t ...> struct index_sequence  { };
 template<size_t N, size_t ...S> struct make_index_sequence_impl : make_index_sequence_impl <N - 1, N - 1, S...> { };
-template<size_t ...S> struct make_index_sequence_impl <0, S...> { typedef index_sequence<S...> type; };
+template<size_t ...S> struct make_index_sequence_impl <0, S...> { using type = index_sequence<S...>; };
 template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
 #endif
 
@@ -484,8 +510,16 @@ template  using select_indices = typename select_indices_impl using bool_constant = std::integral_constant;
 template <typename T> struct negation : bool_constant<!T::value> { };
 
+// PGI/Intel cannot detect operator delete with the "compatible" void_t impl, so
+// using the new one (C++14 defect, so generally works on newer compilers, even
+// if not in C++17 mode)
+#if defined(__PGIC__) || defined(__INTEL_COMPILER)
+template<typename...> using void_t = void;
+#else
 template <typename...> struct void_t_impl { using type = void; };
 template <typename... Ts> using void_t = typename void_t_impl<Ts...>::type;
+#endif
+
 
 /// Compile-time all/any/none of that check the boolean value of all template types
 #if defined(__cpp_fold_expressions) && !(defined(_MSC_VER) && (_MSC_VER < 1916))
@@ -511,17 +545,17 @@ template  class... Predicates> using satisfies_none_of
 
 /// Strip the class from a method type
 template <typename T> struct remove_class { };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { typedef R type(A...); };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { typedef R type(A...); };
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { using type = R (A...); };
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { using type = R (A...); };
 
 /// Helper template to strip away type modifiers
-template <typename T> struct intrinsic_type                       { typedef T type; };
-template <typename T> struct intrinsic_type<const T>              { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T*>                   { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T&>                   { typedef typename intrinsic_type<T>::type type; };
-template <typename T> struct intrinsic_type<T&&>                  { typedef typename intrinsic_type<T>::type type; };
-template <typename T, size_t N> struct intrinsic_type<const T[N]> { typedef typename intrinsic_type<T>::type type; };
-template <typename T, size_t N> struct intrinsic_type<T[N]>       { typedef typename intrinsic_type<T>::type type; };
+template <typename T> struct intrinsic_type                       { using type = T; };
+template <typename T> struct intrinsic_type<const T>              { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T*>                   { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T&>                   { using type = typename intrinsic_type<T>::type; };
+template <typename T> struct intrinsic_type<T&&>                  { using type = typename intrinsic_type<T>::type; };
+template <typename T, size_t N> struct intrinsic_type<const T[N]> { using type = typename intrinsic_type<T>::type; };
+template <typename T, size_t N> struct intrinsic_type<T[N]>       { using type = typename intrinsic_type<T>::type; };
 template <typename T> using intrinsic_t = typename intrinsic_type<T>::type;
 
 /// Helper type to replace 'void' in some expressions
@@ -539,7 +573,7 @@ template 
 constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); }
 #endif
 
-NAMESPACE_BEGIN(constexpr_impl)
+PYBIND11_NAMESPACE_BEGIN(constexpr_impl)
 /// Implementation details for constexpr functions
 constexpr int first(int i) { return i; }
 template 
@@ -548,7 +582,7 @@ constexpr int first(int i, T v, Ts... vs) { return v ? i : first(i + 1, vs...);
 constexpr int last(int /*i*/, int result) { return result; }
 template 
 constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); }
-NAMESPACE_END(constexpr_impl)
+PYBIND11_NAMESPACE_END(constexpr_impl)
 
 /// Return the index of the first type in Ts which satisfies Predicate.  Returns sizeof...(Ts) if
 /// none match.
@@ -592,8 +626,9 @@ template  using is_strict_base_of = bool_consta
 
 /// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived pointer
 /// can be converted to a Base pointer)
+/// For unions, `is_base_of<T, T>::value` is False, so we need to check `is_same` as well.
 template <typename Base, typename Derived> using is_accessible_base_of = bool_constant<
-    std::is_base_of<Base, Derived>::value && std::is_convertible<Derived *, Base *>::value>;
+    (std::is_same<Base, Derived>::value || std::is_base_of<Base, Derived>::value) && std::is_convertible<Derived *, Base *>::value>;
 
 template <template<typename...> class Base>
 struct is_template_base_of_impl {
@@ -630,6 +665,10 @@ template  using is_function_pointer = bool_constant<
     std::is_pointer<T>::value && std::is_function<typename std::remove_pointer<T>::type>::value>;
 
 template <typename F> struct strip_function_object {
+    // If you are encountering an
+    // 'error: name followed by "::" must be a class or namespace name'
+    // with the Intel compiler and a noexcept function here,
+    // try to use noexcept(true) instead of plain noexcept.
     using type = typename remove_class<decltype(&F::operator())>::type;
 };
 
@@ -654,15 +693,17 @@ template  using is_lambda = satisfies_none_of,
 /// Ignore that a variable is unused in compiler warnings
 inline void ignore_unused(const int *) { }
 
+// [workaround(intel)] Internal error on fold expression
 /// Apply a function over each element of a parameter pack
-#ifdef __cpp_fold_expressions
+#if defined(__cpp_fold_expressions) && !defined(__INTEL_COMPILER)
+// Intel compiler produces an internal error on this fold expression (tested with ICC 19.0.2)
 #define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...)
 #else
 using expand_side_effects = bool[];
-#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false }
+#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (void)pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false }
 #endif
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// C++ bindings of builtin Python exceptions
 class builtin_exception : public std::runtime_error {
@@ -694,7 +735,7 @@ PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used in
 
 template <typename T, typename SFINAE = void> struct format_descriptor { };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns the index of the given type in the type char array below, and in the list in numpy.h
 // The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double;
 // complex float,double,long double.  Note that the long double types only participate when long
@@ -707,7 +748,7 @@ template  struct is_fmt_numeric
         std::is_integral<T>::value ? detail::log2(sizeof(T))*2 + std::is_unsigned<T>::value : 8 + (
         std::is_same<T, float>::value ? 1 : std::is_same<T, double>::value ? 2 : 0));
 };
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <typename T> struct format_descriptor<T, detail::enable_if_t<detail::is_fmt_numeric<T>::value>> {
     static constexpr const char c = "?bBhHiIqQfdg"[detail::is_fmt_numeric<T>::index];
@@ -732,10 +773,10 @@ struct error_scope {
 /// Dummy destructor wrapper that can be used to expose classes with a private destructor
 struct nodelete { template <typename T> void operator()(T*) { } };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename... Args>
 struct overload_cast_impl {
-    constexpr overload_cast_impl() {} // MSVC 2015 needs this
+    constexpr overload_cast_impl() {}; // NOLINT(modernize-use-equals-default):  MSVC 2015 needs this
 
     template <typename Return>
     constexpr auto operator()(Return (*pf)(Args...)) const noexcept
@@ -749,7 +790,7 @@ struct overload_cast_impl {
     constexpr auto operator()(Return (Class::*pmf)(Args...) const, std::true_type) const noexcept
                               -> decltype(pmf) { return pmf; }
 };
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // overload_cast requires variable templates: C++14
 #if defined(PYBIND11_CPP14)
@@ -774,7 +815,7 @@ template  struct overload_cast {
 };
 #endif // overload_cast
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Adaptor for converting arbitrary container arguments into a vector; implicitly convertible from
 // any standard container (or C-style array) supporting std::begin/std::end, any singleton
@@ -813,8 +854,8 @@ class any_container {
     const std::vector<T> *operator->() const { return &v; }
 };
 
-NAMESPACE_END(detail)
-
-
+// Forward-declaration; see detail/class.h
+std::string get_fully_qualified_tp_name(PyTypeObject*);
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
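
From the user side, the reworked PYBIND11_MODULE macro above is a drop-in replacement; a minimal module still looks like this sketch (module name illustrative).

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    // Expands to module_::create_extension_module() with a static module_def,
    // plus the PYBIND11_MAYBE_UNUSED markers introduced above.
    PYBIND11_MODULE(example, m) {              // hypothetical module name
        m.doc() = "demo module";
        m.def("add", [](int a, int b) { return a + b; });
    }
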
diff --git a/libs/pybind11/detail/descr.h b/libs/pybind11/detail/descr.h
index 8d404e5..92720cd 100644
--- a/libs/pybind11/detail/descr.h
+++ b/libs/pybind11/detail/descr.h
@@ -11,8 +11,8 @@
 
 #include "common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 #if !defined(_MSC_VER)
 #  define PYBIND11_DESCR_CONSTEXPR static constexpr
@@ -96,5 +96,5 @@ constexpr descr type_descr(const descr &descr) {
     return _("{") + descr + _("}");
 }
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/detail/init.h b/libs/pybind11/detail/init.h
index acfe00b..3ef78c1 100644
--- a/libs/pybind11/detail/init.h
+++ b/libs/pybind11/detail/init.h
@@ -11,8 +11,8 @@
 
 #include "class.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <>
 class type_caster<value_and_holder> {
@@ -30,7 +30,7 @@ class type_caster {
     value_and_holder *value = nullptr;
 };
 
-NAMESPACE_BEGIN(initimpl)
+PYBIND11_NAMESPACE_BEGIN(initimpl)
 
 inline void no_nullptr(void *ptr) {
     if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr");
@@ -132,6 +132,7 @@ void construct(value_and_holder &v_h, Alias *alias_ptr, bool) {
 template <typename Class>
 void construct(value_and_holder &v_h, Holder<Class> holder, bool need_alias) {
     auto *ptr = holder_helper<Holder<Class>>::get(holder);
+    no_nullptr(ptr);
     // If we need an alias, check that the held pointer is actually an alias instance
     if (Class::has_alias && need_alias && !is_alias(ptr))
         throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance "
@@ -330,6 +331,6 @@ struct pickle_factory {
     }
 };
 
-NAMESPACE_END(initimpl)
-NAMESPACE_END(detail)
-NAMESPACE_END(pybind11)
+PYBIND11_NAMESPACE_END(initimpl)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(pybind11)
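
Sketch of what the added no_nullptr(ptr) guard catches: a py::init factory returning an empty holder now raises TypeError instead of creating a broken instance. Widget and the module name are hypothetical.

    #include <pybind11/pybind11.h>
    #include <memory>
    namespace py = pybind11;

    struct Widget { int v = 0; };              // hypothetical type

    PYBIND11_MODULE(widget_demo, m) {          // hypothetical module name
        py::class_<Widget, std::shared_ptr<Widget>>(m, "Widget")
            .def(py::init([](bool ok) {
                // A null shared_ptr used to slip through; the guard above now
                // throws "factory function returned nullptr".
                return ok ? std::make_shared<Widget>() : std::shared_ptr<Widget>();
            }));
    }
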
diff --git a/libs/pybind11/detail/internals.h b/libs/pybind11/detail/internals.h
index 6224dfb..75fcd3c 100644
--- a/libs/pybind11/detail/internals.h
+++ b/libs/pybind11/detail/internals.h
@@ -11,8 +11,8 @@
 
 #include "../pytypes.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Forward declarations
 inline PyTypeObject *make_static_property_type();
 inline PyTypeObject *make_default_metaclass();
@@ -82,10 +82,10 @@ struct type_equal_to {
 template <typename value_type>
 using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
 
-struct overload_hash {
+struct override_hash {
     inline size_t operator()(const std::pair<const PyObject *, const char *>& v) const {
         size_t value = std::hash<const void *>()(v.first);
-        value ^= std::hash<const void *>()(v.second)  + 0x9e3779b9 + (value<<6) + (value>>2);
+        value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value<<6) + (value>>2);
         return value;
     }
 };
@@ -97,7 +97,7 @@ struct internals {
     type_map<type_info *> registered_types_cpp; // std::type_index -> pybind11's type information
     std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py; // PyTypeObject* -> base type_info(s)
     std::unordered_multimap<const void *, instance*> registered_instances; // void * -> instance*
-    std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
+    std::unordered_set<std::pair<const PyObject *, const char *>, override_hash> inactive_override_cache;
     type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
     std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
     std::forward_list<void (*) (std::exception_ptr)> registered_exception_translators;
@@ -112,7 +112,7 @@ struct internals {
     PyInterpreterState *istate = nullptr;
     ~internals() {
         // This destructor is called *after* Py_Finalize() in finalize_interpreter().
-        // That *SHOULD BE* fine. The following details what happens whe PyThread_tss_free is called.
+        // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is called.
         // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing.
         // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree.
         // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither
@@ -154,49 +154,60 @@ struct type_info {
 
 /// On MSVC, debug and release builds are not ABI-compatible!
 #if defined(_MSC_VER) && defined(_DEBUG)
-#   define PYBIND11_BUILD_TYPE "_debug"
+#  define PYBIND11_BUILD_TYPE "_debug"
 #else
-#   define PYBIND11_BUILD_TYPE ""
+#  define PYBIND11_BUILD_TYPE ""
 #endif
 
 /// Let's assume that different compilers are ABI-incompatible.
-#if defined(_MSC_VER)
-#   define PYBIND11_COMPILER_TYPE "_msvc"
-#elif defined(__INTEL_COMPILER)
-#   define PYBIND11_COMPILER_TYPE "_icc"
-#elif defined(__clang__)
-#   define PYBIND11_COMPILER_TYPE "_clang"
-#elif defined(__PGI)
-#   define PYBIND11_COMPILER_TYPE "_pgi"
-#elif defined(__MINGW32__)
-#   define PYBIND11_COMPILER_TYPE "_mingw"
-#elif defined(__CYGWIN__)
-#   define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
-#elif defined(__GNUC__)
-#   define PYBIND11_COMPILER_TYPE "_gcc"
-#else
-#   define PYBIND11_COMPILER_TYPE "_unknown"
+/// A user can manually set this string if they know their
+/// compiler is compatible.
+#ifndef PYBIND11_COMPILER_TYPE
+#  if defined(_MSC_VER)
+#    define PYBIND11_COMPILER_TYPE "_msvc"
+#  elif defined(__INTEL_COMPILER)
+#    define PYBIND11_COMPILER_TYPE "_icc"
+#  elif defined(__clang__)
+#    define PYBIND11_COMPILER_TYPE "_clang"
+#  elif defined(__PGI)
+#    define PYBIND11_COMPILER_TYPE "_pgi"
+#  elif defined(__MINGW32__)
+#    define PYBIND11_COMPILER_TYPE "_mingw"
+#  elif defined(__CYGWIN__)
+#    define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
+#  elif defined(__GNUC__)
+#    define PYBIND11_COMPILER_TYPE "_gcc"
+#  else
+#    define PYBIND11_COMPILER_TYPE "_unknown"
+#  endif
 #endif
 
-#if defined(_LIBCPP_VERSION)
-#  define PYBIND11_STDLIB "_libcpp"
-#elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
-#  define PYBIND11_STDLIB "_libstdcpp"
-#else
-#  define PYBIND11_STDLIB ""
+/// Also standard libs
+#ifndef PYBIND11_STDLIB
+#  if defined(_LIBCPP_VERSION)
+#    define PYBIND11_STDLIB "_libcpp"
+#  elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#    define PYBIND11_STDLIB "_libstdcpp"
+#  else
+#    define PYBIND11_STDLIB ""
+#  endif
 #endif
 
 /// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility.
-#if defined(__GXX_ABI_VERSION)
-#  define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
-#else
-#  define PYBIND11_BUILD_ABI ""
+#ifndef PYBIND11_BUILD_ABI
+#  if defined(__GXX_ABI_VERSION)
+#    define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
+#  else
+#    define PYBIND11_BUILD_ABI ""
+#  endif
 #endif
 
-#if defined(WITH_THREAD)
-#  define PYBIND11_INTERNALS_KIND ""
-#else
-#  define PYBIND11_INTERNALS_KIND "_without_thread"
+#ifndef PYBIND11_INTERNALS_KIND
+#  if defined(WITH_THREAD)
+#    define PYBIND11_INTERNALS_KIND ""
+#  else
+#    define PYBIND11_INTERNALS_KIND "_without_thread"
+#  endif
 #endif
 
 #define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \
@@ -255,7 +266,7 @@ PYBIND11_NOINLINE inline internals &get_internals() {
         const PyGILState_STATE state;
     } gil;
 
-    constexpr auto *id = PYBIND11_INTERNALS_ID;
+    PYBIND11_STR_TYPE id(PYBIND11_INTERNALS_ID);
     auto builtins = handle(PyEval_GetBuiltins());
     if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
         internals_pp = static_cast<internals **>(capsule(builtins[id]));
@@ -273,7 +284,10 @@ PYBIND11_NOINLINE inline internals &get_internals() {
         auto *&internals_ptr = *internals_pp;
         internals_ptr = new internals();
 #if defined(WITH_THREAD)
-        PyEval_InitThreads();
+
+        #if PY_VERSION_HEX < 0x03090000
+                PyEval_InitThreads();
+        #endif
         PyThreadState *tstate = PyThreadState_Get();
         #if PY_VERSION_HEX >= 0x03070000
             internals_ptr->tstate = PyThread_tss_alloc();
@@ -314,7 +328,7 @@ const char *c_str(Args &&...args) {
     return strings.front().c_str();
 }
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Returns a named pointer that is shared among all extension modules (using the same
 /// pybind11 version) running in the current interpreter. Names starting with underscores
@@ -346,4 +360,4 @@ T &get_or_create_shared_data(const std::string &name) {
     return *ptr;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
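
Because the ABI-tag macros above are now wrapped in #ifndef, a build that knows two toolchains are compatible can pin the tag before including pybind11; the values below are only an illustration.

    // Define before any pybind11 include (e.g. via -D flags in the build system).
    #define PYBIND11_COMPILER_TYPE "_gcc"      // treat this clang build as gcc-compatible
    #define PYBIND11_BUILD_ABI     "_cxxabi1002"
    #include <pybind11/pybind11.h>
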
diff --git a/libs/pybind11/detail/typeid.h b/libs/pybind11/detail/typeid.h
index 9c8a4fc..148889f 100644
--- a/libs/pybind11/detail/typeid.h
+++ b/libs/pybind11/detail/typeid.h
@@ -18,8 +18,8 @@
 
 #include "common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /// Erase all occurrences of a substring
 inline void erase_all(std::string &string, const std::string &search) {
     for (size_t pos = 0;;) {
@@ -43,7 +43,7 @@ PYBIND11_NOINLINE inline void clean_type_id(std::string &name) {
 #endif
     detail::erase_all(name, "pybind11::");
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Return a string representation of a C++ type
 template <typename T> static std::string type_id() {
@@ -52,4 +52,4 @@ template  static std::string type_id() {
     return name;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/eigen.h b/libs/pybind11/eigen.h
index d963d96..e8c6f63 100644
--- a/libs/pybind11/eigen.h
+++ b/libs/pybind11/eigen.h
@@ -41,14 +41,14 @@
 // of matrices seems highly undesirable.
 static_assert(EIGEN_VERSION_AT_LEAST(3,2,7), "Eigen support in pybind11 requires Eigen >= 3.2.7");
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 // Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides:
 using EigenDStride = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
 template <typename MatrixType> using EigenDRef = Eigen::Ref<MatrixType, 0, EigenDStride>;
 template <typename MatrixType> using EigenDMap = Eigen::Map<MatrixType, 0, EigenDStride>;
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 #if EIGEN_VERSION_AT_LEAST(3,3,0)
 using EigenIndex = Eigen::Index;
@@ -432,7 +432,7 @@ struct type_caster<
         if (!need_copy) {
             // We don't need a converting copy, but we also need to check whether the strides are
             // compatible with the Ref's stride requirements
-            Array aref = reinterpret_borrow<Array>(src);
+            auto aref = reinterpret_borrow<Array>(src);
 
             if (aref && (!need_writeable || aref.writeable())) {
                 fits = props::conformable(aref);
@@ -539,9 +539,9 @@ struct type_caster::value>> {
 
 template<typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
-    typedef typename Type::Scalar Scalar;
-    typedef remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())> StorageIndex;
-    typedef typename Type::Index Index;
+    using Scalar = typename Type::Scalar;
+    using StorageIndex = remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())>;
+    using Index = typename Type::Index;
+    using Index = typename Type::Index;
     static constexpr bool rowMajor = Type::IsRowMajor;
 
     bool load(handle src, bool) {
@@ -549,11 +549,11 @@ struct type_caster::value>> {
             return false;
 
         auto obj = reinterpret_borrow<object>(src);
-        object sparse_module = module::import("scipy.sparse");
+        object sparse_module = module_::import("scipy.sparse");
         object matrix_type = sparse_module.attr(
             rowMajor ? "csr_matrix" : "csc_matrix");
 
-        if (!obj.get_type().is(matrix_type)) {
+        if (!type::handle_of(obj).is(matrix_type)) {
             try {
                 obj = matrix_type(obj);
             } catch (const error_already_set &) {
@@ -580,7 +580,7 @@ struct type_caster::value>> {
     static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
         const_cast<Type&>(src).makeCompressed();
 
-        object matrix_type = module::import("scipy.sparse").attr(
+        object matrix_type = module_::import("scipy.sparse").attr(
             rowMajor ? "csr_matrix" : "csc_matrix");
 
         array data(src.nonZeros(), src.valuePtr());
@@ -597,8 +597,8 @@ struct type_caster::value>> {
             + npy_format_descriptor::name + _("]"));
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(__GNUG__) || defined(__clang__)
 #  pragma GCC diagnostic pop
diff --git a/libs/pybind11/embed.h b/libs/pybind11/embed.h
index f814c78..204aaf9 100644
--- a/libs/pybind11/embed.h
+++ b/libs/pybind11/embed.h
@@ -45,29 +45,28 @@
             });
         }
  \endrst */
-#define PYBIND11_EMBEDDED_MODULE(name, variable)                              \
-    static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &);    \
-    static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() {        \
-        auto m = pybind11::module(PYBIND11_TOSTRING(name));                   \
-        try {                                                                 \
-            PYBIND11_CONCAT(pybind11_init_, name)(m);                         \
-            return m.ptr();                                                   \
-        } catch (pybind11::error_already_set &e) {                            \
-            PyErr_SetString(PyExc_ImportError, e.what());                     \
-            return nullptr;                                                   \
-        } catch (const std::exception &e) {                                   \
-            PyErr_SetString(PyExc_ImportError, e.what());                     \
-            return nullptr;                                                   \
-        }                                                                     \
-    }                                                                         \
-    PYBIND11_EMBEDDED_MODULE_IMPL(name)                                       \
-    pybind11::detail::embedded_module name(PYBIND11_TOSTRING(name),           \
-                               PYBIND11_CONCAT(pybind11_init_impl_, name));   \
-    void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable)
-
-
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+#define PYBIND11_EMBEDDED_MODULE(name, variable)                                \
+    static ::pybind11::module_::module_def                                      \
+        PYBIND11_CONCAT(pybind11_module_def_, name);                            \
+    static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &);   \
+    static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() {          \
+        auto m = ::pybind11::module_::create_extension_module(                  \
+            PYBIND11_TOSTRING(name), nullptr,                                   \
+            &PYBIND11_CONCAT(pybind11_module_def_, name));                      \
+        try {                                                                   \
+            PYBIND11_CONCAT(pybind11_init_, name)(m);                           \
+            return m.ptr();                                                     \
+        } PYBIND11_CATCH_INIT_EXCEPTIONS                                        \
+    }                                                                           \
+    PYBIND11_EMBEDDED_MODULE_IMPL(name)                                         \
+    ::pybind11::detail::embedded_module PYBIND11_CONCAT(pybind11_module_, name) \
+                              (PYBIND11_TOSTRING(name),                         \
+                               PYBIND11_CONCAT(pybind11_init_impl_, name));     \
+    void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &variable)
+
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks.
 struct embedded_module {
@@ -86,7 +85,7 @@ struct embedded_module {
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     Initialize the Python interpreter. No other pybind11 or CPython API functions can be
@@ -108,7 +107,7 @@ inline void initialize_interpreter(bool init_signal_handlers = true) {
     Py_InitializeEx(init_signal_handlers ? 1 : 0);
 
     // Make .py files in the working directory available by default
-    module::import("sys").attr("path").cast().append(".");
+    module_::import("sys").attr("path").cast().append(".");
 }
 
 /** \rst
@@ -199,4 +198,4 @@ class scoped_interpreter {
     bool is_valid = true;
 };
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
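
Usage sketch for the reworked PYBIND11_EMBEDDED_MODULE macro above: from the caller's perspective nothing changes. Module and function names are illustrative.

    #include <pybind11/embed.h>
    namespace py = pybind11;

    PYBIND11_EMBEDDED_MODULE(fast_calc, m) {   // hypothetical embedded module
        m.def("add", [](int a, int b) { return a + b; });
    }

    int main() {
        py::scoped_interpreter guard{};
        auto calc = py::module_::import("fast_calc");
        return calc.attr("add")(2, 3).cast<int>() == 5 ? 0 : 1;
    }
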
diff --git a/libs/pybind11/eval.h b/libs/pybind11/eval.h
index ea85ba1..fa6b8af 100644
--- a/libs/pybind11/eval.h
+++ b/libs/pybind11/eval.h
@@ -13,7 +13,23 @@
 
 #include "pybind11.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+inline void ensure_builtins_in_globals(object &global) {
+    #if PY_VERSION_HEX < 0x03080000
+        // Running exec and eval on Python 2 and 3 adds `builtins` module under
+        // `__builtins__` key to globals if not yet present.
+        // Python 3.8 made PyRun_String behave similarly. Let's also do that for
+        // older versions, for consistency.
+        if (!global.contains("__builtins__"))
+            global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE);
+    #else
+        (void) global;
+    #endif
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 enum eval_mode {
     /// Evaluate a string containing an isolated expression
@@ -31,6 +47,8 @@ object eval(str expr, object global = globals(), object local = object()) {
     if (!local)
         local = global;
 
+    detail::ensure_builtins_in_globals(global);
+
     /* PyRun_String does not accept a PyObject / encoding specifier,
        this seems to be the only alternative */
     std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr;
@@ -52,7 +70,7 @@ object eval(str expr, object global = globals(), object local = object()) {
 template <eval_mode mode = eval_expr, size_t N>
 object eval(const char (&s)[N], object global = globals(), object local = object()) {
     /* Support raw string literals by removing common leading whitespace */
-    auto expr = (s[0] == '\n') ? str(module::import("textwrap").attr("dedent")(s))
+    auto expr = (s[0] == '\n') ? str(module_::import("textwrap").attr("dedent")(s))
                                : str(s);
     return eval(expr, global, local);
 }
@@ -66,11 +84,27 @@ void exec(const char (&s)[N], object global = globals(), object local = object()
     eval(s, global, local);
 }
 
+#if defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x03000000
+template <eval_mode mode = eval_statements>
+object eval_file(str, object, object) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+template <eval_mode mode = eval_statements>
+object eval_file(str, object) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+template <eval_mode mode = eval_statements>
+object eval_file(str) {
+    pybind11_fail("eval_file not supported in PyPy3. Use eval");
+}
+#else
 template <eval_mode mode = eval_statements>
 object eval_file(str fname, object global = globals(), object local = object()) {
     if (!local)
         local = global;
 
+    detail::ensure_builtins_in_globals(global);
+
     int start;
     switch (mode) {
         case eval_expr:             start = Py_eval_input;   break;
@@ -113,5 +147,6 @@ object eval_file(str fname, object global = globals(), object local = object())
         throw error_already_set();
     return reinterpret_steal<object>(result);
 }
+#endif
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
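
Behavioural sketch of ensure_builtins_in_globals above: exec/eval with a fresh globals dict now sees __builtins__ on Python < 3.8 as well, matching 3.8+. Assumes an embedded interpreter; the snippet is illustrative.

    #include <pybind11/embed.h>
    namespace py = pybind11;

    int main() {
        py::scoped_interpreter guard{};
        py::dict scope;                        // fresh globals, no __builtins__ yet
        py::exec("x = len('abc')", scope);     // works uniformly thanks to the helper above
        return scope["x"].cast<int>() == 3 ? 0 : 1;
    }
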
diff --git a/libs/pybind11/functional.h b/libs/pybind11/functional.h
index f8bda64..92c17dc 100644
--- a/libs/pybind11/functional.h
+++ b/libs/pybind11/functional.h
@@ -12,8 +12,8 @@
 #include "pybind11.h"
 #include <functional>
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <typename Return, typename... Args>
 struct type_caster<std::function<Return(Args...)>> {
@@ -58,7 +58,10 @@ struct type_caster> {
         struct func_handle {
             function f;
             func_handle(function&& f_) : f(std::move(f_)) {}
-            func_handle(const func_handle&) = default;
+            func_handle(const func_handle& f_) {
+                gil_scoped_acquire acq;
+                f = f_.f;
+            }
             ~func_handle() {
                 gil_scoped_acquire acq;
                 function kill_f(std::move(f));
@@ -97,5 +100,5 @@ struct type_caster> {
                                + make_caster::name + _("]"));
 };
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
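
Sketch of the situation the new func_handle copy constructor protects: a std::function wrapping a Python callable can be copied on a thread that does not hold the GIL, so the copy now re-acquires it. run_later and the module name are hypothetical.

    #include <pybind11/pybind11.h>
    #include <pybind11/functional.h>
    #include <functional>
    namespace py = pybind11;

    // Hypothetical consumer that copies the callback before invoking it.
    void run_later(std::function<void(int)> cb) {
        auto copy = cb;    // this copy may run without the GIL; the fix re-acquires it
        copy(42);          // invocation re-acquires the GIL inside pybind11's wrapper
    }

    PYBIND11_MODULE(callbacks_demo, m) {       // hypothetical module name
        m.def("schedule", [](std::function<void(int)> cb) { run_later(std::move(cb)); });
    }
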
diff --git a/libs/pybind11/iostream.h b/libs/pybind11/iostream.h
index c43b7c9..9dee755 100644
--- a/libs/pybind11/iostream.h
+++ b/libs/pybind11/iostream.h
@@ -17,8 +17,8 @@
 #include 
 #include 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Buffer that writes to Python instead of C++
 class pythonbuf : public std::streambuf {
@@ -30,7 +30,7 @@ class pythonbuf : public std::streambuf {
     object pywrite;
     object pyflush;
 
-    int overflow(int c) {
+    int overflow(int c) override {
         if (!traits_type::eq_int_type(c, traits_type::eof())) {
             *pptr() = traits_type::to_char_type(c);
             pbump(1);
@@ -38,22 +38,33 @@ class pythonbuf : public std::streambuf {
         return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
     }
 
-    int sync() {
+    // This function must be non-virtual to be called in a destructor. If the
+    // rare MSVC test failure shows up with this version, then this should be
+    // simplified to a fully qualified call.
+    int _sync() {
         if (pbase() != pptr()) {
-            // This subtraction cannot be negative, so dropping the sign
-            str line(pbase(), static_cast<size_t>(pptr() - pbase()));
 
             {
                 gil_scoped_acquire tmp;
+
+                // This subtraction cannot be negative, so dropping the sign.
+                str line(pbase(), static_cast<size_t>(pptr() - pbase()));
+
                 pywrite(line);
                 pyflush();
+
+                // Placed inside gil_scoped_acquire as a mutex to avoid a race
+                setp(pbase(), epptr());
             }
 
-            setp(pbase(), epptr());
         }
         return 0;
     }
 
+    int sync() override {
+        return _sync();
+    }
+
 public:
 
     pythonbuf(object pyostream, size_t buffer_size = 1024)
@@ -67,12 +78,12 @@ class pythonbuf : public std::streambuf {
     pythonbuf(pythonbuf&&) = default;
 
     /// Sync before destroy
-    ~pythonbuf() {
-        sync();
+    ~pythonbuf() override {
+        _sync();
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 
 /** \rst
@@ -96,7 +107,7 @@ NAMESPACE_END(detail)
 
         {
             py::scoped_ostream_redirect output{std::cerr, py::module::import("sys").attr("stderr")};
-            std::cerr << "Hello, World!";
+            std::cout << "Hello, World!";
         }
  \endrst */
 class scoped_ostream_redirect {
@@ -108,7 +119,7 @@ class scoped_ostream_redirect {
 public:
     scoped_ostream_redirect(
             std::ostream &costream = std::cout,
-            object pyostream = module::import("sys").attr("stdout"))
+            object pyostream = module_::import("sys").attr("stdout"))
         : costream(costream), buffer(pyostream) {
         old = costream.rdbuf(&buffer);
     }
@@ -139,12 +150,12 @@ class scoped_estream_redirect : public scoped_ostream_redirect {
 public:
     scoped_estream_redirect(
             std::ostream &costream = std::cerr,
-            object pyostream = module::import("sys").attr("stderr"))
+            object pyostream = module_::import("sys").attr("stderr"))
         : scoped_ostream_redirect(costream,pyostream) {}
 };
 
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Class to redirect output as a context manager. C++ backend.
 class OstreamRedirect {
@@ -170,7 +181,7 @@ class OstreamRedirect {
     }
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     This is a helper function to add a C++ redirect context manager to Python
@@ -199,11 +210,11 @@ NAMESPACE_END(detail)
             m.noisy_function_with_error_printing()
 
  \endrst */
-inline class_<detail::OstreamRedirect> add_ostream_redirect(module m, std::string name = "ostream_redirect") {
+inline class_<detail::OstreamRedirect> add_ostream_redirect(module_ m, std::string name = "ostream_redirect") {
     return class_<detail::OstreamRedirect>(m, name.c_str(), module_local())
         .def(init<bool,bool>(), arg("stdout")=true, arg("stderr")=true)
         .def("__enter__", &detail::OstreamRedirect::enter)
         .def("__exit__", [](detail::OstreamRedirect &self_, args) { self_.exit(); });
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
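
Usage sketch for the redirect helpers touched above: route std::cout through Python's sys.stdout while a noisy C++ function runs, and expose the context-manager variant. Names are illustrative.

    #include <pybind11/pybind11.h>
    #include <pybind11/iostream.h>
    #include <iostream>
    namespace py = pybind11;

    void noisy() { std::cout << "hello from C++\n"; }   // hypothetical function

    PYBIND11_MODULE(noisy_demo, m) {                    // hypothetical module name
        m.def("noisy", []() {
            py::scoped_ostream_redirect out{
                std::cout, py::module_::import("sys").attr("stdout")};
            noisy();
        });
        py::add_ostream_redirect(m, "ostream_redirect"); // `with m.ostream_redirect(): ...`
    }
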
diff --git a/libs/pybind11/numpy.h b/libs/pybind11/numpy.h
index ba41a22..019f568 100644
--- a/libs/pybind11/numpy.h
+++ b/libs/pybind11/numpy.h
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -33,13 +34,18 @@
    whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size
    and dimension types (e.g. shape, strides, indexing), instead of inflicting this
    upon the library user. */
-static_assert(sizeof(ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t");
+static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t");
+static_assert(std::is_signed<Py_intptr_t>::value, "Py_intptr_t must be signed");
+// We now can reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy cares)
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 class array; // Forward declaration
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+template <> struct handle_type_name<array> { static constexpr auto name = _("numpy.ndarray"); };
+
 template  struct npy_format_descriptor;
 
 struct PyArrayDescr_Proxy {
@@ -178,8 +184,9 @@ struct npy_api {
     unsigned int (*PyArray_GetNDArrayCFeatureVersion_)();
     PyObject *(*PyArray_DescrFromType_)(int);
     PyObject *(*PyArray_NewFromDescr_)
-        (PyTypeObject *, PyObject *, int, Py_intptr_t *,
-         Py_intptr_t *, void *, int, PyObject *);
+        (PyTypeObject *, PyObject *, int, Py_intptr_t const *,
+         Py_intptr_t const *, void *, int, PyObject *);
+    // Unused. Not removed because that affects ABI of the class.
     PyObject *(*PyArray_DescrNewFromType_)(int);
     int (*PyArray_CopyInto_)(PyObject *, PyObject *);
     PyObject *(*PyArray_NewCopy_)(PyObject *, int);
@@ -190,9 +197,10 @@ struct npy_api {
     PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *);
     int (*PyArray_DescrConverter_) (PyObject *, PyObject **);
     bool (*PyArray_EquivTypes_) (PyObject *, PyObject *);
-    int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, char, PyObject **, int *,
-                                             Py_ssize_t *, PyObject **, PyObject *);
+    int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, PyObject **, int *,
+                                             Py_intptr_t *, PyObject **, PyObject *);
     PyObject *(*PyArray_Squeeze_)(PyObject *);
+    // Unused. Not removed because that affects ABI of the class.
     int (*PyArray_SetBaseObject_)(PyObject *, PyObject *);
     PyObject* (*PyArray_Resize_)(PyObject*, PyArray_Dims*, int, int);
 private:
@@ -208,7 +216,7 @@ struct npy_api {
         API_PyArray_CopyInto = 82,
         API_PyArray_NewCopy = 85,
         API_PyArray_NewFromDescr = 94,
-        API_PyArray_DescrNewFromType = 9,
+        API_PyArray_DescrNewFromType = 96,
         API_PyArray_DescrConverter = 174,
         API_PyArray_EquivTypes = 182,
         API_PyArray_GetArrayParamsFromObject = 278,
@@ -217,7 +225,7 @@ struct npy_api {
     };
 
     static npy_api lookup() {
-        module m = module::import("numpy.core.multiarray");
+        module_ m = module_::import("numpy.core.multiarray");
         auto c = m.attr("_ARRAY_API");
 #if PY_MAJOR_VERSION >= 3
         void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), NULL);
@@ -276,7 +284,7 @@ template  struct is_complex : std::false_type { };
 template  struct is_complex> : std::true_type { };
 
 template  struct array_info_scalar {
-    typedef T type;
+    using type = T;
     static constexpr bool is_array = false;
     static constexpr bool is_empty = false;
     static constexpr auto extents = _("");
@@ -323,6 +331,12 @@ template  using is_pod_struct = all_of<
     satisfies_none_of
 >;
 
+// Replacement for std::is_pod (deprecated in C++20)
+template <typename T> using is_pod = all_of<
+    std::is_standard_layout<T>,
+    std::is_trivial<T>
+>;
+
 template  ssize_t byte_offset_unsafe(const Strides &) { return 0; }
 template 
 ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) {
@@ -414,6 +428,10 @@ class unchecked_mutable_reference : public unchecked_reference {
     using ConstBase::ConstBase;
     using ConstBase::Dynamic;
 public:
+    // Bring in const-qualified versions from base class
+    using ConstBase::operator();
+    using ConstBase::operator[];
+
     /// Mutable, unchecked access to data at the given indices.
     template  T& operator()(Ix... index) {
         static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic,
@@ -439,7 +457,7 @@ struct type_caster> {
 template 
 struct type_caster> : type_caster> {};
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 class dtype : public object {
 public:
@@ -496,7 +514,7 @@ class dtype : public object {
 
 private:
     static object _dtype_from_pep3118() {
-        static PyObject *obj = module::import("numpy.core._internal")
+        static PyObject *obj = module_::import("numpy.core._internal")
             .attr("_dtype_from_pep3118").cast().release().ptr();
         return reinterpret_borrow(obj);
     }
@@ -545,7 +563,7 @@ class array : public buffer {
         forcecast = detail::npy_api::NPY_ARRAY_FORCECAST_
     };
 
-    array() : array({{0}}, static_cast<const double *>(nullptr)) {}
+    array() : array(0, static_cast<const double *>(nullptr)) {}
 
     using ShapeContainer = detail::any_container;
     using StridesContainer = detail::any_container;
@@ -555,7 +573,7 @@ class array : public buffer {
           const void *ptr = nullptr, handle base = handle()) {
 
         if (strides->empty())
-            *strides = c_strides(*shape, dt.itemsize());
+            *strides = detail::c_strides(*shape, dt.itemsize());
 
         auto ndim = shape->size();
         if (ndim != strides->size())
@@ -574,7 +592,10 @@ class array : public buffer {
 
         auto &api = detail::npy_api::get();
         auto tmp = reinterpret_steal<object>(api.PyArray_NewFromDescr_(
-            api.PyArray_Type_, descr.release().ptr(), (int) ndim, shape->data(), strides->data(),
+            api.PyArray_Type_, descr.release().ptr(), (int) ndim,
+            // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
+            reinterpret_cast<Py_intptr_t*>(shape->data()),
+            reinterpret_cast<Py_intptr_t*>(strides->data()),
             const_cast<void *>(ptr), flags, nullptr));
         if (!tmp)
             throw error_already_set();
@@ -606,8 +627,8 @@ class array : public buffer {
     template 
     explicit array(ssize_t count, const T *ptr, handle base = handle()) : array({count}, {}, ptr, base) { }
 
-    explicit array(const buffer_info &info)
-    : array(pybind11::dtype(info), info.shape, info.strides, info.ptr) { }
+    explicit array(const buffer_info &info, handle base = handle())
+    : array(pybind11::dtype(info), info.shape, info.strides, info.ptr, base) { }
 
     /// Array descriptor (dtype)
     pybind11::dtype dtype() const {
@@ -746,10 +767,12 @@ class array : public buffer {
     /// then resize will succeed only if it makes a reshape, i.e. original size doesn't change
     void resize(ShapeContainer new_shape, bool refcheck = true) {
         detail::npy_api::PyArray_Dims d = {
-            new_shape->data(), int(new_shape->size())
+            // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
+            reinterpret_cast<Py_intptr_t*>(new_shape->data()),
+            int(new_shape->size())
         };
         // try to resize, set ordering param to -1 cause it's not used anyway
-        object new_array = reinterpret_steal<object>(
+        auto new_array = reinterpret_steal<object>(
             detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1)
         );
         if (!new_array) throw error_already_set();
@@ -783,25 +806,6 @@ class array : public buffer {
             throw std::domain_error("array is not writeable");
     }
 
-    // Default, C-style strides
-    static std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
-        auto ndim = shape.size();
-        std::vector<ssize_t> strides(ndim, itemsize);
-        if (ndim > 0)
-            for (size_t i = ndim - 1; i > 0; --i)
-                strides[i - 1] = strides[i] * shape[i];
-        return strides;
-    }
-
-    // F-style strides; default when constructing an array_t with `ExtraFlags & f_style`
-    static std::vector<ssize_t> f_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
-        auto ndim = shape.size();
-        std::vector<ssize_t> strides(ndim, itemsize);
-        for (size_t i = 1; i < ndim; ++i)
-            strides[i] = strides[i - 1] * shape[i - 1];
-        return strides;
-    }
-
     template void check_dimensions(Ix... index) const {
         check_dimensions_impl(ssize_t(0), shape(), ssize_t(index)...);
     }
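The stride helpers removed above now live in the detail namespace (see the detail::c_strides / detail::f_strides calls earlier in this hunk). A minimal sketch of the arithmetic, assuming the pybind11 2.6 detail:: signatures (internal API, shown only for illustration):

    #include <pybind11/numpy.h>
    #include <cassert>
    #include <vector>

    namespace py = pybind11;

    int main() {
        // For shape (2, 3, 4) and an 8-byte element, C-order strides are
        // (96, 32, 8) bytes and F-order strides are (8, 16, 48) bytes.
        auto c = py::detail::c_strides({2, 3, 4}, 8);
        auto f = py::detail::f_strides({2, 3, 4}, 8);
        assert((c == std::vector<py::ssize_t>{96, 32, 8}));
        assert((f == std::vector<py::ssize_t>{8, 16, 48}));
    }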
@@ -853,17 +857,19 @@ template  class array_t : public
         if (!m_ptr) throw error_already_set();
     }
 
-    explicit array_t(const buffer_info& info) : array(info) { }
+    explicit array_t(const buffer_info& info, handle base = handle()) : array(info, base) { }
 
     array_t(ShapeContainer shape, StridesContainer strides, const T *ptr = nullptr, handle base = handle())
         : array(std::move(shape), std::move(strides), ptr, base) { }
 
     explicit array_t(ShapeContainer shape, const T *ptr = nullptr, handle base = handle())
         : array_t(private_ctor{}, std::move(shape),
-                ExtraFlags & f_style ? f_strides(*shape, itemsize()) : c_strides(*shape, itemsize()),
+                ExtraFlags & f_style
+                ? detail::f_strides(*shape, itemsize())
+                : detail::c_strides(*shape, itemsize()),
                 ptr, base) { }
 
-    explicit array_t(size_t count, const T *ptr = nullptr, handle base = handle())
+    explicit array_t(ssize_t count, const T *ptr = nullptr, handle base = handle())
         : array({count}, {}, ptr, base) { }
 
     constexpr ssize_t itemsize() const {
@@ -929,7 +935,8 @@ template  class array_t : public
     static bool check_(handle h) {
         const auto &api = detail::npy_api::get();
         return api.PyArray_Check_(h.ptr())
-               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr());
+               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr())
+               && detail::check_flags(h.ptr(), ExtraFlags & (array::c_style | array::f_style));
     }
 
 protected:
@@ -976,7 +983,7 @@ struct format_descriptor::is_array>
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template 
 struct pyobject_caster> {
     using type = array_t;
@@ -1007,14 +1014,14 @@ struct npy_format_descriptor_name;
 template 
 struct npy_format_descriptor_name::value>> {
     static constexpr auto name = _::value>(
-        _("bool"), _::value>("int", "uint") + _()
+        _("bool"), _::value>("numpy.int", "numpy.uint") + _()
     );
 };
 
 template 
 struct npy_format_descriptor_name::value>> {
     static constexpr auto name = _::value || std::is_same::value>(
-        _("float") + _(), _("longdouble")
+        _("numpy.float") + _(), _("numpy.longdouble")
     );
 };
 
@@ -1022,7 +1029,7 @@ template 
 struct npy_format_descriptor_name::value>> {
     static constexpr auto name = _::value
                                    || std::is_same::value>(
-        _("complex") + _(), _("longcomplex")
+        _("numpy.complex") + _(), _("numpy.longcomplex")
     );
 };
 
@@ -1218,7 +1225,7 @@ template  struct npy_format_descriptor {
 #define PYBIND11_MAP_NEXT0(test, next, ...) next PYBIND11_MAP_OUT
 #define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0 (test, next, 0)
 #define PYBIND11_MAP_NEXT(test, next)  PYBIND11_MAP_NEXT1 (PYBIND11_MAP_GET_END test, next)
-#ifdef _MSC_VER // MSVC is not as eager to expand macros, hence this workaround
+#if defined(_MSC_VER) && !defined(__clang__) // MSVC is not as eager to expand macros, hence this workaround
 #define PYBIND11_MAP_LIST_NEXT1(test, next) \
     PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
 #else
@@ -1240,7 +1247,7 @@ template  struct npy_format_descriptor {
         (::std::vector<::pybind11::detail::field_descriptor> \
          {PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)})
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
 #define PYBIND11_MAP2_LIST_NEXT1(test, next) \
     PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
 #else
@@ -1264,19 +1271,6 @@ template  struct npy_format_descriptor {
 
 #endif // __CLION_IDE__
 
-template <typename T>
-using array_iterator = typename std::add_pointer<T>::type;
-
-template <typename T>
-array_iterator<T> array_begin(const buffer_info& buffer) {
-    return array_iterator<T>(reinterpret_cast<T*>(buffer.ptr));
-}
-
-template <typename T>
-array_iterator<T> array_end(const buffer_info& buffer) {
-    return array_iterator<T>(reinterpret_cast<T*>(buffer.ptr) + buffer.size);
-}
-
 class common_iterator {
 public:
     using container_type = std::vector;
@@ -1290,7 +1284,7 @@ class common_iterator {
         m_strides.back() = static_cast(strides.back());
         for (size_type i = m_strides.size() - 1; i != 0; --i) {
             size_type j = i - 1;
-            value_type s = static_cast<value_type>(shape[i]);
+            auto s = static_cast<value_type>(shape[i]);
             m_strides[j] = strides[j] + m_strides[i] - strides[i] * s;
         }
     }
@@ -1468,7 +1462,7 @@ struct vectorize_arg {
     using call_type = remove_reference_t;
     // Is this a vectorized argument?
     static constexpr bool vectorize =
-        satisfies_any_of<call_type, std::is_arithmetic, is_complex, std::is_pod>::value &&
+        satisfies_any_of<call_type, std::is_arithmetic, is_complex, is_pod>::value &&
         satisfies_none_of::value &&
         (!std::is_reference::value ||
          (std::is_lvalue_reference::value && std::is_const::value));
@@ -1476,9 +1470,66 @@ struct vectorize_arg {
     using type = conditional_t, array::forcecast>, T>;
 };
 
+
+// py::vectorize when a return type is present
+template <typename Func, typename Return, typename... Args>
+struct vectorize_returned_array {
+    using Type = array_t<Return>;
+
+    static Type create(broadcast_trivial trivial, const std::vector<ssize_t> &shape) {
+        if (trivial == broadcast_trivial::f_trivial)
+            return array_t<Return, array::f_style>(shape);
+        else
+            return array_t<Return>(shape);
+    }
+
+    static Return *mutable_data(Type &array) {
+        return array.mutable_data();
+    }
+
+    static Return call(Func &f, Args &... args) {
+        return f(args...);
+    }
+
+    static void call(Return *out, size_t i, Func &f, Args &... args) {
+        out[i] = f(args...);
+    }
+};
+
+// py::vectorize when a return type is not present
+template <typename Func, typename... Args>
+struct vectorize_returned_array<Func, void, Args...> {
+    using Type = none;
+
+    static Type create(broadcast_trivial, const std::vector<ssize_t> &) {
+        return none();
+    }
+
+    static void *mutable_data(Type &) {
+        return nullptr;
+    }
+
+    static detail::void_type call(Func &f, Args &... args) {
+        f(args...);
+        return {};
+    }
+
+    static void call(void *, size_t, Func &f, Args &... args) {
+        f(args...);
+    }
+};
+
+
 template 
 struct vectorize_helper {
+
+// NVCC for some reason breaks if NVectorized is private
+#ifdef __CUDACC__
+public:
+#else
 private:
+#endif
+
     static constexpr size_t N = sizeof...(Args);
     static constexpr size_t NVectorized = constexpr_sum(vectorize_arg::vectorize...);
     static_assert(NVectorized >= 1,
@@ -1503,6 +1554,8 @@ struct vectorize_helper {
     using arg_call_types = std::tuple::call_type...>;
     template  using param_n_t = typename std::tuple_element::type;
 
+    using returned_array = vectorize_returned_array<Func, Return, Args...>;
+
     // Runs a vectorized function given arguments tuple and three index sequences:
     //     - Index is the full set of 0 ... (N-1) argument indices;
     //     - VIndex is the subset of argument indices with vectorized parameters, letting us access
@@ -1526,7 +1579,7 @@ struct vectorize_helper {
         ssize_t nd = 0;
         std::vector shape(0);
         auto trivial = broadcast(buffers, nd, shape);
-        size_t ndim = (size_t) nd;
+        auto ndim = (size_t) nd;
 
         size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies());
 
@@ -1534,20 +1587,19 @@ struct vectorize_helper {
         // not wrapped in an array).
         if (size == 1 && ndim == 0) {
             PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr);
-            return cast(f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...));
+            return cast(returned_array::call(f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...));
         }
 
-        array_t<Return> result;
-        if (trivial == broadcast_trivial::f_trivial) result = array_t<Return, array::f_style>(shape);
-        else result = array_t<Return, array::c_style>(shape);
+        auto result = returned_array::create(trivial, shape);
 
         if (size == 0) return std::move(result);
 
         /* Call the function */
+        auto mutable_data = returned_array::mutable_data(result);
         if (trivial == broadcast_trivial::non_trivial)
-            apply_broadcast(buffers, params, result, i_seq, vi_seq, bi_seq);
+            apply_broadcast(buffers, params, mutable_data, size, shape, i_seq, vi_seq, bi_seq);
         else
-            apply_trivial(buffers, params, result.mutable_data(), size, i_seq, vi_seq, bi_seq);
+            apply_trivial(buffers, params, mutable_data, size, i_seq, vi_seq, bi_seq);
 
         return std::move(result);
     }
@@ -1570,7 +1622,7 @@ struct vectorize_helper {
         }};
 
         for (size_t i = 0; i < size; ++i) {
-            out[i] = f(*reinterpret_cast<param_n_t<Index> *>(params[Index])...);
+            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...);
             for (auto &x : vecparams) x.first += x.second;
         }
     }
@@ -1578,19 +1630,18 @@ struct vectorize_helper {
     template <size_t... Index, size_t... VIndex, size_t... BIndex>
     void apply_broadcast(std::array<buffer_info, NVectorized> &buffers,
                          std::array<void *, N> &params,
-                         array_t<Return> &output_array,
+                         Return *out,
+                         size_t size,
+                         const std::vector<ssize_t> &output_shape,
                          index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
 
-        buffer_info output = output_array.request();
-        multi_array_iterator<NVectorized> input_iter(buffers, output.shape);
+        multi_array_iterator<NVectorized> input_iter(buffers, output_shape);
 
-        for (array_iterator<Return> iter = array_begin<Return>(output), end = array_end<Return>(output);
-             iter != end;
-             ++iter, ++input_iter) {
+        for (size_t i = 0; i < size; ++i, ++input_iter) {
             PYBIND11_EXPAND_SIDE_EFFECTS((
                 params[VIndex] = input_iter.template data<BIndex>()
             ));
-            *iter = f(*reinterpret_cast<param_n_t<VIndex> *>(std::get<VIndex>(params))...);
+            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<VIndex> *>(std::get<VIndex>(params))...);
         }
     }
 };
@@ -1605,7 +1656,7 @@ template  struct handle_type_name> {
     static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor::name + _("]");
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // Vanilla pointer vectorizer:
 template 
@@ -1635,7 +1686,7 @@ Helper vectorize(Return (Class::*f)(Args...) const) {
     return Helper(std::mem_fn(f));
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #pragma warning(pop)
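The vectorize_returned_array split above is what lets py::vectorize accept void-returning callables. A small sketch, with the module name `vec_example` and both functions as illustrative assumptions:

    #include <pybind11/pybind11.h>
    #include <pybind11/numpy.h>
    #include <stdexcept>

    namespace py = pybind11;

    double scaled_sum(double x, double y) { return 2.0 * (x + y); }

    void check_positive(double x) {
        if (x <= 0.0)
            throw std::domain_error("x must be positive");
    }

    PYBIND11_MODULE(vec_example, m) {
        // Broadcasts element-wise over NumPy arrays and returns a new array.
        m.def("scaled_sum", py::vectorize(scaled_sum));
        // void-returning callables take the vectorize_returned_array<..., void, ...>
        // path and return None.
        m.def("check_positive", py::vectorize(check_positive));
    }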
diff --git a/libs/pybind11/operators.h b/libs/pybind11/operators.h
index b3dd62c..086cb4c 100644
--- a/libs/pybind11/operators.h
+++ b/libs/pybind11/operators.h
@@ -18,8 +18,8 @@
 #  pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Enumeration with all supported operator types
 enum op_id : int {
@@ -147,6 +147,9 @@ PYBIND11_INPLACE_OPERATOR(ixor,     operator^=,   l ^= r)
 PYBIND11_INPLACE_OPERATOR(ior,      operator|=,   l |= r)
 PYBIND11_UNARY_OPERATOR(neg,        operator-,    -l)
 PYBIND11_UNARY_OPERATOR(pos,        operator+,    +l)
+// WARNING: This usage of `abs` should only be done for existing STL overloads.
+// Adding overloads directly into the `std::` namespace is advised against:
+// https://en.cppreference.com/w/cpp/language/extending_std
 PYBIND11_UNARY_OPERATOR(abs,        abs,          std::abs(l))
 PYBIND11_UNARY_OPERATOR(hash,       hash,         std::hash()(l))
 PYBIND11_UNARY_OPERATOR(invert,     operator~,    (~l))
@@ -157,11 +160,13 @@ PYBIND11_UNARY_OPERATOR(float,      float_,       (double) l)
 #undef PYBIND11_BINARY_OPERATOR
 #undef PYBIND11_INPLACE_OPERATOR
 #undef PYBIND11_UNARY_OPERATOR
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 using detail::self;
+// Add named operators so that they are accessible via `py::`.
+using detail::hash;
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #  pragma warning(pop)
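Since `hash` is now re-exported from detail, py::hash(py::self) can be spelled like the other named operators. A sketch, assuming a hypothetical Point type with its own std::hash specialization and a module named `ops_example`:

    #include <pybind11/pybind11.h>
    #include <pybind11/operators.h>
    #include <cstddef>
    #include <functional>

    namespace py = pybind11;

    struct Point {
        int x, y;
        Point(int x, int y) : x(x), y(y) {}
        bool operator==(const Point &o) const { return x == o.x && y == o.y; }
    };

    namespace std {
    template <> struct hash<Point> {
        size_t operator()(const Point &p) const {
            return hash<int>()(p.x) ^ (hash<int>()(p.y) << 1);
        }
    };
    } // namespace std

    PYBIND11_MODULE(ops_example, m) {
        py::class_<Point>(m, "Point")
            .def(py::init<int, int>())
            .def(py::self == py::self)
            .def(py::hash(py::self));  // delegates to std::hash<Point>
    }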
diff --git a/libs/pybind11/options.h b/libs/pybind11/options.h
index cc1e1f6..d74db1c 100644
--- a/libs/pybind11/options.h
+++ b/libs/pybind11/options.h
@@ -11,7 +11,7 @@
 
 #include "detail/common.h"
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 class options {
 public:
@@ -62,4 +62,4 @@ class options {
     state previous_state;
 };
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/pybind11.SConscript b/libs/pybind11/pybind11.SConscript
new file mode 100644
index 0000000..681c05b
--- /dev/null
+++ b/libs/pybind11/pybind11.SConscript
@@ -0,0 +1,7 @@
+import os
+
+Import('env')
+
+pybind11_includes = File('pybind11.h').srcnode().get_abspath()
+pybind11_includes = os.path.split(pybind11_includes)[0]
+env['pybind11_includes'] = [ pybind11_includes ]
diff --git a/libs/pybind11/pybind11.h b/libs/pybind11/pybind11.h
index d95d61f..3bffbb2 100644
--- a/libs/pybind11/pybind11.h
+++ b/libs/pybind11/pybind11.h
@@ -29,6 +29,7 @@
 #  pragma warning(disable: 4996) // warning C4996: The POSIX name for this item is deprecated. Instead, use the ISO C and C++ conformant name
 #  pragma warning(disable: 4702) // warning C4702: unreachable code
 #  pragma warning(disable: 4522) // warning C4522: multiple assignment operators specified
+#  pragma warning(disable: 4505) // warning C4505: 'PySlice_GetIndicesEx': unreferenced local function has been removed (PyPy only)
 #elif defined(__GNUG__) && !defined(__clang__)
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Wunused-but-set-parameter"
@@ -46,16 +47,21 @@
 #include "detail/class.h"
 #include "detail/init.h"
 
+#include 
+#include 
+#include 
+#include 
+
 #if defined(__GNUG__) && !defined(__clang__)
 #  include 
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /// Wraps an arbitrary C++ function/method/lambda function/.. into a callable Python object
 class cpp_function : public function {
 public:
-    cpp_function() { }
+    cpp_function() = default;
     cpp_function(std::nullptr_t) { }
 
     /// Construct a cpp_function from a vanilla function pointer
@@ -72,16 +78,34 @@ class cpp_function : public function {
                    (detail::function_signature_t *) nullptr, extra...);
     }
 
-    /// Construct a cpp_function from a class method (non-const)
+    /// Construct a cpp_function from a class method (non-const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) {
+        initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return (*) (Class *, Arg...)) nullptr, extra...);
+    }
+
+    /// Construct a cpp_function from a class method (non-const, lvalue ref-qualifier)
+    /// A copy of the overload for non-const functions without explicit ref-qualifier
+    /// but with an added `&`.
+    template <typename Return, typename Class, typename... Arg, typename... Extra>
+    cpp_function(Return (Class::*f)(Arg...)&, const Extra&... extra) {
         initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
                    (Return (*) (Class *, Arg...)) nullptr, extra...);
     }
 
-    /// Construct a cpp_function from a class method (const)
+    /// Construct a cpp_function from a class method (const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) {
+        initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return (*)(const Class *, Arg ...)) nullptr, extra...);
+    }
+
+    /// Construct a cpp_function from a class method (const, lvalue ref-qualifier)
+    /// A copy of the overload for const functions without explicit ref-qualifier
+    /// but with an added `&`.
+    template <typename Return, typename Class, typename... Arg, typename... Extra>
+    cpp_function(Return (Class::*f)(Arg...) const&, const Extra&... extra) {
         initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
                    (Return (*)(const Class *, Arg ...)) nullptr, extra...);
     }
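The new ref-qualified overloads mean methods declared with `&` or `const &` can now be bound directly. A sketch, with `Builder` and the module name `ref_example` as assumptions:

    #include <pybind11/pybind11.h>

    namespace py = pybind11;

    struct Builder {
        int value = 0;
        // Lvalue-ref-qualified methods previously had no matching cpp_function constructor.
        Builder &set(int v) & { value = v; return *this; }
        int get() const & { return value; }
    };

    PYBIND11_MODULE(ref_example, m) {
        py::class_<Builder>(m, "Builder")
            .def(py::init<>())
            .def("set", &Builder::set)   // uses the (Arg...) & overload above
            .def("get", &Builder::get);  // uses the (Arg...) const & overload above
    }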
@@ -90,9 +114,16 @@ class cpp_function : public function {
     object name() const { return attr("__name__"); }
 
 protected:
+    struct InitializingFunctionRecordDeleter {
+        // `destruct(function_record, false)`: `initialize_generic` copies strings and
+        // takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`.
+        void operator()(detail::function_record * rec) { destruct(rec, false); }
+    };
+    using unique_function_record = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;
+
     /// Space optimization: don't inline this frequently instantiated fragment
-    PYBIND11_NOINLINE detail::function_record *make_function_record() {
-        return new detail::function_record();
+    PYBIND11_NOINLINE unique_function_record make_function_record() {
+        return unique_function_record(new detail::function_record());
     }
 
     /// Special internal constructor for functors, lambda functions, etc.
@@ -102,7 +133,9 @@ class cpp_function : public function {
         struct capture { remove_reference_t f; };
 
         /* Store the function including any extra state it might have (e.g. a lambda capture object) */
-        auto rec = make_function_record();
+        // The unique_ptr makes sure nothing is leaked in case of an exception.
+        auto unique_rec = make_function_record();
+        auto rec = unique_rec.get();
 
         /* Store the capture object directly in the function record if there is enough space */
         if (sizeof(capture) <= sizeof(rec->data)) {
@@ -147,7 +180,7 @@ class cpp_function : public function {
             /* Get a pointer to the capture object */
             auto data = (sizeof(capture) <= sizeof(call.func.data)
                          ? &call.func.data : call.func.data[0]);
-            capture *cap = const_cast(reinterpret_cast(data));
+            auto *cap = const_cast<capture *>(reinterpret_cast<const capture *>(data));
 
             /* Override policy for rvalues -- usually to enforce rvp::move on an rvalue */
             return_value_policy policy = return_value_policy_override::policy(call.func.policy);
@@ -168,12 +201,23 @@ class cpp_function : public function {
         /* Process any user-provided function attributes */
         process_attributes::init(extra..., rec);
 
+        {
+            constexpr bool has_kw_only_args = any_of<std::is_same<kw_only, Extra>...>::value,
+                           has_pos_only_args = any_of<std::is_same<pos_only, Extra>...>::value,
+                           has_args = any_of<std::is_same<args, Args>...>::value,
+                           has_arg_annotations = any_of<std::is_base_of<arg, Extra>...>::value;
+            static_assert(has_arg_annotations || !has_kw_only_args, "py::kw_only requires the use of argument annotations");
+            static_assert(has_arg_annotations || !has_pos_only_args, "py::pos_only requires the use of argument annotations (for docstrings and aligning the annotations to the argument)");
+            static_assert(!(has_args && has_kw_only_args), "py::kw_only cannot be combined with a py::args argument");
+        }
+
         /* Generate a readable signature describing the function's arguments and return value types */
         static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name;
         PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
 
         /* Register the function with Python from generic (non-templated) code */
-        initialize_generic(rec, signature.text, types.data(), sizeof...(Args));
+        // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid.
+        initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args));
 
         if (cast_in::has_args) rec->has_args = true;
         if (cast_in::has_kwargs) rec->has_kwargs = true;
@@ -189,27 +233,58 @@ class cpp_function : public function {
         }
     }
 
+    // Utility class that keeps track of all duplicated strings, and cleans them up in its destructor,
+    // unless they are released. Basically a RAII-solution to deal with exceptions along the way.
+    class strdup_guard {
+    public:
+        ~strdup_guard() {
+            for (auto s : strings)
+                std::free(s);
+        }
+        char *operator()(const char *s) {
+            auto t = strdup(s);
+            strings.push_back(t);
+            return t;
+        }
+        void release() {
+            strings.clear();
+        }
+    private:
+        std::vector<char *> strings;
+    };
+
     /// Register a function call with Python (generic non-templated code goes here)
-    void initialize_generic(detail::function_record *rec, const char *text,
+    void initialize_generic(unique_function_record &&unique_rec, const char *text,
                             const std::type_info *const *types, size_t args) {
+        // Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr,
+        // we do not want this to destruct the pointer. `initialize` (the caller) still relies on the
+        // pointee being alive after this call. Only move out if a `capsule` is going to keep it alive.
+        auto rec = unique_rec.get();
+
+        // Keep track of strdup'ed strings, and clean them up as long as the function's capsule
+        // has not taken ownership yet (when `unique_rec.release()` is called).
+        // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings
+        // are only referenced before strdup'ing. So only *after* the following block could `destruct`
+        // safely be called, but even then, `repr` could still throw in the middle of copying all strings.
+        strdup_guard guarded_strdup;
 
         /* Create copies of all referenced C-style strings */
-        rec->name = strdup(rec->name ? rec->name : "");
-        if (rec->doc) rec->doc = strdup(rec->doc);
+        rec->name = guarded_strdup(rec->name ? rec->name : "");
+        if (rec->doc) rec->doc = guarded_strdup(rec->doc);
         for (auto &a: rec->args) {
             if (a.name)
-                a.name = strdup(a.name);
+                a.name = guarded_strdup(a.name);
             if (a.descr)
-                a.descr = strdup(a.descr);
+                a.descr = guarded_strdup(a.descr);
             else if (a.value)
-                a.descr = strdup(a.value.attr("__repr__")().cast().c_str());
+                a.descr = guarded_strdup(repr(a.value).cast().c_str());
         }
 
         rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__");
 
 #if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING)
         if (rec->is_constructor && !rec->is_new_style_constructor) {
-            const auto class_name = std::string(((PyTypeObject *) rec->scope.ptr())->tp_name);
+            const auto class_name = detail::get_fully_qualified_tp_name((PyTypeObject *) rec->scope.ptr());
             const auto func_name = std::string(rec->name);
             PyErr_WarnEx(
                 PyExc_FutureWarning,
@@ -231,7 +306,10 @@ class cpp_function : public function {
                 // Write arg name for everything except *args and **kwargs.
                 if (*(pc + 1) == '*')
                     continue;
-
+                // Separator for keyword-only arguments, placed before the kw
+                // arguments start
+                if (rec->nargs_kw_only > 0 && arg_index + rec->nargs_kw_only == args)
+                    signature += "*, ";
                 if (arg_index < rec->args.size() && rec->args[arg_index].name) {
                     signature += rec->args[arg_index].name;
                 } else if (arg_index == 0 && rec->is_method) {
@@ -246,6 +324,10 @@ class cpp_function : public function {
                     signature += " = ";
                     signature += rec->args[arg_index].descr;
                 }
+                // Separator for positional-only arguments (placed after the
+                // argument, rather than before like *)
+                if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only)
+                    signature += ", /";
                 arg_index++;
             } else if (c == '%') {
                 const std::type_info *t = types[type_index++];
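Together with py::kw_only and py::pos_only (which require full argument annotations, per the static_asserts added earlier), the generated docstring signature shows the `/` and `*` separators. A sketch, with the function and module names as assumptions:

    #include <pybind11/pybind11.h>

    namespace py = pybind11;

    int scale(int value, int factor, bool clamp) {
        return clamp && factor > 10 ? value * 10 : value * factor;
    }

    PYBIND11_MODULE(sig_example, m) {
        // Python-visible signature: scale(value: int, /, factor: int, *, clamp: bool) -> int
        m.def("scale", &scale,
              py::arg("value"), py::pos_only(),
              py::arg("factor"), py::kw_only(),
              py::arg("clamp"));
    }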
@@ -271,19 +353,20 @@ class cpp_function : public function {
                 signature += c;
             }
         }
+
         if (arg_index != args || types[type_index] != nullptr)
             pybind11_fail("Internal error while parsing type signature (2)");
 
 #if PY_MAJOR_VERSION < 3
         if (strcmp(rec->name, "__next__") == 0) {
             std::free(rec->name);
-            rec->name = strdup("next");
+            rec->name = guarded_strdup("next");
         } else if (strcmp(rec->name, "__bool__") == 0) {
             std::free(rec->name);
-            rec->name = strdup("__nonzero__");
+            rec->name = guarded_strdup("__nonzero__");
         }
 #endif
-        rec->signature = strdup(signature.c_str());
+        rec->signature = guarded_strdup(signature.c_str());
         rec->args.shrink_to_fit();
         rec->nargs = (std::uint16_t) args;
 
@@ -314,9 +397,10 @@ class cpp_function : public function {
             rec->def->ml_meth = reinterpret_cast(reinterpret_cast(*dispatcher));
             rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
 
-            capsule rec_capsule(rec, [](void *ptr) {
+            capsule rec_capsule(unique_rec.release(), [](void *ptr) {
                 destruct((detail::function_record *) ptr);
             });
+            guarded_strdup.release();
 
             object scope_module;
             if (rec->scope) {
@@ -331,10 +415,9 @@ class cpp_function : public function {
             if (!m_ptr)
                 pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
         } else {
-            /* Append at the end of the overload chain */
+            /* Append at the beginning or end of the overload chain */
             m_ptr = rec->sibling.ptr();
             inc_ref();
-            chain_start = chain;
             if (chain->is_method != rec->is_method)
                 pybind11_fail("overloading a method with both static and instance methods is not supported; "
                     #if defined(NDEBUG)
@@ -344,9 +427,24 @@ class cpp_function : public function {
                         std::string(pybind11::str(rec->scope.attr("__name__"))) + "." + std::string(rec->name) + signature
                     #endif
                 );
-            while (chain->next)
-                chain = chain->next;
-            chain->next = rec;
+
+            if (rec->prepend) {
+                // Beginning of chain; we need to replace the capsule's current head-of-the-chain
+                // pointer with this one, then make this one point to the previous head of the
+                // chain.
+                chain_start = rec;
+                rec->next = chain;
+                auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
+                rec_capsule.set_pointer(unique_rec.release());
+                guarded_strdup.release();
+            } else {
+                // Or end of chain (normal behavior)
+                chain_start = chain;
+                while (chain->next)
+                    chain = chain->next;
+                chain->next = unique_rec.release();
+                guarded_strdup.release();
+            }
         }
 
         std::string signatures;
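py::prepend() drives the new head-of-chain branch above: an overload registered with it is tried before the ones already in the chain. A sketch, names assumed:

    #include <pybind11/pybind11.h>
    #include <string>

    namespace py = pybind11;

    int parse(int x) { return x; }
    int parse_str(const std::string &) { return -1; }

    PYBIND11_MODULE(chain_example, m) {
        m.def("parse", &parse);
        // Without py::prepend() this overload would be appended and tried last;
        // with it, it becomes the head of the overload chain and is tried first.
        m.def("parse", &parse_str, py::prepend());
    }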
@@ -384,10 +482,10 @@ class cpp_function : public function {
         }
 
         /* Install docstring */
-        PyCFunctionObject *func = (PyCFunctionObject *) m_ptr;
-        if (func->m_ml->ml_doc)
-            std::free(const_cast(func->m_ml->ml_doc));
-        func->m_ml->ml_doc = strdup(signatures.c_str());
+        auto *func = (PyCFunctionObject *) m_ptr;
+        std::free(const_cast<char *>(func->m_ml->ml_doc));
+        // Install docstring if it's non-empty (when at least one option is enabled)
+        func->m_ml->ml_doc = signatures.empty() ? nullptr : strdup(signatures.c_str());
 
         if (rec->is_method) {
             m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr());
@@ -398,22 +496,42 @@ class cpp_function : public function {
     }
 
     /// When a cpp_function is GCed, release any memory allocated by pybind11
-    static void destruct(detail::function_record *rec) {
+    static void destruct(detail::function_record *rec, bool free_strings = true) {
+        // If on Python 3.9, check the interpreter "MICRO" (patch) version.
+        // If this is running on 3.9.0, we have to work around a bug.
+        #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+            static bool is_zero = Py_GetVersion()[4] == '0';
+        #endif
+
         while (rec) {
             detail::function_record *next = rec->next;
             if (rec->free_data)
                 rec->free_data(rec);
-            std::free((char *) rec->name);
-            std::free((char *) rec->doc);
-            std::free((char *) rec->signature);
-            for (auto &arg: rec->args) {
-                std::free(const_cast(arg.name));
-                std::free(const_cast(arg.descr));
-                arg.value.dec_ref();
+            // During initialization, these strings might not have been copied yet,
+            // so they cannot be freed. Once the function has been created, they can.
+            // Check `make_function_record` for more details.
+            if (free_strings) {
+                std::free((char *) rec->name);
+                std::free((char *) rec->doc);
+                std::free((char *) rec->signature);
+                for (auto &arg: rec->args) {
+                    std::free(const_cast<char *>(arg.name));
+                    std::free(const_cast<char *>(arg.descr));
+                }
             }
+            for (auto &arg: rec->args)
+                arg.value.dec_ref();
             if (rec->def) {
                 std::free(const_cast(rec->def->ml_doc));
-                delete rec->def;
+                // Python 3.9.0 decref's these in the wrong order; rec->def
+                // If loaded on 3.9.0, let these leak (use Python 3.9.1 at runtime to fix)
+                // See https://github.com/python/cpython/pull/22670
+                #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+                    if (!is_zero)
+                        delete rec->def;
+                #else
+                    delete rec->def;
+                #endif
             }
             delete rec;
             rec = next;
@@ -429,22 +547,22 @@ class cpp_function : public function {
                               *it = overloads;
 
         /* Need to know how many arguments + keyword arguments there are to pick the right overload */
-        const size_t n_args_in = (size_t) PyTuple_GET_SIZE(args_in);
+        const auto n_args_in = (size_t) PyTuple_GET_SIZE(args_in);
 
         handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr,
                result = PYBIND11_TRY_NEXT_OVERLOAD;
 
         auto self_value_and_holder = value_and_holder();
         if (overloads->is_constructor) {
-            const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
-            const auto pi = reinterpret_cast(parent.ptr());
-            self_value_and_holder = pi->get_value_and_holder(tinfo, false);
-
-            if (!self_value_and_holder.type || !self_value_and_holder.inst) {
+            if (!PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) {
                 PyErr_SetString(PyExc_TypeError, "__init__(self, ...) called with invalid `self` argument");
                 return nullptr;
             }
 
+            const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
+            const auto pi = reinterpret_cast<instance *>(parent.ptr());
+            self_value_and_holder = pi->get_value_and_holder(tinfo, true);
+
             // If this value is already registered it must mean __init__ is invoked multiple times;
             // we really can't support that in C++, so just ignore the second __init__.
             if (self_value_and_holder.instance_registered())
@@ -483,15 +601,16 @@ class cpp_function : public function {
                  */
 
                 const function_record &func = *it;
-                size_t pos_args = func.nargs;    // Number of positional arguments that we need
-                if (func.has_args) --pos_args;   // (but don't count py::args
-                if (func.has_kwargs) --pos_args; //  or py::kwargs)
+                size_t num_args = func.nargs;    // Number of positional arguments that we need
+                if (func.has_args) --num_args;   // (but don't count py::args
+                if (func.has_kwargs) --num_args; //  or py::kwargs)
+                size_t pos_args = num_args - func.nargs_kw_only;
 
                 if (!func.has_args && n_args_in > pos_args)
-                    continue; // Too many arguments for this overload
+                    continue; // Too many positional arguments for this overload
 
                 if (n_args_in < pos_args && func.args.size() < pos_args)
-                    continue; // Not enough arguments given, and not enough defaults to fill in the blanks
+                    continue; // Not enough positional arguments given, and not enough defaults to fill in the blanks
 
                 function_call call(func, parent);
 
@@ -506,7 +625,7 @@ class cpp_function : public function {
                         self_value_and_holder.type->dealloc(self_value_and_holder);
 
                     call.init_self = PyTuple_GET_ITEM(args_in, 0);
-                    call.args.push_back(reinterpret_cast(&self_value_and_holder));
+                    call.args.emplace_back(reinterpret_cast<PyObject *>(&self_value_and_holder));
                     call.args_convert.push_back(false);
                     ++args_copied;
                 }
@@ -534,16 +653,36 @@ class cpp_function : public function {
                 // We'll need to copy this if we steal some kwargs for defaults
                 dict kwargs = reinterpret_borrow(kwargs_in);
 
+                // 1.5. Fill in any missing pos_only args from defaults if they exist
+                if (args_copied < func.nargs_pos_only) {
+                    for (; args_copied < func.nargs_pos_only; ++args_copied) {
+                        const auto &arg_rec = func.args[args_copied];
+                        handle value;
+
+                        if (arg_rec.value) {
+                            value = arg_rec.value;
+                        }
+                        if (value) {
+                            call.args.push_back(value);
+                            call.args_convert.push_back(arg_rec.convert);
+                        } else
+                            break;
+                    }
+
+                    if (args_copied < func.nargs_pos_only)
+                        continue; // Not enough defaults to fill the positional arguments
+                }
+
                 // 2. Check kwargs and, failing that, defaults that may help complete the list
-                if (args_copied < pos_args) {
+                if (args_copied < num_args) {
                     bool copied_kwargs = false;
 
-                    for (; args_copied < pos_args; ++args_copied) {
-                        const auto &arg = func.args[args_copied];
+                    for (; args_copied < num_args; ++args_copied) {
+                        const auto &arg_rec = func.args[args_copied];
 
                         handle value;
-                        if (kwargs_in && arg.name)
-                            value = PyDict_GetItemString(kwargs.ptr(), arg.name);
+                        if (kwargs_in && arg_rec.name)
+                            value = PyDict_GetItemString(kwargs.ptr(), arg_rec.name);
 
                         if (value) {
                             // Consume a kwargs value
@@ -551,25 +690,29 @@ class cpp_function : public function {
                                 kwargs = reinterpret_steal(PyDict_Copy(kwargs.ptr()));
                                 copied_kwargs = true;
                             }
-                            PyDict_DelItemString(kwargs.ptr(), arg.name);
-                        } else if (arg.value) {
-                            value = arg.value;
+                            PyDict_DelItemString(kwargs.ptr(), arg_rec.name);
+                        } else if (arg_rec.value) {
+                            value = arg_rec.value;
+                        }
+
+                        if (!arg_rec.none && value.is_none()) {
+                            break;
                         }
 
                         if (value) {
                             call.args.push_back(value);
-                            call.args_convert.push_back(arg.convert);
+                            call.args_convert.push_back(arg_rec.convert);
                         }
                         else
                             break;
                     }
 
-                    if (args_copied < pos_args)
+                    if (args_copied < num_args)
                         continue; // Not enough arguments, defaults, or kwargs to fill the positional arguments
                 }
 
                 // 3. Check everything was consumed (unless we have a kwargs arg)
-                if (kwargs && kwargs.size() > 0 && !func.has_kwargs)
+                if (kwargs && !kwargs.empty() && !func.has_kwargs)
                     continue; // Unconsumed kwargs, but no py::kwargs argument to accept them
 
                 // 4a. If we have a py::args argument, create a new tuple with leftovers
@@ -667,7 +810,7 @@ class cpp_function : public function {
         } catch (error_already_set &e) {
             e.restore();
             return nullptr;
-#if defined(__GNUG__) && !defined(__clang__)
+#ifdef __GLIBCXX__
         } catch ( abi::__forced_unwind& ) {
             throw;
 #endif
@@ -749,18 +892,27 @@ class cpp_function : public function {
             for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) {
                 if (!some_args) some_args = true;
                 else msg += ", ";
-                msg += pybind11::repr(args_[ti]);
+                try {
+                    msg += pybind11::repr(args_[ti]);
+                } catch (const error_already_set&) {
+                    msg += "";
+                }
             }
             if (kwargs_in) {
                 auto kwargs = reinterpret_borrow(kwargs_in);
-                if (kwargs.size() > 0) {
+                if (!kwargs.empty()) {
                     if (some_args) msg += "; ";
                     msg += "kwargs: ";
                     bool first = true;
                     for (auto kwarg : kwargs) {
                         if (first) first = false;
                         else msg += ", ";
-                        msg += pybind11::str("{}={!r}").format(kwarg.first, kwarg.second);
+                        msg += pybind11::str("{}=").format(kwarg.first);
+                        try {
+                            msg += pybind11::repr(kwarg.second);
+                        } catch (const error_already_set&) {
+                            msg += "";
+                        }
                     }
                 }
             }
@@ -786,27 +938,18 @@ class cpp_function : public function {
 };
 
 /// Wrapper for Python extension modules
-class module : public object {
+class module_ : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(module, object, PyModule_Check)
+    PYBIND11_OBJECT_DEFAULT(module_, object, PyModule_Check)
 
     /// Create a new top-level Python module with the given name and docstring
-    explicit module(const char *name, const char *doc = nullptr) {
-        if (!options::show_user_defined_docstrings()) doc = nullptr;
+    PYBIND11_DEPRECATED("Use PYBIND11_MODULE or module_::create_extension_module instead")
+    explicit module_(const char *name, const char *doc = nullptr) {
 #if PY_MAJOR_VERSION >= 3
-        PyModuleDef *def = new PyModuleDef();
-        std::memset(def, 0, sizeof(PyModuleDef));
-        def->m_name = name;
-        def->m_doc = doc;
-        def->m_size = -1;
-        Py_INCREF(def);
-        m_ptr = PyModule_Create(def);
+        *this = create_extension_module(name, doc, new PyModuleDef());
 #else
-        m_ptr = Py_InitModule3(name, nullptr, doc);
+        *this = create_extension_module(name, doc, nullptr);
 #endif
-        if (m_ptr == nullptr)
-            pybind11_fail("Internal error in module::module()");
-        inc_ref();
     }
 
     /** \rst
@@ -815,7 +958,7 @@ class module : public object {
         details on the ``Extra&& ... extra`` argument, see section :ref:`extras`.
     \endrst */
     template 
-    module &def(const char *name_, Func &&f, const Extra& ... extra) {
+    module_ &def(const char *name_, Func &&f, const Extra& ... extra) {
         cpp_function func(std::forward(f), name(name_), scope(*this),
                           sibling(getattr(*this, name_, none())), extra...);
         // NB: allow overwriting here because cpp_function sets up a chain with the intention of
@@ -830,14 +973,14 @@ class module : public object {
 
         .. code-block:: cpp
 
-            py::module m("example", "pybind11 example plugin");
-            py::module m2 = m.def_submodule("sub", "A submodule of 'example'");
-            py::module m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'");
+            py::module_ m("example", "pybind11 example plugin");
+            py::module_ m2 = m.def_submodule("sub", "A submodule of 'example'");
+            py::module_ m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'");
     \endrst */
-    module def_submodule(const char *name, const char *doc = nullptr) {
+    module_ def_submodule(const char *name, const char *doc = nullptr) {
         std::string full_name = std::string(PyModule_GetName(m_ptr))
             + std::string(".") + std::string(name);
-        auto result = reinterpret_borrow(PyImport_AddModule(full_name.c_str()));
+        auto result = reinterpret_borrow<module_>(PyImport_AddModule(full_name.c_str()));
         if (doc && options::show_user_defined_docstrings())
             result.attr("__doc__") = pybind11::str(doc);
         attr(name) = result;
@@ -845,11 +988,11 @@ class module : public object {
     }
 
     /// Import and return a module or throws `error_already_set`.
-    static module import(const char *name) {
+    static module_ import(const char *name) {
         PyObject *obj = PyImport_ImportModule(name);
         if (!obj)
             throw error_already_set();
-        return reinterpret_steal(obj);
+        return reinterpret_steal<module_>(obj);
     }
 
     /// Reload the module or throws `error_already_set`.
@@ -857,14 +1000,16 @@ class module : public object {
         PyObject *obj = PyImport_ReloadModule(ptr());
         if (!obj)
             throw error_already_set();
-        *this = reinterpret_steal(obj);
+        *this = reinterpret_steal<module_>(obj);
     }
 
-    // Adds an object to the module using the given name.  Throws if an object with the given name
-    // already exists.
-    //
-    // overwrite should almost always be false: attempting to overwrite objects that pybind11 has
-    // established will, in most cases, break things.
+    /** \rst
+        Adds an object to the module using the given name.  Throws if an object with the given name
+        already exists.
+
+        ``overwrite`` should almost always be false: attempting to overwrite objects that pybind11 has
+        established will, in most cases, break things.
+    \endrst */
     PYBIND11_NOINLINE void add_object(const char *name, handle obj, bool overwrite = false) {
         if (!overwrite && hasattr(*this, name))
             pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" +
@@ -872,25 +1017,71 @@ class module : public object {
 
         PyModule_AddObject(ptr(), name, obj.inc_ref().ptr() /* steals a reference */);
     }
+
+#if PY_MAJOR_VERSION >= 3
+    using module_def = PyModuleDef;
+#else
+    struct module_def {};
+#endif
+
+    /** \rst
+        Create a new top-level module that can be used as the main module of a C extension.
+
+        For Python 3, ``def`` should point to a statically allocated module_def.
+        For Python 2, ``def`` can be a nullptr and is completely ignored.
+    \endrst */
+    static module_ create_extension_module(const char *name, const char *doc, module_def *def) {
+#if PY_MAJOR_VERSION >= 3
+        // module_def is PyModuleDef
+        def = new (def) PyModuleDef {  // Placement new (not an allocation).
+            /* m_base */     PyModuleDef_HEAD_INIT,
+            /* m_name */     name,
+            /* m_doc */      options::show_user_defined_docstrings() ? doc : nullptr,
+            /* m_size */     -1,
+            /* m_methods */  nullptr,
+            /* m_slots */    nullptr,
+            /* m_traverse */ nullptr,
+            /* m_clear */    nullptr,
+            /* m_free */     nullptr
+        };
+        auto m = PyModule_Create(def);
+#else
+        // Ignore module_def *def; only necessary for Python 3
+        (void) def;
+        auto m = Py_InitModule3(name, nullptr, options::show_user_defined_docstrings() ? doc : nullptr);
+#endif
+        if (m == nullptr) {
+            if (PyErr_Occurred())
+                throw error_already_set();
+            pybind11_fail("Internal error in module_::create_extension_module()");
+        }
+        // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_...
+        //       For Python 2, reinterpret_borrow is correct.
+        return reinterpret_borrow<module_>(m);
+    }
 };
 
+// When inside a namespace (or anywhere as long as it's not the first item on a line),
+// C++20 allows "module" to be used. This is provided for backward compatibility, and for
+// simplicity, if someone wants to use py::module for example, that is perfectly safe.
+using module = module_;
+
 /// \ingroup python_builtins
 /// Return a dictionary representing the global variables in the current execution frame,
 /// or ``__main__.__dict__`` if there is no frame (usually when the interpreter is embedded).
 inline dict globals() {
     PyObject *p = PyEval_GetGlobals();
-    return reinterpret_borrow(p ? p : module::import("__main__").attr("__dict__").ptr());
+    return reinterpret_borrow<dict>(p ? p : module_::import("__main__").attr("__dict__").ptr());
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 /// Generic support for creating new Python heap types
 class generic_type : public object {
-    template  friend class class_;
 public:
     PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check)
 protected:
     void initialize(const type_record &rec) {
-        if (rec.scope && hasattr(rec.scope, rec.name))
+        if (rec.scope && hasattr(rec.scope, "__dict__") && rec.scope.attr("__dict__").contains(rec.name))
             pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) +
                           "\": an object with that name is already defined");
 
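With the rename, py::module_ is the primary spelling and py::module remains available through the alias defined above. A sketch, assuming a module named `pkg_example`:

    #include <pybind11/pybind11.h>

    namespace py = pybind11;

    PYBIND11_MODULE(pkg_example, m) {
        py::module_ sub = m.def_submodule("tools", "Helper utilities");
        sub.def("answer", []() { return 42; });
        // Older code spelled py::module still compiles via the alias.
        py::module sys = py::module::import("sys");
        m.attr("platform") = sys.attr("platform");
    }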
@@ -954,13 +1145,13 @@ class generic_type : public object {
     void install_buffer_funcs(
             buffer_info *(*get_buffer)(PyObject *, void *),
             void *get_buffer_data) {
-        PyHeapTypeObject *type = (PyHeapTypeObject*) m_ptr;
+        auto *type = (PyHeapTypeObject*) m_ptr;
         auto tinfo = detail::get_type_info(&type->ht_type);
 
         if (!type->ht_type.tp_as_buffer)
             pybind11_fail(
                 "To be able to register buffer protocol support for the type '" +
-                std::string(tinfo->type->tp_name) +
+                get_fully_qualified_tp_name(tinfo->type) +
                 "' the associated class<>(..) invocation must "
                 "include the pybind11::buffer_protocol() annotation!");
 
@@ -1020,7 +1211,14 @@ inline void call_operator_delete(void *p, size_t s, size_t a) {
     #endif
 }
 
-NAMESPACE_END(detail)
+inline void add_class_method(object& cls, const char *name_, const cpp_function &cf) {
+    cls.attr(cf.name()) = cf;
+    if (strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) {
+      cls.attr("__hash__") = none();
+    }
+}
+
+PYBIND11_NAMESPACE_END(detail)
 
 /// Given a pointer to a member function, cast it to its `Derived` version.
 /// Forward everything else unchanged.
@@ -1117,7 +1315,7 @@ class class_ : public detail::generic_type {
     class_ &def(const char *name_, Func&& f, const Extra&... extra) {
         cpp_function cf(method_adaptor<type>(std::forward<Func>(f)), name(name_), is_method(*this),
                         sibling(getattr(*this, name_, none())), extra...);
-        attr(cf.name()) = cf;
+        add_class_method(*this, name_, cf);
         return *this;
     }
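To illustrate the add_class_method() behaviour used in def() above: once __eq__ is bound and no __hash__ is given, __hash__ is set to None, so instances become unhashable on the Python side (matching Python 3 semantics). A hedged sketch with an invented Point type:

    // Made-up example type, not part of this patch.
    struct Point { int x = 0, y = 0; };

    void bind_point(py::module_ &m) {
        py::class_<Point>(m, "Point")
            .def(py::init<>())
            .def_readwrite("x", &Point::x)
            .def_readwrite("y", &Point::y)
            // Binding __eq__ without __hash__ now also sets __hash__ = None,
            // so using a Point as a dict key raises TypeError in Python.
            .def("__eq__", [](const Point &a, const Point &b) {
                return a.x == b.x && a.y == b.y;
            });
    }
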
 
@@ -1167,15 +1365,20 @@ class class_ : public detail::generic_type {
         return *this;
     }
 
-    template <typename Func> class_& def_buffer(Func &&func) {
+    template <typename Func>
+    class_& def_buffer(Func &&func) {
         struct capture { Func func; };
-        capture *ptr = new capture { std::forward<Func>(func) };
+        auto *ptr = new capture { std::forward<Func>(func) };
         install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* {
             detail::make_caster<type> caster;
             if (!caster.load(obj, false))
                 return nullptr;
             return new buffer_info(((capture *) ptr)->func(caster));
         }, ptr);
+        weakref(m_ptr, cpp_function([ptr](handle wr) {
+            delete ptr;
+            wr.dec_ref();
+        })).release();
         return *this;
     }
 
@@ -1354,6 +1557,13 @@ class class_ : public detail::generic_type {
 
     /// Deallocates an instance; via holder, if constructed; otherwise via operator delete.
     static void dealloc(detail::value_and_holder &v_h) {
+        // We could be deallocating because we are cleaning up after a Python exception.
+        // If so, the Python error indicator will be set. We need to clear that before
+        // running the destructor, in case the destructor code calls more Python.
+        // If we don't, the Python API will exit with an exception, and pybind11 will
+        // throw error_already_set from the C++ destructor which is forbidden and triggers
+        // std::terminate().
+        error_scope scope;
         if (v_h.holder_constructed()) {
             v_h.holder().~holder_type();
             v_h.set_holder_constructed(false);
@@ -1398,7 +1608,17 @@ detail::initimpl::pickle_factory pickle(GetState &&g, SetSta
     return {std::forward<GetState>(g), std::forward<SetState>(s)};
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+inline str enum_name(handle arg) {
+    dict entries = arg.get_type().attr("__entries");
+    for (auto kv : entries) {
+        if (handle(kv.second[int_(0)]).equal(arg))
+            return pybind11::str(kv.first);
+    }
+    return "???";
+}
+
 struct enum_base {
     enum_base(handle base, handle parent) : m_base(base), m_parent(parent) { }
 
@@ -1408,29 +1628,21 @@ struct enum_base {
         auto static_property = handle((PyObject *) get_internals().static_property_type);
 
         m_base.attr("__repr__") = cpp_function(
-            [](handle arg) -> str {
-                handle type = arg.get_type();
+            [](object arg) -> str {
+                handle type = type::handle_of(arg);
                 object type_name = type.attr("__name__");
-                dict entries = type.attr("__entries");
-                for (const auto &kv : entries) {
-                    object other = kv.second[int_(0)];
-                    if (other.equal(arg))
-                        return pybind11::str("{}.{}").format(type_name, kv.first);
-                }
-                return pybind11::str("{}.???").format(type_name);
-            }, is_method(m_base)
+                return pybind11::str("<{}.{}: {}>").format(type_name, enum_name(arg), int_(arg));
+            }, name("__repr__"), is_method(m_base)
         );
 
-        m_base.attr("name") = property(cpp_function(
+        m_base.attr("name") = property(cpp_function(&enum_name, name("name"), is_method(m_base)));
+
+        m_base.attr("__str__") = cpp_function(
             [](handle arg) -> str {
-                dict entries = arg.get_type().attr("__entries");
-                for (const auto &kv : entries) {
-                    if (handle(kv.second[int_(0)]).equal(arg))
-                        return pybind11::str(kv.first);
-                }
-                return "???";
-            }, is_method(m_base)
-        ));
+                object type_name = type::handle_of(arg).attr("__name__");
+                return pybind11::str("{}.{}").format(type_name, enum_name(arg));
+            }, name("name"), is_method(m_base)
+        );
 
         m_base.attr("__doc__") = static_property(cpp_function(
             [](handle arg) -> std::string {
@@ -1439,7 +1651,7 @@ struct enum_base {
                 if (((PyTypeObject *) arg.ptr())->tp_doc)
                     docstring += std::string(((PyTypeObject *) arg.ptr())->tp_doc) + "\n\n";
                 docstring += "Members:";
-                for (const auto &kv : entries) {
+                for (auto kv : entries) {
                     auto key = std::string(pybind11::str(kv.first));
                     auto comment = kv.second[int_(1)];
                     docstring += "\n\n  " + key;
@@ -1447,26 +1659,26 @@ struct enum_base {
                         docstring += " : " + (std::string) pybind11::str(comment);
                 }
                 return docstring;
-            }
+            }, name("__doc__")
         ), none(), none(), "");
 
         m_base.attr("__members__") = static_property(cpp_function(
             [](handle arg) -> dict {
                 dict entries = arg.attr("__entries"), m;
-                for (const auto &kv : entries)
+                for (auto kv : entries)
                     m[kv.first] = kv.second[int_(0)];
                 return m;
-            }), none(), none(), ""
+            }, name("__members__")), none(), none(), ""
         );
 
         #define PYBIND11_ENUM_OP_STRICT(op, expr, strict_behavior)                     \
             m_base.attr(op) = cpp_function(                                            \
                 [](object a, object b) {                                               \
-                    if (!a.get_type().is(b.get_type()))                                \
+                    if (!type::handle_of(a).is(type::handle_of(b)))                    \
                         strict_behavior;                                               \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         #define PYBIND11_ENUM_OP_CONV(op, expr)                                        \
             m_base.attr(op) = cpp_function(                                            \
@@ -1474,7 +1686,7 @@ struct enum_base {
                     int_ a(a_), b(b_);                                                 \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         #define PYBIND11_ENUM_OP_CONV_LHS(op, expr)                                    \
             m_base.attr(op) = cpp_function(                                            \
@@ -1482,7 +1694,7 @@ struct enum_base {
                     int_ a(a_);                                                        \
                     return expr;                                                       \
                 },                                                                     \
-                is_method(m_base))
+                name(op), is_method(m_base), arg("other"))
 
         if (is_convertible) {
             PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() &&  a.equal(b));
@@ -1500,7 +1712,7 @@ struct enum_base {
                 PYBIND11_ENUM_OP_CONV("__xor__",  a ^  b);
                 PYBIND11_ENUM_OP_CONV("__rxor__", a ^  b);
                 m_base.attr("__invert__") = cpp_function(
-                    [](object arg) { return ~(int_(arg)); }, is_method(m_base));
+                    [](object arg) { return ~(int_(arg)); }, name("__invert__"), is_method(m_base));
             }
         } else {
             PYBIND11_ENUM_OP_STRICT("__eq__",  int_(a).equal(int_(b)), return false);
@@ -1520,11 +1732,11 @@ struct enum_base {
         #undef PYBIND11_ENUM_OP_CONV
         #undef PYBIND11_ENUM_OP_STRICT
 
-        object getstate = cpp_function(
-            [](object arg) { return int_(arg); }, is_method(m_base));
+        m_base.attr("__getstate__") = cpp_function(
+            [](object arg) { return int_(arg); }, name("__getstate__"), is_method(m_base));
 
-        m_base.attr("__getstate__") = getstate;
-        m_base.attr("__hash__") = getstate;
+        m_base.attr("__hash__") = cpp_function(
+            [](object arg) { return int_(arg); }, name("__hash__"), is_method(m_base));
     }
 
     PYBIND11_NOINLINE void value(char const* name_, object value, const char *doc = nullptr) {
@@ -1541,7 +1753,7 @@ struct enum_base {
 
     PYBIND11_NOINLINE void export_values() {
         dict entries = m_base.attr("__entries");
-        for (const auto &kv : entries)
+        for (auto kv : entries)
             m_parent.attr(kv.first) = kv.second[int_(0)];
     }
 
@@ -1549,7 +1761,7 @@ struct enum_base {
     handle m_parent;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Binds C++ enumerations and enumeration classes to Python
 template <typename Type> class enum_ : public class_<Type> {
@@ -1568,7 +1780,8 @@ template  class enum_ : public class_ {
         constexpr bool is_convertible = std::is_convertible<Type, Scalar>::value;
         m_base.init(is_arithmetic, is_convertible);
 
-        def(init([](Scalar i) { return static_cast<Type>(i); }));
+        def(init([](Scalar i) { return static_cast<Type>(i); }), arg("value"));
+        def_property_readonly("value", [](Type value) { return (Scalar) value; });
         def("__int__", [](Type value) { return (Scalar) value; });
         #if PY_MAJOR_VERSION < 3
             def("__long__", [](Type value) { return (Scalar) value; });
@@ -1577,10 +1790,12 @@ template  class enum_ : public class_ {
             def("__index__", [](Type value) { return (Scalar) value; });
         #endif
 
-        cpp_function setstate(
-            [](Type &value, Scalar arg) { value = static_cast<Type>(arg); },
-            is_method(*this));
-        attr("__setstate__") = setstate;
+        attr("__setstate__") = cpp_function(
+            [](detail::value_and_holder &v_h, Scalar arg) {
+                detail::initimpl::setstate<Base>(v_h, static_cast<Type>(arg),
+                        Py_TYPE(v_h.inst) != v_h.type->type); },
+            detail::is_new_style_constructor(),
+            pybind11::name("__setstate__"), is_method(*this), arg("state"));
     }
 
     /// Export enumeration entries into the parent scope
@@ -1599,7 +1814,7 @@ template  class enum_ : public class_ {
     detail::enum_base m_base;
 };
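With the reworked __repr__/__str__/name above, an enum bound as in the sketch below is expected to print as repr(m.State.on) == "<State.on: 1>" and str(m.State.on) == "State.on" (State is an invented example):

    enum class State { off = 0, on = 1 };

    void bind_state(py::module_ &m) {
        py::enum_<State>(m, "State")
            .value("off", State::off)
            .value("on", State::on)
            // export_values() still copies the entries into the parent scope.
            .export_values();
    }
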
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 
 inline void keep_alive_impl(handle nurse, handle patient) {
@@ -1669,7 +1884,7 @@ struct iterator_state {
     bool first_or_done;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Makes a python iterator from a first and past-the-end C++ InputIterator.
 template <return_value_policy Policy = return_value_policy::reference_internal,
           typename Iterator,
           typename Sentinel,
           typename ValueType = decltype(*std::declval<Iterator>()),
           typename... Extra>
 iterator make_iterator(Iterator first, Sentinel last, Extra &&... extra) {
-    typedef detail::iterator_state<Iterator, Sentinel, false, Policy> state;
+    using state = detail::iterator_state<Iterator, Sentinel, false, Policy>;
 
     if (!detail::get_type_info(typeid(state), false)) {
         class_<state>(handle(), "iterator", pybind11::module_local())
@@ -1707,7 +1922,7 @@ template ()).first),
           typename... Extra>
 iterator make_key_iterator(Iterator first, Sentinel last, Extra &&... extra) {
-    typedef detail::iterator_state<Iterator, Sentinel, true, Policy> state;
+    using state = detail::iterator_state<Iterator, Sentinel, true, Policy>;
 
     if (!detail::get_type_info(typeid(state), false)) {
         class_<state>(handle(), "iterator", pybind11::module_local())
@@ -1745,7 +1960,7 @@ template  void implicitly_convertible() {
     struct set_flag {
         bool &flag;
-        set_flag(bool &flag) : flag(flag) { flag = true; }
+        set_flag(bool &flag_) : flag(flag_) { flag_ = true; }
         ~set_flag() { flag = false; }
     };
     auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * {
@@ -1786,11 +2001,11 @@ template 
 class exception : public object {
 public:
     exception() = default;
-    exception(handle scope, const char *name, PyObject *base = PyExc_Exception) {
+    exception(handle scope, const char *name, handle base = PyExc_Exception) {
         std::string full_name = scope.attr("__name__").cast<std::string>() +
                                 std::string(".") + name;
-        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base, NULL);
-        if (hasattr(scope, name))
+        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base.ptr(), NULL);
+        if (hasattr(scope, "__dict__") && scope.attr("__dict__").contains(name))
             pybind11_fail("Error during initialization: multiple incompatible "
                           "definitions with name \"" + std::string(name) + "\"");
         scope.attr(name) = *this;
@@ -1802,13 +2017,13 @@ class exception : public object {
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns a reference to a function-local static exception object used in the simple
 // register_exception approach below.  (It would be simpler to have the static local variable
 // directly in register_exception, but that makes clang <3.5 segfault - issue #1349).
 template <typename CppException>
 exception<CppException> &get_exception_object() { static exception<CppException> ex; return ex; }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /**
  * Registers a Python exception in `m` of the given `name` and installs an exception translator to
@@ -1819,7 +2034,7 @@ NAMESPACE_END(detail)
 template <typename CppException>
 exception<CppException> &register_exception(handle scope,
                                             const char *name,
-                                            PyObject *base = PyExc_Exception) {
+                                            handle base = PyExc_Exception) {
     auto &ex = detail::get_exception_object<CppException>();
     if (!ex) ex = exception(scope, name, base);
 
@@ -1834,7 +2049,7 @@ exception<CppException> &register_exception(handle scope,
     return ex;
 }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
     auto strings = tuple(args.size());
     for (size_t i = 0; i < args.size(); ++i) {
@@ -1848,7 +2063,7 @@ PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
         file = kwargs["file"].cast<object>();
     } else {
         try {
-            file = module::import("sys").attr("stdout");
+            file = module_::import("sys").attr("stdout");
         } catch (const error_already_set &) {
             /* If print() is called from code that is executed as
                part of garbage collection during interpreter shutdown,
@@ -1865,7 +2080,7 @@ PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
     if (kwargs.contains("flush") && kwargs["flush"].cast<bool>())
         file.attr("flush")();
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
 void print(Args &&...args) {
@@ -1925,15 +2140,7 @@ class gil_scoped_acquire {
         }
 
         if (release) {
-            /* Work around an annoying assertion in PyThreadState_Swap */
-            #if defined(Py_DEBUG)
-                PyInterpreterState *interp = tstate->interp;
-                tstate->interp = nullptr;
-            #endif
             PyEval_AcquireThread(tstate);
-            #if defined(Py_DEBUG)
-                tstate->interp = interp;
-            #endif
         }
 
         inc_ref();
@@ -1957,12 +2164,22 @@ class gil_scoped_acquire {
                     pybind11_fail("scoped_acquire::dec_ref(): internal error!");
             #endif
             PyThreadState_Clear(tstate);
-            PyThreadState_DeleteCurrent();
+            if (active)
+                PyThreadState_DeleteCurrent();
             PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
             release = false;
         }
     }
 
+    /// This method will disable the PyThreadState_DeleteCurrent call and the
+    /// GIL won't be acquired. This method should be used if the interpreter
+    /// could be shutting down when this is called, as thread deletion is not
+    /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
+    /// protect subsequent code.
+    PYBIND11_NOINLINE void disarm() {
+        active = false;
+    }
+
     PYBIND11_NOINLINE ~gil_scoped_acquire() {
         dec_ref();
         if (release)
@@ -1971,6 +2188,7 @@ class gil_scoped_acquire {
 private:
     PyThreadState *tstate = nullptr;
     bool release = true;
+    bool active = true;
 };
 
 class gil_scoped_release {
@@ -1986,10 +2204,22 @@ class gil_scoped_release {
             PYBIND11_TLS_DELETE_VALUE(key);
         }
     }
+
+    /// This method will disable the PyThreadState_DeleteCurrent call and the
+    /// GIL won't be acquired. This method should be used if the interpreter
+    /// could be shutting down when this is called, as thread deletion is not
+    /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
+    /// protect subsequent code.
+    PYBIND11_NOINLINE void disarm() {
+        active = false;
+    }
+
     ~gil_scoped_release() {
         if (!tstate)
             return;
-        PyEval_RestoreThread(tstate);
+        // `PyEval_RestoreThread()` should not be called if runtime is finalizing
+        if (active)
+            PyEval_RestoreThread(tstate);
         if (disassoc) {
             auto key = detail::get_internals().tstate;
             PYBIND11_TLS_REPLACE_VALUE(key, tstate);
@@ -1998,6 +2228,7 @@ class gil_scoped_release {
 private:
     PyThreadState *tstate;
     bool disassoc;
+    bool active = true;
 };
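A hedged usage sketch for the new disarm() hook: a callback invoked from a non-Python thread that may run while the interpreter is finalizing (the _Py_IsFinalizing() check follows the hint in the comments above and is only available on Python 3.7+):

    void notify_python(const py::object &callback) {
        py::gil_scoped_acquire gil;
    #if PY_VERSION_HEX >= 0x03070000
        if (_Py_IsFinalizing()) {
            // Interpreter shutdown: skip the call and keep the destructor from
            // deleting the thread state (see disarm() above).
            gil.disarm();
            return;
        }
    #endif
        callback();
    }
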
 #elif defined(PYPY_VERSION)
 class gil_scoped_acquire {
@@ -2005,6 +2236,7 @@ class gil_scoped_acquire {
 public:
     gil_scoped_acquire() { state = PyGILState_Ensure(); }
     ~gil_scoped_acquire() { PyGILState_Release(state); }
+    void disarm() {}
 };
 
 class gil_scoped_release {
@@ -2012,10 +2244,15 @@ class gil_scoped_release {
 public:
     gil_scoped_release() { state = PyEval_SaveThread(); }
     ~gil_scoped_release() { PyEval_RestoreThread(state); }
+    void disarm() {}
 };
 #else
-class gil_scoped_acquire { };
-class gil_scoped_release { };
+class gil_scoped_acquire {
+    void disarm() {}
+};
+class gil_scoped_release {
+    void disarm() {}
+};
 #endif
 
 error_already_set::~error_already_set() {
@@ -2028,21 +2265,22 @@ error_already_set::~error_already_set() {
     }
 }
 
-inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name)  {
-    handle self = detail::get_object_handle(this_ptr, this_type);
+PYBIND11_NAMESPACE_BEGIN(detail)
+inline function get_type_override(const void *this_ptr, const type_info *this_type, const char *name)  {
+    handle self = get_object_handle(this_ptr, this_type);
     if (!self)
         return function();
-    handle type = self.get_type();
+    handle type = type::handle_of(self);
     auto key = std::make_pair(type.ptr(), name);
 
-    /* Cache functions that aren't overloaded in Python to avoid
+    /* Cache functions that aren't overridden in Python to avoid
        many costly Python dictionary lookups below */
-    auto &cache = detail::get_internals().inactive_overload_cache;
+    auto &cache = get_internals().inactive_override_cache;
     if (cache.find(key) != cache.end())
         return function();
 
-    function overload = getattr(self, name, function());
-    if (overload.is_cpp_function()) {
+    function override = getattr(self, name, function());
+    if (override.is_cpp_function()) {
         cache.insert(key);
         return function();
     }
@@ -2082,34 +2320,36 @@ inline function get_type_overload(const void *this_ptr, const detail::type_info
     Py_DECREF(result);
 #endif
 
-    return overload;
+    return override;
 }
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
   Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr.
 
-  :this_ptr: The pointer to the object the overload should be retrieved for. This should be the first
-                   non-trampoline class encountered in the inheritance chain.
-  :name: The name of the overloaded Python method to retrieve.
+  :this_ptr: The pointer to the object the overridden method should be retrieved for. This should be
+             the first non-trampoline class encountered in the inheritance chain.
+  :name: The name of the overridden Python method to retrieve.
   :return: The Python method by this name from the object or an empty function wrapper.
  \endrst */
-template <class T> function get_overload(const T *this_ptr, const char *name) {
+template <class T> function get_override(const T *this_ptr, const char *name) {
     auto tinfo = detail::get_type_info(typeid(T));
-    return tinfo ? get_type_overload(this_ptr, tinfo, name) : function();
+    return tinfo ? detail::get_type_override(this_ptr, tinfo, name) : function();
 }
 
-#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) { \
+#define PYBIND11_OVERRIDE_IMPL(ret_type, cname, name, ...) \
+    do { \
         pybind11::gil_scoped_acquire gil; \
-        pybind11::function overload = pybind11::get_overload(static_cast<const cname *>(this), name); \
-        if (overload) { \
-            auto o = overload(__VA_ARGS__); \
+        pybind11::function override = pybind11::get_override(static_cast<const cname *>(this), name); \
+        if (override) { \
+            auto o = override(__VA_ARGS__); \
             if (pybind11::detail::cast_is_temporary_value_reference<ret_type>::value) { \
-                static pybind11::detail::overload_caster_t<ret_type> caster; \
+                static pybind11::detail::override_caster_t<ret_type> caster; \
                 return pybind11::detail::cast_ref<ret_type>(std::move(o), caster); \
             } \
             else return pybind11::detail::cast_safe<ret_type>(std::move(o)); \
         } \
-    }
+    } while (false)
 
 /** \rst
     Macro to populate the virtual method in the trampoline class. This macro tries to look up a method named 'fn'
@@ -2120,25 +2360,29 @@ template  function get_overload(const T *this_ptr, const char *name) {
     .. code-block:: cpp
 
       std::string toString() override {
-        PYBIND11_OVERLOAD_NAME(
+        PYBIND11_OVERRIDE_NAME(
             std::string, // Return type (ret_type)
             Animal,      // Parent class (cname)
-            toString,    // Name of function in C++ (name)
-            "__str__",   // Name of method in Python (fn)
+            "__str__",   // Name of method in Python (name)
+            toString,    // Name of function in C++ (fn)
         );
       }
 \endrst */
-#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \
-    PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \
-    return cname::fn(__VA_ARGS__)
+#define PYBIND11_OVERRIDE_NAME(ret_type, cname, name, fn, ...) \
+    do { \
+        PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
+        return cname::fn(__VA_ARGS__); \
+    } while (false)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD_NAME`, except that it
-    throws if no overload can be found.
+    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE_NAME`, except that it
+    throws if no override can be found.
 \endrst */
-#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \
-    PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \
-    pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\"");
+#define PYBIND11_OVERRIDE_PURE_NAME(ret_type, cname, name, fn, ...) \
+    do { \
+        PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
+        pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\""); \
+    } while (false)
 
 /** \rst
     Macro to populate the virtual method in the trampoline class. This macro tries to look up the method
@@ -2155,7 +2399,7 @@ template  function get_overload(const T *this_ptr, const char *name) {
 
           // Trampoline (need one for each virtual function)
           std::string go(int n_times) override {
-              PYBIND11_OVERLOAD_PURE(
+              PYBIND11_OVERRIDE_PURE(
                   std::string, // Return type (ret_type)
                   Animal,      // Parent class (cname)
                   go,          // Name of function in C++ (must match Python name) (fn)
@@ -2164,17 +2408,41 @@ template  function get_overload(const T *this_ptr, const char *name) {
           }
       };
 \endrst */
-#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \
-    PYBIND11_OVERLOAD_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+#define PYBIND11_OVERRIDE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD`, except that it throws
-    if no overload can be found.
+    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE`, except that it throws
+    if no override can be found.
 \endrst */
+#define PYBIND11_OVERRIDE_PURE(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+
+
+// Deprecated versions
+
+PYBIND11_DEPRECATED("get_type_overload has been deprecated")
+inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) {
+    return detail::get_type_override(this_ptr, this_type, name);
+}
+
+template <class T>
+inline function get_overload(const T *this_ptr, const char *name) {
+    return get_override(this_ptr, name);
+}
+
+#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) \
+    PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__)
+#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \
+    PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__)
+#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \
+    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__);
+#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \
+    PYBIND11_OVERRIDE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__)
 #define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) \
-    PYBIND11_OVERLOAD_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
+    PYBIND11_OVERRIDE_PURE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__);
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
 #  pragma warning(pop)
diff --git a/libs/pybind11/pytypes.h b/libs/pybind11/pytypes.h
index 4003d69..78db794 100644
--- a/libs/pybind11/pytypes.h
+++ b/libs/pybind11/pytypes.h
@@ -14,14 +14,15 @@
 #include 
 #include 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 /* A few forward declarations */
 class handle; class object;
 class str; class iterator;
+class type;
 struct arg; struct arg_v;
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 class args_proxy;
 inline bool isinstance_generic(handle obj, const std::type_info &tp);
 
@@ -34,7 +35,7 @@ namespace accessor_policies {
     struct sequence_item;
     struct list_item;
     struct tuple_item;
-}
+} // namespace accessor_policies
 using obj_attr_accessor = accessor<accessor_policies::obj_attr>;
 using str_attr_accessor = accessor<accessor_policies::str_attr>;
 using item_accessor = accessor<accessor_policies::generic_item>;
@@ -151,14 +152,15 @@ class object_api : public pyobject_tag {
 
     /// Return the object's current reference count
     int ref_count() const { return static_cast<int>(Py_REFCNT(derived().ptr())); }
-    /// Return a handle to the Python type object underlying the instance
+
+    // TODO PYBIND11_DEPRECATED("Call py::type::handle_of(h) or py::type::of(h) instead of h.get_type()")
     handle get_type() const;
 
 private:
     bool rich_compare(object_api const &other, int value) const;
 };
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
     Holds a reference to a Python object (no reference counting)
@@ -240,7 +242,7 @@ class object : public handle {
     ~object() { dec_ref(); }
 
     /** \rst
-        Resets the internal pointer to ``nullptr`` without without decreasing the
+        Resets the internal pointer to ``nullptr`` without decreasing the
         object's reference count. The function returns a raw handle to the original
         Python object.
     \endrst */
@@ -311,9 +313,9 @@ template  T reinterpret_borrow(handle h) { return {h, object::borrow
 \endrst */
 template <typename T> T reinterpret_steal(handle h) { return {h, object::stolen_t{}}; }
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 inline std::string error_string();
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 /// Fetch and hold an error which was already set in Python.  An instance of this is typically
 /// thrown to propagate python-side errors back through C++ which can either be caught manually or
@@ -330,13 +332,27 @@ class error_already_set : public std::runtime_error {
     error_already_set(const error_already_set &) = default;
     error_already_set(error_already_set &&) = default;
 
-    inline ~error_already_set();
+    inline ~error_already_set() override;
 
     /// Give the currently-held error back to Python, if any.  If there is currently a Python error
     /// already set it is cleared first.  After this call, the current object no longer stores the
     /// error variables (but the `.what()` string is still available).
     void restore() { PyErr_Restore(m_type.release().ptr(), m_value.release().ptr(), m_trace.release().ptr()); }
 
+    /// If it is impossible to raise the currently-held error, such as in a destructor, we can write
+    /// it out using Python's unraisable hook (sys.unraisablehook). The error context should be
+    /// some object whose repr() helps identify the location of the error. Python already knows the
+    /// type and value of the error, so there is no need to repeat that. For example, __func__ could
+    /// be helpful. After this call, the current object no longer stores the error variables,
+    /// and neither does Python.
+    void discard_as_unraisable(object err_context) {
+        restore();
+        PyErr_WriteUnraisable(err_context.ptr());
+    }
+    void discard_as_unraisable(const char *err_context) {
+        discard_as_unraisable(reinterpret_steal<object>(PYBIND11_FROM_STRING(err_context)));
+    }
+
     // Does nothing; provided for backwards compatibility.
     PYBIND11_DEPRECATED("Use of error_already_set.clear() is deprecated")
     void clear() {}
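A hedged sketch of the intended use of discard_as_unraisable(): reporting a Python error from a C++ destructor, where rethrowing error_already_set would lead to std::terminate(). Assumes the GIL is held by the caller; callback_holder is an invented type:

    struct callback_holder {
        py::object cb;
        ~callback_holder() {
            try {
                if (cb) cb();   // may raise on the Python side
            } catch (py::error_already_set &e) {
                // Destructors must not throw; route the error through
                // sys.unraisablehook with a short context string instead.
                e.discard_as_unraisable(__func__);
            }
        }
    };
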
@@ -370,7 +386,7 @@ bool isinstance(handle obj) { return T::check_(obj); }
 template <typename T, detail::enable_if_t<!std::is_base_of<object, T>::value, int> = 0>
 bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); }
 
-template <> inline bool isinstance<handle>(handle obj) = delete;
+template <> inline bool isinstance<handle>(handle) = delete;
 template <> inline bool isinstance<object>(handle obj) { return obj.ptr() != nullptr; }
 
 /// \ingroup python_builtins
@@ -446,7 +462,7 @@ inline ssize_t hash(handle obj) {
 
 /// @} python_builtins
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 inline handle get_function(handle value) {
     if (value) {
 #if PY_MAJOR_VERSION >= 3
@@ -520,7 +536,7 @@ class accessor : public object_api> {
     mutable object cache;
 };
 
-NAMESPACE_BEGIN(accessor_policies)
+PYBIND11_NAMESPACE_BEGIN(accessor_policies)
 struct obj_attr {
     using key_type = object;
     static object get(handle obj, handle key) { return getattr(obj, key); }
@@ -597,7 +613,7 @@ struct tuple_item {
         }
     }
 };
-NAMESPACE_END(accessor_policies)
+PYBIND11_NAMESPACE_END(accessor_policies)
 
 /// STL iterator template used for tuple, list, sequence and dict
 template <typename Policy>
@@ -638,7 +654,7 @@ class generic_iterator : public Policy {
     friend bool operator<=(const It &a, const It &b) { return !(a > b); }
 };
 
-NAMESPACE_BEGIN(iterator_policies)
+PYBIND11_NAMESPACE_BEGIN(iterator_policies)
 /// Quick proxy class needed to implement ``operator->`` for iterators which can't return pointers
 template <typename T>
 struct arrow_proxy {
@@ -711,7 +727,7 @@ class dict_readonly {
     PyObject *key = nullptr, *value = nullptr;
     ssize_t pos = -1;
 };
-NAMESPACE_END(iterator_policies)
+PYBIND11_NAMESPACE_END(iterator_policies)
 
 #if !defined(PYPY_VERSION)
 using tuple_iterator = generic_iterator<iterator_policies::sequence_fast_readonly>;
@@ -736,9 +752,7 @@ inline bool PyIterable_Check(PyObject *obj) {
 }
 
 inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
-#if PY_MAJOR_VERSION >= 3
 inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
-#endif
 
 inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
 
@@ -770,7 +784,7 @@ class simple_collector;
 template <return_value_policy policy = return_value_policy::automatic_reference>
 class unpacking_collector;
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 // TODO: After the deprecated constructors are removed, this macro can be simplified by
 //       inheriting ctors: `using Parent::Parent`. It's not an option right now because
@@ -784,7 +798,9 @@ NAMESPACE_END(detail)
         Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
         PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \
         bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
-        static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
+        static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
+        template <typename Policy_> \
+        Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
 
 #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
     PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
@@ -794,15 +810,20 @@ NAMESPACE_END(detail)
     { if (!m_ptr) throw error_already_set(); } \
     Name(object &&o) \
     : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
-    { if (!m_ptr) throw error_already_set(); } \
-    template <typename Policy_> \
-    Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
+    { if (!m_ptr) throw error_already_set(); }
+
+#define PYBIND11_OBJECT_CHECK_FAILED(Name, o_ptr) \
+    ::pybind11::type_error("Object of type '" + \
+                           ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o_ptr)) + \
+                           "' is not an instance of '" #Name "'")
 
 #define PYBIND11_OBJECT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
     /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
-    Name(const object &o) : Parent(o) { } \
-    Name(object &&o) : Parent(std::move(o)) { }
+    Name(const object &o) : Parent(o) \
+    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); } \
+    Name(object &&o) : Parent(std::move(o)) \
+    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); }
 
 #define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \
     PYBIND11_OBJECT(Name, Parent, CheckFun) \
@@ -878,6 +899,32 @@ class iterator : public object {
     object value = {};
 };
 
+
+
+class type : public object {
+public:
+    PYBIND11_OBJECT(type, object, PyType_Check)
+
+    /// Return a type handle from a handle or an object
+    static handle handle_of(handle h) { return handle((PyObject*) Py_TYPE(h.ptr())); }
+
+    /// Return a type object from a handle or an object
+    static type of(handle h) { return type(type::handle_of(h), borrowed_t{}); }
+
+    // Defined in pybind11/cast.h
+    /// Convert C++ type to handle if previously registered. Does not convert
+    /// standard types, like int, float. etc. yet.
+    /// See https://github.com/pybind/pybind11/issues/2486
+    template<typename T>
+    static handle handle_of();
+
+    /// Convert C++ type to type if previously registered. Does not convert
+    /// standard types, like int, float. etc. yet.
+    /// See https://github.com/pybind/pybind11/issues/2486
+    template<typename T>
+    static type of() {return type(type::handle_of<T>(), borrowed_t{}); }
+};
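For orientation, the new py::type helpers can be used roughly as below; Widget is a placeholder type assumed to have been registered with py::class_<Widget> elsewhere:

    struct Widget {};

    void inspect(py::handle obj) {
        py::handle cls = py::type::handle_of(obj);  // like Python's type(obj), non-owning
        py::type   typ = py::type::of(obj);         // owning equivalent
        py::type   wt  = py::type::of<Widget>();    // Python class registered for Widget
        (void) cls; (void) typ; (void) wt;
    }
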
+
 class iterable : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(iterable, object, detail::PyIterable_Check)
@@ -908,7 +955,7 @@ class str : public object {
         Return a string representation of the object. This is analogous to
         the ``str()`` function in Python.
     \endrst */
-    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { }
+    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); }
 
     operator std::string() const {
         object temp = *this;
@@ -948,7 +995,7 @@ inline namespace literals {
     String literal version of `str`
  \endrst */
 inline str operator"" _s(const char *s, size_t size) { return {s, size}; }
-}
+} // namespace literals
 
 /// \addtogroup pytypes
 /// @{
@@ -980,6 +1027,9 @@ class bytes : public object {
         return std::string(buffer, (size_t) length);
     }
 };
+// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
+// are included in the doxygen group; close here and reopen after as a workaround
+/// @} pytypes
 
 inline bytes::bytes(const pybind11::str &s) {
     object temp = s;
@@ -1009,19 +1059,19 @@ inline str::str(const bytes& b) {
     m_ptr = obj.release().ptr();
 }
 
+/// \addtogroup pytypes
+/// @{
 class none : public object {
 public:
     PYBIND11_OBJECT(none, object, detail::PyNone_Check)
     none() : object(Py_None, borrowed_t{}) { }
 };
 
-#if PY_MAJOR_VERSION >= 3
 class ellipsis : public object {
 public:
     PYBIND11_OBJECT(ellipsis, object, detail::PyEllipsis_Check)
     ellipsis() : object(Py_Ellipsis, borrowed_t{}) { }
 };
-#endif
 
 class bool_ : public object {
 public:
@@ -1040,7 +1090,7 @@ class bool_ : public object {
     }
 };
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 // Converts a value to the given unsigned type.  If an error occurs, you get back (Unsigned) -1;
 // otherwise you get back the unsigned long or unsigned long long value cast to (Unsigned).
 // (The distinction is critically important when casting a returned -1 error value to some other
@@ -1060,7 +1110,7 @@ Unsigned as_unsigned(PyObject *o) {
         return v == (unsigned long long) -1 && PyErr_Occurred() ? (Unsigned) -1 : (Unsigned) v;
     }
 }
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 class int_ : public object {
 public:
@@ -1186,12 +1236,24 @@ class capsule : public object {
     }
 
     template <typename T> operator T *() const {
+        return get_pointer();
+    }
+
+    /// Get the pointer the capsule holds.
+    template<typename T>
+    T* get_pointer() const {
         auto name = this->name();
-        T * result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, name));
+        T *result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, name));
         if (!result) pybind11_fail("Unable to extract capsule contents!");
         return result;
     }
 
+    /// Replaces a capsule's pointer *without* calling the destructor on the existing one.
+    void set_pointer(const void *value) {
+        if (PyCapsule_SetPointer(m_ptr, const_cast<void *>(value)) != 0)
+            pybind11_fail("Could not set capsule pointer");
+    }
+
     const char *name() const { return PyCapsule_GetName(m_ptr); }
 };
 
@@ -1209,6 +1271,15 @@ class tuple : public object {
     detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; }
 };
 
+// We need to put this into a separate function because the Intel compiler
+// fails to compile enable_if_t<all_of<is_keyword_or_ds<Args>...>::value> part below
+// (tested with ICC 2021.1 Beta 20200827).
+template <typename... Args>
+constexpr bool args_are_all_keyword_or_ds()
+{
+  return detail::all_of<detail::is_keyword_or_ds<Args>...>::value;
+}
+
 class dict : public object {
 public:
     PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict)
@@ -1216,7 +1287,7 @@ class dict : public object {
         if (!m_ptr) pybind11_fail("Could not allocate dict object!");
     }
     template <typename... Args,
-              typename = detail::enable_if_t<detail::all_of<detail::is_keyword_or_ds<Args>...>::value>,
+              typename = detail::enable_if_t<args_are_all_keyword_or_ds<Args...>()>,
               // MSVC workaround: it can't compile an out-of-line definition, so defer the collector
               typename collector = detail::deferred_t<detail::unpacking_collector<>, Args...>>
     explicit dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) { }
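The args_are_all_keyword_or_ds() helper above guards this variadic keyword constructor; a minimal sketch of that constructor in use (make_config is an invented helper):

    using namespace py::literals;  // provides the "name"_a shorthand

    py::dict make_config() {
        // Equivalent to Python: dict(retries=3, verbose=True)
        return py::dict("retries"_a = 3, "verbose"_a = true);
    }
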
@@ -1242,7 +1313,12 @@ class dict : public object {
 class sequence : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(sequence, object, PySequence_Check)
-    size_t size() const { return (size_t) PySequence_Size(m_ptr); }
+    size_t size() const {
+        ssize_t result = PySequence_Size(m_ptr);
+        if (result == -1)
+            throw error_already_set();
+        return (size_t) result;
+    }
     bool empty() const { return size() == 0; }
     detail::sequence_accessor operator[](size_t index) const { return {*this, index}; }
     detail::item_accessor operator[](handle h) const { return object::operator[](h); }
@@ -1315,7 +1391,7 @@ class buffer : public object {
     buffer_info request(bool writable = false) const {
         int flags = PyBUF_STRIDES | PyBUF_FORMAT;
         if (writable) flags |= PyBUF_WRITABLE;
-        Py_buffer *view = new Py_buffer();
+        auto *view = new Py_buffer();
         if (PyObject_GetBuffer(m_ptr, view, flags) != 0) {
             delete view;
             throw error_already_set();
@@ -1326,46 +1402,154 @@ class buffer : public object {
 
 class memoryview : public object {
 public:
-    explicit memoryview(const buffer_info& info) {
-        static Py_buffer buf { };
-        // Py_buffer uses signed sizes, strides and shape!..
-        static std::vector py_strides { };
-        static std::vector py_shape { };
-        buf.buf = info.ptr;
-        buf.itemsize = info.itemsize;
-        buf.format = const_cast(info.format.c_str());
-        buf.ndim = (int) info.ndim;
-        buf.len = info.size;
-        py_strides.clear();
-        py_shape.clear();
-        for (size_t i = 0; i < (size_t) info.ndim; ++i) {
-            py_strides.push_back(info.strides[i]);
-            py_shape.push_back(info.shape[i]);
-        }
-        buf.strides = py_strides.data();
-        buf.shape = py_shape.data();
-        buf.suboffsets = nullptr;
-        buf.readonly = info.readonly;
-        buf.internal = nullptr;
+    PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
 
-        m_ptr = PyMemoryView_FromBuffer(&buf);
+    /** \rst
+        Creates ``memoryview`` from ``buffer_info``.
+
+        ``buffer_info`` must be created from ``buffer::request()``. Otherwise
+        throws an exception.
+
+        For creating a ``memoryview`` from objects that support buffer protocol,
+        use ``memoryview(const object& obj)`` instead of this constructor.
+     \endrst */
+    explicit memoryview(const buffer_info& info) {
+        if (!info.view())
+            pybind11_fail("Prohibited to create memoryview without Py_buffer");
+        // Note: PyMemoryView_FromBuffer never increments obj reference.
+        m_ptr = (info.view()->obj) ?
+            PyMemoryView_FromObject(info.view()->obj) :
+            PyMemoryView_FromBuffer(info.view());
         if (!m_ptr)
             pybind11_fail("Unable to create memoryview from buffer descriptor");
     }
 
-    PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject)
+    /** \rst
+        Creates ``memoryview`` from static buffer.
+
+        This method is meant for providing a ``memoryview`` for C/C++ buffer not
+        managed by Python. The caller is responsible for managing the lifetime
+        of ``ptr`` and ``format``, which MUST outlive the memoryview constructed
+        here.
+
+        See also: Python C API documentation for `PyMemoryView_FromBuffer`_.
+
+        .. _PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer
+
+        :param ptr: Pointer to the buffer.
+        :param itemsize: Byte size of an element.
+        :param format: Pointer to the null-terminated format string. For
+            homogeneous Buffers, this should be set to
+            ``format_descriptor<T>::value``.
+        :param shape: Shape of the tensor (1 entry per dimension).
+        :param strides: Number of bytes between adjacent entries (one entry
+            per dimension).
+        :param readonly: Flag to indicate if the underlying storage may be
+            written to.
+     \endrst */
+    static memoryview from_buffer(
+        void *ptr, ssize_t itemsize, const char *format,
+        detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides, bool readonly = false);
+
+    static memoryview from_buffer(
+        const void *ptr, ssize_t itemsize, const char *format,
+        detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides) {
+        return memoryview::from_buffer(
+            const_cast<void*>(ptr), itemsize, format, shape, strides, true);
+    }
+
+    template<typename T>
+    static memoryview from_buffer(
+        T *ptr, detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides, bool readonly = false) {
+        return memoryview::from_buffer(
+            reinterpret_cast<void*>(ptr), sizeof(T),
+            format_descriptor<T>::value, shape, strides, readonly);
+    }
+
+    template<typename T>
+    static memoryview from_buffer(
+        const T *ptr, detail::any_container<ssize_t> shape,
+        detail::any_container<ssize_t> strides) {
+        return memoryview::from_buffer(
+            const_cast<T*>(ptr), shape, strides, true);
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    /** \rst
+        Creates ``memoryview`` from static memory.
+
+        This method is meant for providing a ``memoryview`` for C/C++ buffer not
+        managed by Python. The caller is responsible for managing the lifetime
+        of ``mem``, which MUST outlive the memoryview constructed here.
+
+        This method is not available in Python 2.
+
+        See also: Python C API documentation for `PyMemoryView_FromMemory`_.
+
+        .. _PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory
+     \endrst */
+    static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) {
+        PyObject* ptr = PyMemoryView_FromMemory(
+            reinterpret_cast<char*>(mem), size,
+            (readonly) ? PyBUF_READ : PyBUF_WRITE);
+        if (!ptr)
+            pybind11_fail("Could not allocate memoryview object!");
+        return memoryview(object(ptr, stolen_t{}));
+    }
+
+    static memoryview from_memory(const void *mem, ssize_t size) {
+        return memoryview::from_memory(const_cast<void*>(mem), size, true);
+    }
+#endif
 };
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+inline memoryview memoryview::from_buffer(
+    void *ptr, ssize_t itemsize, const char* format,
+    detail::any_container<ssize_t> shape,
+    detail::any_container<ssize_t> strides, bool readonly) {
+    size_t ndim = shape->size();
+    if (ndim != strides->size())
+        pybind11_fail("memoryview: shape length doesn't match strides length");
+    ssize_t size = ndim ? 1 : 0;
+    for (size_t i = 0; i < ndim; ++i)
+        size *= (*shape)[i];
+    Py_buffer view;
+    view.buf = ptr;
+    view.obj = nullptr;
+    view.len = size * itemsize;
+    view.readonly = static_cast<int>(readonly);
+    view.itemsize = itemsize;
+    view.format = const_cast(format);
+    view.ndim = static_cast<int>(ndim);
+    view.shape = shape->data();
+    view.strides = strides->data();
+    view.suboffsets = nullptr;
+    view.internal = nullptr;
+    PyObject* obj = PyMemoryView_FromBuffer(&view);
+    if (!obj)
+        throw error_already_set();
+    return memoryview(object(obj, stolen_t{}));
+}
+#endif  // DOXYGEN_SHOULD_SKIP_THIS
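A hedged sketch of the new memoryview factories; the static array is illustrative and must outlive any view handed to Python, as the docstrings above require:

    static float samples[2][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}};

    py::memoryview samples_bytes() {
        // Read-only flat view over the raw bytes (Python 3 only, see from_memory above).
        return py::memoryview::from_memory(samples, sizeof(samples), /*readonly=*/true);
    }

    py::memoryview samples_2d() {
        // Typed 2x4 float view with an explicit shape and byte strides (row-major).
        const py::ssize_t fsize = sizeof(float);
        return py::memoryview::from_buffer(&samples[0][0], { 2, 4 }, { 4 * fsize, fsize });
    }
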
 /// @} pytypes
 
 /// \addtogroup python_builtins
 /// @{
+
+/// Get the length of a Python object.
 inline size_t len(handle h) {
     ssize_t result = PyObject_Length(h.ptr());
     if (result < 0)
-        pybind11_fail("Unable to compute length of object");
+        throw error_already_set();
     return (size_t) result;
 }
 
+/// Get the length hint of a Python object.
+/// Returns 0 when this cannot be determined.
 inline size_t len_hint(handle h) {
 #if PY_VERSION_HEX >= 0x03040000
     ssize_t result = PyObject_LengthHint(h.ptr(), 0);
@@ -1399,7 +1583,7 @@ inline iterator iter(handle obj) {
 }
 /// @} python_builtins
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 template  iterator object_api::begin() const { return iter(derived()); }
 template  iterator object_api::end() const { return iterator::sentinel(); }
 template  item_accessor object_api::operator[](handle key) const {
@@ -1428,7 +1612,7 @@ template 
 str_attr_accessor object_api::doc() const { return attr("__doc__"); }
 
 template 
-handle object_api::get_type() const { return (PyObject *) Py_TYPE(derived().ptr()); }
+handle object_api::get_type() const { return type::handle_of(derived()); }
 
 template 
 bool object_api::rich_compare(object_api const &other, int value) const {
@@ -1480,5 +1664,5 @@ PYBIND11_MATH_OPERATOR_BINARY(operator>>=, PyNumber_InPlaceRshift)
 #undef PYBIND11_MATH_OPERATOR_UNARY
 #undef PYBIND11_MATH_OPERATOR_BINARY
 
-NAMESPACE_END(detail)
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/pybind11/stl.h b/libs/pybind11/stl.h
index 32f8d29..721bb66 100644
--- a/libs/pybind11/stl.h
+++ b/libs/pybind11/stl.h
@@ -48,8 +48,8 @@
 #  define PYBIND11_HAS_VARIANT 1
 #endif
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for
 /// forwarding a container element).  Typically used indirect via forwarded_type(), below.
@@ -266,7 +266,9 @@ template struct optional_caster {
     static handle cast(T_ &&src, return_value_policy policy, handle parent) {
         if (!src)
             return none().inc_ref();
-        policy = return_value_policy_override<typename T::value_type>::policy(policy);
+        if (!std::is_lvalue_reference<T>::value) {
+            policy = return_value_policy_override<typename T::value_type>::policy(policy);
+        }
         return value_conv::cast(*std::forward<T_>(src), policy, parent);
     }
 
@@ -287,7 +289,7 @@ template struct optional_caster {
     PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name + _("]"));
 };
 
-#if PYBIND11_HAS_OPTIONAL
+#if defined(PYBIND11_HAS_OPTIONAL)
 template<typename T> struct type_caster<std::optional<T>>
     : public optional_caster<std::optional<T>> {};
 
 template<> struct type_caster<std::nullopt_t>
     : public void_caster<std::nullopt_t> {};
 #endif
 
-#if PYBIND11_HAS_EXP_OPTIONAL
+#if defined(PYBIND11_HAS_EXP_OPTIONAL)
 template<typename T> struct type_caster<std::experimental::optional<T>>
     : public optional_caster<std::experimental::optional<T>> {};
 
@@ -367,19 +369,19 @@ struct variant_caster> {
     PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster<Ts>::name...) + _("]"));
 };
 
-#if PYBIND11_HAS_VARIANT
+#if defined(PYBIND11_HAS_VARIANT)
 template <typename... Ts>
 struct type_caster<std::variant<Ts...>> : variant_caster<std::variant<Ts...>> { };
 #endif
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 inline std::ostream &operator<<(std::ostream &os, const handle &obj) {
     os << (std::string) str(obj);
     return os;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 #if defined(_MSC_VER)
 #pragma warning(pop)
diff --git a/libs/pybind11/stl_bind.h b/libs/pybind11/stl_bind.h
index da233ec..83195ee 100644
--- a/libs/pybind11/stl_bind.h
+++ b/libs/pybind11/stl_bind.h
@@ -15,8 +15,8 @@
 #include 
 #include 
 
-NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* SFINAE helper class used by 'is_comparable */
 template <typename T> struct container_traits {
@@ -223,7 +223,7 @@ void vector_modifiers(enable_if_treserve((size_t) slicelength);
 
             for (size_t i=0; i
 struct vector_has_data_and_format::format(), std::declval().data()), typename Vector::value_type*>::value>> : std::true_type {};
 
+// [workaround(intel)] Separate function required here
+// Workaround as the Intel compiler does not compile the enable_if_t part below
+// (tested with icc (ICC) 2021.1 Beta 20200827)
+template <typename... Args>
+constexpr bool args_any_are_buffer() {
+    return detail::any_of<std::is_same<Args, buffer_protocol>...>::value;
+}
+
+// [workaround(intel)] Separate function required here
+// [workaround(msvc)] Can't use constexpr bool in return type
+
 // Add the buffer interface to a vector
 template <typename Vector, typename Class_, typename... Args>
-enable_if_t<detail::any_of<std::is_same<Args, buffer_protocol>...>::value>
-vector_buffer(Class_& cl) {
+void vector_buffer_impl(Class_& cl, std::true_type) {
     using T = typename Vector::value_type;
 
     static_assert(vector_has_data_and_format<Vector>::value, "There is not an appropriate format descriptor for this vector");
@@ -397,23 +407,33 @@ vector_buffer(Class_& cl) {
         if (!detail::compare_buffer_info<T>::compare(info) || (ssize_t) sizeof(T) != info.itemsize)
             throw type_error("Format mismatch (Python: " + info.format + " C++: " + format_descriptor<T>::format() + ")");
 
-        auto vec = std::unique_ptr<Vector>(new Vector());
-        vec->reserve((size_t) info.shape[0]);
         T *p = static_cast<T*>(info.ptr);
         ssize_t step = info.strides[0] / static_cast<ssize_t>(sizeof(T));
         T *end = p + info.shape[0] * step;
-        for (; p != end; p += step)
-            vec->push_back(*p);
-        return vec.release();
+        if (step == 1) {
+            return Vector(p, end);
+        }
+        else {
+            Vector vec;
+            vec.reserve((size_t) info.shape[0]);
+            for (; p != end; p += step)
+                vec.push_back(*p);
+            return vec;
+        }
     }));
 
     return;
 }
 
 template <typename Vector, typename Class_, typename... Args>
-enable_if_t<!detail::any_of<std::is_same<Args, buffer_protocol>...>::value> vector_buffer(Class_&) {}
+void vector_buffer_impl(Class_&, std::false_type) {}
+
+template <typename Vector, typename Class_, typename... Args>
+void vector_buffer(Class_& cl) {
+    vector_buffer_impl<Vector, Class_, Args...>(cl, detail::any_of<std::is_same<Args, buffer_protocol>...>{});
+}
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 //
 // std::vector
@@ -511,7 +531,7 @@ class_ bind_vector(handle scope, std::string const &name, A
 // std::map, std::unordered_map
 //
 
-NAMESPACE_BEGIN(detail)
+PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* Fallback functions */
 template  void map_if_insertion_operator(const Args &...) { }
@@ -577,7 +597,7 @@ template  auto map_if_insertion_operator(Class_ &
 }
 
 
-NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(detail)
 
 template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
 class_<Map, holder_type> bind_map(handle scope, const std::string &name, Args&&... args) {
@@ -653,4 +673,4 @@ class_ bind_map(handle scope, const std::string &name, Args&&.
     return cl;
 }
 
-NAMESPACE_END(PYBIND11_NAMESPACE)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/libs/tcl2python/module_cpptcl.cpp b/libs/tcl2python/module_cpptcl.cpp
index a1d2003..bbbbc09 100644
--- a/libs/tcl2python/module_cpptcl.cpp
+++ b/libs/tcl2python/module_cpptcl.cpp
@@ -15,17 +15,47 @@ Tcl::interpreter get_current_interpreter() {
     return interp;
 }
 
-int get_current_interpreter_raw() {
-    return reinterpret_cast<int>(TclPythonGlobals::getInstance().top());
+/**
+ * @brief Evaluates the arguments as a single Tcl command in the current interpreter; Python-side equivalent:
+ * def eval(*args):
+ *    interp = cpptcl.get_current_interpreter()
+ *    liste = cpptcl.object_()
+ *    for arg in args:
+ *        o = cpptcl.object_(arg)
+ *        liste.append(o)
+ *    return interp.eval(liste)
+ * 
+ * @param args positional arguments, each converted to a Tcl object and appended to the command list
+ * @return Tcl::object holding the interpreter's result
+ */
+Tcl::object eval(py::args args) {
+    Tcl::interpreter interp = get_current_interpreter();
+    Tcl::object liste{};
+    for (py::handle arg : args) {
+        auto temp = arg.cast<py::object>();
+        auto raw_object = pythonToTclObject(temp); 
+        Tcl::object o{raw_object};
+        liste.append(o, interp);
+    }
+    return interp.eval(liste); 
 }
 
 void register_tcl_interpreter_class(py::module &m)
 {
     py::class_(m, "interpreter")
         .def(py::init<::Tcl_Interp *, bool>(), py::arg("arg0"), py::arg("owner") = false)
+        .def_static("reinterpret_cast", []( int address) -> Tcl::interpreter {
+            Tcl_Interp* interp = reinterpret_cast<Tcl_Interp*>(address);
+            Tcl::interpreter in{interp, false};
+            return in;
+        }, "reinterpret_cast of an integer address of type Tcl_Interp", py::arg("address") )
+        .def_static("reinterpret_cast", []( Tcl::interpreter& interp) -> int {
+            int address = reinterpret_cast(interp.get());
+            return address;
+        }, "reinterpret_cast of an interpreter to address", py::arg("interpreter") )
         .def_static("getDefault", (Tcl::interpreter * (*)()) & Tcl::interpreter::getDefault, " ")
         .def("make_safe", (void (Tcl::interpreter::*)()) & Tcl::interpreter::make_safe, " ")
-        .def("get", (Tcl_Interp * (Tcl::interpreter::*)() const) & Tcl::interpreter::get, " ")
+        .def("get", (Tcl_Interp * (Tcl::interpreter::*)() const) & Tcl::interpreter::get, " ", py::return_value_policy::reference)
         //  (Tcl::result(Tcl::interpreter::*)(std::string const &)) 
         .def("eval",[] (Tcl::interpreter& self, std::string const & script) -> Tcl::object {
             Tcl::result res = self.eval(script);
@@ -39,6 +69,7 @@ void register_tcl_interpreter_class(py::module &m)
         .def("getVar", (Tcl::result(Tcl::interpreter::*)(std::string const &, std::string const &)) & Tcl::interpreter::getVar, " ", py::arg("arrayTclVariable"), py::arg("arrayIndex"))
         .def("setVar", (Tcl::result(Tcl::interpreter::*)(std::string const &, Tcl::object const & scalarTclVariable, int)) & Tcl::interpreter::setVar,
             " ", py::arg("arrayTclVariable"), py::arg("scalarTclVariable"), py::arg("flags"))
+        .def("setResult", &Tcl::interpreter::setResult, py::arg("result"))
         .def("exists", (bool (Tcl::interpreter::*)(std::string const &)) & Tcl::interpreter::exists, " ", py::arg("scalarTclVariable"))
         .def("exists", (bool (Tcl::interpreter::*)(::std::string const &, ::std::string const &)) & Tcl::interpreter::exists, " ", py::arg("arrayTclVariable"), py::arg("arrayIndex"))
         .def("create_alias", (void (Tcl::interpreter::*)(::std::string const &, Tcl::interpreter &, ::std::string const &)) & Tcl::interpreter::create_alias, " ", py::arg("cmd"), py::arg("targetInterp"), py::arg("targetCmd"))
@@ -61,6 +92,22 @@ void register_tcl_object_class(py::module &m)
             Tcl_Obj* tcl_obj = pythonToTclObject(py_object);
             self.assign(tcl_obj);
         })
+        .def_static("reinterpret_cast", []( int address) -> Tcl::object {
+            if (address == 0) {
+                throw std::invalid_argument("Called reinterpret_cast (int address) -> cpptcl.object_ with 0");
+            }
+            Tcl_Obj* tcl_obj = reinterpret_cast<Tcl_Obj*>(address);
+            Tcl::object o{tcl_obj, true};
+            return o;
+        }, "reinterpret_cast of an integer address of type Tcl_Obj ", py::arg("address") )
+        .def_static("reinterpret_cast", []( Tcl::object& obj) -> int {
+            int address = reinterpret_cast(obj.get_object());
+            return address;
+        }, "reinterpret_cast to an integer address", py::arg("Tcl::object") )
+        .def_static("addressof", []( Tcl::object& obj) -> int {
+            int address = reinterpret_cast(std::addressof(obj));
+            return address;
+        }, "addressof Tcl::object as an integer address", py::arg("Tcl::object") )
         .def("swap", 
             (::Tcl::object & (Tcl::object::*)(::Tcl::object &)) & Tcl::object::swap," ", py::arg("other"))
         .def("get_bool",
@@ -118,6 +165,7 @@ void register_tcl_object_class(py::module &m)
 
 void register_tcl_result_class(py::module &m){
     py::class_<Tcl::result>(m, "result")
+        .def(py::init<::Tcl_Interp *>())
         .def_property_readonly("object_", &Tcl::result::operator Tcl::object)
         .def("__str__", &Tcl::result::operator std::string)
         .def("__float__", &Tcl::result::operator double)
@@ -128,9 +176,11 @@ void register_tcl_result_class(py::module &m){
 }
 
 PYBIND11_EMBEDDED_MODULE(cpptcl, m) {
+    py::module::import("ctcl");
+
     register_tcl_interpreter_class(m);
     register_tcl_object_class(m);
     register_tcl_result_class(m);
-    m.def("get_current_interpreter_raw", get_current_interpreter_raw);
     m.def("get_current_interpreter", get_current_interpreter);
+    m.def("eval", eval);
 }
diff --git a/libs/tcl2python/module_native.cpp b/libs/tcl2python/module_native.cpp
index fa56196..87f5f60 100644
--- a/libs/tcl2python/module_native.cpp
+++ b/libs/tcl2python/module_native.cpp
@@ -1,8 +1,14 @@
 #include "embed.h"
+#include "stl.h" // py::object -> std::vector conversion
 
 #include 
 #include 
 
+// #include "asmjit/asmjit.h"
+#include "polyhook2/Detour/PyCallback.hpp"
+#include "polyhook2/Detour/x86Detour.hpp"
+#include "polyhook2/CapstoneDisassembler.hpp"
+
 #define WINDOWS_LEAN_AND_MEAN
 #include <windows.h>
 #include <tlhelp32.h>
@@ -10,8 +16,7 @@
 
 namespace py = pybind11;
 
-namespace
-{
+namespace {
 
 static const constexpr auto ANTI_DEBUG_THREAD_ENTRY = 0xf15cfd;
 
@@ -75,6 +80,8 @@ void killAntiDebugThread() {
     
     CloseHandle(snapshot_handle);
 
+#ifndef _MSC_VER
+    // ThreadQuerySetWin32StartAddress is not available with every compiler
     for(auto thread_id : threads) {
         HANDLE thread_handle = OpenThread(THREAD_ALL_ACCESS, false, thread_id);
         PVOID thread_info;
@@ -87,6 +94,7 @@ void killAntiDebugThread() {
         }
         CloseHandle(thread_handle);
     }
+#endif
 }
 
 void waitForDebuggerAndBreak() {
@@ -94,13 +102,491 @@ void waitForDebuggerAndBreak() {
         using namespace std::chrono_literals;
         std::this_thread::sleep_for(100ms);
     }
+#ifdef _MSC_VER
+    __debugbreak();
+#else
     __asm__("int $3");
+#endif
 }
 
+struct PyFunctionWrap {
+    PyFunctionWrap(py::function& func) {
+        _func = func;
+    }
+
+    PLH::PyCallback callback;
+    py::function _func;
+    std::vector<asmjit::Type::Id> _args;
+    asmjit::Type::Id _return_type;
+    std::shared_ptr<PLH::x86Detour> _detour;
+};
+
+class ConversionException : public std::exception {
+    const char* const _msg = "";
+
+public:
+    ConversionException(const char* msg):
+        _msg(msg)
+    {}
+
+    const char* what() const noexcept override final {
+        return _msg;
+    }
+};
 
 } // namespace
 
-PYBIND11_EMBEDDED_MODULE(_native, m) {
+NOINLINE void dispatch(const uint32_t unique_id, const PLH::PyCallback::Parameters *p, const uint8_t param_count, const PLH::PyCallback::ReturnValue *retVal)
+{
+    PyFunctionWrap *py_func_wrap = reinterpret_cast<PyFunctionWrap *>(unique_id);
+    std::string function_name;
+    std::string arguments_as_str;
+
+    try
+    {
+        py::gil_scoped_acquire gil{};
+        py::list arguments;
+        namespace Type = asmjit::Type;
+
+        function_name = py::str(py_func_wrap->_func);
+
+        for (uint8_t indx = 0; indx != param_count; ++indx)
+        {
+            uint8_t type = py_func_wrap->_args[indx];
+
+            switch (type)
+            {
+            case Type::Id::kIdUIntPtr:
+            {
+                uintptr_t value = p->getArg<uintptr_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdF32:
+            {
+                float value = p->getArg<float>(indx);
+                py::float_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdF64:
+            {
+                double value = p->getArg<double>(indx);
+                py::float_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdU32:
+            {
+                uint32_t value = p->getArg<uint32_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdI32:
+            {
+                int32_t value = p->getArg<int32_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdI8:
+            {
+                int8_t value = p->getArg<int8_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdU8:
+            {
+                uint8_t value = p->getArg<uint8_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdI64:
+            {
+                int64_t value = p->getArg<int64_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdU64:
+            {
+                uint64_t value = p->getArg<uint64_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdI16:
+            {
+                int16_t value = p->getArg<int16_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            case Type::Id::kIdU16:
+            {
+                uint16_t value = p->getArg<uint16_t>(indx);
+                py::int_ py_obj{value};
+                arguments.append(py_obj);
+            }
+            break;
+            default:
+                throw ConversionException{"Unsupported argument type"};
+            }
+        }
+        
+        arguments_as_str = py::str(arguments);
+
+        py::object result = py_func_wrap->_func(*arguments);
+
+        auto ret_type = py_func_wrap->_return_type;
+        if (Type::isFloat32(ret_type))
+        {
+            float value = py::cast<float>(result);
+            *((float *)retVal->getRetPtr()) = value;
+        }
+        else if (Type::isFloat64(ret_type))
+        {
+            double value = py::cast<double>(result);
+            *((double *)retVal->getRetPtr()) = value;
+        }
+        else if (Type::isInt(ret_type))
+        {
+            uint64_t unsigned_integer = py::cast<uint64_t>(result);
+            int64_t signed_integer = py::cast<int64_t>(result);
+
+            if (Type::isInt8(ret_type))
+            {
+                *((int8_t *)retVal->getRetPtr()) = static_cast<int8_t>(signed_integer);
+            }
+            else if (Type::isInt16(ret_type))
+            {
+                *((int16_t *)retVal->getRetPtr()) = static_cast<int16_t>(signed_integer);
+            }
+            else if (Type::isInt32(ret_type))
+            {
+                *((int32_t *)retVal->getRetPtr()) = static_cast<int32_t>(signed_integer);
+            }
+            else
+            {
+                *((uint64_t *)retVal->getRetPtr()) = unsigned_integer;
+            }
+        }
+        else
+        {
+            throw ConversionException{"Unsupported return type"};
+        }
+    }
+    catch (std::exception &exc)
+    {
+#ifdef _MSC_VER
+        throw exc;
+#else
+        std::cerr << "Exception occurred during dispatch of native function to python function\n" << 
+             __FILE__ << " " << __LINE__ << " " << function_name << "(" << arguments_as_str << "\n" << 
+             exc.what() << std::endl;
+        return;
+#endif
+    }
+    return;
+}
+
+namespace {
+
+class Logger : public PLH::Logger
+{
+public:
+    void log(std::string msg, PLH::ErrorLevel level) override {
+        std::cout << "[" << static_cast<int>(level) << "] " << msg << std::endl;
+    }
+};
+
+struct NativeFunction
+{
+    asmjit::Type::Id _ret_type;
+    std::string _call_conv;
+    std::vector<asmjit::Type::Id> _param_types;
+    uint64_t _function_address = 0;
+    PLH::PyCallback _callback;
+
+    NativeFunction(const uint32_t native_address, const asmjit::Type::Id ret_type, const std::string &call_conv, const std::vector<asmjit::Type::Id> &param_types)
+    {
+        _ret_type = ret_type;
+        _call_conv = call_conv;
+        _param_types = param_types;
+
+        asmjit::FuncSignature sig = {};
+
+        const size_t param_count = param_types.size();
+
+        std::vector<uint8_t> args_types;
+        for (const auto &s : param_types)
+        {
+            args_types.push_back(static_cast<uint8_t>(s));
+        }
+
+        sig.init(PLH::PyCallback::getCallConv(call_conv),
+                 asmjit::FuncSignature::kNoVarArgs,
+                 ret_type,
+                 args_types.data(),
+                 static_cast<uint32_t>(param_types.size()));
+
+        _function_address = _callback.getJitFunc(native_address, sig, asmjit::Environment::kArchHost);
+    }
+
+    py::object operator()(const py::args& args)
+    {
+        const NativeFunction &native_function = *this;
+
+        const size_t param_count = native_function._param_types.size();
+        if (args.size() != param_count)
+        {
+            throw std::invalid_argument("args.size() != param_types.size()");
+        }
+
+        std::vector<uint64_t> params;
+        params.reserve(param_count);
+
+        for (size_t i = 0; i < param_count; i++)
+        {
+            params.push_back(0);
+        }
+
+        namespace Type = asmjit::Type;
+        for (uint8_t indx = 0; indx != param_count; ++indx)
+        {
+            Type::Id type = native_function._param_types[indx];
+            PLH::PyCallback::Parameters *param = reinterpret_cast<PLH::PyCallback::Parameters *>(params.data());
+
+            switch (type)
+            {
+            case Type::Id::kIdUIntPtr:
+            {
+                uintptr_t value = py::cast<uintptr_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdF32:
+            {
+                float value = py::cast<float>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdF64:
+            {
+                double value = py::cast<double>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdU32:
+            {
+                uint32_t value = py::cast<uint32_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdI32:
+            {
+                int32_t value = py::cast<int32_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdI8:
+            {
+                int8_t value = py::cast<int8_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdU8:
+            {
+                uint8_t value = py::cast<uint8_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdI64:
+            {
+                int64_t value = py::cast<int64_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdU64:
+            {
+                uint64_t value = py::cast<uint64_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdI16:
+            {
+                int16_t value = py::cast<int16_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            case Type::Id::kIdU16:
+            {
+                uint16_t value = py::cast<uint16_t>(args[indx]);
+                param->setArg(indx, value);
+            }
+            break;
+            default:
+                throw ConversionException{"Unsupported argument type"};
+            }
+        }
+
+        PLH::PyCallback::ReturnValue return_value;
+        return_value.m_retVal = 0;
+
+        using Func = void (*)(PLH::PyCallback::Parameters *, PLH::PyCallback::ReturnValue *);
+        Func func = reinterpret_cast<Func>(native_function._function_address);
+        {
+            py::gil_scoped_release gil{};
+            func(reinterpret_cast<PLH::PyCallback::Parameters *>(params.data()), &return_value);
+        }
+
+        if (native_function._ret_type == Type::Id::kIdVoid) {
+            return py::none{};
+        }
+
+        py::object ret;
+
+        if (Type::isFloat32(native_function._ret_type))
+        {
+            float value = *((float *)return_value.getRetPtr());
+            ret = py::float_{value};
+        }
+        else if (Type::isFloat64(native_function._ret_type))
+        {
+            double value = *((double *)return_value.getRetPtr());
+            ret = py::float_{value};
+        }
+        else if (Type::isInt(native_function._ret_type))
+        {
+            if (Type::isInt8(native_function._ret_type))
+            {
+                int8_t value = *((int8_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isUInt8(native_function._ret_type))
+            {
+                uint8_t value = *((uint8_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isInt16(native_function._ret_type))
+            {
+                int16_t value = *((int16_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isUInt16(native_function._ret_type))
+            {
+                uint16_t value = *((uint16_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isInt32(native_function._ret_type))
+            {
+                int32_t value = *((int32_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isUInt32(native_function._ret_type))
+            {
+                uint32_t value = *((uint32_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else if (Type::isInt64(native_function._ret_type))
+            {
+                int64_t value = *((int64_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+            else
+            {
+                uint64_t value = *((uint64_t *)return_value.getRetPtr());
+                ret = py::int_{value};
+            }
+        }
+        else
+        {
+            throw ConversionException{"Unsupported return type"};
+        }
+
+        return ret;
+    }
+};
+
+/**
+ * Detours the native function at native_address so that it dispatches to the given Python function.
+ */
+std::unique_ptr<PyFunctionWrap> createNativeToPythonFunction(uint64_t native_address, asmjit::Type::Id ret_type,
+                                  const std::string &call_conv, const std::vector<asmjit::Type::Id> &param_types,
+                                  py::function function, bool use_trampoline)
+{
+    std::unique_ptr<PyFunctionWrap> py_function_wrap = std::make_unique<PyFunctionWrap>(function);
+
+    asmjit::FuncSignature sig = {};
+    py_function_wrap->_args = param_types;
+    py_function_wrap->_return_type = ret_type;
+
+    std::vector<uint8_t> args;
+    for (const auto &s : param_types)
+    {
+        args.push_back(static_cast<uint8_t>(s));
+    }
+
+    sig.init(PLH::PyCallback::getCallConv(call_conv),
+        asmjit::FuncSignature::kNoVarArgs,
+        ret_type,
+        args.data(),
+        static_cast<uint32_t>(args.size())
+    );
+
+    uint32_t unique_id = reinterpret_cast<uint32_t>(py_function_wrap.get());
+    uint64_t jit_callback = py_function_wrap->callback.getJitFunc(unique_id,
+        sig, asmjit::Environment::kArchHost, &dispatch, use_trampoline);
+
+    // TODO move to static
+    PLH::CapstoneDisassembler disassembler(PLH::Mode::x86);
+    std::shared_ptr<PLH::x86Detour> detour = std::make_shared<PLH::x86Detour>(native_address, jit_callback, py_function_wrap->callback.getTrampolineHolder(), disassembler);
+
+    detour->hook();
+    py_function_wrap->_detour = std::move(detour);
+
+    return py_function_wrap;
+}
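+// Illustrative call from embedded Python (a sketch; the address and the hook function are hypothetical):
+//   import _native
+//   from _native.asmjit.Type import Id
+//   wrap = _native.createNativeToPythonFunction(0x46A150, Id.kIdUIntPtr, "cdecl",
+//                                               [Id.kIdUIntPtr, Id.kIdUIntPtr], my_hook, True)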
+
+}
+
+PYBIND11_EMBEDDED_MODULE(_native, m)
+{
+    auto asmjit = m.def_submodule("asmjit");
+    auto Type = asmjit.def_submodule("Type");
+
+    using asmjit::Type::Id;
+    py::enum_<Id>(Type, "Id")
+        .value("kIdVoid", Id::kIdVoid)
+        .value("kIdF32", Id::kIdF32)
+        .value("kIdF64", Id::kIdF64)
+        .value("kIdI8", Id::kIdI8)
+        .value("kIdU8", Id::kIdU8)
+        .value("kIdI16", Id::kIdI16)
+        .value("kIdU16", Id::kIdU16)
+        .value("kIdI32", Id::kIdI32)
+        .value("kIdU32", Id::kIdU32)
+        .value("kIdI64", Id::kIdI64)
+        .value("kIdU64", Id::kIdU64)
+        .value("kIdUIntPtr", Id::kIdUIntPtr);
+
+    py::class_(m, "PyFunctionWrap")
+        .def("getTrampolineAddress", [](PyFunctionWrap &self) -> uint64_t {
+            uint64_t *holder = self.callback.getTrampolineHolder();
+            return *holder;
+        });
+
+    py::class_(m, "NativeFunction")
+        .def(py::init &>(),
+             py::arg("native_address"), py::arg("ret_type"), py::arg("call_conv"), py::arg("param_types"))
+        .def("__call__", [] (NativeFunction& this_, const py::args& args) {
+            return this_(args);
+        });
     m.def("killAntiDebugThread", killAntiDebugThread);
     m.def("waitForDebuggerAndBreak", waitForDebuggerAndBreak);
+    m.def("createNativeToPythonFunction", createNativeToPythonFunction);
+
+    PLH::Log::registerLogger(std::make_shared<Logger>());
 }
diff --git a/libs/tcl2python/module_tcl.cpp b/libs/tcl2python/module_tcl.cpp
index 6863564..b543bcf 100644
--- a/libs/tcl2python/module_tcl.cpp
+++ b/libs/tcl2python/module_tcl.cpp
@@ -184,8 +184,9 @@ void createTclToPythonFunction(std::string name, py::function function)
         }
     }
 
-    auto func = [function, types = std::move(types)](Tcl::object const &argv) -> py::object  {
-		Tcl::interpreter interp{argv.get_interp(), false};
+    auto func = [function, types = std::move(types)](Tcl::object const &argv) -> py::object {
+        py::gil_scoped_acquire gil{};
+        Tcl::interpreter interp{argv.get_interp(), false};
 
         TclPythonGlobals::AutoStack handle_interpreter{interp.get()};
         size_t argc = argv.size(interp);
@@ -198,7 +199,8 @@ void createTclToPythonFunction(std::string name, py::function function)
             tcl_obj.set_interp(interp.get());
 
             Type type;
-            if (types.size() <= indx) {
+            if (types.size() <= indx)
+            {
                 if (types[types.size() - 1] == Type::VAR_POSITIONAL)
                 {
                     type = Type::VAR_POSITIONAL;
@@ -225,11 +227,13 @@ void createTclToPythonFunction(std::string name, py::function function)
         py::object result = function(*arguments);
 
         return result;
-	};
+    };
+
+    auto interp = TclPythonGlobals::getInstance().top();
+    Tcl::interpreter i{interp};
+    i.def<>(name, func, Tcl::variadic());
 
-	auto interp = TclPythonGlobals::getInstance().top();
-    Tcl::interpreter i {interp};
-    i.def2<>(name, func, Tcl::variadic());
+    // TODO: return raw address
 }
 
 
diff --git a/libs/tcl2python/tcl2python.SConscript b/libs/tcl2python/tcl2python.SConscript
index 9de3b5a..aea2357 100644
--- a/libs/tcl2python/tcl2python.SConscript
+++ b/libs/tcl2python/tcl2python.SConscript
@@ -1,26 +1,33 @@
-Import('*')
+Import('pyenv env static_cpptcl tclStubLib_obj')
 
-env = pyenv.Clone()
+env_tcl2python = pyenv.Clone()
+env_py3 = pyenv.Clone()
 
-pybind11_includes = Dir('#/libs/pybind11')
-
-py3_obj = env.Object(['py3.cpp',
+py3_obj = env_py3.Object(['py3.cpp',
                       'tcl_globals.cpp'])
+
+env_py3.AppendUnique(CPPPATH=env['pybind11_includes'])
+
+py3_static = env_py3.StaticLibrary('py3_static', source = py3_obj)
+
 sources = ['tclandpython.cpp',
            'module_native.cpp',
            'module_ctcl.cpp',
            'module_cpptcl.cpp',
            'module_tcl.cpp']
-tclpython_cpp = py3_obj + env.Object(sources)
+tclpython_obj = py3_obj + env_tcl2python.Object(sources)
 
-env.AppendUnique(CPPPATH=[cpptcl_includes, pybind11_includes],
-    LIBS = [static_cpptcl, tclStubLib_obj, 'ntdll']
+env_tcl2python.AppendUnique(CPPPATH= env['pybind11_includes'] + env['polyhook_includes'] + env['asmjit_includes'] + env['cpptcl_includes'],
+    # CPPDEFINES = {'ASMJIT_BUILD_X86' : ''},
+    LIBPATH = env['asmjit_lib_path'] ,
+    LIBS = [static_cpptcl, tclStubLib_obj, 'ntdll'] + env['polyhook'] + ['asmjit'] + ['capstone']
 )
 
-tclpython_dll = env.SharedLibrary('tclandpython',
-    source = tclpython_cpp,
+tclpython_dll = env_tcl2python.SharedLibrary('tclandpython',
+    source = tclpython_obj,
 )
 
-env.Install('#/bin', tclpython_dll)
+env_tcl2python.Install('#/bin', tclpython_dll)
 
-Return('tclpython_dll py3_obj pybind11_includes')
+Export('tclpython_obj')
+Return('tclpython_dll py3_static')
diff --git a/libs/tcl2python/tcl_globals.cpp b/libs/tcl2python/tcl_globals.cpp
index 73ca61a..d4349d4 100644
--- a/libs/tcl2python/tcl_globals.cpp
+++ b/libs/tcl2python/tcl_globals.cpp
@@ -8,6 +8,11 @@ class TclPythonGlobals::Implementation
     std::unordered_map<Tcl_Interp *, py::object> _py_globals_per_interpreter;
 
 public:
+    ~Implementation() {
+        py::gil_scoped_acquire gil{};
+        _py_globals_per_interpreter.clear();
+    }
+
     void push(Tcl_Interp * interp)
     {
         _tcl_interpreter.push(interp);
@@ -15,6 +20,9 @@ class TclPythonGlobals::Implementation
 
     Tcl_Interp *top()
     {
+        if (_tcl_interpreter.empty()) {
+            throw std::runtime_error("Stack of tcl interpreters is empty while accessing it");
+        }
         return _tcl_interpreter.top();
     }
 
@@ -38,8 +46,23 @@ class TclPythonGlobals::Implementation
         _py_globals_per_interpreter.insert({interp, new_global});
         return new_global;
     }
+
+    bool tryClear() {
+        if (_py_globals_per_interpreter.size() == 0) {
+            return true;
+        }
+        else {
+            return false;
+        }
+    }
 };
 
+TclPythonGlobals& TclPythonGlobals::getInstance()
+{
+    static TclPythonGlobals instance;
+    return instance;
+}
+
 TclPythonGlobals::TclPythonGlobals() :
     _p(std::make_unique<Implementation>())
 {
diff --git a/libs/tcl2python/tcl_globals.h b/libs/tcl2python/tcl_globals.h
index 6345f45..127a8e7 100644
--- a/libs/tcl2python/tcl_globals.h
+++ b/libs/tcl2python/tcl_globals.h
@@ -11,11 +11,7 @@ struct Tcl_Interp;
 class TclPythonGlobals {
 public:
     ~TclPythonGlobals();
-    static TclPythonGlobals& getInstance()
-    {
-        static TclPythonGlobals instance;
-        return instance;
-    }
+    static TclPythonGlobals& getInstance();
 
     class AutoStack {
         public:
diff --git a/libs/tcl2python/tclandpython.cpp b/libs/tcl2python/tclandpython.cpp
index 1e737e7..60d53c4 100644
--- a/libs/tcl2python/tclandpython.cpp
+++ b/libs/tcl2python/tclandpython.cpp
@@ -3,7 +3,7 @@
 
 #include "py3.h"
 
-#include 
+// #include 
 #include "cpptcl.h"
 
 #include 
@@ -15,83 +15,117 @@ using namespace py::literals;
 
 namespace py = pybind11;
 
-class PythonInterpreterSingleton {
-	PythonInterpreterSingleton() {
-		py::initialize_interpreter();
-	}
+class PythonInterpreterSingleton
+{
+    PythonInterpreterSingleton()
+    {
+        py::initialize_interpreter();
+        _gil_release = std::make_unique<py::gil_scoped_release>();
+    }
+
 public:
-	~PythonInterpreterSingleton() {
-		py::finalize_interpreter();
-	}
+    ~PythonInterpreterSingleton()
+    {
+        _gil_release.reset();
+        py::finalize_interpreter();
+    }
+
+    PythonInterpreterSingleton(PythonInterpreterSingleton const &) = delete;
+    void operator=(PythonInterpreterSingleton const &) = delete;
 
-	PythonInterpreterSingleton(PythonInterpreterSingleton const&) = delete;
-	void operator=(PythonInterpreterSingleton const&) = delete;
+    static PythonInterpreterSingleton &getInstance()
+    {
+        static PythonInterpreterSingleton instance;
+        return instance;
+    }
 
-	static PythonInterpreterSingleton& getInstance() {
-		static PythonInterpreterSingleton instance;
-		return instance;
-	}
 private:
+
+    std::unique_ptr<py::gil_scoped_release> _gil_release;
 };
 
-struct PythonInterpreter {
+struct PythonInterpreter
+{
 public:
-	PythonInterpreter() {
-		PythonInterpreterSingleton::getInstance();
+    PythonInterpreter()
+    {
+        PythonInterpreterSingleton::getInstance();
+
+        py::gil_scoped_acquire gil{};
+        // Inject something
+        py::module::import("sys").attr("argv") = py::make_tuple("insert anything stupid", "");
+        /// importi("tcl", Tcl::interpreter::getDefault()->get());
+    }
 
-		// Inject something 
-		py::module::import("sys").attr("argv") = py::make_tuple("insert anything stupid", "");
-		/// importi("tcl", Tcl::interpreter::getDefault()->get());
-	}
+    ~PythonInterpreter() = default;
 
-	~PythonInterpreter() = default;
+    void exec(const char *str, const Tcl::object &dummy)
+    {
+        auto interp = dummy.get_interp();
+        TclPythonGlobals::AutoStack handle_interpreter{interp};
 
-	void exec(const char *str, const Tcl::object &dummy) {
-		auto interp = dummy.get_interp();
-		TclPythonGlobals::AutoStack handle_interpreter{interp};
+        py::gil_scoped_acquire gil{};
+        py::object globals = TclPythonGlobals::getInstance().getGlobals();
 
-		auto globals = TclPythonGlobals::getInstance().getGlobals();
-		py::exec(str, globals);
-	}
+        py::exec(str, globals);
+    }
 
-	PyObject* eval(const char *str, const Tcl::object &dummy) {
-		auto interp = dummy.get_interp();
-		TclPythonGlobals::AutoStack handle_interpreter{interp};
+    PyObject *eval(const char *str, const Tcl::object &dummy)
+    {
+        auto interp = dummy.get_interp();
+        TclPythonGlobals::AutoStack handle_interpreter{interp};
 
-		auto globals = TclPythonGlobals::getInstance().getGlobals();
+        py::gil_scoped_acquire gil{};
+        py::object globals = TclPythonGlobals::getInstance().getGlobals();
 
-		py::object obj = py::eval(str, globals);
-		return obj.release().ptr();
-	}
+        py::object obj = py::eval(str, globals);
+        return obj.release().ptr();
+    }
 
-	std::string  str(py::object str, const Tcl::object &dummy) {
-		auto interp = dummy.get_interp();
-		TclPythonGlobals::AutoStack handle_interpreter{interp};
+    std::string str(py::object str, const Tcl::object &dummy)
+    {
+        auto interp = dummy.get_interp();
+        TclPythonGlobals::AutoStack handle_interpreter{interp};
 
-		return py::str(str);
-	}
+        py::gil_scoped_acquire gil{};
+        return py::str(str);
+    }
 
-	void import(const char*str, const Tcl::object &dummy) {
-		auto interp = dummy.get_interp();
+    void import(const char *str, const Tcl::object &dummy)
+    {
+        auto interp = dummy.get_interp();
+
+        importi(str, interp);
+    }
 
-		importi(str, interp);
-	}
 private:
-	void importi(const char*str, Tcl_Interp* interp) {
-		TclPythonGlobals::AutoStack handle_interpreter{interp};
-		auto globals = TclPythonGlobals::getInstance().getGlobals();
+    void importi(const char *str, Tcl_Interp *interp)
+    {
+        TclPythonGlobals::AutoStack handle_interpreter{interp};
+
+        py::gil_scoped_acquire gil{};
+        py::object globals = TclPythonGlobals::getInstance().getGlobals();
+
+        std::string full_module_name{str};
+        auto point = full_module_name.find('.');
+        std::string module_name = full_module_name.substr(0, point);
+
+        if (point != std::string::npos) {
+            py::module::import(str);
+        }
 
-		py::module imported = py::module::import(str);
-		globals[str] = imported;
-	}
+        py::module imported = py::module::import(module_name.c_str());
+        globals[module_name.c_str()] = imported;
+    }
 };
 
-CPPTCL_MODULE(Tclandpython, interp) {
-	Tcl_RegisterObjType(getTclPyObjectInstance());
+CPPTCL_MODULE(Tclandpython, interp)
+{
+    Tcl_RegisterObjType(getTclPyObjectInstance());
 
     interp.class_<PythonInterpreter>("PythonInterpreter")
-         .def("exec", &PythonInterpreter::exec, Tcl::variadic())
-         .def("eval", &PythonInterpreter::eval, Tcl::variadic())
-         .def("str", &PythonInterpreter::str, Tcl::variadic())
-         .def("import", &PythonInterpreter::import, Tcl::variadic());
+        .def("exec", &PythonInterpreter::exec, Tcl::variadic())
+        .def("eval", &PythonInterpreter::eval, Tcl::variadic())
+        .def("str", &PythonInterpreter::str, Tcl::variadic())
+        .def("import", &PythonInterpreter::import, Tcl::variadic());
 }
diff --git a/libs/tcl8.3.2/SConscript_dll.py b/libs/tcl8.3.2/SConscript_dll.py
index 2dcb7d2..b99ba33 100644
--- a/libs/tcl8.3.2/SConscript_dll.py
+++ b/libs/tcl8.3.2/SConscript_dll.py
@@ -4,8 +4,14 @@
 
 from os.path import join
 
-baselibs   = [] # ['kernel32', 'advapi32', 'user32']
-winlibs    = baselibs + [] # ['gdi32', 'comdlg32', 'winspool']
+env_export = env.Clone()
+
+if env["is_mingw"]:
+    baselibs   = [] # ['kernel32', 'advapi32', 'user32']
+    winlibs    = baselibs + [] # ['gdi32', 'comdlg32', 'winspool']
+else:
+    baselibs   = ['advapi32', 'user32'] # ['kernel32', ]
+    winlibs    = baselibs + [] # ['gdi32', 'comdlg32', 'winspool']
 
 guilibs    = winlibs
 conlibs    = baselibs
@@ -28,7 +34,6 @@
 )
 env.Install(BIN_INSTALL_DIR, [tclPipe_dll, tclDDE_dll, tclReg_dll])
 '''
-env_export = env.Clone()
 env_export.AppendUnique(CPPPATH = TCL_INCLUDES,
                         CPPDEFINES = ['BUILD_tcl'] + TCL_DEFINES
 )
diff --git a/libs/tcl8.3.2/SConscript_main.py b/libs/tcl8.3.2/SConscript_main.py
index 009e24d..cc207e5 100644
--- a/libs/tcl8.3.2/SConscript_main.py
+++ b/libs/tcl8.3.2/SConscript_main.py
@@ -60,7 +60,7 @@
 
 LIB_INSTALL_DIR = join(INSTALLDIR, 'lib')
 BIN_INSTALL_DIR = join(INSTALLDIR, 'bin')
-SCRIPT_INSTALL_DIR  = join(INSTALLDIR, 'lib\tcl') + DOTVERSION
+SCRIPT_INSTALL_DIR  = join(INSTALLDIR, 'lib', 'tcl') + DOTVERSION
 INCLUDE_INSTALL_DIR = join(INSTALLDIR, 'include')
 
 
@@ -69,6 +69,8 @@
 
 if is_mingw:
     TCL_DEFINES +=  ['HAVE_NO_SEH']
+if not is_mingw and windows:
+    TCL_DEFINES +=  ['HAVE_TM_ZONE']
 if not windows:
     TCL_DEFINES +=  ['HAVE_UNISTD_H', 'NO_UNION_WAIT',
         'TIME_WITH_SYS_TIME',
@@ -149,15 +151,7 @@
 TCLOBJS = tcl_objs_gen + tcl_objs + strftime_obj + tclStubLib_obj
 TCLTESTOBJS = tcl_test_objs_gen + tcl_test_objs
 
-if not is_mingw:
-    env.SConscript('SConscript_staticlib.py',
-               exports='env '
-                        'TCLREGDLLNAME TCLDDEDLLNAME TCLPIPEDLLNAME TCLDLLNAME TCLLIB '
-                        'TCLTEST TCLOBJS TCLTESTOBJS tclStubLib_obj '
-                        'TCL_INCLUDES TCL_DEFINES '
-						'LIB_INSTALL_DIR BIN_INSTALL_DIR ',
-               variant_dir=join(TMPDIR, 'static'),
-               duplicate=0
-    )
+tcl_dll_full_path = join(BIN_INSTALL_DIR, TCLDLLNAME + '.dll')
+env.Install(Dir('#/bin'), tcl_dll_full_path)
 
 Return('TCL_INCLUDES TCLDLLNAME BIN_INSTALL_DIR tclStubLib_obj')
diff --git a/libs/tcl8.3.2/SConscript_staticlib.py b/libs/tcl8.3.2/SConscript_staticlib.py
deleted file mode 100644
index d7ca81c..0000000
--- a/libs/tcl8.3.2/SConscript_staticlib.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-
-Import('*')
-
-from os.path import join
-
-baselibs   = ['kernel32', 'advapi32', 'user32']
-winlibs    = baselibs + ['gdi32', 'comdlg32', 'winspool']
-
-guilibs    = winlibs
-conlibs    = baselibs
-guilibsdll = winlibs
-conlibsdll = baselibs
-
-env_static = env.Clone()
-env_static.AppendUnique(CPPPATH = TCL_INCLUDES,
-                        CPPDEFINES = ['STATIC_BUILD', 'BUILD_tcl'] + TCL_DEFINES,
-)
-
-compile_static_exe = False
-if compile_static_exe:
-    tclStatic_lib = env_static.StaticLibrary(target=TCLLIB,
-                   source=TCLOBJS,
-                   LIBS = guilibsdll,
-    )
-    env.Install(LIB_INSTALL_DIR, tclStatic_lib)
-
-else:
-    test_exe = env_static.Program(target=TCLTEST+'static',
-                    source = TCLTESTOBJS + TCLOBJS,
-                    LIBS = conlibsdll + guilibsdll,
-    )
-    
-    env.Install(LIB_INSTALL_DIR, test_exe)
diff --git a/scripts/abi.py b/scripts/abi.py
new file mode 100644
index 0000000..e2680a4
--- /dev/null
+++ b/scripts/abi.py
@@ -0,0 +1,80 @@
+#%%
+
+import process
+import win32
+import ctypes
+from typing import Type, NewType, List, Tuple, Dict
+
+#%% Native code
+from _native.asmjit.Type import Id # tclandpython.dll
+
+executable = win32.GetModuleFileName()
+image_base = process.get_image_base(executable)
+
+#%% Data types and private functions
+
+Address = NewType('Address', int)
+Abi = NewType('Abi', Tuple[Address, Id, str, List[Id]])
+
+def BASE(address: int) -> Address:
+    B_4 = 0x400000
+    if address < B_4:
+        raise ValueError(f"{hex(address)} must not be smaller than {hex(B_4)}")
+
+    return Address(address - 0x400000)
+
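+# For example, BASE(0x46A150) == 0x6A150, the offset relative to the default 0x400000 image base.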
+
+def arg_count(abi: Abi) -> int:
+    return len(abi[3])
+
+
+ABI_1_0_844_59495 = {
+    "findFileLocation": (BASE(0x46A150), Id.kIdUIntPtr, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "CObj_tclEval": (BASE(0x483710), Id.kIdI8,  "thiscall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "script_call": (BASE(0x592560), Id.kIdI32, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdI32, Id.kIdUIntPtr]),
+    "sTclObject_loadTclFile": (BASE(0x6A46CD), Id.kIdI32, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdI32, Id.kIdI32]),
+    "sTclObject_preprocessFile": (BASE(0x6A4708), Id.kIdVoid, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdI32, Id.kIdI32]),
+    "BuiltinTclData_ctor_Tcl_Interp": (BASE(0x6A5C06), Id.kIdVoid, "thiscall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "TclCache_get": (BASE(0x6A538F), Id.kIdVoid, "thiscall", [Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdI32]),
+    "g_tclCache": (BASE(0xECAE80), Id.kIdUIntPtr, None, None),     
+}
+
+
+
+ABI_2_1_1_10 = {
+    "findFileLocation": (BASE(0x4402C0), Id.kIdUIntPtr, "fastcall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "CObj_tclEval": (BASE(0x555A00), Id.kIdI8,  "thiscall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "script_call": (BASE(0x6313A0), Id.kIdI32, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr, Id.kIdI32, Id.kIdUIntPtr]),
+    "sTclObject_preprocessFile": (BASE(0x6BB140), Id.kIdVoid, "fastcall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "BuiltinTclData_ctor_Tcl_Interp": (BASE(0x6BC170), Id.kIdVoid, "thiscall", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "TclCache_get": (BASE(0x6BBB00), Id.kIdVoid, "cdecl", [Id.kIdUIntPtr, Id.kIdUIntPtr]),
+    "g_tclCache": (BASE(0xA42DD0), Id.kIdUIntPtr, None, None),     
+}
+
+
+def _prepare_abi(original_abi: Dict[str, Abi]) -> Dict[str, Abi]:
+    abi = {}
+    for key, value in original_abi.items():
+        abi[key] = (Address(int(value[0]) + image_base),) + value[1:]
+    return abi
+
+
+def detect_abi():
+    """
+    Returns the ABI table matching the running executable, or None if the version is unknown.
+    """
+    autoversion_build = ctypes.cast(image_base + 0x78236C - 0x400000, ctypes.c_char_p)
+
+    if autoversion_build.value:
+        autoversion_build = autoversion_build.value.decode("ascii")
+        if autoversion_build == '$AUTOVERSION_BUILD 844   ':
+            return _prepare_abi(ABI_1_0_844_59495)
+
+    version_string = ctypes.cast(image_base + 0x722148 - 0x400000, ctypes.c_char_p)
+
+    if version_string.value:
+        version_string = version_string.value.decode("ascii")
+        if version_string == '2.1.1.10':
+            return _prepare_abi(ABI_2_1_1_10)
+
+    return None
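+
+# Usage sketch (hypothetical):
+#   abi = detect_abi()
+#   if abi:
+#       address, ret_type, call_conv, param_types = abi["findFileLocation"]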
diff --git a/scripts/process.py b/scripts/process.py
new file mode 100644
index 0000000..d391716
--- /dev/null
+++ b/scripts/process.py
@@ -0,0 +1,18 @@
+import os
+
+import win32
+
+
+def get_image_base(exe_name = 'Diggles'):
+    pid = os.getpid()
+
+    handle_snapshot = win32.CreateToolhelp32Snapshot(win32.TH32CS_SNAPMODULE, pid)
+
+    module = win32.Module32First(handle_snapshot)
+
+    while module:
+        if exe_name in module.szExePath.decode('ascii'):
+            return module.modBaseAddr
+        module = win32.Module32Next(handle_snapshot, module)
+
+    return None
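+
+# Example (hypothetical): get_image_base('Diggles') returns the base address of the loaded
+# module whose path contains 'Diggles', or None if no such module is found.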
diff --git a/examples/pytcl.py b/scripts/pytcl.py
similarity index 74%
rename from examples/pytcl.py
rename to scripts/pytcl.py
index 0e266ad..8250cfb 100644
--- a/examples/pytcl.py
+++ b/scripts/pytcl.py
@@ -1,3 +1,10 @@
+__doc__ = """
+Convenient access to Tcl from Python.
+
+:class:`Tcl` is the main helper. Create an object of :class:`Tcl` and use
+``Tcl.<procedure>`` to call a Tcl procedure.
+Use :func:`Tcl.globals` to list Tcl global variables.
+"""
+
 import sys
 
 import _tcl
@@ -7,18 +14,6 @@
 TCL_LEAVE_ERR_MSG = 0x200
 
 
-def tcl_eval(*args):
-    '''
-    :returns: cpptcl.result
-    '''
-    interp = cpptcl.get_current_interpreter()
-    liste = cpptcl.object_()
-    for arg in args:
-        o = cpptcl.object_(arg)
-        liste.append(o)
-    return interp.eval(liste)
-
-
 class CallProc:
     def __init__(self, name, autocast_list=True):
         self._name = name
@@ -26,7 +21,7 @@ def __init__(self, name, autocast_list=True):
 
     def __call__(self, *args):
         if self._name:
-            result = tcl_eval(self._name, *args)
+            result = cpptcl.eval(self._name, *args)
             t = _tcl.detectEquivalentType(result)
             if not self._autocast_list and t == _tcl.Type.PyList:
                 t = _tcl.Type.PyUnicode
@@ -46,6 +41,9 @@ def __getattr__(self, name):
             if 'variable is array' in str(exception):
                 # print(exception, file=sys.stderr)
                 return None
+            if 'no such variable' in str(exception):
+                # print(exception, file=sys.stderr)
+                return None
             raise exception
 
         t = _tcl.detectEquivalentType(result.object_)
@@ -61,16 +59,19 @@ def __setattr__(self, name, value):
 
 
 class Tcl:
-    CLASS_COMMANDS = ('proc', 'globals')
+    CLASS_COMMANDS = ['proc', 'globals']
 
     def __init__(self):
-        self.__cache_commands()
+        self.__cache_commands('')
     
-    def __cache_commands(self):
+    def __cache_commands(self, asked_command):
         '''
         Caching commands enables autocomplete at runtime in any Python editor
         '''
-        command_list = tcl_eval('info', 'commands')
+        if asked_command in Tcl.CLASS_COMMANDS:
+            return
+
+        command_list = cpptcl.eval('info', 'commands')
         t = _tcl.detectEquivalentType(command_list)
         command_list = _tcl.castValue(t, command_list)
         for my_command in Tcl.CLASS_COMMANDS:
@@ -80,7 +81,6 @@ def __cache_commands(self):
         for command in command_list:
             setattr(self, command, CallProc(command))
 
-
     @classmethod
     def proc(cls, *args, **kwargs):
         return _tcl.createTclToPythonFunction(*args, **kwargs)
@@ -90,5 +90,7 @@ def globals(self):
         return Globals(cpptcl.get_current_interpreter())
 
     def __getattr__(self, name):
-        self.__cache_commands()
+        self.__cache_commands(name)
         return CallProc(name)
+
+tcl = Tcl()
diff --git a/scripts/win32/LICENSE b/scripts/win32/LICENSE
new file mode 100644
index 0000000..1857757
--- /dev/null
+++ b/scripts/win32/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2009-2020, Mario Vilas
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice,this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/scripts/win32/__init__.py b/scripts/win32/__init__.py
new file mode 100644
index 0000000..99ae427
--- /dev/null
+++ b/scripts/win32/__init__.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Debugging API wrappers in ctypes.
+"""
+
+#-----------------------------------------------------------------------------
+# Monkey patch for Cygwin, which does not load some features correctly since
+# it believes to be running on Linux.
+
+# Detect whether we need to patch or not.
+try:
+    from ctypes import WINFUNCTYPE
+except ImportError:
+    import ctypes
+
+    # Fix FormatError.
+    ##from _ctypes import FormatError
+    ##ctypes.FormatError = FormatError
+
+    # Fix FUNCFLAG_STDCALL.
+    ctypes.FUNCFLAG_STDCALL = FUNCFLAG_STDCALL = _FUNCFLAG_STDCALL = 0
+
+    # Fix WINFUNCTYPE.
+    ctypes._win_functype_cache = {}
+    def WINFUNCTYPE(restype, *argtypes, **kw):
+        flags = _FUNCFLAG_STDCALL
+        if kw.pop("use_errno", False):
+            flags |= ctypes._FUNCFLAG_USE_ERRNO
+        if kw.pop("use_last_error", False):
+            flags |= ctypes._FUNCFLAG_USE_LASTERROR
+        if kw:
+            raise ValueError("unexpected keyword argument(s) %s" % kw.keys())
+        try:
+            return ctypes._win_functype_cache[(restype, argtypes, flags)]
+        except KeyError:
+            class WinFunctionType(ctypes._CFuncPtr):
+                _argtypes_ = argtypes
+                _restype_ = restype
+                _flags_ = flags
+            ctypes._win_functype_cache[(restype, argtypes, flags)] = WinFunctionType
+            return WinFunctionType
+    if WINFUNCTYPE.__doc__:
+        WINFUNCTYPE.__doc__ = ctypes.CFUNCTYPE.__doc__.replace(
+            "CFUNCTYPE", "WINFUNCTYPE")
+    ctypes.WINFUNCTYPE = WINFUNCTYPE
+
+    # Fix _reset_cache.
+    _original_reset_cache = ctypes._reset_cache
+    def _reset_cache():
+        ctypes._win_functype_cache.clear()
+        _original_reset_cache()
+    ctypes._reset_cache = _reset_cache
+
+    # Fix the string conversion mode.
+    if hasattr(ctypes, "set_conversion_mode"):
+        ctypes.set_conversion_mode("mbcs", "ignore")
+
+    # Fix WinDLL.
+    class WinDLL(ctypes.CDLL):
+        """This class represents a dll exporting functions using the
+        Windows stdcall calling convention.
+        """
+        _func_flags_ = _FUNCFLAG_STDCALL
+    ctypes.WinDLL = WinDLL
+
+    # Fix HRESULT.
+    from _ctypes import _SimpleCData
+    class HRESULT(_SimpleCData):
+        _type_ = "l"
+        ##_check_retval_ = _check_HRESULT
+    ctypes.HRESULT = HRESULT
+
+    # Fix OleDLL.
+    class OleDLL(ctypes.CDLL):
+        """This class represents a dll exporting functions using the
+        Windows stdcall calling convention, and returning HRESULT.
+        HRESULT error values are automatically raised as WindowsError
+        exceptions.
+        """
+        _func_flags_ = _FUNCFLAG_STDCALL
+        _func_restype_ = HRESULT
+    ctypes.OleDLL = OleDLL
+
+    # Fix windll, oledll and GetLastError.
+    ctypes.windll = ctypes.LibraryLoader(WinDLL)
+    ctypes.oledll = ctypes.LibraryLoader(OleDLL)
+    ctypes.GetLastError = ctypes.windll.kernel32.GetLastError
+
+    # Fix get_last_error and set_last_error.
+    ctypes.get_last_error = ctypes.windll.kernel32.GetLastError
+    ctypes.set_last_error = ctypes.windll.kernel32.SetLastError
+
+    # Fix FormatError.
+    def FormatError(code):
+        code = int(long(code))
+        try:
+            if GuessStringType.t_default == GuessStringType.t_ansi:
+                FormatMessage = windll.kernel32.FormatMessageA
+                FormatMessage.argtypes = [DWORD, LPVOID, DWORD, DWORD, LPSTR, DWORD]
+                FormatMessage.restype  = DWORD
+                lpBuffer = ctypes.create_string_buffer(1024)
+            else:
+                FormatMessage = windll.kernel32.FormatMessageW
+                FormatMessage.argtypes = [DWORD, LPVOID, DWORD, DWORD, LPWSTR, DWORD]
+                FormatMessage.restype  = DWORD
+                lpBuffer = ctypes.create_unicode_buffer(1024)
+            ##FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
+            ##FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
+            success = FormatMessage(0x1200, None, code, 0, lpBuffer, 1024)
+            if success:
+                return lpBuffer.value
+        except Exception:
+            pass
+        if GuessStringType.t_default == GuessStringType.t_ansi:
+            return "Error code 0x%.8X" % code
+        return u"Error code 0x%.8X" % code
+    ctypes.FormatError = FormatError
+
+    # Fix WinError.
+    def WinError(code=None, descr=None):
+        if code is None:
+            code = ctypes.GetLastError()
+        if descr is None:
+            descr = ctypes.FormatError(code).strip()
+        return WindowsError(code, descr)
+    ctypes.WinError = WinError
+
+    # Fix DllGetClassObject.
+    def DllGetClassObject(rclsid, riid, ppv):
+        try:
+            ccom = __import__(
+                "comtypes.server.inprocserver", globals(), locals(), ['*'])
+        except ImportError:
+            return -2147221231 # CLASS_E_CLASSNOTAVAILABLE
+        else:
+            return ccom.DllGetClassObject(rclsid, riid, ppv)
+    ctypes.DllGetClassObject = DllGetClassObject
+
+    # Fix DllCanUnloadNow.
+    def DllCanUnloadNow():
+        try:
+            ccom = __import__(
+                "comtypes.server.inprocserver", globals(), locals(), ['*'])
+        except ImportError:
+            return 0 # S_OK
+        return ccom.DllCanUnloadNow()
+    ctypes.DllCanUnloadNow = DllCanUnloadNow
+
+#-----------------------------------------------------------------------------
+
+# Import all submodules into this namespace.
+# Required for compatibility with older versions of WinAppDbg.
+from . import defines
+from . import kernel32
+from . import user32
+from . import advapi32
+from . import wtsapi32
+from . import shell32
+from . import shlwapi
+from . import psapi
+from . import dbghelp
+from . import ntdll
+
+# Import all symbols from submodules into this namespace.
+# Required for compatibility with older versions of WinAppDbg.
+from .defines    import *  # NOQA
+from .kernel32   import *  # NOQA
+from .user32     import *  # NOQA
+from .advapi32   import *  # NOQA
+from .wtsapi32   import *  # NOQA
+from .shell32    import *  # NOQA
+from .shlwapi    import *  # NOQA
+from .psapi      import *  # NOQA
+from .dbghelp    import *  # NOQA
+from .ntdll      import *  # NOQA
+
+# This calculates the list of exported symbols.
+_all = set()
+_all.update(defines._all)
+_all.update(kernel32._all)
+_all.update(user32._all)
+_all.update(advapi32._all)
+_all.update(wtsapi32._all)
+_all.update(shell32._all)
+_all.update(shlwapi._all)
+_all.update(psapi._all)
+_all.update(dbghelp._all)
+_all.update(ntdll._all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
diff --git a/scripts/win32/advapi32.py b/scripts/win32/advapi32.py
new file mode 100644
index 0000000..eb242b5
--- /dev/null
+++ b/scripts/win32/advapi32.py
@@ -0,0 +1,3246 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for advapi32.dll in ctypes.
+"""
+
+from .defines import *  # NOQA
+from .kernel32 import *  # NOQA
+
+# XXX TODO
+# + add transacted registry operations
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- Constants ----------------------------------------------------------------
+
+# Privilege constants
+SE_ASSIGNPRIMARYTOKEN_NAME      = "SeAssignPrimaryTokenPrivilege"
+SE_AUDIT_NAME                   = "SeAuditPrivilege"
+SE_BACKUP_NAME                  = "SeBackupPrivilege"
+SE_CHANGE_NOTIFY_NAME           = "SeChangeNotifyPrivilege"
+SE_CREATE_GLOBAL_NAME           = "SeCreateGlobalPrivilege"
+SE_CREATE_PAGEFILE_NAME         = "SeCreatePagefilePrivilege"
+SE_CREATE_PERMANENT_NAME        = "SeCreatePermanentPrivilege"
+SE_CREATE_SYMBOLIC_LINK_NAME    = "SeCreateSymbolicLinkPrivilege"
+SE_CREATE_TOKEN_NAME            = "SeCreateTokenPrivilege"
+SE_DEBUG_NAME                   = "SeDebugPrivilege"
+SE_ENABLE_DELEGATION_NAME       = "SeEnableDelegationPrivilege"
+SE_IMPERSONATE_NAME             = "SeImpersonatePrivilege"
+SE_INC_BASE_PRIORITY_NAME       = "SeIncreaseBasePriorityPrivilege"
+SE_INCREASE_QUOTA_NAME          = "SeIncreaseQuotaPrivilege"
+SE_INC_WORKING_SET_NAME         = "SeIncreaseWorkingSetPrivilege"
+SE_LOAD_DRIVER_NAME             = "SeLoadDriverPrivilege"
+SE_LOCK_MEMORY_NAME             = "SeLockMemoryPrivilege"
+SE_MACHINE_ACCOUNT_NAME         = "SeMachineAccountPrivilege"
+SE_MANAGE_VOLUME_NAME           = "SeManageVolumePrivilege"
+SE_PROF_SINGLE_PROCESS_NAME     = "SeProfileSingleProcessPrivilege"
+SE_RELABEL_NAME                 = "SeRelabelPrivilege"
+SE_REMOTE_SHUTDOWN_NAME         = "SeRemoteShutdownPrivilege"
+SE_RESTORE_NAME                 = "SeRestorePrivilege"
+SE_SECURITY_NAME                = "SeSecurityPrivilege"
+SE_SHUTDOWN_NAME                = "SeShutdownPrivilege"
+SE_SYNC_AGENT_NAME              = "SeSyncAgentPrivilege"
+SE_SYSTEM_ENVIRONMENT_NAME      = "SeSystemEnvironmentPrivilege"
+SE_SYSTEM_PROFILE_NAME          = "SeSystemProfilePrivilege"
+SE_SYSTEMTIME_NAME              = "SeSystemtimePrivilege"
+SE_TAKE_OWNERSHIP_NAME          = "SeTakeOwnershipPrivilege"
+SE_TCB_NAME                     = "SeTcbPrivilege"
+SE_TIME_ZONE_NAME               = "SeTimeZonePrivilege"
+SE_TRUSTED_CREDMAN_ACCESS_NAME  = "SeTrustedCredManAccessPrivilege"
+SE_UNDOCK_NAME                  = "SeUndockPrivilege"
+SE_UNSOLICITED_INPUT_NAME       = "SeUnsolicitedInputPrivilege"
+
+SE_PRIVILEGE_ENABLED_BY_DEFAULT = 0x00000001
+SE_PRIVILEGE_ENABLED            = 0x00000002
+SE_PRIVILEGE_REMOVED            = 0x00000004
+SE_PRIVILEGE_USED_FOR_ACCESS    = 0x80000000
+
+TOKEN_ADJUST_PRIVILEGES         = 0x00000020
+
+LOGON_WITH_PROFILE              = 0x00000001
+LOGON_NETCREDENTIALS_ONLY       = 0x00000002
+
+# Token access rights
+TOKEN_ASSIGN_PRIMARY    = 0x0001
+TOKEN_DUPLICATE         = 0x0002
+TOKEN_IMPERSONATE       = 0x0004
+TOKEN_QUERY             = 0x0008
+TOKEN_QUERY_SOURCE      = 0x0010
+TOKEN_ADJUST_PRIVILEGES = 0x0020
+TOKEN_ADJUST_GROUPS     = 0x0040
+TOKEN_ADJUST_DEFAULT    = 0x0080
+TOKEN_ADJUST_SESSIONID  = 0x0100
+TOKEN_READ = (STANDARD_RIGHTS_READ | TOKEN_QUERY)
+TOKEN_ALL_ACCESS = (STANDARD_RIGHTS_REQUIRED | TOKEN_ASSIGN_PRIMARY |
+        TOKEN_DUPLICATE | TOKEN_IMPERSONATE | TOKEN_QUERY | TOKEN_QUERY_SOURCE |
+        TOKEN_ADJUST_PRIVILEGES | TOKEN_ADJUST_GROUPS | TOKEN_ADJUST_DEFAULT |
+        TOKEN_ADJUST_SESSIONID)
+
+# Predefined HKEY values
+HKEY_CLASSES_ROOT       = 0x80000000
+HKEY_CURRENT_USER       = 0x80000001
+HKEY_LOCAL_MACHINE      = 0x80000002
+HKEY_USERS              = 0x80000003
+HKEY_PERFORMANCE_DATA   = 0x80000004
+HKEY_CURRENT_CONFIG     = 0x80000005
+
+# Registry access rights
+KEY_ALL_ACCESS          = 0xF003F
+KEY_CREATE_LINK         = 0x0020
+KEY_CREATE_SUB_KEY      = 0x0004
+KEY_ENUMERATE_SUB_KEYS  = 0x0008
+KEY_EXECUTE             = 0x20019
+KEY_NOTIFY              = 0x0010
+KEY_QUERY_VALUE         = 0x0001
+KEY_READ                = 0x20019
+KEY_SET_VALUE           = 0x0002
+KEY_WOW64_32KEY         = 0x0200
+KEY_WOW64_64KEY         = 0x0100
+KEY_WRITE               = 0x20006
+
+# Registry value types
+REG_NONE                        = 0
+REG_SZ                          = 1
+REG_EXPAND_SZ                   = 2
+REG_BINARY                      = 3
+REG_DWORD                       = 4
+REG_DWORD_LITTLE_ENDIAN         = REG_DWORD
+REG_DWORD_BIG_ENDIAN            = 5
+REG_LINK                        = 6
+REG_MULTI_SZ                    = 7
+REG_RESOURCE_LIST               = 8
+REG_FULL_RESOURCE_DESCRIPTOR    = 9
+REG_RESOURCE_REQUIREMENTS_LIST  = 10
+REG_QWORD                       = 11
+REG_QWORD_LITTLE_ENDIAN         = REG_QWORD
+
+#--- TOKEN_PRIVILEGE structure ------------------------------------------------
+
+# typedef struct _LUID {
+#   DWORD LowPart;
+#   LONG HighPart;
+# } LUID,
+#  *PLUID;
+class LUID(Structure):
+    _fields_ = [
+        ("LowPart",     DWORD),
+        ("HighPart",    LONG),
+    ]
+
+PLUID = POINTER(LUID)
+
+# typedef struct _LUID_AND_ATTRIBUTES {
+#   LUID Luid;
+#   DWORD Attributes;
+# } LUID_AND_ATTRIBUTES,
+#  *PLUID_AND_ATTRIBUTES;
+class LUID_AND_ATTRIBUTES(Structure):
+    _fields_ = [
+        ("Luid",        LUID),
+        ("Attributes",  DWORD),
+    ]
+
+# typedef struct _TOKEN_PRIVILEGES {
+#   DWORD PrivilegeCount;
+#   LUID_AND_ATTRIBUTES Privileges[ANYSIZE_ARRAY];
+# } TOKEN_PRIVILEGES,
+#  *PTOKEN_PRIVILEGES;
+class TOKEN_PRIVILEGES(Structure):
+    _fields_ = [
+        ("PrivilegeCount",  DWORD),
+##        ("Privileges",      LUID_AND_ATTRIBUTES * ANYSIZE_ARRAY),
+        ("Privileges",      LUID_AND_ATTRIBUTES),
+    ]
+    # See comments on AdjustTokenPrivileges about this structure
+
+PTOKEN_PRIVILEGES = POINTER(TOKEN_PRIVILEGES)
+
+#--- GetTokenInformation enums and structures ---------------------------------
+
+# typedef enum _TOKEN_INFORMATION_CLASS {
+#   TokenUser                              = 1,
+#   TokenGroups,
+#   TokenPrivileges,
+#   TokenOwner,
+#   TokenPrimaryGroup,
+#   TokenDefaultDacl,
+#   TokenSource,
+#   TokenType,
+#   TokenImpersonationLevel,
+#   TokenStatistics,
+#   TokenRestrictedSids,
+#   TokenSessionId,
+#   TokenGroupsAndPrivileges,
+#   TokenSessionReference,
+#   TokenSandBoxInert,
+#   TokenAuditPolicy,
+#   TokenOrigin,
+#   TokenElevationType,
+#   TokenLinkedToken,
+#   TokenElevation,
+#   TokenHasRestrictions,
+#   TokenAccessInformation,
+#   TokenVirtualizationAllowed,
+#   TokenVirtualizationEnabled,
+#   TokenIntegrityLevel,
+#   TokenUIAccess,
+#   TokenMandatoryPolicy,
+#   TokenLogonSid,
+#   TokenIsAppContainer,
+#   TokenCapabilities,
+#   TokenAppContainerSid,
+#   TokenAppContainerNumber,
+#   TokenUserClaimAttributes,
+#   TokenDeviceClaimAttributes,
+#   TokenRestrictedUserClaimAttributes,
+#   TokenRestrictedDeviceClaimAttributes,
+#   TokenDeviceGroups,
+#   TokenRestrictedDeviceGroups,
+#   TokenSecurityAttributes,
+#   TokenIsRestricted,
+#   MaxTokenInfoClass
+# } TOKEN_INFORMATION_CLASS, *PTOKEN_INFORMATION_CLASS;
+
+TOKEN_INFORMATION_CLASS = ctypes.c_int
+
+TokenUser                               = 1
+TokenGroups                             = 2
+TokenPrivileges                         = 3
+TokenOwner                              = 4
+TokenPrimaryGroup                       = 5
+TokenDefaultDacl                        = 6
+TokenSource                             = 7
+TokenType                               = 8
+TokenImpersonationLevel                 = 9
+TokenStatistics                         = 10
+TokenRestrictedSids                     = 11
+TokenSessionId                          = 12
+TokenGroupsAndPrivileges                = 13
+TokenSessionReference                   = 14
+TokenSandBoxInert                       = 15
+TokenAuditPolicy                        = 16
+TokenOrigin                             = 17
+TokenElevationType                      = 18
+TokenLinkedToken                        = 19
+TokenElevation                          = 20
+TokenHasRestrictions                    = 21
+TokenAccessInformation                  = 22
+TokenVirtualizationAllowed              = 23
+TokenVirtualizationEnabled              = 24
+TokenIntegrityLevel                     = 25
+TokenUIAccess                           = 26
+TokenMandatoryPolicy                    = 27
+TokenLogonSid                           = 28
+TokenIsAppContainer                     = 29
+TokenCapabilities                       = 30
+TokenAppContainerSid                    = 31
+TokenAppContainerNumber                 = 32
+TokenUserClaimAttributes                = 33
+TokenDeviceClaimAttributes              = 34
+TokenRestrictedUserClaimAttributes      = 35
+TokenRestrictedDeviceClaimAttributes    = 36
+TokenDeviceGroups                       = 37
+TokenRestrictedDeviceGroups             = 38
+TokenSecurityAttributes                 = 39
+TokenIsRestricted                       = 40
+MaxTokenInfoClass                       = 41
+
+# typedef enum tagTOKEN_TYPE {
+#   TokenPrimary         = 1,
+#   TokenImpersonation
+# } TOKEN_TYPE, *PTOKEN_TYPE;
+
+TOKEN_TYPE = ctypes.c_int
+PTOKEN_TYPE = POINTER(TOKEN_TYPE)
+
+TokenPrimary        = 1
+TokenImpersonation  = 2
+
+# typedef enum  {
+#   TokenElevationTypeDefault   = 1,
+#   TokenElevationTypeFull,
+#   TokenElevationTypeLimited
+# } TOKEN_ELEVATION_TYPE , *PTOKEN_ELEVATION_TYPE;
+
+TokenElevationTypeDefault   = 1
+TokenElevationTypeFull      = 2
+TokenElevationTypeLimited   = 3
+
+TOKEN_ELEVATION_TYPE = ctypes.c_int
+PTOKEN_ELEVATION_TYPE = POINTER(TOKEN_ELEVATION_TYPE)
+
+# typedef enum _SECURITY_IMPERSONATION_LEVEL {
+#   SecurityAnonymous,
+#   SecurityIdentification,
+#   SecurityImpersonation,
+#   SecurityDelegation
+# } SECURITY_IMPERSONATION_LEVEL, *PSECURITY_IMPERSONATION_LEVEL;
+
+SecurityAnonymous       = 0
+SecurityIdentification  = 1
+SecurityImpersonation   = 2
+SecurityDelegation      = 3
+
+SECURITY_IMPERSONATION_LEVEL = ctypes.c_int
+PSECURITY_IMPERSONATION_LEVEL = POINTER(SECURITY_IMPERSONATION_LEVEL)
+
+# typedef struct _SID_AND_ATTRIBUTES {
+#   PSID  Sid;
+#   DWORD Attributes;
+# } SID_AND_ATTRIBUTES, *PSID_AND_ATTRIBUTES;
+class SID_AND_ATTRIBUTES(Structure):
+    _fields_ = [
+        ("Sid",         PSID),
+        ("Attributes",  DWORD),
+    ]
+PSID_AND_ATTRIBUTES = POINTER(SID_AND_ATTRIBUTES)
+
+# typedef struct _TOKEN_USER {
+#   SID_AND_ATTRIBUTES User;
+# } TOKEN_USER, *PTOKEN_USER;
+class TOKEN_USER(Structure):
+    _fields_ = [
+        ("User", SID_AND_ATTRIBUTES),
+    ]
+PTOKEN_USER = POINTER(TOKEN_USER)
+
+# typedef struct _TOKEN_MANDATORY_LABEL {
+#   SID_AND_ATTRIBUTES Label;
+# } TOKEN_MANDATORY_LABEL, *PTOKEN_MANDATORY_LABEL;
+class TOKEN_MANDATORY_LABEL(Structure):
+    _fields_ = [
+        ("Label", SID_AND_ATTRIBUTES),
+    ]
+PTOKEN_MANDATORY_LABEL = POINTER(TOKEN_MANDATORY_LABEL)
+
+# typedef struct _TOKEN_OWNER {
+#   PSID Owner;
+# } TOKEN_OWNER, *PTOKEN_OWNER;
+class TOKEN_OWNER(Structure):
+    _fields_ = [
+        ("Owner", PSID),
+    ]
+PTOKEN_OWNER = POINTER(TOKEN_OWNER)
+
+# typedef struct _TOKEN_PRIMARY_GROUP {
+#   PSID PrimaryGroup;
+# } TOKEN_PRIMARY_GROUP, *PTOKEN_PRIMARY_GROUP;
+class TOKEN_PRIMARY_GROUP(Structure):
+    _fields_ = [
+        ("PrimaryGroup", PSID),
+    ]
+PTOKEN_PRIMARY_GROUP = POINTER(TOKEN_PRIMARY_GROUP)
+
+# typedef struct _TOKEN_APPCONTAINER_INFORMATION {
+#   PSID TokenAppContainer;
+# } TOKEN_APPCONTAINER_INFORMATION, *PTOKEN_APPCONTAINER_INFORMATION;
+class TOKEN_APPCONTAINER_INFORMATION(Structure):
+    _fields_ = [
+        ("TokenAppContainer", PSID),
+    ]
+PTOKEN_APPCONTAINER_INFORMATION = POINTER(TOKEN_APPCONTAINER_INFORMATION)
+
+# typedef struct _TOKEN_ORIGIN {
+#   LUID OriginatingLogonSession;
+# } TOKEN_ORIGIN, *PTOKEN_ORIGIN;
+class TOKEN_ORIGIN(Structure):
+    _fields_ = [
+        ("OriginatingLogonSession", LUID),
+    ]
+PTOKEN_ORIGIN = POINTER(TOKEN_ORIGIN)
+
+# typedef struct _TOKEN_LINKED_TOKEN {
+#   HANDLE LinkedToken;
+# } TOKEN_LINKED_TOKEN, *PTOKEN_LINKED_TOKEN;
+class TOKEN_LINKED_TOKEN(Structure):
+    _fields_ = [
+        ("LinkedToken", HANDLE),
+    ]
+PTOKEN_LINKED_TOKEN = POINTER(TOKEN_LINKED_TOKEN)
+
+# typedef struct _TOKEN_STATISTICS {
+#   LUID                         TokenId;
+#   LUID                         AuthenticationId;
+#   LARGE_INTEGER                ExpirationTime;
+#   TOKEN_TYPE                   TokenType;
+#   SECURITY_IMPERSONATION_LEVEL ImpersonationLevel;
+#   DWORD                        DynamicCharged;
+#   DWORD                        DynamicAvailable;
+#   DWORD                        GroupCount;
+#   DWORD                        PrivilegeCount;
+#   LUID                         ModifiedId;
+# } TOKEN_STATISTICS, *PTOKEN_STATISTICS;
+class TOKEN_STATISTICS(Structure):
+    _fields_ = [
+        ("TokenId",             LUID),
+        ("AuthenticationId",    LUID),
+        ("ExpirationTime",      LONGLONG),  # LARGE_INTEGER
+        ("TokenType",           TOKEN_TYPE),
+        ("ImpersonationLevel",  SECURITY_IMPERSONATION_LEVEL),
+        ("DynamicCharged",      DWORD),
+        ("DynamicAvailable",    DWORD),
+        ("GroupCount",          DWORD),
+        ("PrivilegeCount",      DWORD),
+        ("ModifiedId",          LUID),
+    ]
+PTOKEN_STATISTICS = POINTER(TOKEN_STATISTICS)
+
+#--- SID_NAME_USE enum --------------------------------------------------------
+
+# typedef enum _SID_NAME_USE {
+#   SidTypeUser             = 1,
+#   SidTypeGroup,
+#   SidTypeDomain,
+#   SidTypeAlias,
+#   SidTypeWellKnownGroup,
+#   SidTypeDeletedAccount,
+#   SidTypeInvalid,
+#   SidTypeUnknown,
+#   SidTypeComputer,
+#   SidTypeLabel
+# } SID_NAME_USE, *PSID_NAME_USE;
+
+SidTypeUser             = 1
+SidTypeGroup            = 2
+SidTypeDomain           = 3
+SidTypeAlias            = 4
+SidTypeWellKnownGroup   = 5
+SidTypeDeletedAccount   = 6
+SidTypeInvalid          = 7
+SidTypeUnknown          = 8
+SidTypeComputer         = 9
+SidTypeLabel            = 10
+
+#--- WAITCHAIN_NODE_INFO structure and types ----------------------------------
+
+WCT_MAX_NODE_COUNT       = 16
+WCT_OBJNAME_LENGTH       = 128
+WCT_ASYNC_OPEN_FLAG      = 1
+WCTP_OPEN_ALL_FLAGS      = WCT_ASYNC_OPEN_FLAG
+WCT_OUT_OF_PROC_FLAG     = 1
+WCT_OUT_OF_PROC_COM_FLAG = 2
+WCT_OUT_OF_PROC_CS_FLAG  = 4
+WCTP_GETINFO_ALL_FLAGS   = WCT_OUT_OF_PROC_FLAG | WCT_OUT_OF_PROC_COM_FLAG | WCT_OUT_OF_PROC_CS_FLAG
+
+HWCT = LPVOID
+
+# typedef enum _WCT_OBJECT_TYPE
+# {
+#     WctCriticalSectionType = 1,
+#     WctSendMessageType,
+#     WctMutexType,
+#     WctAlpcType,
+#     WctComType,
+#     WctThreadWaitType,
+#     WctProcessWaitType,
+#     WctThreadType,
+#     WctComActivationType,
+#     WctUnknownType,
+#     WctMaxType
+# } WCT_OBJECT_TYPE;
+
+WCT_OBJECT_TYPE         = DWORD
+
+WctCriticalSectionType  = 1
+WctSendMessageType      = 2
+WctMutexType            = 3
+WctAlpcType             = 4
+WctComType              = 5
+WctThreadWaitType       = 6
+WctProcessWaitType      = 7
+WctThreadType           = 8
+WctComActivationType    = 9
+WctUnknownType          = 10
+WctMaxType              = 11
+
+# typedef enum _WCT_OBJECT_STATUS
+# {
+#     WctStatusNoAccess = 1,            // ACCESS_DENIED for this object
+#     WctStatusRunning,                 // Thread status
+#     WctStatusBlocked,                 // Thread status
+#     WctStatusPidOnly,                 // Thread status
+#     WctStatusPidOnlyRpcss,            // Thread status
+#     WctStatusOwned,                   // Dispatcher object status
+#     WctStatusNotOwned,                // Dispatcher object status
+#     WctStatusAbandoned,               // Dispatcher object status
+#     WctStatusUnknown,                 // All objects
+#     WctStatusError,                   // All objects
+#     WctStatusMax
+# } WCT_OBJECT_STATUS;
+
+WCT_OBJECT_STATUS       = DWORD
+
+WctStatusNoAccess       = 1             # ACCESS_DENIED for this object
+WctStatusRunning        = 2             # Thread status
+WctStatusBlocked        = 3             # Thread status
+WctStatusPidOnly        = 4             # Thread status
+WctStatusPidOnlyRpcss   = 5             # Thread status
+WctStatusOwned          = 6             # Dispatcher object status
+WctStatusNotOwned       = 7             # Dispatcher object status
+WctStatusAbandoned      = 8             # Dispatcher object status
+WctStatusUnknown        = 9             # All objects
+WctStatusError          = 10            # All objects
+WctStatusMax            = 11
+
+# typedef struct _WAITCHAIN_NODE_INFO {
+#   WCT_OBJECT_TYPE   ObjectType;
+#   WCT_OBJECT_STATUS ObjectStatus;
+#   union {
+#     struct {
+#       WCHAR ObjectName[WCT_OBJNAME_LENGTH];
+#       LARGE_INTEGER Timeout;
+#       BOOL Alertable;
+#     } LockObject;
+#     struct {
+#       DWORD ProcessId;
+#       DWORD ThreadId;
+#       DWORD WaitTime;
+#       DWORD ContextSwitches;
+#     } ThreadObject;
+#   } ;
+# }WAITCHAIN_NODE_INFO, *PWAITCHAIN_NODE_INFO;
+
+class _WAITCHAIN_NODE_INFO_STRUCT_1(Structure):
+    _fields_ = [
+        ("ObjectName",      WCHAR * WCT_OBJNAME_LENGTH),
+        ("Timeout",         LONGLONG), # LARGE_INTEGER
+        ("Alertable",       BOOL),
+    ]
+
+class _WAITCHAIN_NODE_INFO_STRUCT_2(Structure):
+    _fields_ = [
+        ("ProcessId",       DWORD),
+        ("ThreadId",        DWORD),
+        ("WaitTime",        DWORD),
+        ("ContextSwitches", DWORD),
+    ]
+
+class _WAITCHAIN_NODE_INFO_UNION(Union):
+    _fields_ = [
+        ("LockObject",      _WAITCHAIN_NODE_INFO_STRUCT_1),
+        ("ThreadObject",    _WAITCHAIN_NODE_INFO_STRUCT_2),
+    ]
+
+class WAITCHAIN_NODE_INFO(Structure):
+    _fields_ = [
+        ("ObjectType",      WCT_OBJECT_TYPE),
+        ("ObjectStatus",    WCT_OBJECT_STATUS),
+        ("u",               _WAITCHAIN_NODE_INFO_UNION),
+    ]
+
+PWAITCHAIN_NODE_INFO = POINTER(WAITCHAIN_NODE_INFO)
+
+class WaitChainNodeInfo (object):
+    """
+    Represents a node in the wait chain.
+
+    It's a wrapper on the L{WAITCHAIN_NODE_INFO} structure.
+
+    The following members are defined only
+    if the node is of L{WctThreadType} type:
+     - C{ProcessId}
+     - C{ThreadId}
+     - C{WaitTime}
+     - C{ContextSwitches}
+
+    @see: L{GetThreadWaitChain}
+
+    @type ObjectName: unicode
+    @ivar ObjectName: Object name. May be an empty string.
+
+    @type ObjectType: int
+    @ivar ObjectType: Object type.
+        Should be one of the following values:
+         - L{WctCriticalSectionType}
+         - L{WctSendMessageType}
+         - L{WctMutexType}
+         - L{WctAlpcType}
+         - L{WctComType}
+         - L{WctThreadWaitType}
+         - L{WctProcessWaitType}
+         - L{WctThreadType}
+         - L{WctComActivationType}
+         - L{WctUnknownType}
+
+    @type ObjectStatus: int
+    @ivar ObjectStatus: Wait status.
+        Should be one of the following values:
+         - L{WctStatusNoAccess} I{(ACCESS_DENIED for this object)}
+         - L{WctStatusRunning} I{(Thread status)}
+         - L{WctStatusBlocked} I{(Thread status)}
+         - L{WctStatusPidOnly} I{(Thread status)}
+         - L{WctStatusPidOnlyRpcss} I{(Thread status)}
+         - L{WctStatusOwned} I{(Dispatcher object status)}
+         - L{WctStatusNotOwned} I{(Dispatcher object status)}
+         - L{WctStatusAbandoned} I{(Dispatcher object status)}
+         - L{WctStatusUnknown} I{(All objects)}
+         - L{WctStatusError} I{(All objects)}
+
+    @type ProcessId: int
+    @ivar ProcessId: Process global ID.
+
+    @type ThreadId: int
+    @ivar ThreadId: Thread global ID.
+
+    @type WaitTime: int
+    @ivar WaitTime: Wait time.
+
+    @type ContextSwitches: int
+    @ivar ContextSwitches: Number of context switches.
+    """
+
+    #@type Timeout: int
+    #@ivar Timeout: Currently not documented in MSDN.
+    #
+    #@type Alertable: bool
+    #@ivar Alertable: Currently not documented in MSDN.
+
+    # TODO: __repr__
+
+    def __init__(self, aStructure):
+        self.ObjectType = aStructure.ObjectType
+        self.ObjectStatus = aStructure.ObjectStatus
+        if self.ObjectType == WctThreadType:
+            self.ProcessId = aStructure.u.ThreadObject.ProcessId
+            self.ThreadId = aStructure.u.ThreadObject.ThreadId
+            self.WaitTime = aStructure.u.ThreadObject.WaitTime
+            self.ContextSwitches = aStructure.u.ThreadObject.ContextSwitches
+            self.ObjectName = u''
+        else:
+            self.ObjectName = aStructure.u.LockObject.ObjectName.value
+            #self.Timeout = aStructure.u.LockObject.Timeout
+            #self.Alertable = bool(aStructure.u.LockObject.Alertable)
+
+class ThreadWaitChainSessionHandle (Handle):
+    """
+    Thread wait chain session handle.
+
+    Returned by L{OpenThreadWaitChainSession}.
+
+    @see: L{Handle}
+    """
+
+    def __init__(self, aHandle = None):
+        """
+        @type  aHandle: int
+        @param aHandle: Win32 handle value.
+        """
+        super(ThreadWaitChainSessionHandle, self).__init__(aHandle,
+                                                           bOwnership = True)
+
+    def _close(self):
+        if self.value is None:
+            raise ValueError("Handle was already closed!")
+        CloseThreadWaitChainSession(self.value)
+
+    def dup(self):
+        raise NotImplementedError()
+
+    def wait(self, dwMilliseconds = None):
+        raise NotImplementedError()
+
+    @property
+    def inherit(self):
+        return False
+
+    @property
+    def protectFromClose(self):
+        return False
+
+#--- Privilege dropping -------------------------------------------------------
+
+SAFER_LEVEL_HANDLE = HANDLE
+
+SAFER_SCOPEID_MACHINE = 1
+SAFER_SCOPEID_USER    = 2
+
+SAFER_LEVEL_OPEN = 1
+
+SAFER_LEVELID_DISALLOWED   = 0x00000
+SAFER_LEVELID_UNTRUSTED    = 0x01000
+SAFER_LEVELID_CONSTRAINED  = 0x10000
+SAFER_LEVELID_NORMALUSER   = 0x20000
+SAFER_LEVELID_FULLYTRUSTED = 0x40000
+
+SAFER_POLICY_INFO_CLASS = DWORD
+SaferPolicyLevelList = 1
+SaferPolicyEnableTransparentEnforcement = 2
+SaferPolicyDefaultLevel = 3
+SaferPolicyEvaluateUserScope = 4
+SaferPolicyScopeFlags = 5
+
+SAFER_TOKEN_NULL_IF_EQUAL = 1
+SAFER_TOKEN_COMPARE_ONLY  = 2
+SAFER_TOKEN_MAKE_INERT    = 4
+SAFER_TOKEN_WANT_FLAGS    = 8
+SAFER_TOKEN_MASK          = 15
+
+#--- Service Control Manager types, constants and structures ------------------
+
+SC_HANDLE = HANDLE
+
+SERVICES_ACTIVE_DATABASEW = u"ServicesActive"
+SERVICES_FAILED_DATABASEW = u"ServicesFailed"
+
+SERVICES_ACTIVE_DATABASEA = "ServicesActive"
+SERVICES_FAILED_DATABASEA = "ServicesFailed"
+
+SC_GROUP_IDENTIFIERW = u'+'
+SC_GROUP_IDENTIFIERA = '+'
+
+SERVICE_NO_CHANGE = 0xffffffff
+
+# enum SC_STATUS_TYPE
+SC_STATUS_TYPE         = ctypes.c_int
+SC_STATUS_PROCESS_INFO = 0
+
+# enum SC_ENUM_TYPE
+SC_ENUM_TYPE         = ctypes.c_int
+SC_ENUM_PROCESS_INFO = 0
+
+# Access rights
+# http://msdn.microsoft.com/en-us/library/windows/desktop/ms685981(v=vs.85).aspx
+
+SERVICE_ALL_ACCESS           = 0xF01FF
+SERVICE_QUERY_CONFIG         = 0x0001
+SERVICE_CHANGE_CONFIG        = 0x0002
+SERVICE_QUERY_STATUS         = 0x0004
+SERVICE_ENUMERATE_DEPENDENTS = 0x0008
+SERVICE_START                = 0x0010
+SERVICE_STOP                 = 0x0020
+SERVICE_PAUSE_CONTINUE       = 0x0040
+SERVICE_INTERROGATE          = 0x0080
+SERVICE_USER_DEFINED_CONTROL = 0x0100
+
+SC_MANAGER_ALL_ACCESS           = 0xF003F
+SC_MANAGER_CONNECT              = 0x0001
+SC_MANAGER_CREATE_SERVICE       = 0x0002
+SC_MANAGER_ENUMERATE_SERVICE    = 0x0004
+SC_MANAGER_LOCK                 = 0x0008
+SC_MANAGER_QUERY_LOCK_STATUS    = 0x0010
+SC_MANAGER_MODIFY_BOOT_CONFIG   = 0x0020
+
+# CreateService() service start type
+SERVICE_BOOT_START   = 0x00000000
+SERVICE_SYSTEM_START = 0x00000001
+SERVICE_AUTO_START   = 0x00000002
+SERVICE_DEMAND_START = 0x00000003
+SERVICE_DISABLED     = 0x00000004
+
+# CreateService() error control flags
+SERVICE_ERROR_IGNORE    = 0x00000000
+SERVICE_ERROR_NORMAL    = 0x00000001
+SERVICE_ERROR_SEVERE    = 0x00000002
+SERVICE_ERROR_CRITICAL  = 0x00000003
+
+# EnumServicesStatusEx() service state filters
+SERVICE_ACTIVE    = 1
+SERVICE_INACTIVE  = 2
+SERVICE_STATE_ALL = 3
+
+# SERVICE_STATUS_PROCESS.dwServiceType
+SERVICE_KERNEL_DRIVER       = 0x00000001
+SERVICE_FILE_SYSTEM_DRIVER  = 0x00000002
+SERVICE_ADAPTER             = 0x00000004
+SERVICE_RECOGNIZER_DRIVER   = 0x00000008
+SERVICE_WIN32_OWN_PROCESS   = 0x00000010
+SERVICE_WIN32_SHARE_PROCESS = 0x00000020
+SERVICE_INTERACTIVE_PROCESS = 0x00000100
+
+# EnumServicesStatusEx() service type filters (in addition to actual types)
+SERVICE_DRIVER = 0x0000000B # SERVICE_KERNEL_DRIVER | SERVICE_FILE_SYSTEM_DRIVER | SERVICE_RECOGNIZER_DRIVER
+SERVICE_WIN32  = 0x00000030 # SERVICE_WIN32_OWN_PROCESS | SERVICE_WIN32_SHARE_PROCESS
+
+# SERVICE_STATUS_PROCESS.dwCurrentState
+SERVICE_STOPPED             = 0x00000001
+SERVICE_START_PENDING       = 0x00000002
+SERVICE_STOP_PENDING        = 0x00000003
+SERVICE_RUNNING             = 0x00000004
+SERVICE_CONTINUE_PENDING    = 0x00000005
+SERVICE_PAUSE_PENDING       = 0x00000006
+SERVICE_PAUSED              = 0x00000007
+
+# SERVICE_STATUS_PROCESS.dwControlsAccepted
+SERVICE_ACCEPT_STOP                  = 0x00000001
+SERVICE_ACCEPT_PAUSE_CONTINUE        = 0x00000002
+SERVICE_ACCEPT_SHUTDOWN              = 0x00000004
+SERVICE_ACCEPT_PARAMCHANGE           = 0x00000008
+SERVICE_ACCEPT_NETBINDCHANGE         = 0x00000010
+SERVICE_ACCEPT_HARDWAREPROFILECHANGE = 0x00000020
+SERVICE_ACCEPT_POWEREVENT            = 0x00000040
+SERVICE_ACCEPT_SESSIONCHANGE         = 0x00000080
+SERVICE_ACCEPT_PRESHUTDOWN           = 0x00000100
+
+# SERVICE_STATUS_PROCESS.dwServiceFlags
+SERVICE_RUNS_IN_SYSTEM_PROCESS = 0x00000001
+
+# Service control flags
+SERVICE_CONTROL_STOP                  = 0x00000001
+SERVICE_CONTROL_PAUSE                 = 0x00000002
+SERVICE_CONTROL_CONTINUE              = 0x00000003
+SERVICE_CONTROL_INTERROGATE           = 0x00000004
+SERVICE_CONTROL_SHUTDOWN              = 0x00000005
+SERVICE_CONTROL_PARAMCHANGE           = 0x00000006
+SERVICE_CONTROL_NETBINDADD            = 0x00000007
+SERVICE_CONTROL_NETBINDREMOVE         = 0x00000008
+SERVICE_CONTROL_NETBINDENABLE         = 0x00000009
+SERVICE_CONTROL_NETBINDDISABLE        = 0x0000000A
+SERVICE_CONTROL_DEVICEEVENT           = 0x0000000B
+SERVICE_CONTROL_HARDWAREPROFILECHANGE = 0x0000000C
+SERVICE_CONTROL_POWEREVENT            = 0x0000000D
+SERVICE_CONTROL_SESSIONCHANGE         = 0x0000000E
+
+# Service control accepted bitmasks
+SERVICE_ACCEPT_STOP                  = 0x00000001
+SERVICE_ACCEPT_PAUSE_CONTINUE        = 0x00000002
+SERVICE_ACCEPT_SHUTDOWN              = 0x00000004
+SERVICE_ACCEPT_PARAMCHANGE           = 0x00000008
+SERVICE_ACCEPT_NETBINDCHANGE         = 0x00000010
+SERVICE_ACCEPT_HARDWAREPROFILECHANGE = 0x00000020
+SERVICE_ACCEPT_POWEREVENT            = 0x00000040
+SERVICE_ACCEPT_SESSIONCHANGE         = 0x00000080
+SERVICE_ACCEPT_PRESHUTDOWN           = 0x00000100
+SERVICE_ACCEPT_TIMECHANGE            = 0x00000200
+SERVICE_ACCEPT_TRIGGEREVENT          = 0x00000400
+SERVICE_ACCEPT_USERMODEREBOOT        = 0x00000800
+
+# enum SC_ACTION_TYPE
+SC_ACTION_NONE        = 0
+SC_ACTION_RESTART     = 1
+SC_ACTION_REBOOT      = 2
+SC_ACTION_RUN_COMMAND = 3
+
+# QueryServiceConfig2
+SERVICE_CONFIG_DESCRIPTION     = 1
+SERVICE_CONFIG_FAILURE_ACTIONS = 2
+
+# typedef struct _SERVICE_STATUS {
+#   DWORD dwServiceType;
+#   DWORD dwCurrentState;
+#   DWORD dwControlsAccepted;
+#   DWORD dwWin32ExitCode;
+#   DWORD dwServiceSpecificExitCode;
+#   DWORD dwCheckPoint;
+#   DWORD dwWaitHint;
+# } SERVICE_STATUS, *LPSERVICE_STATUS;
+class SERVICE_STATUS(Structure):
+    _fields_ = [
+        ("dwServiceType",               DWORD),
+        ("dwCurrentState",              DWORD),
+        ("dwControlsAccepted",          DWORD),
+        ("dwWin32ExitCode",             DWORD),
+        ("dwServiceSpecificExitCode",   DWORD),
+        ("dwCheckPoint",                DWORD),
+        ("dwWaitHint",                  DWORD),
+    ]
+LPSERVICE_STATUS = POINTER(SERVICE_STATUS)
+
+# typedef struct _SERVICE_STATUS_PROCESS {
+#   DWORD dwServiceType;
+#   DWORD dwCurrentState;
+#   DWORD dwControlsAccepted;
+#   DWORD dwWin32ExitCode;
+#   DWORD dwServiceSpecificExitCode;
+#   DWORD dwCheckPoint;
+#   DWORD dwWaitHint;
+#   DWORD dwProcessId;
+#   DWORD dwServiceFlags;
+# } SERVICE_STATUS_PROCESS, *LPSERVICE_STATUS_PROCESS;
+class SERVICE_STATUS_PROCESS(Structure):
+    _fields_ = SERVICE_STATUS._fields_ + [
+        ("dwProcessId",                 DWORD),
+        ("dwServiceFlags",              DWORD),
+    ]
+LPSERVICE_STATUS_PROCESS = POINTER(SERVICE_STATUS_PROCESS)
+
+# typedef struct _ENUM_SERVICE_STATUS {
+#   LPTSTR         lpServiceName;
+#   LPTSTR         lpDisplayName;
+#   SERVICE_STATUS ServiceStatus;
+# } ENUM_SERVICE_STATUS, *LPENUM_SERVICE_STATUS;
+class ENUM_SERVICE_STATUSA(Structure):
+    _fields_ = [
+        ("lpServiceName", LPSTR),
+        ("lpDisplayName", LPSTR),
+        ("ServiceStatus", SERVICE_STATUS),
+    ]
+class ENUM_SERVICE_STATUSW(Structure):
+    _fields_ = [
+        ("lpServiceName", LPWSTR),
+        ("lpDisplayName", LPWSTR),
+        ("ServiceStatus", SERVICE_STATUS),
+    ]
+LPENUM_SERVICE_STATUSA = POINTER(ENUM_SERVICE_STATUSA)
+LPENUM_SERVICE_STATUSW = POINTER(ENUM_SERVICE_STATUSW)
+
+# typedef struct _ENUM_SERVICE_STATUS_PROCESS {
+#   LPTSTR                 lpServiceName;
+#   LPTSTR                 lpDisplayName;
+#   SERVICE_STATUS_PROCESS ServiceStatusProcess;
+# } ENUM_SERVICE_STATUS_PROCESS, *LPENUM_SERVICE_STATUS_PROCESS;
+class ENUM_SERVICE_STATUS_PROCESSA(Structure):
+    _fields_ = [
+        ("lpServiceName",        LPSTR),
+        ("lpDisplayName",        LPSTR),
+        ("ServiceStatusProcess", SERVICE_STATUS_PROCESS),
+    ]
+class ENUM_SERVICE_STATUS_PROCESSW(Structure):
+    _fields_ = [
+        ("lpServiceName",        LPWSTR),
+        ("lpDisplayName",        LPWSTR),
+        ("ServiceStatusProcess", SERVICE_STATUS_PROCESS),
+    ]
+LPENUM_SERVICE_STATUS_PROCESSA = POINTER(ENUM_SERVICE_STATUS_PROCESSA)
+LPENUM_SERVICE_STATUS_PROCESSW = POINTER(ENUM_SERVICE_STATUS_PROCESSW)
+
+class ServiceStatus(object):
+    """
+    Wrapper for the L{SERVICE_STATUS} structure.
+    """
+
+    def __init__(self, raw):
+        """
+        @type  raw: L{SERVICE_STATUS}
+        @param raw: Raw structure for this service status data.
+        """
+        self.ServiceType             = raw.dwServiceType
+        self.CurrentState            = raw.dwCurrentState
+        self.ControlsAccepted        = raw.dwControlsAccepted
+        self.Win32ExitCode           = raw.dwWin32ExitCode
+        self.ServiceSpecificExitCode = raw.dwServiceSpecificExitCode
+        self.CheckPoint              = raw.dwCheckPoint
+        self.WaitHint                = raw.dwWaitHint
+
+class ServiceStatusProcess(object):
+    """
+    Wrapper for the L{SERVICE_STATUS_PROCESS} structure.
+    """
+
+    def __init__(self, raw):
+        """
+        @type  raw: L{SERVICE_STATUS_PROCESS}
+        @param raw: Raw structure for this service status data.
+        """
+        self.ServiceType             = raw.dwServiceType
+        self.CurrentState            = raw.dwCurrentState
+        self.ControlsAccepted        = raw.dwControlsAccepted
+        self.Win32ExitCode           = raw.dwWin32ExitCode
+        self.ServiceSpecificExitCode = raw.dwServiceSpecificExitCode
+        self.CheckPoint              = raw.dwCheckPoint
+        self.WaitHint                = raw.dwWaitHint
+        self.ProcessId               = raw.dwProcessId
+        self.ServiceFlags            = raw.dwServiceFlags
+
+class ServiceStatusEntry(object):
+    """
+    Service status entry returned by L{EnumServicesStatus}.
+    """
+
+    def __init__(self, raw):
+        """
+        @type  raw: L{ENUM_SERVICE_STATUSA} or L{ENUM_SERVICE_STATUSW}
+        @param raw: Raw structure for this service status entry.
+        """
+        self.ServiceName             = raw.lpServiceName
+        self.DisplayName             = raw.lpDisplayName
+        self.ServiceType             = raw.ServiceStatus.dwServiceType
+        self.CurrentState            = raw.ServiceStatus.dwCurrentState
+        self.ControlsAccepted        = raw.ServiceStatus.dwControlsAccepted
+        self.Win32ExitCode           = raw.ServiceStatus.dwWin32ExitCode
+        self.ServiceSpecificExitCode = raw.ServiceStatus.dwServiceSpecificExitCode
+        self.CheckPoint              = raw.ServiceStatus.dwCheckPoint
+        self.WaitHint                = raw.ServiceStatus.dwWaitHint
+
+    def __str__(self):
+        output = []
+        if self.ServiceType & SERVICE_INTERACTIVE_PROCESS:
+            output.append("Interactive service")
+        else:
+            output.append("Service")
+        if self.DisplayName:
+            output.append("\"%s\" (%s)" % (self.DisplayName, self.ServiceName))
+        else:
+            output.append("\"%s\"" % self.ServiceName)
+        if   self.CurrentState == SERVICE_CONTINUE_PENDING:
+            output.append("is about to continue.")
+        elif self.CurrentState == SERVICE_PAUSE_PENDING:
+            output.append("is pausing.")
+        elif self.CurrentState == SERVICE_PAUSED:
+            output.append("is paused.")
+        elif self.CurrentState == SERVICE_RUNNING:
+            output.append("is running.")
+        elif self.CurrentState == SERVICE_START_PENDING:
+            output.append("is starting.")
+        elif self.CurrentState == SERVICE_STOP_PENDING:
+            output.append("is stopping.")
+        elif self.CurrentState == SERVICE_STOPPED:
+            output.append("is stopped.")
+        return " ".join(output)
+
+class ServiceStatusProcessEntry(object):
+    """
+    Service status entry returned by L{EnumServicesStatusEx}.
+    """
+
+    def __init__(self, raw):
+        """
+        @type  raw: L{ENUM_SERVICE_STATUS_PROCESSA} or L{ENUM_SERVICE_STATUS_PROCESSW}
+        @param raw: Raw structure for this service status entry.
+        """
+        self.ServiceName             = raw.lpServiceName
+        self.DisplayName             = raw.lpDisplayName
+        self.ServiceType             = raw.ServiceStatusProcess.dwServiceType
+        self.CurrentState            = raw.ServiceStatusProcess.dwCurrentState
+        self.ControlsAccepted        = raw.ServiceStatusProcess.dwControlsAccepted
+        self.Win32ExitCode           = raw.ServiceStatusProcess.dwWin32ExitCode
+        self.ServiceSpecificExitCode = raw.ServiceStatusProcess.dwServiceSpecificExitCode
+        self.CheckPoint              = raw.ServiceStatusProcess.dwCheckPoint
+        self.WaitHint                = raw.ServiceStatusProcess.dwWaitHint
+        self.ProcessId               = raw.ServiceStatusProcess.dwProcessId
+        self.ServiceFlags            = raw.ServiceStatusProcess.dwServiceFlags
+
+    def __str__(self):
+        output = []
+        if self.ServiceType & SERVICE_INTERACTIVE_PROCESS:
+            output.append("Interactive service ")
+        else:
+            output.append("Service ")
+        if self.DisplayName:
+            output.append("\"%s\" (%s)" % (self.DisplayName, self.ServiceName))
+        else:
+            output.append("\"%s\"" % self.ServiceName)
+        if   self.CurrentState == SERVICE_CONTINUE_PENDING:
+            output.append(" is about to continue")
+        elif self.CurrentState == SERVICE_PAUSE_PENDING:
+            output.append(" is pausing")
+        elif self.CurrentState == SERVICE_PAUSED:
+            output.append(" is paused")
+        elif self.CurrentState == SERVICE_RUNNING:
+            output.append(" is running")
+        elif self.CurrentState == SERVICE_START_PENDING:
+            output.append(" is starting")
+        elif self.CurrentState == SERVICE_STOP_PENDING:
+            output.append(" is stopping")
+        elif self.CurrentState == SERVICE_STOPPED:
+            output.append(" is stopped")
+        if self.ProcessId:
+            output.append(" at process %d" % self.ProcessId)
+        output.append(".")
+        return "".join(output)
+
+#--- Handle wrappers ----------------------------------------------------------
+
+# XXX maybe add functions related to the tokens here?
+class TokenHandle (Handle):
+    """
+    Access token handle.
+
+    @see: L{Handle}
+    """
+    pass
+
+class RegistryKeyHandle (UserModeHandle):
+    """
+    Registry key handle.
+    """
+
+    _TYPE = HKEY
+
+    def _close(self):
+        RegCloseKey(self.value)
+
+class SaferLevelHandle (UserModeHandle):
+    """
+    Safer level handle.
+
+    @see: U{http://msdn.microsoft.com/en-us/library/ms722425(VS.85).aspx}
+    """
+
+    _TYPE = SAFER_LEVEL_HANDLE
+
+    def _close(self):
+        SaferCloseLevel(self.value)
+
+class ServiceHandle (UserModeHandle):
+    """
+    Service handle.
+
+    @see: U{http://msdn.microsoft.com/en-us/library/windows/desktop/ms684330(v=vs.85).aspx}
+    """
+
+    _TYPE = SC_HANDLE
+
+    def _close(self):
+        CloseServiceHandle(self.value)
+
+class ServiceControlManagerHandle (UserModeHandle):
+    """
+    Service Control Manager (SCM) handle.
+
+    @see: U{http://msdn.microsoft.com/en-us/library/windows/desktop/ms684323(v=vs.85).aspx}
+    """
+
+    _TYPE = SC_HANDLE
+
+    def _close(self):
+        CloseServiceHandle(self.value)
+
+#--- advapi32.dll -------------------------------------------------------------
+
+# BOOL WINAPI GetUserName(
+#   __out    LPTSTR lpBuffer,
+#   __inout  LPDWORD lpnSize
+# );
+def GetUserNameA():
+    _GetUserNameA = windll.advapi32.GetUserNameA
+    _GetUserNameA.argtypes = [LPSTR, LPDWORD]
+    _GetUserNameA.restype  = bool
+
+    nSize = DWORD(0)
+    _GetUserNameA(None, byref(nSize))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpBuffer = ctypes.create_string_buffer('', nSize.value + 1)
+    success = _GetUserNameA(lpBuffer, byref(nSize))
+    if not success:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+def GetUserNameW():
+    _GetUserNameW = windll.advapi32.GetUserNameW
+    _GetUserNameW.argtypes = [LPWSTR, LPDWORD]
+    _GetUserNameW.restype  = bool
+
+    nSize = DWORD(0)
+    _GetUserNameW(None, byref(nSize))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpBuffer = ctypes.create_unicode_buffer(u'', nSize.value + 1)
+    success = _GetUserNameW(lpBuffer, byref(nSize))
+    if not success:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+GetUserName = DefaultStringType(GetUserNameA, GetUserNameW)
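+
+# Illustrative usage sketch (not part of the upstream winappdbg API): the
+# wrapper above handles the two-step buffer size negotiation internally, so
+# getting the current user name is a single call.
+#
+#   name = GetUserName()
+#   print "Running as: %s" % name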
+
+# BOOL WINAPI LookupAccountName(
+#   __in_opt   LPCTSTR lpSystemName,
+#   __in       LPCTSTR lpAccountName,
+#   __out_opt  PSID Sid,
+#   __inout    LPDWORD cbSid,
+#   __out_opt  LPTSTR ReferencedDomainName,
+#   __inout    LPDWORD cchReferencedDomainName,
+#   __out      PSID_NAME_USE peUse
+# );
+def LookupAccountNameA(lpSystemName, lpAccountName):
+    _LookupAccountNameA = windll.advapi32.LookupAccountNameA
+    _LookupAccountNameA.argtypes = [LPSTR, LPSTR, PSID, LPDWORD, LPSTR, LPDWORD, LPDWORD]
+    _LookupAccountNameA.restype  = BOOL
+
+    cbSid = DWORD(0)
+    cchReferencedDomainName = DWORD(0)
+    peUse = DWORD(0)
+    _LookupAccountNameA(lpSystemName, lpAccountName, None, byref(cbSid), None, byref(cchReferencedDomainName), byref(peUse))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise(ctypes.WinError(error))
+    sid = ctypes.create_string_buffer('', cbSid.value)
+    psid = ctypes.cast(ctypes.pointer(sid), PSID)
+    lpReferencedDomainName = ctypes.create_string_buffer('', cchReferencedDomainName.value + 1)
+    success = _LookupAccountNameA(lpSystemName, lpAccountName, psid, byref(cbSid), lpReferencedDomainName, byref(cchReferencedDomainName), byref(peUse))
+    if not success:
+        raise ctypes.WinError()
+    return psid, lpReferencedDomainName.value, peUse.value
+
+def LookupAccountNameW(lpSystemName, lpAccountName):
+    _LookupAccountNameW = windll.advapi32.LookupAccountNameW
+    _LookupAccountNameW.argtypes = [LPWSTR, LPWSTR, PSID, LPDWORD, LPWSTR, LPDWORD, LPDWORD]
+    _LookupAccountNameW.restype  = BOOL
+
+    cbSid = DWORD(0)
+    cchReferencedDomainName = DWORD(0)
+    peUse = DWORD(0)
+    _LookupAccountNameW(lpSystemName, lpAccountName, None, byref(cbSid), None, byref(cchReferencedDomainName), byref(peUse))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise(ctypes.WinError(error))
+    sid = ctypes.create_string_buffer('', cbSid.value)
+    psid = ctypes.cast(ctypes.pointer(sid), PSID)
+    lpReferencedDomainName = ctypes.create_unicode_buffer(u'', cchReferencedDomainName.value + 1)
+    success = _LookupAccountNameW(lpSystemName, lpAccountName, psid, byref(cbSid), lpReferencedDomainName, byref(cchReferencedDomainName), byref(peUse))
+    if not success:
+        raise ctypes.WinError()
+    return psid, lpReferencedDomainName.value, peUse.value
+
+LookupAccountName = GuessStringType(LookupAccountNameA, LookupAccountNameW)
+
+# BOOL WINAPI LookupAccountSid(
+#   __in_opt   LPCTSTR lpSystemName,
+#   __in       PSID lpSid,
+#   __out_opt  LPTSTR lpName,
+#   __inout    LPDWORD cchName,
+#   __out_opt  LPTSTR lpReferencedDomainName,
+#   __inout    LPDWORD cchReferencedDomainName,
+#   __out      PSID_NAME_USE peUse
+# );
+def LookupAccountSidA(lpSystemName, lpSid):
+    _LookupAccountSidA = windll.advapi32.LookupAccountSidA
+    _LookupAccountSidA.argtypes = [LPSTR, PSID, LPSTR, LPDWORD, LPSTR, LPDWORD, LPDWORD]
+    _LookupAccountSidA.restype  = bool
+
+    cchName = DWORD(0)
+    cchReferencedDomainName = DWORD(0)
+    peUse = DWORD(0)
+    _LookupAccountSidA(lpSystemName, lpSid, None, byref(cchName), None, byref(cchReferencedDomainName), byref(peUse))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpName = ctypes.create_string_buffer('', cchName.value + 1)
+    lpReferencedDomainName = ctypes.create_string_buffer('', cchReferencedDomainName.value + 1)
+    success = _LookupAccountSidA(lpSystemName, lpSid, lpName, byref(cchName), lpReferencedDomainName, byref(cchReferencedDomainName), byref(peUse))
+    if not success:
+        raise ctypes.WinError()
+    return lpName.value, lpReferencedDomainName.value, peUse.value
+
+def LookupAccountSidW(lpSystemName, lpSid):
+    _LookupAccountSidW = windll.advapi32.LookupAccountSidW
+    _LookupAccountSidW.argtypes = [LPWSTR, PSID, LPWSTR, LPDWORD, LPWSTR, LPDWORD, LPDWORD]
+    _LookupAccountSidW.restype  = bool
+
+    cchName = DWORD(0)
+    cchReferencedDomainName = DWORD(0)
+    peUse = DWORD(0)
+    _LookupAccountSidW(lpSystemName, lpSid, None, byref(cchName), None, byref(cchReferencedDomainName), byref(peUse))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpName = ctypes.create_unicode_buffer(u'', cchName.value + 1)
+    lpReferencedDomainName = ctypes.create_unicode_buffer(u'', cchReferencedDomainName.value + 1)
+    success = _LookupAccountSidW(lpSystemName, lpSid, lpName, byref(cchName), lpReferencedDomainName, byref(cchReferencedDomainName), byref(peUse))
+    if not success:
+        raise ctypes.WinError()
+    return lpName.value, lpReferencedDomainName.value, peUse.value
+
+LookupAccountSid = GuessStringType(LookupAccountSidA, LookupAccountSidW)
+
+# BOOL ConvertSidToStringSid(
+#   __in   PSID Sid,
+#   __out  LPTSTR *StringSid
+# );
+def ConvertSidToStringSidA(Sid):
+    _ConvertSidToStringSidA = windll.advapi32.ConvertSidToStringSidA
+    _ConvertSidToStringSidA.argtypes = [PSID, POINTER(LPSTR)]
+    _ConvertSidToStringSidA.restype  = bool
+    _ConvertSidToStringSidA.errcheck = RaiseIfZero
+
+    pStringSid = LPSTR()
+    _ConvertSidToStringSidA(Sid, byref(pStringSid))
+    try:
+        StringSid = pStringSid.value
+    finally:
+        LocalFree(pStringSid)
+    return StringSid
+
+def ConvertSidToStringSidW(Sid):
+    _ConvertSidToStringSidW = windll.advapi32.ConvertSidToStringSidW
+    _ConvertSidToStringSidW.argtypes = [PSID, POINTER(LPWSTR)]
+    _ConvertSidToStringSidW.restype  = bool
+    _ConvertSidToStringSidW.errcheck = RaiseIfZero
+
+    pStringSid = LPWSTR()
+    _ConvertSidToStringSidW(Sid, byref(pStringSid))
+    try:
+        StringSid = pStringSid.value
+    finally:
+        LocalFree(pStringSid)
+    return StringSid
+
+ConvertSidToStringSid = DefaultStringType(ConvertSidToStringSidA, ConvertSidToStringSidW)
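+
+# Illustrative sketch (not part of the upstream winappdbg API): resolving an
+# account name to its SID in string form with the wrappers above. The account
+# name "Administrator" is only a placeholder value.
+#
+#   sid, domain, use = LookupAccountName(None, "Administrator")
+#   print "%s\\Administrator -> %s" % (domain, ConvertSidToStringSid(sid))
+#   print use == SidTypeUser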
+
+# BOOL WINAPI ConvertStringSidToSid(
+#   __in   LPCTSTR StringSid,
+#   __out  PSID *Sid
+# );
+def ConvertStringSidToSidA(StringSid):
+    _ConvertStringSidToSidA = windll.advapi32.ConvertStringSidToSidA
+    _ConvertStringSidToSidA.argtypes = [LPSTR, PVOID]
+    _ConvertStringSidToSidA.restype  = bool
+    _ConvertStringSidToSidA.errcheck = RaiseIfZero
+
+    Sid = PVOID()
+    _ConvertStringSidToSidA(StringSid, ctypes.pointer(Sid))
+    return Sid.value
+
+def ConvertStringSidToSidW(StringSid):
+    _ConvertStringSidToSidW = windll.advapi32.ConvertStringSidToSidW
+    _ConvertStringSidToSidW.argtypes = [LPWSTR, PVOID]
+    _ConvertStringSidToSidW.restype  = bool
+    _ConvertStringSidToSidW.errcheck = RaiseIfZero
+
+    Sid = PVOID()
+    _ConvertStringSidToSidW(StringSid, ctypes.pointer(Sid))
+    return Sid.value
+
+ConvertStringSidToSid = GuessStringType(ConvertStringSidToSidA, ConvertStringSidToSidW)
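+
+# Illustrative sketch (not part of the upstream winappdbg API): the string form
+# of the well known Everyone SID ("S-1-1-0") round-trips through the two
+# converters, and the resulting pointer passes IsValidSid() defined below.
+#
+#   pSid = ConvertStringSidToSid("S-1-1-0")
+#   print IsValidSid(pSid), ConvertSidToStringSid(pSid)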
+
+# BOOL WINAPI IsValidSid(
+#   __in  PSID pSid
+# );
+def IsValidSid(pSid):
+    _IsValidSid = windll.advapi32.IsValidSid
+    _IsValidSid.argtypes = [PSID]
+    _IsValidSid.restype  = bool
+    return _IsValidSid(pSid)
+
+# BOOL WINAPI EqualSid(
+#   __in  PSID pSid1,
+#   __in  PSID pSid2
+# );
+def EqualSid(pSid1, pSid2):
+    _EqualSid = windll.advapi32.EqualSid
+    _EqualSid.argtypes = [PSID, PSID]
+    _EqualSid.restype  = bool
+    return _EqualSid(pSid1, pSid2)
+
+# DWORD WINAPI GetLengthSid(
+#   __in  PSID pSid
+# );
+def GetLengthSid(pSid):
+    _GetLengthSid = windll.advapi32.GetLengthSid
+    _GetLengthSid.argtypes = [PSID]
+    _GetLengthSid.restype  = DWORD
+    return _GetLengthSid(pSid)
+
+# BOOL WINAPI CopySid(
+#   __in   DWORD nDestinationSidLength,
+#   __out  PSID pDestinationSid,
+#   __in   PSID pSourceSid
+# );
+def CopySid(pSourceSid):
+    _CopySid = windll.advapi32.CopySid
+    _CopySid.argtypes = [DWORD, PVOID, PSID]
+    _CopySid.restype  = bool
+    _CopySid.errcheck = RaiseIfZero
+
+    nDestinationSidLength = GetLengthSid(pSourceSid)
+    DestinationSid = ctypes.create_string_buffer('', nDestinationSidLength)
+    pDestinationSid = ctypes.cast(ctypes.pointer(DestinationSid), PVOID)
+    _CopySid(nDestinationSidLength, pDestinationSid, pSourceSid)
+    return ctypes.cast(pDestinationSid, PSID)
+
+# PVOID WINAPI FreeSid(
+#   __in  PSID pSid
+# );
+def FreeSid(pSid):
+    _FreeSid = windll.advapi32.FreeSid
+    _FreeSid.argtypes = [PSID]
+    _FreeSid.restype  = PSID
+    _FreeSid.errcheck = RaiseIfNotZero
+    _FreeSid(pSid)
+
+# BOOL WINAPI OpenProcessToken(
+#   __in   HANDLE ProcessHandle,
+#   __in   DWORD DesiredAccess,
+#   __out  PHANDLE TokenHandle
+# );
+def OpenProcessToken(ProcessHandle, DesiredAccess = TOKEN_ALL_ACCESS):
+    _OpenProcessToken = windll.advapi32.OpenProcessToken
+    _OpenProcessToken.argtypes = [HANDLE, DWORD, PHANDLE]
+    _OpenProcessToken.restype  = bool
+    _OpenProcessToken.errcheck = RaiseIfZero
+
+    NewTokenHandle = HANDLE(INVALID_HANDLE_VALUE)
+    _OpenProcessToken(ProcessHandle, DesiredAccess, byref(NewTokenHandle))
+    return TokenHandle(NewTokenHandle.value)
+
+# BOOL WINAPI OpenThreadToken(
+#   __in   HANDLE ThreadHandle,
+#   __in   DWORD DesiredAccess,
+#   __in   BOOL OpenAsSelf,
+#   __out  PHANDLE TokenHandle
+# );
+def OpenThreadToken(ThreadHandle, DesiredAccess, OpenAsSelf = True):
+    _OpenThreadToken = windll.advapi32.OpenThreadToken
+    _OpenThreadToken.argtypes = [HANDLE, DWORD, BOOL, PHANDLE]
+    _OpenThreadToken.restype  = bool
+    _OpenThreadToken.errcheck = RaiseIfZero
+
+    NewTokenHandle = HANDLE(INVALID_HANDLE_VALUE)
+    _OpenThreadToken(ThreadHandle, DesiredAccess, OpenAsSelf, byref(NewTokenHandle))
+    return TokenHandle(NewTokenHandle.value)
+
+# BOOL WINAPI DuplicateToken(
+#   _In_   HANDLE ExistingTokenHandle,
+#   _In_   SECURITY_IMPERSONATION_LEVEL ImpersonationLevel,
+#   _Out_  PHANDLE DuplicateTokenHandle
+# );
+def DuplicateToken(ExistingTokenHandle, ImpersonationLevel = SecurityImpersonation):
+    _DuplicateToken = windll.advapi32.DuplicateToken
+    _DuplicateToken.argtypes = [HANDLE, SECURITY_IMPERSONATION_LEVEL, PHANDLE]
+    _DuplicateToken.restype  = bool
+    _DuplicateToken.errcheck = RaiseIfZero
+
+    DuplicateTokenHandle = HANDLE(INVALID_HANDLE_VALUE)
+    _DuplicateToken(ExistingTokenHandle, ImpersonationLevel, byref(DuplicateTokenHandle))
+    return TokenHandle(DuplicateTokenHandle.value)
+
+# BOOL WINAPI DuplicateTokenEx(
+#   _In_      HANDLE hExistingToken,
+#   _In_      DWORD dwDesiredAccess,
+#   _In_opt_  LPSECURITY_ATTRIBUTES lpTokenAttributes,
+#   _In_      SECURITY_IMPERSONATION_LEVEL ImpersonationLevel,
+#   _In_      TOKEN_TYPE TokenType,
+#   _Out_     PHANDLE phNewToken
+# );
+def DuplicateTokenEx(hExistingToken, dwDesiredAccess = TOKEN_ALL_ACCESS, lpTokenAttributes = None, ImpersonationLevel = SecurityImpersonation, TokenType = TokenPrimary):
+    _DuplicateTokenEx = windll.advapi32.DuplicateTokenEx
+    _DuplicateTokenEx.argtypes = [HANDLE, DWORD, LPSECURITY_ATTRIBUTES, SECURITY_IMPERSONATION_LEVEL, TOKEN_TYPE, PHANDLE]
+    _DuplicateTokenEx.restype  = bool
+    _DuplicateTokenEx.errcheck = RaiseIfZero
+
+    DuplicateTokenHandle = HANDLE(INVALID_HANDLE_VALUE)
+    _DuplicateTokenEx(hExistingToken, dwDesiredAccess, lpTokenAttributes, ImpersonationLevel, TokenType, byref(DuplicateTokenHandle))
+    return TokenHandle(DuplicateTokenHandle.value)
+
+# BOOL WINAPI IsTokenRestricted(
+#   __in  HANDLE TokenHandle
+# );
+def IsTokenRestricted(hTokenHandle):
+    _IsTokenRestricted = windll.advapi32.IsTokenRestricted
+    _IsTokenRestricted.argtypes = [HANDLE]
+    _IsTokenRestricted.restype  = bool
+    _IsTokenRestricted.errcheck = RaiseIfNotErrorSuccess
+
+    SetLastError(ERROR_SUCCESS)
+    return _IsTokenRestricted(hTokenHandle)
+
+# BOOL WINAPI LookupPrivilegeValue(
+#   __in_opt  LPCTSTR lpSystemName,
+#   __in      LPCTSTR lpName,
+#   __out     PLUID lpLuid
+# );
+def LookupPrivilegeValueA(lpSystemName, lpName):
+    _LookupPrivilegeValueA = windll.advapi32.LookupPrivilegeValueA
+    _LookupPrivilegeValueA.argtypes = [LPSTR, LPSTR, PLUID]
+    _LookupPrivilegeValueA.restype  = bool
+    _LookupPrivilegeValueA.errcheck = RaiseIfZero
+
+    lpLuid = LUID()
+    if not lpSystemName:
+        lpSystemName = None
+    _LookupPrivilegeValueA(lpSystemName, lpName, byref(lpLuid))
+    return lpLuid
+
+def LookupPrivilegeValueW(lpSystemName, lpName):
+    _LookupPrivilegeValueW = windll.advapi32.LookupPrivilegeValueW
+    _LookupPrivilegeValueW.argtypes = [LPWSTR, LPWSTR, PLUID]
+    _LookupPrivilegeValueW.restype  = bool
+    _LookupPrivilegeValueW.errcheck = RaiseIfZero
+
+    lpLuid = LUID()
+    if not lpSystemName:
+        lpSystemName = None
+    _LookupPrivilegeValueW(lpSystemName, lpName, byref(lpLuid))
+    return lpLuid
+
+LookupPrivilegeValue = GuessStringType(LookupPrivilegeValueA, LookupPrivilegeValueW)
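+
+# Illustrative sketch (not part of the upstream winappdbg API): privileges can
+# be passed to AdjustTokenPrivileges (defined below) either by name or as the
+# LUID returned by this wrapper.
+#
+#   luid = LookupPrivilegeValue(None, SE_DEBUG_NAME)
+#   print luid.LowPart, luid.HighPart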
+
+# BOOL WINAPI LookupPrivilegeName(
+#   __in_opt   LPCTSTR lpSystemName,
+#   __in       PLUID lpLuid,
+#   __out_opt  LPTSTR lpName,
+#   __inout    LPDWORD cchName
+# );
+
+def LookupPrivilegeNameA(lpSystemName, lpLuid):
+    _LookupPrivilegeNameA = windll.advapi32.LookupPrivilegeNameA
+    _LookupPrivilegeNameA.argtypes = [LPSTR, PLUID, LPSTR, LPDWORD]
+    _LookupPrivilegeNameA.restype  = bool
+
+    # The first call always fails with ERROR_INSUFFICIENT_BUFFER and returns
+    # the required buffer size, so it can't use the RaiseIfZero errcheck.
+    cchName = DWORD(0)
+    _LookupPrivilegeNameA(lpSystemName, byref(lpLuid), NULL, byref(cchName))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpName = ctypes.create_string_buffer("", cchName.value + 1)
+    success = _LookupPrivilegeNameA(lpSystemName, byref(lpLuid), lpName, byref(cchName))
+    if not success:
+        raise ctypes.WinError()
+    return lpName.value
+
+def LookupPrivilegeNameW(lpSystemName, lpLuid):
+    _LookupPrivilegeNameW = windll.advapi32.LookupPrivilegeNameW
+    _LookupPrivilegeNameW.argtypes = [LPWSTR, PLUID, LPWSTR, LPDWORD]
+    _LookupPrivilegeNameW.restype  = bool
+
+    # Same two-step size negotiation as the ANSI version above.
+    cchName = DWORD(0)
+    _LookupPrivilegeNameW(lpSystemName, byref(lpLuid), NULL, byref(cchName))
+    error = GetLastError()
+    if error != ERROR_INSUFFICIENT_BUFFER:
+        raise ctypes.WinError(error)
+    lpName = ctypes.create_unicode_buffer(u"", cchName.value + 1)
+    success = _LookupPrivilegeNameW(lpSystemName, byref(lpLuid), lpName, byref(cchName))
+    if not success:
+        raise ctypes.WinError()
+    return lpName.value
+
+LookupPrivilegeName = GuessStringType(LookupPrivilegeNameA, LookupPrivilegeNameW)
+
+# BOOL WINAPI AdjustTokenPrivileges(
+#   __in       HANDLE TokenHandle,
+#   __in       BOOL DisableAllPrivileges,
+#   __in_opt   PTOKEN_PRIVILEGES NewState,
+#   __in       DWORD BufferLength,
+#   __out_opt  PTOKEN_PRIVILEGES PreviousState,
+#   __out_opt  PDWORD ReturnLength
+# );
+def AdjustTokenPrivileges(TokenHandle, NewState = ()):
+    _AdjustTokenPrivileges = windll.advapi32.AdjustTokenPrivileges
+    _AdjustTokenPrivileges.argtypes = [HANDLE, BOOL, LPVOID, DWORD, LPVOID, LPVOID]
+    _AdjustTokenPrivileges.restype  = bool
+    _AdjustTokenPrivileges.errcheck = RaiseIfZero
+    #
+    # I don't know how to allocate variable sized structures in ctypes :(
+    # so this hack works by always using a TOKEN_PRIVILEGES with a single
+    # element and calling the API once per privilege. This also means the
+    # PreviousState parameter isn't supported yet, as it's too much hassle.
+    # A future version should implement this function correctly.
+    #
+    if not NewState:
+        _AdjustTokenPrivileges(TokenHandle, TRUE, NULL, 0, NULL, NULL)
+    else:
+        success = True
+        for (privilege, enabled) in NewState:
+            if not isinstance(privilege, LUID):
+                privilege = LookupPrivilegeValue(NULL, privilege)
+            if enabled == True:
+                flags = SE_PRIVILEGE_ENABLED
+            elif enabled == False:
+                flags = SE_PRIVILEGE_REMOVED
+            elif enabled == None:
+                flags = 0
+            else:
+                flags = enabled
+            laa = LUID_AND_ATTRIBUTES(privilege, flags)
+            tp  = TOKEN_PRIVILEGES(1, laa)
+            _AdjustTokenPrivileges(TokenHandle, FALSE, byref(tp), sizeof(tp), NULL, NULL)
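+
+# Illustrative sketch (not part of the upstream winappdbg API): enabling the
+# debug privilege on the current process token with the wrappers above.
+# GetCurrentProcess() is assumed to come from the kernel32 wrapper imported at
+# the top of this module, and close() from its Handle class.
+#
+#   hToken = OpenProcessToken(GetCurrentProcess(),
+#                             TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY)
+#   try:
+#       AdjustTokenPrivileges(hToken, [(SE_DEBUG_NAME, True)])
+#   finally:
+#       hToken.close()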
+
+# BOOL WINAPI GetTokenInformation(
+#   __in       HANDLE TokenHandle,
+#   __in       TOKEN_INFORMATION_CLASS TokenInformationClass,
+#   __out_opt  LPVOID TokenInformation,
+#   __in       DWORD TokenInformationLength,
+#   __out      PDWORD ReturnLength
+# );
+def GetTokenInformation(hTokenHandle, TokenInformationClass):
+    if TokenInformationClass <= 0 or TokenInformationClass >= MaxTokenInfoClass:
+        raise ValueError("Invalid value for TokenInformationClass (%i)" % TokenInformationClass)
+
+    # User SID.
+    if TokenInformationClass == TokenUser:
+        TokenInformation = TOKEN_USER()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.User.Sid.value
+
+    # Owner SID.
+    if TokenInformationClass == TokenOwner:
+        TokenInformation = TOKEN_OWNER()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.Owner.value
+
+    # Primary group SID.
+    if TokenInformationClass == TokenPrimaryGroup:
+        TokenInformation = TOKEN_PRIMARY_GROUP()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.PrimaryGroup.value
+
+    # App container SID.
+    if TokenInformationClass == TokenAppContainerSid:
+        TokenInformation = TOKEN_APPCONTAINER_INFORMATION()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.TokenAppContainer.value
+
+    # Integrity level SID.
+    if TokenInformationClass == TokenIntegrityLevel:
+        TokenInformation = TOKEN_MANDATORY_LABEL()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.Label.Sid.value, TokenInformation.Label.Attributes
+
+    # Logon session LUID.
+    if TokenInformationClass == TokenOrigin:
+        TokenInformation = TOKEN_ORIGIN()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.OriginatingLogonSession
+
+    # Primary or impersonation token.
+    if TokenInformationClass == TokenType:
+        TokenInformation = TOKEN_TYPE(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.value
+
+    # Elevated token.
+    if TokenInformationClass == TokenElevation:
+        # TOKEN_ELEVATION is a structure holding a single DWORD (TokenIsElevated).
+        TokenInformation = DWORD(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return bool(TokenInformation.value)
+
+    # Security impersonation level.
+    if TokenInformationClass == TokenImpersonationLevel:
+        TokenInformation = SECURITY_IMPERSONATION_LEVEL(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.value
+
+    # Session ID and other DWORD values.
+    if TokenInformationClass in (TokenSessionId, TokenAppContainerNumber):
+        TokenInformation = DWORD(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation.value
+
+    # Various boolean flags.
+    if TokenInformationClass in (TokenSandBoxInert, TokenHasRestrictions, TokenUIAccess,
+                                 TokenVirtualizationAllowed, TokenVirtualizationEnabled):
+        TokenInformation = DWORD(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return bool(TokenInformation.value)
+
+    # Linked token.
+    if TokenInformationClass == TokenLinkedToken:
+        TokenInformation = TOKEN_LINKED_TOKEN(0)
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenHandle(TokenInformation.LinkedToken.value, bOwnership = True)
+
+    # Token statistics.
+    if TokenInformationClass == TokenStatistics:
+        TokenInformation = TOKEN_STATISTICS()
+        _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation)
+        return TokenInformation # TODO add a class wrapper?
+
+    # Currently unsupported flags.
+    raise NotImplementedError("TokenInformationClass(%i) not yet supported!" % TokenInformationClass)
+
+def _internal_GetTokenInformation(hTokenHandle, TokenInformationClass, TokenInformation):
+    _GetTokenInformation = windll.advapi32.GetTokenInformation
+    _GetTokenInformation.argtypes = [HANDLE, TOKEN_INFORMATION_CLASS, LPVOID, DWORD, PDWORD]
+    _GetTokenInformation.restype  = bool
+    _GetTokenInformation.errcheck = RaiseIfZero
+
+    ReturnLength = DWORD(0)
+    TokenInformationLength = SIZEOF(TokenInformation)
+    _GetTokenInformation(hTokenHandle, TokenInformationClass, byref(TokenInformation), TokenInformationLength, byref(ReturnLength))
+    if ReturnLength.value != TokenInformationLength:
+        raise ctypes.WinError(ERROR_INSUFFICIENT_BUFFER)
+    return TokenInformation
+
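+# Editor's usage sketch (not part of the original module, never called at import
+# time): given an already opened token handle -- for example one returned by the
+# OpenProcessToken() wrapper elsewhere in this module -- query a few of the
+# information classes dispatched by the GetTokenInformation() wrapper completed above.
+def _example_query_token_info(hToken):
+    bElevated          = bool( GetTokenInformation(hToken, TokenElevation) )
+    dwSessionId        = GetTokenInformation(hToken, TokenSessionId)
+    sidLabel, dwAttrib = GetTokenInformation(hToken, TokenIntegrityLevel)
+    return bElevated, dwSessionId, sidLabel, dwAttrib
+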
+# BOOL WINAPI SetTokenInformation(
+#   __in  HANDLE TokenHandle,
+#   __in  TOKEN_INFORMATION_CLASS TokenInformationClass,
+#   __in  LPVOID TokenInformation,
+#   __in  DWORD TokenInformationLength
+# );
+
+# XXX TODO
+
+# BOOL WINAPI CreateProcessWithLogonW(
+#   __in         LPCWSTR lpUsername,
+#   __in_opt     LPCWSTR lpDomain,
+#   __in         LPCWSTR lpPassword,
+#   __in         DWORD dwLogonFlags,
+#   __in_opt     LPCWSTR lpApplicationName,
+#   __inout_opt  LPWSTR lpCommandLine,
+#   __in         DWORD dwCreationFlags,
+#   __in_opt     LPVOID lpEnvironment,
+#   __in_opt     LPCWSTR lpCurrentDirectory,
+#   __in         LPSTARTUPINFOW lpStartupInfo,
+#   __out        LPPROCESS_INFORMATION lpProcessInfo
+# );
+def CreateProcessWithLogonW(lpUsername = None, lpDomain = None, lpPassword = None, dwLogonFlags = 0, lpApplicationName = None, lpCommandLine = None, dwCreationFlags = 0, lpEnvironment = None, lpCurrentDirectory = None, lpStartupInfo = None):
+    _CreateProcessWithLogonW = windll.advapi32.CreateProcessWithLogonW
+    _CreateProcessWithLogonW.argtypes = [LPWSTR, LPWSTR, LPWSTR, DWORD, LPWSTR, LPWSTR, DWORD, LPVOID, LPWSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessWithLogonW.restype = bool
+    _CreateProcessWithLogonW.errcheck = RaiseIfZero
+
+    if not lpUsername:
+        lpUsername          = None
+    if not lpDomain:
+        lpDomain            = None
+    if not lpPassword:
+        lpPassword          = None
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_unicode_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_unicode_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFOW()
+        lpStartupInfo.cb           = sizeof(STARTUPINFOW)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessWithLogonW(lpUsername, lpDomain, lpPassword, dwLogonFlags, lpApplicationName, lpCommandLine, dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+CreateProcessWithLogonA = MakeANSIVersion(CreateProcessWithLogonW)
+CreateProcessWithLogon = DefaultStringType(CreateProcessWithLogonA, CreateProcessWithLogonW)
+
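+# Editor's usage sketch (not part of the original module): launch a command under
+# alternate credentials with the CreateProcessWithLogonW wrapper above. The user,
+# domain, password and command line are caller-supplied placeholders; dwLogonFlags
+# is left at 0, so no profile loading is requested. The returned ProcessInformation
+# wrapper is assumed to expose dwProcessId, like the kernel32 wrapper of that name.
+def _example_run_as_user(szUser, szDomain, szPassword, szCommandLine):
+    pi = CreateProcessWithLogonW(szUser, szDomain, szPassword,
+                                 lpCommandLine = szCommandLine)
+    return pi.dwProcessId
+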
+# BOOL WINAPI CreateProcessWithTokenW(
+#   __in         HANDLE hToken,
+#   __in         DWORD dwLogonFlags,
+#   __in_opt     LPCWSTR lpApplicationName,
+#   __inout_opt  LPWSTR lpCommandLine,
+#   __in         DWORD dwCreationFlags,
+#   __in_opt     LPVOID lpEnvironment,
+#   __in_opt     LPCWSTR lpCurrentDirectory,
+#   __in         LPSTARTUPINFOW lpStartupInfo,
+#   __out        LPPROCESS_INFORMATION lpProcessInfo
+# );
+def CreateProcessWithTokenW(hToken = None, dwLogonFlags = 0, lpApplicationName = None, lpCommandLine = None, dwCreationFlags = 0, lpEnvironment = None, lpCurrentDirectory = None, lpStartupInfo = None):
+    _CreateProcessWithTokenW = windll.advapi32.CreateProcessWithTokenW
+    _CreateProcessWithTokenW.argtypes = [HANDLE, DWORD, LPWSTR, LPWSTR, DWORD, LPVOID, LPWSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessWithTokenW.restype = bool
+    _CreateProcessWithTokenW.errcheck = RaiseIfZero
+
+    if not hToken:
+        hToken              = None
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_unicode_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_unicode_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFOW()
+        lpStartupInfo.cb           = sizeof(STARTUPINFOW)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessWithTokenW(hToken, dwLogonFlags, lpApplicationName, lpCommandLine, dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+CreateProcessWithTokenA = MakeANSIVersion(CreateProcessWithTokenW)
+CreateProcessWithToken = DefaultStringType(CreateProcessWithTokenA, CreateProcessWithTokenW)
+
+# BOOL WINAPI CreateProcessAsUser(
+#   __in_opt     HANDLE hToken,
+#   __in_opt     LPCTSTR lpApplicationName,
+#   __inout_opt  LPTSTR lpCommandLine,
+#   __in_opt     LPSECURITY_ATTRIBUTES lpProcessAttributes,
+#   __in_opt     LPSECURITY_ATTRIBUTES lpThreadAttributes,
+#   __in         BOOL bInheritHandles,
+#   __in         DWORD dwCreationFlags,
+#   __in_opt     LPVOID lpEnvironment,
+#   __in_opt     LPCTSTR lpCurrentDirectory,
+#   __in         LPSTARTUPINFO lpStartupInfo,
+#   __out        LPPROCESS_INFORMATION lpProcessInformation
+# );
+def CreateProcessAsUserA(hToken = None, lpApplicationName = None, lpCommandLine=None, lpProcessAttributes=None, lpThreadAttributes=None, bInheritHandles=False, dwCreationFlags=0, lpEnvironment=None, lpCurrentDirectory=None, lpStartupInfo=None):
+    _CreateProcessAsUserA = windll.advapi32.CreateProcessAsUserA
+    _CreateProcessAsUserA.argtypes = [HANDLE, LPSTR, LPSTR, LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessAsUserA.restype  = bool
+    _CreateProcessAsUserA.errcheck = RaiseIfZero
+
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_string_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_string_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpProcessAttributes:
+        lpProcessAttributes = None
+    else:
+        lpProcessAttributes = byref(lpProcessAttributes)
+    if not lpThreadAttributes:
+        lpThreadAttributes = None
+    else:
+        lpThreadAttributes = byref(lpThreadAttributes)
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFO()
+        lpStartupInfo.cb           = sizeof(STARTUPINFO)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessAsUserA(hToken, lpApplicationName, lpCommandLine, lpProcessAttributes, lpThreadAttributes, bool(bInheritHandles), dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+def CreateProcessAsUserW(hToken = None, lpApplicationName = None, lpCommandLine=None, lpProcessAttributes=None, lpThreadAttributes=None, bInheritHandles=False, dwCreationFlags=0, lpEnvironment=None, lpCurrentDirectory=None, lpStartupInfo=None):
+    _CreateProcessAsUserW = windll.advapi32.CreateProcessAsUserW
+    _CreateProcessAsUserW.argtypes = [HANDLE, LPWSTR, LPWSTR, LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPWSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessAsUserW.restype  = bool
+    _CreateProcessAsUserW.errcheck = RaiseIfZero
+
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_unicode_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_unicode_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpProcessAttributes:
+        lpProcessAttributes = None
+    else:
+        lpProcessAttributes = byref(lpProcessAttributes)
+    if not lpThreadAttributes:
+        lpThreadAttributes = None
+    else:
+        lpThreadAttributes = byref(lpThreadAttributes)
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFO()
+        lpStartupInfo.cb           = sizeof(STARTUPINFO)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessAsUserW(hToken, lpApplicationName, lpCommandLine, lpProcessAttributes, lpThreadAttributes, bool(bInheritHandles), dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+CreateProcessAsUser = GuessStringType(CreateProcessAsUserA, CreateProcessAsUserW)
+
+# VOID CALLBACK WaitChainCallback(
+#     HWCT WctHandle,
+#     DWORD_PTR Context,
+#     DWORD CallbackStatus,
+#     LPDWORD NodeCount,
+#     PWAITCHAIN_NODE_INFO NodeInfoArray,
+#     LPBOOL IsCycle
+# );
+PWAITCHAINCALLBACK = WINFUNCTYPE(None, HWCT, DWORD_PTR, DWORD, LPDWORD, PWAITCHAIN_NODE_INFO, LPBOOL)
+
+# HWCT WINAPI OpenThreadWaitChainSession(
+#   __in      DWORD Flags,
+#   __in_opt  PWAITCHAINCALLBACK callback
+# );
+def OpenThreadWaitChainSession(Flags = 0, callback = None):
+    _OpenThreadWaitChainSession = windll.advapi32.OpenThreadWaitChainSession
+    _OpenThreadWaitChainSession.argtypes = [DWORD, PVOID]
+    _OpenThreadWaitChainSession.restype  = HWCT
+    _OpenThreadWaitChainSession.errcheck = RaiseIfZero
+
+    if callback is not None:
+        callback = PWAITCHAINCALLBACK(callback)
+    aHandle = _OpenThreadWaitChainSession(Flags, callback)
+    return ThreadWaitChainSessionHandle(aHandle)
+
+# BOOL WINAPI GetThreadWaitChain(
+#   _In_      HWCT WctHandle,
+#   _In_opt_  DWORD_PTR Context,
+#   _In_      DWORD Flags,
+#   _In_      DWORD ThreadId,
+#   _Inout_   LPDWORD NodeCount,
+#   _Out_     PWAITCHAIN_NODE_INFO NodeInfoArray,
+#   _Out_     LPBOOL IsCycle
+# );
+def GetThreadWaitChain(WctHandle, Context = None, Flags = WCTP_GETINFO_ALL_FLAGS, ThreadId = -1, NodeCount = WCT_MAX_NODE_COUNT):
+    _GetThreadWaitChain = windll.advapi32.GetThreadWaitChain
+    _GetThreadWaitChain.argtypes = [HWCT, LPDWORD, DWORD, DWORD, LPDWORD, PWAITCHAIN_NODE_INFO, LPBOOL]
+    _GetThreadWaitChain.restype  = bool
+    _GetThreadWaitChain.errcheck = RaiseIfZero
+
+    dwNodeCount = DWORD(NodeCount)
+    NodeInfoArray = (WAITCHAIN_NODE_INFO * NodeCount)()
+    IsCycle = BOOL(0)
+    _GetThreadWaitChain(WctHandle, Context, Flags, ThreadId, byref(dwNodeCount), ctypes.cast(ctypes.pointer(NodeInfoArray), PWAITCHAIN_NODE_INFO), byref(IsCycle))
+    while dwNodeCount.value > NodeCount:
+        NodeCount = dwNodeCount.value
+        NodeInfoArray = (WAITCHAIN_NODE_INFO * NodeCount)()
+        _GetThreadWaitChain(WctHandle, Context, Flags, ThreadId, byref(dwNodeCount), ctypes.cast(ctypes.pointer(NodeInfoArray), PWAITCHAIN_NODE_INFO), byref(IsCycle))
+    return (
+        [ WaitChainNodeInfo(NodeInfoArray[index]) for index in range(dwNodeCount.value) ],
+        bool(IsCycle.value)
+    )
+
+# VOID WINAPI CloseThreadWaitChainSession(
+#   __in  HWCT WctHandle
+# );
+def CloseThreadWaitChainSession(WctHandle):
+    _CloseThreadWaitChainSession = windll.advapi32.CloseThreadWaitChainSession
+    _CloseThreadWaitChainSession.argtypes = [HWCT]
+    _CloseThreadWaitChainSession(WctHandle)
+
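+# Editor's usage sketch (not part of the original module): open a Wait Chain
+# Traversal session, retrieve the wait chain of a given thread, and close the
+# session again, combining the three wrappers above.
+def _example_thread_wait_chain(dwThreadId):
+    hWct = OpenThreadWaitChainSession()
+    try:
+        aNodes, bIsCycle = GetThreadWaitChain(hWct, ThreadId = dwThreadId)
+        return aNodes, bIsCycle
+    finally:
+        CloseThreadWaitChainSession(hWct)
+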
+# BOOL WINAPI SaferCreateLevel(
+#   __in        DWORD dwScopeId,
+#   __in        DWORD dwLevelId,
+#   __in        DWORD OpenFlags,
+#   __out       SAFER_LEVEL_HANDLE *pLevelHandle,
+#   __reserved  LPVOID lpReserved
+# );
+def SaferCreateLevel(dwScopeId=SAFER_SCOPEID_USER, dwLevelId=SAFER_LEVELID_NORMALUSER, OpenFlags=0):
+    _SaferCreateLevel = windll.advapi32.SaferCreateLevel
+    _SaferCreateLevel.argtypes = [DWORD, DWORD, DWORD, POINTER(SAFER_LEVEL_HANDLE), LPVOID]
+    _SaferCreateLevel.restype  = BOOL
+    _SaferCreateLevel.errcheck = RaiseIfZero
+
+    hLevelHandle = SAFER_LEVEL_HANDLE(INVALID_HANDLE_VALUE)
+    _SaferCreateLevel(dwScopeId, dwLevelId, OpenFlags, byref(hLevelHandle), None)
+    return SaferLevelHandle(hLevelHandle.value)
+
+# BOOL WINAPI SaferIdentifyLevel(
+#   __in        DWORD dwNumProperties,
+#   __in_opt    PSAFER_CODE_PROPERTIES pCodeProperties,
+#   __out       SAFER_LEVEL_HANDLE *pLevelHandle,
+#   __reserved  LPVOID lpReserved
+# );
+
+# XXX TODO
+
+# BOOL WINAPI SaferComputeTokenFromLevel(
+#   __in         SAFER_LEVEL_HANDLE LevelHandle,
+#   __in_opt     HANDLE InAccessToken,
+#   __out        PHANDLE OutAccessToken,
+#   __in         DWORD dwFlags,
+#   __inout_opt  LPVOID lpReserved
+# );
+def SaferComputeTokenFromLevel(LevelHandle, InAccessToken=None, dwFlags=0):
+    _SaferComputeTokenFromLevel = windll.advapi32.SaferComputeTokenFromLevel
+    _SaferComputeTokenFromLevel.argtypes = [SAFER_LEVEL_HANDLE, HANDLE, PHANDLE, DWORD, LPDWORD]
+    _SaferComputeTokenFromLevel.restype  = BOOL
+    _SaferComputeTokenFromLevel.errcheck = RaiseIfZero
+
+    OutAccessToken = HANDLE(INVALID_HANDLE_VALUE)
+    lpReserved = DWORD(0)
+    _SaferComputeTokenFromLevel(LevelHandle, InAccessToken, byref(OutAccessToken), dwFlags, byref(lpReserved))
+    return TokenHandle(OutAccessToken.value), lpReserved.value
+
+# BOOL WINAPI SaferCloseLevel(
+#   __in  SAFER_LEVEL_HANDLE hLevelHandle
+# );
+def SaferCloseLevel(hLevelHandle):
+    _SaferCloseLevel = windll.advapi32.SaferCloseLevel
+    _SaferCloseLevel.argtypes = [SAFER_LEVEL_HANDLE]
+    _SaferCloseLevel.restype  = BOOL
+    _SaferCloseLevel.errcheck = RaiseIfZero
+
+    if hasattr(hLevelHandle, 'value'):
+        _SaferCloseLevel(hLevelHandle.value)
+    else:
+        _SaferCloseLevel(hLevelHandle)
+
+# BOOL SaferiIsExecutableFileType(
+#   __in  LPCWSTR szFullPath,
+#   __in  BOOLEAN bFromShellExecute
+# );
+def SaferiIsExecutableFileType(szFullPath, bFromShellExecute = False):
+    _SaferiIsExecutableFileType = windll.advapi32.SaferiIsExecutableFileType
+    _SaferiIsExecutableFileType.argtypes = [LPWSTR, BOOLEAN]
+    _SaferiIsExecutableFileType.restype  = BOOL
+    _SaferiIsExecutableFileType.errcheck = RaiseIfLastError
+
+    SetLastError(ERROR_SUCCESS)
+    return bool(_SaferiIsExecutableFileType(unicode(szFullPath), bFromShellExecute))
+
+# useful alias since I'm likely to misspell it :P
+SaferIsExecutableFileType = SaferiIsExecutableFileType
+
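+# Editor's usage sketch (not part of the original module): compute a restricted
+# token for the current user at the default "normal user" Safer level, using the
+# wrappers above. The returned SaferLevelHandle wrapper is assumed to release the
+# level handle when garbage collected, as other Handle subclasses in this module do.
+def _example_safer_restricted_token():
+    hLevel = SaferCreateLevel()    # defaults: SAFER_SCOPEID_USER / SAFER_LEVELID_NORMALUSER
+    hToken, _reserved = SaferComputeTokenFromLevel(hLevel)
+    return hToken
+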
+#------------------------------------------------------------------------------
+
+# LONG WINAPI RegCloseKey(
+#   __in  HKEY hKey
+# );
+def RegCloseKey(hKey):
+    if hasattr(hKey, 'value'):
+        value = hKey.value
+    else:
+        value = hKey
+
+    if value in (
+            HKEY_CLASSES_ROOT,
+            HKEY_CURRENT_USER,
+            HKEY_LOCAL_MACHINE,
+            HKEY_USERS,
+            HKEY_PERFORMANCE_DATA,
+            HKEY_CURRENT_CONFIG
+        ):
+        return
+
+    _RegCloseKey = windll.advapi32.RegCloseKey
+    _RegCloseKey.argtypes = [HKEY]
+    _RegCloseKey.restype  = LONG
+    _RegCloseKey.errcheck = RaiseIfNotErrorSuccess
+    _RegCloseKey(hKey)
+
+# LONG WINAPI RegConnectRegistry(
+#   __in_opt  LPCTSTR lpMachineName,
+#   __in      HKEY hKey,
+#   __out     PHKEY phkResult
+# );
+def RegConnectRegistryA(lpMachineName = None, hKey = HKEY_LOCAL_MACHINE):
+    _RegConnectRegistryA = windll.advapi32.RegConnectRegistryA
+    _RegConnectRegistryA.argtypes = [LPSTR, HKEY, PHKEY]
+    _RegConnectRegistryA.restype  = LONG
+    _RegConnectRegistryA.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegConnectRegistryA(lpMachineName, hKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+def RegConnectRegistryW(lpMachineName = None, hKey = HKEY_LOCAL_MACHINE):
+    _RegConnectRegistryW = windll.advapi32.RegConnectRegistryW
+    _RegConnectRegistryW.argtypes = [LPWSTR, HKEY, PHKEY]
+    _RegConnectRegistryW.restype  = LONG
+    _RegConnectRegistryW.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegConnectRegistryW(lpMachineName, hKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+RegConnectRegistry = GuessStringType(RegConnectRegistryA, RegConnectRegistryW)
+
+# LONG WINAPI RegCreateKey(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpSubKey,
+#   __out     PHKEY phkResult
+# );
+def RegCreateKeyA(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None):
+    _RegCreateKeyA = windll.advapi32.RegCreateKeyA
+    _RegCreateKeyA.argtypes = [HKEY, LPSTR, PHKEY]
+    _RegCreateKeyA.restype  = LONG
+    _RegCreateKeyA.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegCreateKeyA(hKey, lpSubKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+def RegCreateKeyW(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None):
+    _RegCreateKeyW = windll.advapi32.RegCreateKeyW
+    _RegCreateKeyW.argtypes = [HKEY, LPWSTR, PHKEY]
+    _RegCreateKeyW.restype  = LONG
+    _RegCreateKeyW.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegCreateKeyW(hKey, lpSubKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+RegCreateKey = GuessStringType(RegCreateKeyA, RegCreateKeyW)
+
+# LONG WINAPI RegCreateKeyEx(
+#   __in        HKEY hKey,
+#   __in        LPCTSTR lpSubKey,
+#   __reserved  DWORD Reserved,
+#   __in_opt    LPTSTR lpClass,
+#   __in        DWORD dwOptions,
+#   __in        REGSAM samDesired,
+#   __in_opt    LPSECURITY_ATTRIBUTES lpSecurityAttributes,
+#   __out       PHKEY phkResult,
+#   __out_opt   LPDWORD lpdwDisposition
+# );
+
+# XXX TODO
+
+# LONG WINAPI RegOpenKey(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpSubKey,
+#   __out     PHKEY phkResult
+# );
+def RegOpenKeyA(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None):
+    _RegOpenKeyA = windll.advapi32.RegOpenKeyA
+    _RegOpenKeyA.argtypes = [HKEY, LPSTR, PHKEY]
+    _RegOpenKeyA.restype  = LONG
+    _RegOpenKeyA.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenKeyA(hKey, lpSubKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+def RegOpenKeyW(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None):
+    _RegOpenKeyW = windll.advapi32.RegOpenKeyW
+    _RegOpenKeyW.argtypes = [HKEY, LPWSTR, PHKEY]
+    _RegOpenKeyW.restype  = LONG
+    _RegOpenKeyW.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenKeyW(hKey, lpSubKey, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+RegOpenKey = GuessStringType(RegOpenKeyA, RegOpenKeyW)
+
+# LONG WINAPI RegOpenKeyEx(
+#   __in        HKEY hKey,
+#   __in_opt    LPCTSTR lpSubKey,
+#   __reserved  DWORD ulOptions,
+#   __in        REGSAM samDesired,
+#   __out       PHKEY phkResult
+# );
+def RegOpenKeyExA(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None, samDesired = KEY_ALL_ACCESS):
+    _RegOpenKeyExA = windll.advapi32.RegOpenKeyExA
+    _RegOpenKeyExA.argtypes = [HKEY, LPSTR, DWORD, REGSAM, PHKEY]
+    _RegOpenKeyExA.restype  = LONG
+    _RegOpenKeyExA.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenKeyExA(hKey, lpSubKey, 0, samDesired, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+def RegOpenKeyExW(hKey = HKEY_LOCAL_MACHINE, lpSubKey = None, samDesired = KEY_ALL_ACCESS):
+    _RegOpenKeyExW = windll.advapi32.RegOpenKeyExW
+    _RegOpenKeyExW.argtypes = [HKEY, LPWSTR, DWORD, REGSAM, PHKEY]
+    _RegOpenKeyExW.restype  = LONG
+    _RegOpenKeyExW.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenKeyExW(hKey, lpSubKey, 0, samDesired, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+RegOpenKeyEx = GuessStringType(RegOpenKeyExA, RegOpenKeyExW)
+
+# LONG WINAPI RegOpenCurrentUser(
+#   __in   REGSAM samDesired,
+#   __out  PHKEY phkResult
+# );
+def RegOpenCurrentUser(samDesired = KEY_ALL_ACCESS):
+    _RegOpenCurrentUser = windll.advapi32.RegOpenCurrentUser
+    _RegOpenCurrentUser.argtypes = [REGSAM, PHKEY]
+    _RegOpenCurrentUser.restype  = LONG
+    _RegOpenCurrentUser.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenCurrentUser(samDesired, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+# LONG WINAPI RegOpenUserClassesRoot(
+#   __in        HANDLE hToken,
+#   __reserved  DWORD dwOptions,
+#   __in        REGSAM samDesired,
+#   __out       PHKEY phkResult
+# );
+def RegOpenUserClassesRoot(hToken, samDesired = KEY_ALL_ACCESS):
+    _RegOpenUserClassesRoot = windll.advapi32.RegOpenUserClassesRoot
+    _RegOpenUserClassesRoot.argtypes = [HANDLE, DWORD, REGSAM, PHKEY]
+    _RegOpenUserClassesRoot.restype  = LONG
+    _RegOpenUserClassesRoot.errcheck = RaiseIfNotErrorSuccess
+
+    hkResult = HKEY(INVALID_HANDLE_VALUE)
+    _RegOpenUserClassesRoot(hToken, 0, samDesired, byref(hkResult))
+    return RegistryKeyHandle(hkResult.value)
+
+# LONG WINAPI RegQueryValue(
+#   __in         HKEY hKey,
+#   __in_opt     LPCTSTR lpSubKey,
+#   __out_opt    LPTSTR lpValue,
+#   __inout_opt  PLONG lpcbValue
+# );
+def RegQueryValueA(hKey, lpSubKey = None):
+    _RegQueryValueA = windll.advapi32.RegQueryValueA
+    _RegQueryValueA.argtypes = [HKEY, LPSTR, LPVOID, PLONG]
+    _RegQueryValueA.restype  = LONG
+    _RegQueryValueA.errcheck = RaiseIfNotErrorSuccess
+
+    cbValue = LONG(0)
+    _RegQueryValueA(hKey, lpSubKey, None, byref(cbValue))
+    lpValue = ctypes.create_string_buffer(cbValue.value)
+    _RegQueryValueA(hKey, lpSubKey, lpValue, byref(cbValue))
+    return lpValue.value
+
+def RegQueryValueW(hKey, lpSubKey = None):
+    _RegQueryValueW = windll.advapi32.RegQueryValueW
+    _RegQueryValueW.argtypes = [HKEY, LPWSTR, LPVOID, PLONG]
+    _RegQueryValueW.restype  = LONG
+    _RegQueryValueW.errcheck = RaiseIfNotErrorSuccess
+
+    cbValue = LONG(0)
+    _RegQueryValueW(hKey, lpSubKey, None, byref(cbValue))
+    lpValue = ctypes.create_unicode_buffer(cbValue.value * sizeof(WCHAR))
+    _RegQueryValueW(hKey, lpSubKey, lpValue, byref(cbValue))
+    return lpValue.value
+
+RegQueryValue = GuessStringType(RegQueryValueA, RegQueryValueW)
+
+# LONG WINAPI RegQueryValueEx(
+#   __in         HKEY hKey,
+#   __in_opt     LPCTSTR lpValueName,
+#   __reserved   LPDWORD lpReserved,
+#   __out_opt    LPDWORD lpType,
+#   __out_opt    LPBYTE lpData,
+#   __inout_opt  LPDWORD lpcbData
+# );
+def _internal_RegQueryValueEx(ansi, hKey, lpValueName = None, bGetData = True):
+    _RegQueryValueEx = _caller_RegQueryValueEx(ansi)
+
+    cbData = DWORD(0)
+    dwType = DWORD(-1)
+    _RegQueryValueEx(hKey, lpValueName, None, byref(dwType), None, byref(cbData))
+    Type = dwType.value
+
+    if not bGetData:
+        return cbData.value, Type
+
+    if Type in (REG_DWORD, REG_DWORD_BIG_ENDIAN):   # REG_DWORD_LITTLE_ENDIAN
+        if cbData.value != 4:
+            raise ValueError("REG_DWORD value of size %d" % cbData.value)
+        dwData = DWORD(0)
+        _RegQueryValueEx(hKey, lpValueName, None, None, byref(dwData), byref(cbData))
+        return dwData.value, Type
+
+    if Type == REG_QWORD:   # REG_QWORD_LITTLE_ENDIAN
+        if cbData.value != 8:
+            raise ValueError("REG_QWORD value of size %d" % cbData.value)
+        qwData = QWORD(0)
+        _RegQueryValueEx(hKey, lpValueName, None, None, byref(qwData), byref(cbData))
+        return qwData.value, Type
+
+    if Type in (REG_SZ, REG_EXPAND_SZ):
+        if ansi:
+            szData = ctypes.create_string_buffer(cbData.value)
+        else:
+            szData = ctypes.create_unicode_buffer(cbData.value)
+        _RegQueryValueEx(hKey, lpValueName, None, None, byref(szData), byref(cbData))
+        return szData.value, Type
+
+    if Type == REG_MULTI_SZ:
+        if ansi:
+            szData = ctypes.create_string_buffer(cbData.value)
+        else:
+            szData = ctypes.create_unicode_buffer(cbData.value)
+        _RegQueryValueEx(hKey, lpValueName, None, None, byref(szData), byref(cbData))
+        Data = szData[:]
+        if ansi:
+            aData = Data.split('\0')
+        else:
+            aData = Data.split(u'\0')
+        aData = [token for token in aData if token]
+        return aData, Type
+
+    if Type == REG_LINK:
+        szData = ctypes.create_unicode_buffer(cbData.value)
+        _RegQueryValueEx(hKey, lpValueName, None, None, byref(szData), byref(cbData))
+        return szData.value, Type
+
+    # REG_BINARY, REG_NONE, and any future types
+    szData = ctypes.create_string_buffer(cbData.value)
+    _RegQueryValueEx(hKey, lpValueName, None, None, byref(szData), byref(cbData))
+    return szData.raw, Type
+
+def _caller_RegQueryValueEx(ansi):
+    if ansi:
+        _RegQueryValueEx = windll.advapi32.RegQueryValueExA
+        _RegQueryValueEx.argtypes = [HKEY, LPSTR, LPVOID, PDWORD, LPVOID, PDWORD]
+    else:
+        _RegQueryValueEx = windll.advapi32.RegQueryValueExW
+        _RegQueryValueEx.argtypes = [HKEY, LPWSTR, LPVOID, PDWORD, LPVOID, PDWORD]
+    _RegQueryValueEx.restype  = LONG
+    _RegQueryValueEx.errcheck = RaiseIfNotErrorSuccess
+    return _RegQueryValueEx
+
+# see _internal_RegQueryValueEx
+def RegQueryValueExA(hKey, lpValueName = None, bGetData = True):
+    return _internal_RegQueryValueEx(True, hKey, lpValueName, bGetData)
+
+# see _internal_RegQueryValueEx
+def RegQueryValueExW(hKey, lpValueName = None, bGetData = True):
+    return _internal_RegQueryValueEx(False, hKey, lpValueName, bGetData)
+
+RegQueryValueEx = GuessStringType(RegQueryValueExA, RegQueryValueExW)
+
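+# Editor's usage sketch (not part of the original module): open a key, read one of
+# its values and close the key again, combining RegOpenKeyEx, RegQueryValueEx and
+# RegCloseKey from above. The subkey path and value name are caller-supplied
+# placeholders.
+def _example_read_registry_value(szSubKey, szValueName):
+    # Note: the default samDesired is KEY_ALL_ACCESS, which may require elevation
+    # when opening keys under HKEY_LOCAL_MACHINE.
+    hKey = RegOpenKeyEx(HKEY_LOCAL_MACHINE, szSubKey)
+    try:
+        return RegQueryValueEx(hKey, szValueName)   # -> (data, type)
+    finally:
+        RegCloseKey(hKey)
+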
+# LONG WINAPI RegSetValueEx(
+#   __in        HKEY hKey,
+#   __in_opt    LPCTSTR lpValueName,
+#   __reserved  DWORD Reserved,
+#   __in        DWORD dwType,
+#   __in_opt    const BYTE *lpData,
+#   __in        DWORD cbData
+# );
+def RegSetValueEx(hKey, lpValueName = None, lpData = None, dwType = None):
+
+    # Determine which version of the API to use, ANSI or Widechar.
+    if lpValueName is None:
+        if isinstance(lpData, GuessStringType.t_ansi):
+            ansi = True
+        elif isinstance(lpData, GuessStringType.t_unicode):
+            ansi = False
+        else:
+            ansi = (GuessStringType.t_ansi == GuessStringType.t_default)
+    elif isinstance(lpValueName, GuessStringType.t_ansi):
+        ansi = True
+    elif isinstance(lpValueName, GuessStringType.t_unicode):
+        ansi = False
+    else:
+        raise TypeError("String expected, got %s instead" % type(lpValueName))
+
+    # Autodetect the type when not given.
+    # For integers, REG_DWORD or REG_QWORD is chosen depending on whether the value fits in 32 bits.
+    if dwType is None:
+        if lpValueName is None:
+            dwType = REG_SZ
+        elif lpData is None:
+            dwType = REG_NONE
+        elif isinstance(lpData, GuessStringType.t_ansi):
+            dwType = REG_SZ
+        elif isinstance(lpData, GuessStringType.t_unicode):
+            dwType = REG_SZ
+        elif isinstance(lpData, int):
+            if -2147483648 <= lpData <= 0xFFFFFFFF:
+                dwType = REG_DWORD
+            else:
+                dwType = REG_QWORD
+        else:
+            dwType = REG_BINARY
+
+    # Load the ctypes caller.
+    if ansi:
+        _RegSetValueEx = windll.advapi32.RegSetValueExA
+        _RegSetValueEx.argtypes = [HKEY, LPSTR, DWORD, DWORD, LPVOID, DWORD]
+    else:
+        _RegSetValueEx = windll.advapi32.RegSetValueExW
+        _RegSetValueEx.argtypes = [HKEY, LPWSTR, DWORD, DWORD, LPVOID, DWORD]
+    _RegSetValueEx.restype  = LONG
+    _RegSetValueEx.errcheck = RaiseIfNotErrorSuccess
+
+    # Convert the arguments so ctypes can understand them.
+    if lpData is None:
+        DataRef  = None
+        DataSize = 0
+    else:
+        if dwType in (REG_DWORD, REG_DWORD_BIG_ENDIAN):  # REG_DWORD_LITTLE_ENDIAN
+            Data = DWORD(lpData)
+        elif dwType == REG_QWORD:   # REG_QWORD_LITTLE_ENDIAN
+            Data = QWORD(lpData)
+        elif dwType in (REG_SZ, REG_EXPAND_SZ):
+            if ansi:
+                Data = ctypes.create_string_buffer(lpData)
+            else:
+                Data = ctypes.create_unicode_buffer(lpData)
+        elif dwType == REG_MULTI_SZ:
+            if ansi:
+                Data = ctypes.create_string_buffer('\0'.join(lpData) + '\0\0')
+            else:
+                Data = ctypes.create_unicode_buffer(u'\0'.join(lpData) + u'\0\0')
+        elif dwType == REG_LINK:
+            Data = ctypes.create_unicode_buffer(lpData)
+        else:
+            Data = ctypes.create_string_buffer(lpData)
+        DataRef  = byref(Data)
+        DataSize = sizeof(Data)
+
+    # Call the API with the converted arguments.
+    _RegSetValueEx(hKey, lpValueName, 0, dwType, DataRef, DataSize)
+
+# No "GuessStringType" here since detection is done inside.
+RegSetValueExA = RegSetValueExW = RegSetValueEx
+
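+# Editor's usage sketch (not part of the original module): write a string and a
+# numeric value under an already opened key, relying on the type autodetection
+# implemented by RegSetValueEx above. The value names and data are illustrative
+# placeholders only.
+def _example_write_registry_values(hKey):
+    RegSetValueEx(hKey, u"InstallPath", u"C:\\Example")   # stored as REG_SZ
+    RegSetValueEx(hKey, u"Timeout", 30)                   # stored as REG_DWORD
+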
+# LONG WINAPI RegEnumKey(
+#   __in   HKEY hKey,
+#   __in   DWORD dwIndex,
+#   __out  LPTSTR lpName,
+#   __in   DWORD cchName
+# );
+def RegEnumKeyA(hKey, dwIndex):
+    _RegEnumKeyA = windll.advapi32.RegEnumKeyA
+    _RegEnumKeyA.argtypes = [HKEY, DWORD, LPSTR, DWORD]
+    _RegEnumKeyA.restype  = LONG
+
+    cchName = 1024
+    while True:
+        lpName = ctypes.create_string_buffer(cchName)
+        errcode = _RegEnumKeyA(hKey, dwIndex, lpName, cchName)
+        if errcode != ERROR_MORE_DATA:
+            break
+        cchName = cchName + 1024
+        if cchName > 65536:
+            raise ctypes.WinError(errcode)
+    if errcode == ERROR_NO_MORE_ITEMS:
+        return None
+    if errcode != ERROR_SUCCESS:
+        raise ctypes.WinError(errcode)
+    return lpName.value
+
+def RegEnumKeyW(hKey, dwIndex):
+    _RegEnumKeyW = windll.advapi32.RegEnumKeyW
+    _RegEnumKeyW.argtypes = [HKEY, DWORD, LPWSTR, DWORD]
+    _RegEnumKeyW.restype  = LONG
+
+    cchName = 512
+    while True:
+        lpName = ctypes.create_unicode_buffer(cchName)
+        errcode = _RegEnumKeyW(hKey, dwIndex, lpName, cchName)
+        if errcode != ERROR_MORE_DATA:
+            break
+        cchName = cchName + 512
+        if cchName > 32768:
+            raise ctypes.WinError(errcode)
+    if errcode == ERROR_NO_MORE_ITEMS:
+        return None
+    if errcode != ERROR_SUCCESS:
+        raise ctypes.WinError(errcode)
+    return lpName.value
+
+RegEnumKey = DefaultStringType(RegEnumKeyA, RegEnumKeyW)
+
+# LONG WINAPI RegEnumKeyEx(
+#   __in         HKEY hKey,
+#   __in         DWORD dwIndex,
+#   __out        LPTSTR lpName,
+#   __inout      LPDWORD lpcName,
+#   __reserved   LPDWORD lpReserved,
+#   __inout      LPTSTR lpClass,
+#   __inout_opt  LPDWORD lpcClass,
+#   __out_opt    PFILETIME lpftLastWriteTime
+# );
+
+# XXX TODO
+
+# LONG WINAPI RegEnumValue(
+#   __in         HKEY hKey,
+#   __in         DWORD dwIndex,
+#   __out        LPTSTR lpValueName,
+#   __inout      LPDWORD lpcchValueName,
+#   __reserved   LPDWORD lpReserved,
+#   __out_opt    LPDWORD lpType,
+#   __out_opt    LPBYTE lpData,
+#   __inout_opt  LPDWORD lpcbData
+# );
+def _internal_RegEnumValue(ansi, hKey, dwIndex, bGetData = True):
+    if ansi:
+        _RegEnumValue = windll.advapi32.RegEnumValueA
+        _RegEnumValue.argtypes = [HKEY, DWORD, LPSTR, LPDWORD, LPVOID, LPDWORD, LPVOID, LPDWORD]
+    else:
+        _RegEnumValue = windll.advapi32.RegEnumValueW
+        _RegEnumValue.argtypes = [HKEY, DWORD, LPWSTR, LPDWORD, LPVOID, LPDWORD, LPVOID, LPDWORD]
+    _RegEnumValue.restype  = LONG
+
+    cchValueName = DWORD(1024)
+    dwType = DWORD(-1)
+    lpcchValueName = byref(cchValueName)
+    lpType = byref(dwType)
+    if ansi:
+        lpValueName = ctypes.create_string_buffer(cchValueName.value)
+    else:
+        lpValueName = ctypes.create_unicode_buffer(cchValueName.value)
+    if bGetData:
+        cbData = DWORD(0)
+        lpcbData = byref(cbData)
+    else:
+        lpcbData = None
+    lpData = None
+    errcode = _RegEnumValue(hKey, dwIndex, lpValueName, lpcchValueName, None, lpType, lpData, lpcbData)
+
+    if errcode == ERROR_MORE_DATA or (bGetData and errcode == ERROR_SUCCESS):
+        if ansi:
+            cchValueName.value = cchValueName.value + sizeof(CHAR)
+            lpValueName = ctypes.create_string_buffer(cchValueName.value)
+        else:
+            cchValueName.value = cchValueName.value + sizeof(WCHAR)
+            lpValueName = ctypes.create_unicode_buffer(cchValueName.value)
+
+        if bGetData:
+            Type = dwType.value
+
+            if Type in (REG_DWORD, REG_DWORD_BIG_ENDIAN):   # REG_DWORD_LITTLE_ENDIAN
+                if cbData.value != sizeof(DWORD):
+                    raise ValueError("REG_DWORD value of size %d" % cbData.value)
+                Data = DWORD(0)
+
+            elif Type == REG_QWORD:   # REG_QWORD_LITTLE_ENDIAN
+                if cbData.value != sizeof(QWORD):
+                    raise ValueError("REG_QWORD value of size %d" % cbData.value)
+                Data = QWORD(0)
+
+            elif Type in (REG_SZ, REG_EXPAND_SZ, REG_MULTI_SZ):
+                if ansi:
+                    Data = ctypes.create_string_buffer(cbData.value)
+                else:
+                    Data = ctypes.create_unicode_buffer(cbData.value)
+
+            elif Type == REG_LINK:
+                Data = ctypes.create_unicode_buffer(cbData.value)
+
+            else:       # REG_BINARY, REG_NONE, and any future types
+                Data = ctypes.create_string_buffer(cbData.value)
+
+            lpData = byref(Data)
+
+        errcode = _RegEnumValue(hKey, dwIndex, lpValueName, lpcchValueName, None, lpType, lpData, lpcbData)
+
+    if errcode == ERROR_NO_MORE_ITEMS:
+        return None
+    #if errcode  != ERROR_SUCCESS:
+    #    raise ctypes.WinError(errcode)
+
+    if not bGetData:
+        return lpValueName.value, dwType.value
+
+    if Type in (REG_DWORD, REG_DWORD_BIG_ENDIAN, REG_QWORD, REG_SZ, REG_EXPAND_SZ, REG_LINK): # REG_DWORD_LITTLE_ENDIAN, REG_QWORD_LITTLE_ENDIAN
+        return lpValueName.value, dwType.value, Data.value
+
+    if Type == REG_MULTI_SZ:
+        sData = Data[:]
+        del Data
+        if ansi:
+            aData = sData.split('\0')
+        else:
+            aData = sData.split(u'\0')
+        aData = [token for token in aData if token]
+        return lpValueName.value, dwType.value, aData
+
+    # REG_BINARY, REG_NONE, and any future types
+    return lpValueName.value, dwType.value, Data.raw
+
+def RegEnumValueA(hKey, dwIndex, bGetData = True):
+    return _internal_RegEnumValue(True, hKey, dwIndex, bGetData)
+
+def RegEnumValueW(hKey, dwIndex, bGetData = True):
+    return _internal_RegEnumValue(False, hKey, dwIndex, bGetData)
+
+RegEnumValue = DefaultStringType(RegEnumValueA, RegEnumValueW)
+
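+# Editor's usage sketch (not part of the original module): enumerate all subkey
+# names and all values of an open registry key with RegEnumKey and RegEnumValue
+# above, stopping when either wrapper returns None (ERROR_NO_MORE_ITEMS).
+def _example_enum_registry_key(hKey):
+    aSubKeys = []
+    dwIndex  = 0
+    while True:
+        szName = RegEnumKey(hKey, dwIndex)
+        if szName is None:
+            break
+        aSubKeys.append(szName)
+        dwIndex += 1
+    aValues = []
+    dwIndex = 0
+    while True:
+        entry = RegEnumValue(hKey, dwIndex)
+        if entry is None:
+            break
+        aValues.append(entry)       # (name, type, data) tuples
+        dwIndex += 1
+    return aSubKeys, aValues
+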
+# XXX TODO
+
+# LONG WINAPI RegSetKeyValue(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpSubKey,
+#   __in_opt  LPCTSTR lpValueName,
+#   __in      DWORD dwType,
+#   __in_opt  LPCVOID lpData,
+#   __in      DWORD cbData
+# );
+
+# XXX TODO
+
+# LONG WINAPI RegQueryMultipleValues(
+#   __in         HKEY hKey,
+#   __out        PVALENT val_list,
+#   __in         DWORD num_vals,
+#   __out_opt    LPTSTR lpValueBuf,
+#   __inout_opt  LPDWORD ldwTotsize
+# );
+
+# XXX TODO
+
+# LONG WINAPI RegDeleteValue(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpValueName
+# );
+def RegDeleteValueA(hKeySrc, lpValueName = None):
+    _RegDeleteValueA = windll.advapi32.RegDeleteValueA
+    _RegDeleteValueA.argtypes = [HKEY, LPSTR]
+    _RegDeleteValueA.restype  = LONG
+    _RegDeleteValueA.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteValueA(hKeySrc, lpValueName)
+def RegDeleteValueW(hKeySrc, lpValueName = None):
+    _RegDeleteValueW = windll.advapi32.RegDeleteValueW
+    _RegDeleteValueW.argtypes = [HKEY, LPWSTR]
+    _RegDeleteValueW.restype  = LONG
+    _RegDeleteValueW.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteValueW(hKeySrc, lpValueName)
+RegDeleteValue = GuessStringType(RegDeleteValueA, RegDeleteValueW)
+
+# LONG WINAPI RegDeleteKeyValue(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpSubKey,
+#   __in_opt  LPCTSTR lpValueName
+# );
+def RegDeleteKeyValueA(hKeySrc, lpSubKey = None, lpValueName = None):
+    _RegDeleteKeyValueA = windll.advapi32.RegDeleteKeyValueA
+    _RegDeleteKeyValueA.argtypes = [HKEY, LPSTR, LPSTR]
+    _RegDeleteKeyValueA.restype  = LONG
+    _RegDeleteKeyValueA.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyValueA(hKeySrc, lpSubKey, lpValueName)
+def RegDeleteKeyValueW(hKeySrc, lpSubKey = None, lpValueName = None):
+    _RegDeleteKeyValueW = windll.advapi32.RegDeleteKeyValueW
+    _RegDeleteKeyValueW.argtypes = [HKEY, LPWSTR, LPWSTR]
+    _RegDeleteKeyValueW.restype  = LONG
+    _RegDeleteKeyValueW.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyValueW(hKeySrc, lpSubKey, lpValueName)
+RegDeleteKeyValue = GuessStringType(RegDeleteKeyValueA, RegDeleteKeyValueW)
+
+# LONG WINAPI RegDeleteKey(
+#   __in  HKEY hKey,
+#   __in  LPCTSTR lpSubKey
+# );
+def RegDeleteKeyA(hKeySrc, lpSubKey = None):
+    _RegDeleteKeyA = windll.advapi32.RegDeleteKeyA
+    _RegDeleteKeyA.argtypes = [HKEY, LPSTR]
+    _RegDeleteKeyA.restype  = LONG
+    _RegDeleteKeyA.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyA(hKeySrc, lpSubKey)
+def RegDeleteKeyW(hKeySrc, lpSubKey = None):
+    _RegDeleteKeyW = windll.advapi32.RegDeleteKeyW
+    _RegDeleteKeyW.argtypes = [HKEY, LPWSTR]
+    _RegDeleteKeyW.restype  = LONG
+    _RegDeleteKeyW.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyW(hKeySrc, lpSubKey)
+RegDeleteKey = GuessStringType(RegDeleteKeyA, RegDeleteKeyW)
+
+# LONG WINAPI RegDeleteKeyEx(
+#   __in        HKEY hKey,
+#   __in        LPCTSTR lpSubKey,
+#   __in        REGSAM samDesired,
+#   __reserved  DWORD Reserved
+# );
+
+def RegDeleteKeyExA(hKeySrc, lpSubKey = None, samDesired = KEY_WOW64_32KEY):
+    _RegDeleteKeyExA = windll.advapi32.RegDeleteKeyExA
+    _RegDeleteKeyExA.argtypes = [HKEY, LPSTR, REGSAM, DWORD]
+    _RegDeleteKeyExA.restype  = LONG
+    _RegDeleteKeyExA.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyExA(hKeySrc, lpSubKey, samDesired, 0)
+def RegDeleteKeyExW(hKeySrc, lpSubKey = None, samDesired = KEY_WOW64_32KEY):
+    _RegDeleteKeyExW = windll.advapi32.RegDeleteKeyExW
+    _RegDeleteKeyExW.argtypes = [HKEY, LPWSTR, REGSAM, DWORD]
+    _RegDeleteKeyExW.restype  = LONG
+    _RegDeleteKeyExW.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteKeyExW(hKeySrc, lpSubKey, samDesired, 0)
+RegDeleteKeyEx = GuessStringType(RegDeleteKeyExA, RegDeleteKeyExW)
+
+# LONG WINAPI RegCopyTree(
+#   __in      HKEY hKeySrc,
+#   __in_opt  LPCTSTR lpSubKey,
+#   __in      HKEY hKeyDest
+# );
+def RegCopyTreeA(hKeySrc, lpSubKey, hKeyDest):
+    _RegCopyTreeA = windll.advapi32.RegCopyTreeA
+    _RegCopyTreeA.argtypes = [HKEY, LPSTR, HKEY]
+    _RegCopyTreeA.restype  = LONG
+    _RegCopyTreeA.errcheck = RaiseIfNotErrorSuccess
+    _RegCopyTreeA(hKeySrc, lpSubKey, hKeyDest)
+def RegCopyTreeW(hKeySrc, lpSubKey, hKeyDest):
+    _RegCopyTreeW = windll.advapi32.RegCopyTreeW
+    _RegCopyTreeW.argtypes = [HKEY, LPWSTR, HKEY]
+    _RegCopyTreeW.restype  = LONG
+    _RegCopyTreeW.errcheck = RaiseIfNotErrorSuccess
+    _RegCopyTreeW(hKeySrc, lpSubKey, hKeyDest)
+RegCopyTree = GuessStringType(RegCopyTreeA, RegCopyTreeW)
+
+# LONG WINAPI RegDeleteTree(
+#   __in      HKEY hKey,
+#   __in_opt  LPCTSTR lpSubKey
+# );
+def RegDeleteTreeA(hKey, lpSubKey = None):
+    _RegDeleteTreeA = windll.advapi32.RegDeleteTreeA
+    _RegDeleteTreeA.argtypes = [HKEY, LPSTR]
+    _RegDeleteTreeA.restype  = LONG
+    _RegDeleteTreeA.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteTreeA(hKey, lpSubKey)
+def RegDeleteTreeW(hKey, lpSubKey = None):
+    _RegDeleteTreeW = windll.advapi32.RegDeleteTreeW
+    _RegDeleteTreeW.argtypes = [HKEY, LPWSTR]
+    _RegDeleteTreeW.restype  = LONG
+    _RegDeleteTreeW.errcheck = RaiseIfNotErrorSuccess
+    _RegDeleteTreeW(hKey, lpSubKey)
+RegDeleteTree = GuessStringType(RegDeleteTreeA, RegDeleteTreeW)
+
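+# Editor's usage sketch (not part of the original module): create a scratch subkey
+# under HKEY_CURRENT_USER, write a marker value, and remove the key again, nested
+# subkeys included, with the RegCreateKey and RegDeleteTree wrappers above. The
+# subkey and value names are hypothetical placeholders.
+def _example_scratch_key():
+    hKey = RegCreateKey(HKEY_CURRENT_USER, u"Software\\WinAppDbgExample")
+    try:
+        RegSetValueEx(hKey, u"Marker", 1)
+    finally:
+        RegCloseKey(hKey)
+    RegDeleteTree(HKEY_CURRENT_USER, u"Software\\WinAppDbgExample")
+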
+# LONG WINAPI RegFlushKey(
+#   __in  HKEY hKey
+# );
+def RegFlushKey(hKey):
+    _RegFlushKey = windll.advapi32.RegFlushKey
+    _RegFlushKey.argtypes = [HKEY]
+    _RegFlushKey.restype  = LONG
+    _RegFlushKey.errcheck = RaiseIfNotErrorSuccess
+    _RegFlushKey(hKey)
+
+# LONG WINAPI RegLoadMUIString(
+#   _In_       HKEY hKey,
+#   _In_opt_   LPCTSTR pszValue,
+#   _Out_opt_  LPTSTR pszOutBuf,
+#   _In_       DWORD cbOutBuf,
+#   _Out_opt_  LPDWORD pcbData,
+#   _In_       DWORD Flags,
+#   _In_opt_   LPCTSTR pszDirectory
+# );
+
+# TO DO
+
+#------------------------------------------------------------------------------
+
+# BOOL WINAPI CloseServiceHandle(
+#   _In_  SC_HANDLE hSCObject
+# );
+def CloseServiceHandle(hSCObject):
+    _CloseServiceHandle = windll.advapi32.CloseServiceHandle
+    _CloseServiceHandle.argtypes = [SC_HANDLE]
+    _CloseServiceHandle.restype  = bool
+    _CloseServiceHandle.errcheck = RaiseIfZero
+
+    if isinstance(hSCObject, Handle):
+        # Prevents the handle from being closed without notifying the Handle object.
+        hSCObject.close()
+    else:
+        _CloseServiceHandle(hSCObject)
+
+# SC_HANDLE WINAPI OpenSCManager(
+#   _In_opt_  LPCTSTR lpMachineName,
+#   _In_opt_  LPCTSTR lpDatabaseName,
+#   _In_      DWORD dwDesiredAccess
+# );
+def OpenSCManagerA(lpMachineName = None, lpDatabaseName = None, dwDesiredAccess = SC_MANAGER_ALL_ACCESS):
+    _OpenSCManagerA = windll.advapi32.OpenSCManagerA
+    _OpenSCManagerA.argtypes = [LPSTR, LPSTR, DWORD]
+    _OpenSCManagerA.restype  = SC_HANDLE
+    _OpenSCManagerA.errcheck = RaiseIfZero
+
+    hSCObject = _OpenSCManagerA(lpMachineName, lpDatabaseName, dwDesiredAccess)
+    return ServiceControlManagerHandle(hSCObject)
+
+def OpenSCManagerW(lpMachineName = None, lpDatabaseName = None, dwDesiredAccess = SC_MANAGER_ALL_ACCESS):
+    _OpenSCManagerW = windll.advapi32.OpenSCManagerW
+    _OpenSCManagerW.argtypes = [LPWSTR, LPWSTR, DWORD]
+    _OpenSCManagerW.restype  = SC_HANDLE
+    _OpenSCManagerW.errcheck = RaiseIfZero
+
+    hSCObject = _OpenSCManagerW(lpMachineName, lpDatabaseName, dwDesiredAccess)
+    return ServiceControlManagerHandle(hSCObject)
+
+OpenSCManager = GuessStringType(OpenSCManagerA, OpenSCManagerW)
+
+# SC_HANDLE WINAPI OpenService(
+#   _In_  SC_HANDLE hSCManager,
+#   _In_  LPCTSTR lpServiceName,
+#   _In_  DWORD dwDesiredAccess
+# );
+def OpenServiceA(hSCManager, lpServiceName, dwDesiredAccess = SERVICE_ALL_ACCESS):
+    _OpenServiceA = windll.advapi32.OpenServiceA
+    _OpenServiceA.argtypes = [SC_HANDLE, LPSTR, DWORD]
+    _OpenServiceA.restype  = SC_HANDLE
+    _OpenServiceA.errcheck = RaiseIfZero
+    return ServiceHandle( _OpenServiceA(hSCManager, lpServiceName, dwDesiredAccess) )
+
+def OpenServiceW(hSCManager, lpServiceName, dwDesiredAccess = SERVICE_ALL_ACCESS):
+    _OpenServiceW = windll.advapi32.OpenServiceW
+    _OpenServiceW.argtypes = [SC_HANDLE, LPWSTR, DWORD]
+    _OpenServiceW.restype  = SC_HANDLE
+    _OpenServiceW.errcheck = RaiseIfZero
+    return ServiceHandle( _OpenServiceW(hSCManager, lpServiceName, dwDesiredAccess) )
+
+OpenService = GuessStringType(OpenServiceA, OpenServiceW)
+
+# SC_HANDLE WINAPI CreateService(
+#   _In_       SC_HANDLE hSCManager,
+#   _In_       LPCTSTR lpServiceName,
+#   _In_opt_   LPCTSTR lpDisplayName,
+#   _In_       DWORD dwDesiredAccess,
+#   _In_       DWORD dwServiceType,
+#   _In_       DWORD dwStartType,
+#   _In_       DWORD dwErrorControl,
+#   _In_opt_   LPCTSTR lpBinaryPathName,
+#   _In_opt_   LPCTSTR lpLoadOrderGroup,
+#   _Out_opt_  LPDWORD lpdwTagId,
+#   _In_opt_   LPCTSTR lpDependencies,
+#   _In_opt_   LPCTSTR lpServiceStartName,
+#   _In_opt_   LPCTSTR lpPassword
+# );
+def CreateServiceA(hSCManager, lpServiceName,
+                   lpDisplayName = None,
+                   dwDesiredAccess = SERVICE_ALL_ACCESS,
+                   dwServiceType = SERVICE_WIN32_OWN_PROCESS,
+                   dwStartType = SERVICE_DEMAND_START,
+                   dwErrorControl = SERVICE_ERROR_NORMAL,
+                   lpBinaryPathName = None,
+                   lpLoadOrderGroup = None,
+                   lpDependencies = None,
+                   lpServiceStartName = None,
+                   lpPassword = None):
+
+    _CreateServiceA = windll.advapi32.CreateServiceA
+    _CreateServiceA.argtypes = [SC_HANDLE, LPSTR, LPSTR, DWORD, DWORD, DWORD, DWORD, LPSTR, LPSTR, LPDWORD, LPSTR, LPSTR, LPSTR]
+    _CreateServiceA.restype  = SC_HANDLE
+    _CreateServiceA.errcheck = RaiseIfZero
+
+    dwTagId = DWORD(0)
+    hService = _CreateServiceA(hSCManager, lpServiceName, lpDisplayName, dwDesiredAccess, dwServiceType, dwStartType, dwErrorControl, lpBinaryPathName, lpLoadOrderGroup, byref(dwTagId), lpDependencies, lpServiceStartName, lpPassword)
+    return ServiceHandle(hService), dwTagId.value
+
+def CreateServiceW(hSCManager, lpServiceName,
+                   lpDisplayName = None,
+                   dwDesiredAccess = SERVICE_ALL_ACCESS,
+                   dwServiceType = SERVICE_WIN32_OWN_PROCESS,
+                   dwStartType = SERVICE_DEMAND_START,
+                   dwErrorControl = SERVICE_ERROR_NORMAL,
+                   lpBinaryPathName = None,
+                   lpLoadOrderGroup = None,
+                   lpDependencies = None,
+                   lpServiceStartName = None,
+                   lpPassword = None):
+
+    _CreateServiceW = windll.advapi32.CreateServiceW
+    _CreateServiceW.argtypes = [SC_HANDLE, LPWSTR, LPWSTR, DWORD, DWORD, DWORD, DWORD, LPWSTR, LPWSTR, LPDWORD, LPWSTR, LPWSTR, LPWSTR]
+    _CreateServiceW.restype  = SC_HANDLE
+    _CreateServiceW.errcheck = RaiseIfZero
+
+    dwTagId = DWORD(0)
+    hService = _CreateServiceW(hSCManager, lpServiceName, lpDisplayName, dwDesiredAccess, dwServiceType, dwStartType, dwErrorControl, lpBinaryPathName, lpLoadOrderGroup, byref(dwTagId), lpDependencies, lpServiceStartName, lpPassword)
+    return ServiceHandle(hService), dwTagId.value
+
+CreateService = GuessStringType(CreateServiceA, CreateServiceW)
+
+# BOOL WINAPI DeleteService(
+#   _In_  SC_HANDLE hService
+# );
+def DeleteService(hService):
+    _DeleteService = windll.advapi32.DeleteService
+    _DeleteService.argtypes = [SC_HANDLE]
+    _DeleteService.restype  = bool
+    _DeleteService.errcheck = RaiseIfZero
+    _DeleteService(hService)
+
+# BOOL WINAPI GetServiceKeyName(
+#   _In_       SC_HANDLE hSCManager,
+#   _In_       LPCTSTR lpDisplayName,
+#   _Out_opt_  LPTSTR lpServiceName,
+#   _Inout_    LPDWORD lpcchBuffer
+# );
+def GetServiceKeyNameA(hSCManager, lpDisplayName):
+    _GetServiceKeyNameA = windll.advapi32.GetServiceKeyNameA
+    _GetServiceKeyNameA.argtypes = [SC_HANDLE, LPSTR, LPSTR, LPDWORD]
+    _GetServiceKeyNameA.restype  = bool
+
+    cchBuffer = DWORD(0)
+    _GetServiceKeyNameA(hSCManager, lpDisplayName, None, byref(cchBuffer))
+    if cchBuffer.value == 0:
+        raise ctypes.WinError()
+    lpServiceName = ctypes.create_string_buffer(cchBuffer.value + 1)
+    cchBuffer.value = sizeof(lpServiceName)
+    success = _GetServiceKeyNameA(hSCManager, lpDisplayName, lpServiceName, byref(cchBuffer))
+    if not success:
+        raise ctypes.WinError()
+    return lpServiceName.value
+
+def GetServiceKeyNameW(hSCManager, lpDisplayName):
+    _GetServiceKeyNameW = windll.advapi32.GetServiceKeyNameW
+    _GetServiceKeyNameW.argtypes = [SC_HANDLE, LPWSTR, LPWSTR, LPDWORD]
+    _GetServiceKeyNameW.restype  = bool
+
+    cchBuffer = DWORD(0)
+    _GetServiceKeyNameW(hSCManager, lpDisplayName, None, byref(cchBuffer))
+    if cchBuffer.value == 0:
+        raise ctypes.WinError()
+    lpServiceName = ctypes.create_unicode_buffer(cchBuffer.value + 2)
+    cchBuffer.value = sizeof(lpServiceName)
+    success = _GetServiceKeyNameW(hSCManager, lpDisplayName, lpServiceName, byref(cchBuffer))
+    if not success:
+        raise ctypes.WinError()
+    return lpServiceName.value
+
+GetServiceKeyName = GuessStringType(GetServiceKeyNameA, GetServiceKeyNameW)
+
+# BOOL WINAPI GetServiceDisplayName(
+#   _In_       SC_HANDLE hSCManager,
+#   _In_       LPCTSTR lpServiceName,
+#   _Out_opt_  LPTSTR lpDisplayName,
+#   _Inout_    LPDWORD lpcchBuffer
+# );
+def GetServiceDisplayNameA(hSCManager, lpServiceName):
+    _GetServiceDisplayNameA = windll.advapi32.GetServiceDisplayNameA
+    _GetServiceDisplayNameA.argtypes = [SC_HANDLE, LPSTR, LPSTR, LPDWORD]
+    _GetServiceDisplayNameA.restype  = bool
+
+    cchBuffer = DWORD(0)
+    _GetServiceDisplayNameA(hSCManager, lpServiceName, None, byref(cchBuffer))
+    if cchBuffer.value == 0:
+        raise ctypes.WinError()
+    lpDisplayName = ctypes.create_string_buffer(cchBuffer.value + 1)
+    cchBuffer.value = sizeof(lpDisplayName)
+    success = _GetServiceDisplayNameA(hSCManager, lpServiceName, lpDisplayName, byref(cchBuffer))
+    if not success:
+        raise ctypes.WinError()
+    return lpDisplayName.value
+
+def GetServiceDisplayNameW(hSCManager, lpServiceName):
+    _GetServiceDisplayNameW = windll.advapi32.GetServiceDisplayNameW
+    _GetServiceDisplayNameW.argtypes = [SC_HANDLE, LPWSTR, LPWSTR, LPDWORD]
+    _GetServiceDisplayNameW.restype  = bool
+
+    cchBuffer = DWORD(0)
+    _GetServiceDisplayNameW(hSCManager, lpServiceName, None, byref(cchBuffer))
+    if cchBuffer.value == 0:
+        raise ctypes.WinError()
+    lpDisplayName = ctypes.create_unicode_buffer(cchBuffer.value + 2)
+    cchBuffer.value = sizeof(lpDisplayName)
+    success = _GetServiceDisplayNameW(hSCManager, lpServiceName, lpDisplayName, byref(cchBuffer))
+    if not success:
+        raise ctypes.WinError()
+    return lpDisplayName.value
+
+GetServiceDisplayName = GuessStringType(GetServiceDisplayNameA, GetServiceDisplayNameW)
+
+# BOOL WINAPI QueryServiceConfig(
+#   _In_       SC_HANDLE hService,
+#   _Out_opt_  LPQUERY_SERVICE_CONFIG lpServiceConfig,
+#   _In_       DWORD cbBufSize,
+#   _Out_      LPDWORD pcbBytesNeeded
+# );
+
+# TO DO
+
+# BOOL WINAPI QueryServiceConfig2(
+#   _In_       SC_HANDLE hService,
+#   _In_       DWORD dwInfoLevel,
+#   _Out_opt_  LPBYTE lpBuffer,
+#   _In_       DWORD cbBufSize,
+#   _Out_      LPDWORD pcbBytesNeeded
+# );
+
+# TO DO
+
+# BOOL WINAPI ChangeServiceConfig(
+#   _In_       SC_HANDLE hService,
+#   _In_       DWORD dwServiceType,
+#   _In_       DWORD dwStartType,
+#   _In_       DWORD dwErrorControl,
+#   _In_opt_   LPCTSTR lpBinaryPathName,
+#   _In_opt_   LPCTSTR lpLoadOrderGroup,
+#   _Out_opt_  LPDWORD lpdwTagId,
+#   _In_opt_   LPCTSTR lpDependencies,
+#   _In_opt_   LPCTSTR lpServiceStartName,
+#   _In_opt_   LPCTSTR lpPassword,
+#   _In_opt_   LPCTSTR lpDisplayName
+# );
+
+# TO DO
+
+# BOOL WINAPI ChangeServiceConfig2(
+#   _In_      SC_HANDLE hService,
+#   _In_      DWORD dwInfoLevel,
+#   _In_opt_  LPVOID lpInfo
+# );
+
+# TO DO
+
+# BOOL WINAPI StartService(
+#   _In_      SC_HANDLE hService,
+#   _In_      DWORD dwNumServiceArgs,
+#   _In_opt_  LPCTSTR *lpServiceArgVectors
+# );
+def StartServiceA(hService, ServiceArgVectors = None):
+    _StartServiceA = windll.advapi32.StartServiceA
+    _StartServiceA.argtypes = [SC_HANDLE, DWORD, LPVOID]
+    _StartServiceA.restype  = bool
+    _StartServiceA.errcheck = RaiseIfZero
+
+    if ServiceArgVectors:
+        dwNumServiceArgs = len(ServiceArgVectors)
+        CServiceArgVectors = (LPSTR * dwNumServiceArgs)(*ServiceArgVectors)
+        lpServiceArgVectors = ctypes.pointer(CServiceArgVectors)
+    else:
+        dwNumServiceArgs = 0
+        lpServiceArgVectors = None
+    _StartServiceA(hService, dwNumServiceArgs, lpServiceArgVectors)
+
+def StartServiceW(hService, ServiceArgVectors = None):
+    _StartServiceW = windll.advapi32.StartServiceW
+    _StartServiceW.argtypes = [SC_HANDLE, DWORD, LPVOID]
+    _StartServiceW.restype  = bool
+    _StartServiceW.errcheck = RaiseIfZero
+
+    if ServiceArgVectors:
+        dwNumServiceArgs = len(ServiceArgVectors)
+        CServiceArgVectors = (LPWSTR * dwNumServiceArgs)(*ServiceArgVectors)
+        lpServiceArgVectors = ctypes.pointer(CServiceArgVectors)
+    else:
+        dwNumServiceArgs = 0
+        lpServiceArgVectors = None
+    _StartServiceW(hService, dwNumServiceArgs, lpServiceArgVectors)
+
+StartService = GuessStringType(StartServiceA, StartServiceW)
+
+# BOOL WINAPI ControlService(
+#   _In_   SC_HANDLE hService,
+#   _In_   DWORD dwControl,
+#   _Out_  LPSERVICE_STATUS lpServiceStatus
+# );
+def ControlService(hService, dwControl):
+    _ControlService = windll.advapi32.ControlService
+    _ControlService.argtypes = [SC_HANDLE, DWORD, LPSERVICE_STATUS]
+    _ControlService.restype  = bool
+    _ControlService.errcheck = RaiseIfZero
+
+    rawServiceStatus = SERVICE_STATUS()
+    _ControlService(hService, dwControl, byref(rawServiceStatus))
+    return ServiceStatus(rawServiceStatus)
+
+# BOOL WINAPI ControlServiceEx(
+#   _In_     SC_HANDLE hService,
+#   _In_     DWORD dwControl,
+#   _In_     DWORD dwInfoLevel,
+#   _Inout_  PVOID pControlParams
+# );
+
+# TO DO
+
+# DWORD WINAPI NotifyServiceStatusChange(
+#   _In_  SC_HANDLE hService,
+#   _In_  DWORD dwNotifyMask,
+#   _In_  PSERVICE_NOTIFY pNotifyBuffer
+# );
+
+# TO DO
+
+# BOOL WINAPI QueryServiceStatus(
+#   _In_   SC_HANDLE hService,
+#   _Out_  LPSERVICE_STATUS lpServiceStatus
+# );
+def QueryServiceStatus(hService):
+    _QueryServiceStatus = windll.advapi32.QueryServiceStatus
+    _QueryServiceStatus.argtypes = [SC_HANDLE, LPSERVICE_STATUS]
+    _QueryServiceStatus.restype  = bool
+    _QueryServiceStatus.errcheck = RaiseIfZero
+
+    rawServiceStatus = SERVICE_STATUS()
+    _QueryServiceStatus(hService, byref(rawServiceStatus))
+    return ServiceStatus(rawServiceStatus)
+
+# BOOL WINAPI QueryServiceStatusEx(
+#   _In_       SC_HANDLE hService,
+#   _In_       SC_STATUS_TYPE InfoLevel,
+#   _Out_opt_  LPBYTE lpBuffer,
+#   _In_       DWORD cbBufSize,
+#   _Out_      LPDWORD pcbBytesNeeded
+# );
+def QueryServiceStatusEx(hService, InfoLevel = SC_STATUS_PROCESS_INFO):
+
+    if InfoLevel != SC_STATUS_PROCESS_INFO:
+        raise NotImplementedError()
+
+    _QueryServiceStatusEx = windll.advapi32.QueryServiceStatusEx
+    _QueryServiceStatusEx.argtypes = [SC_HANDLE, SC_STATUS_TYPE, LPVOID, DWORD, LPDWORD]
+    _QueryServiceStatusEx.restype  = bool
+    _QueryServiceStatusEx.errcheck = RaiseIfZero
+
+    lpBuffer = SERVICE_STATUS_PROCESS()
+    cbBytesNeeded = DWORD(sizeof(lpBuffer))
+    _QueryServiceStatusEx(hService, InfoLevel, byref(lpBuffer), sizeof(lpBuffer), byref(cbBytesNeeded))
+    return ServiceStatusProcess(lpBuffer)
+
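+# Editor's usage sketch (not part of the original module): query the status of a
+# service by name through OpenSCManager, OpenService and QueryServiceStatusEx above.
+# The service name is a caller-supplied placeholder.
+def _example_query_service(szServiceName):
+    # The default access rights (SC_MANAGER_ALL_ACCESS / SERVICE_ALL_ACCESS) may
+    # require administrative privileges; request narrower rights if that matters.
+    hSCManager = OpenSCManager()
+    hService   = OpenService(hSCManager, szServiceName)
+    return QueryServiceStatusEx(hService)
+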
+# BOOL WINAPI EnumServicesStatus(
+#   _In_         SC_HANDLE hSCManager,
+#   _In_         DWORD dwServiceType,
+#   _In_         DWORD dwServiceState,
+#   _Out_opt_    LPENUM_SERVICE_STATUS lpServices,
+#   _In_         DWORD cbBufSize,
+#   _Out_        LPDWORD pcbBytesNeeded,
+#   _Out_        LPDWORD lpServicesReturned,
+#   _Inout_opt_  LPDWORD lpResumeHandle
+# );
+def EnumServicesStatusA(hSCManager, dwServiceType = SERVICE_DRIVER | SERVICE_WIN32, dwServiceState = SERVICE_STATE_ALL):
+    _EnumServicesStatusA = windll.advapi32.EnumServicesStatusA
+    _EnumServicesStatusA.argtypes = [SC_HANDLE, DWORD, DWORD, LPVOID, DWORD, LPDWORD, LPDWORD, LPDWORD]
+    _EnumServicesStatusA.restype  = bool
+
+    cbBytesNeeded    = DWORD(0)
+    ServicesReturned = DWORD(0)
+    ResumeHandle     = DWORD(0)
+
+    _EnumServicesStatusA(hSCManager, dwServiceType, dwServiceState, None, 0, byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle))
+
+    Services = []
+    success = False
+    while GetLastError() == ERROR_MORE_DATA:
+        if cbBytesNeeded.value < sizeof(ENUM_SERVICE_STATUSA):
+            break
+        ServicesBuffer = ctypes.create_string_buffer(cbBytesNeeded.value)
+        success = _EnumServicesStatusA(hSCManager, dwServiceType, dwServiceState, byref(ServicesBuffer), sizeof(ServicesBuffer), byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle))
+        if sizeof(ServicesBuffer) < (sizeof(ENUM_SERVICE_STATUSA) * ServicesReturned.value):
+            raise ctypes.WinError()
+        lpServicesArray = ctypes.cast(ctypes.cast(ctypes.pointer(ServicesBuffer), ctypes.c_void_p), LPENUM_SERVICE_STATUSA)
+        for index in range(0, ServicesReturned.value):
+            Services.append( ServiceStatusEntry(lpServicesArray[index]) )
+        if success: break
+    if not success:
+        raise ctypes.WinError()
+
+    return Services
+
+def EnumServicesStatusW(hSCManager, dwServiceType = SERVICE_DRIVER | SERVICE_WIN32, dwServiceState = SERVICE_STATE_ALL):
+    _EnumServicesStatusW = windll.advapi32.EnumServicesStatusW
+    _EnumServicesStatusW.argtypes = [SC_HANDLE, DWORD, DWORD, LPVOID, DWORD, LPDWORD, LPDWORD, LPDWORD]
+    _EnumServicesStatusW.restype  = bool
+
+    cbBytesNeeded    = DWORD(0)
+    ServicesReturned = DWORD(0)
+    ResumeHandle     = DWORD(0)
+
+    _EnumServicesStatusW(hSCManager, dwServiceType, dwServiceState, None, 0, byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle))
+
+    Services = []
+    success = False
+    while GetLastError() == ERROR_MORE_DATA:
+        if cbBytesNeeded.value < sizeof(ENUM_SERVICE_STATUSW):
+            break
+        ServicesBuffer = ctypes.create_string_buffer(cbBytesNeeded.value)
+        success = _EnumServicesStatusW(hSCManager, dwServiceType, dwServiceState, byref(ServicesBuffer), sizeof(ServicesBuffer), byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle))
+        if sizeof(ServicesBuffer) < (sizeof(ENUM_SERVICE_STATUSW) * ServicesReturned.value):
+            raise ctypes.WinError()
+        lpServicesArray = ctypes.cast(ctypes.cast(ctypes.pointer(ServicesBuffer), ctypes.c_void_p), LPENUM_SERVICE_STATUSW)
+        for index in range(0, ServicesReturned.value):
+            Services.append( ServiceStatusEntry(lpServicesArray[index]) )
+        if success: break
+    if not success:
+        raise ctypes.WinError()
+
+    return Services
+
+EnumServicesStatus = DefaultStringType(EnumServicesStatusA, EnumServicesStatusW)
+
+# BOOL WINAPI EnumServicesStatusEx(
+#   _In_         SC_HANDLE hSCManager,
+#   _In_         SC_ENUM_TYPE InfoLevel,
+#   _In_         DWORD dwServiceType,
+#   _In_         DWORD dwServiceState,
+#   _Out_opt_    LPBYTE lpServices,
+#   _In_         DWORD cbBufSize,
+#   _Out_        LPDWORD pcbBytesNeeded,
+#   _Out_        LPDWORD lpServicesReturned,
+#   _Inout_opt_  LPDWORD lpResumeHandle,
+#   _In_opt_     LPCTSTR pszGroupName
+# );
+def EnumServicesStatusExA(hSCManager, InfoLevel = SC_ENUM_PROCESS_INFO, dwServiceType = SERVICE_DRIVER | SERVICE_WIN32, dwServiceState = SERVICE_STATE_ALL, pszGroupName = None):
+
+    if InfoLevel != SC_ENUM_PROCESS_INFO:
+        raise NotImplementedError()
+
+    _EnumServicesStatusExA = windll.advapi32.EnumServicesStatusExA
+    _EnumServicesStatusExA.argtypes = [SC_HANDLE, SC_ENUM_TYPE, DWORD, DWORD, LPVOID, DWORD, LPDWORD, LPDWORD, LPDWORD, LPSTR]
+    _EnumServicesStatusExA.restype  = bool
+
+    cbBytesNeeded    = DWORD(0)
+    ServicesReturned = DWORD(0)
+    ResumeHandle     = DWORD(0)
+
+    _EnumServicesStatusExA(hSCManager, InfoLevel, dwServiceType, dwServiceState, None, 0, byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle), pszGroupName)
+
+    Services = []
+    success = False
+    while GetLastError() == ERROR_MORE_DATA:
+        if cbBytesNeeded.value < sizeof(ENUM_SERVICE_STATUS_PROCESSA):
+            break
+        ServicesBuffer = ctypes.create_string_buffer(cbBytesNeeded.value)
+        success = _EnumServicesStatusExA(hSCManager, InfoLevel, dwServiceType, dwServiceState, byref(ServicesBuffer), sizeof(ServicesBuffer), byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle), pszGroupName)
+        if sizeof(ServicesBuffer) < (sizeof(ENUM_SERVICE_STATUS_PROCESSA) * ServicesReturned.value):
+            raise ctypes.WinError()
+        lpServicesArray = ctypes.cast(ctypes.cast(ctypes.pointer(ServicesBuffer), ctypes.c_void_p), LPENUM_SERVICE_STATUS_PROCESSA)
+        for index in range(0, ServicesReturned.value):
+            Services.append( ServiceStatusProcessEntry(lpServicesArray[index]) )
+        if success: break
+    if not success:
+        raise ctypes.WinError()
+
+    return Services
+
+def EnumServicesStatusExW(hSCManager, InfoLevel = SC_ENUM_PROCESS_INFO, dwServiceType = SERVICE_DRIVER | SERVICE_WIN32, dwServiceState = SERVICE_STATE_ALL, pszGroupName = None):
+    _EnumServicesStatusExW = windll.advapi32.EnumServicesStatusExW
+    _EnumServicesStatusExW.argtypes = [SC_HANDLE, SC_ENUM_TYPE, DWORD, DWORD, LPVOID, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR]
+    _EnumServicesStatusExW.restype  = bool
+
+    if InfoLevel != SC_ENUM_PROCESS_INFO:
+        raise NotImplementedError()
+
+    cbBytesNeeded    = DWORD(0)
+    ServicesReturned = DWORD(0)
+    ResumeHandle     = DWORD(0)
+
+    _EnumServicesStatusExW(hSCManager, InfoLevel, dwServiceType, dwServiceState, None, 0, byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle), pszGroupName)
+
+    Services = []
+    success = False
+    while GetLastError() == ERROR_MORE_DATA:
+        if cbBytesNeeded.value < sizeof(ENUM_SERVICE_STATUS_PROCESSW):
+            break
+        ServicesBuffer = ctypes.create_string_buffer(cbBytesNeeded.value)
+        success = _EnumServicesStatusExW(hSCManager, InfoLevel, dwServiceType, dwServiceState, byref(ServicesBuffer), sizeof(ServicesBuffer), byref(cbBytesNeeded), byref(ServicesReturned), byref(ResumeHandle), pszGroupName)
+        if sizeof(ServicesBuffer) < (sizeof(ENUM_SERVICE_STATUS_PROCESSW) * ServicesReturned.value):
+            raise ctypes.WinError()
+        lpServicesArray = ctypes.cast(ctypes.cast(ctypes.pointer(ServicesBuffer), ctypes.c_void_p), LPENUM_SERVICE_STATUS_PROCESSW)
+        for index in range(0, ServicesReturned.value):
+            Services.append( ServiceStatusProcessEntry(lpServicesArray[index]) )
+        if success: break
+    if not success:
+        raise ctypes.WinError()
+
+    return Services
+
+EnumServicesStatusEx = DefaultStringType(EnumServicesStatusExA, EnumServicesStatusExW)
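+
+# Usage sketch (illustrative only): list all services with their process IDs.
+# Each ServiceStatusProcessEntry returned here is expected to mirror
+# ENUM_SERVICE_STATUS_PROCESS (for example ServiceName and ProcessId).
+#
+#     for entry in EnumServicesStatusEx(OpenSCManager()):
+#         print(entry.ServiceName, entry.ProcessId)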
+
+# BOOL WINAPI EnumDependentServices(
+#   _In_       SC_HANDLE hService,
+#   _In_       DWORD dwServiceState,
+#   _Out_opt_  LPENUM_SERVICE_STATUS lpServices,
+#   _In_       DWORD cbBufSize,
+#   _Out_      LPDWORD pcbBytesNeeded,
+#   _Out_      LPDWORD lpServicesReturned
+# );
+
+# TO DO
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/context_amd64.py b/scripts/win32/context_amd64.py
new file mode 100644
index 0000000..fb6b3ff
--- /dev/null
+++ b/scripts/win32/context_amd64.py
@@ -0,0 +1,762 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+CONTEXT structure for amd64.
+"""
+
+from .defines import *  # NOQA
+from .version import ARCH_AMD64
+from . import context_i386
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- CONTEXT structures and constants -----------------------------------------
+
+# The following values specify the type of access in the first parameter
+# of the exception record when the exception code specifies an access
+# violation.
+EXCEPTION_READ_FAULT        = 0     # exception caused by a read
+EXCEPTION_WRITE_FAULT       = 1     # exception caused by a write
+EXCEPTION_EXECUTE_FAULT     = 8     # exception caused by an instruction fetch
+
+CONTEXT_AMD64           = 0x00100000
+
+CONTEXT_CONTROL         = (CONTEXT_AMD64 | 0x1)
+CONTEXT_INTEGER         = (CONTEXT_AMD64 | 0x2)
+CONTEXT_SEGMENTS        = (CONTEXT_AMD64 | 0x4)
+CONTEXT_FLOATING_POINT  = (CONTEXT_AMD64 | 0x8)
+CONTEXT_DEBUG_REGISTERS = (CONTEXT_AMD64 | 0x10)
+
+CONTEXT_MMX_REGISTERS   = CONTEXT_FLOATING_POINT
+
+CONTEXT_FULL = (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)
+
+CONTEXT_ALL = (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | \
+               CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS)
+
+CONTEXT_EXCEPTION_ACTIVE    = 0x8000000
+CONTEXT_SERVICE_ACTIVE      = 0x10000000
+CONTEXT_EXCEPTION_REQUEST   = 0x40000000
+CONTEXT_EXCEPTION_REPORTING = 0x80000000
+
+INITIAL_MXCSR = 0x1f80            # initial MXCSR value
+INITIAL_FPCSR = 0x027f            # initial FPCSR value
+
+# typedef struct _XMM_SAVE_AREA32 {
+#     WORD   ControlWord;
+#     WORD   StatusWord;
+#     BYTE  TagWord;
+#     BYTE  Reserved1;
+#     WORD   ErrorOpcode;
+#     DWORD ErrorOffset;
+#     WORD   ErrorSelector;
+#     WORD   Reserved2;
+#     DWORD DataOffset;
+#     WORD   DataSelector;
+#     WORD   Reserved3;
+#     DWORD MxCsr;
+#     DWORD MxCsr_Mask;
+#     M128A FloatRegisters[8];
+#     M128A XmmRegisters[16];
+#     BYTE  Reserved4[96];
+# } XMM_SAVE_AREA32, *PXMM_SAVE_AREA32;
+class XMM_SAVE_AREA32(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('ControlWord',     WORD),
+        ('StatusWord',      WORD),
+        ('TagWord',         BYTE),
+        ('Reserved1',       BYTE),
+        ('ErrorOpcode',     WORD),
+        ('ErrorOffset',     DWORD),
+        ('ErrorSelector',   WORD),
+        ('Reserved2',       WORD),
+        ('DataOffset',      DWORD),
+        ('DataSelector',    WORD),
+        ('Reserved3',       WORD),
+        ('MxCsr',           DWORD),
+        ('MxCsr_Mask',      DWORD),
+        ('FloatRegisters',  M128A * 8),
+        ('XmmRegisters',    M128A * 16),
+        ('Reserved4',       BYTE * 96),
+    ]
+
+    def from_dict(self):
+        raise NotImplementedError()
+
+    def to_dict(self):
+        d = dict()
+        for name, type in self._fields_:
+            if name in ('FloatRegisters', 'XmmRegisters'):
+                d[name] = tuple([ (x.Low + (x.High << 64)) for x in getattr(self, name) ])
+            elif name == 'Reserved4':
+                d[name] = tuple([ chr(x) for x in getattr(self, name) ])
+            else:
+                d[name] = getattr(self, name)
+        return d
+
+LEGACY_SAVE_AREA_LENGTH = sizeof(XMM_SAVE_AREA32)
+
+PXMM_SAVE_AREA32 = ctypes.POINTER(XMM_SAVE_AREA32)
+LPXMM_SAVE_AREA32 = PXMM_SAVE_AREA32
+
+# //
+# // Context Frame
+# //
+# //  This frame has several purposes: 1) it is used as an argument to
+# //  NtContinue, 2) it is used to construct a call frame for APC delivery,
+# //  and 3) it is used in the user level thread creation routines.
+# //
+# //
+# // The flags field within this record controls the contents of a CONTEXT
+# // record.
+# //
+# // If the context record is used as an input parameter, then for each
+# // portion of the context record controlled by a flag whose value is
+# // set, it is assumed that that portion of the context record contains
+# // valid context. If the context record is being used to modify a thread's
+# // context, then only that portion of the thread's context is modified.
+# //
+# // If the context record is used as an output parameter to capture the
+# // context of a thread, then only those portions of the thread's context
+# // corresponding to set flags will be returned.
+# //
+# // CONTEXT_CONTROL specifies SegSs, Rsp, SegCs, Rip, and EFlags.
+# //
+# // CONTEXT_INTEGER specifies Rax, Rcx, Rdx, Rbx, Rbp, Rsi, Rdi, and R8-R15.
+# //
+# // CONTEXT_SEGMENTS specifies SegDs, SegEs, SegFs, and SegGs.
+# //
+# // CONTEXT_DEBUG_REGISTERS specifies Dr0-Dr3 and Dr6-Dr7.
+# //
+# // CONTEXT_MMX_REGISTERS specifies the floating point and extended registers
+# //     Mm0/St0-Mm7/St7 and Xmm0-Xmm15.
+# //
+#
+# typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
+#
+#     //
+#     // Register parameter home addresses.
+#     //
+#     // N.B. These fields are for convenience - they could be used to extend the
+#     //      context record in the future.
+#     //
+#
+#     DWORD64 P1Home;
+#     DWORD64 P2Home;
+#     DWORD64 P3Home;
+#     DWORD64 P4Home;
+#     DWORD64 P5Home;
+#     DWORD64 P6Home;
+#
+#     //
+#     // Control flags.
+#     //
+#
+#     DWORD ContextFlags;
+#     DWORD MxCsr;
+#
+#     //
+#     // Segment Registers and processor flags.
+#     //
+#
+#     WORD   SegCs;
+#     WORD   SegDs;
+#     WORD   SegEs;
+#     WORD   SegFs;
+#     WORD   SegGs;
+#     WORD   SegSs;
+#     DWORD EFlags;
+#
+#     //
+#     // Debug registers
+#     //
+#
+#     DWORD64 Dr0;
+#     DWORD64 Dr1;
+#     DWORD64 Dr2;
+#     DWORD64 Dr3;
+#     DWORD64 Dr6;
+#     DWORD64 Dr7;
+#
+#     //
+#     // Integer registers.
+#     //
+#
+#     DWORD64 Rax;
+#     DWORD64 Rcx;
+#     DWORD64 Rdx;
+#     DWORD64 Rbx;
+#     DWORD64 Rsp;
+#     DWORD64 Rbp;
+#     DWORD64 Rsi;
+#     DWORD64 Rdi;
+#     DWORD64 R8;
+#     DWORD64 R9;
+#     DWORD64 R10;
+#     DWORD64 R11;
+#     DWORD64 R12;
+#     DWORD64 R13;
+#     DWORD64 R14;
+#     DWORD64 R15;
+#
+#     //
+#     // Program counter.
+#     //
+#
+#     DWORD64 Rip;
+#
+#     //
+#     // Floating point state.
+#     //
+#
+#     union {
+#         XMM_SAVE_AREA32 FltSave;
+#         struct {
+#             M128A Header[2];
+#             M128A Legacy[8];
+#             M128A Xmm0;
+#             M128A Xmm1;
+#             M128A Xmm2;
+#             M128A Xmm3;
+#             M128A Xmm4;
+#             M128A Xmm5;
+#             M128A Xmm6;
+#             M128A Xmm7;
+#             M128A Xmm8;
+#             M128A Xmm9;
+#             M128A Xmm10;
+#             M128A Xmm11;
+#             M128A Xmm12;
+#             M128A Xmm13;
+#             M128A Xmm14;
+#             M128A Xmm15;
+#         };
+#     };
+#
+#     //
+#     // Vector registers.
+#     //
+#
+#     M128A VectorRegister[26];
+#     DWORD64 VectorControl;
+#
+#     //
+#     // Special debug control registers.
+#     //
+#
+#     DWORD64 DebugControl;
+#     DWORD64 LastBranchToRip;
+#     DWORD64 LastBranchFromRip;
+#     DWORD64 LastExceptionToRip;
+#     DWORD64 LastExceptionFromRip;
+# } CONTEXT, *PCONTEXT;
+
+class _CONTEXT_FLTSAVE_STRUCT(Structure):
+    _fields_ = [
+        ('Header',                  M128A * 2),
+        ('Legacy',                  M128A * 8),
+        ('Xmm0',                    M128A),
+        ('Xmm1',                    M128A),
+        ('Xmm2',                    M128A),
+        ('Xmm3',                    M128A),
+        ('Xmm4',                    M128A),
+        ('Xmm5',                    M128A),
+        ('Xmm6',                    M128A),
+        ('Xmm7',                    M128A),
+        ('Xmm8',                    M128A),
+        ('Xmm9',                    M128A),
+        ('Xmm10',                   M128A),
+        ('Xmm11',                   M128A),
+        ('Xmm12',                   M128A),
+        ('Xmm13',                   M128A),
+        ('Xmm14',                   M128A),
+        ('Xmm15',                   M128A),
+    ]
+
+    def from_dict(self):
+        raise NotImplementedError()
+
+    def to_dict(self):
+        d = dict()
+        for name, type in self._fields_:
+            if name in ('Header', 'Legacy'):
+                d[name] = tuple([ (x.Low + (x.High << 64)) for x in getattr(self, name) ])
+            else:
+                x = getattr(self, name)
+                d[name] = x.Low + (x.High << 64)
+        return d
+
+class _CONTEXT_FLTSAVE_UNION(Union):
+    _fields_ = [
+        ('flt',                     XMM_SAVE_AREA32),
+        ('xmm',                     _CONTEXT_FLTSAVE_STRUCT),
+    ]
+
+    def from_dict(self):
+        raise NotImplementedError()
+
+    def to_dict(self):
+        d = dict()
+        d['flt'] = self.flt.to_dict()
+        d['xmm'] = self.xmm.to_dict()
+        return d
+
+class CONTEXT(Structure):
+    arch = ARCH_AMD64
+
+    _pack_ = 16
+    _fields_ = [
+
+        # Register parameter home addresses.
+        ('P1Home',                  DWORD64),
+        ('P2Home',                  DWORD64),
+        ('P3Home',                  DWORD64),
+        ('P4Home',                  DWORD64),
+        ('P5Home',                  DWORD64),
+        ('P6Home',                  DWORD64),
+
+        # Control flags.
+        ('ContextFlags',            DWORD),
+        ('MxCsr',                   DWORD),
+
+        # Segment Registers and processor flags.
+        ('SegCs',                   WORD),
+        ('SegDs',                   WORD),
+        ('SegEs',                   WORD),
+        ('SegFs',                   WORD),
+        ('SegGs',                   WORD),
+        ('SegSs',                   WORD),
+        ('EFlags',                  DWORD),
+
+        # Debug registers.
+        ('Dr0',                     DWORD64),
+        ('Dr1',                     DWORD64),
+        ('Dr2',                     DWORD64),
+        ('Dr3',                     DWORD64),
+        ('Dr6',                     DWORD64),
+        ('Dr7',                     DWORD64),
+
+        # Integer registers.
+        ('Rax',                     DWORD64),
+        ('Rcx',                     DWORD64),
+        ('Rdx',                     DWORD64),
+        ('Rbx',                     DWORD64),
+        ('Rsp',                     DWORD64),
+        ('Rbp',                     DWORD64),
+        ('Rsi',                     DWORD64),
+        ('Rdi',                     DWORD64),
+        ('R8',                      DWORD64),
+        ('R9',                      DWORD64),
+        ('R10',                     DWORD64),
+        ('R11',                     DWORD64),
+        ('R12',                     DWORD64),
+        ('R13',                     DWORD64),
+        ('R14',                     DWORD64),
+        ('R15',                     DWORD64),
+
+        # Program counter.
+        ('Rip',                     DWORD64),
+
+        # Floating point state.
+        ('FltSave',                 _CONTEXT_FLTSAVE_UNION),
+
+        # Vector registers.
+        ('VectorRegister',          M128A * 26),
+        ('VectorControl',           DWORD64),
+
+        # Special debug control registers.
+        ('DebugControl',            DWORD64),
+        ('LastBranchToRip',         DWORD64),
+        ('LastBranchFromRip',       DWORD64),
+        ('LastExceptionToRip',      DWORD64),
+        ('LastExceptionFromRip',    DWORD64),
+    ]
+
+    _others = ('P1Home', 'P2Home', 'P3Home', 'P4Home', 'P5Home', 'P6Home', \
+               'MxCsr', 'VectorRegister', 'VectorControl')
+    _control = ('SegSs', 'Rsp', 'SegCs', 'Rip', 'EFlags')
+    _integer = ('Rax', 'Rcx', 'Rdx', 'Rbx', 'Rsp', 'Rbp', 'Rsi', 'Rdi', \
+                'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15')
+    _segments = ('SegDs', 'SegEs', 'SegFs', 'SegGs')
+    _debug = ('Dr0', 'Dr1', 'Dr2', 'Dr3', 'Dr6', 'Dr7', \
+              'DebugControl', 'LastBranchToRip', 'LastBranchFromRip', \
+              'LastExceptionToRip', 'LastExceptionFromRip')
+    _mmx = ('Xmm0', 'Xmm1', 'Xmm2', 'Xmm3', 'Xmm4', 'Xmm5', 'Xmm6', 'Xmm7', \
+          'Xmm8', 'Xmm9', 'Xmm10', 'Xmm11', 'Xmm12', 'Xmm13', 'Xmm14', 'Xmm15')
+
+    # XXX TODO
+    # Convert VectorRegister and Xmm0-Xmm15 to pure Python types!
+
+    @classmethod
+    def from_dict(cls, ctx):
+        'Instantiate a new structure from a Python native type.'
+        ctx = Context(ctx)
+        s = cls()
+        ContextFlags = ctx['ContextFlags']
+        s.ContextFlags = ContextFlags
+        for key in cls._others:
+            if key != 'VectorRegister':
+                setattr(s, key, ctx[key])
+            else:
+                w = ctx[key]
+                v = (M128A * len(w))()
+                i = 0
+                for x in w:
+                    y = M128A()
+                    y.High = x >> 64
+                    y.Low = x & 0xFFFFFFFFFFFFFFFF      # low 64 bits
+                    v[i] = y
+                    i += 1
+                setattr(s, key, v)
+        if (ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL:
+            for key in cls._control:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER:
+            for key in cls._integer:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS:
+            for key in cls._segments:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS:
+            for key in cls._debug:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_MMX_REGISTERS) == CONTEXT_MMX_REGISTERS:
+            xmm = s.FltSave.xmm
+            for key in cls._mmx:
+                x = ctx[key]
+                y = M128A()
+                y.High = x >> 64
+                y.Low = x & 0xFFFFFFFFFFFFFFFF          # low 64 bits
+                setattr(xmm, key, y)
+        return s
+
+    def to_dict(self):
+        'Convert a structure into a Python dictionary.'
+        ctx = Context()
+        ContextFlags = self.ContextFlags
+        ctx['ContextFlags'] = ContextFlags
+        for key in self._others:
+            if key != 'VectorRegister':
+                ctx[key] = getattr(self, key)
+            else:
+                ctx[key] = tuple([ (x.Low + (x.High << 64)) for x in getattr(self, key) ])
+        if (ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL:
+            for key in self._control:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER:
+            for key in self._integer:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS:
+            for key in self._segments:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS:
+            for key in self._debug:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_MMX_REGISTERS) == CONTEXT_MMX_REGISTERS:
+            xmm = self.FltSave.xmm.to_dict()
+            for key in self._mmx:
+                ctx[key] = xmm.get(key)
+        return ctx
+
+PCONTEXT = ctypes.POINTER(CONTEXT)
+LPCONTEXT = PCONTEXT
+
+class Context(dict):
+    """
+    Register context dictionary for the amd64 architecture.
+    """
+
+    arch = CONTEXT.arch
+
+    def __get_pc(self):
+        return self['Rip']
+    def __set_pc(self, value):
+        self['Rip'] = value
+    pc = property(__get_pc, __set_pc)
+
+    def __get_sp(self):
+        return self['Rsp']
+    def __set_sp(self, value):
+        self['Rsp'] = value
+    sp = property(__get_sp, __set_sp)
+
+    def __get_fp(self):
+        return self['Rbp']
+    def __set_fp(self, value):
+        self['Rbp'] = value
+    fp = property(__get_fp, __set_fp)
+
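+# Usage sketch (illustrative only): a Context is a plain dict keyed by
+# register name, with pc/sp/fp as architecture-neutral aliases.
+#
+#     ctx = Context(Rip = 0x7FF612340000, Rsp = 0x7FFE0000F000)
+#     ctx.pc += 2            # equivalent to ctx['Rip'] += 2
+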
+#--- LDT_ENTRY structure ------------------------------------------------------
+
+# typedef struct _LDT_ENTRY {
+#   WORD LimitLow;
+#   WORD BaseLow;
+#   union {
+#     struct {
+#       BYTE BaseMid;
+#       BYTE Flags1;
+#       BYTE Flags2;
+#       BYTE BaseHi;
+#     } Bytes;
+#     struct {
+#       DWORD BaseMid  :8;
+#       DWORD Type  :5;
+#       DWORD Dpl  :2;
+#       DWORD Pres  :1;
+#       DWORD LimitHi  :4;
+#       DWORD Sys  :1;
+#       DWORD Reserved_0  :1;
+#       DWORD Default_Big  :1;
+#       DWORD Granularity  :1;
+#       DWORD BaseHi  :8;
+#     } Bits;
+#   } HighWord;
+# } LDT_ENTRY,
+#  *PLDT_ENTRY;
+
+class _LDT_ENTRY_BYTES_(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('BaseMid',         BYTE),
+        ('Flags1',          BYTE),
+        ('Flags2',          BYTE),
+        ('BaseHi',          BYTE),
+    ]
+
+class _LDT_ENTRY_BITS_(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('BaseMid',         DWORD,  8),
+        ('Type',            DWORD,  5),
+        ('Dpl',             DWORD,  2),
+        ('Pres',            DWORD,  1),
+        ('LimitHi',         DWORD,  4),
+        ('Sys',             DWORD,  1),
+        ('Reserved_0',      DWORD,  1),
+        ('Default_Big',     DWORD,  1),
+        ('Granularity',     DWORD,  1),
+        ('BaseHi',          DWORD,  8),
+    ]
+
+class _LDT_ENTRY_HIGHWORD_(Union):
+    _pack_ = 1
+    _fields_ = [
+        ('Bytes',           _LDT_ENTRY_BYTES_),
+        ('Bits',            _LDT_ENTRY_BITS_),
+    ]
+
+class LDT_ENTRY(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('LimitLow',        WORD),
+        ('BaseLow',         WORD),
+        ('HighWord',        _LDT_ENTRY_HIGHWORD_),
+    ]
+
+PLDT_ENTRY = POINTER(LDT_ENTRY)
+LPLDT_ENTRY = PLDT_ENTRY
+
+#--- WOW64 CONTEXT structure and constants ------------------------------------
+
+# Value of SegCs in a Wow64 thread when running in 32-bit mode
+WOW64_CS32 = 0x23
+
+WOW64_CONTEXT_i386 = 0x00010000
+WOW64_CONTEXT_i486 = 0x00010000
+
+WOW64_CONTEXT_CONTROL               = (WOW64_CONTEXT_i386 | 0x00000001)
+WOW64_CONTEXT_INTEGER               = (WOW64_CONTEXT_i386 | 0x00000002)
+WOW64_CONTEXT_SEGMENTS              = (WOW64_CONTEXT_i386 | 0x00000004)
+WOW64_CONTEXT_FLOATING_POINT        = (WOW64_CONTEXT_i386 | 0x00000008)
+WOW64_CONTEXT_DEBUG_REGISTERS       = (WOW64_CONTEXT_i386 | 0x00000010)
+WOW64_CONTEXT_EXTENDED_REGISTERS    = (WOW64_CONTEXT_i386 | 0x00000020)
+
+WOW64_CONTEXT_FULL                  = (WOW64_CONTEXT_CONTROL | WOW64_CONTEXT_INTEGER | WOW64_CONTEXT_SEGMENTS)
+WOW64_CONTEXT_ALL                   = (WOW64_CONTEXT_CONTROL | WOW64_CONTEXT_INTEGER | WOW64_CONTEXT_SEGMENTS | WOW64_CONTEXT_FLOATING_POINT | WOW64_CONTEXT_DEBUG_REGISTERS | WOW64_CONTEXT_EXTENDED_REGISTERS)
+
+WOW64_SIZE_OF_80387_REGISTERS       = 80
+WOW64_MAXIMUM_SUPPORTED_EXTENSION   = 512
+
+class WOW64_FLOATING_SAVE_AREA (context_i386.FLOATING_SAVE_AREA):
+    pass
+
+class WOW64_CONTEXT (context_i386.CONTEXT):
+    pass
+
+class WOW64_LDT_ENTRY (context_i386.LDT_ENTRY):
+    pass
+
+PWOW64_FLOATING_SAVE_AREA   = POINTER(WOW64_FLOATING_SAVE_AREA)
+PWOW64_CONTEXT              = POINTER(WOW64_CONTEXT)
+PWOW64_LDT_ENTRY            = POINTER(WOW64_LDT_ENTRY)
+
+###############################################################################
+
+# BOOL WINAPI GetThreadSelectorEntry(
+#   __in   HANDLE hThread,
+#   __in   DWORD dwSelector,
+#   __out  LPLDT_ENTRY lpSelectorEntry
+# );
+def GetThreadSelectorEntry(hThread, dwSelector):
+    _GetThreadSelectorEntry = windll.kernel32.GetThreadSelectorEntry
+    _GetThreadSelectorEntry.argtypes = [HANDLE, DWORD, LPLDT_ENTRY]
+    _GetThreadSelectorEntry.restype  = bool
+    _GetThreadSelectorEntry.errcheck = RaiseIfZero
+
+    ldt = LDT_ENTRY()
+    _GetThreadSelectorEntry(hThread, dwSelector, byref(ldt))
+    return ldt
+
+# BOOL WINAPI GetThreadContext(
+#   __in     HANDLE hThread,
+#   __inout  LPCONTEXT lpContext
+# );
+def GetThreadContext(hThread, ContextFlags = None, raw = False):
+    _GetThreadContext = windll.kernel32.GetThreadContext
+    _GetThreadContext.argtypes = [HANDLE, LPCONTEXT]
+    _GetThreadContext.restype  = bool
+    _GetThreadContext.errcheck = RaiseIfZero
+
+    if ContextFlags is None:
+        ContextFlags = CONTEXT_ALL | CONTEXT_AMD64
+    Context = CONTEXT()
+    Context.ContextFlags = ContextFlags
+    _GetThreadContext(hThread, byref(Context))
+    if raw:
+        return Context
+    return Context.to_dict()
+
+# BOOL WINAPI SetThreadContext(
+#   __in  HANDLE hThread,
+#   __in  const CONTEXT* lpContext
+# );
+def SetThreadContext(hThread, lpContext):
+    _SetThreadContext = windll.kernel32.SetThreadContext
+    _SetThreadContext.argtypes = [HANDLE, LPCONTEXT]
+    _SetThreadContext.restype  = bool
+    _SetThreadContext.errcheck = RaiseIfZero
+
+    if isinstance(lpContext, dict):
+        lpContext = CONTEXT.from_dict(lpContext)
+    _SetThreadContext(hThread, byref(lpContext))
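+
+# Usage sketch (illustrative only): read, patch and write back a thread
+# context. hThread needs THREAD_GET_CONTEXT and THREAD_SET_CONTEXT access,
+# and the thread should be suspended first; new_rip is a hypothetical
+# target address.
+#
+#     ctx = GetThreadContext(hThread)    # plain dict via CONTEXT.to_dict()
+#     ctx['Rip'] = new_rip
+#     SetThreadContext(hThread, ctx)     # converted back via CONTEXT.from_dict()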
+
+# BOOL Wow64GetThreadSelectorEntry(
+#   __in   HANDLE hThread,
+#   __in   DWORD dwSelector,
+#   __out  PWOW64_LDT_ENTRY lpSelectorEntry
+# );
+def Wow64GetThreadSelectorEntry(hThread, dwSelector):
+    _Wow64GetThreadSelectorEntry = windll.kernel32.Wow64GetThreadSelectorEntry
+    _Wow64GetThreadSelectorEntry.argtypes = [HANDLE, DWORD, PWOW64_LDT_ENTRY]
+    _Wow64GetThreadSelectorEntry.restype  = bool
+    _Wow64GetThreadSelectorEntry.errcheck = RaiseIfZero
+
+    lpSelectorEntry = WOW64_LDT_ENTRY()
+    _Wow64GetThreadSelectorEntry(hThread, dwSelector, byref(lpSelectorEntry))
+    return lpSelectorEntry
+
+# DWORD WINAPI Wow64ResumeThread(
+#   __in  HANDLE hThread
+# );
+def Wow64ResumeThread(hThread):
+    _Wow64ResumeThread = windll.kernel32.Wow64ResumeThread
+    _Wow64ResumeThread.argtypes = [HANDLE]
+    _Wow64ResumeThread.restype  = DWORD
+
+    previousCount = _Wow64ResumeThread(hThread)
+    if previousCount == DWORD(-1).value:
+        raise ctypes.WinError()
+    return previousCount
+
+# DWORD WINAPI Wow64SuspendThread(
+#   __in  HANDLE hThread
+# );
+def Wow64SuspendThread(hThread):
+    _Wow64SuspendThread = windll.kernel32.Wow64SuspendThread
+    _Wow64SuspendThread.argtypes = [HANDLE]
+    _Wow64SuspendThread.restype  = DWORD
+
+    previousCount = _Wow64SuspendThread(hThread)
+    if previousCount == DWORD(-1).value:
+        raise ctypes.WinError()
+    return previousCount
+
+# XXX TODO Use this http://www.nynaeve.net/Code/GetThreadWow64Context.cpp
+# Also see http://www.woodmann.com/forum/archive/index.php/t-11162.html
+
+# BOOL WINAPI Wow64GetThreadContext(
+#   __in     HANDLE hThread,
+#   __inout  PWOW64_CONTEXT lpContext
+# );
+def Wow64GetThreadContext(hThread, ContextFlags = None, raw = False):
+    _Wow64GetThreadContext = windll.kernel32.Wow64GetThreadContext
+    _Wow64GetThreadContext.argtypes = [HANDLE, PWOW64_CONTEXT]
+    _Wow64GetThreadContext.restype  = bool
+    _Wow64GetThreadContext.errcheck = RaiseIfZero
+
+    # XXX doesn't exist on 64-bit XP
+
+    Context = WOW64_CONTEXT()
+    if ContextFlags is None:
+        Context.ContextFlags = WOW64_CONTEXT_ALL | WOW64_CONTEXT_i386
+    else:
+        Context.ContextFlags = ContextFlags
+    _Wow64GetThreadContext(hThread, byref(Context))
+    if raw:
+        return Context
+    return Context.to_dict()
+
+# BOOL WINAPI Wow64SetThreadContext(
+#   __in  HANDLE hThread,
+#   __in  const WOW64_CONTEXT *lpContext
+# );
+def Wow64SetThreadContext(hThread, lpContext):
+    _Wow64SetThreadContext = windll.kernel32.Wow64SetThreadContext
+    _Wow64SetThreadContext.argtypes = [HANDLE, PWOW64_CONTEXT]
+    _Wow64SetThreadContext.restype  = bool
+    _Wow64SetThreadContext.errcheck = RaiseIfZero
+
+    # XXX doesn't exist on 64-bit XP
+
+    if isinstance(lpContext, dict):
+        lpContext = WOW64_CONTEXT.from_dict(lpContext)
+    _Wow64SetThreadContext(hThread, byref(lpContext))
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/context_i386.py b/scripts/win32/context_i386.py
new file mode 100644
index 0000000..36c0923
--- /dev/null
+++ b/scripts/win32/context_i386.py
@@ -0,0 +1,447 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+CONTEXT structure for i386.
+"""
+
+from .defines import *  # NOQA
+from .version import ARCH_I386
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- CONTEXT structures and constants -----------------------------------------
+
+# The following values specify the type of access in the first parameter
+# of the exception record when the exception code specifies an access
+# violation.
+EXCEPTION_READ_FAULT        = 0     # exception caused by a read
+EXCEPTION_WRITE_FAULT       = 1     # exception caused by a write
+EXCEPTION_EXECUTE_FAULT     = 8     # exception caused by an instruction fetch
+
+CONTEXT_i386                = 0x00010000    # this assumes that i386 and
+CONTEXT_i486                = 0x00010000    # i486 have identical context records
+
+CONTEXT_CONTROL             = (CONTEXT_i386 | 0x00000001) # SS:SP, CS:IP, FLAGS, BP
+CONTEXT_INTEGER             = (CONTEXT_i386 | 0x00000002) # AX, BX, CX, DX, SI, DI
+CONTEXT_SEGMENTS            = (CONTEXT_i386 | 0x00000004) # DS, ES, FS, GS
+CONTEXT_FLOATING_POINT      = (CONTEXT_i386 | 0x00000008) # 387 state
+CONTEXT_DEBUG_REGISTERS     = (CONTEXT_i386 | 0x00000010) # DB 0-3,6,7
+CONTEXT_EXTENDED_REGISTERS  = (CONTEXT_i386 | 0x00000020) # cpu specific extensions
+
+CONTEXT_FULL = (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS)
+
+CONTEXT_ALL = (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | \
+                CONTEXT_FLOATING_POINT | CONTEXT_DEBUG_REGISTERS | \
+                CONTEXT_EXTENDED_REGISTERS)
+
+SIZE_OF_80387_REGISTERS     = 80
+MAXIMUM_SUPPORTED_EXTENSION = 512
+
+# typedef struct _FLOATING_SAVE_AREA {
+#     DWORD   ControlWord;
+#     DWORD   StatusWord;
+#     DWORD   TagWord;
+#     DWORD   ErrorOffset;
+#     DWORD   ErrorSelector;
+#     DWORD   DataOffset;
+#     DWORD   DataSelector;
+#     BYTE    RegisterArea[SIZE_OF_80387_REGISTERS];
+#     DWORD   Cr0NpxState;
+# } FLOATING_SAVE_AREA;
+class FLOATING_SAVE_AREA(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('ControlWord',     DWORD),
+        ('StatusWord',      DWORD),
+        ('TagWord',         DWORD),
+        ('ErrorOffset',     DWORD),
+        ('ErrorSelector',   DWORD),
+        ('DataOffset',      DWORD),
+        ('DataSelector',    DWORD),
+        ('RegisterArea',    BYTE * SIZE_OF_80387_REGISTERS),
+        ('Cr0NpxState',     DWORD),
+    ]
+
+    _integer_members = ('ControlWord', 'StatusWord', 'TagWord', 'ErrorOffset', 'ErrorSelector', 'DataOffset', 'DataSelector', 'Cr0NpxState')
+
+    @classmethod
+    def from_dict(cls, fsa):
+        'Instantiate a new structure from a Python dictionary.'
+        fsa = dict(fsa)
+        s = cls()
+        for key in cls._integer_members:
+            setattr(s, key, fsa.get(key))
+        ra = fsa.get('RegisterArea', None)
+        if ra is not None:
+            for index in range(0, SIZE_OF_80387_REGISTERS):
+                s.RegisterArea[index] = ra[index]
+        return s
+
+    def to_dict(self):
+        'Convert a structure into a Python dictionary.'
+        fsa = dict()
+        for key in self._integer_members:
+            fsa[key] = getattr(self, key)
+        ra = [ self.RegisterArea[index] for index in range(0, SIZE_OF_80387_REGISTERS) ]
+        ra = tuple(ra)
+        fsa['RegisterArea'] = ra
+        return fsa
+
+PFLOATING_SAVE_AREA = POINTER(FLOATING_SAVE_AREA)
+LPFLOATING_SAVE_AREA = PFLOATING_SAVE_AREA
+
+# typedef struct _CONTEXT {
+#     DWORD ContextFlags;
+#     DWORD   Dr0;
+#     DWORD   Dr1;
+#     DWORD   Dr2;
+#     DWORD   Dr3;
+#     DWORD   Dr6;
+#     DWORD   Dr7;
+#     FLOATING_SAVE_AREA FloatSave;
+#     DWORD   SegGs;
+#     DWORD   SegFs;
+#     DWORD   SegEs;
+#     DWORD   SegDs;
+#     DWORD   Edi;
+#     DWORD   Esi;
+#     DWORD   Ebx;
+#     DWORD   Edx;
+#     DWORD   Ecx;
+#     DWORD   Eax;
+#     DWORD   Ebp;
+#     DWORD   Eip;
+#     DWORD   SegCs;
+#     DWORD   EFlags;
+#     DWORD   Esp;
+#     DWORD   SegSs;
+#     BYTE    ExtendedRegisters[MAXIMUM_SUPPORTED_EXTENSION];
+# } CONTEXT;
+class CONTEXT(Structure):
+    arch = ARCH_I386
+
+    _pack_ = 1
+
+    # Context Frame
+    #
+    #  This frame has several purposes: 1) it is used as an argument to
+    #  NtContinue, 2) it is used to construct a call frame for APC delivery,
+    #  and 3) it is used in the user level thread creation routines.
+    #
+    #  The layout of the record conforms to a standard call frame.
+
+    _fields_ = [
+
+        # The flags values within this flag control the contents of
+        # a CONTEXT record.
+        #
+        # If the context record is used as an input parameter, then
+        # for each portion of the context record controlled by a flag
+        # whose value is set, it is assumed that that portion of the
+        # context record contains valid context. If the context record
+        # is being used to modify a thread's context, then only that
+        # portion of the thread's context will be modified.
+        #
+        # If the context record is used as an IN OUT parameter to capture
+        # the context of a thread, then only those portions of the thread's
+        # context corresponding to set flags will be returned.
+        #
+        # The context record is never used as an OUT only parameter.
+
+        ('ContextFlags',        DWORD),
+
+        # This section is specified/returned if CONTEXT_DEBUG_REGISTERS is
+        # set in ContextFlags.  Note that CONTEXT_DEBUG_REGISTERS is NOT
+        # included in CONTEXT_FULL.
+
+        ('Dr0',                 DWORD),
+        ('Dr1',                 DWORD),
+        ('Dr2',                 DWORD),
+        ('Dr3',                 DWORD),
+        ('Dr6',                 DWORD),
+        ('Dr7',                 DWORD),
+
+        # This section is specified/returned if the
+        # ContextFlags word contains the flag CONTEXT_FLOATING_POINT.
+
+        ('FloatSave',           FLOATING_SAVE_AREA),
+
+        # This section is specified/returned if the
+        # ContextFlags word contains the flag CONTEXT_SEGMENTS.
+
+        ('SegGs',               DWORD),
+        ('SegFs',               DWORD),
+        ('SegEs',               DWORD),
+        ('SegDs',               DWORD),
+
+        # This section is specified/returned if the
+        # ContextFlags word contains the flag CONTEXT_INTEGER.
+
+        ('Edi',                 DWORD),
+        ('Esi',                 DWORD),
+        ('Ebx',                 DWORD),
+        ('Edx',                 DWORD),
+        ('Ecx',                 DWORD),
+        ('Eax',                 DWORD),
+
+        # This section is specified/returned if the
+        # ContextFlags word contains the flag CONTEXT_CONTROL.
+
+        ('Ebp',                 DWORD),
+        ('Eip',                 DWORD),
+        ('SegCs',               DWORD),         # MUST BE SANITIZED
+        ('EFlags',              DWORD),         # MUST BE SANITIZED
+        ('Esp',                 DWORD),
+        ('SegSs',               DWORD),
+
+        # This section is specified/returned if the ContextFlags word
+        # contains the flag CONTEXT_EXTENDED_REGISTERS.
+        # The format and contexts are processor specific.
+
+        ('ExtendedRegisters',   BYTE * MAXIMUM_SUPPORTED_EXTENSION),
+    ]
+
+    _ctx_debug   = ('Dr0', 'Dr1', 'Dr2', 'Dr3', 'Dr6', 'Dr7')
+    _ctx_segs    = ('SegGs', 'SegFs', 'SegEs', 'SegDs', )
+    _ctx_int     = ('Edi', 'Esi', 'Ebx', 'Edx', 'Ecx', 'Eax')
+    _ctx_ctrl    = ('Ebp', 'Eip', 'SegCs', 'EFlags', 'Esp', 'SegSs')
+
+    @classmethod
+    def from_dict(cls, ctx):
+        'Instantiate a new structure from a Python dictionary.'
+        ctx = Context(ctx)
+        s = cls()
+        ContextFlags = ctx['ContextFlags']
+        setattr(s, 'ContextFlags', ContextFlags)
+        if (ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS:
+            for key in s._ctx_debug:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT:
+            fsa = ctx['FloatSave']
+            s.FloatSave = FLOATING_SAVE_AREA.from_dict(fsa)
+        if (ContextFlags & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS:
+            for key in s._ctx_segs:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER:
+            for key in s._ctx_int:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL:
+            for key in s._ctx_ctrl:
+                setattr(s, key, ctx[key])
+        if (ContextFlags & CONTEXT_EXTENDED_REGISTERS) == CONTEXT_EXTENDED_REGISTERS:
+            er = ctx['ExtendedRegisters']
+            for index in range(0, MAXIMUM_SUPPORTED_EXTENSION):
+                s.ExtendedRegisters[index] = er[index]
+        return s
+
+    def to_dict(self):
+        'Convert a structure into a Python native type.'
+        ctx = Context()
+        ContextFlags = self.ContextFlags
+        ctx['ContextFlags'] = ContextFlags
+        if (ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS:
+            for key in self._ctx_debug:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT:
+            ctx['FloatSave'] = self.FloatSave.to_dict()
+        if (ContextFlags & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS:
+            for key in self._ctx_segs:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_INTEGER) == CONTEXT_INTEGER:
+            for key in self._ctx_int:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL:
+            for key in self._ctx_ctrl:
+                ctx[key] = getattr(self, key)
+        if (ContextFlags & CONTEXT_EXTENDED_REGISTERS) == CONTEXT_EXTENDED_REGISTERS:
+            er = [ self.ExtendedRegisters[index] for index in range(0, MAXIMUM_SUPPORTED_EXTENSION) ]
+            er = tuple(er)
+            ctx['ExtendedRegisters'] = er
+        return ctx
+
+PCONTEXT = POINTER(CONTEXT)
+LPCONTEXT = PCONTEXT
+
+class Context(dict):
+    """
+    Register context dictionary for the i386 architecture.
+    """
+
+    arch = CONTEXT.arch
+
+    def __get_pc(self):
+        return self['Eip']
+    def __set_pc(self, value):
+        self['Eip'] = value
+    pc = property(__get_pc, __set_pc)
+
+    def __get_sp(self):
+        return self['Esp']
+    def __set_sp(self, value):
+        self['Esp'] = value
+    sp = property(__get_sp, __set_sp)
+
+    def __get_fp(self):
+        return self['Ebp']
+    def __set_fp(self, value):
+        self['Ebp'] = value
+    fp = property(__get_fp, __set_fp)
+
+#--- LDT_ENTRY structure ------------------------------------------------------
+
+# typedef struct _LDT_ENTRY {
+#   WORD LimitLow;
+#   WORD BaseLow;
+#   union {
+#     struct {
+#       BYTE BaseMid;
+#       BYTE Flags1;
+#       BYTE Flags2;
+#       BYTE BaseHi;
+#     } Bytes;
+#     struct {
+#       DWORD BaseMid  :8;
+#       DWORD Type  :5;
+#       DWORD Dpl  :2;
+#       DWORD Pres  :1;
+#       DWORD LimitHi  :4;
+#       DWORD Sys  :1;
+#       DWORD Reserved_0  :1;
+#       DWORD Default_Big  :1;
+#       DWORD Granularity  :1;
+#       DWORD BaseHi  :8;
+#     } Bits;
+#   } HighWord;
+# } LDT_ENTRY,
+#  *PLDT_ENTRY;
+
+class _LDT_ENTRY_BYTES_(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('BaseMid',         BYTE),
+        ('Flags1',          BYTE),
+        ('Flags2',          BYTE),
+        ('BaseHi',          BYTE),
+    ]
+
+class _LDT_ENTRY_BITS_(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('BaseMid',         DWORD,  8),
+        ('Type',            DWORD,  5),
+        ('Dpl',             DWORD,  2),
+        ('Pres',            DWORD,  1),
+        ('LimitHi',         DWORD,  4),
+        ('Sys',             DWORD,  1),
+        ('Reserved_0',      DWORD,  1),
+        ('Default_Big',     DWORD,  1),
+        ('Granularity',     DWORD,  1),
+        ('BaseHi',          DWORD,  8),
+    ]
+
+class _LDT_ENTRY_HIGHWORD_(Union):
+    _pack_ = 1
+    _fields_ = [
+        ('Bytes',           _LDT_ENTRY_BYTES_),
+        ('Bits',            _LDT_ENTRY_BITS_),
+    ]
+
+class LDT_ENTRY(Structure):
+    _pack_ = 1
+    _fields_ = [
+        ('LimitLow',        WORD),
+        ('BaseLow',         WORD),
+        ('HighWord',        _LDT_ENTRY_HIGHWORD_),
+    ]
+
+PLDT_ENTRY = POINTER(LDT_ENTRY)
+LPLDT_ENTRY = PLDT_ENTRY
+
+###############################################################################
+
+# BOOL WINAPI GetThreadSelectorEntry(
+#   __in   HANDLE hThread,
+#   __in   DWORD dwSelector,
+#   __out  LPLDT_ENTRY lpSelectorEntry
+# );
+def GetThreadSelectorEntry(hThread, dwSelector):
+    _GetThreadSelectorEntry = windll.kernel32.GetThreadSelectorEntry
+    _GetThreadSelectorEntry.argtypes = [HANDLE, DWORD, LPLDT_ENTRY]
+    _GetThreadSelectorEntry.restype  = bool
+    _GetThreadSelectorEntry.errcheck = RaiseIfZero
+
+    ldt = LDT_ENTRY()
+    _GetThreadSelectorEntry(hThread, dwSelector, byref(ldt))
+    return ldt
+
+# BOOL WINAPI GetThreadContext(
+#   __in     HANDLE hThread,
+#   __inout  LPCONTEXT lpContext
+# );
+def GetThreadContext(hThread, ContextFlags = None, raw = False):
+    _GetThreadContext = windll.kernel32.GetThreadContext
+    _GetThreadContext.argtypes = [HANDLE, LPCONTEXT]
+    _GetThreadContext.restype  = bool
+    _GetThreadContext.errcheck = RaiseIfZero
+
+    if ContextFlags is None:
+        ContextFlags = CONTEXT_ALL | CONTEXT_i386
+    Context = CONTEXT()
+    Context.ContextFlags = ContextFlags
+    _GetThreadContext(hThread, byref(Context))
+    if raw:
+        return Context
+    return Context.to_dict()
+
+# BOOL WINAPI SetThreadContext(
+#   __in  HANDLE hThread,
+#   __in  const CONTEXT* lpContext
+# );
+def SetThreadContext(hThread, lpContext):
+    _SetThreadContext = windll.kernel32.SetThreadContext
+    _SetThreadContext.argtypes = [HANDLE, LPCONTEXT]
+    _SetThreadContext.restype  = bool
+    _SetThreadContext.errcheck = RaiseIfZero
+
+    if isinstance(lpContext, dict):
+        lpContext = CONTEXT.from_dict(lpContext)
+    _SetThreadContext(hThread, byref(lpContext))
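+
+# Usage sketch (illustrative only): single-step a suspended 32-bit thread by
+# setting the x86 trap flag (bit 8 of EFlags) and writing the context back.
+#
+#     ctx = GetThreadContext(hThread)
+#     ctx['EFlags'] |= 0x100             # TF: raises a single-step exception
+#     SetThreadContext(hThread, ctx)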
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/dbghelp.py b/scripts/win32/dbghelp.py
new file mode 100644
index 0000000..f7bbbd4
--- /dev/null
+++ b/scripts/win32/dbghelp.py
@@ -0,0 +1,1271 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for dbghelp.dll in ctypes.
+"""
+
+from .defines import *  # NOQA
+from .version import *  # NOQA
+from .kernel32 import *  # NOQA
+
+# DbgHelp versions and features list:
+# http://msdn.microsoft.com/en-us/library/windows/desktop/ms679294(v=vs.85).aspx
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+# SymGetHomeDirectory "type" values
+hdBase = 0
+hdSym  = 1
+hdSrc  = 2
+
+UNDNAME_32_BIT_DECODE           = 0x0800
+UNDNAME_COMPLETE                = 0x0000
+UNDNAME_NAME_ONLY               = 0x1000
+UNDNAME_NO_ACCESS_SPECIFIERS    = 0x0080
+UNDNAME_NO_ALLOCATION_LANGUAGE  = 0x0010
+UNDNAME_NO_ALLOCATION_MODEL     = 0x0008
+UNDNAME_NO_ARGUMENTS            = 0x2000
+UNDNAME_NO_CV_THISTYPE          = 0x0040
+UNDNAME_NO_FUNCTION_RETURNS     = 0x0004
+UNDNAME_NO_LEADING_UNDERSCORES  = 0x0001
+UNDNAME_NO_MEMBER_TYPE          = 0x0200
+UNDNAME_NO_MS_KEYWORDS          = 0x0002
+UNDNAME_NO_MS_THISTYPE          = 0x0020
+UNDNAME_NO_RETURN_UDT_MODEL     = 0x0400
+UNDNAME_NO_SPECIAL_SYMS         = 0x4000
+UNDNAME_NO_THISTYPE             = 0x0060
+UNDNAME_NO_THROW_SIGNATURES     = 0x0100
+
+#--- IMAGEHLP_MODULE structure and related ------------------------------------
+
+SYMOPT_ALLOW_ABSOLUTE_SYMBOLS       = 0x00000800
+SYMOPT_ALLOW_ZERO_ADDRESS           = 0x01000000
+SYMOPT_AUTO_PUBLICS                 = 0x00010000
+SYMOPT_CASE_INSENSITIVE             = 0x00000001
+SYMOPT_DEBUG                        = 0x80000000
+SYMOPT_DEFERRED_LOADS               = 0x00000004
+SYMOPT_DISABLE_SYMSRV_AUTODETECT    = 0x02000000
+SYMOPT_EXACT_SYMBOLS                = 0x00000400
+SYMOPT_FAIL_CRITICAL_ERRORS         = 0x00000200
+SYMOPT_FAVOR_COMPRESSED             = 0x00800000
+SYMOPT_FLAT_DIRECTORY               = 0x00400000
+SYMOPT_IGNORE_CVREC                 = 0x00000080
+SYMOPT_IGNORE_IMAGEDIR              = 0x00200000
+SYMOPT_IGNORE_NT_SYMPATH            = 0x00001000
+SYMOPT_INCLUDE_32BIT_MODULES        = 0x00002000
+SYMOPT_LOAD_ANYTHING                = 0x00000040
+SYMOPT_LOAD_LINES                   = 0x00000010
+SYMOPT_NO_CPP                       = 0x00000008
+SYMOPT_NO_IMAGE_SEARCH              = 0x00020000
+SYMOPT_NO_PROMPTS                   = 0x00080000
+SYMOPT_NO_PUBLICS                   = 0x00008000
+SYMOPT_NO_UNQUALIFIED_LOADS         = 0x00000100
+SYMOPT_OVERWRITE                    = 0x00100000
+SYMOPT_PUBLICS_ONLY                 = 0x00004000
+SYMOPT_SECURE                       = 0x00040000
+SYMOPT_UNDNAME                      = 0x00000002
+
+##SSRVOPT_DWORD
+##SSRVOPT_DWORDPTR
+##SSRVOPT_GUIDPTR
+##
+##SSRVOPT_CALLBACK
+##SSRVOPT_DOWNSTREAM_STORE
+##SSRVOPT_FLAT_DEFAULT_STORE
+##SSRVOPT_FAVOR_COMPRESSED
+##SSRVOPT_NOCOPY
+##SSRVOPT_OVERWRITE
+##SSRVOPT_PARAMTYPE
+##SSRVOPT_PARENTWIN
+##SSRVOPT_PROXY
+##SSRVOPT_RESET
+##SSRVOPT_SECURE
+##SSRVOPT_SETCONTEXT
+##SSRVOPT_TRACE
+##SSRVOPT_UNATTENDED
+
+#    typedef enum
+#    {
+#        SymNone = 0,
+#        SymCoff,
+#        SymCv,
+#        SymPdb,
+#        SymExport,
+#        SymDeferred,
+#        SymSym,
+#        SymDia,
+#        SymVirtual,
+#        NumSymTypes
+#    } SYM_TYPE;
+SymNone     = 0
+SymCoff     = 1
+SymCv       = 2
+SymPdb      = 3
+SymExport   = 4
+SymDeferred = 5
+SymSym      = 6
+SymDia      = 7
+SymVirtual  = 8
+NumSymTypes = 9
+
+#    typedef struct _IMAGEHLP_MODULE64 {
+#      DWORD    SizeOfStruct;
+#      DWORD64  BaseOfImage;
+#      DWORD    ImageSize;
+#      DWORD    TimeDateStamp;
+#      DWORD    CheckSum;
+#      DWORD    NumSyms;
+#      SYM_TYPE SymType;
+#      TCHAR    ModuleName[32];
+#      TCHAR    ImageName[256];
+#      TCHAR    LoadedImageName[256];
+#      TCHAR    LoadedPdbName[256];
+#      DWORD    CVSig;
+#      TCHAR    CVData[MAX_PATH*3];
+#      DWORD    PdbSig;
+#      GUID     PdbSig70;
+#      DWORD    PdbAge;
+#      BOOL     PdbUnmatched;
+#      BOOL     DbgUnmatched;
+#      BOOL     LineNumbers;
+#      BOOL     GlobalSymbols;
+#      BOOL     TypeInfo;
+#      BOOL     SourceIndexed;
+#      BOOL     Publics;
+#    } IMAGEHLP_MODULE64, *PIMAGEHLP_MODULE64;
+
+class IMAGEHLP_MODULE (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("BaseOfImage",     DWORD),
+        ("ImageSize",       DWORD),
+        ("TimeDateStamp",   DWORD),
+        ("CheckSum",        DWORD),
+        ("NumSyms",         DWORD),
+        ("SymType",         DWORD),         # SYM_TYPE
+        ("ModuleName",      CHAR * 32),
+        ("ImageName",       CHAR * 256),
+        ("LoadedImageName", CHAR * 256),
+    ]
+PIMAGEHLP_MODULE = POINTER(IMAGEHLP_MODULE)
+
+class IMAGEHLP_MODULE64 (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("BaseOfImage",     DWORD64),
+        ("ImageSize",       DWORD),
+        ("TimeDateStamp",   DWORD),
+        ("CheckSum",        DWORD),
+        ("NumSyms",         DWORD),
+        ("SymType",         DWORD),         # SYM_TYPE
+        ("ModuleName",      CHAR * 32),
+        ("ImageName",       CHAR * 256),
+        ("LoadedImageName", CHAR * 256),
+        ("LoadedPdbName",   CHAR * 256),
+        ("CVSig",           DWORD),
+        ("CVData",          CHAR * (MAX_PATH * 3)),
+        ("PdbSig",          DWORD),
+        ("PdbSig70",        GUID),
+        ("PdbAge",          DWORD),
+        ("PdbUnmatched",    BOOL),
+        ("DbgUnmatched",    BOOL),
+        ("LineNumbers",     BOOL),
+        ("GlobalSymbols",   BOOL),
+        ("TypeInfo",        BOOL),
+        ("SourceIndexed",   BOOL),
+        ("Publics",         BOOL),
+    ]
+PIMAGEHLP_MODULE64 = POINTER(IMAGEHLP_MODULE64)
+
+class IMAGEHLP_MODULEW (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("BaseOfImage",     DWORD),
+        ("ImageSize",       DWORD),
+        ("TimeDateStamp",   DWORD),
+        ("CheckSum",        DWORD),
+        ("NumSyms",         DWORD),
+        ("SymType",         DWORD),         # SYM_TYPE
+        ("ModuleName",      WCHAR * 32),
+        ("ImageName",       WCHAR * 256),
+        ("LoadedImageName", WCHAR * 256),
+    ]
+PIMAGEHLP_MODULEW = POINTER(IMAGEHLP_MODULEW)
+
+class IMAGEHLP_MODULEW64 (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("BaseOfImage",     DWORD64),
+        ("ImageSize",       DWORD),
+        ("TimeDateStamp",   DWORD),
+        ("CheckSum",        DWORD),
+        ("NumSyms",         DWORD),
+        ("SymType",         DWORD),         # SYM_TYPE
+        ("ModuleName",      WCHAR * 32),
+        ("ImageName",       WCHAR * 256),
+        ("LoadedImageName", WCHAR * 256),
+        ("LoadedPdbName",   WCHAR * 256),
+        ("CVSig",           DWORD),
+        ("CVData",          WCHAR * (MAX_PATH * 3)),
+        ("PdbSig",          DWORD),
+        ("PdbSig70",        GUID),
+        ("PdbAge",          DWORD),
+        ("PdbUnmatched",    BOOL),
+        ("DbgUnmatched",    BOOL),
+        ("LineNumbers",     BOOL),
+        ("GlobalSymbols",   BOOL),
+        ("TypeInfo",        BOOL),
+        ("SourceIndexed",   BOOL),
+        ("Publics",         BOOL),
+    ]
+PIMAGEHLP_MODULEW64 = POINTER(IMAGEHLP_MODULEW64)
+
+#--- dbghelp.dll --------------------------------------------------------------
+
+# XXX the ANSI versions of these functions don't end in "A" as expected!
+
+# BOOL WINAPI MakeSureDirectoryPathExists(
+#   _In_  PCSTR DirPath
+# );
+def MakeSureDirectoryPathExistsA(DirPath):
+    _MakeSureDirectoryPathExists = windll.dbghelp.MakeSureDirectoryPathExists
+    _MakeSureDirectoryPathExists.argtypes = [LPSTR]
+    _MakeSureDirectoryPathExists.restype  = bool
+    _MakeSureDirectoryPathExists.errcheck = RaiseIfZero
+    return _MakeSureDirectoryPathExists(DirPath)
+
+MakeSureDirectoryPathExistsW = MakeWideVersion(MakeSureDirectoryPathExistsA)
+MakeSureDirectoryPathExists = GuessStringType(MakeSureDirectoryPathExistsA, MakeSureDirectoryPathExistsW)
+
+# BOOL WINAPI SymInitialize(
+#   __in      HANDLE hProcess,
+#   __in_opt  PCTSTR UserSearchPath,
+#   __in      BOOL fInvadeProcess
+# );
+def SymInitializeA(hProcess, UserSearchPath = None, fInvadeProcess = False):
+    _SymInitialize = windll.dbghelp.SymInitialize
+    _SymInitialize.argtypes = [HANDLE, LPSTR, BOOL]
+    _SymInitialize.restype  = bool
+    _SymInitialize.errcheck = RaiseIfZero
+    if not UserSearchPath:
+        UserSearchPath = None
+    _SymInitialize(hProcess, UserSearchPath, fInvadeProcess)
+
+SymInitializeW = MakeWideVersion(SymInitializeA)
+SymInitialize = GuessStringType(SymInitializeA, SymInitializeW)
+
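A minimal usage sketch for the pair above, assuming only that the current-process pseudo handle is fetched straight from kernel32 via ctypes (everything else is the wrappers defined in this file):

    hProcess = ctypes.windll.kernel32.GetCurrentProcess()
    SymInitialize(hProcess, None, True)   # True: let dbghelp enumerate the loaded modules
    try:
        pass  # symbol queries go here
    finally:
        SymCleanup(hProcess)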
+# BOOL WINAPI SymCleanup(
+#   __in  HANDLE hProcess
+# );
+def SymCleanup(hProcess):
+    _SymCleanup = windll.dbghelp.SymCleanup
+    _SymCleanup.argtypes = [HANDLE]
+    _SymCleanup.restype  = bool
+    _SymCleanup.errcheck = RaiseIfZero
+    _SymCleanup(hProcess)
+
+# BOOL WINAPI SymRefreshModuleList(
+#   __in  HANDLE hProcess
+# );
+def SymRefreshModuleList(hProcess):
+    _SymRefreshModuleList = windll.dbghelp.SymRefreshModuleList
+    _SymRefreshModuleList.argtypes = [HANDLE]
+    _SymRefreshModuleList.restype  = bool
+    _SymRefreshModuleList.errcheck = RaiseIfZero
+    _SymRefreshModuleList(hProcess)
+
+# BOOL WINAPI SymSetParentWindow(
+#   __in  HWND hwnd
+# );
+def SymSetParentWindow(hwnd):
+    _SymSetParentWindow = windll.dbghelp.SymSetParentWindow
+    _SymSetParentWindow.argtypes = [HWND]
+    _SymSetParentWindow.restype  = bool
+    _SymSetParentWindow.errcheck = RaiseIfZero
+    _SymSetParentWindow(hwnd)
+
+# DWORD WINAPI SymSetOptions(
+#   __in  DWORD SymOptions
+# );
+def SymSetOptions(SymOptions):
+    _SymSetOptions = windll.dbghelp.SymSetOptions
+    _SymSetOptions.argtypes = [DWORD]
+    _SymSetOptions.restype  = DWORD
+    _SymSetOptions.errcheck = RaiseIfZero
+    _SymSetOptions(SymOptions)
+
+# DWORD WINAPI SymGetOptions(void);
+def SymGetOptions():
+    _SymGetOptions = windll.dbghelp.SymGetOptions
+    _SymGetOptions.argtypes = []
+    _SymGetOptions.restype  = DWORD
+    return _SymGetOptions()
+
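The SYMOPT_* constants above are meant to be OR-ed together; a small sketch of enabling a few common options without losing the current settings:

    opts = SymGetOptions()
    SymSetOptions(opts | SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES)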
+# DWORD WINAPI SymLoadModule(
+#   __in      HANDLE hProcess,
+#   __in_opt  HANDLE hFile,
+#   __in_opt  PCSTR ImageName,
+#   __in_opt  PCSTR ModuleName,
+#   __in      DWORD BaseOfDll,
+#   __in      DWORD SizeOfDll
+# );
+def SymLoadModuleA(hProcess, hFile = None, ImageName = None, ModuleName = None, BaseOfDll = None, SizeOfDll = None):
+    _SymLoadModule = windll.dbghelp.SymLoadModule
+    _SymLoadModule.argtypes = [HANDLE, HANDLE, LPSTR, LPSTR, DWORD, DWORD]
+    _SymLoadModule.restype  = DWORD
+
+    if not ImageName:
+        ImageName = None
+    if not ModuleName:
+        ModuleName = None
+    if not BaseOfDll:
+        BaseOfDll = 0
+    if not SizeOfDll:
+        SizeOfDll = 0
+    SetLastError(ERROR_SUCCESS)
+    lpBaseAddress = _SymLoadModule(hProcess, hFile, ImageName, ModuleName, BaseOfDll, SizeOfDll)
+    if not lpBaseAddress:  # zero means failure (NULL is None, so an == comparison would never match)
+        dwErrorCode = GetLastError()
+        if dwErrorCode != ERROR_SUCCESS:
+            raise ctypes.WinError(dwErrorCode)
+    return lpBaseAddress
+
+SymLoadModuleW = MakeWideVersion(SymLoadModuleA)
+SymLoadModule = GuessStringType(SymLoadModuleA, SymLoadModuleW)
+
+# DWORD64 WINAPI SymLoadModule64(
+#   __in      HANDLE hProcess,
+#   __in_opt  HANDLE hFile,
+#   __in_opt  PCSTR ImageName,
+#   __in_opt  PCSTR ModuleName,
+#   __in      DWORD64 BaseOfDll,
+#   __in      DWORD SizeOfDll
+# );
+def SymLoadModule64A(hProcess, hFile = None, ImageName = None, ModuleName = None, BaseOfDll = None, SizeOfDll = None):
+    _SymLoadModule64 = windll.dbghelp.SymLoadModule64
+    _SymLoadModule64.argtypes = [HANDLE, HANDLE, LPSTR, LPSTR, DWORD64, DWORD]
+    _SymLoadModule64.restype  = DWORD64
+
+    if not ImageName:
+        ImageName = None
+    if not ModuleName:
+        ModuleName = None
+    if not BaseOfDll:
+        BaseOfDll = 0
+    if not SizeOfDll:
+        SizeOfDll = 0
+    SetLastError(ERROR_SUCCESS)
+    lpBaseAddress = _SymLoadModule64(hProcess, hFile, ImageName, ModuleName, BaseOfDll, SizeOfDll)
+    if not lpBaseAddress:  # zero means failure (NULL is None, so an == comparison would never match)
+        dwErrorCode = GetLastError()
+        if dwErrorCode != ERROR_SUCCESS:
+            raise ctypes.WinError(dwErrorCode)
+    return lpBaseAddress
+
+SymLoadModule64W = MakeWideVersion(SymLoadModule64A)
+SymLoadModule64 = GuessStringType(SymLoadModule64A, SymLoadModule64W)
+
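A sketch of loading symbols for an image on disk; the path is illustrative and hProcess is assumed to come from a prior SymInitialize call:

    # BaseOfDll defaults to 0, so dbghelp picks the base address itself
    base = SymLoadModule64(hProcess, ImageName = "C:\\Windows\\System32\\ntdll.dll")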
+# BOOL WINAPI SymUnloadModule(
+#   __in  HANDLE hProcess,
+#   __in  DWORD BaseOfDll
+# );
+def SymUnloadModule(hProcess, BaseOfDll):
+    _SymUnloadModule = windll.dbghelp.SymUnloadModule
+    _SymUnloadModule.argtypes = [HANDLE, DWORD]
+    _SymUnloadModule.restype  = bool
+    _SymUnloadModule.errcheck = RaiseIfZero
+    _SymUnloadModule(hProcess, BaseOfDll)
+
+# BOOL WINAPI SymUnloadModule64(
+#   __in  HANDLE hProcess,
+#   __in  DWORD64 BaseOfDll
+# );
+def SymUnloadModule64(hProcess, BaseOfDll):
+    _SymUnloadModule64 = windll.dbghelp.SymUnloadModule64
+    _SymUnloadModule64.argtypes = [HANDLE, DWORD64]
+    _SymUnloadModule64.restype  = bool
+    _SymUnloadModule64.errcheck = RaiseIfZero
+    _SymUnloadModule64(hProcess, BaseOfDll)
+
+# BOOL WINAPI SymGetModuleInfo(
+#   __in   HANDLE hProcess,
+#   __in   DWORD dwAddr,
+#   __out  PIMAGEHLP_MODULE ModuleInfo
+# );
+def SymGetModuleInfoA(hProcess, dwAddr):
+    _SymGetModuleInfo = windll.dbghelp.SymGetModuleInfo
+    _SymGetModuleInfo.argtypes = [HANDLE, DWORD, PIMAGEHLP_MODULE]
+    _SymGetModuleInfo.restype  = bool
+    _SymGetModuleInfo.errcheck = RaiseIfZero
+
+    ModuleInfo = IMAGEHLP_MODULE()
+    ModuleInfo.SizeOfStruct = sizeof(ModuleInfo)
+    _SymGetModuleInfo(hProcess, dwAddr, byref(ModuleInfo))
+    return ModuleInfo
+
+def SymGetModuleInfoW(hProcess, dwAddr):
+    _SymGetModuleInfoW = windll.dbghelp.SymGetModuleInfoW
+    _SymGetModuleInfoW.argtypes = [HANDLE, DWORD, PIMAGEHLP_MODULEW]
+    _SymGetModuleInfoW.restype  = bool
+    _SymGetModuleInfoW.errcheck = RaiseIfZero
+
+    ModuleInfo = IMAGEHLP_MODULEW()
+    ModuleInfo.SizeOfStruct = sizeof(ModuleInfo)
+    _SymGetModuleInfoW(hProcess, dwAddr, byref(ModuleInfo))
+    return ModuleInfo
+
+SymGetModuleInfo = GuessStringType(SymGetModuleInfoA, SymGetModuleInfoW)
+
+# BOOL WINAPI SymGetModuleInfo64(
+#   __in   HANDLE hProcess,
+#   __in   DWORD64 dwAddr,
+#   __out  PIMAGEHLP_MODULE64 ModuleInfo
+# );
+def SymGetModuleInfo64A(hProcess, dwAddr):
+    _SymGetModuleInfo64 = windll.dbghelp.SymGetModuleInfo64
+    _SymGetModuleInfo64.argtypes = [HANDLE, DWORD64, PIMAGEHLP_MODULE64]
+    _SymGetModuleInfo64.restype  = bool
+    _SymGetModuleInfo64.errcheck = RaiseIfZero
+
+    ModuleInfo = IMAGEHLP_MODULE64()
+    ModuleInfo.SizeOfStruct = sizeof(ModuleInfo)
+    _SymGetModuleInfo64(hProcess, dwAddr, byref(ModuleInfo))
+    return ModuleInfo
+
+def SymGetModuleInfo64W(hProcess, dwAddr):
+    _SymGetModuleInfo64W = windll.dbghelp.SymGetModuleInfo64W
+    _SymGetModuleInfo64W.argtypes = [HANDLE, DWORD64, PIMAGEHLP_MODULEW64]
+    _SymGetModuleInfo64W.restype  = bool
+    _SymGetModuleInfo64W.errcheck = RaiseIfZero
+
+    ModuleInfo = IMAGEHLP_MODULEW64()
+    ModuleInfo.SizeOfStruct = sizeof(ModuleInfo)
+    _SymGetModuleInfo64W(hProcess, dwAddr, byref(ModuleInfo))
+    return ModuleInfo
+
+SymGetModuleInfo64 = GuessStringType(SymGetModuleInfo64A, SymGetModuleInfo64W)
+
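Given a base address returned by SymLoadModule64 (or any address inside a loaded module), the module record can then be queried; a sketch, assuming `base` came from the call above:

    info = SymGetModuleInfo64(hProcess, base)
    print("%s at %#x (SymType=%d)" % (info.ModuleName, info.BaseOfImage, info.SymType))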
+# BOOL CALLBACK SymEnumerateModulesProc(
+#   __in      PCTSTR ModuleName,
+#   __in      DWORD BaseOfDll,
+#   __in_opt  PVOID UserContext
+# );
+PSYM_ENUMMODULES_CALLBACK    = WINFUNCTYPE(BOOL, LPSTR,  DWORD,   PVOID)
+PSYM_ENUMMODULES_CALLBACKW   = WINFUNCTYPE(BOOL, LPWSTR, DWORD,   PVOID)
+
+# BOOL CALLBACK SymEnumerateModulesProc64(
+#   __in      PCTSTR ModuleName,
+#   __in      DWORD64 BaseOfDll,
+#   __in_opt  PVOID UserContext
+# );
+PSYM_ENUMMODULES_CALLBACK64  = WINFUNCTYPE(BOOL, LPSTR,  DWORD64, PVOID)
+PSYM_ENUMMODULES_CALLBACKW64 = WINFUNCTYPE(BOOL, LPWSTR, DWORD64, PVOID)
+
+# BOOL WINAPI SymEnumerateModules(
+#   __in      HANDLE hProcess,
+#   __in      PSYM_ENUMMODULES_CALLBACK EnumModulesCallback,
+#   __in_opt  PVOID UserContext
+# );
+def SymEnumerateModulesA(hProcess, EnumModulesCallback, UserContext = None):
+    _SymEnumerateModules = windll.dbghelp.SymEnumerateModules
+    _SymEnumerateModules.argtypes = [HANDLE, PSYM_ENUMMODULES_CALLBACK, PVOID]
+    _SymEnumerateModules.restype  = bool
+    _SymEnumerateModules.errcheck = RaiseIfZero
+
+    EnumModulesCallback = PSYM_ENUMMODULES_CALLBACK(EnumModulesCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateModules(hProcess, EnumModulesCallback, UserContext)
+
+def SymEnumerateModulesW(hProcess, EnumModulesCallback, UserContext = None):
+    _SymEnumerateModulesW = windll.dbghelp.SymEnumerateModulesW
+    _SymEnumerateModulesW.argtypes = [HANDLE, PSYM_ENUMMODULES_CALLBACKW, PVOID]
+    _SymEnumerateModulesW.restype  = bool
+    _SymEnumerateModulesW.errcheck = RaiseIfZero
+
+    EnumModulesCallback = PSYM_ENUMMODULES_CALLBACKW(EnumModulesCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateModulesW(hProcess, EnumModulesCallback, UserContext)
+
+SymEnumerateModules = GuessStringType(SymEnumerateModulesA, SymEnumerateModulesW)
+
+# BOOL WINAPI SymEnumerateModules64(
+#   __in      HANDLE hProcess,
+#   __in      PSYM_ENUMMODULES_CALLBACK64 EnumModulesCallback,
+#   __in_opt  PVOID UserContext
+# );
+def SymEnumerateModules64A(hProcess, EnumModulesCallback, UserContext = None):
+    _SymEnumerateModules64 = windll.dbghelp.SymEnumerateModules64
+    _SymEnumerateModules64.argtypes = [HANDLE, PSYM_ENUMMODULES_CALLBACK64, PVOID]
+    _SymEnumerateModules64.restype  = bool
+    _SymEnumerateModules64.errcheck = RaiseIfZero
+
+    EnumModulesCallback = PSYM_ENUMMODULES_CALLBACK64(EnumModulesCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateModules64(hProcess, EnumModulesCallback, UserContext)
+
+def SymEnumerateModules64W(hProcess, EnumModulesCallback, UserContext = None):
+    _SymEnumerateModules64W = windll.dbghelp.SymEnumerateModules64W
+    _SymEnumerateModules64W.argtypes = [HANDLE, PSYM_ENUMMODULES_CALLBACKW64, PVOID]
+    _SymEnumerateModules64W.restype  = bool
+    _SymEnumerateModules64W.errcheck = RaiseIfZero
+
+    EnumModulesCallback = PSYM_ENUMMODULES_CALLBACKW64(EnumModulesCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateModules64W(hProcess, EnumModulesCallback, UserContext)
+
+SymEnumerateModules64 = GuessStringType(SymEnumerateModules64A, SymEnumerateModules64W)
+
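The enumeration wrappers accept a plain Python callable and wrap it in the WINFUNCTYPE prototype themselves; a sketch:

    def print_module(ModuleName, BaseOfDll, UserContext):
        print("%s at %#x" % (ModuleName, BaseOfDll))
        return True   # returning False stops the enumeration

    SymEnumerateModules64(hProcess, print_module)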
+# BOOL CALLBACK SymEnumerateSymbolsProc(
+#   __in      PCTSTR SymbolName,
+#   __in      DWORD SymbolAddress,
+#   __in      ULONG SymbolSize,
+#   __in_opt  PVOID UserContext
+# );
+PSYM_ENUMSYMBOLS_CALLBACK    = WINFUNCTYPE(BOOL, LPSTR,  DWORD,   ULONG, PVOID)
+PSYM_ENUMSYMBOLS_CALLBACKW   = WINFUNCTYPE(BOOL, LPWSTR, DWORD,   ULONG, PVOID)
+
+# BOOL CALLBACK SymEnumerateSymbolsProc64(
+#   __in      PCTSTR SymbolName,
+#   __in      DWORD64 SymbolAddress,
+#   __in      ULONG SymbolSize,
+#   __in_opt  PVOID UserContext
+# );
+PSYM_ENUMSYMBOLS_CALLBACK64  = WINFUNCTYPE(BOOL, LPSTR,  DWORD64, ULONG, PVOID)
+PSYM_ENUMSYMBOLS_CALLBACKW64 = WINFUNCTYPE(BOOL, LPWSTR, DWORD64, ULONG, PVOID)
+
+# BOOL WINAPI SymEnumerateSymbols(
+#   __in      HANDLE hProcess,
+#   __in      ULONG BaseOfDll,
+#   __in      PSYM_ENUMSYMBOLS_CALLBACK EnumSymbolsCallback,
+#   __in_opt  PVOID UserContext
+# );
+def SymEnumerateSymbolsA(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext = None):
+    _SymEnumerateSymbols = windll.dbghelp.SymEnumerateSymbols
+    _SymEnumerateSymbols.argtypes = [HANDLE, ULONG, PSYM_ENUMSYMBOLS_CALLBACK, PVOID]
+    _SymEnumerateSymbols.restype  = bool
+    _SymEnumerateSymbols.errcheck = RaiseIfZero
+
+    EnumSymbolsCallback = PSYM_ENUMSYMBOLS_CALLBACK(EnumSymbolsCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateSymbols(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext)
+
+def SymEnumerateSymbolsW(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext = None):
+    _SymEnumerateSymbolsW = windll.dbghelp.SymEnumerateSymbolsW
+    _SymEnumerateSymbolsW.argtypes = [HANDLE, ULONG, PSYM_ENUMSYMBOLS_CALLBACKW, PVOID]
+    _SymEnumerateSymbolsW.restype  = bool
+    _SymEnumerateSymbolsW.errcheck = RaiseIfZero
+
+    EnumSymbolsCallback = PSYM_ENUMSYMBOLS_CALLBACKW(EnumSymbolsCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateSymbolsW(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext)
+
+SymEnumerateSymbols = GuessStringType(SymEnumerateSymbolsA, SymEnumerateSymbolsW)
+
+# BOOL WINAPI SymEnumerateSymbols64(
+#   __in      HANDLE hProcess,
+#   __in      ULONG64 BaseOfDll,
+#   __in      PSYM_ENUMSYMBOLS_CALLBACK64 EnumSymbolsCallback,
+#   __in_opt  PVOID UserContext
+# );
+def SymEnumerateSymbols64A(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext = None):
+    _SymEnumerateSymbols64 = windll.dbghelp.SymEnumerateSymbols64
+    _SymEnumerateSymbols64.argtypes = [HANDLE, ULONG64, PSYM_ENUMSYMBOLS_CALLBACK64, PVOID]
+    _SymEnumerateSymbols64.restype  = bool
+    _SymEnumerateSymbols64.errcheck = RaiseIfZero
+
+    EnumSymbolsCallback = PSYM_ENUMSYMBOLS_CALLBACK64(EnumSymbolsCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateSymbols64(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext)
+
+def SymEnumerateSymbols64W(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext = None):
+    _SymEnumerateSymbols64W = windll.dbghelp.SymEnumerateSymbols64W
+    _SymEnumerateSymbols64W.argtypes = [HANDLE, ULONG64, PSYM_ENUMSYMBOLS_CALLBACKW64, PVOID]
+    _SymEnumerateSymbols64W.restype  = bool
+    _SymEnumerateSymbols64W.errcheck = RaiseIfZero
+
+    EnumSymbolsCallback = PSYM_ENUMSYMBOLS_CALLBACKW64(EnumSymbolsCallback)
+    if UserContext:
+        UserContext = ctypes.pointer(UserContext)
+    else:
+        UserContext = LPVOID(NULL)
+    _SymEnumerateSymbols64W(hProcess, BaseOfDll, EnumSymbolsCallback, UserContext)
+
+SymEnumerateSymbols64 = GuessStringType(SymEnumerateSymbols64A, SymEnumerateSymbols64W)
+
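The per-symbol callback carries one extra parameter for the symbol size; a sketch, assuming `base` is a module base obtained earlier:

    def print_symbol(SymbolName, SymbolAddress, SymbolSize, UserContext):
        print("%s at %#x (%d bytes)" % (SymbolName, SymbolAddress, SymbolSize))
        return True

    SymEnumerateSymbols64(hProcess, base, print_symbol)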
+# DWORD WINAPI UnDecorateSymbolName(
+#   __in   PCTSTR DecoratedName,
+#   __out  PTSTR UnDecoratedName,
+#   __in   DWORD UndecoratedLength,
+#   __in   DWORD Flags
+# );
+def UnDecorateSymbolNameA(DecoratedName, Flags = UNDNAME_COMPLETE):
+    _UnDecorateSymbolNameA = windll.dbghelp.UnDecorateSymbolName
+    _UnDecorateSymbolNameA.argtypes = [LPSTR, LPSTR, DWORD, DWORD]
+    _UnDecorateSymbolNameA.restype  = DWORD
+    _UnDecorateSymbolNameA.errcheck = RaiseIfZero
+
+    UndecoratedLength = _UnDecorateSymbolNameA(DecoratedName, None, 0, Flags)
+    UnDecoratedName = ctypes.create_string_buffer('', UndecoratedLength + 1)
+    _UnDecorateSymbolNameA(DecoratedName, UnDecoratedName, UndecoratedLength, Flags)
+    return UnDecoratedName.value
+
+def UnDecorateSymbolNameW(DecoratedName, Flags = UNDNAME_COMPLETE):
+    _UnDecorateSymbolNameW = windll.dbghelp.UnDecorateSymbolNameW
+    _UnDecorateSymbolNameW.argtypes = [LPWSTR, LPWSTR, DWORD, DWORD]
+    _UnDecorateSymbolNameW.restype  = DWORD
+    _UnDecorateSymbolNameW.errcheck = RaiseIfZero
+
+    UndecoratedLength = _UnDecorateSymbolNameW(DecoratedName, None, 0, Flags)
+    UnDecoratedName = ctypes.create_unicode_buffer(u'', UndecoratedLength + 1)
+    _UnDecorateSymbolNameW(DecoratedName, UnDecoratedName, UndecoratedLength, Flags)
+    return UnDecoratedName.value
+
+UnDecorateSymbolName = GuessStringType(UnDecorateSymbolNameA, UnDecorateSymbolNameW)
+
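A sketch of demangling an MSVC-decorated name with the default UNDNAME_COMPLETE flags (the decorated string is illustrative):

    # "?Sum@@YAHHH@Z" decorates int __cdecl Sum(int, int)
    print(UnDecorateSymbolName("?Sum@@YAHHH@Z"))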
+# BOOL WINAPI SymGetSearchPath(
+#   __in   HANDLE hProcess,
+#   __out  PTSTR SearchPath,
+#   __in   DWORD SearchPathLength
+# );
+def SymGetSearchPathA(hProcess):
+    _SymGetSearchPath = windll.dbghelp.SymGetSearchPath
+    _SymGetSearchPath.argtypes = [HANDLE, LPSTR, DWORD]
+    _SymGetSearchPath.restype  = bool
+    _SymGetSearchPath.errcheck = RaiseIfZero
+
+    SearchPathLength = MAX_PATH
+    SearchPath = ctypes.create_string_buffer("", SearchPathLength)
+    _SymGetSearchPath(hProcess, SearchPath, SearchPathLength)
+    return SearchPath.value
+
+def SymGetSearchPathW(hProcess):
+    _SymGetSearchPathW = windll.dbghelp.SymGetSearchPathW
+    _SymGetSearchPathW.argtypes = [HANDLE, LPWSTR, DWORD]
+    _SymGetSearchPathW.restype  = bool
+    _SymGetSearchPathW.errcheck = RaiseIfZero
+
+    SearchPathLength = MAX_PATH
+    SearchPath = ctypes.create_unicode_buffer(u"", SearchPathLength)
+    _SymGetSearchPathW(hProcess, SearchPath, SearchPathLength)
+    return SearchPath.value
+
+SymGetSearchPath = GuessStringType(SymGetSearchPathA, SymGetSearchPathW)
+
+# BOOL WINAPI SymSetSearchPath(
+#   __in      HANDLE hProcess,
+#   __in_opt  PCTSTR SearchPath
+# );
+def SymSetSearchPathA(hProcess, SearchPath = None):
+    _SymSetSearchPath = windll.dbghelp.SymSetSearchPath
+    _SymSetSearchPath.argtypes = [HANDLE, LPSTR]
+    _SymSetSearchPath.restype  = bool
+    _SymSetSearchPath.errcheck = RaiseIfZero
+    if not SearchPath:
+        SearchPath = None
+    _SymSetSearchPath(hProcess, SearchPath)
+
+def SymSetSearchPathW(hProcess, SearchPath = None):
+    _SymSetSearchPathW = windll.dbghelp.SymSetSearchPathW
+    _SymSetSearchPathW.argtypes = [HANDLE, LPWSTR]
+    _SymSetSearchPathW.restype  = bool
+    _SymSetSearchPathW.errcheck = RaiseIfZero
+    if not SearchPath:
+        SearchPath = None
+    _SymSetSearchPathW(hProcess, SearchPath)
+
+SymSetSearchPath = GuessStringType(SymSetSearchPathA, SymSetSearchPathW)
+
+# PTCHAR WINAPI SymGetHomeDirectory(
+#   __in   DWORD type,
+#   __out  PTSTR dir,
+#   __in   size_t size
+# );
+def SymGetHomeDirectoryA(type):
+    _SymGetHomeDirectoryA = windll.dbghelp.SymGetHomeDirectoryA
+    _SymGetHomeDirectoryA.argtypes = [DWORD, LPSTR, SIZE_T]
+    _SymGetHomeDirectoryA.restype  = LPSTR
+    _SymGetHomeDirectoryA.errcheck = RaiseIfZero
+
+    size = MAX_PATH
+    dir  = ctypes.create_string_buffer("", size)
+    _SymGetHomeDirectoryA(type, dir, size)
+    return dir.value
+
+def SymGetHomeDirectoryW(type):
+    _SymGetHomeDirectoryW = windll.dbghelp.SymGetHomeDirectoryW
+    _SymGetHomeDirectoryW.argtypes = [DWORD, LPWSTR, SIZE_T]
+    _SymGetHomeDirectoryW.restype  = LPWSTR
+    _SymGetHomeDirectoryW.errcheck = RaiseIfZero
+
+    size = MAX_PATH
+    dir  = ctypes.create_unicode_buffer(u"", size)
+    _SymGetHomeDirectoryW(type, dir, size)
+    return dir.value
+
+SymGetHomeDirectory = GuessStringType(SymGetHomeDirectoryA, SymGetHomeDirectoryW)
+
+# PTCHAR WINAPI SymSetHomeDirectory(
+#   __in      HANDLE hProcess,
+#   __in_opt  PCTSTR dir
+# );
+def SymSetHomeDirectoryA(hProcess, dir = None):
+    _SymSetHomeDirectoryA = windll.dbghelp.SymSetHomeDirectoryA
+    _SymSetHomeDirectoryA.argtypes = [HANDLE, LPSTR]
+    _SymSetHomeDirectoryA.restype  = LPSTR
+    _SymSetHomeDirectoryA.errcheck = RaiseIfZero
+    if not dir:
+        dir = None
+    _SymSetHomeDirectoryA(hProcess, dir)
+    return dir
+
+def SymSetHomeDirectoryW(hProcess, dir = None):
+    _SymSetHomeDirectoryW = windll.dbghelp.SymSetHomeDirectoryW
+    _SymSetHomeDirectoryW.argtypes = [HANDLE, LPWSTR]
+    _SymSetHomeDirectoryW.restype  = LPWSTR
+    _SymSetHomeDirectoryW.errcheck = RaiseIfZero
+    if not dir:
+        dir = None
+    _SymSetHomeDirectoryW(hProcess, dir)
+    return dir
+
+SymSetHomeDirectory = GuessStringType(SymSetHomeDirectoryA, SymSetHomeDirectoryW)
+
+#--- DbgHelp 5+ support, patch by Neitsa --------------------------------------
+
+# XXX TODO
+# + use the GuessStringType decorator for ANSI/Wide versions
+# + replace hardcoded struct sizes with sizeof() calls
+# + StackWalk64 should raise on error, but something has to be done about it
+#   not setting the last error code (maybe we should call SetLastError
+#   ourselves with a default error code?)
+# /Mario
+
+#maximum length of a symbol name
+MAX_SYM_NAME = 2000
+
+class SYM_INFO(Structure):
+    _fields_ = [
+        ("SizeOfStruct",    ULONG),
+        ("TypeIndex",       ULONG),
+        ("Reserved",        ULONG64 * 2),
+        ("Index",           ULONG),
+        ("Size",            ULONG),
+        ("ModBase",         ULONG64),
+        ("Flags",           ULONG),
+        ("Value",           ULONG64),
+        ("Address",         ULONG64),
+        ("Register",        ULONG),
+        ("Scope",           ULONG),
+        ("Tag",             ULONG),
+        ("NameLen",         ULONG),
+        ("MaxNameLen",      ULONG),
+        ("Name",            CHAR * (MAX_SYM_NAME + 1)),
+    ]
+PSYM_INFO = POINTER(SYM_INFO)
+
+class SYM_INFOW(Structure):
+    _fields_ = [
+        ("SizeOfStruct",    ULONG),
+        ("TypeIndex",       ULONG),
+        ("Reserved",        ULONG64 * 2),
+        ("Index",           ULONG),
+        ("Size",            ULONG),
+        ("ModBase",         ULONG64),
+        ("Flags",           ULONG),
+        ("Value",           ULONG64),
+        ("Address",         ULONG64),
+        ("Register",        ULONG),
+        ("Scope",           ULONG),
+        ("Tag",             ULONG),
+        ("NameLen",         ULONG),
+        ("MaxNameLen",      ULONG),
+        ("Name",            WCHAR * (MAX_SYM_NAME + 1)),
+    ]
+PSYM_INFOW = POINTER(SYM_INFOW)
+
+#===============================================================================
+# BOOL WINAPI SymFromName(
+#  __in     HANDLE hProcess,
+#  __in     PCTSTR Name,
+#  __inout  PSYMBOL_INFO Symbol
+# );
+#===============================================================================
+def SymFromName(hProcess, Name):
+    _SymFromNameA = windll.dbghelp.SymFromName
+    _SymFromNameA.argtypes = [HANDLE, LPSTR, PSYM_INFO]
+    _SymFromNameA.restype = bool
+    _SymFromNameA.errcheck = RaiseIfZero
+
+    SymInfo = SYM_INFO()
+    SymInfo.SizeOfStruct = 88 # *don't modify*: sizeof(SYMBOL_INFO) in C.
+    SymInfo.MaxNameLen = MAX_SYM_NAME
+
+    _SymFromNameA(hProcess, Name, byref(SymInfo))
+
+    return SymInfo
+
+def SymFromNameW(hProcess, Name):
+    _SymFromNameW = windll.dbghelp.SymFromNameW
+    _SymFromNameW.argtypes = [HANDLE, LPWSTR, PSYM_INFOW]
+    _SymFromNameW.restype = bool
+    _SymFromNameW.errcheck = RaiseIfZero
+
+    SymInfo = SYM_INFOW()
+    SymInfo.SizeOfStruct = 88 # *don't modify*: sizeof(SYMBOL_INFOW) in C.
+    SymInfo.MaxNameLen = MAX_SYM_NAME
+
+    _SymFromNameW(hProcess, Name, byref(SymInfo))
+
+    return SymInfo
+
+#===============================================================================
+# BOOL WINAPI SymFromAddr(
+#  __in       HANDLE hProcess,
+#  __in       DWORD64 Address,
+#  __out_opt  PDWORD64 Displacement,
+#  __inout    PSYMBOL_INFO Symbol
+# );
+#===============================================================================
+def SymFromAddr(hProcess, Address):
+    _SymFromAddr = windll.dbghelp.SymFromAddr
+    _SymFromAddr.argtypes = [HANDLE, DWORD64, PDWORD64, PSYM_INFO]
+    _SymFromAddr.restype = bool
+    _SymFromAddr.errcheck = RaiseIfZero
+
+    SymInfo = SYM_INFO()
+    SymInfo.SizeOfStruct = 88 # *don't modify*: sizeof(SYMBOL_INFO) in C.
+    SymInfo.MaxNameLen = MAX_SYM_NAME
+
+    Displacement = DWORD64(0)
+    _SymFromAddr(hProcess, Address, byref(Displacement), byref(SymInfo))
+
+    return (Displacement.value, SymInfo)
+
+def SymFromAddrW(hProcess, Address):
+    _SymFromAddr = windll.dbghelp.SymFromAddrW
+    _SymFromAddr.argtypes = [HANDLE, DWORD64, PDWORD64, PSYM_INFOW]
+    _SymFromAddr.restype = bool
+    _SymFromAddr.errcheck = RaiseIfZero
+
+    SymInfo = SYM_INFOW()
+    SymInfo.SizeOfStruct = 88 # *don't modify*: sizeof(SYMBOL_INFOW) in C.
+    SymInfo.MaxNameLen = MAX_SYM_NAME
+
+    Displacement = DWORD64(0)
+    _SymFromAddr(hProcess, Address, byref(Displacement), byref(SymInfo))
+
+    return (Displacement.value, SymInfo)
+
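SymFromAddr returns both the displacement from the symbol start and the SYM_INFO record; a sketch, where `address` stands for any hypothetical code address inside a module whose symbols are loaded:

    displacement, sym = SymFromAddr(hProcess, address)
    print("%s+%#x" % (sym.Name, displacement))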
+#===============================================================================
+# typedef struct _IMAGEHLP_SYMBOL64 {
+#  DWORD   SizeOfStruct;
+#  DWORD64 Address;
+#  DWORD   Size;
+#  DWORD   Flags;
+#  DWORD   MaxNameLength;
+#  CHAR   Name[1];
+# } IMAGEHLP_SYMBOL64, *PIMAGEHLP_SYMBOL64;
+#===============================================================================
+class IMAGEHLP_SYMBOL64 (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("Address",         DWORD64),
+        ("Size",            DWORD),
+        ("Flags",           DWORD),
+        ("MaxNameLength",   DWORD),
+        ("Name",            CHAR * (MAX_SYM_NAME + 1)),
+    ]
+PIMAGEHLP_SYMBOL64 = POINTER(IMAGEHLP_SYMBOL64)
+
+#===============================================================================
+# typedef struct _IMAGEHLP_SYMBOLW64 {
+#  DWORD   SizeOfStruct;
+#  DWORD64 Address;
+#  DWORD   Size;
+#  DWORD   Flags;
+#  DWORD   MaxNameLength;
+#  WCHAR   Name[1];
+# } IMAGEHLP_SYMBOLW64, *PIMAGEHLP_SYMBOLW64;
+#===============================================================================
+class IMAGEHLP_SYMBOLW64 (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("Address",         DWORD64),
+        ("Size",            DWORD),
+        ("Flags",           DWORD),
+        ("MaxNameLength",   DWORD),
+        ("Name",            WCHAR * (MAX_SYM_NAME + 1)),
+    ]
+PIMAGEHLP_SYMBOLW64 = POINTER(IMAGEHLP_SYMBOLW64)
+
+#===============================================================================
+# BOOL WINAPI SymGetSymFromAddr64(
+#  __in       HANDLE hProcess,
+#  __in       DWORD64 Address,
+#  __out_opt  PDWORD64 Displacement,
+#  __inout    PIMAGEHLP_SYMBOL64 Symbol
+# );
+#===============================================================================
+def SymGetSymFromAddr64(hProcess, Address):
+    _SymGetSymFromAddr64 = windll.dbghelp.SymGetSymFromAddr64
+    _SymGetSymFromAddr64.argtypes = [HANDLE, DWORD64, PDWORD64, PIMAGEHLP_SYMBOL64]
+    _SymGetSymFromAddr64.restype = bool
+    _SymGetSymFromAddr64.errcheck = RaiseIfZero
+
+    imagehlp_symbol64 = IMAGEHLP_SYMBOL64()
+    imagehlp_symbol64.SizeOfStruct = 32 # *don't modify*: sizeof(IMAGEHLP_SYMBOL64) in C.
+    imagehlp_symbol64.MaxNameLength = MAX_SYM_NAME # the field defined above is MaxNameLength, not MaxNameLen
+
+    Displacement = DWORD64(0)
+    _SymGetSymFromAddr64(hProcess, Address, byref(Displacement), byref(imagehlp_symbol64))
+
+    return (Displacement.value, imagehlp_symbol64)
+
+#TODO: check for the 'W' version of SymGetSymFromAddr64()
+
+#===============================================================================
+# typedef struct _IMAGEHLP_LINE64 {
+#  DWORD   SizeOfStruct;
+#  PVOID   Key;
+#  DWORD   LineNumber;
+#  PSTR    FileName;
+#  DWORD64 Address;
+# } IMAGEHLP_LINE64, *PIMAGEHLP_LINE64;
+#===============================================================================
+class IMAGEHLP_LINE64 (Structure):
+    _fields_ = [
+        ("SizeOfStruct",    DWORD),
+        ("Key",             PVOID),
+        ("LineNumber",      DWORD),
+        ("FileName",        PSTR),
+        ("Address",         DWORD64),
+    ]
+PIMAGEHLP_LINE64 = POINTER(IMAGEHLP_LINE64)
+
+#===============================================================================
+# BOOL WINAPI SymGetLineFromAddr64(
+#  __in  HANDLE           hProcess,
+#  __in  DWORD64          dwAddr,
+#  __out PDWORD           pdwDisplacement,
+#  __out PIMAGEHLP_LINE64 Line
+# );
+#===============================================================================
+def SymGetLineFromAddr64(hProcess, dwAddr):
+    _SymGetLineFromAddr64 = windll.dbghelp.SymGetLineFromAddr64
+    _SymGetLineFromAddr64.argtypes = [HANDLE, DWORD64, PDWORD, PIMAGEHLP_LINE64]
+    _SymGetLineFromAddr64.restype = bool
+    _SymGetLineFromAddr64.errcheck = RaiseIfZero
+
+    imagehlp_line64 = IMAGEHLP_LINE64()
+    imagehlp_line64.SizeOfStruct = 32 # *don't modify*: sizeof(IMAGEHLP_LINE64) in C.
+
+    pdwDisplacement = DWORD(0)
+    _SymGetLineFromAddr64(hProcess, dwAddr, byref(pdwDisplacement), byref(imagehlp_line64))
+
+    return (pdwDisplacement.value, imagehlp_line64)
+
+#TODO: check for the 'W' version of SymGetLineFromAddr64()
+
+#===============================================================================
+# typedef struct API_VERSION {
+#  USHORT MajorVersion;
+#  USHORT MinorVersion;
+#  USHORT Revision;
+#  USHORT Reserved;
+# } API_VERSION, *LPAPI_VERSION;
+#===============================================================================
+class API_VERSION (Structure):
+    _fields_ = [
+        ("MajorVersion",    USHORT),
+        ("MinorVersion",    USHORT),
+        ("Revision",        USHORT),
+        ("Reserved",        USHORT),
+    ]
+PAPI_VERSION = POINTER(API_VERSION)
+LPAPI_VERSION = PAPI_VERSION
+
+#===============================================================================
+# LPAPI_VERSION WINAPI ImagehlpApiVersion(void);
+#===============================================================================
+def ImagehlpApiVersion():
+    _ImagehlpApiVersion = windll.dbghelp.ImagehlpApiVersion
+    _ImagehlpApiVersion.restype = LPAPI_VERSION
+
+    api_version = _ImagehlpApiVersion()
+    return api_version.contents
+
+
+#===============================================================================
+# LPAPI_VERSION WINAPI ImagehlpApiVersionEx(
+#  __in  LPAPI_VERSION AppVersion
+# );
+#===============================================================================
+def ImagehlpApiVersionEx(MajorVersion, MinorVersion, Revision):
+    _ImagehlpApiVersionEx = windll.dbghelp.ImagehlpApiVersionEx
+    _ImagehlpApiVersionEx.argtypes = [LPAPI_VERSION]
+    _ImagehlpApiVersionEx.restype = LPAPI_VERSION
+
+    api_version = API_VERSION(MajorVersion, MinorVersion, Revision, 0)
+
+    ret_api_version = _ImagehlpApiVersionEx(byref(api_version))
+
+    return ret_api_version.contents
+
+#===============================================================================
+# typedef enum {
+#     AddrMode1616,
+#     AddrMode1632,
+#     AddrModeReal,
+#     AddrModeFlat
+# } ADDRESS_MODE;
+#===============================================================================
+AddrMode1616 = 0
+AddrMode1632 = 1
+AddrModeReal = 2
+AddrModeFlat = 3
+
+ADDRESS_MODE = DWORD #needed for the size of an ADDRESS_MODE (see ADDRESS64)
+
+#===============================================================================
+# typedef struct _tagADDRESS64 {
+#  DWORD64      Offset;
+#  WORD         Segment;
+#  ADDRESS_MODE Mode;
+# } ADDRESS64, *LPADDRESS64;
+#===============================================================================
+class ADDRESS64 (Structure):
+    _fields_ = [
+        ("Offset",      DWORD64),
+        ("Segment",     WORD),
+        ("Mode",        ADDRESS_MODE),  #it's a member of the ADDRESS_MODE enum.
+    ]
+LPADDRESS64 = POINTER(ADDRESS64)
+
+#===============================================================================
+# typedef struct _KDHELP64 {
+#    DWORD64   Thread;
+#    DWORD   ThCallbackStack;
+#    DWORD   ThCallbackBStore;
+#    DWORD   NextCallback;
+#    DWORD   FramePointer;
+#    DWORD64   KiCallUserMode;
+#    DWORD64   KeUserCallbackDispatcher;
+#    DWORD64   SystemRangeStart;
+#    DWORD64   KiUserExceptionDispatcher;
+#    DWORD64   StackBase;
+#    DWORD64   StackLimit;
+#    DWORD64   Reserved[5];
+# } KDHELP64, *PKDHELP64;
+#===============================================================================
+class KDHELP64 (Structure):
+    _fields_ = [
+        ("Thread",              DWORD64),
+        ("ThCallbackStack",     DWORD),
+        ("ThCallbackBStore",    DWORD),
+        ("NextCallback",        DWORD),
+        ("FramePointer",        DWORD),
+        ("KiCallUserMode",      DWORD64),
+        ("KeUserCallbackDispatcher",    DWORD64),
+        ("SystemRangeStart",    DWORD64),
+        ("KiUserExceptionDispatcher",   DWORD64),
+        ("StackBase",           DWORD64),
+        ("StackLimit",          DWORD64),
+        ("Reserved",            DWORD64 * 5),
+    ]
+PKDHELP64 = POINTER(KDHELP64)
+
+#===============================================================================
+# typedef struct _tagSTACKFRAME64 {
+#  ADDRESS64 AddrPC;
+#  ADDRESS64 AddrReturn;
+#  ADDRESS64 AddrFrame;
+#  ADDRESS64 AddrStack;
+#  ADDRESS64 AddrBStore;
+#  PVOID     FuncTableEntry;
+#  DWORD64   Params[4];
+#  BOOL      Far;
+#  BOOL      Virtual;
+#  DWORD64   Reserved[3];
+#  KDHELP64  KdHelp;
+# } STACKFRAME64, *LPSTACKFRAME64;
+#===============================================================================
+class STACKFRAME64(Structure):
+    _fields_ = [
+        ("AddrPC",          ADDRESS64),
+        ("AddrReturn",      ADDRESS64),
+        ("AddrFrame",       ADDRESS64),
+        ("AddrStack",       ADDRESS64),
+        ("AddrBStore",      ADDRESS64),
+        ("FuncTableEntry",  PVOID),
+        ("Params",          DWORD64 * 4),
+        ("Far",             BOOL),
+        ("Virtual",         BOOL),
+        ("Reserved",        DWORD64 * 3),
+        ("KdHelp",          KDHELP64),
+    ]
+LPSTACKFRAME64 = POINTER(STACKFRAME64)
+
+#===============================================================================
+# BOOL CALLBACK ReadProcessMemoryProc64(
+#  __in   HANDLE hProcess,
+#  __in   DWORD64 lpBaseAddress,
+#  __out  PVOID lpBuffer,
+#  __in   DWORD nSize,
+#  __out  LPDWORD lpNumberOfBytesRead
+# );
+#===============================================================================
+PREAD_PROCESS_MEMORY_ROUTINE64 = WINFUNCTYPE(BOOL, HANDLE, DWORD64, PVOID, DWORD, LPDWORD)
+
+#===============================================================================
+# PVOID CALLBACK FunctionTableAccessProc64(
+#  __in  HANDLE hProcess,
+#  __in  DWORD64 AddrBase
+# );
+#===============================================================================
+PFUNCTION_TABLE_ACCESS_ROUTINE64 = WINFUNCTYPE(PVOID, HANDLE, DWORD64)
+
+#===============================================================================
+# DWORD64 CALLBACK GetModuleBaseProc64(
+#  __in  HANDLE hProcess,
+#  __in  DWORD64 Address
+# );
+#===============================================================================
+PGET_MODULE_BASE_ROUTINE64 = WINFUNCTYPE(DWORD64, HANDLE, DWORD64)
+
+#===============================================================================
+# DWORD64 CALLBACK GetModuleBaseProc64(
+#  __in  HANDLE hProcess,
+#  __in  DWORD64 Address
+# );
+#===============================================================================
+PTRANSLATE_ADDRESS_ROUTINE64 = WINFUNCTYPE(DWORD64, HANDLE, DWORD64)
+
+# Valid machine types for StackWalk64 function
+IMAGE_FILE_MACHINE_I386 = 0x014c    #Intel x86
+IMAGE_FILE_MACHINE_IA64 = 0x0200    #Intel Itanium Processor Family (IPF)
+IMAGE_FILE_MACHINE_AMD64 = 0x8664   #x64 (AMD64 or EM64T)
+
+#===============================================================================
+# BOOL WINAPI StackWalk64(
+#  __in      DWORD MachineType,
+#  __in      HANDLE hProcess,
+#  __in      HANDLE hThread,
+#  __inout   LPSTACKFRAME64 StackFrame,
+#  __inout   PVOID ContextRecord,
+#  __in_opt  PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
+#  __in_opt  PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
+#  __in_opt  PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
+#  __in_opt  PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress
+# );
+#===============================================================================
+def StackWalk64(MachineType, hProcess, hThread, StackFrame, ContextRecord,
+                ReadMemoryRoutine = None,
+                FunctionTableAccessRoutine = None, GetModuleBaseRoutine = None,
+                TranslateAddress = None):
+
+    _StackWalk64 = windll.dbghelp.StackWalk64
+    _StackWalk64.argtypes = [DWORD, HANDLE, HANDLE, LPSTACKFRAME64, PVOID,
+                             PREAD_PROCESS_MEMORY_ROUTINE64,
+                             PFUNCTION_TABLE_ACCESS_ROUTINE64,
+                             PGET_MODULE_BASE_ROUTINE64,
+                             PTRANSLATE_ADDRESS_ROUTINE64]
+    _StackWalk64.restype = bool
+
+    pReadMemoryRoutine = None
+    if ReadMemoryRoutine:
+        pReadMemoryRoutine = PREAD_PROCESS_MEMORY_ROUTINE64(ReadMemoryRoutine)
+    else:
+        pReadMemoryRoutine = ctypes.cast(None, PREAD_PROCESS_MEMORY_ROUTINE64)
+
+    pFunctionTableAccessRoutine = None
+    if FunctionTableAccessRoutine:
+        pFunctionTableAccessRoutine = PFUNCTION_TABLE_ACCESS_ROUTINE64(FunctionTableAccessRoutine)
+    else:
+        pFunctionTableAccessRoutine = ctypes.cast(None, PFUNCTION_TABLE_ACCESS_ROUTINE64)
+
+    pGetModuleBaseRoutine = None
+    if GetModuleBaseRoutine:
+        pGetModuleBaseRoutine = PGET_MODULE_BASE_ROUTINE64(GetModuleBaseRoutine)
+    else:
+        pGetModuleBaseRoutine = ctypes.cast(None, PGET_MODULE_BASE_ROUTINE64)
+
+    pTranslateAddress = None
+    if TranslateAddress:
+        pTranslateAddress =  PTRANSLATE_ADDRESS_ROUTINE64(TranslateAddress)
+    else:
+        pTranslateAddress = ctypes.cast(None, PTRANSLATE_ADDRESS_ROUTINE64)
+
+    if ContextRecord is None:
+        raise ValueError("ContextRecord cannot be None")
+    try:
+        pContextRecord = PCONTEXT(ContextRecord)
+    except:
+        pContextRecord = PWOW64_CONTEXT(ContextRecord)
+
+    #this function *DOESN'T* set last error [GetLastError()] properly most of the time.
+    ret = _StackWalk64(MachineType, hProcess, hThread, byref(StackFrame),
+                       pContextRecord, pReadMemoryRoutine,
+                       pFunctionTableAccessRoutine, pGetModuleBaseRoutine,
+                       pTranslateAddress)
+
+    return ret
+
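A sketch of the usual StackWalk64 loop for a 32-bit target; hProcess, hThread and a CONTEXT record `ctx` (for example from the kernel32 GetThreadContext wrapper, taken while the thread is suspended) are assumed to exist:

    frame = STACKFRAME64()
    frame.AddrPC.Offset    = ctx.Eip
    frame.AddrPC.Mode      = AddrModeFlat
    frame.AddrFrame.Offset = ctx.Ebp
    frame.AddrFrame.Mode   = AddrModeFlat
    frame.AddrStack.Offset = ctx.Esp
    frame.AddrStack.Mode   = AddrModeFlat

    # the loop ends when StackWalk64 returns False (end of stack or walk failure)
    while StackWalk64(IMAGE_FILE_MACHINE_I386, hProcess, hThread, frame, ctx):
        print("return address: %#x" % frame.AddrPC.Offset)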
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/defines.py b/scripts/win32/defines.py
new file mode 100644
index 0000000..d2b1418
--- /dev/null
+++ b/scripts/win32/defines.py
@@ -0,0 +1,741 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Common definitions.
+"""
+
+# TODO
+# + add TCHAR and related types?
+
+import ctypes
+import functools
+from builtins import str as text
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+# Cygwin compatibility.
+try:
+    WindowsError
+except NameError:
+    _gle = None
+    class WindowsError(OSError):
+        def __init__(self, *args, **kwargs):
+            OSError.__init__(self, *args, **kwargs)
+            global _gle
+            if _gle is None:
+                from kernel32 import GetLastError as _gle
+            self.winerror = _gle()
+
+    from os import getenv as _real_getenv
+    def getenv(key, default=None):
+        value = _real_getenv(key, None)
+        if value is None:
+            value = _real_getenv(key.upper(), default)
+        return value
+
+#------------------------------------------------------------------------------
+
+# Some stuff from ctypes we'll be using very frequently.
+addressof   = ctypes.addressof
+sizeof      = ctypes.sizeof
+SIZEOF      = ctypes.sizeof
+POINTER     = ctypes.POINTER
+WINFUNCTYPE = ctypes.WINFUNCTYPE
+windll      = ctypes.windll
+
+# Automatically disable padding of structs and unions on 32 bits.
+class Structure(ctypes.Structure):
+    if sizeof(ctypes.c_void_p) == 4:
+        _pack_ = 1
+class Union(ctypes.Union):
+    if sizeof(ctypes.c_void_p) == 4:
+        _pack_ = 1
+
+# The IronPython implementation of byref() was giving some problems,
+# so it's best to replace it with the slower pointer() function.
+try:
+    ctypes.c_void_p(ctypes.byref(ctypes.c_char()))  # this fails in IronPython
+    byref = ctypes.byref
+except TypeError:
+    byref = ctypes.pointer
+
+# XXX DEBUG
+# The following code can be enabled to make the Win32 API wrappers log to
+# standard output the dll and function names, the parameter values and the
+# return value for each call.
+
+##WIN32_VERBOSE_MODE = True
+WIN32_VERBOSE_MODE = False
+
+if WIN32_VERBOSE_MODE:
+
+    class WinDllHook(object):
+        def __getattr__(self, name):
+            if name.startswith('_'):
+                return object.__getattr__(self, name)
+            return WinFuncHook(name)
+
+    class WinFuncHook(object):
+        def __init__(self, name):
+            self.__name = name
+
+        def __getattr__(self, name):
+            if name.startswith('_'):
+                return object.__getattr__(self, name)
+            return WinCallHook(self.__name, name)
+
+    class WinCallHook(object):
+        def __init__(self, dllname, funcname):
+            self.__dllname = dllname
+            self.__funcname = funcname
+            self.__func = getattr(getattr(ctypes.windll, dllname), funcname)
+
+        def __copy_attribute(self, attribute):
+            try:
+                value = getattr(self, attribute)
+                setattr(self.__func, attribute, value)
+            except AttributeError:
+                try:
+                    delattr(self.__func, attribute)
+                except AttributeError:
+                    pass
+
+        def __call__(self, *argv):
+            self.__copy_attribute('argtypes')
+            self.__copy_attribute('restype')
+            self.__copy_attribute('errcheck')
+            print ("-"*10)
+            print ("%s ! %s %r" % (self.__dllname, self.__funcname, argv))
+            retval = self.__func(*argv)
+            print ("== %r" % (retval,))
+            return retval
+
+    windll = WinDllHook()
+
+#------------------------------------------------------------------------------
+
+def RaiseIfZero(result, func = None, arguments = ()):
+    """
+    Error checking for most Win32 API calls.
+
+    The function is assumed to return an integer, which is C{0} on error.
+    In that case the C{WindowsError} exception is raised.
+    """
+    if not result:
+        raise ctypes.WinError()
+    return result
+
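These error-checking helpers are meant to be attached as the ctypes errcheck attribute, in the same pattern every wrapper in this package uses; a sketch (CloseHandle is just an example API, and HANDLE/BOOL are the aliases defined further down in this file):

    _CloseHandle = ctypes.windll.kernel32.CloseHandle
    _CloseHandle.argtypes = [HANDLE]
    _CloseHandle.restype  = BOOL
    _CloseHandle.errcheck = RaiseIfZero   # a zero return now raises WindowsError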
+def RaiseIfNotZero(result, func = None, arguments = ()):
+    """
+    Error checking for some odd Win32 API calls.
+
+    The function is assumed to return an integer, which is zero on success.
+    If the return value is nonzero the C{WindowsError} exception is raised.
+
+    This is mostly useful for free() like functions, where the return value is
+    the pointer to the memory block on failure or a C{NULL} pointer on success.
+    """
+    if result:
+        raise ctypes.WinError()
+    return result
+
+def RaiseIfNotErrorSuccess(result, func = None, arguments = ()):
+    """
+    Error checking for Win32 Registry API calls.
+
+    The function is assumed to return a Win32 error code. If the code is not
+    C{ERROR_SUCCESS} then a C{WindowsError} exception is raised.
+    """
+    if result != ERROR_SUCCESS:
+        raise ctypes.WinError(result)
+    return result
+
+class GuessStringType(object):
+    """
+    Decorator that guesses the correct version (A or W) to call
+    based on the types of the strings passed as parameters.
+
+    Calls the B{ANSI} version if the only string types are ANSI.
+
+    Calls the B{Unicode} version if Unicode or mixed string types are passed.
+
+    The default if no string arguments are passed depends on the value of the
+    L{t_default} class variable.
+
+    @type fn_ansi: function
+    @ivar fn_ansi: ANSI version of the API function to call.
+    @type fn_unicode: function
+    @ivar fn_unicode: Unicode (wide) version of the API function to call.
+
+    @type t_default: type
+    @cvar t_default: Default string type to use.
+        Possible values are:
+         - type('') for ANSI
+         - type(u'') for Unicode
+    """
+
+    # ANSI and Unicode types
+    t_ansi    = type('')
+    t_unicode = type(u'')
+
+    # Default is ANSI for Python 2.x
+    t_default = t_ansi
+
+    def __init__(self, fn_ansi, fn_unicode):
+        """
+        @type  fn_ansi: function
+        @param fn_ansi: ANSI version of the API function to call.
+        @type  fn_unicode: function
+        @param fn_unicode: Unicode (wide) version of the API function to call.
+        """
+        self.fn_ansi    = fn_ansi
+        self.fn_unicode = fn_unicode
+
+        # Copy the wrapped function attributes.
+        try:
+            self.__name__ = self.fn_ansi.__name__[:-1]  # remove the A or W
+        except AttributeError:
+            pass
+        try:
+            self.__module__ = self.fn_ansi.__module__
+        except AttributeError:
+            pass
+        try:
+            self.__doc__ = self.fn_ansi.__doc__
+        except AttributeError:
+            pass
+
+    def __call__(self, *argv, **argd):
+
+        # Shortcut to self.t_ansi
+        t_ansi    = self.t_ansi
+
+        # Get the types of all arguments for the function
+        v_types   = [ type(item) for item in argv ]
+        v_types.extend( [ type(value) for (key, value) in argd.items() ] )
+
+        # Get the appropriate function for the default type
+        if self.t_default == t_ansi:
+            fn = self.fn_ansi
+        else:
+            fn = self.fn_unicode
+
+        # If at least one argument is a Unicode string...
+        if self.t_unicode in v_types:
+
+            # If at least one argument is an ANSI string,
+            # convert all ANSI strings to Unicode
+            if t_ansi in v_types:
+                argv = list(argv)
+                for index in range(len(argv)):
+                    if v_types[index] == t_ansi:
+                        argv[index] = text(argv[index])
+                for (key, value) in argd.items():
+                    if type(value) == t_ansi:
+                        argd[key] = text(value)
+
+            # Use the W version
+            fn = self.fn_unicode
+
+        # If at least one argument is an ANSI string,
+        # but there are no Unicode strings...
+        elif t_ansi in v_types:
+
+            # Use the A version
+            fn = self.fn_ansi
+
+        # Call the function and return the result
+        return fn(*argv, **argd)
+
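A sketch of how the decorator is applied throughout this package; the GetModuleHandleA/GetModuleHandleW pair below is hypothetical and stands in for any ANSI/Unicode wrapper functions:

    GetModuleHandle = GuessStringType(GetModuleHandleA, GetModuleHandleW)

    GetModuleHandle("kernel32.dll")    # ANSI str  -> dispatches to GetModuleHandleA
    GetModuleHandle(u"kernel32.dll")   # unicode   -> dispatches to GetModuleHandleW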
+class DefaultStringType(object):
+    """
+    Decorator that uses the default version (A or W) to call
+    based on the configuration of the L{GuessStringType} decorator.
+
+    @see: L{GuessStringType.t_default}
+
+    @type fn_ansi: function
+    @ivar fn_ansi: ANSI version of the API function to call.
+    @type fn_unicode: function
+    @ivar fn_unicode: Unicode (wide) version of the API function to call.
+    """
+
+    def __init__(self, fn_ansi, fn_unicode):
+        """
+        @type  fn_ansi: function
+        @param fn_ansi: ANSI version of the API function to call.
+        @type  fn_unicode: function
+        @param fn_unicode: Unicode (wide) version of the API function to call.
+        """
+        self.fn_ansi    = fn_ansi
+        self.fn_unicode = fn_unicode
+
+        # Copy the wrapped function attributes.
+        try:
+            self.__name__ = self.fn_ansi.__name__[:-1]  # remove the A or W
+        except AttributeError:
+            pass
+        try:
+            self.__module__ = self.fn_ansi.__module__
+        except AttributeError:
+            pass
+        try:
+            self.__doc__ = self.fn_ansi.__doc__
+        except AttributeError:
+            pass
+
+    def __call__(self, *argv, **argd):
+
+        # Get the appropriate function based on the default.
+        if GuessStringType.t_default == GuessStringType.t_ansi:
+            fn = self.fn_ansi
+        else:
+            fn = self.fn_unicode
+
+        # Call the function and return the result
+        return fn(*argv, **argd)
+
+def MakeANSIVersion(fn):
+    """
+    Decorator that generates an ANSI version of a Unicode (wide) only API call.
+
+    @type  fn: callable
+    @param fn: Unicode (wide) version of the API function to call.
+    """
+    @functools.wraps(fn)
+    def wrapper(*argv, **argd):
+        t_ansi    = GuessStringType.t_ansi
+        t_unicode = GuessStringType.t_unicode
+        v_types   = [ type(item) for item in argv ]
+        v_types.extend( [ type(value) for (key, value) in argd.items() ] )
+        if t_ansi in v_types:
+            argv = list(argv)
+            for index in range(len(argv)):
+                if v_types[index] == t_ansi:
+                    argv[index] = t_unicode(argv[index])
+            for key, value in argd.items():
+                if type(value) == t_ansi:
+                    argd[key] = t_unicode(value)
+        return fn(*argv, **argd)
+    return wrapper
+
+def MakeWideVersion(fn):
+    """
+    Decorator that generates a Unicode (wide) version of an ANSI only API call.
+
+    @type  fn: callable
+    @param fn: ANSI version of the API function to call.
+    """
+    @functools.wraps(fn)
+    def wrapper(*argv, **argd):
+        t_ansi    = GuessStringType.t_ansi
+        t_unicode = GuessStringType.t_unicode
+        v_types   = [ type(item) for item in argv ]
+        v_types.extend( [ type(value) for (key, value) in argd.items() ] )
+        if t_unicode in v_types:
+            argv = list(argv)
+            for index in range(len(argv)):
+                if v_types[index] == t_unicode:
+                    argv[index] = t_ansi(argv[index])
+            for key, value in argd.items():
+                if type(value) == t_unicode:
+                    argd[key] = t_ansi(value)
+        return fn(*argv, **argd)
+    return wrapper
+
+#--- Types --------------------------------------------------------------------
+# http://msdn.microsoft.com/en-us/library/aa383751(v=vs.85).aspx
+
+# Map of basic C types to Win32 types
+LPVOID      = ctypes.c_void_p
+CHAR        = ctypes.c_char
+WCHAR       = ctypes.c_wchar
+BYTE        = ctypes.c_ubyte
+SBYTE       = ctypes.c_byte
+WORD        = ctypes.c_uint16
+SWORD       = ctypes.c_int16
+DWORD       = ctypes.c_uint32
+SDWORD      = ctypes.c_int32
+QWORD       = ctypes.c_uint64
+SQWORD      = ctypes.c_int64
+SHORT       = ctypes.c_int16
+USHORT      = ctypes.c_uint16
+INT         = ctypes.c_int32
+UINT        = ctypes.c_uint32
+LONG        = ctypes.c_int32
+ULONG       = ctypes.c_uint32
+LONGLONG    = ctypes.c_int64        # c_longlong
+ULONGLONG   = ctypes.c_uint64       # c_ulonglong
+LPSTR       = ctypes.c_char_p
+LPWSTR      = ctypes.c_wchar_p
+INT8        = ctypes.c_int8
+INT16       = ctypes.c_int16
+INT32       = ctypes.c_int32
+INT64       = ctypes.c_int64
+UINT8       = ctypes.c_uint8
+UINT16      = ctypes.c_uint16
+UINT32      = ctypes.c_uint32
+UINT64      = ctypes.c_uint64
+LONG32      = ctypes.c_int32
+LONG64      = ctypes.c_int64
+ULONG32     = ctypes.c_uint32
+ULONG64     = ctypes.c_uint64
+DWORD32     = ctypes.c_uint32
+DWORD64     = ctypes.c_uint64
+BOOL        = ctypes.c_int32
+FLOAT       = ctypes.c_float        # not sure on cygwin
+DOUBLE      = ctypes.c_double       # not sure on cygwin
+
+# Map size_t to SIZE_T
+try:
+    SIZE_T  = ctypes.c_size_t
+    SSIZE_T = ctypes.c_ssize_t
+except AttributeError:
+    # Size of a pointer
+    SIZE_T  = {1:BYTE, 2:WORD, 4:DWORD, 8:QWORD}[sizeof(LPVOID)]
+    SSIZE_T = {1:SBYTE, 2:SWORD, 4:SDWORD, 8:SQWORD}[sizeof(LPVOID)]
+PSIZE_T     = POINTER(SIZE_T)
+
+# Not really pointers but pointer-sized integers
+DWORD_PTR   = SIZE_T
+ULONG_PTR   = SIZE_T
+LONG_PTR    = SIZE_T
+
+# Other Win32 types, more may be added as needed
+PVOID       = LPVOID
+PPVOID      = POINTER(PVOID)
+PSTR        = LPSTR
+PWSTR       = LPWSTR
+PCHAR       = LPSTR
+PWCHAR      = LPWSTR
+LPBYTE      = POINTER(BYTE)
+LPSBYTE     = POINTER(SBYTE)
+LPWORD      = POINTER(WORD)
+LPSWORD     = POINTER(SWORD)
+LPDWORD     = POINTER(DWORD)
+LPSDWORD    = POINTER(SDWORD)
+LPULONG     = POINTER(ULONG)
+LPLONG      = POINTER(LONG)
+PDWORD      = LPDWORD
+PDWORD_PTR  = POINTER(DWORD_PTR)
+PULONG      = LPULONG
+PLONG       = LPLONG
+CCHAR       = CHAR
+BOOLEAN     = BYTE
+PBOOL       = POINTER(BOOL)
+LPBOOL      = PBOOL
+TCHAR       = CHAR      # XXX ANSI by default?
+UCHAR       = BYTE
+DWORDLONG   = ULONGLONG
+LPDWORD32   = POINTER(DWORD32)
+LPULONG32   = POINTER(ULONG32)
+LPDWORD64   = POINTER(DWORD64)
+LPULONG64   = POINTER(ULONG64)
+PDWORD32    = LPDWORD32
+PULONG32    = LPULONG32
+PDWORD64    = LPDWORD64
+PULONG64    = LPULONG64
+ATOM        = WORD
+HANDLE      = LPVOID
+PHANDLE     = POINTER(HANDLE)
+LPHANDLE    = PHANDLE
+HMODULE     = HANDLE
+HINSTANCE   = HANDLE
+HTASK       = HANDLE
+HKEY        = HANDLE
+PHKEY       = POINTER(HKEY)
+HDESK       = HANDLE
+HRSRC       = HANDLE
+HSTR        = HANDLE
+HWINSTA     = HANDLE
+HKL         = HANDLE
+HDWP        = HANDLE
+HFILE       = HANDLE
+HRESULT     = LONG
+HGLOBAL     = HANDLE
+HLOCAL      = HANDLE
+HGDIOBJ     = HANDLE
+HDC         = HGDIOBJ
+HRGN        = HGDIOBJ
+HBITMAP     = HGDIOBJ
+HPALETTE    = HGDIOBJ
+HPEN        = HGDIOBJ
+HBRUSH      = HGDIOBJ
+HMF         = HGDIOBJ
+HEMF        = HGDIOBJ
+HENHMETAFILE = HGDIOBJ
+HMETAFILE   = HGDIOBJ
+HMETAFILEPICT = HGDIOBJ
+HWND        = HANDLE
+NTSTATUS    = LONG
+PNTSTATUS   = POINTER(NTSTATUS)
+KAFFINITY   = ULONG_PTR
+RVA         = DWORD
+RVA64       = QWORD
+WPARAM      = DWORD
+LPARAM      = LPVOID
+LRESULT     = LPVOID
+ACCESS_MASK = DWORD
+REGSAM      = ACCESS_MASK
+PACCESS_MASK = POINTER(ACCESS_MASK)
+PREGSAM     = POINTER(REGSAM)
+
+# Since the SID is an opaque structure, let's treat its pointers as void*
+PSID = PVOID
+
+# typedef union _LARGE_INTEGER {
+#   struct {
+#     DWORD LowPart;
+#     LONG HighPart;
+#   } ;
+#   struct {
+#     DWORD LowPart;
+#     LONG HighPart;
+#   } u;
+#   LONGLONG QuadPart;
+# } LARGE_INTEGER,
+#  *PLARGE_INTEGER;
+
+# XXX TODO
+
+# typedef struct _FLOAT128 {
+#     __int64 LowPart;
+#     __int64 HighPart;
+# } FLOAT128;
+class FLOAT128 (Structure):
+    _fields_ = [
+        ("LowPart",     QWORD),
+        ("HighPart",    QWORD),
+    ]
+PFLOAT128 = POINTER(FLOAT128)
+
+# typedef struct DECLSPEC_ALIGN(16) _M128A {
+#     ULONGLONG Low;
+#     LONGLONG High;
+# } M128A, *PM128A;
+class M128A(Structure):
+    _fields_ = [
+        ("Low",     ULONGLONG),
+        ("High",    LONGLONG),
+    ]
+PM128A = POINTER(M128A)
+
+#--- Constants ----------------------------------------------------------------
+
+NULL        = None
+INFINITE    = -1
+TRUE        = 1
+FALSE       = 0
+
+# http://blogs.msdn.com/oldnewthing/archive/2004/08/26/220873.aspx
+ANYSIZE_ARRAY = 1
+
+# Invalid handle value is -1 cast to a void pointer.
+try:
+    INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value #-1 #0xFFFFFFFF
+except TypeError:
+    if sizeof(ctypes.c_void_p) == 4:
+        INVALID_HANDLE_VALUE = 0xFFFFFFFF
+    elif sizeof(ctypes.c_void_p) == 8:
+        INVALID_HANDLE_VALUE = 0xFFFFFFFFFFFFFFFF
+    else:
+        raise
+
+MAX_MODULE_NAME32   = 255
+MAX_PATH            = 260
+
+# Error codes
+# TODO maybe add more error codes?
+# If there are too many they could be pickled instead,
+# or at the very least put in a new file
+ERROR_SUCCESS                       = 0
+ERROR_INVALID_FUNCTION              = 1
+ERROR_FILE_NOT_FOUND                = 2
+ERROR_PATH_NOT_FOUND                = 3
+ERROR_ACCESS_DENIED                 = 5
+ERROR_INVALID_HANDLE                = 6
+ERROR_NOT_ENOUGH_MEMORY             = 8
+ERROR_INVALID_DRIVE                 = 15
+ERROR_NO_MORE_FILES                 = 18
+ERROR_BAD_LENGTH                    = 24
+ERROR_HANDLE_EOF                    = 38
+ERROR_HANDLE_DISK_FULL              = 39
+ERROR_NOT_SUPPORTED                 = 50
+ERROR_FILE_EXISTS                   = 80
+ERROR_INVALID_PARAMETER             = 87
+ERROR_BUFFER_OVERFLOW               = 111
+ERROR_DISK_FULL                     = 112
+ERROR_CALL_NOT_IMPLEMENTED          = 120
+ERROR_SEM_TIMEOUT                   = 121
+ERROR_INSUFFICIENT_BUFFER           = 122
+ERROR_INVALID_NAME                  = 123
+ERROR_MOD_NOT_FOUND                 = 126
+ERROR_PROC_NOT_FOUND                = 127
+ERROR_DIR_NOT_EMPTY                 = 145
+ERROR_BAD_THREADID_ADDR             = 159
+ERROR_BAD_ARGUMENTS                 = 160
+ERROR_BAD_PATHNAME                  = 161
+ERROR_ALREADY_EXISTS                = 183
+ERROR_INVALID_FLAG_NUMBER           = 186
+ERROR_ENVVAR_NOT_FOUND              = 203
+ERROR_FILENAME_EXCED_RANGE          = 206
+ERROR_MORE_DATA                     = 234
+
+WAIT_TIMEOUT                        = 258
+
+ERROR_NO_MORE_ITEMS                 = 259
+ERROR_PARTIAL_COPY                  = 299
+ERROR_INVALID_ADDRESS               = 487
+ERROR_THREAD_NOT_IN_PROCESS         = 566
+ERROR_CONTROL_C_EXIT                = 572
+ERROR_UNHANDLED_EXCEPTION           = 574
+ERROR_ASSERTION_FAILURE             = 668
+ERROR_WOW_ASSERTION                 = 670
+
+ERROR_DBG_EXCEPTION_NOT_HANDLED     = 688
+ERROR_DBG_REPLY_LATER               = 689
+ERROR_DBG_UNABLE_TO_PROVIDE_HANDLE  = 690
+ERROR_DBG_TERMINATE_THREAD          = 691
+ERROR_DBG_TERMINATE_PROCESS         = 692
+ERROR_DBG_CONTROL_C                 = 693
+ERROR_DBG_PRINTEXCEPTION_C          = 694
+ERROR_DBG_RIPEXCEPTION              = 695
+ERROR_DBG_CONTROL_BREAK             = 696
+ERROR_DBG_COMMAND_EXCEPTION         = 697
+ERROR_DBG_EXCEPTION_HANDLED         = 766
+ERROR_DBG_CONTINUE                  = 767
+
+ERROR_ELEVATION_REQUIRED            = 740
+ERROR_NOACCESS                      = 998
+
+ERROR_CIRCULAR_DEPENDENCY           = 1059
+ERROR_SERVICE_DOES_NOT_EXIST        = 1060
+ERROR_SERVICE_CANNOT_ACCEPT_CTRL    = 1061
+ERROR_SERVICE_NOT_ACTIVE            = 1062
+ERROR_FAILED_SERVICE_CONTROLLER_CONNECT = 1063
+ERROR_EXCEPTION_IN_SERVICE          = 1064
+ERROR_DATABASE_DOES_NOT_EXIST       = 1065
+ERROR_SERVICE_SPECIFIC_ERROR        = 1066
+ERROR_PROCESS_ABORTED               = 1067
+ERROR_SERVICE_DEPENDENCY_FAIL       = 1068
+ERROR_SERVICE_LOGON_FAILED          = 1069
+ERROR_SERVICE_START_HANG            = 1070
+ERROR_INVALID_SERVICE_LOCK          = 1071
+ERROR_SERVICE_MARKED_FOR_DELETE     = 1072
+ERROR_SERVICE_EXISTS                = 1073
+ERROR_ALREADY_RUNNING_LKG           = 1074
+ERROR_SERVICE_DEPENDENCY_DELETED    = 1075
+ERROR_BOOT_ALREADY_ACCEPTED         = 1076
+ERROR_SERVICE_NEVER_STARTED         = 1077
+ERROR_DUPLICATE_SERVICE_NAME        = 1078
+ERROR_DIFFERENT_SERVICE_ACCOUNT     = 1079
+ERROR_CANNOT_DETECT_DRIVER_FAILURE  = 1080
+ERROR_CANNOT_DETECT_PROCESS_ABORT   = 1081
+ERROR_NO_RECOVERY_PROGRAM           = 1082
+ERROR_SERVICE_NOT_IN_EXE            = 1083
+ERROR_NOT_SAFEBOOT_SERVICE          = 1084
+
+ERROR_DEBUGGER_INACTIVE             = 1284
+
+ERROR_PRIVILEGE_NOT_HELD            = 1314
+
+ERROR_NONE_MAPPED                   = 1332
+
+RPC_S_SERVER_UNAVAILABLE            = 1722
+
+# Standard access rights
+DELETE                           = 0x00010000
+READ_CONTROL                     = 0x00020000
+WRITE_DAC                        = 0x00040000
+WRITE_OWNER                      = 0x00080000
+SYNCHRONIZE                      = 0x00100000
+STANDARD_RIGHTS_REQUIRED         = 0x000F0000
+STANDARD_RIGHTS_READ             = READ_CONTROL
+STANDARD_RIGHTS_WRITE            = READ_CONTROL
+STANDARD_RIGHTS_EXECUTE          = READ_CONTROL
+STANDARD_RIGHTS_ALL              = 0x001F0000
+SPECIFIC_RIGHTS_ALL              = 0x0000FFFF
+
+#--- Structures ---------------------------------------------------------------
+
+# typedef struct _LSA_UNICODE_STRING {
+#   USHORT Length;
+#   USHORT MaximumLength;
+#   PWSTR Buffer;
+# } LSA_UNICODE_STRING,
+#  *PLSA_UNICODE_STRING,
+#  UNICODE_STRING,
+#  *PUNICODE_STRING;
+class UNICODE_STRING(Structure):
+    _fields_ = [
+        ("Length",          USHORT),
+        ("MaximumLength",   USHORT),
+        ("Buffer",          PVOID),
+    ]
+
+# From MSDN:
+#
+# typedef struct _GUID {
+#   DWORD Data1;
+#   WORD Data2;
+#   WORD Data3;
+#   BYTE Data4[8];
+# } GUID;
+class GUID(Structure):
+    _fields_ = [
+        ("Data1",   DWORD),
+        ("Data2",   WORD),
+        ("Data3",   WORD),
+        ("Data4",   BYTE * 8),
+    ]
+
+# From MSDN:
+#
+# typedef struct _LIST_ENTRY {
+#     struct _LIST_ENTRY *Flink;
+#     struct _LIST_ENTRY *Blink;
+# } LIST_ENTRY, *PLIST_ENTRY, *RESTRICTED_POINTER PRLIST_ENTRY;
+class LIST_ENTRY(Structure):
+    _fields_ = [
+        ("Flink",   PVOID),     # POINTER(LIST_ENTRY)
+        ("Blink",   PVOID),     # POINTER(LIST_ENTRY)
+    ]
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+##__all__ = [_x for _x in _all if not _x.startswith('_')]
+##__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/gdi32.py b/scripts/win32/gdi32.py
new file mode 100644
index 0000000..8670f2d
--- /dev/null
+++ b/scripts/win32/gdi32.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for gdi32.dll in ctypes.
+"""
+
+from .defines import *  # NOQA
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- Helpers ------------------------------------------------------------------
+
+#--- Types --------------------------------------------------------------------
+
+#--- Constants ----------------------------------------------------------------
+
+# GDI object types
+OBJ_PEN             = 1
+OBJ_BRUSH           = 2
+OBJ_DC              = 3
+OBJ_METADC          = 4
+OBJ_PAL             = 5
+OBJ_FONT            = 6
+OBJ_BITMAP          = 7
+OBJ_REGION          = 8
+OBJ_METAFILE        = 9
+OBJ_MEMDC           = 10
+OBJ_EXTPEN          = 11
+OBJ_ENHMETADC       = 12
+OBJ_ENHMETAFILE     = 13
+OBJ_COLORSPACE      = 14
+GDI_OBJ_LAST        = OBJ_COLORSPACE
+
+# Ternary raster operations
+SRCCOPY         = 0x00CC0020 # dest = source
+SRCPAINT        = 0x00EE0086 # dest = source OR dest
+SRCAND          = 0x008800C6 # dest = source AND dest
+SRCINVERT       = 0x00660046 # dest = source XOR dest
+SRCERASE        = 0x00440328 # dest = source AND (NOT dest)
+NOTSRCCOPY      = 0x00330008 # dest = (NOT source)
+NOTSRCERASE     = 0x001100A6 # dest = (NOT src) AND (NOT dest)
+MERGECOPY       = 0x00C000CA # dest = (source AND pattern)
+MERGEPAINT      = 0x00BB0226 # dest = (NOT source) OR dest
+PATCOPY         = 0x00F00021 # dest = pattern
+PATPAINT        = 0x00FB0A09 # dest = DPSnoo
+PATINVERT       = 0x005A0049 # dest = pattern XOR dest
+DSTINVERT       = 0x00550009 # dest = (NOT dest)
+BLACKNESS       = 0x00000042 # dest = BLACK
+WHITENESS       = 0x00FF0062 # dest = WHITE
+NOMIRRORBITMAP  = 0x80000000 # Do not Mirror the bitmap in this call
+CAPTUREBLT      = 0x40000000 # Include layered windows
+
+# Region flags
+ERROR               = 0
+NULLREGION          = 1
+SIMPLEREGION        = 2
+COMPLEXREGION       = 3
+RGN_ERROR           = ERROR
+
+# CombineRgn() styles
+RGN_AND             = 1
+RGN_OR              = 2
+RGN_XOR             = 3
+RGN_DIFF            = 4
+RGN_COPY            = 5
+RGN_MIN             = RGN_AND
+RGN_MAX             = RGN_COPY
+
+# StretchBlt() modes
+BLACKONWHITE        = 1
+WHITEONBLACK        = 2
+COLORONCOLOR        = 3
+HALFTONE            = 4
+MAXSTRETCHBLTMODE   = 4
+STRETCH_ANDSCANS    = BLACKONWHITE
+STRETCH_ORSCANS     = WHITEONBLACK
+STRETCH_DELETESCANS = COLORONCOLOR
+STRETCH_HALFTONE    = HALFTONE
+
+# PolyFill() modes
+ALTERNATE       = 1
+WINDING         = 2
+POLYFILL_LAST   = 2
+
+# Layout orientation options
+LAYOUT_RTL                         = 0x00000001 # Right to left
+LAYOUT_BTT                         = 0x00000002 # Bottom to top
+LAYOUT_VBH                         = 0x00000004 # Vertical before horizontal
+LAYOUT_ORIENTATIONMASK             = LAYOUT_RTL + LAYOUT_BTT + LAYOUT_VBH
+LAYOUT_BITMAPORIENTATIONPRESERVED  = 0x00000008
+
+# Stock objects
+WHITE_BRUSH         = 0
+LTGRAY_BRUSH        = 1
+GRAY_BRUSH          = 2
+DKGRAY_BRUSH        = 3
+BLACK_BRUSH         = 4
+NULL_BRUSH          = 5
+HOLLOW_BRUSH        = NULL_BRUSH
+WHITE_PEN           = 6
+BLACK_PEN           = 7
+NULL_PEN            = 8
+OEM_FIXED_FONT      = 10
+ANSI_FIXED_FONT     = 11
+ANSI_VAR_FONT       = 12
+SYSTEM_FONT         = 13
+DEVICE_DEFAULT_FONT = 14
+DEFAULT_PALETTE     = 15
+SYSTEM_FIXED_FONT   = 16
+
+# Metafile functions
+META_SETBKCOLOR              = 0x0201
+META_SETBKMODE               = 0x0102
+META_SETMAPMODE              = 0x0103
+META_SETROP2                 = 0x0104
+META_SETRELABS               = 0x0105
+META_SETPOLYFILLMODE         = 0x0106
+META_SETSTRETCHBLTMODE       = 0x0107
+META_SETTEXTCHAREXTRA        = 0x0108
+META_SETTEXTCOLOR            = 0x0209
+META_SETTEXTJUSTIFICATION    = 0x020A
+META_SETWINDOWORG            = 0x020B
+META_SETWINDOWEXT            = 0x020C
+META_SETVIEWPORTORG          = 0x020D
+META_SETVIEWPORTEXT          = 0x020E
+META_OFFSETWINDOWORG         = 0x020F
+META_SCALEWINDOWEXT          = 0x0410
+META_OFFSETVIEWPORTORG       = 0x0211
+META_SCALEVIEWPORTEXT        = 0x0412
+META_LINETO                  = 0x0213
+META_MOVETO                  = 0x0214
+META_EXCLUDECLIPRECT         = 0x0415
+META_INTERSECTCLIPRECT       = 0x0416
+META_ARC                     = 0x0817
+META_ELLIPSE                 = 0x0418
+META_FLOODFILL               = 0x0419
+META_PIE                     = 0x081A
+META_RECTANGLE               = 0x041B
+META_ROUNDRECT               = 0x061C
+META_PATBLT                  = 0x061D
+META_SAVEDC                  = 0x001E
+META_SETPIXEL                = 0x041F
+META_OFFSETCLIPRGN           = 0x0220
+META_TEXTOUT                 = 0x0521
+META_BITBLT                  = 0x0922
+META_STRETCHBLT              = 0x0B23
+META_POLYGON                 = 0x0324
+META_POLYLINE                = 0x0325
+META_ESCAPE                  = 0x0626
+META_RESTOREDC               = 0x0127
+META_FILLREGION              = 0x0228
+META_FRAMEREGION             = 0x0429
+META_INVERTREGION            = 0x012A
+META_PAINTREGION             = 0x012B
+META_SELECTCLIPREGION        = 0x012C
+META_SELECTOBJECT            = 0x012D
+META_SETTEXTALIGN            = 0x012E
+META_CHORD                   = 0x0830
+META_SETMAPPERFLAGS          = 0x0231
+META_EXTTEXTOUT              = 0x0a32
+META_SETDIBTODEV             = 0x0d33
+META_SELECTPALETTE           = 0x0234
+META_REALIZEPALETTE          = 0x0035
+META_ANIMATEPALETTE          = 0x0436
+META_SETPALENTRIES           = 0x0037
+META_POLYPOLYGON             = 0x0538
+META_RESIZEPALETTE           = 0x0139
+META_DIBBITBLT               = 0x0940
+META_DIBSTRETCHBLT           = 0x0b41
+META_DIBCREATEPATTERNBRUSH   = 0x0142
+META_STRETCHDIB              = 0x0f43
+META_EXTFLOODFILL            = 0x0548
+META_SETLAYOUT               = 0x0149
+META_DELETEOBJECT            = 0x01f0
+META_CREATEPALETTE           = 0x00f7
+META_CREATEPATTERNBRUSH      = 0x01F9
+META_CREATEPENINDIRECT       = 0x02FA
+META_CREATEFONTINDIRECT      = 0x02FB
+META_CREATEBRUSHINDIRECT     = 0x02FC
+META_CREATEREGION            = 0x06FF
+
+# Metafile escape codes
+NEWFRAME                     = 1
+ABORTDOC                     = 2
+NEXTBAND                     = 3
+SETCOLORTABLE                = 4
+GETCOLORTABLE                = 5
+FLUSHOUTPUT                  = 6
+DRAFTMODE                    = 7
+QUERYESCSUPPORT              = 8
+SETABORTPROC                 = 9
+STARTDOC                     = 10
+ENDDOC                       = 11
+GETPHYSPAGESIZE              = 12
+GETPRINTINGOFFSET            = 13
+GETSCALINGFACTOR             = 14
+MFCOMMENT                    = 15
+GETPENWIDTH                  = 16
+SETCOPYCOUNT                 = 17
+SELECTPAPERSOURCE            = 18
+DEVICEDATA                   = 19
+PASSTHROUGH                  = 19
+GETTECHNOLGY                 = 20
+GETTECHNOLOGY                = 20
+SETLINECAP                   = 21
+SETLINEJOIN                  = 22
+SETMITERLIMIT                = 23
+BANDINFO                     = 24
+DRAWPATTERNRECT              = 25
+GETVECTORPENSIZE             = 26
+GETVECTORBRUSHSIZE           = 27
+ENABLEDUPLEX                 = 28
+GETSETPAPERBINS              = 29
+GETSETPRINTORIENT            = 30
+ENUMPAPERBINS                = 31
+SETDIBSCALING                = 32
+EPSPRINTING                  = 33
+ENUMPAPERMETRICS             = 34
+GETSETPAPERMETRICS           = 35
+POSTSCRIPT_DATA              = 37
+POSTSCRIPT_IGNORE            = 38
+MOUSETRAILS                  = 39
+GETDEVICEUNITS               = 42
+GETEXTENDEDTEXTMETRICS       = 256
+GETEXTENTTABLE               = 257
+GETPAIRKERNTABLE             = 258
+GETTRACKKERNTABLE            = 259
+EXTTEXTOUT                   = 512
+GETFACENAME                  = 513
+DOWNLOADFACE                 = 514
+ENABLERELATIVEWIDTHS         = 768
+ENABLEPAIRKERNING            = 769
+SETKERNTRACK                 = 770
+SETALLJUSTVALUES             = 771
+SETCHARSET                   = 772
+STRETCHBLT                   = 2048
+METAFILE_DRIVER              = 2049
+GETSETSCREENPARAMS           = 3072
+QUERYDIBSUPPORT              = 3073
+BEGIN_PATH                   = 4096
+CLIP_TO_PATH                 = 4097
+END_PATH                     = 4098
+EXT_DEVICE_CAPS              = 4099
+RESTORE_CTM                  = 4100
+SAVE_CTM                     = 4101
+SET_ARC_DIRECTION            = 4102
+SET_BACKGROUND_COLOR         = 4103
+SET_POLY_MODE                = 4104
+SET_SCREEN_ANGLE             = 4105
+SET_SPREAD                   = 4106
+TRANSFORM_CTM                = 4107
+SET_CLIP_BOX                 = 4108
+SET_BOUNDS                   = 4109
+SET_MIRROR_MODE              = 4110
+OPENCHANNEL                  = 4110
+DOWNLOADHEADER               = 4111
+CLOSECHANNEL                 = 4112
+POSTSCRIPT_PASSTHROUGH       = 4115
+ENCAPSULATED_POSTSCRIPT      = 4116
+POSTSCRIPT_IDENTIFY          = 4117
+POSTSCRIPT_INJECTION         = 4118
+CHECKJPEGFORMAT              = 4119
+CHECKPNGFORMAT               = 4120
+GET_PS_FEATURESETTING        = 4121
+GDIPLUS_TS_QUERYVER          = 4122
+GDIPLUS_TS_RECORD            = 4123
+SPCLPASSTHROUGH2             = 4568
+
+#--- Structures ---------------------------------------------------------------
+
+# typedef struct _RECT {
+#   LONG left;
+#   LONG top;
+#   LONG right;
+#   LONG bottom;
+# } RECT, *PRECT;
+class RECT(Structure):
+    _fields_ = [
+        ('left',    LONG),
+        ('top',     LONG),
+        ('right',   LONG),
+        ('bottom',  LONG),
+    ]
+PRECT  = POINTER(RECT)
+LPRECT = PRECT
+
+# typedef struct tagPOINT {
+#   LONG x;
+#   LONG y;
+# } POINT;
+class POINT(Structure):
+    _fields_ = [
+        ('x',   LONG),
+        ('y',   LONG),
+    ]
+PPOINT  = POINTER(POINT)
+LPPOINT = PPOINT
+
+# typedef struct tagBITMAP {
+#   LONG   bmType;
+#   LONG   bmWidth;
+#   LONG   bmHeight;
+#   LONG   bmWidthBytes;
+#   WORD   bmPlanes;
+#   WORD   bmBitsPixel;
+#   LPVOID bmBits;
+# } BITMAP, *PBITMAP;
+class BITMAP(Structure):
+    _fields_ = [
+        ("bmType",          LONG),
+        ("bmWidth",         LONG),
+        ("bmHeight",        LONG),
+        ("bmWidthBytes",    LONG),
+        ("bmPlanes",        WORD),
+        ("bmBitsPixel",     WORD),
+        ("bmBits",          LPVOID),
+    ]
+PBITMAP  = POINTER(BITMAP)
+LPBITMAP = PBITMAP
+
+#--- High level classes -------------------------------------------------------
+
+#--- gdi32.dll ----------------------------------------------------------------
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/kernel32.py b/scripts/win32/kernel32.py
new file mode 100644
index 0000000..4d7e2a8
--- /dev/null
+++ b/scripts/win32/kernel32.py
@@ -0,0 +1,4741 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for kernel32.dll in ctypes.
+"""
+
+import warnings
+
+from builtins import str as text
+
+from .defines import *  # NOQA
+
+from . import context_i386  # NOQA
+from . import context_amd64  # NOQA
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+_all.add('version')
+#==============================================================================
+
+from .version import *
+
+#------------------------------------------------------------------------------
+
+# This can't be defined in defines.py because it calls GetLastError().
+def RaiseIfLastError(result, func = None, arguments = ()):
+    """
+    Error checking for Win32 API calls with no error-specific return value.
+
+    Regardless of the return value, the function calls GetLastError(). If the
+    code is not C{ERROR_SUCCESS} then a C{WindowsError} exception is raised.
+
+    For this to work, the user MUST call SetLastError(ERROR_SUCCESS) prior to
+    calling the API. Otherwise an exception may be raised even on success,
+    since most API calls don't clear the error status code.
+    """
+    code = GetLastError()
+    if code != ERROR_SUCCESS:
+        raise ctypes.WinError(code)
+    return result
+
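+# A minimal sketch of how this checker can be wired up, assuming a
+# hypothetical API that reports failure only through GetLastError()
+# (``SomeApi`` is an illustrative name, not part of this module):
+#
+#   _SomeApi = windll.kernel32.SomeApi
+#   _SomeApi.errcheck = RaiseIfLastError   # ctypes calls it as (result, func, arguments)
+#   SetLastError(ERROR_SUCCESS)            # clear any stale error code first
+#   _SomeApi(...)                          # raises WindowsError on failure
+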
+#--- CONTEXT structure and constants ------------------------------------------
+
+ContextArchMask = 0x0FFF0000    # just guessing here! seems to work, though
+
+if   arch == ARCH_I386:
+    from .context_i386 import *
+elif arch == ARCH_AMD64:
+    if bits == 64:
+        from .context_amd64 import *
+    else:
+        from .context_i386 import *
+else:
+    warnings.warn("Unknown or unsupported architecture: %s" % arch)
+
+#--- Constants ----------------------------------------------------------------
+
+STILL_ACTIVE = 259
+
+WAIT_TIMEOUT        = 0x102
+WAIT_FAILED         = -1
+WAIT_OBJECT_0       = 0
+
+EXCEPTION_NONCONTINUABLE        = 0x1       # Noncontinuable exception
+EXCEPTION_MAXIMUM_PARAMETERS    = 15        # maximum number of exception parameters
+MAXIMUM_WAIT_OBJECTS            = 64        # Maximum number of wait objects
+MAXIMUM_SUSPEND_COUNT           = 0x7f      # Maximum times thread can be suspended
+
+FORMAT_MESSAGE_ALLOCATE_BUFFER  = 0x00000100
+FORMAT_MESSAGE_FROM_SYSTEM      = 0x00001000
+
+GR_GDIOBJECTS  = 0
+GR_USEROBJECTS = 1
+
+PROCESS_NAME_NATIVE = 1
+
+MAXINTATOM = 0xC000
+
+STD_INPUT_HANDLE  = 0xFFFFFFF6      # (DWORD)-10
+STD_OUTPUT_HANDLE = 0xFFFFFFF5      # (DWORD)-11
+STD_ERROR_HANDLE  = 0xFFFFFFF4      # (DWORD)-12
+
+ATTACH_PARENT_PROCESS = 0xFFFFFFFF  # (DWORD)-1
+
+# LoadLibraryEx constants
+DONT_RESOLVE_DLL_REFERENCES         = 0x00000001
+LOAD_LIBRARY_AS_DATAFILE            = 0x00000002
+LOAD_WITH_ALTERED_SEARCH_PATH       = 0x00000008
+LOAD_IGNORE_CODE_AUTHZ_LEVEL        = 0x00000010
+LOAD_LIBRARY_AS_IMAGE_RESOURCE      = 0x00000020
+LOAD_LIBRARY_AS_DATAFILE_EXCLUSIVE  = 0x00000040
+
+# SetSearchPathMode flags
+# TODO I couldn't find these constants :(
+##BASE_SEARCH_PATH_ENABLE_SAFE_SEARCHMODE     = ???
+##BASE_SEARCH_PATH_DISABLE_SAFE_SEARCHMODE    = ???
+##BASE_SEARCH_PATH_PERMANENT                  = ???
+
+# Console control events
+CTRL_C_EVENT        = 0
+CTRL_BREAK_EVENT    = 1
+CTRL_CLOSE_EVENT    = 2
+CTRL_LOGOFF_EVENT   = 5
+CTRL_SHUTDOWN_EVENT = 6
+
+# Heap flags
+HEAP_NO_SERIALIZE           = 0x00000001
+HEAP_GENERATE_EXCEPTIONS    = 0x00000004
+HEAP_ZERO_MEMORY            = 0x00000008
+HEAP_CREATE_ENABLE_EXECUTE  = 0x00040000
+
+# Standard access rights
+DELETE                      = (0x00010000)
+READ_CONTROL                = (0x00020000)
+WRITE_DAC                   = (0x00040000)
+WRITE_OWNER                 = (0x00080000)
+SYNCHRONIZE                 = (0x00100000)
+STANDARD_RIGHTS_REQUIRED    = (0x000F0000)
+STANDARD_RIGHTS_READ        = (READ_CONTROL)
+STANDARD_RIGHTS_WRITE       = (READ_CONTROL)
+STANDARD_RIGHTS_EXECUTE     = (READ_CONTROL)
+STANDARD_RIGHTS_ALL         = (0x001F0000)
+SPECIFIC_RIGHTS_ALL         = (0x0000FFFF)
+
+# Mutex access rights
+MUTEX_ALL_ACCESS   = 0x1F0001
+MUTEX_MODIFY_STATE = 1
+
+# Event access rights
+EVENT_ALL_ACCESS   = 0x1F0003
+EVENT_MODIFY_STATE = 2
+
+# Semaphore access rights
+SEMAPHORE_ALL_ACCESS   = 0x1F0003
+SEMAPHORE_MODIFY_STATE = 2
+
+# Timer access rights
+TIMER_ALL_ACCESS   = 0x1F0003
+TIMER_MODIFY_STATE = 2
+TIMER_QUERY_STATE  = 1
+
+# Process access rights for OpenProcess
+PROCESS_TERMINATE                 = 0x0001
+PROCESS_CREATE_THREAD             = 0x0002
+PROCESS_SET_SESSIONID             = 0x0004
+PROCESS_VM_OPERATION              = 0x0008
+PROCESS_VM_READ                   = 0x0010
+PROCESS_VM_WRITE                  = 0x0020
+PROCESS_DUP_HANDLE                = 0x0040
+PROCESS_CREATE_PROCESS            = 0x0080
+PROCESS_SET_QUOTA                 = 0x0100
+PROCESS_SET_INFORMATION           = 0x0200
+PROCESS_QUERY_INFORMATION         = 0x0400
+PROCESS_SUSPEND_RESUME            = 0x0800
+PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
+
+# Thread access rights for OpenThread
+THREAD_TERMINATE                 = 0x0001
+THREAD_SUSPEND_RESUME            = 0x0002
+THREAD_ALERT                     = 0x0004
+THREAD_GET_CONTEXT               = 0x0008
+THREAD_SET_CONTEXT               = 0x0010
+THREAD_SET_INFORMATION           = 0x0020
+THREAD_QUERY_INFORMATION         = 0x0040
+THREAD_SET_THREAD_TOKEN          = 0x0080
+THREAD_IMPERSONATE               = 0x0100
+THREAD_DIRECT_IMPERSONATION      = 0x0200
+THREAD_SET_LIMITED_INFORMATION   = 0x0400
+THREAD_QUERY_LIMITED_INFORMATION = 0x0800
+
+# The values of PROCESS_ALL_ACCESS and THREAD_ALL_ACCESS were changed in Vista/2008
+PROCESS_ALL_ACCESS_NT = (STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0xFFF)
+PROCESS_ALL_ACCESS_VISTA = (STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0xFFFF)
+THREAD_ALL_ACCESS_NT = (STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0x3FF)
+THREAD_ALL_ACCESS_VISTA = (STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE | 0xFFFF)
+if NTDDI_VERSION < NTDDI_VISTA:
+    PROCESS_ALL_ACCESS = PROCESS_ALL_ACCESS_NT
+    THREAD_ALL_ACCESS = THREAD_ALL_ACCESS_NT
+else:
+    PROCESS_ALL_ACCESS = PROCESS_ALL_ACCESS_VISTA
+    THREAD_ALL_ACCESS = THREAD_ALL_ACCESS_VISTA
+
+# Process priority classes
+
+IDLE_PRIORITY_CLASS         = 0x00000040
+BELOW_NORMAL_PRIORITY_CLASS = 0x00004000
+NORMAL_PRIORITY_CLASS       = 0x00000020
+ABOVE_NORMAL_PRIORITY_CLASS = 0x00008000
+HIGH_PRIORITY_CLASS         = 0x00000080
+REALTIME_PRIORITY_CLASS     = 0x00000100
+
+PROCESS_MODE_BACKGROUND_BEGIN   = 0x00100000
+PROCESS_MODE_BACKGROUND_END     = 0x00200000
+
+# dwCreationFlag values
+
+DEBUG_PROCESS                     = 0x00000001
+DEBUG_ONLY_THIS_PROCESS           = 0x00000002
+CREATE_SUSPENDED                  = 0x00000004    # Threads and processes
+DETACHED_PROCESS                  = 0x00000008
+CREATE_NEW_CONSOLE                = 0x00000010
+NORMAL_PRIORITY_CLASS             = 0x00000020
+IDLE_PRIORITY_CLASS               = 0x00000040
+HIGH_PRIORITY_CLASS               = 0x00000080
+REALTIME_PRIORITY_CLASS           = 0x00000100
+CREATE_NEW_PROCESS_GROUP          = 0x00000200
+CREATE_UNICODE_ENVIRONMENT        = 0x00000400
+CREATE_SEPARATE_WOW_VDM           = 0x00000800
+CREATE_SHARED_WOW_VDM             = 0x00001000
+CREATE_FORCEDOS                   = 0x00002000
+BELOW_NORMAL_PRIORITY_CLASS       = 0x00004000
+ABOVE_NORMAL_PRIORITY_CLASS       = 0x00008000
+INHERIT_PARENT_AFFINITY           = 0x00010000
+STACK_SIZE_PARAM_IS_A_RESERVATION = 0x00010000    # Threads only
+INHERIT_CALLER_PRIORITY           = 0x00020000    # Deprecated
+CREATE_PROTECTED_PROCESS          = 0x00040000
+EXTENDED_STARTUPINFO_PRESENT      = 0x00080000
+PROCESS_MODE_BACKGROUND_BEGIN     = 0x00100000
+PROCESS_MODE_BACKGROUND_END       = 0x00200000
+CREATE_BREAKAWAY_FROM_JOB         = 0x01000000
+CREATE_PRESERVE_CODE_AUTHZ_LEVEL  = 0x02000000
+CREATE_DEFAULT_ERROR_MODE         = 0x04000000
+CREATE_NO_WINDOW                  = 0x08000000
+PROFILE_USER                      = 0x10000000
+PROFILE_KERNEL                    = 0x20000000
+PROFILE_SERVER                    = 0x40000000
+CREATE_IGNORE_SYSTEM_DEFAULT      = 0x80000000
+
+# Thread priority values
+
+THREAD_BASE_PRIORITY_LOWRT  = 15    # value that gets a thread to LowRealtime-1
+THREAD_BASE_PRIORITY_MAX    = 2     # maximum thread base priority boost
+THREAD_BASE_PRIORITY_MIN    = (-2)  # minimum thread base priority boost
+THREAD_BASE_PRIORITY_IDLE   = (-15) # value that gets a thread to idle
+
+THREAD_PRIORITY_LOWEST          = THREAD_BASE_PRIORITY_MIN
+THREAD_PRIORITY_BELOW_NORMAL    = (THREAD_PRIORITY_LOWEST+1)
+THREAD_PRIORITY_NORMAL          = 0
+THREAD_PRIORITY_HIGHEST         = THREAD_BASE_PRIORITY_MAX
+THREAD_PRIORITY_ABOVE_NORMAL    = (THREAD_PRIORITY_HIGHEST-1)
+THREAD_PRIORITY_ERROR_RETURN    = (0xFFFFFFFF)
+
+THREAD_PRIORITY_TIME_CRITICAL   = THREAD_BASE_PRIORITY_LOWRT
+THREAD_PRIORITY_IDLE            = THREAD_BASE_PRIORITY_IDLE
+
+# Memory access
+SECTION_QUERY                = 0x0001
+SECTION_MAP_WRITE            = 0x0002
+SECTION_MAP_READ             = 0x0004
+SECTION_MAP_EXECUTE          = 0x0008
+SECTION_EXTEND_SIZE          = 0x0010
+SECTION_MAP_EXECUTE_EXPLICIT = 0x0020 # not included in SECTION_ALL_ACCESS
+
+SECTION_ALL_ACCESS = (STANDARD_RIGHTS_REQUIRED|SECTION_QUERY|\
+                             SECTION_MAP_WRITE |      \
+                             SECTION_MAP_READ |       \
+                             SECTION_MAP_EXECUTE |    \
+                             SECTION_EXTEND_SIZE)
+PAGE_NOACCESS          = 0x01
+PAGE_READONLY          = 0x02
+PAGE_READWRITE         = 0x04
+PAGE_WRITECOPY         = 0x08
+PAGE_EXECUTE           = 0x10
+PAGE_EXECUTE_READ      = 0x20
+PAGE_EXECUTE_READWRITE = 0x40
+PAGE_EXECUTE_WRITECOPY = 0x80
+PAGE_GUARD            = 0x100
+PAGE_NOCACHE          = 0x200
+PAGE_WRITECOMBINE     = 0x400
+MEM_COMMIT           = 0x1000
+MEM_RESERVE          = 0x2000
+MEM_DECOMMIT         = 0x4000
+MEM_RELEASE          = 0x8000
+MEM_FREE            = 0x10000
+MEM_PRIVATE         = 0x20000
+MEM_MAPPED          = 0x40000
+MEM_RESET           = 0x80000
+MEM_TOP_DOWN       = 0x100000
+MEM_WRITE_WATCH    = 0x200000
+MEM_PHYSICAL       = 0x400000
+MEM_RESET_UNDO    = 0x1000000
+MEM_LARGE_PAGES  = 0x20000000
+MEM_4MB_PAGES    = 0x80000000
+SEC_FILE           = 0x800000
+SEC_IMAGE         = 0x1000000
+SEC_RESERVE       = 0x4000000
+SEC_COMMIT        = 0x8000000
+SEC_NOCACHE      = 0x10000000
+SEC_LARGE_PAGES  = 0x80000000
+MEM_IMAGE         = SEC_IMAGE
+WRITE_WATCH_FLAG_RESET = 0x01
+FILE_MAP_ALL_ACCESS = 0xF001F
+
+SECTION_QUERY                   = 0x0001
+SECTION_MAP_WRITE               = 0x0002
+SECTION_MAP_READ                = 0x0004
+SECTION_MAP_EXECUTE             = 0x0008
+SECTION_EXTEND_SIZE             = 0x0010
+SECTION_MAP_EXECUTE_EXPLICIT    = 0x0020 # not included in SECTION_ALL_ACCESS
+
+SECTION_ALL_ACCESS = (STANDARD_RIGHTS_REQUIRED|SECTION_QUERY|\
+                 SECTION_MAP_WRITE |      \
+                 SECTION_MAP_READ |       \
+                 SECTION_MAP_EXECUTE |    \
+                 SECTION_EXTEND_SIZE)
+
+FILE_MAP_COPY       = SECTION_QUERY
+FILE_MAP_WRITE      = SECTION_MAP_WRITE
+FILE_MAP_READ       = SECTION_MAP_READ
+FILE_MAP_ALL_ACCESS = SECTION_ALL_ACCESS
+FILE_MAP_EXECUTE    = SECTION_MAP_EXECUTE_EXPLICIT  # not included in FILE_MAP_ALL_ACCESS
+
+GENERIC_READ                     = 0x80000000
+GENERIC_WRITE                    = 0x40000000
+GENERIC_EXECUTE                  = 0x20000000
+GENERIC_ALL                      = 0x10000000
+
+FILE_SHARE_READ                  = 0x00000001
+FILE_SHARE_WRITE                 = 0x00000002
+FILE_SHARE_DELETE                = 0x00000004
+
+CREATE_NEW                       = 1
+CREATE_ALWAYS                    = 2
+OPEN_EXISTING                    = 3
+OPEN_ALWAYS                      = 4
+TRUNCATE_EXISTING                = 5
+
+FILE_ATTRIBUTE_READONLY          = 0x00000001
+FILE_ATTRIBUTE_NORMAL            = 0x00000080
+FILE_ATTRIBUTE_TEMPORARY         = 0x00000100
+
+FILE_FLAG_WRITE_THROUGH          = 0x80000000
+FILE_FLAG_NO_BUFFERING           = 0x20000000
+FILE_FLAG_RANDOM_ACCESS          = 0x10000000
+FILE_FLAG_SEQUENTIAL_SCAN        = 0x08000000
+FILE_FLAG_DELETE_ON_CLOSE        = 0x04000000
+FILE_FLAG_OVERLAPPED             = 0x40000000
+
+FILE_ATTRIBUTE_READONLY          = 0x00000001
+FILE_ATTRIBUTE_HIDDEN            = 0x00000002
+FILE_ATTRIBUTE_SYSTEM            = 0x00000004
+FILE_ATTRIBUTE_DIRECTORY         = 0x00000010
+FILE_ATTRIBUTE_ARCHIVE           = 0x00000020
+FILE_ATTRIBUTE_DEVICE            = 0x00000040
+FILE_ATTRIBUTE_NORMAL            = 0x00000080
+FILE_ATTRIBUTE_TEMPORARY         = 0x00000100
+
+# Debug events
+EXCEPTION_DEBUG_EVENT       = 1
+CREATE_THREAD_DEBUG_EVENT   = 2
+CREATE_PROCESS_DEBUG_EVENT  = 3
+EXIT_THREAD_DEBUG_EVENT     = 4
+EXIT_PROCESS_DEBUG_EVENT    = 5
+LOAD_DLL_DEBUG_EVENT        = 6
+UNLOAD_DLL_DEBUG_EVENT      = 7
+OUTPUT_DEBUG_STRING_EVENT   = 8
+RIP_EVENT                   = 9
+
+# Debug status codes (ContinueDebugEvent)
+DBG_EXCEPTION_HANDLED           = 0x00010001
+DBG_CONTINUE                    = 0x00010002
+DBG_REPLY_LATER                 = 0x40010001
+DBG_UNABLE_TO_PROVIDE_HANDLE    = 0x40010002
+DBG_TERMINATE_THREAD            = 0x40010003
+DBG_TERMINATE_PROCESS           = 0x40010004
+DBG_CONTROL_C                   = 0x40010005
+DBG_PRINTEXCEPTION_C            = 0x40010006
+DBG_RIPEXCEPTION                = 0x40010007
+DBG_CONTROL_BREAK               = 0x40010008
+DBG_COMMAND_EXCEPTION           = 0x40010009
+DBG_EXCEPTION_NOT_HANDLED       = 0x80010001
+DBG_NO_STATE_CHANGE             = 0xC0010001
+DBG_APP_NOT_IDLE                = 0xC0010002
+
+# Status codes
+STATUS_WAIT_0                   = 0x00000000
+STATUS_ABANDONED_WAIT_0         = 0x00000080
+STATUS_USER_APC                 = 0x000000C0
+STATUS_TIMEOUT                  = 0x00000102
+STATUS_PENDING                  = 0x00000103
+STATUS_SEGMENT_NOTIFICATION     = 0x40000005
+STATUS_GUARD_PAGE_VIOLATION     = 0x80000001
+STATUS_DATATYPE_MISALIGNMENT    = 0x80000002
+STATUS_BREAKPOINT               = 0x80000003
+STATUS_SINGLE_STEP              = 0x80000004
+STATUS_INVALID_INFO_CLASS       = 0xC0000003
+STATUS_ACCESS_VIOLATION         = 0xC0000005
+STATUS_IN_PAGE_ERROR            = 0xC0000006
+STATUS_INVALID_HANDLE           = 0xC0000008
+STATUS_NO_MEMORY                = 0xC0000017
+STATUS_ILLEGAL_INSTRUCTION      = 0xC000001D
+STATUS_NONCONTINUABLE_EXCEPTION = 0xC0000025
+STATUS_INVALID_DISPOSITION      = 0xC0000026
+STATUS_ARRAY_BOUNDS_EXCEEDED    = 0xC000008C
+STATUS_FLOAT_DENORMAL_OPERAND   = 0xC000008D
+STATUS_FLOAT_DIVIDE_BY_ZERO     = 0xC000008E
+STATUS_FLOAT_INEXACT_RESULT     = 0xC000008F
+STATUS_FLOAT_INVALID_OPERATION  = 0xC0000090
+STATUS_FLOAT_OVERFLOW           = 0xC0000091
+STATUS_FLOAT_STACK_CHECK        = 0xC0000092
+STATUS_FLOAT_UNDERFLOW          = 0xC0000093
+STATUS_INTEGER_DIVIDE_BY_ZERO   = 0xC0000094
+STATUS_INTEGER_OVERFLOW         = 0xC0000095
+STATUS_PRIVILEGED_INSTRUCTION   = 0xC0000096
+STATUS_STACK_OVERFLOW           = 0xC00000FD
+STATUS_CONTROL_C_EXIT           = 0xC000013A
+STATUS_FLOAT_MULTIPLE_FAULTS    = 0xC00002B4
+STATUS_FLOAT_MULTIPLE_TRAPS     = 0xC00002B5
+STATUS_REG_NAT_CONSUMPTION      = 0xC00002C9
+STATUS_SXS_EARLY_DEACTIVATION   = 0xC015000F
+STATUS_SXS_INVALID_DEACTIVATION = 0xC0150010
+
+STATUS_STACK_BUFFER_OVERRUN     = 0xC0000409
+STATUS_WX86_BREAKPOINT          = 0x4000001F
+STATUS_HEAP_CORRUPTION          = 0xC0000374
+
+STATUS_POSSIBLE_DEADLOCK        = 0xC0000194
+
+STATUS_UNWIND_CONSOLIDATE       = 0x80000029
+
+# Exception codes
+
+EXCEPTION_ACCESS_VIOLATION          = STATUS_ACCESS_VIOLATION
+EXCEPTION_ARRAY_BOUNDS_EXCEEDED     = STATUS_ARRAY_BOUNDS_EXCEEDED
+EXCEPTION_BREAKPOINT                = STATUS_BREAKPOINT
+EXCEPTION_DATATYPE_MISALIGNMENT     = STATUS_DATATYPE_MISALIGNMENT
+EXCEPTION_FLT_DENORMAL_OPERAND      = STATUS_FLOAT_DENORMAL_OPERAND
+EXCEPTION_FLT_DIVIDE_BY_ZERO        = STATUS_FLOAT_DIVIDE_BY_ZERO
+EXCEPTION_FLT_INEXACT_RESULT        = STATUS_FLOAT_INEXACT_RESULT
+EXCEPTION_FLT_INVALID_OPERATION     = STATUS_FLOAT_INVALID_OPERATION
+EXCEPTION_FLT_OVERFLOW              = STATUS_FLOAT_OVERFLOW
+EXCEPTION_FLT_STACK_CHECK           = STATUS_FLOAT_STACK_CHECK
+EXCEPTION_FLT_UNDERFLOW             = STATUS_FLOAT_UNDERFLOW
+EXCEPTION_ILLEGAL_INSTRUCTION       = STATUS_ILLEGAL_INSTRUCTION
+EXCEPTION_IN_PAGE_ERROR             = STATUS_IN_PAGE_ERROR
+EXCEPTION_INT_DIVIDE_BY_ZERO        = STATUS_INTEGER_DIVIDE_BY_ZERO
+EXCEPTION_INT_OVERFLOW              = STATUS_INTEGER_OVERFLOW
+EXCEPTION_INVALID_DISPOSITION       = STATUS_INVALID_DISPOSITION
+EXCEPTION_NONCONTINUABLE_EXCEPTION  = STATUS_NONCONTINUABLE_EXCEPTION
+EXCEPTION_PRIV_INSTRUCTION          = STATUS_PRIVILEGED_INSTRUCTION
+EXCEPTION_SINGLE_STEP               = STATUS_SINGLE_STEP
+EXCEPTION_STACK_OVERFLOW            = STATUS_STACK_OVERFLOW
+
+EXCEPTION_GUARD_PAGE                = STATUS_GUARD_PAGE_VIOLATION
+EXCEPTION_INVALID_HANDLE            = STATUS_INVALID_HANDLE
+EXCEPTION_POSSIBLE_DEADLOCK         = STATUS_POSSIBLE_DEADLOCK
+EXCEPTION_WX86_BREAKPOINT           = STATUS_WX86_BREAKPOINT
+
+CONTROL_C_EXIT                      = STATUS_CONTROL_C_EXIT
+
+DBG_CONTROL_C                       = 0x40010005
+MS_VC_EXCEPTION                     = 0x406D1388
+
+# Access violation types
+ACCESS_VIOLATION_TYPE_READ      = EXCEPTION_READ_FAULT
+ACCESS_VIOLATION_TYPE_WRITE     = EXCEPTION_WRITE_FAULT
+ACCESS_VIOLATION_TYPE_DEP       = EXCEPTION_EXECUTE_FAULT
+
+# RIP event types
+SLE_ERROR      = 1
+SLE_MINORERROR = 2
+SLE_WARNING    = 3
+
+# DuplicateHandle constants
+DUPLICATE_CLOSE_SOURCE      = 0x00000001
+DUPLICATE_SAME_ACCESS       = 0x00000002
+
+# GetFinalPathNameByHandle constants
+FILE_NAME_NORMALIZED        = 0x0
+FILE_NAME_OPENED            = 0x8
+VOLUME_NAME_DOS             = 0x0
+VOLUME_NAME_GUID            = 0x1
+VOLUME_NAME_NONE            = 0x4
+VOLUME_NAME_NT              = 0x2
+
+# GetProductInfo constants
+PRODUCT_BUSINESS = 0x00000006
+PRODUCT_BUSINESS_N = 0x00000010
+PRODUCT_CLUSTER_SERVER = 0x00000012
+PRODUCT_DATACENTER_SERVER = 0x00000008
+PRODUCT_DATACENTER_SERVER_CORE = 0x0000000C
+PRODUCT_DATACENTER_SERVER_CORE_V = 0x00000027
+PRODUCT_DATACENTER_SERVER_V = 0x00000025
+PRODUCT_ENTERPRISE = 0x00000004
+PRODUCT_ENTERPRISE_E = 0x00000046
+PRODUCT_ENTERPRISE_N = 0x0000001B
+PRODUCT_ENTERPRISE_SERVER = 0x0000000A
+PRODUCT_ENTERPRISE_SERVER_CORE = 0x0000000E
+PRODUCT_ENTERPRISE_SERVER_CORE_V = 0x00000029
+PRODUCT_ENTERPRISE_SERVER_IA64 = 0x0000000F
+PRODUCT_ENTERPRISE_SERVER_V = 0x00000026
+PRODUCT_HOME_BASIC = 0x00000002
+PRODUCT_HOME_BASIC_E = 0x00000043
+PRODUCT_HOME_BASIC_N = 0x00000005
+PRODUCT_HOME_PREMIUM = 0x00000003
+PRODUCT_HOME_PREMIUM_E = 0x00000044
+PRODUCT_HOME_PREMIUM_N = 0x0000001A
+PRODUCT_HYPERV = 0x0000002A
+PRODUCT_MEDIUMBUSINESS_SERVER_MANAGEMENT = 0x0000001E
+PRODUCT_MEDIUMBUSINESS_SERVER_MESSAGING = 0x00000020
+PRODUCT_MEDIUMBUSINESS_SERVER_SECURITY = 0x0000001F
+PRODUCT_PROFESSIONAL = 0x00000030
+PRODUCT_PROFESSIONAL_E = 0x00000045
+PRODUCT_PROFESSIONAL_N = 0x00000031
+PRODUCT_SERVER_FOR_SMALLBUSINESS = 0x00000018
+PRODUCT_SERVER_FOR_SMALLBUSINESS_V = 0x00000023
+PRODUCT_SERVER_FOUNDATION = 0x00000021
+PRODUCT_SMALLBUSINESS_SERVER = 0x00000009
+PRODUCT_STANDARD_SERVER = 0x00000007
+PRODUCT_STANDARD_SERVER_CORE = 0x0000000D
+PRODUCT_STANDARD_SERVER_CORE_V = 0x00000028
+PRODUCT_STANDARD_SERVER_V = 0x00000024
+PRODUCT_STARTER = 0x0000000B
+PRODUCT_STARTER_E = 0x00000042
+PRODUCT_STARTER_N = 0x0000002F
+PRODUCT_STORAGE_ENTERPRISE_SERVER = 0x00000017
+PRODUCT_STORAGE_EXPRESS_SERVER = 0x00000014
+PRODUCT_STORAGE_STANDARD_SERVER = 0x00000015
+PRODUCT_STORAGE_WORKGROUP_SERVER = 0x00000016
+PRODUCT_UNDEFINED = 0x00000000
+PRODUCT_UNLICENSED = 0xABCDABCD
+PRODUCT_ULTIMATE = 0x00000001
+PRODUCT_ULTIMATE_E = 0x00000047
+PRODUCT_ULTIMATE_N = 0x0000001C
+PRODUCT_WEB_SERVER = 0x00000011
+PRODUCT_WEB_SERVER_CORE = 0x0000001D
+
+# DEP policy flags
+PROCESS_DEP_ENABLE = 1
+PROCESS_DEP_DISABLE_ATL_THUNK_EMULATION = 2
+
+# Error modes
+SEM_FAILCRITICALERRORS      = 0x001
+SEM_NOGPFAULTERRORBOX       = 0x002
+SEM_NOALIGNMENTFAULTEXCEPT  = 0x004
+SEM_NOOPENFILEERRORBOX      = 0x800
+
+# GetHandleInformation / SetHandleInformation
+HANDLE_FLAG_INHERIT             = 0x00000001
+HANDLE_FLAG_PROTECT_FROM_CLOSE  = 0x00000002
+
+#--- Handle wrappers ----------------------------------------------------------
+
+class Handle (object):
+    """
+    Encapsulates Win32 handles to avoid leaking them.
+
+    @type inherit: bool
+    @ivar inherit: C{True} if the handle is to be inherited by child processes,
+        C{False} otherwise.
+
+    @type protectFromClose: bool
+    @ivar protectFromClose: Set to C{True} to prevent the handle from being
+        closed. Must be set to C{False} before you're done using the handle,
+        or it will be left open until the debugger exits. Use with care!
+
+    @see:
+        L{ProcessHandle}, L{ThreadHandle}, L{FileHandle}, L{SnapshotHandle}
+    """
+
+    # XXX DEBUG
+    # When this private flag is True each Handle will print a message to
+    # standard output when it's created and destroyed. This is useful for
+    # detecting handle leaks within WinAppDbg itself.
+    __bLeakDetection = False
+
+    def __init__(self, aHandle = None, bOwnership = True):
+        """
+        @type  aHandle: int
+        @param aHandle: Win32 handle value.
+
+        @type  bOwnership: bool
+        @param bOwnership:
+           C{True} if we own the handle and we need to close it.
+           C{False} if someone else will be calling L{CloseHandle}.
+        """
+        super(Handle, self).__init__()
+        self._value     = self._normalize(aHandle)
+        self.bOwnership = bOwnership
+        if Handle.__bLeakDetection:     # XXX DEBUG
+            print ("INIT HANDLE (%r) %r" % (self.value, self))
+
+    @property
+    def value(self):
+        return self._value
+
+    def __del__(self):
+        """
+        Closes the Win32 handle when the Python object is destroyed.
+        """
+        try:
+            if Handle.__bLeakDetection:     # XXX DEBUG
+                print ("DEL HANDLE %r" % self)
+            self.close()
+        except Exception:
+            pass
+
+    def __enter__(self):
+        """
+        Compatibility with the "C{with}" Python statement.
+        """
+        if Handle.__bLeakDetection:     # XXX DEBUG
+            print ("ENTER HANDLE %r" % self)
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """
+        Compatibility with the "C{with}" Python statement.
+        """
+        if Handle.__bLeakDetection:     # XXX DEBUG
+            print ("EXIT HANDLE %r" % self)
+        try:
+            self.close()
+        except Exception:
+            pass
+
+    def __copy__(self):
+        """
+        Duplicates the Win32 handle when copying the Python object.
+
+        @rtype:  L{Handle}
+        @return: A new handle to the same Win32 object.
+        """
+        return self.dup()
+
+    def __deepcopy__(self, memo):
+        """
+        Duplicates the Win32 handle when copying the Python object.
+
+        @rtype:  L{Handle}
+        @return: A new handle to the same win32 object.
+        """
+        return self.dup()
+
+    @property
+    def _as_parameter_(self):
+        """
+        Compatibility with ctypes.
+        Allows passing transparently a Handle object to an API call.
+        """
+        return HANDLE(self.value)
+
+    @staticmethod
+    def from_param(value):
+        """
+        Compatibility with ctypes.
+        Allows passing transparently a Handle object to an API call.
+
+        @type  value: int
+        @param value: Numeric handle value.
+        """
+        return HANDLE(value)
+
+    def close(self):
+        """
+        Closes the Win32 handle.
+        """
+        if self.bOwnership and self.value not in (None, INVALID_HANDLE_VALUE):
+            if Handle.__bLeakDetection:     # XXX DEBUG
+                print ("CLOSE HANDLE (%d) %r" % (self.value, self))
+            try:
+                self._close()
+            finally:
+                self._value = None
+
+    def _close(self):
+        """
+        Low-level close method.
+        This is a private method, do not call it.
+        """
+        CloseHandle(self.value)
+
+    def dup(self):
+        """
+        @rtype:  L{Handle}
+        @return: A new handle to the same Win32 object.
+        """
+        if self.value is None:
+            raise ValueError("Closed handles can't be duplicated!")
+        new_handle = DuplicateHandle(self.value)
+        if Handle.__bLeakDetection:     # XXX DEBUG
+            print ("DUP HANDLE (%d -> %d) %r %r" % \
+                            (self.value, new_handle.value, self, new_handle))
+        return new_handle
+
+    @staticmethod
+    def _normalize(value):
+        """
+        Normalize handle values.
+        """
+        if hasattr(value, 'value'):
+            value = value.value
+        if value is not None:
+            value = int(value)
+        return value
+
+    def wait(self, dwMilliseconds = None):
+        """
+        Wait for the Win32 object to be signaled.
+
+        @type  dwMilliseconds: int
+        @param dwMilliseconds: (Optional) Timeout value in milliseconds.
+            Use C{INFINITE} or C{None} for no timeout.
+        """
+        if self.value is None:
+            raise ValueError("Handle is already closed!")
+        if dwMilliseconds is None:
+            dwMilliseconds = INFINITE
+        r = WaitForSingleObject(self.value, dwMilliseconds)
+        if r != WAIT_OBJECT_0:
+            raise ctypes.WinError(r)
+
+    def __repr__(self):
+        return '<%s: %s>' % (self.__class__.__name__, self.value)
+
+    def __get_inherit(self):
+        if self.value is None:
+            raise ValueError("Handle is already closed!")
+        return bool( GetHandleInformation(self.value) & HANDLE_FLAG_INHERIT )
+
+    def __set_inherit(self, value):
+        if self.value is None:
+            raise ValueError("Handle is already closed!")
+        flag = (0, HANDLE_FLAG_INHERIT)[ bool(value) ]
+        SetHandleInformation(self.value, flag, flag)
+
+    inherit = property(__get_inherit, __set_inherit)
+
+    def __get_protectFromClose(self):
+        if self.value is None:
+            raise ValueError("Handle is already closed!")
+        return bool( GetHandleInformation(self.value) & HANDLE_FLAG_PROTECT_FROM_CLOSE )
+
+    def __set_protectFromClose(self, value):
+        if self.value is None:
+            raise ValueError("Handle is already closed!")
+        flag = (0, HANDLE_FLAG_PROTECT_FROM_CLOSE)[ bool(value) ]
+        SetHandleInformation(self.value, flag, flag)
+
+    protectFromClose = property(__get_protectFromClose, __set_protectFromClose)
+
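+# Typical usage sketch (illustrative only; ``hProcess`` stands for a raw
+# handle value returned by some API call, not something defined here):
+#
+#   with Handle(hProcess) as handle:
+#       handle.inherit = False    # clear the inheritance flag
+#       handle.wait(1000)         # wait up to one second for the object
+#   # the handle is closed when the object is destroyed or close() is called
+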
+class UserModeHandle (Handle):
+    """
+    Base class for non-kernel handles. Generally this means they are closed
+    by special Win32 API functions instead of CloseHandle(), and some standard
+    operations (synchronizing, duplicating, inheritance) are not supported.
+
+    @type _TYPE: C type
+    @cvar _TYPE: C type to translate this handle to.
+        Subclasses should override this.
+        Defaults to L{HANDLE}.
+    """
+
+    # Subclasses should override this.
+    _TYPE = HANDLE
+
+    # This method must be implemented by subclasses.
+    def _close(self):
+        raise NotImplementedError()
+
+    # Translation to C type.
+    @property
+    def _as_parameter_(self):
+        return self._TYPE(self.value)
+
+    # Translation to C type.
+    @classmethod
+    def from_param(cls, value):
+        return cls._TYPE(value)
+
+    # Operation not supported.
+    @property
+    def inherit(self):
+        return False
+
+    # Operation not supported.
+    @property
+    def protectFromClose(self):
+        return False
+
+    # Operation not supported.
+    def dup(self):
+        raise NotImplementedError()
+
+    # Operation not supported.
+    def wait(self, dwMilliseconds = None):
+        raise NotImplementedError()
+
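+# Sketch of a hypothetical subclass, assuming a user-mode object that is
+# released with a dedicated API instead of CloseHandle() (the API name
+# below is illustrative only):
+#
+#   class InternetHandle (UserModeHandle):
+#       _TYPE = HANDLE
+#       def _close(self):
+#           InternetCloseHandle(self.value)
+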
+class ProcessHandle (Handle):
+    """
+    Win32 process handle.
+
+    @type dwAccess: int
+    @ivar dwAccess: Current access flags to this handle.
+            This is the same value passed to L{OpenProcess}.
+            Can only be C{None} if C{aHandle} is also C{None}.
+            Defaults to L{PROCESS_ALL_ACCESS}.
+
+    @see: L{Handle}
+    """
+
+    def __init__(self, aHandle = None, bOwnership = True,
+                       dwAccess = PROCESS_ALL_ACCESS):
+        """
+        @type  aHandle: int
+        @param aHandle: Win32 handle value.
+
+        @type  bOwnership: bool
+        @param bOwnership:
+           C{True} if we own the handle and we need to close it.
+           C{False} if someone else will be calling L{CloseHandle}.
+
+        @type  dwAccess: int
+        @param dwAccess: Current access flags to this handle.
+            This is the same value passed to L{OpenProcess}.
+            Can only be C{None} if C{aHandle} is also C{None}.
+            Defaults to L{PROCESS_ALL_ACCESS}.
+        """
+        super(ProcessHandle, self).__init__(aHandle, bOwnership)
+        self.dwAccess = dwAccess
+        if aHandle is not None and dwAccess is None:
+            msg = "Missing access flags for process handle: %x" % aHandle
+            raise TypeError(msg)
+
+    def get_pid(self):
+        """
+        @rtype:  int
+        @return: Process global ID.
+        """
+        return GetProcessId(self.value)
+
+class ThreadHandle (Handle):
+    """
+    Win32 thread handle.
+
+    @type dwAccess: int
+    @ivar dwAccess: Current access flags to this handle.
+            This is the same value passed to L{OpenThread}.
+            Can only be C{None} if C{aHandle} is also C{None}.
+            Defaults to L{THREAD_ALL_ACCESS}.
+
+    @see: L{Handle}
+    """
+
+    def __init__(self, aHandle = None, bOwnership = True,
+                       dwAccess = THREAD_ALL_ACCESS):
+        """
+        @type  aHandle: int
+        @param aHandle: Win32 handle value.
+
+        @type  bOwnership: bool
+        @param bOwnership:
+           C{True} if we own the handle and we need to close it.
+           C{False} if someone else will be calling L{CloseHandle}.
+
+        @type  dwAccess: int
+        @param dwAccess: Current access flags to this handle.
+            This is the same value passed to L{OpenThread}.
+            Can only be C{None} if C{aHandle} is also C{None}.
+            Defaults to L{THREAD_ALL_ACCESS}.
+        """
+        super(ThreadHandle, self).__init__(aHandle, bOwnership)
+        self.dwAccess = dwAccess
+        if aHandle is not None and dwAccess is None:
+            msg = "Missing access flags for thread handle: %x" % aHandle
+            raise TypeError(msg)
+
+    def get_tid(self):
+        """
+        @rtype:  int
+        @return: Thread global ID.
+        """
+        return GetThreadId(self.value)
+
+class FileHandle (Handle):
+    """
+    Win32 file handle.
+
+    @see: L{Handle}
+    """
+
+    def get_filename(self):
+        """
+        @rtype:  None or str
+        @return: Name of the open file, or C{None} if unavailable.
+        """
+        #
+        # XXX BUG
+        #
+        # This code truncates the first two bytes of the path.
+        # It seems to be the expected behavior of NtQueryInformationFile.
+        #
+        # My guess is it only returns the NT pathname, without the device name.
+        # It's like dropping the drive letter in a Win32 pathname.
+        #
+        # Note that using the "official" GetFileInformationByHandleEx
+        # API introduced in Vista doesn't change the results!
+        #
+        dwBufferSize      = 0x1004
+        lpFileInformation = ctypes.create_string_buffer(dwBufferSize)
+        try:
+            GetFileInformationByHandleEx(self.value,
+                                        FILE_INFO_BY_HANDLE_CLASS.FileNameInfo,
+                                        lpFileInformation, dwBufferSize)
+        except AttributeError:
+            from .ntdll import NtQueryInformationFile, \
+                              FileNameInformation
+            NtQueryInformationFile(self.value,
+                                   FileNameInformation,
+                                   lpFileInformation,
+                                   dwBufferSize)
+        FileName = text(lpFileInformation.raw[sizeof(DWORD):], 'U16')
+        FileName = ctypes.create_unicode_buffer(FileName).value
+        if not FileName:
+            FileName = None
+        elif FileName[1:2] != ':':
+            # When the drive letter is missing, we'll assume SYSTEMROOT.
+            # Not a good solution but it could be worse.
+            import os
+            FileName = os.environ['SYSTEMROOT'][:2] + FileName
+        return FileName
+
+class FileMappingHandle (Handle):
+    """
+    File mapping handle.
+
+    @see: L{Handle}
+    """
+    pass
+
+# XXX maybe add functions related to the toolhelp snapshots here?
+class SnapshotHandle (Handle):
+    """
+    Toolhelp32 snapshot handle.
+
+    @see: L{Handle}
+    """
+    pass
+
+#--- Structure wrappers -------------------------------------------------------
+
+class ProcessInformation (object):
+    """
+    Process information object returned by L{CreateProcess}.
+    """
+
+    def __init__(self, pi):
+        self.hProcess    = ProcessHandle(pi.hProcess)
+        self.hThread     = ThreadHandle(pi.hThread)
+        self.dwProcessId = pi.dwProcessId
+        self.dwThreadId  = pi.dwThreadId
+
+# Don't psyco-optimize this class because it needs to be serialized.
+class MemoryBasicInformation (object):
+    """
+    Memory information object returned by L{VirtualQueryEx}.
+    """
+
+    READABLE = (
+                PAGE_EXECUTE_READ       |
+                PAGE_EXECUTE_READWRITE  |
+                PAGE_EXECUTE_WRITECOPY  |
+                PAGE_READONLY           |
+                PAGE_READWRITE          |
+                PAGE_WRITECOPY
+    )
+
+    WRITEABLE = (
+                PAGE_EXECUTE_READWRITE  |
+                PAGE_EXECUTE_WRITECOPY  |
+                PAGE_READWRITE          |
+                PAGE_WRITECOPY
+    )
+
+    COPY_ON_WRITE = (
+                PAGE_EXECUTE_WRITECOPY  |
+                PAGE_WRITECOPY
+    )
+
+    EXECUTABLE = (
+                PAGE_EXECUTE            |
+                PAGE_EXECUTE_READ       |
+                PAGE_EXECUTE_READWRITE  |
+                PAGE_EXECUTE_WRITECOPY
+    )
+
+    EXECUTABLE_AND_WRITEABLE = (
+                PAGE_EXECUTE_READWRITE  |
+                PAGE_EXECUTE_WRITECOPY
+    )
+
+    def __init__(self, mbi=None):
+        """
+        @type  mbi: L{MEMORY_BASIC_INFORMATION} or L{MemoryBasicInformation}
+        @param mbi: Either a L{MEMORY_BASIC_INFORMATION} structure or another
+            L{MemoryBasicInformation} instance.
+        """
+        if mbi is None:
+            self.BaseAddress        = None
+            self.AllocationBase     = None
+            self.AllocationProtect  = None
+            self.RegionSize         = None
+            self.State              = None
+            self.Protect            = None
+            self.Type               = None
+        else:
+            self.BaseAddress        = mbi.BaseAddress
+            self.AllocationBase     = mbi.AllocationBase
+            self.AllocationProtect  = mbi.AllocationProtect
+            self.RegionSize         = mbi.RegionSize
+            self.State              = mbi.State
+            self.Protect            = mbi.Protect
+            self.Type               = mbi.Type
+
+            # Only used when copying MemoryBasicInformation objects, instead of
+            # instancing them from a MEMORY_BASIC_INFORMATION structure.
+            if hasattr(mbi, 'content'):
+                self.content = mbi.content
+            if hasattr(mbi, 'filename'):
+                self.filename = mbi.filename
+
+    def __contains__(self, address):
+        """
+        Test if the given memory address falls within this memory region.
+
+        @type  address: int
+        @param address: Memory address to test.
+
+        @rtype:  bool
+        @return: C{True} if the given memory address falls within this memory
+            region, C{False} otherwise.
+        """
+        return self.BaseAddress <= address < (self.BaseAddress + self.RegionSize)
+
+    def is_free(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region is free.
+        """
+        return self.State == MEM_FREE
+
+    def is_reserved(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region is reserved.
+        """
+        return self.State == MEM_RESERVE
+
+    def is_commited(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region is committed.
+        """
+        return self.State == MEM_COMMIT
+
+    def is_image(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region belongs to an executable
+            image.
+        """
+        return self.Type == MEM_IMAGE
+
+    def is_mapped(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region belongs to a mapped file.
+        """
+        return self.Type == MEM_MAPPED
+
+    def is_private(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region is private.
+        """
+        return self.Type == MEM_PRIVATE
+
+    def is_guard(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are guard pages.
+        """
+        return self.is_commited() and bool(self.Protect & PAGE_GUARD)
+
+    def has_content(self):
+        """
+        @rtype:  bool
+        @return: C{True} if the memory in this region has any data in it.
+        """
+        return self.is_commited() and not bool(self.Protect & (PAGE_GUARD | PAGE_NOACCESS))
+
+    def is_readable(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are readable.
+        """
+        return self.has_content() and bool(self.Protect & self.READABLE)
+
+    def is_writeable(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are writeable.
+        """
+        return self.has_content() and bool(self.Protect & self.WRITEABLE)
+
+    def is_copy_on_write(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are marked as
+            copy-on-write. This means the pages are writeable, but changes
+            are not propagated to disk.
+        @note:
+            Typically data sections in executable images are marked like this.
+        """
+        return self.has_content() and bool(self.Protect & self.COPY_ON_WRITE)
+
+    def is_executable(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are executable.
+        @note: Executable pages are always readable.
+        """
+        return self.has_content() and bool(self.Protect & self.EXECUTABLE)
+
+    def is_executable_and_writeable(self):
+        """
+        @rtype:  bool
+        @return: C{True} if all pages in this region are executable and
+            writeable.
+        @note: The presence of such pages make memory corruption
+            vulnerabilities much easier to exploit.
+        """
+        return self.has_content() and bool(self.Protect & self.EXECUTABLE_AND_WRITEABLE)
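+
+# Usage sketch (editorial addition): typical use of the protection flags
+# above through the helper methods. Here `mbi` stands for an object returned
+# by VirtualQueryEx() (see the class docstring) and `address` is any address
+# to classify; both are placeholders.
+#
+#   if address in mbi and mbi.is_readable():
+#       print("address %x lies in a readable region" % address)
+#   if mbi.is_executable_and_writeable():
+#       print("warning: writable code page at %x" % mbi.BaseAddress)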
+
+class ProcThreadAttributeList (object):
+    """
+    Extended process and thread attribute support.
+
+    To be used with L{STARTUPINFOEX}.
+    Only available for Windows Vista and above.
+
+    @type AttributeList: list of tuple( int, ctypes-compatible object )
+    @ivar AttributeList: List of (Attribute, Value) pairs.
+
+    @type AttributeListBuffer: L{LPPROC_THREAD_ATTRIBUTE_LIST}
+    @ivar AttributeListBuffer: Memory buffer used to store the attribute list.
+
+    @see: L{InitializeProcThreadAttributeList},
+        L{UpdateProcThreadAttribute},
+        L{DeleteProcThreadAttributeList} and
+        L{STARTUPINFOEX}.
+    """
+
+    def __init__(self, AttributeList):
+        """
+        @type  AttributeList: list of tuple( int, ctypes-compatible object )
+        @param AttributeList: List of (Attribute, Value) pairs.
+        """
+        self.AttributeList = AttributeList
+        self.AttributeListBuffer = InitializeProcThreadAttributeList(
+                                                            len(AttributeList))
+        try:
+            for Attribute, Value in AttributeList:
+                UpdateProcThreadAttribute(self.AttributeListBuffer,
+                                          Attribute, Value)
+        except:
+            ProcThreadAttributeList.__del__(self)
+            raise
+
+    def __del__(self):
+        try:
+            DeleteProcThreadAttributeList(self.AttributeListBuffer)
+            del self.AttributeListBuffer
+        except Exception:
+            pass
+
+    def __copy__(self):
+        return self.__deepcopy__()
+
+    def __deepcopy__(self, memo = None):
+        return self.__class__(self.AttributeList)
+
+    @property
+    def value(self):
+        return ctypes.cast(ctypes.pointer(self.AttributeListBuffer), LPVOID)
+
+    @property
+    def _as_parameter_(self):
+        return self.value
+
+    # XXX TODO
+    @staticmethod
+    def from_param(value):
+        raise NotImplementedError()
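+
+# Usage sketch (editorial addition): building an attribute list for use with
+# the STARTUPINFOEX structure defined further below. How the buffer is handed
+# to CreateProcess is up to the caller; `hParentProcess` is a placeholder for
+# a process handle obtained elsewhere.
+#
+#   attrs = ProcThreadAttributeList([
+#       (PROC_THREAD_ATTRIBUTE_PARENT_PROCESS, hParentProcess),
+#   ])
+#   siex = STARTUPINFOEX()
+#   siex.StartupInfo.cb  = sizeof(STARTUPINFOEX)
+#   siex.lpAttributeList = attrs.value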
+
+#--- OVERLAPPED structure -----------------------------------------------------
+
+# typedef struct _OVERLAPPED {
+#   ULONG_PTR Internal;
+#   ULONG_PTR InternalHigh;
+#   union {
+#     struct {
+#       DWORD Offset;
+#       DWORD OffsetHigh;
+#     } ;
+#     PVOID Pointer;
+#   } ;
+#   HANDLE    hEvent;
+# }OVERLAPPED, *LPOVERLAPPED;
+class _OVERLAPPED_STRUCT(Structure):
+    _fields_ = [
+        ('Offset',          DWORD),
+        ('OffsetHigh',      DWORD),
+    ]
+class _OVERLAPPED_UNION(Union):
+    _fields_ = [
+        ('s',               _OVERLAPPED_STRUCT),
+        ('Pointer',         PVOID),
+    ]
+class OVERLAPPED(Structure):
+    _fields_ = [
+        ('Internal',        ULONG_PTR),
+        ('InternalHigh',    ULONG_PTR),
+        ('u',               _OVERLAPPED_UNION),
+        ('hEvent',          HANDLE),
+    ]
+LPOVERLAPPED = POINTER(OVERLAPPED)
+
+#--- SECURITY_ATTRIBUTES structure --------------------------------------------
+
+# typedef struct _SECURITY_ATTRIBUTES {
+#     DWORD nLength;
+#     LPVOID lpSecurityDescriptor;
+#     BOOL bInheritHandle;
+# } SECURITY_ATTRIBUTES, *PSECURITY_ATTRIBUTES, *LPSECURITY_ATTRIBUTES;
+class SECURITY_ATTRIBUTES(Structure):
+    _fields_ = [
+        ('nLength',                 DWORD),
+        ('lpSecurityDescriptor',    LPVOID),
+        ('bInheritHandle',          BOOL),
+    ]
+LPSECURITY_ATTRIBUTES = POINTER(SECURITY_ATTRIBUTES)
+
+# --- Extended process and thread attribute support ---------------------------
+
+PPROC_THREAD_ATTRIBUTE_LIST  = LPVOID
+LPPROC_THREAD_ATTRIBUTE_LIST = PPROC_THREAD_ATTRIBUTE_LIST
+
+PROC_THREAD_ATTRIBUTE_NUMBER   = 0x0000FFFF
+PROC_THREAD_ATTRIBUTE_THREAD   = 0x00010000  # Attribute may be used with thread creation
+PROC_THREAD_ATTRIBUTE_INPUT    = 0x00020000  # Attribute is input only
+PROC_THREAD_ATTRIBUTE_ADDITIVE = 0x00040000  # Attribute may be "accumulated," e.g. bitmasks, counters, etc.
+
+# PROC_THREAD_ATTRIBUTE_NUM
+ProcThreadAttributeParentProcess    = 0
+ProcThreadAttributeExtendedFlags    = 1
+ProcThreadAttributeHandleList       = 2
+ProcThreadAttributeGroupAffinity    = 3
+ProcThreadAttributePreferredNode    = 4
+ProcThreadAttributeIdealProcessor   = 5
+ProcThreadAttributeUmsThread        = 6
+ProcThreadAttributeMitigationPolicy = 7
+ProcThreadAttributeMax              = 8
+
+PROC_THREAD_ATTRIBUTE_PARENT_PROCESS    = ProcThreadAttributeParentProcess      |                                PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_EXTENDED_FLAGS    = ProcThreadAttributeExtendedFlags      |                                PROC_THREAD_ATTRIBUTE_INPUT | PROC_THREAD_ATTRIBUTE_ADDITIVE
+PROC_THREAD_ATTRIBUTE_HANDLE_LIST       = ProcThreadAttributeHandleList         |                                PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_GROUP_AFFINITY    = ProcThreadAttributeGroupAffinity      | PROC_THREAD_ATTRIBUTE_THREAD | PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_PREFERRED_NODE    = ProcThreadAttributePreferredNode      |                                PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_IDEAL_PROCESSOR   = ProcThreadAttributeIdealProcessor     | PROC_THREAD_ATTRIBUTE_THREAD | PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_UMS_THREAD        = ProcThreadAttributeUmsThread          | PROC_THREAD_ATTRIBUTE_THREAD | PROC_THREAD_ATTRIBUTE_INPUT
+PROC_THREAD_ATTRIBUTE_MITIGATION_POLICY = ProcThreadAttributeMitigationPolicy   |                                PROC_THREAD_ATTRIBUTE_INPUT
+
+PROCESS_CREATION_MITIGATION_POLICY_DEP_ENABLE           = 0x01
+PROCESS_CREATION_MITIGATION_POLICY_DEP_ATL_THUNK_ENABLE = 0x02
+PROCESS_CREATION_MITIGATION_POLICY_SEHOP_ENABLE         = 0x04
+
+#--- VS_FIXEDFILEINFO structure -----------------------------------------------
+
+# struct VS_FIXEDFILEINFO {
+#   DWORD dwSignature;
+#   DWORD dwStrucVersion;
+#   DWORD dwFileVersionMS;
+#   DWORD dwFileVersionLS;
+#   DWORD dwProductVersionMS;
+#   DWORD dwProductVersionLS;
+#   DWORD dwFileFlagsMask;
+#   DWORD dwFileFlags;
+#   DWORD dwFileOS;
+#   DWORD dwFileType;
+#   DWORD dwFileSubtype;
+#   DWORD dwFileDateMS;
+#   DWORD dwFileDateLS;
+# };
+class VS_FIXEDFILEINFO (Structure):
+    _fields_ = [
+        ("dwSignature",             DWORD),     # 0xFEEF04BD
+        ("dwStrucVersion",          DWORD),
+        ("dwFileVersionMS",         DWORD),
+        ("dwFileVersionLS",         DWORD),
+        ("dwProductVersionMS",      DWORD),
+        ("dwProductVersionLS",      DWORD),
+        ("dwFileFlagsMask",         DWORD),
+        ("dwFileFlags",             DWORD),
+        ("dwFileOS",                DWORD),
+        ("dwFileType",              DWORD),
+        ("dwFileSubtype",           DWORD),
+        ("dwFileDateMS",            DWORD),
+        ("dwFileDateLS",            DWORD),
+    ]
+
+#--- THREADNAME_INFO structure ------------------------------------------------
+
+# typedef struct tagTHREADNAME_INFO
+# {
+#    DWORD dwType; // Must be 0x1000.
+#    LPCSTR szName; // Pointer to name (in user addr space).
+#    DWORD dwThreadID; // Thread ID (-1=caller thread).
+#    DWORD dwFlags; // Reserved for future use, must be zero.
+# } THREADNAME_INFO;
+class THREADNAME_INFO(Structure):
+    _fields_ = [
+        ("dwType",      DWORD),     # 0x1000
+        ("szName",      LPVOID),    # remote pointer
+        ("dwThreadID",  DWORD),     # -1 usually
+        ("dwFlags",     DWORD),     # 0
+    ]
+
+#--- MEMORY_BASIC_INFORMATION structure ---------------------------------------
+
+# typedef struct _MEMORY_BASIC_INFORMATION32 {
+#     DWORD BaseAddress;
+#     DWORD AllocationBase;
+#     DWORD AllocationProtect;
+#     DWORD RegionSize;
+#     DWORD State;
+#     DWORD Protect;
+#     DWORD Type;
+# } MEMORY_BASIC_INFORMATION32, *PMEMORY_BASIC_INFORMATION32;
+class MEMORY_BASIC_INFORMATION32(Structure):
+    _fields_ = [
+        ('BaseAddress',         DWORD),         # remote pointer
+        ('AllocationBase',      DWORD),         # remote pointer
+        ('AllocationProtect',   DWORD),
+        ('RegionSize',          DWORD),
+        ('State',               DWORD),
+        ('Protect',             DWORD),
+        ('Type',                DWORD),
+    ]
+
+# typedef struct DECLSPEC_ALIGN(16) _MEMORY_BASIC_INFORMATION64 {
+#     ULONGLONG BaseAddress;
+#     ULONGLONG AllocationBase;
+#     DWORD     AllocationProtect;
+#     DWORD     __alignment1;
+#     ULONGLONG RegionSize;
+#     DWORD     State;
+#     DWORD     Protect;
+#     DWORD     Type;
+#     DWORD     __alignment2;
+# } MEMORY_BASIC_INFORMATION64, *PMEMORY_BASIC_INFORMATION64;
+class MEMORY_BASIC_INFORMATION64(Structure):
+    _fields_ = [
+        ('BaseAddress',         ULONGLONG),     # remote pointer
+        ('AllocationBase',      ULONGLONG),     # remote pointer
+        ('AllocationProtect',   DWORD),
+        ('__alignment1',        DWORD),
+        ('RegionSize',          ULONGLONG),
+        ('State',               DWORD),
+        ('Protect',             DWORD),
+        ('Type',                DWORD),
+        ('__alignment2',        DWORD),
+    ]
+
+# typedef struct _MEMORY_BASIC_INFORMATION {
+#     PVOID BaseAddress;
+#     PVOID AllocationBase;
+#     DWORD AllocationProtect;
+#     SIZE_T RegionSize;
+#     DWORD State;
+#     DWORD Protect;
+#     DWORD Type;
+# } MEMORY_BASIC_INFORMATION, *PMEMORY_BASIC_INFORMATION;
+class MEMORY_BASIC_INFORMATION(Structure):
+    _fields_ = [
+        ('BaseAddress',         SIZE_T),    # remote pointer
+        ('AllocationBase',      SIZE_T),    # remote pointer
+        ('AllocationProtect',   DWORD),
+        ('RegionSize',          SIZE_T),
+        ('State',               DWORD),
+        ('Protect',             DWORD),
+        ('Type',                DWORD),
+    ]
+PMEMORY_BASIC_INFORMATION = POINTER(MEMORY_BASIC_INFORMATION)
+
+#--- BY_HANDLE_FILE_INFORMATION structure -------------------------------------
+
+# typedef struct _FILETIME {
+#    DWORD dwLowDateTime;
+#    DWORD dwHighDateTime;
+# } FILETIME, *PFILETIME;
+class FILETIME(Structure):
+    _fields_ = [
+        ('dwLowDateTime',       DWORD),
+        ('dwHighDateTime',      DWORD),
+    ]
+LPFILETIME = POINTER(FILETIME)
+
+# typedef struct _SYSTEMTIME {
+#   WORD wYear;
+#   WORD wMonth;
+#   WORD wDayOfWeek;
+#   WORD wDay;
+#   WORD wHour;
+#   WORD wMinute;
+#   WORD wSecond;
+#   WORD wMilliseconds;
+# }SYSTEMTIME, *PSYSTEMTIME;
+class SYSTEMTIME(Structure):
+    _fields_ = [
+        ('wYear',           WORD),
+        ('wMonth',          WORD),
+        ('wDayOfWeek',      WORD),
+        ('wDay',            WORD),
+        ('wHour',           WORD),
+        ('wMinute',         WORD),
+        ('wSecond',         WORD),
+        ('wMilliseconds',   WORD),
+    ]
+LPSYSTEMTIME = POINTER(SYSTEMTIME)
+
+# typedef struct _BY_HANDLE_FILE_INFORMATION {
+#   DWORD dwFileAttributes;
+#   FILETIME ftCreationTime;
+#   FILETIME ftLastAccessTime;
+#   FILETIME ftLastWriteTime;
+#   DWORD dwVolumeSerialNumber;
+#   DWORD nFileSizeHigh;
+#   DWORD nFileSizeLow;
+#   DWORD nNumberOfLinks;
+#   DWORD nFileIndexHigh;
+#   DWORD nFileIndexLow;
+# } BY_HANDLE_FILE_INFORMATION, *PBY_HANDLE_FILE_INFORMATION;
+class BY_HANDLE_FILE_INFORMATION(Structure):
+    _fields_ = [
+        ('dwFileAttributes',        DWORD),
+        ('ftCreationTime',          FILETIME),
+        ('ftLastAccessTime',        FILETIME),
+        ('ftLastWriteTime',         FILETIME),
+        ('dwVolumeSerialNumber',    DWORD),
+        ('nFileSizeHigh',           DWORD),
+        ('nFileSizeLow',            DWORD),
+        ('nNumberOfLinks',          DWORD),
+        ('nFileIndexHigh',          DWORD),
+        ('nFileIndexLow',           DWORD),
+    ]
+LPBY_HANDLE_FILE_INFORMATION = POINTER(BY_HANDLE_FILE_INFORMATION)
+
+# typedef enum _FILE_INFO_BY_HANDLE_CLASS {
+#   FileBasicInfo = 0,
+#   FileStandardInfo = 1,
+#   FileNameInfo = 2,
+#   FileRenameInfo = 3,
+#   FileDispositionInfo = 4,
+#   FileAllocationInfo = 5,
+#   FileEndOfFileInfo = 6,
+#   FileStreamInfo = 7,
+#   FileCompressionInfo = 8,
+#   FileAttributeTagInfo = 9,
+#   FileIdBothDirectoryInfo = 10,
+#   FileIdBothDirectoryRestartInfo = 11,
+#   FileIoPriorityHintInfo = 12,
+#   MaximumFileInfoByHandlesClass = 13
+# } FILE_INFO_BY_HANDLE_CLASS, *PFILE_INFO_BY_HANDLE_CLASS;
+class FILE_INFO_BY_HANDLE_CLASS(object):
+    FileBasicInfo                   = 0
+    FileStandardInfo                = 1
+    FileNameInfo                    = 2
+    FileRenameInfo                  = 3
+    FileDispositionInfo             = 4
+    FileAllocationInfo              = 5
+    FileEndOfFileInfo               = 6
+    FileStreamInfo                  = 7
+    FileCompressionInfo             = 8
+    FileAttributeTagInfo            = 9
+    FileIdBothDirectoryInfo         = 10
+    FileIdBothDirectoryRestartInfo  = 11
+    FileIoPriorityHintInfo          = 12
+    MaximumFileInfoByHandlesClass   = 13
+
+# typedef struct _FILE_NAME_INFO {
+#   DWORD  FileNameLength;
+#   WCHAR FileName[1];
+# } FILE_NAME_INFO, *PFILE_NAME_INFO;
+##class FILE_NAME_INFO(Structure):
+##    _fields_ = [
+##        ('FileNameLength',  DWORD),
+##        ('FileName',        WCHAR * 1),
+##    ]
+
+# TO DO: add more structures used by GetFileInformationByHandleEx()
+
+#--- PROCESS_INFORMATION structure --------------------------------------------
+
+# typedef struct _PROCESS_INFORMATION {
+#     HANDLE hProcess;
+#     HANDLE hThread;
+#     DWORD dwProcessId;
+#     DWORD dwThreadId;
+# } PROCESS_INFORMATION, *PPROCESS_INFORMATION, *LPPROCESS_INFORMATION;
+class PROCESS_INFORMATION(Structure):
+    _fields_ = [
+        ('hProcess',    HANDLE),
+        ('hThread',     HANDLE),
+        ('dwProcessId', DWORD),
+        ('dwThreadId',  DWORD),
+    ]
+LPPROCESS_INFORMATION = POINTER(PROCESS_INFORMATION)
+
+#--- STARTUPINFO and STARTUPINFOEX structures ---------------------------------
+
+# typedef struct _STARTUPINFO {
+#   DWORD  cb;
+#   LPTSTR lpReserved;
+#   LPTSTR lpDesktop;
+#   LPTSTR lpTitle;
+#   DWORD  dwX;
+#   DWORD  dwY;
+#   DWORD  dwXSize;
+#   DWORD  dwYSize;
+#   DWORD  dwXCountChars;
+#   DWORD  dwYCountChars;
+#   DWORD  dwFillAttribute;
+#   DWORD  dwFlags;
+#   WORD   wShowWindow;
+#   WORD   cbReserved2;
+#   LPBYTE lpReserved2;
+#   HANDLE hStdInput;
+#   HANDLE hStdOutput;
+#   HANDLE hStdError;
+# }STARTUPINFO, *LPSTARTUPINFO;
+class STARTUPINFO(Structure):
+    _fields_ = [
+        ('cb',              DWORD),
+        ('lpReserved',      LPSTR),
+        ('lpDesktop',       LPSTR),
+        ('lpTitle',         LPSTR),
+        ('dwX',             DWORD),
+        ('dwY',             DWORD),
+        ('dwXSize',         DWORD),
+        ('dwYSize',         DWORD),
+        ('dwXCountChars',   DWORD),
+        ('dwYCountChars',   DWORD),
+        ('dwFillAttribute', DWORD),
+        ('dwFlags',         DWORD),
+        ('wShowWindow',     WORD),
+        ('cbReserved2',     WORD),
+        ('lpReserved2',     LPVOID),    # LPBYTE
+        ('hStdInput',       HANDLE),
+        ('hStdOutput',      HANDLE),
+        ('hStdError',       HANDLE),
+    ]
+LPSTARTUPINFO = POINTER(STARTUPINFO)
+
+# typedef struct _STARTUPINFOEX {
+#   STARTUPINFO StartupInfo;
+#   PPROC_THREAD_ATTRIBUTE_LIST lpAttributeList;
+# } STARTUPINFOEX,  *LPSTARTUPINFOEX;
+class STARTUPINFOEX(Structure):
+    _fields_ = [
+        ('StartupInfo',     STARTUPINFO),
+        ('lpAttributeList', PPROC_THREAD_ATTRIBUTE_LIST),
+    ]
+LPSTARTUPINFOEX = POINTER(STARTUPINFOEX)
+
+class STARTUPINFOW(Structure):
+    _fields_ = [
+        ('cb',              DWORD),
+        ('lpReserved',      LPWSTR),
+        ('lpDesktop',       LPWSTR),
+        ('lpTitle',         LPWSTR),
+        ('dwX',             DWORD),
+        ('dwY',             DWORD),
+        ('dwXSize',         DWORD),
+        ('dwYSize',         DWORD),
+        ('dwXCountChars',   DWORD),
+        ('dwYCountChars',   DWORD),
+        ('dwFillAttribute', DWORD),
+        ('dwFlags',         DWORD),
+        ('wShowWindow',     WORD),
+        ('cbReserved2',     WORD),
+        ('lpReserved2',     LPVOID),    # LPBYTE
+        ('hStdInput',       HANDLE),
+        ('hStdOutput',      HANDLE),
+        ('hStdError',       HANDLE),
+    ]
+LPSTARTUPINFOW = POINTER(STARTUPINFOW)
+
+class STARTUPINFOEXW(Structure):
+    _fields_ = [
+        ('StartupInfo',     STARTUPINFOW),
+        ('lpAttributeList', PPROC_THREAD_ATTRIBUTE_LIST),
+    ]
+LPSTARTUPINFOEXW = POINTER(STARTUPINFOEXW)
+
+#--- JIT_DEBUG_INFO structure -------------------------------------------------
+
+# typedef struct _JIT_DEBUG_INFO {
+#     DWORD dwSize;
+#     DWORD dwProcessorArchitecture;
+#     DWORD dwThreadID;
+#     DWORD dwReserved0;
+#     ULONG64 lpExceptionAddress;
+#     ULONG64 lpExceptionRecord;
+#     ULONG64 lpContextRecord;
+# } JIT_DEBUG_INFO, *LPJIT_DEBUG_INFO;
+class JIT_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('dwSize',                  DWORD),
+        ('dwProcessorArchitecture', DWORD),
+        ('dwThreadID',              DWORD),
+        ('dwReserved0',             DWORD),
+        ('lpExceptionAddress',      ULONG64),
+        ('lpExceptionRecord',       ULONG64),
+        ('lpContextRecord',         ULONG64),
+    ]
+JIT_DEBUG_INFO32 = JIT_DEBUG_INFO
+JIT_DEBUG_INFO64 = JIT_DEBUG_INFO
+
+LPJIT_DEBUG_INFO   = POINTER(JIT_DEBUG_INFO)
+LPJIT_DEBUG_INFO32 = POINTER(JIT_DEBUG_INFO32)
+LPJIT_DEBUG_INFO64 = POINTER(JIT_DEBUG_INFO64)
+
+#--- DEBUG_EVENT structure ----------------------------------------------------
+
+# typedef struct _EXCEPTION_RECORD32 {
+#     DWORD ExceptionCode;
+#     DWORD ExceptionFlags;
+#     DWORD ExceptionRecord;
+#     DWORD ExceptionAddress;
+#     DWORD NumberParameters;
+#     DWORD ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS];
+# } EXCEPTION_RECORD32, *PEXCEPTION_RECORD32;
+class EXCEPTION_RECORD32(Structure):
+    _fields_ = [
+        ('ExceptionCode',           DWORD),
+        ('ExceptionFlags',          DWORD),
+        ('ExceptionRecord',         DWORD),
+        ('ExceptionAddress',        DWORD),
+        ('NumberParameters',        DWORD),
+        ('ExceptionInformation',    DWORD * EXCEPTION_MAXIMUM_PARAMETERS),
+    ]
+
+PEXCEPTION_RECORD32 = POINTER(EXCEPTION_RECORD32)
+
+# typedef struct _EXCEPTION_RECORD64 {
+#     DWORD    ExceptionCode;
+#     DWORD ExceptionFlags;
+#     DWORD64 ExceptionRecord;
+#     DWORD64 ExceptionAddress;
+#     DWORD NumberParameters;
+#     DWORD __unusedAlignment;
+#     DWORD64 ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS];
+# } EXCEPTION_RECORD64, *PEXCEPTION_RECORD64;
+class EXCEPTION_RECORD64(Structure):
+    _fields_ = [
+        ('ExceptionCode',           DWORD),
+        ('ExceptionFlags',          DWORD),
+        ('ExceptionRecord',         DWORD64),
+        ('ExceptionAddress',        DWORD64),
+        ('NumberParameters',        DWORD),
+        ('__unusedAlignment',       DWORD),
+        ('ExceptionInformation',    DWORD64 * EXCEPTION_MAXIMUM_PARAMETERS),
+    ]
+
+PEXCEPTION_RECORD64 = POINTER(EXCEPTION_RECORD64)
+
+# typedef struct _EXCEPTION_RECORD {
+#     DWORD ExceptionCode;
+#     DWORD ExceptionFlags;
+#     LPVOID ExceptionRecord;
+#     LPVOID ExceptionAddress;
+#     DWORD NumberParameters;
+#     LPVOID ExceptionInformation[EXCEPTION_MAXIMUM_PARAMETERS];
+# } EXCEPTION_RECORD, *PEXCEPTION_RECORD;
+class EXCEPTION_RECORD(Structure):
+    pass
+PEXCEPTION_RECORD = POINTER(EXCEPTION_RECORD)
+EXCEPTION_RECORD._fields_ = [
+        ('ExceptionCode',           DWORD),
+        ('ExceptionFlags',          DWORD),
+        ('ExceptionRecord',         PEXCEPTION_RECORD),
+        ('ExceptionAddress',        LPVOID),
+        ('NumberParameters',        DWORD),
+        ('ExceptionInformation',    LPVOID * EXCEPTION_MAXIMUM_PARAMETERS),
+    ]
+
+# typedef struct _EXCEPTION_DEBUG_INFO {
+#   EXCEPTION_RECORD ExceptionRecord;
+#   DWORD dwFirstChance;
+# } EXCEPTION_DEBUG_INFO;
+class EXCEPTION_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('ExceptionRecord',     EXCEPTION_RECORD),
+        ('dwFirstChance',       DWORD),
+    ]
+
+# typedef struct _CREATE_THREAD_DEBUG_INFO {
+#   HANDLE hThread;
+#   LPVOID lpThreadLocalBase;
+#   LPTHREAD_START_ROUTINE lpStartAddress;
+# } CREATE_THREAD_DEBUG_INFO;
+class CREATE_THREAD_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('hThread',             HANDLE),
+        ('lpThreadLocalBase',   LPVOID),
+        ('lpStartAddress',      LPVOID),
+    ]
+
+# typedef struct _CREATE_PROCESS_DEBUG_INFO {
+#   HANDLE hFile;
+#   HANDLE hProcess;
+#   HANDLE hThread;
+#   LPVOID lpBaseOfImage;
+#   DWORD dwDebugInfoFileOffset;
+#   DWORD nDebugInfoSize;
+#   LPVOID lpThreadLocalBase;
+#   LPTHREAD_START_ROUTINE lpStartAddress;
+#   LPVOID lpImageName;
+#   WORD fUnicode;
+# } CREATE_PROCESS_DEBUG_INFO;
+class CREATE_PROCESS_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('hFile',                   HANDLE),
+        ('hProcess',                HANDLE),
+        ('hThread',                 HANDLE),
+        ('lpBaseOfImage',           LPVOID),
+        ('dwDebugInfoFileOffset',   DWORD),
+        ('nDebugInfoSize',          DWORD),
+        ('lpThreadLocalBase',       LPVOID),
+        ('lpStartAddress',          LPVOID),
+        ('lpImageName',             LPVOID),
+        ('fUnicode',                WORD),
+    ]
+
+# typedef struct _EXIT_THREAD_DEBUG_INFO {
+#   DWORD dwExitCode;
+# } EXIT_THREAD_DEBUG_INFO;
+class EXIT_THREAD_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('dwExitCode',          DWORD),
+    ]
+
+# typedef struct _EXIT_PROCESS_DEBUG_INFO {
+#   DWORD dwExitCode;
+# } EXIT_PROCESS_DEBUG_INFO;
+class EXIT_PROCESS_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('dwExitCode',          DWORD),
+    ]
+
+# typedef struct _LOAD_DLL_DEBUG_INFO {
+#   HANDLE hFile;
+#   LPVOID lpBaseOfDll;
+#   DWORD dwDebugInfoFileOffset;
+#   DWORD nDebugInfoSize;
+#   LPVOID lpImageName;
+#   WORD fUnicode;
+# } LOAD_DLL_DEBUG_INFO;
+class LOAD_DLL_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('hFile',                   HANDLE),
+        ('lpBaseOfDll',             LPVOID),
+        ('dwDebugInfoFileOffset',   DWORD),
+        ('nDebugInfoSize',          DWORD),
+        ('lpImageName',             LPVOID),
+        ('fUnicode',                WORD),
+    ]
+
+# typedef struct _UNLOAD_DLL_DEBUG_INFO {
+#   LPVOID lpBaseOfDll;
+# } UNLOAD_DLL_DEBUG_INFO;
+class UNLOAD_DLL_DEBUG_INFO(Structure):
+    _fields_ = [
+        ('lpBaseOfDll',         LPVOID),
+    ]
+
+# typedef struct _OUTPUT_DEBUG_STRING_INFO {
+#   LPSTR lpDebugStringData;
+#   WORD fUnicode;
+#   WORD nDebugStringLength;
+# } OUTPUT_DEBUG_STRING_INFO;
+class OUTPUT_DEBUG_STRING_INFO(Structure):
+    _fields_ = [
+        ('lpDebugStringData',   LPVOID),    # don't use LPSTR
+        ('fUnicode',            WORD),
+        ('nDebugStringLength',  WORD),
+    ]
+
+# typedef struct _RIP_INFO {
+#     DWORD dwError;
+#     DWORD dwType;
+# } RIP_INFO, *LPRIP_INFO;
+class RIP_INFO(Structure):
+    _fields_ = [
+        ('dwError',             DWORD),
+        ('dwType',              DWORD),
+    ]
+
+# typedef struct _DEBUG_EVENT {
+#   DWORD dwDebugEventCode;
+#   DWORD dwProcessId;
+#   DWORD dwThreadId;
+#   union {
+#     EXCEPTION_DEBUG_INFO Exception;
+#     CREATE_THREAD_DEBUG_INFO CreateThread;
+#     CREATE_PROCESS_DEBUG_INFO CreateProcessInfo;
+#     EXIT_THREAD_DEBUG_INFO ExitThread;
+#     EXIT_PROCESS_DEBUG_INFO ExitProcess;
+#     LOAD_DLL_DEBUG_INFO LoadDll;
+#     UNLOAD_DLL_DEBUG_INFO UnloadDll;
+#     OUTPUT_DEBUG_STRING_INFO DebugString;
+#     RIP_INFO RipInfo;
+#   } u;
+# } DEBUG_EVENT;
+class _DEBUG_EVENT_UNION_(Union):
+    _fields_ = [
+        ('Exception',           EXCEPTION_DEBUG_INFO),
+        ('CreateThread',        CREATE_THREAD_DEBUG_INFO),
+        ('CreateProcessInfo',   CREATE_PROCESS_DEBUG_INFO),
+        ('ExitThread',          EXIT_THREAD_DEBUG_INFO),
+        ('ExitProcess',         EXIT_PROCESS_DEBUG_INFO),
+        ('LoadDll',             LOAD_DLL_DEBUG_INFO),
+        ('UnloadDll',           UNLOAD_DLL_DEBUG_INFO),
+        ('DebugString',         OUTPUT_DEBUG_STRING_INFO),
+        ('RipInfo',             RIP_INFO),
+    ]
+class DEBUG_EVENT(Structure):
+    _fields_ = [
+        ('dwDebugEventCode',    DWORD),
+        ('dwProcessId',         DWORD),
+        ('dwThreadId',          DWORD),
+        ('u',                   _DEBUG_EVENT_UNION_),
+    ]
+LPDEBUG_EVENT = POINTER(DEBUG_EVENT)
+
+#--- Console API defines and structures ---------------------------------------
+
+FOREGROUND_MASK = 0x000F
+BACKGROUND_MASK = 0x00F0
+COMMON_LVB_MASK = 0xFF00
+
+FOREGROUND_BLACK     = 0x0000
+FOREGROUND_BLUE      = 0x0001
+FOREGROUND_GREEN     = 0x0002
+FOREGROUND_CYAN      = 0x0003
+FOREGROUND_RED       = 0x0004
+FOREGROUND_MAGENTA   = 0x0005
+FOREGROUND_YELLOW    = 0x0006
+FOREGROUND_GREY      = 0x0007
+FOREGROUND_INTENSITY = 0x0008
+
+BACKGROUND_BLACK     = 0x0000
+BACKGROUND_BLUE      = 0x0010
+BACKGROUND_GREEN     = 0x0020
+BACKGROUND_CYAN      = 0x0030
+BACKGROUND_RED       = 0x0040
+BACKGROUND_MAGENTA   = 0x0050
+BACKGROUND_YELLOW    = 0x0060
+BACKGROUND_GREY      = 0x0070
+BACKGROUND_INTENSITY = 0x0080
+
+COMMON_LVB_LEADING_BYTE    = 0x0100
+COMMON_LVB_TRAILING_BYTE   = 0x0200
+COMMON_LVB_GRID_HORIZONTAL = 0x0400
+COMMON_LVB_GRID_LVERTICAL  = 0x0800
+COMMON_LVB_GRID_RVERTICAL  = 0x1000
+COMMON_LVB_REVERSE_VIDEO   = 0x4000
+COMMON_LVB_UNDERSCORE      = 0x8000
+
+# typedef struct _CHAR_INFO {
+#   union {
+#     WCHAR UnicodeChar;
+#     CHAR  AsciiChar;
+#   } Char;
+#   WORD  Attributes;
+# } CHAR_INFO, *PCHAR_INFO;
+class _CHAR_INFO_CHAR(Union):
+    _fields_ = [
+        ('UnicodeChar', WCHAR),
+        ('AsciiChar',   CHAR),
+    ]
+class CHAR_INFO(Structure):
+    _fields_ = [
+        ('Char',       _CHAR_INFO_CHAR),
+        ('Attributes', WORD),
+   ]
+PCHAR_INFO = POINTER(CHAR_INFO)
+
+# typedef struct _COORD {
+#   SHORT X;
+#   SHORT Y;
+# } COORD, *PCOORD;
+class COORD(Structure):
+    _fields_ = [
+        ('X', SHORT),
+        ('Y', SHORT),
+    ]
+PCOORD = POINTER(COORD)
+
+# typedef struct _SMALL_RECT {
+#   SHORT Left;
+#   SHORT Top;
+#   SHORT Right;
+#   SHORT Bottom;
+# } SMALL_RECT;
+class SMALL_RECT(Structure):
+    _fields_ = [
+        ('Left',   SHORT),
+        ('Top',    SHORT),
+        ('Right',  SHORT),
+        ('Bottom', SHORT),
+    ]
+PSMALL_RECT = POINTER(SMALL_RECT)
+
+# typedef struct _CONSOLE_SCREEN_BUFFER_INFO {
+#   COORD      dwSize;
+#   COORD      dwCursorPosition;
+#   WORD       wAttributes;
+#   SMALL_RECT srWindow;
+#   COORD      dwMaximumWindowSize;
+# } CONSOLE_SCREEN_BUFFER_INFO;
+class CONSOLE_SCREEN_BUFFER_INFO(Structure):
+    _fields_ = [
+        ('dwSize',              COORD),
+        ('dwCursorPosition',    COORD),
+        ('wAttributes',         WORD),
+        ('srWindow',            SMALL_RECT),
+        ('dwMaximumWindowSize', COORD),
+    ]
+PCONSOLE_SCREEN_BUFFER_INFO = POINTER(CONSOLE_SCREEN_BUFFER_INFO)
+
+#--- Toolhelp library defines and structures ----------------------------------
+
+TH32CS_SNAPHEAPLIST = 0x00000001
+TH32CS_SNAPPROCESS  = 0x00000002
+TH32CS_SNAPTHREAD   = 0x00000004
+TH32CS_SNAPMODULE   = 0x00000008
+TH32CS_INHERIT      = 0x80000000
+TH32CS_SNAPALL      = (TH32CS_SNAPHEAPLIST | TH32CS_SNAPPROCESS | TH32CS_SNAPTHREAD | TH32CS_SNAPMODULE)
+
+# typedef struct tagTHREADENTRY32 {
+#   DWORD dwSize;
+#   DWORD cntUsage;
+#   DWORD th32ThreadID;
+#   DWORD th32OwnerProcessID;
+#   LONG tpBasePri;
+#   LONG tpDeltaPri;
+#   DWORD dwFlags;
+# } THREADENTRY32,  *PTHREADENTRY32;
+class THREADENTRY32(Structure):
+    _fields_ = [
+        ('dwSize',             DWORD),
+        ('cntUsage',           DWORD),
+        ('th32ThreadID',       DWORD),
+        ('th32OwnerProcessID', DWORD),
+        ('tpBasePri',          LONG),
+        ('tpDeltaPri',         LONG),
+        ('dwFlags',            DWORD),
+    ]
+LPTHREADENTRY32 = POINTER(THREADENTRY32)
+
+# typedef struct tagPROCESSENTRY32 {
+#    DWORD dwSize;
+#    DWORD cntUsage;
+#    DWORD th32ProcessID;
+#    ULONG_PTR th32DefaultHeapID;
+#    DWORD th32ModuleID;
+#    DWORD cntThreads;
+#    DWORD th32ParentProcessID;
+#    LONG pcPriClassBase;
+#    DWORD dwFlags;
+#    TCHAR szExeFile[MAX_PATH];
+# } PROCESSENTRY32,  *PPROCESSENTRY32;
+class PROCESSENTRY32(Structure):
+    _fields_ = [
+        ('dwSize',              DWORD),
+        ('cntUsage',            DWORD),
+        ('th32ProcessID',       DWORD),
+        ('th32DefaultHeapID',   ULONG_PTR),
+        ('th32ModuleID',        DWORD),
+        ('cntThreads',          DWORD),
+        ('th32ParentProcessID', DWORD),
+        ('pcPriClassBase',      LONG),
+        ('dwFlags',             DWORD),
+        ('szExeFile',           TCHAR * 260),
+    ]
+LPPROCESSENTRY32 = POINTER(PROCESSENTRY32)
+
+# typedef struct tagPROCESSENTRY32W {
+#    DWORD dwSize;
+#    DWORD cntUsage;
+#    DWORD th32ProcessID;
+#    ULONG_PTR th32DefaultHeapID;
+#    DWORD th32ModuleID;
+#    DWORD cntThreads;
+#    DWORD th32ParentProcessID;
+#    LONG pcPriClassBase;
+#    DWORD dwFlags;
+#    WCHAR szExeFile[MAX_PATH];
+# } PROCESSENTRY32W,  *PPROCESSENTRY32W;
+class PROCESSENTRY32W(Structure):
+    _fields_ = [
+        ('dwSize',              DWORD),
+        ('cntUsage',            DWORD),
+        ('th32ProcessID',       DWORD),
+        ('th32DefaultHeapID',   ULONG_PTR),
+        ('th32ModuleID',        DWORD),
+        ('cntThreads',          DWORD),
+        ('th32ParentProcessID', DWORD),
+        ('pcPriClassBase',      LONG),
+        ('dwFlags',             DWORD),
+        ('szExeFile',           WCHAR * 260),
+    ]
+LPPROCESSENTRY32W = POINTER(PROCESSENTRY32W)
+
+
+
+# typedef struct tagMODULEENTRY32 {
+#   DWORD dwSize;
+#   DWORD th32ModuleID;
+#   DWORD th32ProcessID;
+#   DWORD GlblcntUsage;
+#   DWORD ProccntUsage;
+#   BYTE* modBaseAddr;
+#   DWORD modBaseSize;
+#   HMODULE hModule;
+#   TCHAR szModule[MAX_MODULE_NAME32 + 1];
+#   TCHAR szExePath[MAX_PATH];
+# } MODULEENTRY32,  *PMODULEENTRY32;
+class MODULEENTRY32(Structure):
+    _fields_ = [
+        ("dwSize",        DWORD),
+        ("th32ModuleID",  DWORD),
+        ("th32ProcessID", DWORD),
+        ("GlblcntUsage",  DWORD),
+        ("ProccntUsage",  DWORD),
+        ("modBaseAddr",   LPVOID),  # BYTE*
+        ("modBaseSize",   DWORD),
+        ("hModule",       HMODULE),
+        ("szModule",      TCHAR * (MAX_MODULE_NAME32 + 1)),
+        ("szExePath",     TCHAR * MAX_PATH),
+    ]
+LPMODULEENTRY32 = POINTER(MODULEENTRY32)
+
+# typedef struct tagHEAPENTRY32 {
+#   SIZE_T    dwSize;
+#   HANDLE    hHandle;
+#   ULONG_PTR dwAddress;
+#   SIZE_T    dwBlockSize;
+#   DWORD     dwFlags;
+#   DWORD     dwLockCount;
+#   DWORD     dwResvd;
+#   DWORD     th32ProcessID;
+#   ULONG_PTR th32HeapID;
+# } HEAPENTRY32,
+# *PHEAPENTRY32;
+class HEAPENTRY32(Structure):
+    _fields_ = [
+        ("dwSize",          SIZE_T),
+        ("hHandle",         HANDLE),
+        ("dwAddress",       ULONG_PTR),
+        ("dwBlockSize",     SIZE_T),
+        ("dwFlags",         DWORD),
+        ("dwLockCount",     DWORD),
+        ("dwResvd",         DWORD),
+        ("th32ProcessID",   DWORD),
+        ("th32HeapID",      ULONG_PTR),
+]
+LPHEAPENTRY32 = POINTER(HEAPENTRY32)
+
+# typedef struct tagHEAPLIST32 {
+#   SIZE_T    dwSize;
+#   DWORD     th32ProcessID;
+#   ULONG_PTR th32HeapID;
+#   DWORD     dwFlags;
+# } HEAPLIST32,
+#  *PHEAPLIST32;
+class HEAPLIST32(Structure):
+    _fields_ = [
+        ("dwSize",          SIZE_T),
+        ("th32ProcessID",   DWORD),
+        ("th32HeapID",      ULONG_PTR),
+        ("dwFlags",         DWORD),
+]
+LPHEAPLIST32 = POINTER(HEAPLIST32)
+
+#--- kernel32.dll -------------------------------------------------------------
+
+# DWORD WINAPI GetLastError(void);
+def GetLastError():
+    _GetLastError = windll.kernel32.GetLastError
+    _GetLastError.argtypes = []
+    _GetLastError.restype  = DWORD
+    return _GetLastError()
+
+# void WINAPI SetLastError(
+#   __in  DWORD dwErrCode
+# );
+def SetLastError(dwErrCode):
+    _SetLastError = windll.kernel32.SetLastError
+    _SetLastError.argtypes = [DWORD]
+    _SetLastError.restype  = None
+    _SetLastError(dwErrCode)
+
+# UINT WINAPI GetErrorMode(void);
+def GetErrorMode():
+    _GetErrorMode = windll.kernel32.GetErrorMode
+    _GetErrorMode.argtypes = []
+    _GetErrorMode.restype  = UINT
+    return _GetErrorMode()
+
+# UINT WINAPI SetErrorMode(
+#   __in  UINT uMode
+# );
+def SetErrorMode(uMode):
+    _SetErrorMode = windll.kernel32.SetErrorMode
+    _SetErrorMode.argtypes = [UINT]
+    _SetErrorMode.restype  = UINT
+    return _SetErrorMode(uMode)
+
+# DWORD GetThreadErrorMode(void);
+def GetThreadErrorMode():
+    _GetThreadErrorMode = windll.kernel32.GetThreadErrorMode
+    _GetThreadErrorMode.argtypes = []
+    _GetThreadErrorMode.restype  = DWORD
+    return _GetThreadErrorMode()
+
+# BOOL SetThreadErrorMode(
+#   __in   DWORD dwNewMode,
+#   __out  LPDWORD lpOldMode
+# );
+def SetThreadErrorMode(dwNewMode):
+    _SetThreadErrorMode = windll.kernel32.SetThreadErrorMode
+    _SetThreadErrorMode.argtypes = [DWORD, LPDWORD]
+    _SetThreadErrorMode.restype  = BOOL
+    _SetThreadErrorMode.errcheck = RaiseIfZero
+
+    old = DWORD(0)
+    _SetThreadErrorMode(dwNewMode, byref(old))
+    return old.value
+
+# BOOL WINAPI CloseHandle(
+#   __in  HANDLE hObject
+# );
+def CloseHandle(hHandle):
+    if isinstance(hHandle, Handle):
+        # Prevents the handle from being closed without notifying the Handle object.
+        hHandle.close()
+    else:
+        _CloseHandle = windll.kernel32.CloseHandle
+        _CloseHandle.argtypes = [HANDLE]
+        _CloseHandle.restype  = bool
+        _CloseHandle.errcheck = RaiseIfZero
+        _CloseHandle(hHandle)
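+
+# Usage sketch (editorial addition): passing a Handle wrapper routes the close
+# through the wrapper as noted above, while a raw handle value goes straight to
+# the Win32 API. `hThread` and `raw_handle_value` are placeholders.
+#
+#   CloseHandle(hThread)           # Handle object: calls hThread.close()
+#   CloseHandle(raw_handle_value)  # plain integer: calls the Win32 API directly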
+
+# BOOL WINAPI DuplicateHandle(
+#   __in   HANDLE hSourceProcessHandle,
+#   __in   HANDLE hSourceHandle,
+#   __in   HANDLE hTargetProcessHandle,
+#   __out  LPHANDLE lpTargetHandle,
+#   __in   DWORD dwDesiredAccess,
+#   __in   BOOL bInheritHandle,
+#   __in   DWORD dwOptions
+# );
+def DuplicateHandle(hSourceHandle, hSourceProcessHandle = None, hTargetProcessHandle = None, dwDesiredAccess = STANDARD_RIGHTS_ALL, bInheritHandle = False, dwOptions = DUPLICATE_SAME_ACCESS):
+    _DuplicateHandle = windll.kernel32.DuplicateHandle
+    _DuplicateHandle.argtypes = [HANDLE, HANDLE, HANDLE, LPHANDLE, DWORD, BOOL, DWORD]
+    _DuplicateHandle.restype  = bool
+    _DuplicateHandle.errcheck = RaiseIfZero
+
+    # NOTE: the arguments to this function are in a different order,
+    # so we can set default values for all of them but one (hSourceHandle).
+
+    if hSourceProcessHandle is None:
+        hSourceProcessHandle = GetCurrentProcess()
+    if hTargetProcessHandle is None:
+        hTargetProcessHandle = hSourceProcessHandle
+    lpTargetHandle = HANDLE(INVALID_HANDLE_VALUE)
+    _DuplicateHandle(hSourceProcessHandle, hSourceHandle, hTargetProcessHandle, byref(lpTargetHandle), dwDesiredAccess, bool(bInheritHandle), dwOptions)
+    if isinstance(hSourceHandle, Handle):
+        HandleClass = hSourceHandle.__class__
+    else:
+        HandleClass = Handle
+    if hasattr(hSourceHandle, 'dwAccess'):
+        return HandleClass(lpTargetHandle.value, dwAccess = hSourceHandle.dwAccess)
+    else:
+        return HandleClass(lpTargetHandle.value)
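+
+# Usage sketch (editorial addition): because of the reordered arguments noted
+# above, duplicating a handle within the current process needs nothing but the
+# source handle; everything else keeps its default. `hThread` is a placeholder
+# for any Handle object or raw handle value.
+#
+#   hCopy = DuplicateHandle(hThread)   # same access, same process
+#   hCopy.close()                      # Handle objects can be closed early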
+
+# HLOCAL WINAPI LocalFree(
+#   __in  HLOCAL hMem
+# );
+def LocalFree(hMem):
+    _LocalFree = windll.kernel32.LocalFree
+    _LocalFree.argtypes = [HLOCAL]
+    _LocalFree.restype  = HLOCAL
+
+    result = _LocalFree(hMem)
+    if result != NULL:
+        raise ctypes.WinError()
+
+#------------------------------------------------------------------------------
+# Console API
+
+# HANDLE WINAPI GetStdHandle(
+#   _In_  DWORD nStdHandle
+# );
+def GetStdHandle(nStdHandle):
+    _GetStdHandle = windll.kernel32.GetStdHandle
+    _GetStdHandle.argtypes = [DWORD]
+    _GetStdHandle.restype  = HANDLE
+    _GetStdHandle.errcheck = RaiseIfZero
+    return Handle( _GetStdHandle(nStdHandle), bOwnership = False )
+
+# BOOL WINAPI SetStdHandle(
+#   _In_  DWORD nStdHandle,
+#   _In_  HANDLE hHandle
+# );
+
+# TODO
+
+# UINT WINAPI GetConsoleCP(void);
+def GetConsoleCP():
+    _GetConsoleCP = windll.kernel32.GetConsoleCP
+    _GetConsoleCP.argtypes = []
+    _GetConsoleCP.restype  = UINT
+    return _GetConsoleCP()
+
+# UINT WINAPI GetConsoleOutputCP(void);
+def GetConsoleOutputCP():
+    _GetConsoleOutputCP = windll.kernel32.GetConsoleOutputCP
+    _GetConsoleOutputCP.argtypes = []
+    _GetConsoleOutputCP.restype  = UINT
+    return _GetConsoleOutputCP()
+
+#BOOL WINAPI SetConsoleCP(
+#  _In_  UINT wCodePageID
+#);
+def SetConsoleCP(wCodePageID):
+    _SetConsoleCP = windll.kernel32.SetConsoleCP
+    _SetConsoleCP.argtypes = [UINT]
+    _SetConsoleCP.restype  = bool
+    _SetConsoleCP.errcheck = RaiseIfZero
+    _SetConsoleCP(wCodePageID)
+
+#BOOL WINAPI SetConsoleOutputCP(
+#  _In_  UINT wCodePageID
+#);
+def SetConsoleOutputCP(wCodePageID):
+    _SetConsoleOutputCP = windll.kernel32.SetConsoleOutputCP
+    _SetConsoleOutputCP.argtypes = [UINT]
+    _SetConsoleOutputCP.restype  = bool
+    _SetConsoleOutputCP.errcheck = RaiseIfZero
+    _SetConsoleOutputCP(wCodePageID)
+
+# HANDLE WINAPI CreateConsoleScreenBuffer(
+#   _In_        DWORD dwDesiredAccess,
+#   _In_        DWORD dwShareMode,
+#   _In_opt_    const SECURITY_ATTRIBUTES *lpSecurityAttributes,
+#   _In_        DWORD dwFlags,
+#   _Reserved_  LPVOID lpScreenBufferData
+# );
+
+# TODO
+
+# BOOL WINAPI SetConsoleActiveScreenBuffer(
+#   _In_  HANDLE hConsoleOutput
+# );
+def SetConsoleActiveScreenBuffer(hConsoleOutput = None):
+    _SetConsoleActiveScreenBuffer = windll.kernel32.SetConsoleActiveScreenBuffer
+    _SetConsoleActiveScreenBuffer.argtypes = [HANDLE]
+    _SetConsoleActiveScreenBuffer.restype  = bool
+    _SetConsoleActiveScreenBuffer.errcheck = RaiseIfZero
+
+    if hConsoleOutput is None:
+        hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE)
+    _SetConsoleActiveScreenBuffer(hConsoleOutput)
+
+# BOOL WINAPI GetConsoleScreenBufferInfo(
+#   _In_   HANDLE hConsoleOutput,
+#   _Out_  PCONSOLE_SCREEN_BUFFER_INFO lpConsoleScreenBufferInfo
+# );
+def GetConsoleScreenBufferInfo(hConsoleOutput = None):
+    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
+    _GetConsoleScreenBufferInfo.argtypes = [HANDLE, PCONSOLE_SCREEN_BUFFER_INFO]
+    _GetConsoleScreenBufferInfo.restype  = bool
+    _GetConsoleScreenBufferInfo.errcheck = RaiseIfZero
+
+    if hConsoleOutput is None:
+        hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE)
+    ConsoleScreenBufferInfo = CONSOLE_SCREEN_BUFFER_INFO()
+    _GetConsoleScreenBufferInfo(hConsoleOutput, byref(ConsoleScreenBufferInfo))
+    return ConsoleScreenBufferInfo
+
+# BOOL WINAPI GetConsoleScreenBufferInfoEx(
+#   _In_   HANDLE hConsoleOutput,
+#   _Out_  PCONSOLE_SCREEN_BUFFER_INFOEX lpConsoleScreenBufferInfoEx
+# );
+
+# TODO
+
+# BOOL WINAPI SetConsoleWindowInfo(
+#   _In_  HANDLE hConsoleOutput,
+#   _In_  BOOL bAbsolute,
+#   _In_  const SMALL_RECT *lpConsoleWindow
+# );
+def SetConsoleWindowInfo(hConsoleOutput, bAbsolute, lpConsoleWindow):
+    _SetConsoleWindowInfo = windll.kernel32.SetConsoleWindowInfo
+    _SetConsoleWindowInfo.argtypes = [HANDLE, BOOL, PSMALL_RECT]
+    _SetConsoleWindowInfo.restype  = bool
+    _SetConsoleWindowInfo.errcheck = RaiseIfZero
+
+    if hConsoleOutput is None:
+        hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE)
+    if isinstance(lpConsoleWindow, SMALL_RECT):
+        ConsoleWindow = lpConsoleWindow
+    else:
+        ConsoleWindow = SMALL_RECT(*lpConsoleWindow)
+    _SetConsoleWindowInfo(hConsoleOutput, bAbsolute, byref(ConsoleWindow))
+
+# BOOL WINAPI SetConsoleTextAttribute(
+#   _In_  HANDLE hConsoleOutput,
+#   _In_  WORD wAttributes
+# );
+def SetConsoleTextAttribute(hConsoleOutput = None, wAttributes = 0):
+    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
+    _SetConsoleTextAttribute.argtypes = [HANDLE, WORD]
+    _SetConsoleTextAttribute.restype  = bool
+    _SetConsoleTextAttribute.errcheck = RaiseIfZero
+
+    if hConsoleOutput is None:
+        hConsoleOutput = GetStdHandle(STD_OUTPUT_HANDLE)
+    _SetConsoleTextAttribute(hConsoleOutput, wAttributes)
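+
+# Usage sketch (editorial addition): the FOREGROUND_* and BACKGROUND_*
+# constants defined earlier in this section are OR'ed together into the
+# wAttributes word, e.g. bright grey text on a blue background for the
+# current console screen buffer:
+#
+#   SetConsoleTextAttribute(
+#       wAttributes = FOREGROUND_GREY | FOREGROUND_INTENSITY | BACKGROUND_BLUE)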
+
+# HANDLE WINAPI CreateConsoleScreenBuffer(
+#   _In_        DWORD dwDesiredAccess,
+#   _In_        DWORD dwShareMode,
+#   _In_opt_    const SECURITY_ATTRIBUTES *lpSecurityAttributes,
+#   _In_        DWORD dwFlags,
+#   _Reserved_  LPVOID lpScreenBufferData
+# );
+
+# TODO
+
+# BOOL WINAPI AllocConsole(void);
+def AllocConsole():
+    _AllocConsole = windll.kernel32.AllocConsole
+    _AllocConsole.argtypes = []
+    _AllocConsole.restype  = bool
+    _AllocConsole.errcheck = RaiseIfZero
+    _AllocConsole()
+
+# BOOL WINAPI AttachConsole(
+#   _In_  DWORD dwProcessId
+# );
+def AttachConsole(dwProcessId = ATTACH_PARENT_PROCESS):
+    _AttachConsole = windll.kernel32.AttachConsole
+    _AttachConsole.argtypes = [DWORD]
+    _AttachConsole.restype  = bool
+    _AttachConsole.errcheck = RaiseIfZero
+    _AttachConsole(dwProcessId)
+
+# BOOL WINAPI FreeConsole(void);
+def FreeConsole():
+    _FreeConsole = windll.kernel32.FreeConsole
+    _FreeConsole.argtypes = []
+    _FreeConsole.restype  = bool
+    _FreeConsole.errcheck = RaiseIfZero
+    _FreeConsole()
+
+# DWORD WINAPI GetConsoleProcessList(
+#   _Out_  LPDWORD lpdwProcessList,
+#   _In_   DWORD dwProcessCount
+# );
+
+# TODO
+
+# DWORD WINAPI GetConsoleTitle(
+#   _Out_  LPTSTR lpConsoleTitle,
+#   _In_   DWORD nSize
+# );
+
+# TODO
+
+#BOOL WINAPI SetConsoleTitle(
+#  _In_  LPCTSTR lpConsoleTitle
+#);
+
+# TODO
+
+# COORD WINAPI GetLargestConsoleWindowSize(
+#   _In_  HANDLE hConsoleOutput
+# );
+
+# TODO
+
+# BOOL WINAPI GetConsoleHistoryInfo(
+#   _Out_  PCONSOLE_HISTORY_INFO lpConsoleHistoryInfo
+# );
+
+# TODO
+
+#------------------------------------------------------------------------------
+# DLL API
+
+# DWORD WINAPI GetDllDirectory(
+#   __in   DWORD nBufferLength,
+#   __out  LPTSTR lpBuffer
+# );
+def GetDllDirectoryA():
+    _GetDllDirectoryA = windll.kernel32.GetDllDirectoryA
+    _GetDllDirectoryA.argtypes = [DWORD, LPSTR]
+    _GetDllDirectoryA.restype  = DWORD
+
+    nBufferLength = _GetDllDirectoryA(0, None)
+    if nBufferLength == 0:
+        return None
+    lpBuffer = ctypes.create_string_buffer(b"", nBufferLength)
+    _GetDllDirectoryA(nBufferLength, byref(lpBuffer))
+    return lpBuffer.value
+
+def GetDllDirectoryW():
+    _GetDllDirectoryW = windll.kernel32.GetDllDirectoryW
+    _GetDllDirectoryW.argtypes = [DWORD, LPWSTR]
+    _GetDllDirectoryW.restype  = DWORD
+
+    nBufferLength = _GetDllDirectoryW(0, None)
+    if nBufferLength == 0:
+        return None
+    lpBuffer = ctypes.create_unicode_buffer(u"", nBufferLength)
+    _GetDllDirectoryW(nBufferLength, byref(lpBuffer))
+    return lpBuffer.value
+
+GetDllDirectory = GuessStringType(GetDllDirectoryA, GetDllDirectoryW)
+
+# BOOL WINAPI SetDllDirectory(
+#   __in_opt  LPCTSTR lpPathName
+# );
+def SetDllDirectoryA(lpPathName = None):
+    _SetDllDirectoryA = windll.kernel32.SetDllDirectoryA
+    _SetDllDirectoryA.argtypes = [LPSTR]
+    _SetDllDirectoryA.restype  = bool
+    _SetDllDirectoryA.errcheck = RaiseIfZero
+    _SetDllDirectoryA(lpPathName)
+
+def SetDllDirectoryW(lpPathName):
+    _SetDllDirectoryW = windll.kernel32.SetDllDirectoryW
+    _SetDllDirectoryW.argtypes = [LPWSTR]
+    _SetDllDirectoryW.restype  = bool
+    _SetDllDirectoryW.errcheck = RaiseIfZero
+    _SetDllDirectoryW(lpPathName)
+
+SetDllDirectory = GuessStringType(SetDllDirectoryA, SetDllDirectoryW)
+
+# HMODULE WINAPI LoadLibrary(
+#   __in  LPCTSTR lpFileName
+# );
+def LoadLibraryA(pszLibrary):
+    _LoadLibraryA = windll.kernel32.LoadLibraryA
+    _LoadLibraryA.argtypes = [LPSTR]
+    _LoadLibraryA.restype  = HMODULE
+    hModule = _LoadLibraryA(pszLibrary)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+def LoadLibraryW(pszLibrary):
+    _LoadLibraryW = windll.kernel32.LoadLibraryW
+    _LoadLibraryW.argtypes = [LPWSTR]
+    _LoadLibraryW.restype  = HMODULE
+    hModule = _LoadLibraryW(pszLibrary)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+LoadLibrary = GuessStringType(LoadLibraryA, LoadLibraryW)
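+
+# Usage sketch (editorial addition): GuessStringType is assumed to dispatch on
+# the string type of the argument (an assumption about the helper defined
+# elsewhere in this module), so both spellings below are intended to load the
+# same module into the current process.
+#
+#   hModule = LoadLibrary(b"kernel32.dll")   # expected to use LoadLibraryA
+#   hModule = LoadLibrary(u"kernel32.dll")   # expected to use LoadLibraryW
+#   FreeLibrary(hModule)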
+
+# HMODULE WINAPI LoadLibraryEx(
+#   __in        LPCTSTR lpFileName,
+#   __reserved  HANDLE hFile,
+#   __in        DWORD dwFlags
+# );
+def LoadLibraryExA(pszLibrary, dwFlags = 0):
+    _LoadLibraryExA = windll.kernel32.LoadLibraryExA
+    _LoadLibraryExA.argtypes = [LPSTR, HANDLE, DWORD]
+    _LoadLibraryExA.restype  = HMODULE
+    hModule = _LoadLibraryExA(pszLibrary, NULL, dwFlags)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+def LoadLibraryExW(pszLibrary, dwFlags = 0):
+    _LoadLibraryExW = windll.kernel32.LoadLibraryExW
+    _LoadLibraryExW.argtypes = [LPWSTR, HANDLE, DWORD]
+    _LoadLibraryExW.restype  = HMODULE
+    hModule = _LoadLibraryExW(pszLibrary, NULL, dwFlags)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+LoadLibraryEx = GuessStringType(LoadLibraryExA, LoadLibraryExW)
+
+# HMODULE WINAPI GetModuleHandle(
+#   __in_opt  LPCTSTR lpModuleName
+# );
+def GetModuleHandleA(lpModuleName):
+    _GetModuleHandleA = windll.kernel32.GetModuleHandleA
+    _GetModuleHandleA.argtypes = [LPSTR]
+    _GetModuleHandleA.restype  = HMODULE
+    hModule = _GetModuleHandleA(lpModuleName)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+def GetModuleHandleW(lpModuleName):
+    _GetModuleHandleW = windll.kernel32.GetModuleHandleW
+    _GetModuleHandleW.argtypes = [LPWSTR]
+    _GetModuleHandleW.restype  = HMODULE
+    hModule = _GetModuleHandleW(lpModuleName)
+    if hModule == NULL:
+        raise ctypes.WinError()
+    return hModule
+
+GetModuleHandle = GuessStringType(GetModuleHandleA, GetModuleHandleW)
+
+# FARPROC WINAPI GetProcAddress(
+#   __in  HMODULE hModule,
+#   __in  LPCSTR lpProcName
+# );
+def GetProcAddressA(hModule, lpProcName):
+    _GetProcAddress = windll.kernel32.GetProcAddress
+    _GetProcAddress.argtypes = [HMODULE, LPVOID]
+    _GetProcAddress.restype  = LPVOID
+
+    if isinstance(lpProcName, int):
+        lpProcName = LPVOID(lpProcName)
+        if lpProcName.value & (~0xFFFF):
+            raise ValueError('Ordinal number too large: %d' % lpProcName.value)
+    elif isinstance(lpProcName, bytes):
+        lpProcName = ctypes.c_char_p(lpProcName)
+    else:
+        raise TypeError(str(type(lpProcName)))
+    return _GetProcAddress(hModule, lpProcName)
+
+GetProcAddressW = MakeWideVersion(GetProcAddressA)
+GetProcAddress = GuessStringType(GetProcAddressA, GetProcAddressW)
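+
+# Usage sketch (editorial addition): as the code above shows, the exported
+# symbol can be given either as a byte string or as a small integer ordinal.
+# `hModule` is a placeholder for a module handle obtained elsewhere.
+#
+#   addr_by_name    = GetProcAddressA(hModule, b"GetProcAddress")
+#   addr_by_ordinal = GetProcAddressA(hModule, 1)   # ordinal must fit in 16 bits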
+
+# BOOL WINAPI FreeLibrary(
+#   __in  HMODULE hModule
+# );
+def FreeLibrary(hModule):
+    _FreeLibrary = windll.kernel32.FreeLibrary
+    _FreeLibrary.argtypes = [HMODULE]
+    _FreeLibrary.restype  = bool
+    _FreeLibrary.errcheck = RaiseIfZero
+    _FreeLibrary(hModule)
+
+# PVOID WINAPI RtlPcToFileHeader(
+#   __in   PVOID PcValue,
+#   __out  PVOID *BaseOfImage
+# );
+def RtlPcToFileHeader(PcValue):
+    _RtlPcToFileHeader = windll.kernel32.RtlPcToFileHeader
+    _RtlPcToFileHeader.argtypes = [PVOID, POINTER(PVOID)]
+    _RtlPcToFileHeader.restype  = PRUNTIME_FUNCTION
+
+    BaseOfImage = PVOID(0)
+    _RtlPcToFileHeader(PcValue, byref(BaseOfImage))
+    return BaseOfImage.value
+
+#------------------------------------------------------------------------------
+# File API and related
+
+# BOOL WINAPI GetHandleInformation(
+#   __in   HANDLE hObject,
+#   __out  LPDWORD lpdwFlags
+# );
+def GetHandleInformation(hObject):
+    _GetHandleInformation = windll.kernel32.GetHandleInformation
+    _GetHandleInformation.argtypes = [HANDLE, PDWORD]
+    _GetHandleInformation.restype  = bool
+    _GetHandleInformation.errcheck = RaiseIfZero
+
+    dwFlags = DWORD(0)
+    _GetHandleInformation(hObject, byref(dwFlags))
+    return dwFlags.value
+
+# BOOL WINAPI SetHandleInformation(
+#   __in  HANDLE hObject,
+#   __in  DWORD dwMask,
+#   __in  DWORD dwFlags
+# );
+def SetHandleInformation(hObject, dwMask, dwFlags):
+    _SetHandleInformation = windll.kernel32.SetHandleInformation
+    _SetHandleInformation.argtypes = [HANDLE, DWORD, DWORD]
+    _SetHandleInformation.restype  = bool
+    _SetHandleInformation.errcheck = RaiseIfZero
+    _SetHandleInformation(hObject, dwMask, dwFlags)
+
+# UINT WINAPI GetWindowModuleFileName(
+#   __in   HWND hwnd,
+#   __out  LPTSTR lpszFileName,
+#   __in   UINT cchFileNameMax
+# );
+# Not included because it doesn't work in other processes.
+# See: http://support.microsoft.com/?id=228469
+
+# BOOL WINAPI QueryFullProcessImageName(
+#   __in     HANDLE hProcess,
+#   __in     DWORD dwFlags,
+#   __out    LPTSTR lpExeName,
+#   __inout  PDWORD lpdwSize
+# );
+def QueryFullProcessImageNameA(hProcess, dwFlags = 0):
+    _QueryFullProcessImageNameA = windll.kernel32.QueryFullProcessImageNameA
+    _QueryFullProcessImageNameA.argtypes = [HANDLE, DWORD, LPSTR, PDWORD]
+    _QueryFullProcessImageNameA.restype  = bool
+
+    dwSize = MAX_PATH
+    while 1:
+        lpdwSize = DWORD(dwSize)
+        lpExeName = ctypes.create_string_buffer(b'', lpdwSize.value + 1)
+        success = _QueryFullProcessImageNameA(hProcess, dwFlags, lpExeName, byref(lpdwSize))
+        if success and 0 < lpdwSize.value < dwSize:
+            break
+        error = GetLastError()
+        if error != ERROR_INSUFFICIENT_BUFFER:
+            raise ctypes.WinError(error)
+        dwSize = dwSize + 256
+        if dwSize > 0x1000:
+            # this prevents an infinite loop in Windows 2008 when the path has spaces,
+            # see http://msdn.microsoft.com/en-us/library/ms684919(VS.85).aspx#4
+            raise ctypes.WinError(error)
+    return lpExeName.value
+
+def QueryFullProcessImageNameW(hProcess, dwFlags = 0):
+    _QueryFullProcessImageNameW = windll.kernel32.QueryFullProcessImageNameW
+    _QueryFullProcessImageNameW.argtypes = [HANDLE, DWORD, LPWSTR, PDWORD]
+    _QueryFullProcessImageNameW.restype  = bool
+
+    dwSize = MAX_PATH
+    while 1:
+        lpdwSize = DWORD(dwSize)
+        lpExeName = ctypes.create_unicode_buffer('', lpdwSize.value + 1)
+        success = _QueryFullProcessImageNameW(hProcess, dwFlags, lpExeName, byref(lpdwSize))
+        if success and 0 < lpdwSize.value < dwSize:
+            break
+        error = GetLastError()
+        if error != ERROR_INSUFFICIENT_BUFFER:
+            raise ctypes.WinError(error)
+        dwSize = dwSize + 256
+        if dwSize > 0x1000:
+            # this prevents an infinite loop in Windows 2008 when the path has spaces,
+            # see http://msdn.microsoft.com/en-us/library/ms684919(VS.85).aspx#4
+            raise ctypes.WinError(error)
+    return lpExeName.value
+
+QueryFullProcessImageName = GuessStringType(QueryFullProcessImageNameA, QueryFullProcessImageNameW)
+
+# DWORD WINAPI GetLogicalDriveStrings(
+#   __in   DWORD nBufferLength,
+#   __out  LPTSTR lpBuffer
+# );
+def GetLogicalDriveStringsA():
+    _GetLogicalDriveStringsA = ctypes.windll.kernel32.GetLogicalDriveStringsA
+    _GetLogicalDriveStringsA.argtypes = [DWORD, LPSTR]
+    _GetLogicalDriveStringsA.restype  = DWORD
+    _GetLogicalDriveStringsA.errcheck = RaiseIfZero
+
+    nBufferLength = (4 * 26) + 1    # "X:\\\0" from A to Z plus empty string
+    lpBuffer = ctypes.create_string_buffer('', nBufferLength)
+    _GetLogicalDriveStringsA(nBufferLength, lpBuffer)
+    drive_strings = list()
+    string_p = addressof(lpBuffer)
+    sizeof_char = sizeof(ctypes.c_char)
+    while True:
+        string_v = ctypes.string_at(string_p)
+        if string_v == '':
+            break
+        drive_strings.append(string_v)
+        string_p += len(string_v) + sizeof_char
+    return drive_strings
+
+def GetLogicalDriveStringsW():
+    _GetLogicalDriveStringsW = ctypes.windll.kernel32.GetLogicalDriveStringsW
+    _GetLogicalDriveStringsW.argtypes = [DWORD, LPWSTR]
+    _GetLogicalDriveStringsW.restype  = DWORD
+    _GetLogicalDriveStringsW.errcheck = RaiseIfZero
+
+    nBufferLength = (4 * 26) + 1    # "X:\\\0" from A to Z plus empty string
+    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength)
+    _GetLogicalDriveStringsW(nBufferLength, lpBuffer)
+    drive_strings = list()
+    string_p = addressof(lpBuffer)
+    sizeof_wchar = sizeof(ctypes.c_wchar)
+    while True:
+        string_v = ctypes.wstring_at(string_p)
+        if string_v == u'':
+            break
+        drive_strings.append(string_v)
+        string_p += (len(string_v) * sizeof_wchar) + sizeof_wchar
+    return drive_strings
+
+##def GetLogicalDriveStringsA():
+##    _GetLogicalDriveStringsA = windll.kernel32.GetLogicalDriveStringsA
+##    _GetLogicalDriveStringsA.argtypes = [DWORD, LPSTR]
+##    _GetLogicalDriveStringsA.restype  = DWORD
+##    _GetLogicalDriveStringsA.errcheck = RaiseIfZero
+##
+##    nBufferLength = (4 * 26) + 1    # "X:\\\0" from A to Z plus empty string
+##    lpBuffer = ctypes.create_string_buffer('', nBufferLength)
+##    _GetLogicalDriveStringsA(nBufferLength, lpBuffer)
+##    result = list()
+##    index = 0
+##    while 1:
+##        string = list()
+##        while 1:
+##            character = lpBuffer[index]
+##            index = index + 1
+##            if character == '\0':
+##                break
+##            string.append(character)
+##        if not string:
+##            break
+##        result.append(''.join(string))
+##    return result
+##
+##def GetLogicalDriveStringsW():
+##    _GetLogicalDriveStringsW = windll.kernel32.GetLogicalDriveStringsW
+##    _GetLogicalDriveStringsW.argtypes = [DWORD, LPWSTR]
+##    _GetLogicalDriveStringsW.restype  = DWORD
+##    _GetLogicalDriveStringsW.errcheck = RaiseIfZero
+##
+##    nBufferLength = (4 * 26) + 1    # "X:\\\0" from A to Z plus empty string
+##    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength)
+##    _GetLogicalDriveStringsW(nBufferLength, lpBuffer)
+##    result = list()
+##    index = 0
+##    while 1:
+##        string = list()
+##        while 1:
+##            character = lpBuffer[index]
+##            index = index + 1
+##            if character == u'\0':
+##                break
+##            string.append(character)
+##        if not string:
+##            break
+##        result.append(u''.join(string))
+##    return result
+
+GetLogicalDriveStrings = GuessStringType(GetLogicalDriveStringsA, GetLogicalDriveStringsW)
+
+# DWORD WINAPI QueryDosDevice(
+#   __in_opt  LPCTSTR lpDeviceName,
+#   __out     LPTSTR lpTargetPath,
+#   __in      DWORD ucchMax
+# );
+def QueryDosDeviceA(lpDeviceName = None):
+    _QueryDosDeviceA = windll.kernel32.QueryDosDeviceA
+    _QueryDosDeviceA.argtypes = [LPSTR, LPSTR, DWORD]
+    _QueryDosDeviceA.restype  = DWORD
+    _QueryDosDeviceA.errcheck = RaiseIfZero
+
+    if not lpDeviceName:
+        lpDeviceName = None
+    ucchMax = 0x1000
+    lpTargetPath = ctypes.create_string_buffer('', ucchMax)
+    _QueryDosDeviceA(lpDeviceName, lpTargetPath, ucchMax)
+    return lpTargetPath.value
+
+def QueryDosDeviceW(lpDeviceName):
+    _QueryDosDeviceW = windll.kernel32.QueryDosDeviceW
+    _QueryDosDeviceW.argtypes = [LPWSTR, LPWSTR, DWORD]
+    _QueryDosDeviceW.restype  = DWORD
+    _QueryDosDeviceW.errcheck = RaiseIfZero
+
+    if not lpDeviceName:
+        lpDeviceName = None
+    ucchMax = 0x1000
+    lpTargetPath = ctypes.create_unicode_buffer(u'', ucchMax)
+    _QueryDosDeviceW(lpDeviceName, lpTargetPath, ucchMax)
+    return lpTargetPath.value
+
+QueryDosDevice = GuessStringType(QueryDosDeviceA, QueryDosDeviceW)
+
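+# Illustrative sketch (not part of the API): the two wrappers above can be
+# combined to map drive letters to their NT device names. GetLogicalDriveStrings
+# returns entries such as "C:\\", while QueryDosDevice expects the name without
+# the trailing backslash.
+#
+#     for drive in GetLogicalDriveStrings():
+#         device = QueryDosDevice(drive.rstrip("\\"))
+#         print("%s -> %s" % (drive, device))
+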
+# LPVOID WINAPI MapViewOfFile(
+#   __in  HANDLE hFileMappingObject,
+#   __in  DWORD dwDesiredAccess,
+#   __in  DWORD dwFileOffsetHigh,
+#   __in  DWORD dwFileOffsetLow,
+#   __in  SIZE_T dwNumberOfBytesToMap
+# );
+def MapViewOfFile(hFileMappingObject, dwDesiredAccess = FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, dwFileOffsetHigh = 0, dwFileOffsetLow = 0, dwNumberOfBytesToMap = 0):
+    _MapViewOfFile = windll.kernel32.MapViewOfFile
+    _MapViewOfFile.argtypes = [HANDLE, DWORD, DWORD, DWORD, SIZE_T]
+    _MapViewOfFile.restype  = LPVOID
+    lpBaseAddress = _MapViewOfFile(hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, dwFileOffsetLow, dwNumberOfBytesToMap)
+    if lpBaseAddress == NULL:
+        raise ctypes.WinError()
+    return lpBaseAddress
+
+# BOOL WINAPI UnmapViewOfFile(
+#   __in  LPCVOID lpBaseAddress
+# );
+def UnmapViewOfFile(lpBaseAddress):
+    _UnmapViewOfFile = windll.kernel32.UnmapViewOfFile
+    _UnmapViewOfFile.argtypes = [LPVOID]
+    _UnmapViewOfFile.restype  = bool
+    _UnmapViewOfFile.errcheck = RaiseIfZero
+    _UnmapViewOfFile(lpBaseAddress)
+
+# HANDLE WINAPI OpenFileMapping(
+#   __in  DWORD dwDesiredAccess,
+#   __in  BOOL bInheritHandle,
+#   __in  LPCTSTR lpName
+# );
+def OpenFileMappingA(dwDesiredAccess, bInheritHandle, lpName):
+    _OpenFileMappingA = windll.kernel32.OpenFileMappingA
+    _OpenFileMappingA.argtypes = [DWORD, BOOL, LPSTR]
+    _OpenFileMappingA.restype  = HANDLE
+    _OpenFileMappingA.errcheck = RaiseIfZero
+    hFileMappingObject = _OpenFileMappingA(dwDesiredAccess, bool(bInheritHandle), lpName)
+    return FileMappingHandle(hFileMappingObject)
+
+def OpenFileMappingW(dwDesiredAccess, bInheritHandle, lpName):
+    _OpenFileMappingW = windll.kernel32.OpenFileMappingW
+    _OpenFileMappingW.argtypes = [DWORD, BOOL, LPWSTR]
+    _OpenFileMappingW.restype  = HANDLE
+    _OpenFileMappingW.errcheck = RaiseIfZero
+    hFileMappingObject = _OpenFileMappingW(dwDesiredAccess, bool(bInheritHandle), lpName)
+    return FileMappingHandle(hFileMappingObject)
+
+OpenFileMapping = GuessStringType(OpenFileMappingA, OpenFileMappingW)
+
+# HANDLE WINAPI CreateFileMapping(
+#   __in      HANDLE hFile,
+#   __in_opt  LPSECURITY_ATTRIBUTES lpAttributes,
+#   __in      DWORD flProtect,
+#   __in      DWORD dwMaximumSizeHigh,
+#   __in      DWORD dwMaximumSizeLow,
+#   __in_opt  LPCTSTR lpName
+# );
+def CreateFileMappingA(hFile, lpAttributes = None, flProtect = PAGE_EXECUTE_READWRITE, dwMaximumSizeHigh = 0, dwMaximumSizeLow = 0, lpName = None):
+    _CreateFileMappingA = windll.kernel32.CreateFileMappingA
+    _CreateFileMappingA.argtypes = [HANDLE, LPVOID, DWORD, DWORD, DWORD, LPSTR]
+    _CreateFileMappingA.restype  = HANDLE
+    _CreateFileMappingA.errcheck = RaiseIfZero
+
+    if lpAttributes:
+        lpAttributes = ctypes.pointer(lpAttributes)
+    if not lpName:
+        lpName = None
+    hFileMappingObject = _CreateFileMappingA(hFile, lpAttributes, flProtect, dwMaximumSizeHigh, dwMaximumSizeLow, lpName)
+    return FileMappingHandle(hFileMappingObject)
+
+def CreateFileMappingW(hFile, lpAttributes = None, flProtect = PAGE_EXECUTE_READWRITE, dwMaximumSizeHigh = 0, dwMaximumSizeLow = 0, lpName = None):
+    _CreateFileMappingW = windll.kernel32.CreateFileMappingW
+    _CreateFileMappingW.argtypes = [HANDLE, LPVOID, DWORD, DWORD, DWORD, LPWSTR]
+    _CreateFileMappingW.restype  = HANDLE
+    _CreateFileMappingW.errcheck = RaiseIfZero
+
+    if lpAttributes:
+        lpAttributes = ctypes.pointer(lpAttributes)
+    if not lpName:
+        lpName = None
+    hFileMappingObject = _CreateFileMappingW(hFile, lpAttributes, flProtect, dwMaximumSizeHigh, dwMaximumSizeLow, lpName)
+    return FileMappingHandle(hFileMappingObject)
+
+CreateFileMapping = GuessStringType(CreateFileMappingA, CreateFileMappingW)
+
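+# Illustrative sketch (not part of the API): a pagefile-backed mapping can be
+# created by passing INVALID_HANDLE_VALUE as the file handle, then mapped and
+# unmapped with the wrappers above. PAGE_READWRITE and FILE_MAP_ALL_ACCESS are
+# standard Win32 constants assumed to be defined elsewhere in this module.
+#
+#     hMap  = CreateFileMapping(INVALID_HANDLE_VALUE, flProtect = PAGE_READWRITE,
+#                               dwMaximumSizeLow = 0x1000, lpName = "Local\\demo")
+#     pView = MapViewOfFile(hMap, FILE_MAP_ALL_ACCESS)
+#     try:
+#         pass  # read or write through pView here
+#     finally:
+#         UnmapViewOfFile(pView)
+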
+# HANDLE WINAPI CreateFile(
+#   __in      LPCTSTR lpFileName,
+#   __in      DWORD dwDesiredAccess,
+#   __in      DWORD dwShareMode,
+#   __in_opt  LPSECURITY_ATTRIBUTES lpSecurityAttributes,
+#   __in      DWORD dwCreationDisposition,
+#   __in      DWORD dwFlagsAndAttributes,
+#   __in_opt  HANDLE hTemplateFile
+# );
+def CreateFileA(lpFileName, dwDesiredAccess = GENERIC_ALL, dwShareMode = 0, lpSecurityAttributes = None, dwCreationDisposition = OPEN_ALWAYS, dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL, hTemplateFile = None):
+    _CreateFileA = windll.kernel32.CreateFileA
+    _CreateFileA.argtypes = [LPSTR, DWORD, DWORD, LPVOID, DWORD, DWORD, HANDLE]
+    _CreateFileA.restype  = HANDLE
+
+    if not lpFileName:
+        lpFileName = None
+    if lpSecurityAttributes:
+        lpSecurityAttributes = ctypes.pointer(lpSecurityAttributes)
+    hFile = _CreateFileA(lpFileName, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile)
+    if hFile == INVALID_HANDLE_VALUE:
+        raise ctypes.WinError()
+    return FileHandle(hFile)
+
+def CreateFileW(lpFileName, dwDesiredAccess = GENERIC_ALL, dwShareMode = 0, lpSecurityAttributes = None, dwCreationDisposition = OPEN_ALWAYS, dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL, hTemplateFile = None):
+    _CreateFileW = windll.kernel32.CreateFileW
+    _CreateFileW.argtypes = [LPWSTR, DWORD, DWORD, LPVOID, DWORD, DWORD, HANDLE]
+    _CreateFileW.restype  = HANDLE
+
+    if not lpFileName:
+        lpFileName = None
+    if lpSecurityAttributes:
+        lpSecurityAttributes = ctypes.pointer(lpSecurityAttributes)
+    hFile = _CreateFileW(lpFileName, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile)
+    if hFile == INVALID_HANDLE_VALUE:
+        raise ctypes.WinError()
+    return FileHandle(hFile)
+
+CreateFile = GuessStringType(CreateFileA, CreateFileW)
+
+# BOOL WINAPI FlushFileBuffers(
+#   __in  HANDLE hFile
+# );
+def FlushFileBuffers(hFile):
+    _FlushFileBuffers = windll.kernel32.FlushFileBuffers
+    _FlushFileBuffers.argtypes = [HANDLE]
+    _FlushFileBuffers.restype  = bool
+    _FlushFileBuffers.errcheck = RaiseIfZero
+    _FlushFileBuffers(hFile)
+
+# BOOL WINAPI FlushViewOfFile(
+#   __in  LPCVOID lpBaseAddress,
+#   __in  SIZE_T dwNumberOfBytesToFlush
+# );
+def FlushViewOfFile(lpBaseAddress, dwNumberOfBytesToFlush = 0):
+    _FlushViewOfFile = windll.kernel32.FlushViewOfFile
+    _FlushViewOfFile.argtypes = [LPVOID, SIZE_T]
+    _FlushViewOfFile.restype  = bool
+    _FlushViewOfFile.errcheck = RaiseIfZero
+    _FlushViewOfFile(lpBaseAddress, dwNumberOfBytesToFlush)
+
+# DWORD WINAPI SearchPath(
+#   __in_opt   LPCTSTR lpPath,
+#   __in       LPCTSTR lpFileName,
+#   __in_opt   LPCTSTR lpExtension,
+#   __in       DWORD nBufferLength,
+#   __out      LPTSTR lpBuffer,
+#   __out_opt  LPTSTR *lpFilePart
+# );
+def SearchPathA(lpPath, lpFileName, lpExtension):
+    _SearchPathA = windll.kernel32.SearchPathA
+    _SearchPathA.argtypes = [LPSTR, LPSTR, LPSTR, DWORD, LPSTR, POINTER(LPSTR)]
+    _SearchPathA.restype  = DWORD
+    _SearchPathA.errcheck = RaiseIfZero
+
+    if not lpPath:
+        lpPath = None
+    if not lpExtension:
+        lpExtension = None
+    nBufferLength = _SearchPathA(lpPath, lpFileName, lpExtension, 0, None, None)
+    lpBuffer = ctypes.create_string_buffer('', nBufferLength + 1)
+    lpFilePart = LPSTR()
+    _SearchPathA(lpPath, lpFileName, lpExtension, nBufferLength, lpBuffer, byref(lpFilePart))
+    lpFilePart = lpFilePart.value
+    lpBuffer = lpBuffer.value
+    if lpBuffer == '':
+        if GetLastError() == ERROR_SUCCESS:
+            raise ctypes.WinError(ERROR_FILE_NOT_FOUND)
+        raise ctypes.WinError()
+    return (lpBuffer, lpFilePart)
+
+def SearchPathW(lpPath, lpFileName, lpExtension):
+    _SearchPathW = windll.kernel32.SearchPathW
+    _SearchPathW.argtypes = [LPWSTR, LPWSTR, LPWSTR, DWORD, LPWSTR, POINTER(LPWSTR)]
+    _SearchPathW.restype  = DWORD
+    _SearchPathW.errcheck = RaiseIfZero
+
+    if not lpPath:
+        lpPath = None
+    if not lpExtension:
+        lpExtension = None
+    nBufferLength = _SearchPathW(lpPath, lpFileName, lpExtension, 0, None, None)
+    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength + 1)
+    lpFilePart = LPWSTR()
+    _SearchPathW(lpPath, lpFileName, lpExtension, nBufferLength, lpBuffer, byref(lpFilePart))
+    lpFilePart = lpFilePart.value
+    lpBuffer = lpBuffer.value
+    if lpBuffer == u'':
+        if GetLastError() == ERROR_SUCCESS:
+            raise ctypes.WinError(ERROR_FILE_NOT_FOUND)
+        raise ctypes.WinError()
+    return (lpBuffer, lpFilePart)
+
+SearchPath = GuessStringType(SearchPathA, SearchPathW)
+
+# BOOL SetSearchPathMode(
+#   __in  DWORD Flags
+# );
+def SetSearchPathMode(Flags):
+    _SetSearchPathMode = windll.kernel32.SetSearchPathMode
+    _SetSearchPathMode.argtypes = [DWORD]
+    _SetSearchPathMode.restype  = bool
+    _SetSearchPathMode.errcheck = RaiseIfZero
+    _SetSearchPathMode(Flags)
+
+# BOOL WINAPI DeviceIoControl(
+#   __in         HANDLE hDevice,
+#   __in         DWORD dwIoControlCode,
+#   __in_opt     LPVOID lpInBuffer,
+#   __in         DWORD nInBufferSize,
+#   __out_opt    LPVOID lpOutBuffer,
+#   __in         DWORD nOutBufferSize,
+#   __out_opt    LPDWORD lpBytesReturned,
+#   __inout_opt  LPOVERLAPPED lpOverlapped
+# );
+def DeviceIoControl(hDevice, dwIoControlCode, lpInBuffer, nInBufferSize, lpOutBuffer, nOutBufferSize, lpOverlapped):
+    _DeviceIoControl = windll.kernel32.DeviceIoControl
+    _DeviceIoControl.argtypes = [HANDLE, DWORD, LPVOID, DWORD, LPVOID, DWORD, LPDWORD, LPOVERLAPPED]
+    _DeviceIoControl.restype  = bool
+    _DeviceIoControl.errcheck = RaiseIfZero
+
+    if not lpInBuffer:
+        lpInBuffer = None
+    if not lpOutBuffer:
+        lpOutBuffer = None
+    if lpOverlapped:
+        lpOverlapped = ctypes.pointer(lpOverlapped)
+    lpBytesReturned = DWORD(0)
+    _DeviceIoControl(hDevice, dwIoControlCode, lpInBuffer, nInBufferSize, lpOutBuffer, nOutBufferSize, byref(lpBytesReturned), lpOverlapped)
+    return lpBytesReturned.value
+
+# BOOL GetFileInformationByHandle(
+#   HANDLE hFile,
+#   LPBY_HANDLE_FILE_INFORMATION lpFileInformation
+# );
+def GetFileInformationByHandle(hFile):
+    _GetFileInformationByHandle = windll.kernel32.GetFileInformationByHandle
+    _GetFileInformationByHandle.argtypes = [HANDLE, LPBY_HANDLE_FILE_INFORMATION]
+    _GetFileInformationByHandle.restype  = bool
+    _GetFileInformationByHandle.errcheck = RaiseIfZero
+
+    lpFileInformation = BY_HANDLE_FILE_INFORMATION()
+    _GetFileInformationByHandle(hFile, byref(lpFileInformation))
+    return lpFileInformation
+
+# BOOL WINAPI GetFileInformationByHandleEx(
+#   __in   HANDLE hFile,
+#   __in   FILE_INFO_BY_HANDLE_CLASS FileInformationClass,
+#   __out  LPVOID lpFileInformation,
+#   __in   DWORD dwBufferSize
+# );
+def GetFileInformationByHandleEx(hFile, FileInformationClass, lpFileInformation, dwBufferSize):
+    _GetFileInformationByHandleEx = windll.kernel32.GetFileInformationByHandleEx
+    _GetFileInformationByHandleEx.argtypes = [HANDLE, DWORD, LPVOID, DWORD]
+    _GetFileInformationByHandleEx.restype  = bool
+    _GetFileInformationByHandleEx.errcheck = RaiseIfZero
+    # XXX TODO
+    # support each FileInformationClass so the function can allocate the
+    # corresponding structure for the lpFileInformation parameter
+    _GetFileInformationByHandleEx(hFile, FileInformationClass, byref(lpFileInformation), dwBufferSize)
+
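+# Illustrative sketch (not part of the API): since the wrapper above does not
+# yet allocate the output structure, callers pass their own buffer. The
+# FILE_INFO_BY_HANDLE_CLASS value shown here (FileStandardInfo == 1) is a
+# standard Windows constant that may or may not be defined in this module.
+#
+#     info = ctypes.create_string_buffer(64)
+#     GetFileInformationByHandleEx(hFile, 1, info, sizeof(info))
+#     # parse info.raw according to the FILE_STANDARD_INFO layout
+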
+# DWORD WINAPI GetFinalPathNameByHandle(
+#   __in   HANDLE hFile,
+#   __out  LPTSTR lpszFilePath,
+#   __in   DWORD cchFilePath,
+#   __in   DWORD dwFlags
+# );
+def GetFinalPathNameByHandleA(hFile, dwFlags = FILE_NAME_NORMALIZED | VOLUME_NAME_DOS):
+    _GetFinalPathNameByHandleA = windll.kernel32.GetFinalPathNameByHandleA
+    _GetFinalPathNameByHandleA.argtypes = [HANDLE, LPSTR, DWORD, DWORD]
+    _GetFinalPathNameByHandleA.restype  = DWORD
+
+    cchFilePath = _GetFinalPathNameByHandleA(hFile, None, 0, dwFlags)
+    if cchFilePath == 0:
+        raise ctypes.WinError()
+    lpszFilePath = ctypes.create_string_buffer('', cchFilePath + 1)
+    nCopied = _GetFinalPathNameByHandleA(hFile, lpszFilePath, cchFilePath, dwFlags)
+    if nCopied <= 0 or nCopied > cchFilePath:
+        raise ctypes.WinError()
+    return lpszFilePath.value
+
+def GetFinalPathNameByHandleW(hFile, dwFlags = FILE_NAME_NORMALIZED | VOLUME_NAME_DOS):
+    _GetFinalPathNameByHandleW = windll.kernel32.GetFinalPathNameByHandleW
+    _GetFinalPathNameByHandleW.argtypes = [HANDLE, LPWSTR, DWORD, DWORD]
+    _GetFinalPathNameByHandleW.restype  = DWORD
+
+    cchFilePath = _GetFinalPathNameByHandleW(hFile, None, 0, dwFlags)
+    if cchFilePath == 0:
+        raise ctypes.WinError()
+    lpszFilePath = ctypes.create_unicode_buffer(u'', cchFilePath + 1)
+    nCopied = _GetFinalPathNameByHandleW(hFile, lpszFilePath, cchFilePath, dwFlags)
+    if nCopied <= 0 or nCopied > cchFilePath:
+        raise ctypes.WinError()
+    return lpszFilePath.value
+
+GetFinalPathNameByHandle = GuessStringType(GetFinalPathNameByHandleA, GetFinalPathNameByHandleW)
+
+# DWORD WINAPI GetTempPath(
+#   __in   DWORD nBufferLength,
+#   __out  LPTSTR lpBuffer
+# );
+def GetTempPathA():
+    _GetTempPathA = windll.kernel32.GetTempPathA
+    _GetTempPathA.argtypes = [DWORD, LPSTR]
+    _GetTempPathA.restype  = DWORD
+
+    nBufferLength = _GetTempPathA(0, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_string_buffer('', nBufferLength)
+    nCopied = _GetTempPathA(nBufferLength, lpBuffer)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+def GetTempPathW():
+    _GetTempPathW = windll.kernel32.GetTempPathW
+    _GetTempPathW.argtypes = [DWORD, LPWSTR]
+    _GetTempPathW.restype  = DWORD
+
+    nBufferLength = _GetTempPathW(0, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength)
+    nCopied = _GetTempPathW(nBufferLength, lpBuffer)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+GetTempPath = DefaultStringType(GetTempPathA, GetTempPathW)
+
+# UINT WINAPI GetTempFileName(
+#   __in   LPCTSTR lpPathName,
+#   __in   LPCTSTR lpPrefixString,
+#   __in   UINT uUnique,
+#   __out  LPTSTR lpTempFileName
+# );
+def GetTempFileNameA(lpPathName = None, lpPrefixString = "TMP", uUnique = 0):
+    _GetTempFileNameA = windll.kernel32.GetTempFileNameA
+    _GetTempFileNameA.argtypes = [LPSTR, LPSTR, UINT, LPSTR]
+    _GetTempFileNameA.restype  = UINT
+
+    if lpPathName is None:
+        lpPathName = GetTempPathA()
+    lpTempFileName = ctypes.create_string_buffer('', MAX_PATH)
+    uUnique = _GetTempFileNameA(lpPathName, lpPrefixString, uUnique, lpTempFileName)
+    if uUnique == 0:
+        raise ctypes.WinError()
+    return lpTempFileName.value, uUnique
+
+def GetTempFileNameW(lpPathName = None, lpPrefixString = u"TMP", uUnique = 0):
+    _GetTempFileNameW = windll.kernel32.GetTempFileNameW
+    _GetTempFileNameW.argtypes = [LPWSTR, LPWSTR, UINT, LPWSTR]
+    _GetTempFileNameW.restype  = UINT
+
+    if lpPathName is None:
+        lpPathName = GetTempPathW()
+    lpTempFileName = ctypes.create_unicode_buffer(u'', MAX_PATH)
+    uUnique = _GetTempFileNameW(lpPathName, lpPrefixString, uUnique, lpTempFileName)
+    if uUnique == 0:
+        raise ctypes.WinError()
+    return lpTempFileName.value, uUnique
+
+GetTempFileName = GuessStringType(GetTempFileNameA, GetTempFileNameW)
+
+# DWORD WINAPI GetCurrentDirectory(
+#   __in   DWORD nBufferLength,
+#   __out  LPTSTR lpBuffer
+# );
+def GetCurrentDirectoryA():
+    _GetCurrentDirectoryA = windll.kernel32.GetCurrentDirectoryA
+    _GetCurrentDirectoryA.argtypes = [DWORD, LPSTR]
+    _GetCurrentDirectoryA.restype  = DWORD
+
+    nBufferLength = _GetCurrentDirectoryA(0, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_string_buffer('', nBufferLength)
+    nCopied = _GetCurrentDirectoryA(nBufferLength, lpBuffer)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+def GetCurrentDirectoryW():
+    _GetCurrentDirectoryW = windll.kernel32.GetCurrentDirectoryW
+    _GetCurrentDirectoryW.argtypes = [DWORD, LPWSTR]
+    _GetCurrentDirectoryW.restype  = DWORD
+
+    nBufferLength = _GetCurrentDirectoryW(0, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength)
+    nCopied = _GetCurrentDirectoryW(nBufferLength, lpBuffer)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+GetCurrentDirectory = DefaultStringType(GetCurrentDirectoryA, GetCurrentDirectoryW)
+
+# UINT WINAPI GetSystemDirectory(
+#   _Out_ LPTSTR lpBuffer,
+#   _In_  UINT   uSize
+# );
+def GetSystemDirectoryA():
+    _GetSystemDirectoryA = windll.kernel32.GetSystemDirectoryA
+    _GetSystemDirectoryA.argtypes = [LPSTR, UINT]
+    _GetSystemDirectoryA.restype  = UINT
+
+    nBufferLength = _GetSystemDirectoryA(None, 0)

+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_string_buffer('', nBufferLength)
+    nCopied = _GetSystemDirectoryA(lpBuffer, nBufferLength)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+def GetSystemDirectoryW():
+    _GetSystemDirectoryW = windll.kernel32.GetSystemDirectoryW
+    _GetSystemDirectoryW.argtypes = [LPWSTR, UINT]
+    _GetSystemDirectoryW.restype  = UINT
+
+    nBufferLength = _GetSystemDirectoryW(None, 0)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer = ctypes.create_unicode_buffer(u'', nBufferLength)
+    nCopied = _GetSystemDirectoryW(lpBuffer, nBufferLength)
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value
+
+GetSystemDirectory = DefaultStringType(GetSystemDirectoryA, GetSystemDirectoryW)
+
+#------------------------------------------------------------------------------
+# Ctrl-C handler
+
+# BOOL WINAPI HandlerRoutine(
+#   __in  DWORD dwCtrlType
+# );
+PHANDLER_ROUTINE = ctypes.WINFUNCTYPE(BOOL, DWORD)
+
+# BOOL WINAPI SetConsoleCtrlHandler(
+#   __in_opt  PHANDLER_ROUTINE HandlerRoutine,
+#   __in      BOOL Add
+# );
+def SetConsoleCtrlHandler(HandlerRoutine = None, Add = True):
+    _SetConsoleCtrlHandler = windll.kernel32.SetConsoleCtrlHandler
+    _SetConsoleCtrlHandler.argtypes = [PHANDLER_ROUTINE, BOOL]
+    _SetConsoleCtrlHandler.restype  = bool
+    _SetConsoleCtrlHandler.errcheck = RaiseIfZero
+    _SetConsoleCtrlHandler(HandlerRoutine, bool(Add))
+    # we can't automagically transform Python functions to PHANDLER_ROUTINE
+    # because a) the actual pointer value is meaningful to the API
+    # and b) if it gets garbage collected bad things would happen
+
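+# Illustrative sketch (not part of the API): following the note above, the
+# caller wraps the Python handler in PHANDLER_ROUTINE and keeps a reference to
+# it for as long as the handler is registered, so the callback is not garbage
+# collected while the system still holds its pointer.
+#
+#     def _ctrl_handler(dwCtrlType):
+#         return True                              # swallow the event
+#     _handler_ref = PHANDLER_ROUTINE(_ctrl_handler)
+#     SetConsoleCtrlHandler(_handler_ref, True)
+#     # ...
+#     SetConsoleCtrlHandler(_handler_ref, False)   # unregister before dropping the reference
+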
+# BOOL WINAPI GenerateConsoleCtrlEvent(
+#   __in  DWORD dwCtrlEvent,
+#   __in  DWORD dwProcessGroupId
+# );
+def GenerateConsoleCtrlEvent(dwCtrlEvent, dwProcessGroupId):
+    _GenerateConsoleCtrlEvent = windll.kernel32.GenerateConsoleCtrlEvent
+    _GenerateConsoleCtrlEvent.argtypes = [DWORD, DWORD]
+    _GenerateConsoleCtrlEvent.restype  = bool
+    _GenerateConsoleCtrlEvent.errcheck = RaiseIfZero
+    _GenerateConsoleCtrlEvent(dwCtrlEvent, dwProcessGroupId)
+
+#------------------------------------------------------------------------------
+# Synchronization API
+
+# XXX NOTE
+#
+# Instead of waiting forever, we wait for a small period of time and loop.
+# This is a workaround for an unwanted behavior of psyco-accelerated code:
+# you can't interrupt a blocking call using Ctrl+C, because signal processing
+# is only done between C calls.
+#
+# Also see: bug #2793618 in Psyco project
+# http://sourceforge.net/tracker/?func=detail&aid=2793618&group_id=41036&atid=429622
+
+# DWORD WINAPI WaitForSingleObject(
+#   HANDLE hHandle,
+#   DWORD dwMilliseconds
+# );
+def WaitForSingleObject(hHandle, dwMilliseconds = INFINITE):
+    _WaitForSingleObject = windll.kernel32.WaitForSingleObject
+    _WaitForSingleObject.argtypes = [HANDLE, DWORD]
+    _WaitForSingleObject.restype  = DWORD
+
+    if not dwMilliseconds and dwMilliseconds != 0:
+        dwMilliseconds = INFINITE
+    if dwMilliseconds != INFINITE:
+        r = _WaitForSingleObject(hHandle, dwMilliseconds)
+        if r == WAIT_FAILED:
+            raise ctypes.WinError()
+    else:
+        while 1:
+            r = _WaitForSingleObject(hHandle, 100)
+            if r == WAIT_FAILED:
+                raise ctypes.WinError()
+            if r != WAIT_TIMEOUT:
+                break
+    return r
+
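+# Illustrative note: because of the polling loop above, an "infinite" wait made
+# through this wrapper can still be interrupted with Ctrl+C from Python. For
+# example, using OpenProcess (defined further below) and the standard Win32
+# SYNCHRONIZE access flag, assumed to be defined elsewhere in this module:
+#
+#     hProcess = OpenProcess(SYNCHRONIZE, False, dwProcessId)
+#     WaitForSingleObject(hProcess)   # Ctrl+C raises KeyboardInterrupt
+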
+# DWORD WINAPI WaitForSingleObjectEx(
+#   HANDLE hHandle,
+#   DWORD dwMilliseconds,
+#   BOOL bAlertable
+# );
+def WaitForSingleObjectEx(hHandle, dwMilliseconds = INFINITE, bAlertable = True):
+    _WaitForSingleObjectEx = windll.kernel32.WaitForSingleObjectEx
+    _WaitForSingleObjectEx.argtypes = [HANDLE, DWORD, BOOL]
+    _WaitForSingleObjectEx.restype  = DWORD
+
+    if not dwMilliseconds and dwMilliseconds != 0:
+        dwMilliseconds = INFINITE
+    if dwMilliseconds != INFINITE:
+        r = _WaitForSingleObjectEx(hHandle, dwMilliseconds, bool(bAlertable))
+        if r == WAIT_FAILED:
+            raise ctypes.WinError()
+    else:
+        while 1:
+            r = _WaitForSingleObjectEx(hHandle, 100, bool(bAlertable))
+            if r == WAIT_FAILED:
+                raise ctypes.WinError()
+            if r != WAIT_TIMEOUT:
+                break
+    return r
+
+# DWORD WINAPI WaitForMultipleObjects(
+#   DWORD nCount,
+#   const HANDLE *lpHandles,
+#   BOOL bWaitAll,
+#   DWORD dwMilliseconds
+# );
+def WaitForMultipleObjects(handles, bWaitAll = False, dwMilliseconds = INFINITE):
+    _WaitForMultipleObjects = windll.kernel32.WaitForMultipleObjects
+    _WaitForMultipleObjects.argtypes = [DWORD, POINTER(HANDLE), BOOL, DWORD]
+    _WaitForMultipleObjects.restype  = DWORD
+
+    if not dwMilliseconds and dwMilliseconds != 0:
+        dwMilliseconds = INFINITE
+    nCount          = len(handles)
+    lpHandlesType   = HANDLE * nCount
+    lpHandles       = lpHandlesType(*handles)
+    if dwMilliseconds != INFINITE:
+        r = _WaitForMultipleObjects(nCount, byref(lpHandles), bool(bWaitAll), dwMilliseconds)
+        if r == WAIT_FAILED:
+            raise ctypes.WinError()
+    else:
+        while 1:
+            r = _WaitForMultipleObjects(nCount, byref(lpHandles), bool(bWaitAll), 100)
+            if r == WAIT_FAILED:
+                raise ctypes.WinError()
+            if r != WAIT_TIMEOUT:
+                break
+    return r
+
+# DWORD WINAPI WaitForMultipleObjectsEx(
+#   DWORD nCount,
+#   const HANDLE *lpHandles,
+#   BOOL bWaitAll,
+#   DWORD dwMilliseconds,
+#   BOOL bAlertable
+# );
+def WaitForMultipleObjectsEx(handles, bWaitAll = False, dwMilliseconds = INFINITE, bAlertable = True):
+    _WaitForMultipleObjectsEx = windll.kernel32.WaitForMultipleObjectsEx
+    _WaitForMultipleObjectsEx.argtypes = [DWORD, POINTER(HANDLE), BOOL, DWORD, BOOL]
+    _WaitForMultipleObjectsEx.restype  = DWORD
+
+    if not dwMilliseconds and dwMilliseconds != 0:
+        dwMilliseconds = INFINITE
+    nCount          = len(handles)
+    lpHandlesType   = HANDLE * nCount
+    lpHandles       = lpHandlesType(*handles)
+    if dwMilliseconds != INFINITE:
+        r = _WaitForMultipleObjectsEx(nCount, byref(lpHandles), bool(bWaitAll), dwMilliseconds, bool(bAlertable))
+        if r == WAIT_FAILED:
+            raise ctypes.WinError()
+    else:
+        while 1:
+            r = _WaitForMultipleObjectsEx(nCount, byref(lpHandles), bool(bWaitAll), 100, bool(bAlertable))
+            if r == WAIT_FAILED:
+                raise ctypes.WinError()
+            if r != WAIT_TIMEOUT:
+                break
+    return r
+
+# HANDLE WINAPI CreateMutex(
+#   _In_opt_  LPSECURITY_ATTRIBUTES lpMutexAttributes,
+#   _In_      BOOL bInitialOwner,
+#   _In_opt_  LPCTSTR lpName
+# );
+def CreateMutexA(lpMutexAttributes = None, bInitialOwner = True, lpName = None):
+    _CreateMutexA = windll.kernel32.CreateMutexA
+    _CreateMutexA.argtypes = [LPVOID, BOOL, LPSTR]
+    _CreateMutexA.restype  = HANDLE
+    _CreateMutexA.errcheck = RaiseIfZero
+    return Handle( _CreateMutexA(lpMutexAttributes, bInitialOwner, lpName) )
+
+def CreateMutexW(lpMutexAttributes = None, bInitialOwner = True, lpName = None):
+    _CreateMutexW = windll.kernel32.CreateMutexW
+    _CreateMutexW.argtypes = [LPVOID, BOOL, LPWSTR]
+    _CreateMutexW.restype  = HANDLE
+    _CreateMutexW.errcheck = RaiseIfZero
+    return Handle( _CreateMutexW(lpMutexAttributes, bInitialOwner, lpName) )
+
+CreateMutex = GuessStringType(CreateMutexA, CreateMutexW)
+
+# HANDLE WINAPI OpenMutex(
+#   _In_  DWORD dwDesiredAccess,
+#   _In_  BOOL bInheritHandle,
+#   _In_  LPCTSTR lpName
+# );
+def OpenMutexA(dwDesiredAccess = MUTEX_ALL_ACCESS, bInheritHandle = False, lpName = None):
+    _OpenMutexA = windll.kernel32.OpenMutexA
+    _OpenMutexA.argtypes = [DWORD, BOOL, LPSTR]
+    _OpenMutexA.restype  = HANDLE
+    _OpenMutexA.errcheck = RaiseIfZero
+    return Handle( _OpenMutexA(dwDesiredAccess, bool(bInheritHandle), lpName) )
+
+def OpenMutexW(dwDesiredAccess = MUTEX_ALL_ACCESS, bInheritHandle = False, lpName = None):
+    _OpenMutexW = windll.kernel32.OpenMutexW
+    _OpenMutexW.argtypes = [DWORD, BOOL, LPWSTR]
+    _OpenMutexW.restype  = HANDLE
+    _OpenMutexW.errcheck = RaiseIfZero
+    return Handle( _OpenMutexW(dwDesiredAccess, bool(bInheritHandle), lpName) )
+
+OpenMutex = GuessStringType(OpenMutexA, OpenMutexW)
+
+# HANDLE WINAPI CreateEvent(
+#   _In_opt_  LPSECURITY_ATTRIBUTES lpEventAttributes,
+#   _In_      BOOL bManualReset,
+#   _In_      BOOL bInitialState,
+#   _In_opt_  LPCTSTR lpName
+# );
+def CreateEventA(lpMutexAttributes = None, bManualReset = False, bInitialState = False, lpName = None):
+    _CreateEventA = windll.kernel32.CreateEventA
+    _CreateEventA.argtypes = [LPVOID, BOOL, BOOL, LPSTR]
+    _CreateEventA.restype  = HANDLE
+    _CreateEventA.errcheck = RaiseIfZero
+    return Handle( _CreateEventA(lpMutexAttributes, bManualReset, bInitialState, lpName) )
+
+def CreateEventW(lpMutexAttributes = None, bManualReset = False, bInitialState = False, lpName = None):
+    _CreateEventW = windll.kernel32.CreateEventW
+    _CreateEventW.argtypes = [LPVOID, BOOL, BOOL, LPWSTR]
+    _CreateEventW.restype  = HANDLE
+    _CreateEventW.errcheck = RaiseIfZero
+    return Handle( _CreateEventW(lpMutexAttributes, bManualReset, bInitialState, lpName) )
+
+CreateEvent = GuessStringType(CreateEventA, CreateEventW)
+
+# HANDLE WINAPI OpenEvent(
+#   _In_  DWORD dwDesiredAccess,
+#   _In_  BOOL bInheritHandle,
+#   _In_  LPCTSTR lpName
+# );
+def OpenEventA(dwDesiredAccess = EVENT_ALL_ACCESS, bInheritHandle = False, lpName = None):
+    _OpenEventA = windll.kernel32.OpenEventA
+    _OpenEventA.argtypes = [DWORD, BOOL, LPSTR]
+    _OpenEventA.restype  = HANDLE
+    _OpenEventA.errcheck = RaiseIfZero
+    return Handle( _OpenEventA(dwDesiredAccess, bInheritHandle, lpName) )
+
+def OpenEventW(dwDesiredAccess = EVENT_ALL_ACCESS, bInheritHandle = False, lpName = None):
+    _OpenEventW = windll.kernel32.OpenEventW
+    _OpenEventW.argtypes = [DWORD, BOOL, LPWSTR]
+    _OpenEventW.restype  = HANDLE
+    _OpenEventW.errcheck = RaiseIfZero
+    return Handle( _OpenEventW(dwDesiredAccess, bInheritHandle, lpName) )
+
+OpenEvent = GuessStringType(OpenEventA, OpenEventW)
+
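+# Illustrative sketch (not part of the API): event objects created with the
+# wrappers above interoperate with the wait functions defined earlier; SetEvent
+# and ResetEvent are defined just below. WAIT_OBJECT_0 is the standard Win32
+# return value, assumed to be defined elsewhere in this module.
+#
+#     hEvent = CreateEvent(bManualReset = True)
+#     SetEvent(hEvent)
+#     assert WaitForSingleObject(hEvent, 0) == WAIT_OBJECT_0
+#     ResetEvent(hEvent)
+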
+# HANDLE WINAPI CreateSemaphore(
+#   _In_opt_  LPSECURITY_ATTRIBUTES lpSemaphoreAttributes,
+#   _In_      LONG lInitialCount,
+#   _In_      LONG lMaximumCount,
+#   _In_opt_  LPCTSTR lpName
+# );
+
+# TODO
+
+# HANDLE WINAPI OpenSemaphore(
+#   _In_  DWORD dwDesiredAccess,
+#   _In_  BOOL bInheritHandle,
+#   _In_  LPCTSTR lpName
+# );
+
+# TODO
+
+# BOOL WINAPI ReleaseMutex(
+#   _In_  HANDLE hMutex
+# );
+def ReleaseMutex(hMutex):
+    _ReleaseMutex = windll.kernel32.ReleaseMutex
+    _ReleaseMutex.argtypes = [HANDLE]
+    _ReleaseMutex.restype  = bool
+    _ReleaseMutex.errcheck = RaiseIfZero
+    _ReleaseMutex(hMutex)
+
+# BOOL WINAPI SetEvent(
+#   _In_  HANDLE hEvent
+# );
+def SetEvent(hEvent):
+    _SetEvent = windll.kernel32.SetEvent
+    _SetEvent.argtypes = [HANDLE]
+    _SetEvent.restype  = bool
+    _SetEvent.errcheck = RaiseIfZero
+    _SetEvent(hEvent)
+
+# BOOL WINAPI ResetEvent(
+#   _In_  HANDLE hEvent
+# );
+def ResetEvent(hEvent):
+    _ResetEvent = windll.kernel32.ResetEvent
+    _ResetEvent.argtypes = [HANDLE]
+    _ResetEvent.restype  = bool
+    _ResetEvent.errcheck = RaiseIfZero
+    _ResetEvent(hEvent)
+
+# BOOL WINAPI PulseEvent(
+#   _In_  HANDLE hEvent
+# );
+def PulseEvent(hEvent):
+    _PulseEvent = windll.kernel32.PulseEvent
+    _PulseEvent.argtypes = [HANDLE]
+    _PulseEvent.restype  = bool
+    _PulseEvent.errcheck = RaiseIfZero
+    _PulseEvent(hEvent)
+
+# BOOL WINAPI ReleaseSemaphore(
+#   _In_       HANDLE hSemaphore,
+#   _In_       LONG lReleaseCount,
+#   _Out_opt_  LPLONG lpPreviousCount
+# );
+
+# TODO
+
+#------------------------------------------------------------------------------
+# Debug API
+
+# BOOL WaitForDebugEvent(
+#   LPDEBUG_EVENT lpDebugEvent,
+#   DWORD dwMilliseconds
+# );
+def WaitForDebugEvent(dwMilliseconds = INFINITE):
+    _WaitForDebugEvent = windll.kernel32.WaitForDebugEvent
+    _WaitForDebugEvent.argtypes = [LPDEBUG_EVENT, DWORD]
+    _WaitForDebugEvent.restype  = DWORD
+
+    if not dwMilliseconds and dwMilliseconds != 0:
+        dwMilliseconds = INFINITE
+    lpDebugEvent                  = DEBUG_EVENT()
+    lpDebugEvent.dwDebugEventCode = 0
+    lpDebugEvent.dwProcessId      = 0
+    lpDebugEvent.dwThreadId       = 0
+    if dwMilliseconds != INFINITE:
+        success = _WaitForDebugEvent(byref(lpDebugEvent), dwMilliseconds)
+        if success == 0:
+            raise ctypes.WinError()
+    else:
+        # this avoids locking the Python GIL for too long
+        while 1:
+            success = _WaitForDebugEvent(byref(lpDebugEvent), 100)
+            if success != 0:
+                break
+            code = GetLastError()
+            if code not in (ERROR_SEM_TIMEOUT, WAIT_TIMEOUT):
+                raise ctypes.WinError(code)
+    return lpDebugEvent
+
+# BOOL ContinueDebugEvent(
+#   DWORD dwProcessId,
+#   DWORD dwThreadId,
+#   DWORD dwContinueStatus
+# );
+def ContinueDebugEvent(dwProcessId, dwThreadId, dwContinueStatus = DBG_EXCEPTION_NOT_HANDLED):
+    _ContinueDebugEvent = windll.kernel32.ContinueDebugEvent
+    _ContinueDebugEvent.argtypes = [DWORD, DWORD, DWORD]
+    _ContinueDebugEvent.restype  = bool
+    _ContinueDebugEvent.errcheck = RaiseIfZero
+    _ContinueDebugEvent(dwProcessId, dwThreadId, dwContinueStatus)
+
+# BOOL WINAPI FlushInstructionCache(
+#   __in  HANDLE hProcess,
+#   __in  LPCVOID lpBaseAddress,
+#   __in  SIZE_T dwSize
+# );
+def FlushInstructionCache(hProcess, lpBaseAddress = None, dwSize = 0):
+    # http://blogs.msdn.com/oldnewthing/archive/2003/12/08/55954.aspx#55958
+    _FlushInstructionCache = windll.kernel32.FlushInstructionCache
+    _FlushInstructionCache.argtypes = [HANDLE, LPVOID, SIZE_T]
+    _FlushInstructionCache.restype  = bool
+    _FlushInstructionCache.errcheck = RaiseIfZero
+    _FlushInstructionCache(hProcess, lpBaseAddress, dwSize)
+
+# BOOL DebugActiveProcess(
+#   DWORD dwProcessId
+# );
+def DebugActiveProcess(dwProcessId):
+    _DebugActiveProcess = windll.kernel32.DebugActiveProcess
+    _DebugActiveProcess.argtypes = [DWORD]
+    _DebugActiveProcess.restype  = bool
+    _DebugActiveProcess.errcheck = RaiseIfZero
+    _DebugActiveProcess(dwProcessId)
+
+# BOOL DebugActiveProcessStop(
+#   DWORD dwProcessId
+# );
+def DebugActiveProcessStop(dwProcessId):
+    _DebugActiveProcessStop = windll.kernel32.DebugActiveProcessStop
+    _DebugActiveProcessStop.argtypes = [DWORD]
+    _DebugActiveProcessStop.restype  = bool
+    _DebugActiveProcessStop.errcheck = RaiseIfZero
+    _DebugActiveProcessStop(dwProcessId)
+
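+# Illustrative sketch (not part of the API): a minimal debug loop built from
+# the wrappers above. DBG_CONTINUE and EXIT_PROCESS_DEBUG_EVENT are standard
+# Win32 constants assumed to be defined elsewhere in this module;
+# DebugActiveProcessStop can be used instead to detach early.
+#
+#     DebugActiveProcess(dwProcessId)
+#     while True:
+#         event = WaitForDebugEvent()
+#         ContinueDebugEvent(event.dwProcessId, event.dwThreadId, DBG_CONTINUE)
+#         if event.dwDebugEventCode == EXIT_PROCESS_DEBUG_EVENT:
+#             break
+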
+# BOOL CheckRemoteDebuggerPresent(
+#   HANDLE hProcess,
+#   PBOOL pbDebuggerPresent
+# );
+def CheckRemoteDebuggerPresent(hProcess):
+    _CheckRemoteDebuggerPresent = windll.kernel32.CheckRemoteDebuggerPresent
+    _CheckRemoteDebuggerPresent.argtypes = [HANDLE, PBOOL]
+    _CheckRemoteDebuggerPresent.restype  = bool
+    _CheckRemoteDebuggerPresent.errcheck = RaiseIfZero
+
+    pbDebuggerPresent = BOOL(0)
+    _CheckRemoteDebuggerPresent(hProcess, byref(pbDebuggerPresent))
+    return bool(pbDebuggerPresent.value)
+
+# BOOL DebugSetProcessKillOnExit(
+#   BOOL KillOnExit
+# );
+def DebugSetProcessKillOnExit(KillOnExit):
+    _DebugSetProcessKillOnExit = windll.kernel32.DebugSetProcessKillOnExit
+    _DebugSetProcessKillOnExit.argtypes = [BOOL]
+    _DebugSetProcessKillOnExit.restype  = bool
+    _DebugSetProcessKillOnExit.errcheck = RaiseIfZero
+    _DebugSetProcessKillOnExit(bool(KillOnExit))
+
+# BOOL DebugBreakProcess(
+#   HANDLE Process
+# );
+def DebugBreakProcess(hProcess):
+    _DebugBreakProcess = windll.kernel32.DebugBreakProcess
+    _DebugBreakProcess.argtypes = [HANDLE]
+    _DebugBreakProcess.restype  = bool
+    _DebugBreakProcess.errcheck = RaiseIfZero
+    _DebugBreakProcess(hProcess)
+
+# void WINAPI OutputDebugString(
+#   __in_opt  LPCTSTR lpOutputString
+# );
+def OutputDebugStringA(lpOutputString):
+    _OutputDebugStringA = windll.kernel32.OutputDebugStringA
+    _OutputDebugStringA.argtypes = [LPSTR]
+    _OutputDebugStringA.restype  = None
+    _OutputDebugStringA(lpOutputString)
+
+def OutputDebugStringW(lpOutputString):
+    _OutputDebugStringW = windll.kernel32.OutputDebugStringW
+    _OutputDebugStringW.argtypes = [LPWSTR]
+    _OutputDebugStringW.restype  = None
+    _OutputDebugStringW(lpOutputString)
+
+OutputDebugString = GuessStringType(OutputDebugStringA, OutputDebugStringW)
+
+# BOOL WINAPI ReadProcessMemory(
+#   __in   HANDLE hProcess,
+#   __in   LPCVOID lpBaseAddress,
+#   __out  LPVOID lpBuffer,
+#   __in   SIZE_T nSize,
+#   __out  SIZE_T* lpNumberOfBytesRead
+# );
+def ReadProcessMemory(hProcess, lpBaseAddress, nSize):
+    _ReadProcessMemory = windll.kernel32.ReadProcessMemory
+    _ReadProcessMemory.argtypes = [HANDLE, LPVOID, LPVOID, SIZE_T, POINTER(SIZE_T)]
+    _ReadProcessMemory.restype  = bool
+
+    lpBuffer            = ctypes.create_string_buffer(b'', nSize)
+    lpNumberOfBytesRead = SIZE_T(0)
+    success = _ReadProcessMemory(hProcess, lpBaseAddress, lpBuffer, nSize, byref(lpNumberOfBytesRead))
+    if not success and GetLastError() != ERROR_PARTIAL_COPY:
+        raise ctypes.WinError()
+    return lpBuffer.raw[:lpNumberOfBytesRead.value]
+
+# BOOL WINAPI WriteProcessMemory(
+#   __in   HANDLE hProcess,
+#   __in   LPCVOID lpBaseAddress,
+#   __in   LPVOID lpBuffer,
+#   __in   SIZE_T nSize,
+#   __out  SIZE_T* lpNumberOfBytesWritten
+# );
+def WriteProcessMemory(hProcess, lpBaseAddress, lpBuffer):
+    _WriteProcessMemory = windll.kernel32.WriteProcessMemory
+    _WriteProcessMemory.argtypes = [HANDLE, LPVOID, LPVOID, SIZE_T, POINTER(SIZE_T)]
+    _WriteProcessMemory.restype  = bool
+
+    nSize                   = len(lpBuffer)
+    lpBuffer                = ctypes.create_string_buffer(lpBuffer)
+    lpNumberOfBytesWritten  = SIZE_T(0)
+    success = _WriteProcessMemory(hProcess, lpBaseAddress, lpBuffer, nSize, byref(lpNumberOfBytesWritten))
+    if not success and GetLastError() != ERROR_PARTIAL_COPY:
+        raise ctypes.WinError()
+    return lpNumberOfBytesWritten.value
+
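+# Illustrative sketch (not part of the API): reading and patching memory in
+# another process with the two wrappers above. OpenProcess is defined later in
+# this section; PROCESS_VM_READ, PROCESS_VM_WRITE and PROCESS_VM_OPERATION are
+# standard Win32 access flags assumed to be defined elsewhere in this module.
+#
+#     hProcess = OpenProcess(PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION,
+#                            False, dwProcessId)
+#     original = ReadProcessMemory(hProcess, address, 4)
+#     WriteProcessMemory(hProcess, address, b"\x90\x90\x90\x90")
+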
+# LPVOID WINAPI VirtualAllocEx(
+#   __in      HANDLE hProcess,
+#   __in_opt  LPVOID lpAddress,
+#   __in      SIZE_T dwSize,
+#   __in      DWORD flAllocationType,
+#   __in      DWORD flProtect
+# );
+def VirtualAllocEx(hProcess, lpAddress = 0, dwSize = 0x1000, flAllocationType = MEM_COMMIT | MEM_RESERVE, flProtect = PAGE_EXECUTE_READWRITE):
+    _VirtualAllocEx = windll.kernel32.VirtualAllocEx
+    _VirtualAllocEx.argtypes = [HANDLE, LPVOID, SIZE_T, DWORD, DWORD]
+    _VirtualAllocEx.restype  = LPVOID
+
+    lpAddress = _VirtualAllocEx(hProcess, lpAddress, dwSize, flAllocationType, flProtect)
+    if lpAddress == NULL:
+        raise ctypes.WinError()
+    return lpAddress
+
+# SIZE_T WINAPI VirtualQueryEx(
+#   __in      HANDLE hProcess,
+#   __in_opt  LPCVOID lpAddress,
+#   __out     PMEMORY_BASIC_INFORMATION lpBuffer,
+#   __in      SIZE_T dwLength
+# );
+def VirtualQueryEx(hProcess, lpAddress):
+    _VirtualQueryEx = windll.kernel32.VirtualQueryEx
+    _VirtualQueryEx.argtypes = [HANDLE, LPVOID, PMEMORY_BASIC_INFORMATION, SIZE_T]
+    _VirtualQueryEx.restype  = SIZE_T
+
+    lpBuffer  = MEMORY_BASIC_INFORMATION()
+    dwLength  = sizeof(MEMORY_BASIC_INFORMATION)
+    success   = _VirtualQueryEx(hProcess, lpAddress, byref(lpBuffer), dwLength)
+    if success == 0:
+        raise ctypes.WinError()
+    return MemoryBasicInformation(lpBuffer)
+
+# BOOL WINAPI VirtualProtectEx(
+#   __in   HANDLE hProcess,
+#   __in   LPVOID lpAddress,
+#   __in   SIZE_T dwSize,
+#   __in   DWORD flNewProtect,
+#   __out  PDWORD lpflOldProtect
+# );
+def VirtualProtectEx(hProcess, lpAddress, dwSize, flNewProtect = PAGE_EXECUTE_READWRITE):
+    _VirtualProtectEx = windll.kernel32.VirtualProtectEx
+    _VirtualProtectEx.argtypes = [HANDLE, LPVOID, SIZE_T, DWORD, PDWORD]
+    _VirtualProtectEx.restype  = bool
+    _VirtualProtectEx.errcheck = RaiseIfZero
+
+    flOldProtect = DWORD(0)
+    _VirtualProtectEx(hProcess, lpAddress, dwSize, flNewProtect, byref(flOldProtect))
+    return flOldProtect.value
+
+# BOOL WINAPI VirtualFreeEx(
+#   __in  HANDLE hProcess,
+#   __in  LPVOID lpAddress,
+#   __in  SIZE_T dwSize,
+#   __in  DWORD dwFreeType
+# );
+def VirtualFreeEx(hProcess, lpAddress, dwSize = 0, dwFreeType = MEM_RELEASE):
+    _VirtualFreeEx = windll.kernel32.VirtualFreeEx
+    _VirtualFreeEx.argtypes = [HANDLE, LPVOID, SIZE_T, DWORD]
+    _VirtualFreeEx.restype  = bool
+    _VirtualFreeEx.errcheck = RaiseIfZero
+    _VirtualFreeEx(hProcess, lpAddress, dwSize, dwFreeType)
+
+# HANDLE WINAPI CreateRemoteThread(
+#   __in   HANDLE hProcess,
+#   __in   LPSECURITY_ATTRIBUTES lpThreadAttributes,
+#   __in   SIZE_T dwStackSize,
+#   __in   LPTHREAD_START_ROUTINE lpStartAddress,
+#   __in   LPVOID lpParameter,
+#   __in   DWORD dwCreationFlags,
+#   __out  LPDWORD lpThreadId
+# );
+def CreateRemoteThread(hProcess, lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags):
+    _CreateRemoteThread = windll.kernel32.CreateRemoteThread
+    _CreateRemoteThread.argtypes = [HANDLE, LPSECURITY_ATTRIBUTES, SIZE_T, LPVOID, LPVOID, DWORD, LPDWORD]
+    _CreateRemoteThread.restype  = HANDLE
+
+    if not lpThreadAttributes:
+        lpThreadAttributes = None
+    else:
+        lpThreadAttributes = byref(lpThreadAttributes)
+    dwThreadId = DWORD(0)
+    hThread = _CreateRemoteThread(hProcess, lpThreadAttributes, dwStackSize, lpStartAddress, lpParameter, dwCreationFlags, byref(dwThreadId))
+    if not hThread:
+        raise ctypes.WinError()
+    return ThreadHandle(hThread), dwThreadId.value
+
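+# Illustrative sketch (not part of the API): the classic remote-thread DLL
+# injection pattern built from VirtualAllocEx, WriteProcessMemory and
+# CreateRemoteThread. OpenProcess is defined in the next section;
+# GetModuleHandleA and GetProcAddress are assumed to be wrapped earlier in this
+# module, and PROCESS_ALL_ACCESS to be defined elsewhere. The DLL path is a
+# hypothetical placeholder.
+#
+#     dll_path = b"C:\\path\\to\\payload.dll"
+#     hProcess = OpenProcess(PROCESS_ALL_ACCESS, False, dwProcessId)
+#     remote   = VirtualAllocEx(hProcess, 0, len(dll_path) + 1)
+#     WriteProcessMemory(hProcess, remote, dll_path + b"\x00")
+#     loadlib  = GetProcAddress(GetModuleHandleA(b"kernel32.dll"), b"LoadLibraryA")
+#     hThread, tid = CreateRemoteThread(hProcess, None, 0, loadlib, remote, 0)
+#     WaitForSingleObject(hThread)
+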
+#------------------------------------------------------------------------------
+# Process API
+
+# BOOL WINAPI CreateProcess(
+#   __in_opt     LPCTSTR lpApplicationName,
+#   __inout_opt  LPTSTR lpCommandLine,
+#   __in_opt     LPSECURITY_ATTRIBUTES lpProcessAttributes,
+#   __in_opt     LPSECURITY_ATTRIBUTES lpThreadAttributes,
+#   __in         BOOL bInheritHandles,
+#   __in         DWORD dwCreationFlags,
+#   __in_opt     LPVOID lpEnvironment,
+#   __in_opt     LPCTSTR lpCurrentDirectory,
+#   __in         LPSTARTUPINFO lpStartupInfo,
+#   __out        LPPROCESS_INFORMATION lpProcessInformation
+# );
+def CreateProcessA(lpApplicationName, lpCommandLine=None, lpProcessAttributes=None, lpThreadAttributes=None, bInheritHandles=False, dwCreationFlags=0, lpEnvironment=None, lpCurrentDirectory=None, lpStartupInfo=None):
+    _CreateProcessA = windll.kernel32.CreateProcessA
+    _CreateProcessA.argtypes = [LPSTR, LPSTR, LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessA.restype  = bool
+    _CreateProcessA.errcheck = RaiseIfZero
+
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_string_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_string_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpProcessAttributes:
+        lpProcessAttributes = None
+    else:
+        lpProcessAttributes = byref(lpProcessAttributes)
+    if not lpThreadAttributes:
+        lpThreadAttributes = None
+    else:
+        lpThreadAttributes = byref(lpThreadAttributes)
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFO()
+        lpStartupInfo.cb           = sizeof(STARTUPINFO)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessA(lpApplicationName, lpCommandLine, lpProcessAttributes, lpThreadAttributes, bool(bInheritHandles), dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+def CreateProcessW(lpApplicationName, lpCommandLine=None, lpProcessAttributes=None, lpThreadAttributes=None, bInheritHandles=False, dwCreationFlags=0, lpEnvironment=None, lpCurrentDirectory=None, lpStartupInfo=None):
+    _CreateProcessW = windll.kernel32.CreateProcessW
+    _CreateProcessW.argtypes = [LPWSTR, LPWSTR, LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPWSTR, LPVOID, LPPROCESS_INFORMATION]
+    _CreateProcessW.restype  = bool
+    _CreateProcessW.errcheck = RaiseIfZero
+
+    if not lpApplicationName:
+        lpApplicationName   = None
+    if not lpCommandLine:
+        lpCommandLine       = None
+    else:
+        lpCommandLine       = ctypes.create_unicode_buffer(lpCommandLine, max(MAX_PATH, len(lpCommandLine) + 1))
+    if not lpEnvironment:
+        lpEnvironment       = None
+    else:
+        lpEnvironment       = ctypes.create_unicode_buffer(lpEnvironment)
+    if not lpCurrentDirectory:
+        lpCurrentDirectory  = None
+    if not lpProcessAttributes:
+        lpProcessAttributes = None
+    else:
+        lpProcessAttributes = byref(lpProcessAttributes)
+    if not lpThreadAttributes:
+        lpThreadAttributes = None
+    else:
+        lpThreadAttributes = byref(lpThreadAttributes)
+    if not lpStartupInfo:
+        lpStartupInfo              = STARTUPINFO()
+        lpStartupInfo.cb           = sizeof(STARTUPINFO)
+        lpStartupInfo.lpReserved   = 0
+        lpStartupInfo.lpDesktop    = 0
+        lpStartupInfo.lpTitle      = 0
+        lpStartupInfo.dwFlags      = 0
+        lpStartupInfo.cbReserved2  = 0
+        lpStartupInfo.lpReserved2  = 0
+    lpProcessInformation              = PROCESS_INFORMATION()
+    lpProcessInformation.hProcess     = INVALID_HANDLE_VALUE
+    lpProcessInformation.hThread      = INVALID_HANDLE_VALUE
+    lpProcessInformation.dwProcessId  = 0
+    lpProcessInformation.dwThreadId   = 0
+    _CreateProcessW(lpApplicationName, lpCommandLine, lpProcessAttributes, lpThreadAttributes, bool(bInheritHandles), dwCreationFlags, lpEnvironment, lpCurrentDirectory, byref(lpStartupInfo), byref(lpProcessInformation))
+    return ProcessInformation(lpProcessInformation)
+
+CreateProcess = GuessStringType(CreateProcessA, CreateProcessW)
+
+# BOOL WINAPI InitializeProcThreadAttributeList(
+#   __out_opt   LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
+#   __in        DWORD dwAttributeCount,
+#   __reserved  DWORD dwFlags,
+#   __inout     PSIZE_T lpSize
+# );
+def InitializeProcThreadAttributeList(dwAttributeCount):
+    _InitializeProcThreadAttributeList = windll.kernel32.InitializeProcThreadAttributeList
+    _InitializeProcThreadAttributeList.argtypes = [LPPROC_THREAD_ATTRIBUTE_LIST, DWORD, DWORD, PSIZE_T]
+    _InitializeProcThreadAttributeList.restype  = bool
+
+    Size = SIZE_T(0)
+    _InitializeProcThreadAttributeList(None, dwAttributeCount, 0, byref(Size))
+    RaiseIfZero(Size.value)
+    AttributeList = (BYTE * Size.value)()
+    success = _InitializeProcThreadAttributeList(byref(AttributeList), dwAttributeCount, 0, byref(Size))
+    RaiseIfZero(success)
+    return AttributeList
+
+# BOOL WINAPI UpdateProcThreadAttribute(
+#   __inout    LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
+#   __in       DWORD dwFlags,
+#   __in       DWORD_PTR Attribute,
+#   __in       PVOID lpValue,
+#   __in       SIZE_T cbSize,
+#   __out_opt  PVOID lpPreviousValue,
+#   __in_opt   PSIZE_T lpReturnSize
+# );
+def UpdateProcThreadAttribute(lpAttributeList, Attribute, Value, cbSize = None):
+    _UpdateProcThreadAttribute = windll.kernel32.UpdateProcThreadAttribute
+    _UpdateProcThreadAttribute.argtypes = [LPPROC_THREAD_ATTRIBUTE_LIST, DWORD, DWORD_PTR, PVOID, SIZE_T, PVOID, PSIZE_T]
+    _UpdateProcThreadAttribute.restype  = bool
+    _UpdateProcThreadAttribute.errcheck = RaiseIfZero
+
+    if cbSize is None:
+        cbSize = sizeof(Value)
+    _UpdateProcThreadAttribute(byref(lpAttributeList), 0, Attribute, byref(Value), cbSize, None, None)
+
+# VOID WINAPI DeleteProcThreadAttributeList(
+#   __inout  LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList
+# );
+def DeleteProcThreadAttributeList(lpAttributeList):
+    _DeleteProcThreadAttributeList = windll.kernel32.DeleteProcThreadAttributeList
+    _DeleteProcThreadAttributeList.restype = None
+    _DeleteProcThreadAttributeList(byref(lpAttributeList))
+
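+# Illustrative sketch (not part of the API): the three wrappers above are meant
+# to be used together around CreateProcess with extended startup info.
+# PROC_THREAD_ATTRIBUTE_PARENT_PROCESS and STARTUPINFOEX are standard Windows
+# definitions that may or may not be available in this module; hParentProcess
+# is a hypothetical handle to the desired parent process.
+#
+#     attr_list = InitializeProcThreadAttributeList(1)
+#     hParent   = HANDLE(hParentProcess)
+#     UpdateProcThreadAttribute(attr_list,
+#                               PROC_THREAD_ATTRIBUTE_PARENT_PROCESS, hParent)
+#     # ... embed attr_list in a STARTUPINFOEX and pass it to CreateProcess
+#     #     together with EXTENDED_STARTUPINFO_PRESENT ...
+#     DeleteProcThreadAttributeList(attr_list)
+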
+# HANDLE WINAPI OpenProcess(
+#   __in  DWORD dwDesiredAccess,
+#   __in  BOOL bInheritHandle,
+#   __in  DWORD dwProcessId
+# );
+def OpenProcess(dwDesiredAccess, bInheritHandle, dwProcessId):
+    _OpenProcess = windll.kernel32.OpenProcess
+    _OpenProcess.argtypes = [DWORD, BOOL, DWORD]
+    _OpenProcess.restype  = HANDLE
+
+    hProcess = _OpenProcess(dwDesiredAccess, bool(bInheritHandle), dwProcessId)
+    if hProcess == NULL:
+        raise ctypes.WinError()
+    return ProcessHandle(hProcess, dwAccess = dwDesiredAccess)
+
+# HANDLE WINAPI OpenThread(
+#   __in  DWORD dwDesiredAccess,
+#   __in  BOOL bInheritHandle,
+#   __in  DWORD dwThreadId
+# );
+def OpenThread(dwDesiredAccess, bInheritHandle, dwThreadId):
+    _OpenThread = windll.kernel32.OpenThread
+    _OpenThread.argtypes = [DWORD, BOOL, DWORD]
+    _OpenThread.restype  = HANDLE
+
+    hThread = _OpenThread(dwDesiredAccess, bool(bInheritHandle), dwThreadId)
+    if hThread == NULL:
+        raise ctypes.WinError()
+    return ThreadHandle(hThread, dwAccess = dwDesiredAccess)
+
+# DWORD WINAPI SuspendThread(
+#   __in  HANDLE hThread
+# );
+def SuspendThread(hThread):
+    _SuspendThread = windll.kernel32.SuspendThread
+    _SuspendThread.argtypes = [HANDLE]
+    _SuspendThread.restype  = DWORD
+
+    previousCount = _SuspendThread(hThread)
+    if previousCount == DWORD(-1).value:
+        raise ctypes.WinError()
+    return previousCount
+
+# DWORD WINAPI ResumeThread(
+#   __in  HANDLE hThread
+# );
+def ResumeThread(hThread):
+    _ResumeThread = windll.kernel32.ResumeThread
+    _ResumeThread.argtypes = [HANDLE]
+    _ResumeThread.restype  = DWORD
+
+    previousCount = _ResumeThread(hThread)
+    if previousCount == DWORD(-1).value:
+        raise ctypes.WinError()
+    return previousCount
+
+# BOOL WINAPI TerminateThread(
+#   __inout  HANDLE hThread,
+#   __in     DWORD dwExitCode
+# );
+def TerminateThread(hThread, dwExitCode = 0):
+    _TerminateThread = windll.kernel32.TerminateThread
+    _TerminateThread.argtypes = [HANDLE, DWORD]
+    _TerminateThread.restype  = bool
+    _TerminateThread.errcheck = RaiseIfZero
+    _TerminateThread(hThread, dwExitCode)
+
+# BOOL WINAPI TerminateProcess(
+#   __inout  HANDLE hProcess,
+#   __in     DWORD dwExitCode
+# );
+def TerminateProcess(hProcess, dwExitCode = 0):
+    _TerminateProcess = windll.kernel32.TerminateProcess
+    _TerminateProcess.argtypes = [HANDLE, DWORD]
+    _TerminateProcess.restype  = bool
+    _TerminateProcess.errcheck = RaiseIfZero
+    _TerminateProcess(hProcess, dwExitCode)
+
+# DWORD WINAPI GetCurrentProcessId(void);
+def GetCurrentProcessId():
+    _GetCurrentProcessId = windll.kernel32.GetCurrentProcessId
+    _GetCurrentProcessId.argtypes = []
+    _GetCurrentProcessId.restype  = DWORD
+    return _GetCurrentProcessId()
+
+# DWORD WINAPI GetCurrentThreadId(void);
+def GetCurrentThreadId():
+    _GetCurrentThreadId = windll.kernel32.GetCurrentThreadId
+    _GetCurrentThreadId.argtypes = []
+    _GetCurrentThreadId.restype  = DWORD
+    return _GetCurrentThreadId()
+
+# DWORD WINAPI GetProcessId(
+#   __in  HANDLE hProcess
+# );
+def GetProcessId(hProcess):
+    _GetProcessId = windll.kernel32.GetProcessId
+    _GetProcessId.argtypes = [HANDLE]
+    _GetProcessId.restype  = DWORD
+    _GetProcessId.errcheck = RaiseIfZero
+    return _GetProcessId(hProcess)
+
+# DWORD WINAPI GetThreadId(
+#   __in  HANDLE hThread
+# );
+def GetThreadId(hThread):
+    _GetThreadId = windll.kernel32.GetThreadId
+    _GetThreadId.argtypes = [HANDLE]
+    _GetThreadId.restype  = DWORD
+
+    dwThreadId = _GetThreadId(hThread)
+    if dwThreadId == 0:
+        raise ctypes.WinError()
+    return dwThreadId
+
+# DWORD WINAPI GetProcessIdOfThread(
+#   __in  HANDLE hThread
+# );
+def GetProcessIdOfThread(hThread):
+    _GetProcessIdOfThread = windll.kernel32.GetProcessIdOfThread
+    _GetProcessIdOfThread.argtypes = [HANDLE]
+    _GetProcessIdOfThread.restype  = DWORD
+
+    dwProcessId = _GetProcessIdOfThread(hThread)
+    if dwProcessId == 0:
+        raise ctypes.WinError()
+    return dwProcessId
+
+# BOOL WINAPI GetExitCodeProcess(
+#   __in   HANDLE hProcess,
+#   __out  LPDWORD lpExitCode
+# );
+def GetExitCodeProcess(hProcess):
+    _GetExitCodeProcess = windll.kernel32.GetExitCodeProcess
+    _GetExitCodeProcess.argtypes = [HANDLE, PDWORD]
+    _GetExitCodeProcess.restype  = bool
+    _GetExitCodeProcess.errcheck = RaiseIfZero
+
+    lpExitCode = DWORD(0)
+    _GetExitCodeProcess(hProcess, byref(lpExitCode))
+    return lpExitCode.value
+
+# BOOL WINAPI GetExitCodeThread(
+#   __in   HANDLE hThread,
+#   __out  LPDWORD lpExitCode
+# );
+def GetExitCodeThread(hThread):
+    _GetExitCodeThread = windll.kernel32.GetExitCodeThread
+    _GetExitCodeThread.argtypes = [HANDLE, PDWORD]
+    _GetExitCodeThread.restype  = bool
+    _GetExitCodeThread.errcheck = RaiseIfZero
+
+    lpExitCode = DWORD(0)
+    _GetExitCodeThread(hThread, byref(lpExitCode))
+    return lpExitCode.value
+
+# DWORD WINAPI GetProcessVersion(
+#   __in  DWORD ProcessId
+# );
+def GetProcessVersion(ProcessId):
+    _GetProcessVersion = windll.kernel32.GetProcessVersion
+    _GetProcessVersion.argtypes = [DWORD]
+    _GetProcessVersion.restype  = DWORD
+
+    retval = _GetProcessVersion(ProcessId)
+    if retval == 0:
+        raise ctypes.WinError()
+    return retval
+
+# DWORD WINAPI GetPriorityClass(
+#   __in  HANDLE hProcess
+# );
+def GetPriorityClass(hProcess):
+    _GetPriorityClass = windll.kernel32.GetPriorityClass
+    _GetPriorityClass.argtypes = [HANDLE]
+    _GetPriorityClass.restype  = DWORD
+
+    retval = _GetPriorityClass(hProcess)
+    if retval == 0:
+        raise ctypes.WinError()
+    return retval
+
+# BOOL WINAPI SetPriorityClass(
+#   __in  HANDLE hProcess,
+#   __in  DWORD dwPriorityClass
+# );
+def SetPriorityClass(hProcess, dwPriorityClass = NORMAL_PRIORITY_CLASS):
+    _SetPriorityClass = windll.kernel32.SetPriorityClass
+    _SetPriorityClass.argtypes = [HANDLE, DWORD]
+    _SetPriorityClass.restype  = bool
+    _SetPriorityClass.errcheck = RaiseIfZero
+    _SetPriorityClass(hProcess, dwPriorityClass)
+
+# BOOL WINAPI GetProcessPriorityBoost(
+#   __in   HANDLE hProcess,
+#   __out  PBOOL pDisablePriorityBoost
+# );
+def GetProcessPriorityBoost(hProcess):
+    _GetProcessPriorityBoost = windll.kernel32.GetProcessPriorityBoost
+    _GetProcessPriorityBoost.argtypes = [HANDLE, PBOOL]
+    _GetProcessPriorityBoost.restype  = bool
+    _GetProcessPriorityBoost.errcheck = RaiseIfZero
+
+    pDisablePriorityBoost = BOOL(False)
+    _GetProcessPriorityBoost(hProcess, byref(pDisablePriorityBoost))
+    return bool(pDisablePriorityBoost.value)
+
+# BOOL WINAPI SetProcessPriorityBoost(
+#   __in  HANDLE hProcess,
+#   __in  BOOL DisablePriorityBoost
+# );
+def SetProcessPriorityBoost(hProcess, DisablePriorityBoost):
+    _SetProcessPriorityBoost = windll.kernel32.SetProcessPriorityBoost
+    _SetProcessPriorityBoost.argtypes = [HANDLE, BOOL]
+    _SetProcessPriorityBoost.restype  = bool
+    _SetProcessPriorityBoost.errcheck = RaiseIfZero
+    _SetProcessPriorityBoost(hProcess, bool(DisablePriorityBoost))
+
+# BOOL WINAPI GetProcessAffinityMask(
+#   __in   HANDLE hProcess,
+#   __out  PDWORD_PTR lpProcessAffinityMask,
+#   __out  PDWORD_PTR lpSystemAffinityMask
+# );
+def GetProcessAffinityMask(hProcess):
+    _GetProcessAffinityMask = windll.kernel32.GetProcessAffinityMask
+    _GetProcessAffinityMask.argtypes = [HANDLE, PDWORD_PTR, PDWORD_PTR]
+    _GetProcessAffinityMask.restype  = bool
+    _GetProcessAffinityMask.errcheck = RaiseIfZero
+
+    lpProcessAffinityMask = DWORD_PTR(0)
+    lpSystemAffinityMask  = DWORD_PTR(0)
+    _GetProcessAffinityMask(hProcess, byref(lpProcessAffinityMask), byref(lpSystemAffinityMask))
+    return lpProcessAffinityMask.value, lpSystemAffinityMask.value
+
+# BOOL WINAPI SetProcessAffinityMask(
+#   __in  HANDLE hProcess,
+#   __in  DWORD_PTR dwProcessAffinityMask
+# );
+def SetProcessAffinityMask(hProcess, dwProcessAffinityMask):
+    _SetProcessAffinityMask = windll.kernel32.SetProcessAffinityMask
+    _SetProcessAffinityMask.argtypes = [HANDLE, DWORD_PTR]
+    _SetProcessAffinityMask.restype  = bool
+    _SetProcessAffinityMask.errcheck = RaiseIfZero
+    _SetProcessAffinityMask(hProcess, dwProcessAffinityMask)
+
+#------------------------------------------------------------------------------
+# Toolhelp32 API
+
+# HANDLE WINAPI CreateToolhelp32Snapshot(
+#   __in  DWORD dwFlags,
+#   __in  DWORD th32ProcessID
+# );
+def CreateToolhelp32Snapshot(dwFlags = TH32CS_SNAPALL, th32ProcessID = 0):
+    _CreateToolhelp32Snapshot = windll.kernel32.CreateToolhelp32Snapshot
+    _CreateToolhelp32Snapshot.argtypes = [DWORD, DWORD]
+    _CreateToolhelp32Snapshot.restype  = HANDLE
+
+    hSnapshot = _CreateToolhelp32Snapshot(dwFlags, th32ProcessID)
+    if hSnapshot == INVALID_HANDLE_VALUE:
+        raise ctypes.WinError()
+    return SnapshotHandle(hSnapshot)
+
+# BOOL WINAPI Process32First(
+#   __in     HANDLE hSnapshot,
+#   __inout  LPPROCESSENTRY32 lppe
+# );
+def Process32First(hSnapshot):
+    _Process32First = windll.kernel32.Process32First
+    _Process32First.argtypes = [HANDLE, LPPROCESSENTRY32]
+    _Process32First.restype  = bool
+
+    pe        = PROCESSENTRY32()
+    pe.dwSize = sizeof(PROCESSENTRY32)
+    success = _Process32First(hSnapshot, byref(pe))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return pe
+
+# BOOL WINAPI Process32Next(
+#   __in     HANDLE hSnapshot,
+#   __out  LPPROCESSENTRY32 lppe
+# );
+def Process32Next(hSnapshot, pe = None):
+    _Process32Next = windll.kernel32.Process32Next
+    _Process32Next.argtypes = [HANDLE, LPPROCESSENTRY32]
+    _Process32Next.restype  = bool
+
+    if pe is None:
+        pe = PROCESSENTRY32()
+    pe.dwSize = sizeof(PROCESSENTRY32)
+    success = _Process32Next(hSnapshot, byref(pe))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return pe
+
+# BOOL WINAPI Thread32First(
+#   __in     HANDLE hSnapshot,
+#   __inout  LPTHREADENTRY32 lpte
+# );
+def Thread32First(hSnapshot):
+    _Thread32First = windll.kernel32.Thread32First
+    _Thread32First.argtypes = [HANDLE, LPTHREADENTRY32]
+    _Thread32First.restype  = bool
+
+    te = THREADENTRY32()
+    te.dwSize = sizeof(THREADENTRY32)
+    success = _Thread32First(hSnapshot, byref(te))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return te
+
+# BOOL WINAPI Thread32Next(
+#   __in     HANDLE hSnapshot,
+#   __out  LPTHREADENTRY32 lpte
+# );
+def Thread32Next(hSnapshot, te = None):
+    _Thread32Next = windll.kernel32.Thread32Next
+    _Thread32Next.argtypes = [HANDLE, LPTHREADENTRY32]
+    _Thread32Next.restype  = bool
+
+    if te is None:
+        te = THREADENTRY32()
+    te.dwSize = sizeof(THREADENTRY32)
+    success = _Thread32Next(hSnapshot, byref(te))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return te
+
+# BOOL WINAPI Module32First(
+#   __in     HANDLE hSnapshot,
+#   __inout  LPMODULEENTRY32 lpme
+# );
+def Module32First(hSnapshot):
+    _Module32First = windll.kernel32.Module32First
+    _Module32First.argtypes = [HANDLE, LPMODULEENTRY32]
+    _Module32First.restype  = bool
+
+    me = MODULEENTRY32()
+    me.dwSize = sizeof(MODULEENTRY32)
+    success = _Module32First(hSnapshot, byref(me))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return me
+
+# BOOL WINAPI Module32Next(
+#   __in     HANDLE hSnapshot,
+#   __out  LPMODULEENTRY32 lpme
+# );
+def Module32Next(hSnapshot, me = None):
+    _Module32Next = windll.kernel32.Module32Next
+    _Module32Next.argtypes = [HANDLE, LPMODULEENTRY32]
+    _Module32Next.restype  = bool
+
+    if me is None:
+        me = MODULEENTRY32()
+    me.dwSize = sizeof(MODULEENTRY32)
+    success = _Module32Next(hSnapshot, byref(me))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return me
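+
+# Illustrative sketch, not part of the upstream winappdbg API: the snapshot
+# helpers above are meant to be used as First/Next iteration pairs. This
+# private helper (underscore prefixed, so it is not exported) shows the
+# pattern for processes; Thread32First/Next and Module32First/Next work the
+# same way. It assumes PROCESSENTRY32 exposes the standard th32ProcessID
+# field and that the SnapshotHandle wrapper exposes close(), as in upstream
+# winappdbg.
+def _example_enum_process_ids():
+    hSnapshot = CreateToolhelp32Snapshot()     # defaults to TH32CS_SNAPALL
+    try:
+        pids = []
+        pe = Process32First(hSnapshot)
+        while pe is not None:
+            pids.append(pe.th32ProcessID)
+            pe = Process32Next(hSnapshot, pe)
+        return pids
+    finally:
+        hSnapshot.close()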
+
+# BOOL WINAPI Heap32First(
+#   __inout  LPHEAPENTRY32 lphe,
+#   __in     DWORD th32ProcessID,
+#   __in     ULONG_PTR th32HeapID
+# );
+def Heap32First(th32ProcessID, th32HeapID):
+    _Heap32First = windll.kernel32.Heap32First
+    _Heap32First.argtypes = [LPHEAPENTRY32, DWORD, ULONG_PTR]
+    _Heap32First.restype  = bool
+
+    he = HEAPENTRY32()
+    he.dwSize = sizeof(HEAPENTRY32)
+    success = _Heap32First(byref(he), th32ProcessID, th32HeapID)
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return he
+
+# BOOL WINAPI Heap32Next(
+#   __out  LPHEAPENTRY32 lphe
+# );
+def Heap32Next(he):
+    _Heap32Next = windll.kernel32.Heap32Next
+    _Heap32Next.argtypes = [LPHEAPENTRY32]
+    _Heap32Next.restype  = bool
+
+    he.dwSize = sizeof(HEAPENTRY32)
+    success = _Heap32Next(byref(he))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return he
+
+# BOOL WINAPI Heap32ListFirst(
+#   __in     HANDLE hSnapshot,
+#   __inout  LPHEAPLIST32 lphl
+# );
+def Heap32ListFirst(hSnapshot):
+    _Heap32ListFirst = windll.kernel32.Heap32ListFirst
+    _Heap32ListFirst.argtypes = [HANDLE, LPHEAPLIST32]
+    _Heap32ListFirst.restype  = bool
+
+    hl = HEAPLIST32()
+    hl.dwSize = sizeof(HEAPLIST32)
+    success = _Heap32ListFirst(hSnapshot, byref(hl))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return hl
+
+# BOOL WINAPI Heap32ListNext(
+#   __in     HANDLE hSnapshot,
+#   __out  LPHEAPLIST32 lphl
+# );
+def Heap32ListNext(hSnapshot, hl = None):
+    _Heap32ListNext = windll.kernel32.Heap32ListNext
+    _Heap32ListNext.argtypes = [HANDLE, LPHEAPLIST32]
+    _Heap32ListNext.restype  = bool
+
+    if hl is None:
+        hl = HEAPLIST32()
+    hl.dwSize = sizeof(HEAPLIST32)
+    success = _Heap32ListNext(hSnapshot, byref(hl))
+    if not success:
+        if GetLastError() == ERROR_NO_MORE_FILES:
+            return None
+        raise ctypes.WinError()
+    return hl
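+
+# Illustrative sketch, not part of the upstream winappdbg API: heap walking
+# nests two First/Next loops, one over the heap lists of a process and one
+# over the blocks of each heap. The field names used below (th32ProcessID,
+# th32HeapID, dwAddress, dwBlockSize) are the standard Toolhelp32 ones and
+# are assumed to match the HEAPLIST32/HEAPENTRY32 definitions in this module.
+def _example_enum_heap_blocks(dwProcessId):
+    hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPALL, dwProcessId)
+    try:
+        blocks = []
+        hl = Heap32ListFirst(hSnapshot)
+        while hl is not None:
+            he = Heap32First(hl.th32ProcessID, hl.th32HeapID)
+            while he is not None:
+                blocks.append( (he.dwAddress, he.dwBlockSize) )
+                he = Heap32Next(he)
+            hl = Heap32ListNext(hSnapshot, hl)
+        return blocks
+    finally:
+        hSnapshot.close()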
+
+# BOOL WINAPI Toolhelp32ReadProcessMemory(
+#   __in   DWORD th32ProcessID,
+#   __in   LPCVOID lpBaseAddress,
+#   __out  LPVOID lpBuffer,
+#   __in   SIZE_T cbRead,
+#   __out  SIZE_T lpNumberOfBytesRead
+# );
+def Toolhelp32ReadProcessMemory(th32ProcessID, lpBaseAddress, cbRead):
+    _Toolhelp32ReadProcessMemory = windll.kernel32.Toolhelp32ReadProcessMemory
+    _Toolhelp32ReadProcessMemory.argtypes = [DWORD, LPVOID, LPVOID, SIZE_T, POINTER(SIZE_T)]
+    _Toolhelp32ReadProcessMemory.restype  = bool
+
+    lpBuffer            = ctypes.create_string_buffer('', cbRead)
+    lpNumberOfBytesRead = SIZE_T(0)
+    success = _Toolhelp32ReadProcessMemory(th32ProcessID, lpBaseAddress, lpBuffer, cbRead, byref(lpNumberOfBytesRead))
+    if not success and GetLastError() != ERROR_PARTIAL_COPY:
+        raise ctypes.WinError()
+    return str(lpBuffer.raw)[:lpNumberOfBytesRead.value]
+
+#------------------------------------------------------------------------------
+# Miscellaneous system information
+
+# BOOL WINAPI GetProcessDEPPolicy(
+#  __in   HANDLE hProcess,
+#  __out  LPDWORD lpFlags,
+#  __out  PBOOL lpPermanent
+# );
+# Contribution by ivanlef0u (http://ivanlef0u.fr/)
+# Available on Windows XP SP3 and later only.
+def GetProcessDEPPolicy(hProcess):
+    _GetProcessDEPPolicy = windll.kernel32.GetProcessDEPPolicy
+    _GetProcessDEPPolicy.argtypes = [HANDLE, LPDWORD, PBOOL]
+    _GetProcessDEPPolicy.restype  = bool
+    _GetProcessDEPPolicy.errcheck = RaiseIfZero
+
+    lpFlags = DWORD(0)
+    lpPermanent = BOOL(0)
+    _GetProcessDEPPolicy(hProcess, byref(lpFlags), byref(lpPermanent))
+    return (lpFlags.value, lpPermanent.value)
+
+# DWORD WINAPI GetCurrentProcessorNumber(void);
+def GetCurrentProcessorNumber():
+    _GetCurrentProcessorNumber = windll.kernel32.GetCurrentProcessorNumber
+    _GetCurrentProcessorNumber.argtypes = []
+    _GetCurrentProcessorNumber.restype  = DWORD
+    _GetCurrentProcessorNumber.errcheck = RaiseIfZero
+    return _GetCurrentProcessorNumber()
+
+# VOID WINAPI FlushProcessWriteBuffers(void);
+def FlushProcessWriteBuffers():
+    _FlushProcessWriteBuffers = windll.kernel32.FlushProcessWriteBuffers
+    _FlushProcessWriteBuffers.argtypes = []
+    _FlushProcessWriteBuffers.restype  = None
+    _FlushProcessWriteBuffers()
+
+# BOOL WINAPI GetLogicalProcessorInformation(
+#   __out    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer,
+#   __inout  PDWORD ReturnLength
+# );
+
+# TO DO http://msdn.microsoft.com/en-us/library/ms683194(VS.85).aspx
+
+# BOOL WINAPI GetProcessIoCounters(
+#   __in   HANDLE hProcess,
+#   __out  PIO_COUNTERS lpIoCounters
+# );
+
+# TO DO http://msdn.microsoft.com/en-us/library/ms683218(VS.85).aspx
+
+# DWORD WINAPI GetGuiResources(
+#   __in  HANDLE hProcess,
+#   __in  DWORD uiFlags
+# );
+def GetGuiResources(hProcess, uiFlags = GR_GDIOBJECTS):
+    _GetGuiResources = windll.kernel32.GetGuiResources
+    _GetGuiResources.argtypes = [HANDLE, DWORD]
+    _GetGuiResources.restype  = DWORD
+
+    dwCount = _GetGuiResources(hProcess, uiFlags)
+    if dwCount == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return dwCount
+
+# BOOL WINAPI GetProcessHandleCount(
+#   __in     HANDLE hProcess,
+#   __inout  PDWORD pdwHandleCount
+# );
+def GetProcessHandleCount(hProcess):
+    _GetProcessHandleCount = windll.kernel32.GetProcessHandleCount
+    _GetProcessHandleCount.argtypes = [HANDLE, PDWORD]
+    _GetProcessHandleCount.restype  = DWORD
+    _GetProcessHandleCount.errcheck = RaiseIfZero
+
+    pdwHandleCount = DWORD(0)
+    _GetProcessHandleCount(hProcess, byref(pdwHandleCount))
+    return pdwHandleCount.value
+
+# BOOL WINAPI GetProcessTimes(
+#   __in   HANDLE hProcess,
+#   __out  LPFILETIME lpCreationTime,
+#   __out  LPFILETIME lpExitTime,
+#   __out  LPFILETIME lpKernelTime,
+#   __out  LPFILETIME lpUserTime
+# );
+def GetProcessTimes(hProcess = None):
+    _GetProcessTimes = windll.kernel32.GetProcessTimes
+    _GetProcessTimes.argtypes = [HANDLE, LPFILETIME, LPFILETIME, LPFILETIME, LPFILETIME]
+    _GetProcessTimes.restype  = bool
+    _GetProcessTimes.errcheck = RaiseIfZero
+
+    if hProcess is None:
+        hProcess = GetCurrentProcess()
+
+    CreationTime = FILETIME()
+    ExitTime     = FILETIME()
+    KernelTime   = FILETIME()
+    UserTime     = FILETIME()
+
+    _GetProcessTimes(hProcess, byref(CreationTime), byref(ExitTime), byref(KernelTime), byref(UserTime))
+
+    return (CreationTime, ExitTime, KernelTime, UserTime)
+
+# BOOL WINAPI FileTimeToSystemTime(
+#   __in   const FILETIME *lpFileTime,
+#   __out  LPSYSTEMTIME lpSystemTime
+# );
+def FileTimeToSystemTime(lpFileTime):
+    _FileTimeToSystemTime = windll.kernel32.FileTimeToSystemTime
+    _FileTimeToSystemTime.argtypes = [LPFILETIME, LPSYSTEMTIME]
+    _FileTimeToSystemTime.restype  = bool
+    _FileTimeToSystemTime.errcheck = RaiseIfZero
+
+    if isinstance(lpFileTime, FILETIME):
+        FileTime = lpFileTime
+    else:
+        FileTime = FILETIME()
+        FileTime.dwLowDateTime  = lpFileTime & 0xFFFFFFFF
+        FileTime.dwHighDateTime = lpFileTime >> 32
+    SystemTime = SYSTEMTIME()
+    _FileTimeToSystemTime(byref(FileTime), byref(SystemTime))
+    return SystemTime
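+
+# Illustrative sketch, not part of the upstream winappdbg API: GetProcessTimes
+# returns raw FILETIME structures, and FileTimeToSystemTime converts one of
+# them into a broken down SYSTEMTIME. The SYSTEMTIME field names used below
+# are the standard Win32 ones and are assumed to match the .defines module.
+def _example_process_creation_time(hProcess = None):
+    CreationTime, ExitTime, KernelTime, UserTime = GetProcessTimes(hProcess)
+    st = FileTimeToSystemTime(CreationTime)
+    return (st.wYear, st.wMonth, st.wDay, st.wHour, st.wMinute, st.wSecond)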
+
+# void WINAPI GetSystemTimeAsFileTime(
+#   __out  LPFILETIME lpSystemTimeAsFileTime
+# );
+def GetSystemTimeAsFileTime():
+    _GetSystemTimeAsFileTime = windll.kernel32.GetSystemTimeAsFileTime
+    _GetSystemTimeAsFileTime.argtypes = [LPFILETIME]
+    _GetSystemTimeAsFileTime.restype  = None
+
+    FileTime = FILETIME()
+    _GetSystemTimeAsFileTime(byref(FileTime))
+    return FileTime
+
+#------------------------------------------------------------------------------
+# Global ATOM API
+
+# ATOM GlobalAddAtom(
+#   __in  LPCTSTR lpString
+# );
+def GlobalAddAtomA(lpString):
+    _GlobalAddAtomA = windll.kernel32.GlobalAddAtomA
+    _GlobalAddAtomA.argtypes = [LPSTR]
+    _GlobalAddAtomA.restype  = ATOM
+    _GlobalAddAtomA.errcheck = RaiseIfZero
+    return _GlobalAddAtomA(lpString)
+
+def GlobalAddAtomW(lpString):
+    _GlobalAddAtomW = windll.kernel32.GlobalAddAtomW
+    _GlobalAddAtomW.argtypes = [LPWSTR]
+    _GlobalAddAtomW.restype  = ATOM
+    _GlobalAddAtomW.errcheck = RaiseIfZero
+    return _GlobalAddAtomW(lpString)
+
+GlobalAddAtom = GuessStringType(GlobalAddAtomA, GlobalAddAtomW)
+
+# ATOM GlobalFindAtom(
+#   __in  LPCTSTR lpString
+# );
+def GlobalFindAtomA(lpString):
+    _GlobalFindAtomA = windll.kernel32.GlobalFindAtomA
+    _GlobalFindAtomA.argtypes = [LPSTR]
+    _GlobalFindAtomA.restype  = ATOM
+    _GlobalFindAtomA.errcheck = RaiseIfZero
+    return _GlobalFindAtomA(lpString)
+
+def GlobalFindAtomW(lpString):
+    _GlobalFindAtomW = windll.kernel32.GlobalFindAtomW
+    _GlobalFindAtomW.argtypes = [LPWSTR]
+    _GlobalFindAtomW.restype  = ATOM
+    _GlobalFindAtomW.errcheck = RaiseIfZero
+    return _GlobalFindAtomW(lpString)
+
+GlobalFindAtom = GuessStringType(GlobalFindAtomA, GlobalFindAtomW)
+
+# UINT GlobalGetAtomName(
+#   __in   ATOM nAtom,
+#   __out  LPTSTR lpBuffer,
+#   __in   int nSize
+# );
+def GlobalGetAtomNameA(nAtom):
+    _GlobalGetAtomNameA = windll.kernel32.GlobalGetAtomNameA
+    _GlobalGetAtomNameA.argtypes = [ATOM, LPSTR, ctypes.c_int]
+    _GlobalGetAtomNameA.restype  = UINT
+    _GlobalGetAtomNameA.errcheck = RaiseIfZero
+
+    nSize = 64
+    while 1:
+        lpBuffer = ctypes.create_string_buffer("", nSize)
+        nCopied  = _GlobalGetAtomNameA(nAtom, lpBuffer, nSize)
+        if nCopied < nSize - 1:
+            break
+        nSize = nSize + 64
+    return lpBuffer.value
+
+def GlobalGetAtomNameW(nAtom):
+    _GlobalGetAtomNameW = windll.kernel32.GlobalGetAtomNameW
+    _GlobalGetAtomNameW.argtypes = [ATOM, LPWSTR, ctypes.c_int]
+    _GlobalGetAtomNameW.restype  = UINT
+    _GlobalGetAtomNameW.errcheck = RaiseIfZero
+
+    nSize = 64
+    while 1:
+        lpBuffer = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied  = _GlobalGetAtomNameW(nAtom, lpBuffer, nSize)
+        if nCopied < nSize - 1:
+            break
+        nSize = nSize + 64
+    return lpBuffer.value
+
+GlobalGetAtomName = GuessStringType(GlobalGetAtomNameA, GlobalGetAtomNameW)
+
+# ATOM GlobalDeleteAtom(
+#   __in  ATOM nAtom
+# );
+def GlobalDeleteAtom(nAtom):
+    _GlobalDeleteAtom = windll.kernel32.GlobalDeleteAtom
+    _GlobalDeleteAtom.argtypes = [ATOM]
+    _GlobalDeleteAtom.restype  = ATOM
+    SetLastError(ERROR_SUCCESS)
+    _GlobalDeleteAtom(nAtom)
+    error = GetLastError()
+    if error != ERROR_SUCCESS:
+        raise ctypes.WinError(error)
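+
+# Illustrative sketch, not part of the upstream winappdbg API: a global atom
+# round trip using the wrappers above. GuessStringType dispatches to the A or
+# W variant depending on the string type passed in; the default atom name is
+# just an example value.
+def _example_atom_round_trip(name = u"ExampleAtomName"):
+    nAtom = GlobalAddAtom(name)
+    try:
+        assert GlobalFindAtom(name) == nAtom
+        return GlobalGetAtomName(nAtom)
+    finally:
+        GlobalDeleteAtom(nAtom)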
+
+#------------------------------------------------------------------------------
+# Wow64
+
+# DWORD WINAPI Wow64SuspendThread(
+#   _In_  HANDLE hThread
+# );
+def Wow64SuspendThread(hThread):
+    _Wow64SuspendThread = windll.kernel32.Wow64SuspendThread
+    _Wow64SuspendThread.argtypes = [HANDLE]
+    _Wow64SuspendThread.restype  = DWORD
+
+    previousCount = _Wow64SuspendThread(hThread)
+    if previousCount == DWORD(-1).value:
+        raise ctypes.WinError()
+    return previousCount
+
+# BOOLEAN WINAPI Wow64EnableWow64FsRedirection(
+#   __in  BOOLEAN Wow64FsEnableRedirection
+# );
+def Wow64EnableWow64FsRedirection(Wow64FsEnableRedirection):
+    """
+    This function may not work reliably when there are nested calls. Therefore,
+    this function has been replaced by the L{Wow64DisableWow64FsRedirection}
+    and L{Wow64RevertWow64FsRedirection} functions.
+
+    @see: U{http://msdn.microsoft.com/en-us/library/windows/desktop/aa365744(v=vs.85).aspx}
+    """
+    _Wow64EnableWow64FsRedirection = windll.kernel32.Wow64EnableWow64FsRedirection
+    _Wow64EnableWow64FsRedirection.argtypes = [BOOLEAN]
+    _Wow64EnableWow64FsRedirection.restype  = BOOLEAN
+    _Wow64EnableWow64FsRedirection.errcheck = RaiseIfZero
+    _Wow64EnableWow64FsRedirection(Wow64FsEnableRedirection)
+
+# BOOL WINAPI Wow64DisableWow64FsRedirection(
+#   __out  PVOID *OldValue
+# );
+def Wow64DisableWow64FsRedirection():
+    _Wow64DisableWow64FsRedirection = windll.kernel32.Wow64DisableWow64FsRedirection
+    _Wow64DisableWow64FsRedirection.argtypes = [PPVOID]
+    _Wow64DisableWow64FsRedirection.restype  = BOOL
+    _Wow64DisableWow64FsRedirection.errcheck = RaiseIfZero
+
+    OldValue = PVOID(None)
+    _Wow64DisableWow64FsRedirection(byref(OldValue))
+    return OldValue
+
+# BOOL WINAPI Wow64RevertWow64FsRedirection(
+#   __in  PVOID OldValue
+# );
+def Wow64RevertWow64FsRedirection(OldValue):
+    _Wow64RevertWow64FsRedirection = windll.kernel32.Wow64RevertWow64FsRedirection
+    _Wow64RevertWow64FsRedirection.argtypes = [PVOID]
+    _Wow64RevertWow64FsRedirection.restype  = BOOL
+    _Wow64RevertWow64FsRedirection.errcheck = RaiseIfZero
+    _Wow64RevertWow64FsRedirection(OldValue)
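+
+# Illustrative sketch, not part of the upstream winappdbg API: the documented
+# usage pattern for the two functions above is disable, do the work, revert,
+# so that a 32-bit (WOW64) process can temporarily see the native system32
+# directory on 64-bit Windows.
+def _example_without_fs_redirection(function, *argv, **argd):
+    OldValue = Wow64DisableWow64FsRedirection()
+    try:
+        return function(*argv, **argd)
+    finally:
+        Wow64RevertWow64FsRedirection(OldValue)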
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
+
+#==============================================================================
+# Mark functions that Psyco cannot compile.
+# In your programs, don't use psyco.full().
+# Call psyco.bind() on your main function instead.
+
+try:
+    import psyco
+    psyco.cannotcompile(WaitForDebugEvent)
+    psyco.cannotcompile(WaitForSingleObject)
+    psyco.cannotcompile(WaitForSingleObjectEx)
+    psyco.cannotcompile(WaitForMultipleObjects)
+    psyco.cannotcompile(WaitForMultipleObjectsEx)
+except ImportError:
+    pass
+#==============================================================================
diff --git a/scripts/win32/ntdll.py b/scripts/win32/ntdll.py
new file mode 100644
index 0000000..28fca0b
--- /dev/null
+++ b/scripts/win32/ntdll.py
@@ -0,0 +1,537 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for ntdll.dll in ctypes.
+"""
+
+from .defines import *  # NOQA
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+_all.add('peb_teb')
+#==============================================================================
+
+from .peb_teb import *
+
+#--- Types --------------------------------------------------------------------
+
+SYSDBG_COMMAND          = DWORD
+PROCESSINFOCLASS        = DWORD
+THREADINFOCLASS         = DWORD
+FILE_INFORMATION_CLASS  = DWORD
+
+#--- Constants ----------------------------------------------------------------
+
+# DEP flags for ProcessExecuteFlags
+MEM_EXECUTE_OPTION_ENABLE               = 1
+MEM_EXECUTE_OPTION_DISABLE              = 2
+MEM_EXECUTE_OPTION_ATL7_THUNK_EMULATION = 4
+MEM_EXECUTE_OPTION_PERMANENT            = 8
+
+# SYSTEM_INFORMATION_CLASS
+# http://www.informit.com/articles/article.aspx?p=22442&seqNum=4
+SystemBasicInformation                  = 1     # 0x002C
+SystemProcessorInformation              = 2     # 0x000C
+SystemPerformanceInformation            = 3     # 0x0138
+SystemTimeInformation                   = 4     # 0x0020
+SystemPathInformation                   = 5     # not implemented
+SystemProcessInformation                = 6     # 0x00F8 + per process
+SystemCallInformation                   = 7     # 0x0018 + (n * 0x0004)
+SystemConfigurationInformation          = 8     # 0x0018
+SystemProcessorCounters                 = 9     # 0x0030 per cpu
+SystemGlobalFlag                        = 10    # 0x0004
+SystemInfo10                            = 11    # not implemented
+SystemModuleInformation                 = 12    # 0x0004 + (n * 0x011C)
+SystemLockInformation                   = 13    # 0x0004 + (n * 0x0024)
+SystemInfo13                            = 14    # not implemented
+SystemPagedPoolInformation              = 15    # checked build only
+SystemNonPagedPoolInformation           = 16    # checked build only
+SystemHandleInformation                 = 17    # 0x0004 + (n * 0x0010)
+SystemObjectInformation                 = 18    # 0x0038+ + (n * 0x0030+)
+SystemPagefileInformation               = 19    # 0x0018+ per page file
+SystemInstemulInformation               = 20    # 0x0088
+SystemInfo20                            = 21    # invalid info class
+SystemCacheInformation                  = 22    # 0x0024
+SystemPoolTagInformation                = 23    # 0x0004 + (n * 0x001C)
+SystemProcessorStatistics               = 24    # 0x0000, or 0x0018 per cpu
+SystemDpcInformation                    = 25    # 0x0014
+SystemMemoryUsageInformation1           = 26    # checked build only
+SystemLoadImage                         = 27    # 0x0018, set mode only
+SystemUnloadImage                       = 28    # 0x0004, set mode only
+SystemTimeAdjustmentInformation         = 29    # 0x000C, 0x0008 writeable
+SystemMemoryUsageInformation2           = 30    # checked build only
+SystemInfo30                            = 31    # checked build only
+SystemInfo31                            = 32    # checked build only
+SystemCrashDumpInformation              = 33    # 0x0004
+SystemExceptionInformation              = 34    # 0x0010
+SystemCrashDumpStateInformation         = 35    # 0x0008
+SystemDebuggerInformation               = 36    # 0x0002
+SystemThreadSwitchInformation           = 37    # 0x0030
+SystemRegistryQuotaInformation          = 38    # 0x000C
+SystemLoadDriver                        = 39    # 0x0008, set mode only
+SystemPrioritySeparationInformation     = 40    # 0x0004, set mode only
+SystemInfo40                            = 41    # not implemented
+SystemInfo41                            = 42    # not implemented
+SystemInfo42                            = 43    # invalid info class
+SystemInfo43                            = 44    # invalid info class
+SystemTimeZoneInformation               = 45    # 0x00AC
+SystemLookasideInformation              = 46    # n * 0x0020
+# info classes specific to Windows 2000
+# WTS = Windows Terminal Server
+SystemSetTimeSlipEvent                  = 47    # set mode only
+SystemCreateSession                     = 48    # WTS, set mode only
+SystemDeleteSession                     = 49    # WTS, set mode only
+SystemInfo49                            = 50    # invalid info class
+SystemRangeStartInformation             = 51    # 0x0004
+SystemVerifierInformation               = 52    # 0x0068
+SystemAddVerifier                       = 53    # set mode only
+SystemSessionProcessesInformation       = 54    # WTS
+
+# NtQueryInformationProcess constants (from MSDN)
+##ProcessBasicInformation = 0
+##ProcessDebugPort        = 7
+##ProcessWow64Information = 26
+##ProcessImageFileName    = 27
+
+# PROCESS_INFORMATION_CLASS
+# http://undocumented.ntinternals.net/UserMode/Undocumented%20Functions/NT%20Objects/Process/PROCESS_INFORMATION_CLASS.html
+ProcessBasicInformation             = 0
+ProcessQuotaLimits                  = 1
+ProcessIoCounters                   = 2
+ProcessVmCounters                   = 3
+ProcessTimes                        = 4
+ProcessBasePriority                 = 5
+ProcessRaisePriority                = 6
+ProcessDebugPort                    = 7
+ProcessExceptionPort                = 8
+ProcessAccessToken                  = 9
+ProcessLdtInformation               = 10
+ProcessLdtSize                      = 11
+ProcessDefaultHardErrorMode         = 12
+ProcessIoPortHandlers               = 13
+ProcessPooledUsageAndLimits         = 14
+ProcessWorkingSetWatch              = 15
+ProcessUserModeIOPL                 = 16
+ProcessEnableAlignmentFaultFixup    = 17
+ProcessPriorityClass                = 18
+ProcessWx86Information              = 19
+ProcessHandleCount                  = 20
+ProcessAffinityMask                 = 21
+ProcessPriorityBoost                = 22
+
+ProcessWow64Information             = 26
+ProcessImageFileName                = 27
+
+# http://www.codeproject.com/KB/security/AntiReverseEngineering.aspx
+ProcessDebugObjectHandle            = 30
+
+ProcessExecuteFlags                 = 34
+
+# THREAD_INFORMATION_CLASS
+ThreadBasicInformation              = 0
+ThreadTimes                         = 1
+ThreadPriority                      = 2
+ThreadBasePriority                  = 3
+ThreadAffinityMask                  = 4
+ThreadImpersonationToken            = 5
+ThreadDescriptorTableEntry          = 6
+ThreadEnableAlignmentFaultFixup     = 7
+ThreadEventPair                     = 8
+ThreadQuerySetWin32StartAddress     = 9
+ThreadZeroTlsCell                   = 10
+ThreadPerformanceCount              = 11
+ThreadAmILastThread                 = 12
+ThreadIdealProcessor                = 13
+ThreadPriorityBoost                 = 14
+ThreadSetTlsArrayAddress            = 15
+ThreadIsIoPending                   = 16
+ThreadHideFromDebugger              = 17
+
+# OBJECT_INFORMATION_CLASS
+ObjectBasicInformation              = 0
+ObjectNameInformation               = 1
+ObjectTypeInformation               = 2
+ObjectAllTypesInformation           = 3
+ObjectHandleInformation             = 4
+
+# FILE_INFORMATION_CLASS
+FileDirectoryInformation            = 1
+FileFullDirectoryInformation        = 2
+FileBothDirectoryInformation        = 3
+FileBasicInformation                = 4
+FileStandardInformation             = 5
+FileInternalInformation             = 6
+FileEaInformation                   = 7
+FileAccessInformation               = 8
+FileNameInformation                 = 9
+FileRenameInformation               = 10
+FileLinkInformation                 = 11
+FileNamesInformation                = 12
+FileDispositionInformation          = 13
+FilePositionInformation             = 14
+FileFullEaInformation               = 15
+FileModeInformation                 = 16
+FileAlignmentInformation            = 17
+FileAllInformation                  = 18
+FileAllocationInformation           = 19
+FileEndOfFileInformation            = 20
+FileAlternateNameInformation        = 21
+FileStreamInformation               = 22
+FilePipeInformation                 = 23
+FilePipeLocalInformation            = 24
+FilePipeRemoteInformation           = 25
+FileMailslotQueryInformation        = 26
+FileMailslotSetInformation          = 27
+FileCompressionInformation          = 28
+FileCopyOnWriteInformation          = 29
+FileCompletionInformation           = 30
+FileMoveClusterInformation          = 31
+FileQuotaInformation                = 32
+FileReparsePointInformation         = 33
+FileNetworkOpenInformation          = 34
+FileObjectIdInformation             = 35
+FileTrackingInformation             = 36
+FileOleDirectoryInformation         = 37
+FileContentIndexInformation         = 38
+FileInheritContentIndexInformation  = 39
+FileOleInformation                  = 40
+FileMaximumInformation              = 41
+
+# From http://www.nirsoft.net/kernel_struct/vista/EXCEPTION_DISPOSITION.html
+# typedef enum _EXCEPTION_DISPOSITION
+# {
+#          ExceptionContinueExecution = 0,
+#          ExceptionContinueSearch = 1,
+#          ExceptionNestedException = 2,
+#          ExceptionCollidedUnwind = 3
+# } EXCEPTION_DISPOSITION;
+ExceptionContinueExecution  = 0
+ExceptionContinueSearch     = 1
+ExceptionNestedException    = 2
+ExceptionCollidedUnwind     = 3
+
+#--- PROCESS_BASIC_INFORMATION structure --------------------------------------
+
+# From MSDN:
+#
+# typedef struct _PROCESS_BASIC_INFORMATION {
+#     PVOID Reserved1;
+#     PPEB PebBaseAddress;
+#     PVOID Reserved2[2];
+#     ULONG_PTR UniqueProcessId;
+#     PVOID Reserved3;
+# } PROCESS_BASIC_INFORMATION;
+##class PROCESS_BASIC_INFORMATION(Structure):
+##    _fields_ = [
+##        ("Reserved1",       PVOID),
+##        ("PebBaseAddress",  PPEB),
+##        ("Reserved2",       PVOID * 2),
+##        ("UniqueProcessId", ULONG_PTR),
+##        ("Reserved3",       PVOID),
+##]
+
+# From http://catch22.net/tuts/tips2
+# (Only valid for 32 bits)
+#
+# typedef struct
+# {
+#     ULONG      ExitStatus;
+#     PVOID      PebBaseAddress;
+#     ULONG      AffinityMask;
+#     ULONG      BasePriority;
+#     ULONG_PTR  UniqueProcessId;
+#     ULONG_PTR  InheritedFromUniqueProcessId;
+# } PROCESS_BASIC_INFORMATION;
+
+# My own definition follows:
+class PROCESS_BASIC_INFORMATION(Structure):
+    _fields_ = [
+        ("ExitStatus",                      SIZE_T),
+        ("PebBaseAddress",                  PVOID),     # PPEB
+        ("AffinityMask",                    KAFFINITY),
+        ("BasePriority",                    SDWORD),
+        ("UniqueProcessId",                 ULONG_PTR),
+        ("InheritedFromUniqueProcessId",    ULONG_PTR),
+]
+
+#--- THREAD_BASIC_INFORMATION structure ---------------------------------------
+
+# From http://undocumented.ntinternals.net/UserMode/Structures/THREAD_BASIC_INFORMATION.html
+#
+# typedef struct _THREAD_BASIC_INFORMATION {
+#   NTSTATUS ExitStatus;
+#   PVOID TebBaseAddress;
+#   CLIENT_ID ClientId;
+#   KAFFINITY AffinityMask;
+#   KPRIORITY Priority;
+#   KPRIORITY BasePriority;
+# } THREAD_BASIC_INFORMATION, *PTHREAD_BASIC_INFORMATION;
+class THREAD_BASIC_INFORMATION(Structure):
+    _fields_ = [
+        ("ExitStatus",      NTSTATUS),
+        ("TebBaseAddress",  PVOID),     # PTEB
+        ("ClientId",        CLIENT_ID),
+        ("AffinityMask",    KAFFINITY),
+        ("Priority",        SDWORD),
+        ("BasePriority",    SDWORD),
+]
+
+#--- FILE_NAME_INFORMATION structure ------------------------------------------
+
+# typedef struct _FILE_NAME_INFORMATION {
+#     ULONG FileNameLength;
+#     WCHAR FileName[1];
+# } FILE_NAME_INFORMATION, *PFILE_NAME_INFORMATION;
+class FILE_NAME_INFORMATION(Structure):
+    _fields_ = [
+        ("FileNameLength",  ULONG),
+        ("FileName",        WCHAR * 1),
+    ]
+
+#--- SYSDBG_MSR structure and constants ---------------------------------------
+
+SysDbgReadMsr  = 16
+SysDbgWriteMsr = 17
+
+class SYSDBG_MSR(Structure):
+    _fields_ = [
+        ("Address", ULONG),
+        ("Data",    ULONGLONG),
+]
+
+#--- IO_STATUS_BLOCK structure ------------------------------------------------
+
+# typedef struct _IO_STATUS_BLOCK {
+#     union {
+#         NTSTATUS Status;
+#         PVOID Pointer;
+#     };
+#     ULONG_PTR Information;
+# } IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;
+class IO_STATUS_BLOCK(Structure):
+    _fields_ = [
+        ("Status",      NTSTATUS),
+        ("Information", ULONG_PTR),
+    ]
+    def __get_Pointer(self):
+        return PVOID(self.Status)
+    def __set_Pointer(self, ptr):
+        self.Status = ptr.value
+    Pointer = property(__get_Pointer, __set_Pointer)
+
+PIO_STATUS_BLOCK = POINTER(IO_STATUS_BLOCK)
+
+#--- ntdll.dll ----------------------------------------------------------------
+
+# ULONG WINAPI RtlNtStatusToDosError(
+#   __in  NTSTATUS Status
+# );
+def RtlNtStatusToDosError(Status):
+    _RtlNtStatusToDosError = windll.ntdll.RtlNtStatusToDosError
+    _RtlNtStatusToDosError.argtypes = [NTSTATUS]
+    _RtlNtStatusToDosError.restype = ULONG
+    return _RtlNtStatusToDosError(Status)
+
+# NTSYSAPI NTSTATUS NTAPI NtSystemDebugControl(
+#   IN SYSDBG_COMMAND Command,
+#   IN PVOID InputBuffer OPTIONAL,
+#   IN ULONG InputBufferLength,
+#   OUT PVOID OutputBuffer OPTIONAL,
+#   IN ULONG OutputBufferLength,
+#   OUT PULONG ReturnLength OPTIONAL
+# );
+def NtSystemDebugControl(Command, InputBuffer = None, InputBufferLength = None, OutputBuffer = None, OutputBufferLength = None):
+    _NtSystemDebugControl = windll.ntdll.NtSystemDebugControl
+    _NtSystemDebugControl.argtypes = [SYSDBG_COMMAND, PVOID, ULONG, PVOID, ULONG, PULONG]
+    _NtSystemDebugControl.restype = NTSTATUS
+
+    # Validate the input buffer
+    if InputBuffer is None:
+        if InputBufferLength is None:
+            InputBufferLength = 0
+        else:
+            raise ValueError(
+                "Invalid call to NtSystemDebugControl: "
+                "input buffer length given but no input buffer!")
+    else:
+        if InputBufferLength is None:
+            InputBufferLength = sizeof(InputBuffer)
+        InputBuffer = byref(InputBuffer)
+
+    # Validate the output buffer
+    if OutputBuffer is None:
+        if OutputBufferLength is None:
+            OutputBufferLength = 0
+        else:
+            OutputBuffer = ctypes.create_string_buffer("", OutputBufferLength)
+    elif OutputBufferLength is None:
+        OutputBufferLength = sizeof(OutputBuffer)
+
+    # Make the call (with an output buffer)
+    if OutputBuffer is not None:
+        ReturnLength = ULONG(0)
+        ntstatus = _NtSystemDebugControl(Command, InputBuffer, InputBufferLength, byref(OutputBuffer), OutputBufferLength, byref(ReturnLength))
+        if ntstatus != 0:
+            raise ctypes.WinError( RtlNtStatusToDosError(ntstatus) )
+        ReturnLength = ReturnLength.value
+        if ReturnLength != OutputBufferLength:
+            raise ctypes.WinError(ERROR_BAD_LENGTH)
+        return OutputBuffer, ReturnLength
+
+    # Make the call (without an output buffer)
+    ntstatus = _NtSystemDebugControl(Command, InputBuffer, InputBufferLength, OutputBuffer, OutputBufferLength, None)
+    if ntstatus != 0:
+        raise ctypes.WinError( RtlNtStatusToDosError(ntstatus) )
+
+ZwSystemDebugControl = NtSystemDebugControl
+
+# NTSTATUS WINAPI NtQueryInformationProcess(
+#   __in       HANDLE ProcessHandle,
+#   __in       PROCESSINFOCLASS ProcessInformationClass,
+#   __out      PVOID ProcessInformation,
+#   __in       ULONG ProcessInformationLength,
+#   __out_opt  PULONG ReturnLength
+# );
+def NtQueryInformationProcess(ProcessHandle, ProcessInformationClass, ProcessInformationLength = None):
+    _NtQueryInformationProcess = windll.ntdll.NtQueryInformationProcess
+    _NtQueryInformationProcess.argtypes = [HANDLE, PROCESSINFOCLASS, PVOID, ULONG, PULONG]
+    _NtQueryInformationProcess.restype = NTSTATUS
+    if ProcessInformationLength is not None:
+        ProcessInformation = ctypes.create_string_buffer("", ProcessInformationLength)
+    else:
+        if   ProcessInformationClass == ProcessBasicInformation:
+            ProcessInformation = PROCESS_BASIC_INFORMATION()
+            ProcessInformationLength = sizeof(PROCESS_BASIC_INFORMATION)
+        elif ProcessInformationClass == ProcessImageFileName:
+            unicode_buffer = ctypes.create_unicode_buffer(u"", 0x1000)
+            ProcessInformation = UNICODE_STRING(0, 0x1000, addressof(unicode_buffer))
+            ProcessInformationLength = sizeof(UNICODE_STRING)
+        elif ProcessInformationClass in (ProcessDebugPort, ProcessWow64Information, ProcessWx86Information, ProcessHandleCount, ProcessPriorityBoost):
+            ProcessInformation = DWORD()
+            ProcessInformationLength = sizeof(DWORD)
+        else:
+            raise Exception("Unknown ProcessInformationClass, use an explicit ProcessInformationLength value instead")
+    ReturnLength = ULONG(0)
+    ntstatus = _NtQueryInformationProcess(ProcessHandle, ProcessInformationClass, byref(ProcessInformation), ProcessInformationLength, byref(ReturnLength))
+    if ntstatus != 0:
+        raise ctypes.WinError( RtlNtStatusToDosError(ntstatus) )
+    if   ProcessInformationClass == ProcessBasicInformation:
+        retval = ProcessInformation
+    elif ProcessInformationClass in (ProcessDebugPort, ProcessWow64Information, ProcessWx86Information, ProcessHandleCount, ProcessPriorityBoost):
+        retval = ProcessInformation.value
+    elif ProcessInformationClass == ProcessImageFileName:
+        retval = ctypes.wstring_at(ProcessInformation.Buffer,
+                                   ProcessInformation.Length // sizeof(WCHAR))
+    else:
+        retval = ProcessInformation.raw[:ReturnLength.value]
+    return retval
+
+ZwQueryInformationProcess = NtQueryInformationProcess
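+
+# Illustrative sketch, not part of the upstream winappdbg API: querying
+# ProcessBasicInformation returns the PROCESS_BASIC_INFORMATION structure
+# defined above, from which the PEB base address and the parent process ID
+# can be read directly.
+def _example_query_peb_address(hProcess):
+    pbi = NtQueryInformationProcess(hProcess, ProcessBasicInformation)
+    return pbi.PebBaseAddress, pbi.InheritedFromUniqueProcessId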
+
+# NTSTATUS WINAPI NtQueryInformationThread(
+#   __in       HANDLE ThreadHandle,
+#   __in       THREADINFOCLASS ThreadInformationClass,
+#   __out      PVOID ThreadInformation,
+#   __in       ULONG ThreadInformationLength,
+#   __out_opt  PULONG ReturnLength
+# );
+def NtQueryInformationThread(ThreadHandle, ThreadInformationClass, ThreadInformationLength = None):
+    _NtQueryInformationThread = windll.ntdll.NtQueryInformationThread
+    _NtQueryInformationThread.argtypes = [HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG]
+    _NtQueryInformationThread.restype = NTSTATUS
+    if ThreadInformationLength is not None:
+        ThreadInformation = ctypes.create_string_buffer("", ThreadInformationLength)
+    else:
+        if   ThreadInformationClass == ThreadBasicInformation:
+            ThreadInformation = THREAD_BASIC_INFORMATION()
+        elif ThreadInformationClass == ThreadHideFromDebugger:
+            ThreadInformation = BOOLEAN()
+        elif ThreadInformationClass == ThreadQuerySetWin32StartAddress:
+            ThreadInformation = PVOID()
+        elif ThreadInformationClass in (ThreadAmILastThread, ThreadPriorityBoost):
+            ThreadInformation = DWORD()
+        elif ThreadInformationClass == ThreadPerformanceCount:
+            ThreadInformation = LONGLONG()  # LARGE_INTEGER
+        else:
+            raise Exception("Unknown ThreadInformationClass, use an explicit ThreadInformationLength value instead")
+        ThreadInformationLength = sizeof(ThreadInformation)
+    ReturnLength = ULONG(0)
+    ntstatus = _NtQueryInformationThread(ThreadHandle, ThreadInformationClass, byref(ThreadInformation), ThreadInformationLength, byref(ReturnLength))
+    if ntstatus != 0:
+        raise ctypes.WinError( RtlNtStatusToDosError(ntstatus) )
+    if   ThreadInformationClass == ThreadBasicInformation:
+        retval = ThreadInformation
+    elif ThreadInformationClass == ThreadHideFromDebugger:
+        retval = bool(ThreadInformation.value)
+    elif ThreadInformationClass in (ThreadQuerySetWin32StartAddress, ThreadAmILastThread, ThreadPriorityBoost, ThreadPerformanceCount):
+        retval = ThreadInformation.value
+    else:
+        retval = ThreadInformation.raw[:ReturnLength.value]
+    return retval
+
+ZwQueryInformationThread = NtQueryInformationThread
+
+# NTSTATUS
+#   NtQueryInformationFile(
+#     IN HANDLE  FileHandle,
+#     OUT PIO_STATUS_BLOCK  IoStatusBlock,
+#     OUT PVOID  FileInformation,
+#     IN ULONG  Length,
+#     IN FILE_INFORMATION_CLASS  FileInformationClass
+#     );
+def NtQueryInformationFile(FileHandle, FileInformationClass, FileInformation, Length):
+    _NtQueryInformationFile = windll.ntdll.NtQueryInformationFile
+    _NtQueryInformationFile.argtypes = [HANDLE, PIO_STATUS_BLOCK, PVOID, ULONG, DWORD]
+    _NtQueryInformationFile.restype = NTSTATUS
+    IoStatusBlock = IO_STATUS_BLOCK()
+    ntstatus = _NtQueryInformationFile(FileHandle, byref(IoStatusBlock), byref(FileInformation), Length, FileInformationClass)
+    if ntstatus != 0:
+        raise ctypes.WinError( RtlNtStatusToDosError(ntstatus) )
+    return IoStatusBlock
+
+ZwQueryInformationFile = NtQueryInformationFile
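+
+# Illustrative sketch, not part of the upstream winappdbg API:
+# NtQueryInformationFile writes into a caller supplied structure, and the
+# FILE_NAME_INFORMATION defined above only reserves a single WCHAR. The
+# private buffer below adds room for the name; its 260 character capacity is
+# an arbitrary assumption, not something the API mandates.
+class _FILE_NAME_INFORMATION_BUFFER(Structure):
+    _fields_ = [
+        ("FileNameLength",  ULONG),
+        ("FileName",        WCHAR * 260),
+    ]
+
+def _example_query_file_name(hFile):
+    info = _FILE_NAME_INFORMATION_BUFFER()
+    NtQueryInformationFile(hFile, FileNameInformation, info, sizeof(info))
+    return info.FileName[:info.FileNameLength // sizeof(WCHAR)]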
+
+# DWORD STDCALL CsrGetProcessId (VOID);
+def CsrGetProcessId():
+    _CsrGetProcessId = windll.ntdll.CsrGetProcessId
+    _CsrGetProcessId.argtypes = []
+    _CsrGetProcessId.restype = DWORD
+    return _CsrGetProcessId()
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/peb_teb.py b/scripts/win32/peb_teb.py
new file mode 100644
index 0000000..d5c2cde
--- /dev/null
+++ b/scripts/win32/peb_teb.py
@@ -0,0 +1,3433 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+PEB and TEB structures, constants and data types.
+"""
+
+from .defines import *  # NOQA
+from .version import os
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- PEB and TEB structures, constants and data types -------------------------
+
+# From http://www.nirsoft.net/kernel_struct/vista/CLIENT_ID.html
+#
+# typedef struct _CLIENT_ID
+# {
+#     PVOID UniqueProcess;
+#     PVOID UniqueThread;
+# } CLIENT_ID, *PCLIENT_ID;
+class CLIENT_ID(Structure):
+    _fields_ = [
+        ("UniqueProcess",   PVOID),
+        ("UniqueThread",    PVOID),
+]
+
+# From MSDN:
+#
+# typedef struct _LDR_DATA_TABLE_ENTRY {
+#     BYTE Reserved1[2];
+#     LIST_ENTRY InMemoryOrderLinks;
+#     PVOID Reserved2[2];
+#     PVOID DllBase;
+#     PVOID EntryPoint;
+#     PVOID Reserved3;
+#     UNICODE_STRING FullDllName;
+#     BYTE Reserved4[8];
+#     PVOID Reserved5[3];
+#     union {
+#         ULONG CheckSum;
+#         PVOID Reserved6;
+#     };
+#     ULONG TimeDateStamp;
+# } LDR_DATA_TABLE_ENTRY, *PLDR_DATA_TABLE_ENTRY;
+##class LDR_DATA_TABLE_ENTRY(Structure):
+##    _fields_ = [
+##        ("Reserved1",           BYTE * 2),
+##        ("InMemoryOrderLinks",  LIST_ENTRY),
+##        ("Reserved2",           PVOID * 2),
+##        ("DllBase",             PVOID),
+##        ("EntryPoint",          PVOID),
+##        ("Reserved3",           PVOID),
+##        ("FullDllName",           UNICODE_STRING),
+##        ("Reserved4",           BYTE * 8),
+##        ("Reserved5",           PVOID * 3),
+##        ("CheckSum",            ULONG),
+##        ("TimeDateStamp",       ULONG),
+##]
+
+# From MSDN:
+#
+# typedef struct _PEB_LDR_DATA {
+#   BYTE         Reserved1[8];
+#   PVOID        Reserved2[3];
+#   LIST_ENTRY   InMemoryOrderModuleList;
+# } PEB_LDR_DATA,
+#  *PPEB_LDR_DATA;
+##class PEB_LDR_DATA(Structure):
+##    _fields_ = [
+##        ("Reserved1",               BYTE),
+##        ("Reserved2",               PVOID),
+##        ("InMemoryOrderModuleList", LIST_ENTRY),
+##]
+
+# From http://undocumented.ntinternals.net/UserMode/Structures/RTL_USER_PROCESS_PARAMETERS.html
+# typedef struct _RTL_USER_PROCESS_PARAMETERS {
+#   ULONG                   MaximumLength;
+#   ULONG                   Length;
+#   ULONG                   Flags;
+#   ULONG                   DebugFlags;
+#   PVOID                   ConsoleHandle;
+#   ULONG                   ConsoleFlags;
+#   HANDLE                  StdInputHandle;
+#   HANDLE                  StdOutputHandle;
+#   HANDLE                  StdErrorHandle;
+#   UNICODE_STRING          CurrentDirectoryPath;
+#   HANDLE                  CurrentDirectoryHandle;
+#   UNICODE_STRING          DllPath;
+#   UNICODE_STRING          ImagePathName;
+#   UNICODE_STRING          CommandLine;
+#   PVOID                   Environment;
+#   ULONG                   StartingPositionLeft;
+#   ULONG                   StartingPositionTop;
+#   ULONG                   Width;
+#   ULONG                   Height;
+#   ULONG                   CharWidth;
+#   ULONG                   CharHeight;
+#   ULONG                   ConsoleTextAttributes;
+#   ULONG                   WindowFlags;
+#   ULONG                   ShowWindowFlags;
+#   UNICODE_STRING          WindowTitle;
+#   UNICODE_STRING          DesktopName;
+#   UNICODE_STRING          ShellInfo;
+#   UNICODE_STRING          RuntimeData;
+#   RTL_DRIVE_LETTER_CURDIR DLCurrentDirectory[0x20];
+# } RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS;
+
+# kd> dt _RTL_USER_PROCESS_PARAMETERS
+# ntdll!_RTL_USER_PROCESS_PARAMETERS
+#    +0x000 MaximumLength    : Uint4B
+#    +0x004 Length           : Uint4B
+#    +0x008 Flags            : Uint4B
+#    +0x00c DebugFlags       : Uint4B
+#    +0x010 ConsoleHandle    : Ptr32 Void
+#    +0x014 ConsoleFlags     : Uint4B
+#    +0x018 StandardInput    : Ptr32 Void
+#    +0x01c StandardOutput   : Ptr32 Void
+#    +0x020 StandardError    : Ptr32 Void
+#    +0x024 CurrentDirectory : _CURDIR
+#    +0x030 DllPath          : _UNICODE_STRING
+#    +0x038 ImagePathName    : _UNICODE_STRING
+#    +0x040 CommandLine      : _UNICODE_STRING
+#    +0x048 Environment      : Ptr32 Void
+#    +0x04c StartingX        : Uint4B
+#    +0x050 StartingY        : Uint4B
+#    +0x054 CountX           : Uint4B
+#    +0x058 CountY           : Uint4B
+#    +0x05c CountCharsX      : Uint4B
+#    +0x060 CountCharsY      : Uint4B
+#    +0x064 FillAttribute    : Uint4B
+#    +0x068 WindowFlags      : Uint4B
+#    +0x06c ShowWindowFlags  : Uint4B
+#    +0x070 WindowTitle      : _UNICODE_STRING
+#    +0x078 DesktopInfo      : _UNICODE_STRING
+#    +0x080 ShellInfo        : _UNICODE_STRING
+#    +0x088 RuntimeData      : _UNICODE_STRING
+#    +0x090 CurrentDirectores : [32] _RTL_DRIVE_LETTER_CURDIR
+#    +0x290 EnvironmentSize  : Uint4B
+##class RTL_USER_PROCESS_PARAMETERS(Structure):
+##    _fields_ = [
+##        ("MaximumLength",           ULONG),
+##        ("Length",                  ULONG),
+##        ("Flags",                   ULONG),
+##        ("DebugFlags",              ULONG),
+##        ("ConsoleHandle",           PVOID),
+##        ("ConsoleFlags",            ULONG),
+##        ("StandardInput",           HANDLE),
+##        ("StandardOutput",          HANDLE),
+##        ("StandardError",           HANDLE),
+##        ("CurrentDirectory",        CURDIR),
+##        ("DllPath",                 UNICODE_STRING),
+##        ("ImagePathName",           UNICODE_STRING),
+##        ("CommandLine",             UNICODE_STRING),
+##        ("Environment",             PVOID),
+##        ("StartingX",               ULONG),
+##        ("StartingY",               ULONG),
+##        ("CountX",                  ULONG),
+##        ("CountY",                  ULONG),
+##        ("CountCharsX",             ULONG),
+##        ("CountCharsY",             ULONG),
+##        ("FillAttribute",           ULONG),
+##        ("WindowFlags",             ULONG),
+##        ("ShowWindowFlags",         ULONG),
+##        ("WindowTitle",             UNICODE_STRING),
+##        ("DesktopInfo",             UNICODE_STRING),
+##        ("ShellInfo",               UNICODE_STRING),
+##        ("RuntimeData",             UNICODE_STRING),
+##        ("CurrentDirectores",       RTL_DRIVE_LETTER_CURDIR * 32), # typo here?
+##
+##        # Windows 2008 and Vista
+##        ("EnvironmentSize",         ULONG),
+##]
+##    @property
+##    def CurrentDirectories(self):
+##        return self.CurrentDirectores
+
+# From MSDN:
+#
+# typedef struct _RTL_USER_PROCESS_PARAMETERS {
+#   BYTE             Reserved1[16];
+#   PVOID            Reserved2[10];
+#   UNICODE_STRING   ImagePathName;
+#   UNICODE_STRING   CommandLine;
+# } RTL_USER_PROCESS_PARAMETERS,
+#  *PRTL_USER_PROCESS_PARAMETERS;
+class RTL_USER_PROCESS_PARAMETERS(Structure):
+    _fields_ = [
+        ("Reserved1",               BYTE * 16),
+        ("Reserved2",               PVOID * 10),
+        ("ImagePathName",           UNICODE_STRING),
+        ("CommandLine",             UNICODE_STRING),
+        ("Environment",             PVOID),             # undocumented!
+        #
+        # XXX TODO
+        # This structure should be defined with all undocumented fields for
+        # each version of Windows, just like it's being done for PEB and TEB.
+        #
+]
+
+PPS_POST_PROCESS_INIT_ROUTINE = PVOID
+
+# From MSDN:
+#
+# typedef struct _PEB {
+#     BYTE Reserved1[2];
+#     BYTE BeingDebugged;
+#     BYTE Reserved2[21];
+#     PPEB_LDR_DATA LoaderData;
+#     PRTL_USER_PROCESS_PARAMETERS ProcessParameters;
+#     BYTE Reserved3[520];
+#     PPS_POST_PROCESS_INIT_ROUTINE PostProcessInitRoutine;
+#     BYTE Reserved4[136];
+#     ULONG SessionId;
+# } PEB;
+##class PEB(Structure):
+##    _fields_ = [
+##        ("Reserved1",               BYTE * 2),
+##        ("BeingDebugged",           BYTE),
+##        ("Reserved2",               BYTE * 21),
+##        ("LoaderData",              PVOID,    # PPEB_LDR_DATA
+##        ("ProcessParameters",       PVOID,    # PRTL_USER_PROCESS_PARAMETERS
+##        ("Reserved3",               BYTE * 520),
+##        ("PostProcessInitRoutine",  PPS_POST_PROCESS_INIT_ROUTINE),
+##        ("Reserved4",               BYTE),
+##        ("SessionId",               ULONG),
+##]
+
+# from MSDN:
+#
+# typedef struct _TEB {
+#   BYTE    Reserved1[1952];
+#   PVOID   Reserved2[412];
+#   PVOID   TlsSlots[64];
+#   BYTE    Reserved3[8];
+#   PVOID   Reserved4[26];
+#   PVOID   ReservedForOle;
+#   PVOID   Reserved5[4];
+#   PVOID   TlsExpansionSlots;
+# } TEB,
+#  *PTEB;
+##class TEB(Structure):
+##    _fields_ = [
+##        ("Reserved1",           PVOID * 1952),
+##        ("Reserved2",           PVOID * 412),
+##        ("TlsSlots",            PVOID * 64),
+##        ("Reserved3",           BYTE  * 8),
+##        ("Reserved4",           PVOID * 26),
+##        ("ReservedForOle",      PVOID),
+##        ("Reserved5",           PVOID * 4),
+##        ("TlsExpansionSlots",   PVOID),
+##]
+
+# from http://undocumented.ntinternals.net/UserMode/Structures/LDR_MODULE.html
+#
+# typedef struct _LDR_MODULE {
+#   LIST_ENTRY InLoadOrderModuleList;
+#   LIST_ENTRY InMemoryOrderModuleList;
+#   LIST_ENTRY InInitializationOrderModuleList;
+#   PVOID BaseAddress;
+#   PVOID EntryPoint;
+#   ULONG SizeOfImage;
+#   UNICODE_STRING FullDllName;
+#   UNICODE_STRING BaseDllName;
+#   ULONG Flags;
+#   SHORT LoadCount;
+#   SHORT TlsIndex;
+#   LIST_ENTRY HashTableEntry;
+#   ULONG TimeDateStamp;
+# } LDR_MODULE, *PLDR_MODULE;
+class LDR_MODULE(Structure):
+    _fields_ = [
+        ("InLoadOrderModuleList",           LIST_ENTRY),
+        ("InMemoryOrderModuleList",         LIST_ENTRY),
+        ("InInitializationOrderModuleList", LIST_ENTRY),
+        ("BaseAddress",                     PVOID),
+        ("EntryPoint",                      PVOID),
+        ("SizeOfImage",                     ULONG),
+        ("FullDllName",                     UNICODE_STRING),
+        ("BaseDllName",                     UNICODE_STRING),
+        ("Flags",                           ULONG),
+        ("LoadCount",                       SHORT),
+        ("TlsIndex",                        SHORT),
+        ("HashTableEntry",                  LIST_ENTRY),
+        ("TimeDateStamp",                   ULONG),
+]
+
+# from http://undocumented.ntinternals.net/UserMode/Structures/PEB_LDR_DATA.html
+#
+# typedef struct _PEB_LDR_DATA {
+#   ULONG Length;
+#   BOOLEAN Initialized;
+#   PVOID SsHandle;
+#   LIST_ENTRY InLoadOrderModuleList;
+#   LIST_ENTRY InMemoryOrderModuleList;
+#   LIST_ENTRY InInitializationOrderModuleList;
+# } PEB_LDR_DATA, *PPEB_LDR_DATA;
+class PEB_LDR_DATA(Structure):
+    _fields_ = [
+        ("Length",                          ULONG),
+        ("Initialized",                     BOOLEAN),
+        ("SsHandle",                        PVOID),
+        ("InLoadOrderModuleList",           LIST_ENTRY),
+        ("InMemoryOrderModuleList",         LIST_ENTRY),
+        ("InInitializationOrderModuleList", LIST_ENTRY),
+]
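+
+# Editor's note: an illustrative sketch, not part of the original library,
+# showing how PEB_LDR_DATA and LDR_MODULE fit together.  It assumes the
+# LIST_ENTRY definition above exposes Flink as a plain address, and that
+# read_struct(address, structure_class) is a hypothetical helper that reads
+# sizeof(structure_class) bytes from the target process and returns an
+# instance of that class.
+def _sketch_walk_loaded_modules(read_struct, ldr_address):
+    # Each LDR_MODULE embeds its InLoadOrderModuleList entry at offset zero,
+    # so the address of a list entry is also the address of the module record.
+    ldr   = read_struct(ldr_address, PEB_LDR_DATA)
+    head  = ldr_address + PEB_LDR_DATA.InLoadOrderModuleList.offset
+    entry = ldr.InLoadOrderModuleList.Flink
+    modules = []
+    while entry and entry != head:
+        module = read_struct(entry, LDR_MODULE)
+        modules.append(module)
+        entry = module.InLoadOrderModuleList.Flink
+    return modules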
+
+# From http://undocumented.ntinternals.net/UserMode/Undocumented%20Functions/NT%20Objects/Process/PEB_FREE_BLOCK.html
+#
+# typedef struct _PEB_FREE_BLOCK {
+#   PEB_FREE_BLOCK *Next;
+#   ULONG Size;
+# } PEB_FREE_BLOCK, *PPEB_FREE_BLOCK;
+class PEB_FREE_BLOCK(Structure):
+    pass
+
+##PPEB_FREE_BLOCK = POINTER(PEB_FREE_BLOCK)
+PPEB_FREE_BLOCK = PVOID
+
+PEB_FREE_BLOCK._fields_ = [
+        ("Next", PPEB_FREE_BLOCK),
+        ("Size", ULONG),
+]
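+
+# Editor's note: the two-step definition above (declare the class first,
+# assign _fields_ afterwards) is the standard ctypes idiom for structures that
+# refer to themselves.  A minimal standalone sketch of the same pattern:
+#
+#     import ctypes
+#
+#     class NODE(ctypes.Structure):
+#         pass                                  # forward declaration
+#
+#     NODE._fields_ = [
+#         ("Next",  ctypes.POINTER(NODE)),      # legal now that NODE exists
+#         ("Value", ctypes.c_ulong),
+#     ]
+#
+# PEB_FREE_BLOCK could use POINTER(PEB_FREE_BLOCK) the same way; the PVOID
+# alias above simply keeps the field a raw address instead.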
+
+# From http://undocumented.ntinternals.net/UserMode/Structures/RTL_DRIVE_LETTER_CURDIR.html
+#
+# typedef struct _RTL_DRIVE_LETTER_CURDIR {
+#   USHORT Flags;
+#   USHORT Length;
+#   ULONG TimeStamp;
+#   UNICODE_STRING DosPath;
+# } RTL_DRIVE_LETTER_CURDIR, *PRTL_DRIVE_LETTER_CURDIR;
+class RTL_DRIVE_LETTER_CURDIR(Structure):
+    _fields_ = [
+        ("Flags",       USHORT),
+        ("Length",      USHORT),
+        ("TimeStamp",   ULONG),
+        ("DosPath",     UNICODE_STRING),
+]
+
+# From http://www.nirsoft.net/kernel_struct/vista/CURDIR.html
+#
+# typedef struct _CURDIR
+# {
+#      UNICODE_STRING DosPath;
+#      PVOID Handle;
+# } CURDIR, *PCURDIR;
+class CURDIR(Structure):
+    _fields_ = [
+        ("DosPath", UNICODE_STRING),
+        ("Handle",  PVOID),
+]
+
+# From http://www.nirsoft.net/kernel_struct/vista/RTL_CRITICAL_SECTION_DEBUG.html
+#
+# typedef struct _RTL_CRITICAL_SECTION_DEBUG
+# {
+#      WORD Type;
+#      WORD CreatorBackTraceIndex;
+#      PRTL_CRITICAL_SECTION CriticalSection;
+#      LIST_ENTRY ProcessLocksList;
+#      ULONG EntryCount;
+#      ULONG ContentionCount;
+#      ULONG Flags;
+#      WORD CreatorBackTraceIndexHigh;
+#      WORD SpareUSHORT;
+# } RTL_CRITICAL_SECTION_DEBUG, *PRTL_CRITICAL_SECTION_DEBUG;
+#
+# From http://www.nirsoft.net/kernel_struct/vista/RTL_CRITICAL_SECTION.html
+#
+# typedef struct _RTL_CRITICAL_SECTION
+# {
+#      PRTL_CRITICAL_SECTION_DEBUG DebugInfo;
+#      LONG LockCount;
+#      LONG RecursionCount;
+#      PVOID OwningThread;
+#      PVOID LockSemaphore;
+#      ULONG SpinCount;
+# } RTL_CRITICAL_SECTION, *PRTL_CRITICAL_SECTION;
+#
+class RTL_CRITICAL_SECTION(Structure):
+    _fields_ = [
+        ("DebugInfo",       PVOID),     # PRTL_CRITICAL_SECTION_DEBUG
+        ("LockCount",       LONG),
+        ("RecursionCount",  LONG),
+        ("OwningThread",    PVOID),
+        ("LockSemaphore",   PVOID),
+        ("SpinCount",       ULONG),
+]
+class RTL_CRITICAL_SECTION_DEBUG(Structure):
+    _fields_ = [
+        ("Type",                        WORD),
+        ("CreatorBackTraceIndex",       WORD),
+        ("CriticalSection",             PVOID),         # PRTL_CRITICAL_SECTION
+        ("ProcessLocksList",            LIST_ENTRY),
+        ("EntryCount",                  ULONG),
+        ("ContentionCount",             ULONG),
+        ("Flags",                       ULONG),
+        ("CreatorBackTraceIndexHigh",   WORD),
+        ("SpareUSHORT",                 WORD),
+]
+PRTL_CRITICAL_SECTION       = POINTER(RTL_CRITICAL_SECTION)
+PRTL_CRITICAL_SECTION_DEBUG = POINTER(RTL_CRITICAL_SECTION_DEBUG)
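+
+# Editor's note: a small sketch, not part of the original library, showing one
+# use for the POINTER types above.  DebugInfo is declared as PVOID in
+# RTL_CRITICAL_SECTION; when the structure describes memory in the *current*
+# process it can be re-typed with ctypes.cast, while for a remote process the
+# pointed-to memory would have to be read instead.
+def _sketch_debug_info(critical_section):
+    import ctypes
+    if not critical_section.DebugInfo:
+        return None
+    pointer = ctypes.cast(critical_section.DebugInfo, PRTL_CRITICAL_SECTION_DEBUG)
+    return pointer.contents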
+
+PPEB_LDR_DATA                   = POINTER(PEB_LDR_DATA)
+PRTL_USER_PROCESS_PARAMETERS    = POINTER(RTL_USER_PROCESS_PARAMETERS)
+
+PPEBLOCKROUTINE                 = PVOID
+
+# BitField
+ImageUsesLargePages             = 1 << 0
+IsProtectedProcess              = 1 << 1
+IsLegacyProcess                 = 1 << 2
+IsImageDynamicallyRelocated     = 1 << 3
+SkipPatchingUser32Forwarders    = 1 << 4
+
+# CrossProcessFlags
+ProcessInJob                    = 1 << 0
+ProcessInitializing             = 1 << 1
+ProcessUsingVEH                 = 1 << 2
+ProcessUsingVCH                 = 1 << 3
+ProcessUsingFTH                 = 1 << 4
+
+# TracingFlags
+HeapTracingEnabled              = 1 << 0
+CritSecTracingEnabled           = 1 << 1
+
+# NtGlobalFlags
+FLG_VALID_BITS                  = 0x003FFFFF    # not a flag
+FLG_STOP_ON_EXCEPTION           = 0x00000001
+FLG_SHOW_LDR_SNAPS              = 0x00000002
+FLG_DEBUG_INITIAL_COMMAND       = 0x00000004
+FLG_STOP_ON_HUNG_GUI            = 0x00000008
+FLG_HEAP_ENABLE_TAIL_CHECK      = 0x00000010
+FLG_HEAP_ENABLE_FREE_CHECK      = 0x00000020
+FLG_HEAP_VALIDATE_PARAMETERS    = 0x00000040
+FLG_HEAP_VALIDATE_ALL           = 0x00000080
+FLG_POOL_ENABLE_TAIL_CHECK      = 0x00000100
+FLG_POOL_ENABLE_FREE_CHECK      = 0x00000200
+FLG_POOL_ENABLE_TAGGING         = 0x00000400
+FLG_HEAP_ENABLE_TAGGING         = 0x00000800
+FLG_USER_STACK_TRACE_DB         = 0x00001000
+FLG_KERNEL_STACK_TRACE_DB       = 0x00002000
+FLG_MAINTAIN_OBJECT_TYPELIST    = 0x00004000
+FLG_HEAP_ENABLE_TAG_BY_DLL      = 0x00008000
+FLG_IGNORE_DEBUG_PRIV           = 0x00010000
+FLG_ENABLE_CSRDEBUG             = 0x00020000
+FLG_ENABLE_KDEBUG_SYMBOL_LOAD   = 0x00040000
+FLG_DISABLE_PAGE_KERNEL_STACKS  = 0x00080000
+FLG_HEAP_ENABLE_CALL_TRACING    = 0x00100000
+FLG_HEAP_DISABLE_COALESCING     = 0x00200000
+FLG_ENABLE_CLOSE_EXCEPTION      = 0x00400000
+FLG_ENABLE_EXCEPTION_LOGGING    = 0x00800000
+FLG_ENABLE_HANDLE_TYPE_TAGGING  = 0x01000000
+FLG_HEAP_PAGE_ALLOCS            = 0x02000000
+FLG_DEBUG_WINLOGON              = 0x04000000
+FLG_ENABLE_DBGPRINT_BUFFERING   = 0x08000000
+FLG_EARLY_CRITICAL_SECTION_EVT  = 0x10000000
+FLG_DISABLE_DLL_VERIFICATION    = 0x80000000
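+
+# Editor's note: a small helper sketched by the editor, not part of the
+# original library.  It maps a raw flags value (a PEB BitField byte, a
+# CrossProcessFlags dword, or NtGlobalFlag) back to the constant names defined
+# above, which is handy when dumping a PEB by hand.
+def _sketch_decode_flags(value, flags):
+    """Return the names in 'flags' (name -> mask) whose bits are set in 'value'."""
+    return [name for name, mask in sorted(flags.items()) if value & mask]
+
+# Example with illustrative values only: NtGlobalFlag == 0x70 decodes to the
+# three heap-checking flags.
+#
+#     _sketch_decode_flags(0x70, {
+#         "FLG_HEAP_ENABLE_TAIL_CHECK":   FLG_HEAP_ENABLE_TAIL_CHECK,
+#         "FLG_HEAP_ENABLE_FREE_CHECK":   FLG_HEAP_ENABLE_FREE_CHECK,
+#         "FLG_HEAP_VALIDATE_PARAMETERS": FLG_HEAP_VALIDATE_PARAMETERS,
+#     })
+#     -> ['FLG_HEAP_ENABLE_FREE_CHECK', 'FLG_HEAP_ENABLE_TAIL_CHECK',
+#         'FLG_HEAP_VALIDATE_PARAMETERS']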
+
+class _PEB_NT(Structure):
+    _pack_   = 4
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID),
+        ("FastPebLockRoutine",                  PVOID), # PPEBLOCKROUTINE
+        ("FastPebUnlockRoutine",                PVOID), # PPEBLOCKROUTINE
+        ("EnvironmentUpdateCount",              ULONG),
+        ("KernelCallbackTable",                 PVOID), # Ptr32 Ptr32 Void
+        ("EventLogSection",                     PVOID),
+        ("EventLog",                            PVOID),
+        ("FreeList",                            PVOID), # PPEB_FREE_BLOCK
+        ("TlsExpansionCounter",                 ULONG),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       ULONG * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("ReadOnlySharedMemoryHeap",            PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  ULONG),
+        ("NtGlobalFlag",                        ULONG),
+        ("Spare2",                              BYTE * 4),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  ULONG),
+        ("HeapSegmentCommit",                   ULONG),
+        ("HeapDeCommitTotalFreeThreshold",      ULONG),
+        ("HeapDeCommitFreeBlockThreshold",      ULONG),
+        ("NumberOfHeaps",                       ULONG),
+        ("MaximumNumberOfHeaps",                ULONG),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  PVOID),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      ULONG),
+        ("OSMinorVersion",                      ULONG),
+        ("OSBuildNumber",                       ULONG),
+        ("OSPlatformId",                        ULONG),
+        ("ImageSubSystem",                      ULONG),
+        ("ImageSubSystemMajorVersion",          ULONG),
+        ("ImageSubSystemMinorVersion",          ULONG),
+        ("ImageProcessAffinityMask",            ULONG),
+        ("GdiHandleBuffer",                     ULONG * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  ULONG),
+        ("TlsExpansionBitmapBits",              BYTE * 128),
+        ("SessionId",                           ULONG),
+    ]
+
+# not really, but "dt _PEB" in w2k isn't working for me :(
+_PEB_2000 = _PEB_NT
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 SpareBool        : UChar
+#    +0x004 Mutant           : Ptr32 Void
+#    +0x008 ImageBaseAddress : Ptr32 Void
+#    +0x00c Ldr              : Ptr32 _PEB_LDR_DATA
+#    +0x010 ProcessParameters : Ptr32 _RTL_USER_PROCESS_PARAMETERS
+#    +0x014 SubSystemData    : Ptr32 Void
+#    +0x018 ProcessHeap      : Ptr32 Void
+#    +0x01c FastPebLock      : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x020 FastPebLockRoutine : Ptr32 Void
+#    +0x024 FastPebUnlockRoutine : Ptr32 Void
+#    +0x028 EnvironmentUpdateCount : Uint4B
+#    +0x02c KernelCallbackTable : Ptr32 Void
+#    +0x030 SystemReserved   : [1] Uint4B
+#    +0x034 AtlThunkSListPtr32 : Uint4B
+#    +0x038 FreeList         : Ptr32 _PEB_FREE_BLOCK
+#    +0x03c TlsExpansionCounter : Uint4B
+#    +0x040 TlsBitmap        : Ptr32 Void
+#    +0x044 TlsBitmapBits    : [2] Uint4B
+#    +0x04c ReadOnlySharedMemoryBase : Ptr32 Void
+#    +0x050 ReadOnlySharedMemoryHeap : Ptr32 Void
+#    +0x054 ReadOnlyStaticServerData : Ptr32 Ptr32 Void
+#    +0x058 AnsiCodePageData : Ptr32 Void
+#    +0x05c OemCodePageData  : Ptr32 Void
+#    +0x060 UnicodeCaseTableData : Ptr32 Void
+#    +0x064 NumberOfProcessors : Uint4B
+#    +0x068 NtGlobalFlag     : Uint4B
+#    +0x070 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x078 HeapSegmentReserve : Uint4B
+#    +0x07c HeapSegmentCommit : Uint4B
+#    +0x080 HeapDeCommitTotalFreeThreshold : Uint4B
+#    +0x084 HeapDeCommitFreeBlockThreshold : Uint4B
+#    +0x088 NumberOfHeaps    : Uint4B
+#    +0x08c MaximumNumberOfHeaps : Uint4B
+#    +0x090 ProcessHeaps     : Ptr32 Ptr32 Void
+#    +0x094 GdiSharedHandleTable : Ptr32 Void
+#    +0x098 ProcessStarterHelper : Ptr32 Void
+#    +0x09c GdiDCAttributeList : Uint4B
+#    +0x0a0 LoaderLock       : Ptr32 Void
+#    +0x0a4 OSMajorVersion   : Uint4B
+#    +0x0a8 OSMinorVersion   : Uint4B
+#    +0x0ac OSBuildNumber    : Uint2B
+#    +0x0ae OSCSDVersion     : Uint2B
+#    +0x0b0 OSPlatformId     : Uint4B
+#    +0x0b4 ImageSubsystem   : Uint4B
+#    +0x0b8 ImageSubsystemMajorVersion : Uint4B
+#    +0x0bc ImageSubsystemMinorVersion : Uint4B
+#    +0x0c0 ImageProcessAffinityMask : Uint4B
+#    +0x0c4 GdiHandleBuffer  : [34] Uint4B
+#    +0x14c PostProcessInitRoutine : Ptr32     void
+#    +0x150 TlsExpansionBitmap : Ptr32 Void
+#    +0x154 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x1d4 SessionId        : Uint4B
+#    +0x1d8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x1e0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x1e8 pShimData        : Ptr32 Void
+#    +0x1ec AppCompatInfo    : Ptr32 Void
+#    +0x1f0 CSDVersion       : _UNICODE_STRING
+#    +0x1f8 ActivationContextData : Ptr32 Void
+#    +0x1fc ProcessAssemblyStorageMap : Ptr32 Void
+#    +0x200 SystemDefaultActivationContextData : Ptr32 Void
+#    +0x204 SystemAssemblyStorageMap : Ptr32 Void
+#    +0x208 MinimumStackCommit : Uint4B
+class _PEB_XP(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("SpareBool",                           UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID),
+        ("FastPebLockRoutine",                  PVOID),
+        ("FastPebUnlockRoutine",                PVOID),
+        ("EnvironmentUpdateCount",              DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("AtlThunkSListPtr32",                  DWORD),
+        ("FreeList",                            PVOID), # PPEB_FREE_BLOCK
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("ReadOnlySharedMemoryHeap",            PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  DWORD),
+        ("HeapSegmentCommit",                   DWORD),
+        ("HeapDeCommitTotalFreeThreshold",      DWORD),
+        ("HeapDeCommitFreeBlockThreshold",      DWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ImageProcessAffinityMask",            DWORD),
+        ("GdiHandleBuffer",                     DWORD * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  DWORD),
+    ]
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 SpareBits        : Pos 1, 7 Bits
+#    +0x008 Mutant           : Ptr64 Void
+#    +0x010 ImageBaseAddress : Ptr64 Void
+#    +0x018 Ldr              : Ptr64 _PEB_LDR_DATA
+#    +0x020 ProcessParameters : Ptr64 _RTL_USER_PROCESS_PARAMETERS
+#    +0x028 SubSystemData    : Ptr64 Void
+#    +0x030 ProcessHeap      : Ptr64 Void
+#    +0x038 FastPebLock      : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x040 AtlThunkSListPtr : Ptr64 Void
+#    +0x048 SparePtr2        : Ptr64 Void
+#    +0x050 EnvironmentUpdateCount : Uint4B
+#    +0x058 KernelCallbackTable : Ptr64 Void
+#    +0x060 SystemReserved   : [1] Uint4B
+#    +0x064 SpareUlong       : Uint4B
+#    +0x068 FreeList         : Ptr64 _PEB_FREE_BLOCK
+#    +0x070 TlsExpansionCounter : Uint4B
+#    +0x078 TlsBitmap        : Ptr64 Void
+#    +0x080 TlsBitmapBits    : [2] Uint4B
+#    +0x088 ReadOnlySharedMemoryBase : Ptr64 Void
+#    +0x090 ReadOnlySharedMemoryHeap : Ptr64 Void
+#    +0x098 ReadOnlyStaticServerData : Ptr64 Ptr64 Void
+#    +0x0a0 AnsiCodePageData : Ptr64 Void
+#    +0x0a8 OemCodePageData  : Ptr64 Void
+#    +0x0b0 UnicodeCaseTableData : Ptr64 Void
+#    +0x0b8 NumberOfProcessors : Uint4B
+#    +0x0bc NtGlobalFlag     : Uint4B
+#    +0x0c0 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x0c8 HeapSegmentReserve : Uint8B
+#    +0x0d0 HeapSegmentCommit : Uint8B
+#    +0x0d8 HeapDeCommitTotalFreeThreshold : Uint8B
+#    +0x0e0 HeapDeCommitFreeBlockThreshold : Uint8B
+#    +0x0e8 NumberOfHeaps    : Uint4B
+#    +0x0ec MaximumNumberOfHeaps : Uint4B
+#    +0x0f0 ProcessHeaps     : Ptr64 Ptr64 Void
+#    +0x0f8 GdiSharedHandleTable : Ptr64 Void
+#    +0x100 ProcessStarterHelper : Ptr64 Void
+#    +0x108 GdiDCAttributeList : Uint4B
+#    +0x110 LoaderLock       : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x118 OSMajorVersion   : Uint4B
+#    +0x11c OSMinorVersion   : Uint4B
+#    +0x120 OSBuildNumber    : Uint2B
+#    +0x122 OSCSDVersion     : Uint2B
+#    +0x124 OSPlatformId     : Uint4B
+#    +0x128 ImageSubsystem   : Uint4B
+#    +0x12c ImageSubsystemMajorVersion : Uint4B
+#    +0x130 ImageSubsystemMinorVersion : Uint4B
+#    +0x138 ImageProcessAffinityMask : Uint8B
+#    +0x140 GdiHandleBuffer  : [60] Uint4B
+#    +0x230 PostProcessInitRoutine : Ptr64     void
+#    +0x238 TlsExpansionBitmap : Ptr64 Void
+#    +0x240 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x2c0 SessionId        : Uint4B
+#    +0x2c8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x2d0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x2d8 pShimData        : Ptr64 Void
+#    +0x2e0 AppCompatInfo    : Ptr64 Void
+#    +0x2e8 CSDVersion       : _UNICODE_STRING
+#    +0x2f8 ActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x300 ProcessAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x308 SystemDefaultActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x310 SystemAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x318 MinimumStackCommit : Uint8B
+#    +0x320 FlsCallback      : Ptr64 Ptr64 Void
+#    +0x328 FlsListHead      : _LIST_ENTRY
+#    +0x338 FlsBitmap        : Ptr64 Void
+#    +0x340 FlsBitmapBits    : [4] Uint4B
+#    +0x350 FlsHighIndex     : Uint4B
+class _PEB_XP_64(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("SparePtr2",                           PVOID),
+        ("EnvironmentUpdateCount",              DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("SpareUlong",                          DWORD),
+        ("FreeList",                            PVOID), # PPEB_FREE_BLOCK
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("ReadOnlySharedMemoryHeap",            PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr64 Ptr64 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  QWORD),
+        ("HeapSegmentCommit",                   QWORD),
+        ("HeapDeCommitTotalFreeThreshold",      QWORD),
+        ("HeapDeCommitFreeBlockThreshold",      QWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr64 Ptr64 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ImageProcessAffinityMask",            QWORD),
+        ("GdiHandleBuffer",                     DWORD * 60),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  QWORD),
+        ("FlsCallback",                         PVOID), # Ptr64 Ptr64 Void
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+    ]
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 SpareBits        : Pos 1, 7 Bits
+#    +0x004 Mutant           : Ptr32 Void
+#    +0x008 ImageBaseAddress : Ptr32 Void
+#    +0x00c Ldr              : Ptr32 _PEB_LDR_DATA
+#    +0x010 ProcessParameters : Ptr32 _RTL_USER_PROCESS_PARAMETERS
+#    +0x014 SubSystemData    : Ptr32 Void
+#    +0x018 ProcessHeap      : Ptr32 Void
+#    +0x01c FastPebLock      : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x020 AtlThunkSListPtr : Ptr32 Void
+#    +0x024 SparePtr2        : Ptr32 Void
+#    +0x028 EnvironmentUpdateCount : Uint4B
+#    +0x02c KernelCallbackTable : Ptr32 Void
+#    +0x030 SystemReserved   : [1] Uint4B
+#    +0x034 SpareUlong       : Uint4B
+#    +0x038 FreeList         : Ptr32 _PEB_FREE_BLOCK
+#    +0x03c TlsExpansionCounter : Uint4B
+#    +0x040 TlsBitmap        : Ptr32 Void
+#    +0x044 TlsBitmapBits    : [2] Uint4B
+#    +0x04c ReadOnlySharedMemoryBase : Ptr32 Void
+#    +0x050 ReadOnlySharedMemoryHeap : Ptr32 Void
+#    +0x054 ReadOnlyStaticServerData : Ptr32 Ptr32 Void
+#    +0x058 AnsiCodePageData : Ptr32 Void
+#    +0x05c OemCodePageData  : Ptr32 Void
+#    +0x060 UnicodeCaseTableData : Ptr32 Void
+#    +0x064 NumberOfProcessors : Uint4B
+#    +0x068 NtGlobalFlag     : Uint4B
+#    +0x070 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x078 HeapSegmentReserve : Uint4B
+#    +0x07c HeapSegmentCommit : Uint4B
+#    +0x080 HeapDeCommitTotalFreeThreshold : Uint4B
+#    +0x084 HeapDeCommitFreeBlockThreshold : Uint4B
+#    +0x088 NumberOfHeaps    : Uint4B
+#    +0x08c MaximumNumberOfHeaps : Uint4B
+#    +0x090 ProcessHeaps     : Ptr32 Ptr32 Void
+#    +0x094 GdiSharedHandleTable : Ptr32 Void
+#    +0x098 ProcessStarterHelper : Ptr32 Void
+#    +0x09c GdiDCAttributeList : Uint4B
+#    +0x0a0 LoaderLock       : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x0a4 OSMajorVersion   : Uint4B
+#    +0x0a8 OSMinorVersion   : Uint4B
+#    +0x0ac OSBuildNumber    : Uint2B
+#    +0x0ae OSCSDVersion     : Uint2B
+#    +0x0b0 OSPlatformId     : Uint4B
+#    +0x0b4 ImageSubsystem   : Uint4B
+#    +0x0b8 ImageSubsystemMajorVersion : Uint4B
+#    +0x0bc ImageSubsystemMinorVersion : Uint4B
+#    +0x0c0 ImageProcessAffinityMask : Uint4B
+#    +0x0c4 GdiHandleBuffer  : [34] Uint4B
+#    +0x14c PostProcessInitRoutine : Ptr32     void
+#    +0x150 TlsExpansionBitmap : Ptr32 Void
+#    +0x154 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x1d4 SessionId        : Uint4B
+#    +0x1d8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x1e0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x1e8 pShimData        : Ptr32 Void
+#    +0x1ec AppCompatInfo    : Ptr32 Void
+#    +0x1f0 CSDVersion       : _UNICODE_STRING
+#    +0x1f8 ActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x1fc ProcessAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x200 SystemDefaultActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x204 SystemAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x208 MinimumStackCommit : Uint4B
+#    +0x20c FlsCallback      : Ptr32 Ptr32 Void
+#    +0x210 FlsListHead      : _LIST_ENTRY
+#    +0x218 FlsBitmap        : Ptr32 Void
+#    +0x21c FlsBitmapBits    : [4] Uint4B
+#    +0x22c FlsHighIndex     : Uint4B
+class _PEB_2003(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("SparePtr2",                           PVOID),
+        ("EnvironmentUpdateCount",              DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("SpareUlong",                          DWORD),
+        ("FreeList",                            PVOID), # PPEB_FREE_BLOCK
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("ReadOnlySharedMemoryHeap",            PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  DWORD),
+        ("HeapSegmentCommit",                   DWORD),
+        ("HeapDeCommitTotalFreeThreshold",      DWORD),
+        ("HeapDeCommitFreeBlockThreshold",      DWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ImageProcessAffinityMask",            DWORD),
+        ("GdiHandleBuffer",                     DWORD * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  DWORD),
+        ("FlsCallback",                         PVOID), # Ptr32 Ptr32 Void
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+    ]
+
+_PEB_2003_64    = _PEB_XP_64
+_PEB_2003_R2    = _PEB_2003
+_PEB_2003_R2_64 = _PEB_2003_64
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 IsProtectedProcess : Pos 1, 1 Bit
+#    +0x003 IsLegacyProcess  : Pos 2, 1 Bit
+#    +0x003 IsImageDynamicallyRelocated : Pos 3, 1 Bit
+#    +0x003 SkipPatchingUser32Forwarders : Pos 4, 1 Bit
+#    +0x003 SpareBits        : Pos 5, 3 Bits
+#    +0x004 Mutant           : Ptr32 Void
+#    +0x008 ImageBaseAddress : Ptr32 Void
+#    +0x00c Ldr              : Ptr32 _PEB_LDR_DATA
+#    +0x010 ProcessParameters : Ptr32 _RTL_USER_PROCESS_PARAMETERS
+#    +0x014 SubSystemData    : Ptr32 Void
+#    +0x018 ProcessHeap      : Ptr32 Void
+#    +0x01c FastPebLock      : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x020 AtlThunkSListPtr : Ptr32 Void
+#    +0x024 IFEOKey          : Ptr32 Void
+#    +0x028 CrossProcessFlags : Uint4B
+#    +0x028 ProcessInJob     : Pos 0, 1 Bit
+#    +0x028 ProcessInitializing : Pos 1, 1 Bit
+#    +0x028 ProcessUsingVEH  : Pos 2, 1 Bit
+#    +0x028 ProcessUsingVCH  : Pos 3, 1 Bit
+#    +0x028 ReservedBits0    : Pos 4, 28 Bits
+#    +0x02c KernelCallbackTable : Ptr32 Void
+#    +0x02c UserSharedInfoPtr : Ptr32 Void
+#    +0x030 SystemReserved   : [1] Uint4B
+#    +0x034 SpareUlong       : Uint4B
+#    +0x038 SparePebPtr0     : Uint4B
+#    +0x03c TlsExpansionCounter : Uint4B
+#    +0x040 TlsBitmap        : Ptr32 Void
+#    +0x044 TlsBitmapBits    : [2] Uint4B
+#    +0x04c ReadOnlySharedMemoryBase : Ptr32 Void
+#    +0x050 HotpatchInformation : Ptr32 Void
+#    +0x054 ReadOnlyStaticServerData : Ptr32 Ptr32 Void
+#    +0x058 AnsiCodePageData : Ptr32 Void
+#    +0x05c OemCodePageData  : Ptr32 Void
+#    +0x060 UnicodeCaseTableData : Ptr32 Void
+#    +0x064 NumberOfProcessors : Uint4B
+#    +0x068 NtGlobalFlag     : Uint4B
+#    +0x070 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x078 HeapSegmentReserve : Uint4B
+#    +0x07c HeapSegmentCommit : Uint4B
+#    +0x080 HeapDeCommitTotalFreeThreshold : Uint4B
+#    +0x084 HeapDeCommitFreeBlockThreshold : Uint4B
+#    +0x088 NumberOfHeaps    : Uint4B
+#    +0x08c MaximumNumberOfHeaps : Uint4B
+#    +0x090 ProcessHeaps     : Ptr32 Ptr32 Void
+#    +0x094 GdiSharedHandleTable : Ptr32 Void
+#    +0x098 ProcessStarterHelper : Ptr32 Void
+#    +0x09c GdiDCAttributeList : Uint4B
+#    +0x0a0 LoaderLock       : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x0a4 OSMajorVersion   : Uint4B
+#    +0x0a8 OSMinorVersion   : Uint4B
+#    +0x0ac OSBuildNumber    : Uint2B
+#    +0x0ae OSCSDVersion     : Uint2B
+#    +0x0b0 OSPlatformId     : Uint4B
+#    +0x0b4 ImageSubsystem   : Uint4B
+#    +0x0b8 ImageSubsystemMajorVersion : Uint4B
+#    +0x0bc ImageSubsystemMinorVersion : Uint4B
+#    +0x0c0 ActiveProcessAffinityMask : Uint4B
+#    +0x0c4 GdiHandleBuffer  : [34] Uint4B
+#    +0x14c PostProcessInitRoutine : Ptr32     void
+#    +0x150 TlsExpansionBitmap : Ptr32 Void
+#    +0x154 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x1d4 SessionId        : Uint4B
+#    +0x1d8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x1e0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x1e8 pShimData        : Ptr32 Void
+#    +0x1ec AppCompatInfo    : Ptr32 Void
+#    +0x1f0 CSDVersion       : _UNICODE_STRING
+#    +0x1f8 ActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x1fc ProcessAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x200 SystemDefaultActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x204 SystemAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x208 MinimumStackCommit : Uint4B
+#    +0x20c FlsCallback      : Ptr32 _FLS_CALLBACK_INFO
+#    +0x210 FlsListHead      : _LIST_ENTRY
+#    +0x218 FlsBitmap        : Ptr32 Void
+#    +0x21c FlsBitmapBits    : [4] Uint4B
+#    +0x22c FlsHighIndex     : Uint4B
+#    +0x230 WerRegistrationData : Ptr32 Void
+#    +0x234 WerShipAssertPtr : Ptr32 Void
+class _PEB_2008(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("IFEOKey",                             PVOID),
+        ("CrossProcessFlags",                   DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("SpareUlong",                          DWORD),
+        ("SparePebPtr0",                        PVOID),
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("HotpatchInformation",                 PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  DWORD),
+        ("HeapSegmentCommit",                   DWORD),
+        ("HeapDeCommitTotalFreeThreshold",      DWORD),
+        ("HeapDeCommitFreeBlockThreshold",      DWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ActiveProcessAffinityMask",           DWORD),
+        ("GdiHandleBuffer",                     DWORD * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  DWORD),
+        ("FlsCallback",                         PVOID), # PFLS_CALLBACK_INFO
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+        ("WerRegistrationData",                 PVOID),
+        ("WerShipAssertPtr",                    PVOID),
+    ]
+    def __get_UserSharedInfoPtr(self):
+        return self.KernelCallbackTable
+    def __set_UserSharedInfoPtr(self, value):
+        self.KernelCallbackTable = value
+    UserSharedInfoPtr = property(__get_UserSharedInfoPtr, __set_UserSharedInfoPtr)
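+
+# Editor's note, not part of the original library: KernelCallbackTable and
+# UserSharedInfoPtr occupy the same offset in the real PEB (see the "dt" dump
+# above), which is why the UserSharedInfoPtr property simply aliases the
+# ctypes field.  Illustrative use:
+#
+#     peb = _PEB_2008()
+#     peb.UserSharedInfoPtr = 0x1000
+#     assert peb.KernelCallbackTable == 0x1000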
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 IsProtectedProcess : Pos 1, 1 Bit
+#    +0x003 IsLegacyProcess  : Pos 2, 1 Bit
+#    +0x003 IsImageDynamicallyRelocated : Pos 3, 1 Bit
+#    +0x003 SkipPatchingUser32Forwarders : Pos 4, 1 Bit
+#    +0x003 SpareBits        : Pos 5, 3 Bits
+#    +0x008 Mutant           : Ptr64 Void
+#    +0x010 ImageBaseAddress : Ptr64 Void
+#    +0x018 Ldr              : Ptr64 _PEB_LDR_DATA
+#    +0x020 ProcessParameters : Ptr64 _RTL_USER_PROCESS_PARAMETERS
+#    +0x028 SubSystemData    : Ptr64 Void
+#    +0x030 ProcessHeap      : Ptr64 Void
+#    +0x038 FastPebLock      : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x040 AtlThunkSListPtr : Ptr64 Void
+#    +0x048 IFEOKey          : Ptr64 Void
+#    +0x050 CrossProcessFlags : Uint4B
+#    +0x050 ProcessInJob     : Pos 0, 1 Bit
+#    +0x050 ProcessInitializing : Pos 1, 1 Bit
+#    +0x050 ProcessUsingVEH  : Pos 2, 1 Bit
+#    +0x050 ProcessUsingVCH  : Pos 3, 1 Bit
+#    +0x050 ReservedBits0    : Pos 4, 28 Bits
+#    +0x058 KernelCallbackTable : Ptr64 Void
+#    +0x058 UserSharedInfoPtr : Ptr64 Void
+#    +0x060 SystemReserved   : [1] Uint4B
+#    +0x064 SpareUlong       : Uint4B
+#    +0x068 SparePebPtr0     : Uint8B
+#    +0x070 TlsExpansionCounter : Uint4B
+#    +0x078 TlsBitmap        : Ptr64 Void
+#    +0x080 TlsBitmapBits    : [2] Uint4B
+#    +0x088 ReadOnlySharedMemoryBase : Ptr64 Void
+#    +0x090 HotpatchInformation : Ptr64 Void
+#    +0x098 ReadOnlyStaticServerData : Ptr64 Ptr64 Void
+#    +0x0a0 AnsiCodePageData : Ptr64 Void
+#    +0x0a8 OemCodePageData  : Ptr64 Void
+#    +0x0b0 UnicodeCaseTableData : Ptr64 Void
+#    +0x0b8 NumberOfProcessors : Uint4B
+#    +0x0bc NtGlobalFlag     : Uint4B
+#    +0x0c0 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x0c8 HeapSegmentReserve : Uint8B
+#    +0x0d0 HeapSegmentCommit : Uint8B
+#    +0x0d8 HeapDeCommitTotalFreeThreshold : Uint8B
+#    +0x0e0 HeapDeCommitFreeBlockThreshold : Uint8B
+#    +0x0e8 NumberOfHeaps    : Uint4B
+#    +0x0ec MaximumNumberOfHeaps : Uint4B
+#    +0x0f0 ProcessHeaps     : Ptr64 Ptr64 Void
+#    +0x0f8 GdiSharedHandleTable : Ptr64 Void
+#    +0x100 ProcessStarterHelper : Ptr64 Void
+#    +0x108 GdiDCAttributeList : Uint4B
+#    +0x110 LoaderLock       : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x118 OSMajorVersion   : Uint4B
+#    +0x11c OSMinorVersion   : Uint4B
+#    +0x120 OSBuildNumber    : Uint2B
+#    +0x122 OSCSDVersion     : Uint2B
+#    +0x124 OSPlatformId     : Uint4B
+#    +0x128 ImageSubsystem   : Uint4B
+#    +0x12c ImageSubsystemMajorVersion : Uint4B
+#    +0x130 ImageSubsystemMinorVersion : Uint4B
+#    +0x138 ActiveProcessAffinityMask : Uint8B
+#    +0x140 GdiHandleBuffer  : [60] Uint4B
+#    +0x230 PostProcessInitRoutine : Ptr64     void
+#    +0x238 TlsExpansionBitmap : Ptr64 Void
+#    +0x240 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x2c0 SessionId        : Uint4B
+#    +0x2c8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x2d0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x2d8 pShimData        : Ptr64 Void
+#    +0x2e0 AppCompatInfo    : Ptr64 Void
+#    +0x2e8 CSDVersion       : _UNICODE_STRING
+#    +0x2f8 ActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x300 ProcessAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x308 SystemDefaultActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x310 SystemAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x318 MinimumStackCommit : Uint8B
+#    +0x320 FlsCallback      : Ptr64 _FLS_CALLBACK_INFO
+#    +0x328 FlsListHead      : _LIST_ENTRY
+#    +0x338 FlsBitmap        : Ptr64 Void
+#    +0x340 FlsBitmapBits    : [4] Uint4B
+#    +0x350 FlsHighIndex     : Uint4B
+#    +0x358 WerRegistrationData : Ptr64 Void
+#    +0x360 WerShipAssertPtr : Ptr64 Void
+class _PEB_2008_64(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("IFEOKey",                             PVOID),
+        ("CrossProcessFlags",                   DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("SpareUlong",                          DWORD),
+        ("SparePebPtr0",                        PVOID),
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("HotpatchInformation",                 PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr64 Ptr64 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  QWORD),
+        ("HeapSegmentCommit",                   QWORD),
+        ("HeapDeCommitTotalFreeThreshold",      QWORD),
+        ("HeapDeCommitFreeBlockThreshold",      QWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr64 Ptr64 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ActiveProcessAffinityMask",           QWORD),
+        ("GdiHandleBuffer",                     DWORD * 60),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  QWORD),
+        ("FlsCallback",                         PVOID), # PFLS_CALLBACK_INFO
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+        ("WerRegistrationData",                 PVOID),
+        ("WerShipAssertPtr",                    PVOID),
+    ]
+    def __get_UserSharedInfoPtr(self):
+        return self.KernelCallbackTable
+    def __set_UserSharedInfoPtr(self, value):
+        self.KernelCallbackTable = value
+    UserSharedInfoPtr = property(__get_UserSharedInfoPtr, __set_UserSharedInfoPtr)
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 IsProtectedProcess : Pos 1, 1 Bit
+#    +0x003 IsLegacyProcess  : Pos 2, 1 Bit
+#    +0x003 IsImageDynamicallyRelocated : Pos 3, 1 Bit
+#    +0x003 SkipPatchingUser32Forwarders : Pos 4, 1 Bit
+#    +0x003 SpareBits        : Pos 5, 3 Bits
+#    +0x004 Mutant           : Ptr32 Void
+#    +0x008 ImageBaseAddress : Ptr32 Void
+#    +0x00c Ldr              : Ptr32 _PEB_LDR_DATA
+#    +0x010 ProcessParameters : Ptr32 _RTL_USER_PROCESS_PARAMETERS
+#    +0x014 SubSystemData    : Ptr32 Void
+#    +0x018 ProcessHeap      : Ptr32 Void
+#    +0x01c FastPebLock      : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x020 AtlThunkSListPtr : Ptr32 Void
+#    +0x024 IFEOKey          : Ptr32 Void
+#    +0x028 CrossProcessFlags : Uint4B
+#    +0x028 ProcessInJob     : Pos 0, 1 Bit
+#    +0x028 ProcessInitializing : Pos 1, 1 Bit
+#    +0x028 ProcessUsingVEH  : Pos 2, 1 Bit
+#    +0x028 ProcessUsingVCH  : Pos 3, 1 Bit
+#    +0x028 ProcessUsingFTH  : Pos 4, 1 Bit
+#    +0x028 ReservedBits0    : Pos 5, 27 Bits
+#    +0x02c KernelCallbackTable : Ptr32 Void
+#    +0x02c UserSharedInfoPtr : Ptr32 Void
+#    +0x030 SystemReserved   : [1] Uint4B
+#    +0x034 AtlThunkSListPtr32 : Uint4B
+#    +0x038 ApiSetMap        : Ptr32 Void
+#    +0x03c TlsExpansionCounter : Uint4B
+#    +0x040 TlsBitmap        : Ptr32 Void
+#    +0x044 TlsBitmapBits    : [2] Uint4B
+#    +0x04c ReadOnlySharedMemoryBase : Ptr32 Void
+#    +0x050 HotpatchInformation : Ptr32 Void
+#    +0x054 ReadOnlyStaticServerData : Ptr32 Ptr32 Void
+#    +0x058 AnsiCodePageData : Ptr32 Void
+#    +0x05c OemCodePageData  : Ptr32 Void
+#    +0x060 UnicodeCaseTableData : Ptr32 Void
+#    +0x064 NumberOfProcessors : Uint4B
+#    +0x068 NtGlobalFlag     : Uint4B
+#    +0x070 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x078 HeapSegmentReserve : Uint4B
+#    +0x07c HeapSegmentCommit : Uint4B
+#    +0x080 HeapDeCommitTotalFreeThreshold : Uint4B
+#    +0x084 HeapDeCommitFreeBlockThreshold : Uint4B
+#    +0x088 NumberOfHeaps    : Uint4B
+#    +0x08c MaximumNumberOfHeaps : Uint4B
+#    +0x090 ProcessHeaps     : Ptr32 Ptr32 Void
+#    +0x094 GdiSharedHandleTable : Ptr32 Void
+#    +0x098 ProcessStarterHelper : Ptr32 Void
+#    +0x09c GdiDCAttributeList : Uint4B
+#    +0x0a0 LoaderLock       : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x0a4 OSMajorVersion   : Uint4B
+#    +0x0a8 OSMinorVersion   : Uint4B
+#    +0x0ac OSBuildNumber    : Uint2B
+#    +0x0ae OSCSDVersion     : Uint2B
+#    +0x0b0 OSPlatformId     : Uint4B
+#    +0x0b4 ImageSubsystem   : Uint4B
+#    +0x0b8 ImageSubsystemMajorVersion : Uint4B
+#    +0x0bc ImageSubsystemMinorVersion : Uint4B
+#    +0x0c0 ActiveProcessAffinityMask : Uint4B
+#    +0x0c4 GdiHandleBuffer  : [34] Uint4B
+#    +0x14c PostProcessInitRoutine : Ptr32     void
+#    +0x150 TlsExpansionBitmap : Ptr32 Void
+#    +0x154 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x1d4 SessionId        : Uint4B
+#    +0x1d8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x1e0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x1e8 pShimData        : Ptr32 Void
+#    +0x1ec AppCompatInfo    : Ptr32 Void
+#    +0x1f0 CSDVersion       : _UNICODE_STRING
+#    +0x1f8 ActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x1fc ProcessAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x200 SystemDefaultActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x204 SystemAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x208 MinimumStackCommit : Uint4B
+#    +0x20c FlsCallback      : Ptr32 _FLS_CALLBACK_INFO
+#    +0x210 FlsListHead      : _LIST_ENTRY
+#    +0x218 FlsBitmap        : Ptr32 Void
+#    +0x21c FlsBitmapBits    : [4] Uint4B
+#    +0x22c FlsHighIndex     : Uint4B
+#    +0x230 WerRegistrationData : Ptr32 Void
+#    +0x234 WerShipAssertPtr : Ptr32 Void
+#    +0x238 pContextData     : Ptr32 Void
+#    +0x23c pImageHeaderHash : Ptr32 Void
+#    +0x240 TracingFlags     : Uint4B
+#    +0x240 HeapTracingEnabled : Pos 0, 1 Bit
+#    +0x240 CritSecTracingEnabled : Pos 1, 1 Bit
+#    +0x240 SpareTracingBits : Pos 2, 30 Bits
+class _PEB_2008_R2(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("IFEOKey",                             PVOID),
+        ("CrossProcessFlags",                   DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("AtlThunkSListPtr32",                  DWORD),
+        ("ApiSetMap",                           PVOID),
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("HotpatchInformation",                 PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  DWORD),
+        ("HeapSegmentCommit",                   DWORD),
+        ("HeapDeCommitTotalFreeThreshold",      DWORD),
+        ("HeapDeCommitFreeBlockThreshold",      DWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ActiveProcessAffinityMask",           DWORD),
+        ("GdiHandleBuffer",                     DWORD * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  DWORD),
+        ("FlsCallback",                         PVOID), # PFLS_CALLBACK_INFO
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+        ("WerRegistrationData",                 PVOID),
+        ("WerShipAssertPtr",                    PVOID),
+        ("pContextData",                        PVOID),
+        ("pImageHeaderHash",                    PVOID),
+        ("TracingFlags",                        DWORD),
+    ]
+    def __get_UserSharedInfoPtr(self):
+        return self.KernelCallbackTable
+    def __set_UserSharedInfoPtr(self, value):
+        self.KernelCallbackTable = value
+    UserSharedInfoPtr = property(__get_UserSharedInfoPtr, __set_UserSharedInfoPtr)
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 IsProtectedProcess : Pos 1, 1 Bit
+#    +0x003 IsLegacyProcess  : Pos 2, 1 Bit
+#    +0x003 IsImageDynamicallyRelocated : Pos 3, 1 Bit
+#    +0x003 SkipPatchingUser32Forwarders : Pos 4, 1 Bit
+#    +0x003 SpareBits        : Pos 5, 3 Bits
+#    +0x008 Mutant           : Ptr64 Void
+#    +0x010 ImageBaseAddress : Ptr64 Void
+#    +0x018 Ldr              : Ptr64 _PEB_LDR_DATA
+#    +0x020 ProcessParameters : Ptr64 _RTL_USER_PROCESS_PARAMETERS
+#    +0x028 SubSystemData    : Ptr64 Void
+#    +0x030 ProcessHeap      : Ptr64 Void
+#    +0x038 FastPebLock      : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x040 AtlThunkSListPtr : Ptr64 Void
+#    +0x048 IFEOKey          : Ptr64 Void
+#    +0x050 CrossProcessFlags : Uint4B
+#    +0x050 ProcessInJob     : Pos 0, 1 Bit
+#    +0x050 ProcessInitializing : Pos 1, 1 Bit
+#    +0x050 ProcessUsingVEH  : Pos 2, 1 Bit
+#    +0x050 ProcessUsingVCH  : Pos 3, 1 Bit
+#    +0x050 ProcessUsingFTH  : Pos 4, 1 Bit
+#    +0x050 ReservedBits0    : Pos 5, 27 Bits
+#    +0x058 KernelCallbackTable : Ptr64 Void
+#    +0x058 UserSharedInfoPtr : Ptr64 Void
+#    +0x060 SystemReserved   : [1] Uint4B
+#    +0x064 AtlThunkSListPtr32 : Uint4B
+#    +0x068 ApiSetMap        : Ptr64 Void
+#    +0x070 TlsExpansionCounter : Uint4B
+#    +0x078 TlsBitmap        : Ptr64 Void
+#    +0x080 TlsBitmapBits    : [2] Uint4B
+#    +0x088 ReadOnlySharedMemoryBase : Ptr64 Void
+#    +0x090 HotpatchInformation : Ptr64 Void
+#    +0x098 ReadOnlyStaticServerData : Ptr64 Ptr64 Void
+#    +0x0a0 AnsiCodePageData : Ptr64 Void
+#    +0x0a8 OemCodePageData  : Ptr64 Void
+#    +0x0b0 UnicodeCaseTableData : Ptr64 Void
+#    +0x0b8 NumberOfProcessors : Uint4B
+#    +0x0bc NtGlobalFlag     : Uint4B
+#    +0x0c0 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x0c8 HeapSegmentReserve : Uint8B
+#    +0x0d0 HeapSegmentCommit : Uint8B
+#    +0x0d8 HeapDeCommitTotalFreeThreshold : Uint8B
+#    +0x0e0 HeapDeCommitFreeBlockThreshold : Uint8B
+#    +0x0e8 NumberOfHeaps    : Uint4B
+#    +0x0ec MaximumNumberOfHeaps : Uint4B
+#    +0x0f0 ProcessHeaps     : Ptr64 Ptr64 Void
+#    +0x0f8 GdiSharedHandleTable : Ptr64 Void
+#    +0x100 ProcessStarterHelper : Ptr64 Void
+#    +0x108 GdiDCAttributeList : Uint4B
+#    +0x110 LoaderLock       : Ptr64 _RTL_CRITICAL_SECTION
+#    +0x118 OSMajorVersion   : Uint4B
+#    +0x11c OSMinorVersion   : Uint4B
+#    +0x120 OSBuildNumber    : Uint2B
+#    +0x122 OSCSDVersion     : Uint2B
+#    +0x124 OSPlatformId     : Uint4B
+#    +0x128 ImageSubsystem   : Uint4B
+#    +0x12c ImageSubsystemMajorVersion : Uint4B
+#    +0x130 ImageSubsystemMinorVersion : Uint4B
+#    +0x138 ActiveProcessAffinityMask : Uint8B
+#    +0x140 GdiHandleBuffer  : [60] Uint4B
+#    +0x230 PostProcessInitRoutine : Ptr64     void
+#    +0x238 TlsExpansionBitmap : Ptr64 Void
+#    +0x240 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x2c0 SessionId        : Uint4B
+#    +0x2c8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x2d0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x2d8 pShimData        : Ptr64 Void
+#    +0x2e0 AppCompatInfo    : Ptr64 Void
+#    +0x2e8 CSDVersion       : _UNICODE_STRING
+#    +0x2f8 ActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x300 ProcessAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x308 SystemDefaultActivationContextData : Ptr64 _ACTIVATION_CONTEXT_DATA
+#    +0x310 SystemAssemblyStorageMap : Ptr64 _ASSEMBLY_STORAGE_MAP
+#    +0x318 MinimumStackCommit : Uint8B
+#    +0x320 FlsCallback      : Ptr64 _FLS_CALLBACK_INFO
+#    +0x328 FlsListHead      : _LIST_ENTRY
+#    +0x338 FlsBitmap        : Ptr64 Void
+#    +0x340 FlsBitmapBits    : [4] Uint4B
+#    +0x350 FlsHighIndex     : Uint4B
+#    +0x358 WerRegistrationData : Ptr64 Void
+#    +0x360 WerShipAssertPtr : Ptr64 Void
+#    +0x368 pContextData     : Ptr64 Void
+#    +0x370 pImageHeaderHash : Ptr64 Void
+#    +0x378 TracingFlags     : Uint4B
+#    +0x378 HeapTracingEnabled : Pos 0, 1 Bit
+#    +0x378 CritSecTracingEnabled : Pos 1, 1 Bit
+#    +0x378 SpareTracingBits : Pos 2, 30 Bits
+class _PEB_2008_R2_64(Structure):
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("IFEOKey",                             PVOID),
+        ("CrossProcessFlags",                   DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("AtlThunkSListPtr32",                  DWORD),
+        ("ApiSetMap",                           PVOID),
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("HotpatchInformation",                 PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  QWORD),
+        ("HeapSegmentCommit",                   QWORD),
+        ("HeapDeCommitTotalFreeThreshold",      QWORD),
+        ("HeapDeCommitFreeBlockThreshold",      QWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr64 Ptr64 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ActiveProcessAffinityMask",           QWORD),
+        ("GdiHandleBuffer",                     DWORD * 60),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  QWORD),
+        ("FlsCallback",                         PVOID), # PFLS_CALLBACK_INFO
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+        ("WerRegistrationData",                 PVOID),
+        ("WerShipAssertPtr",                    PVOID),
+        ("pContextData",                        PVOID),
+        ("pImageHeaderHash",                    PVOID),
+        ("TracingFlags",                        DWORD),
+    ]
+    def __get_UserSharedInfoPtr(self):
+        return self.KernelCallbackTable
+    def __set_UserSharedInfoPtr(self, value):
+        self.KernelCallbackTable = value
+    UserSharedInfoPtr = property(__get_UserSharedInfoPtr, __set_UserSharedInfoPtr)
+
+_PEB_Vista      = _PEB_2008
+_PEB_Vista_64   = _PEB_2008_64
+_PEB_W7         = _PEB_2008_R2
+_PEB_W7_64      = _PEB_2008_R2_64
+
+#    +0x000 InheritedAddressSpace : UChar
+#    +0x001 ReadImageFileExecOptions : UChar
+#    +0x002 BeingDebugged    : UChar
+#    +0x003 BitField         : UChar
+#    +0x003 ImageUsesLargePages : Pos 0, 1 Bit
+#    +0x003 IsProtectedProcess : Pos 1, 1 Bit
+#    +0x003 IsLegacyProcess  : Pos 2, 1 Bit
+#    +0x003 IsImageDynamicallyRelocated : Pos 3, 1 Bit
+#    +0x003 SkipPatchingUser32Forwarders : Pos 4, 1 Bit
+#    +0x003 SpareBits        : Pos 5, 3 Bits
+#    +0x004 Mutant           : Ptr32 Void
+#    +0x008 ImageBaseAddress : Ptr32 Void
+#    +0x00c Ldr              : Ptr32 _PEB_LDR_DATA
+#    +0x010 ProcessParameters : Ptr32 _RTL_USER_PROCESS_PARAMETERS
+#    +0x014 SubSystemData    : Ptr32 Void
+#    +0x018 ProcessHeap      : Ptr32 Void
+#    +0x01c FastPebLock      : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x020 AtlThunkSListPtr : Ptr32 Void
+#    +0x024 IFEOKey          : Ptr32 Void
+#    +0x028 CrossProcessFlags : Uint4B
+#    +0x028 ProcessInJob     : Pos 0, 1 Bit
+#    +0x028 ProcessInitializing : Pos 1, 1 Bit
+#    +0x028 ProcessUsingVEH  : Pos 2, 1 Bit
+#    +0x028 ProcessUsingVCH  : Pos 3, 1 Bit
+#    +0x028 ProcessUsingFTH  : Pos 4, 1 Bit
+#    +0x028 ReservedBits0    : Pos 5, 27 Bits
+#    +0x02c KernelCallbackTable : Ptr32 Void
+#    +0x02c UserSharedInfoPtr : Ptr32 Void
+#    +0x030 SystemReserved   : [1] Uint4B
+#    +0x034 TracingFlags     : Uint4B
+#    +0x034 HeapTracingEnabled : Pos 0, 1 Bit
+#    +0x034 CritSecTracingEnabled : Pos 1, 1 Bit
+#    +0x034 SpareTracingBits : Pos 2, 30 Bits
+#    +0x038 ApiSetMap        : Ptr32 Void
+#    +0x03c TlsExpansionCounter : Uint4B
+#    +0x040 TlsBitmap        : Ptr32 Void
+#    +0x044 TlsBitmapBits    : [2] Uint4B
+#    +0x04c ReadOnlySharedMemoryBase : Ptr32 Void
+#    +0x050 HotpatchInformation : Ptr32 Void
+#    +0x054 ReadOnlyStaticServerData : Ptr32 Ptr32 Void
+#    +0x058 AnsiCodePageData : Ptr32 Void
+#    +0x05c OemCodePageData  : Ptr32 Void
+#    +0x060 UnicodeCaseTableData : Ptr32 Void
+#    +0x064 NumberOfProcessors : Uint4B
+#    +0x068 NtGlobalFlag     : Uint4B
+#    +0x070 CriticalSectionTimeout : _LARGE_INTEGER
+#    +0x078 HeapSegmentReserve : Uint4B
+#    +0x07c HeapSegmentCommit : Uint4B
+#    +0x080 HeapDeCommitTotalFreeThreshold : Uint4B
+#    +0x084 HeapDeCommitFreeBlockThreshold : Uint4B
+#    +0x088 NumberOfHeaps    : Uint4B
+#    +0x08c MaximumNumberOfHeaps : Uint4B
+#    +0x090 ProcessHeaps     : Ptr32 Ptr32 Void
+#    +0x094 GdiSharedHandleTable : Ptr32 Void
+#    +0x098 ProcessStarterHelper : Ptr32 Void
+#    +0x09c GdiDCAttributeList : Uint4B
+#    +0x0a0 LoaderLock       : Ptr32 _RTL_CRITICAL_SECTION
+#    +0x0a4 OSMajorVersion   : Uint4B
+#    +0x0a8 OSMinorVersion   : Uint4B
+#    +0x0ac OSBuildNumber    : Uint2B
+#    +0x0ae OSCSDVersion     : Uint2B
+#    +0x0b0 OSPlatformId     : Uint4B
+#    +0x0b4 ImageSubsystem   : Uint4B
+#    +0x0b8 ImageSubsystemMajorVersion : Uint4B
+#    +0x0bc ImageSubsystemMinorVersion : Uint4B
+#    +0x0c0 ActiveProcessAffinityMask : Uint4B
+#    +0x0c4 GdiHandleBuffer  : [34] Uint4B
+#    +0x14c PostProcessInitRoutine : Ptr32     void
+#    +0x150 TlsExpansionBitmap : Ptr32 Void
+#    +0x154 TlsExpansionBitmapBits : [32] Uint4B
+#    +0x1d4 SessionId        : Uint4B
+#    +0x1d8 AppCompatFlags   : _ULARGE_INTEGER
+#    +0x1e0 AppCompatFlagsUser : _ULARGE_INTEGER
+#    +0x1e8 pShimData        : Ptr32 Void
+#    +0x1ec AppCompatInfo    : Ptr32 Void
+#    +0x1f0 CSDVersion       : _UNICODE_STRING
+#    +0x1f8 ActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x1fc ProcessAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x200 SystemDefaultActivationContextData : Ptr32 _ACTIVATION_CONTEXT_DATA
+#    +0x204 SystemAssemblyStorageMap : Ptr32 _ASSEMBLY_STORAGE_MAP
+#    +0x208 MinimumStackCommit : Uint4B
+#    +0x20c FlsCallback      : Ptr32 _FLS_CALLBACK_INFO
+#    +0x210 FlsListHead      : _LIST_ENTRY
+#    +0x218 FlsBitmap        : Ptr32 Void
+#    +0x21c FlsBitmapBits    : [4] Uint4B
+#    +0x22c FlsHighIndex     : Uint4B
+#    +0x230 WerRegistrationData : Ptr32 Void
+#    +0x234 WerShipAssertPtr : Ptr32 Void
+#    +0x238 pContextData     : Ptr32 Void
+#    +0x23c pImageHeaderHash : Ptr32 Void
+class _PEB_W7_Beta(Structure):
+    """
+    This definition of the PEB structure is only valid for the beta versions
+    of Windows 7. For the final version of Windows 7 use L{_PEB_W7} instead.
+    This structure is not chosen automatically.
+    """
+    _pack_   = 8
+    _fields_ = [
+        ("InheritedAddressSpace",               BOOLEAN),
+        ("ReadImageFileExecOptions",            UCHAR),
+        ("BeingDebugged",                       BOOLEAN),
+        ("BitField",                            UCHAR),
+        ("Mutant",                              HANDLE),
+        ("ImageBaseAddress",                    PVOID),
+        ("Ldr",                                 PVOID), # PPEB_LDR_DATA
+        ("ProcessParameters",                   PVOID), # PRTL_USER_PROCESS_PARAMETERS
+        ("SubSystemData",                       PVOID),
+        ("ProcessHeap",                         PVOID),
+        ("FastPebLock",                         PVOID), # PRTL_CRITICAL_SECTION
+        ("AtlThunkSListPtr",                    PVOID),
+        ("IFEOKey",                             PVOID),
+        ("CrossProcessFlags",                   DWORD),
+        ("KernelCallbackTable",                 PVOID),
+        ("SystemReserved",                      DWORD),
+        ("TracingFlags",                        DWORD),
+        ("ApiSetMap",                           PVOID),
+        ("TlsExpansionCounter",                 DWORD),
+        ("TlsBitmap",                           PVOID),
+        ("TlsBitmapBits",                       DWORD * 2),
+        ("ReadOnlySharedMemoryBase",            PVOID),
+        ("HotpatchInformation",                 PVOID),
+        ("ReadOnlyStaticServerData",            PVOID), # Ptr32 Ptr32 Void
+        ("AnsiCodePageData",                    PVOID),
+        ("OemCodePageData",                     PVOID),
+        ("UnicodeCaseTableData",                PVOID),
+        ("NumberOfProcessors",                  DWORD),
+        ("NtGlobalFlag",                        DWORD),
+        ("CriticalSectionTimeout",              LONGLONG),  # LARGE_INTEGER
+        ("HeapSegmentReserve",                  DWORD),
+        ("HeapSegmentCommit",                   DWORD),
+        ("HeapDeCommitTotalFreeThreshold",      DWORD),
+        ("HeapDeCommitFreeBlockThreshold",      DWORD),
+        ("NumberOfHeaps",                       DWORD),
+        ("MaximumNumberOfHeaps",                DWORD),
+        ("ProcessHeaps",                        PVOID), # Ptr32 Ptr32 Void
+        ("GdiSharedHandleTable",                PVOID),
+        ("ProcessStarterHelper",                PVOID),
+        ("GdiDCAttributeList",                  DWORD),
+        ("LoaderLock",                          PVOID), # PRTL_CRITICAL_SECTION
+        ("OSMajorVersion",                      DWORD),
+        ("OSMinorVersion",                      DWORD),
+        ("OSBuildNumber",                       WORD),
+        ("OSCSDVersion",                        WORD),
+        ("OSPlatformId",                        DWORD),
+        ("ImageSubsystem",                      DWORD),
+        ("ImageSubsystemMajorVersion",          DWORD),
+        ("ImageSubsystemMinorVersion",          DWORD),
+        ("ActiveProcessAffinityMask",           DWORD),
+        ("GdiHandleBuffer",                     DWORD * 34),
+        ("PostProcessInitRoutine",              PPS_POST_PROCESS_INIT_ROUTINE),
+        ("TlsExpansionBitmap",                  PVOID),
+        ("TlsExpansionBitmapBits",              DWORD * 32),
+        ("SessionId",                           DWORD),
+        ("AppCompatFlags",                      ULONGLONG), # ULARGE_INTEGER
+        ("AppCompatFlagsUser",                  ULONGLONG), # ULARGE_INTEGER
+        ("pShimData",                           PVOID),
+        ("AppCompatInfo",                       PVOID),
+        ("CSDVersion",                          UNICODE_STRING),
+        ("ActivationContextData",               PVOID), # ACTIVATION_CONTEXT_DATA
+        ("ProcessAssemblyStorageMap",           PVOID), # ASSEMBLY_STORAGE_MAP
+        ("SystemDefaultActivationContextData",  PVOID), # ACTIVATION_CONTEXT_DATA
+        ("SystemAssemblyStorageMap",            PVOID), # ASSEMBLY_STORAGE_MAP
+        ("MinimumStackCommit",                  DWORD),
+        ("FlsCallback",                         PVOID), # PFLS_CALLBACK_INFO
+        ("FlsListHead",                         LIST_ENTRY),
+        ("FlsBitmap",                           PVOID),
+        ("FlsBitmapBits",                       DWORD * 4),
+        ("FlsHighIndex",                        DWORD),
+        ("WerRegistrationData",                 PVOID),
+        ("WerShipAssertPtr",                    PVOID),
+        ("pContextData",                        PVOID),
+        ("pImageHeaderHash",                    PVOID),
+    ]
+    def __get_UserSharedInfoPtr(self):
+        return self.KernelCallbackTable
+    def __set_UserSharedInfoPtr(self, value):
+        self.KernelCallbackTable = value
+    UserSharedInfoPtr = property(__get_UserSharedInfoPtr, __set_UserSharedInfoPtr)
+
+# Use the correct PEB structure definition for the current Windows version.
+# Unknown or newer versions fall back to the Windows 7 definitions, chosen by
+# pointer size. A short usage sketch follows the class below.
+class PEB(Structure):
+    _pack_ = 8
+    if os == 'Windows NT':
+        _pack_   = _PEB_NT._pack_
+        _fields_ = _PEB_NT._fields_
+    elif os == 'Windows 2000':
+        _pack_   = _PEB_2000._pack_
+        _fields_ = _PEB_2000._fields_
+    elif os == 'Windows XP':
+        _fields_ = _PEB_XP._fields_
+    elif os == 'Windows XP (64 bits)':
+        _fields_ = _PEB_XP_64._fields_
+    elif os == 'Windows 2003':
+        _fields_ = _PEB_2003._fields_
+    elif os == 'Windows 2003 (64 bits)':
+        _fields_ = _PEB_2003_64._fields_
+    elif os == 'Windows 2003 R2':
+        _fields_ = _PEB_2003_R2._fields_
+    elif os == 'Windows 2003 R2 (64 bits)':
+        _fields_ = _PEB_2003_R2_64._fields_
+    elif os == 'Windows 2008':
+        _fields_ = _PEB_2008._fields_
+    elif os == 'Windows 2008 (64 bits)':
+        _fields_ = _PEB_2008_64._fields_
+    elif os == 'Windows 2008 R2':
+        _fields_ = _PEB_2008_R2._fields_
+    elif os == 'Windows 2008 R2 (64 bits)':
+        _fields_ = _PEB_2008_R2_64._fields_
+    elif os == 'Windows Vista':
+        _fields_ = _PEB_Vista._fields_
+    elif os == 'Windows Vista (64 bits)':
+        _fields_ = _PEB_Vista_64._fields_
+    elif os == 'Windows 7':
+        _fields_ = _PEB_W7._fields_
+    elif os == 'Windows 7 (64 bits)':
+        _fields_ = _PEB_W7_64._fields_
+    elif sizeof(SIZE_T) == sizeof(DWORD):
+        _fields_ = _PEB_W7._fields_
+    else:
+        _fields_ = _PEB_W7_64._fields_
+PPEB = POINTER(PEB)
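+
+# Editor's sketch (not part of the original module): one way the OS-dependent
+# PEB definition above could be used to read the PEB of another process.
+# ``hProcess`` is assumed to be a handle opened with PROCESS_VM_READ, and
+# ``peb_address`` is assumed to come from elsewhere (for example from
+# NtQueryInformationProcess with ProcessBasicInformation).
+def _example_read_remote_peb(hProcess, peb_address):
+    import ctypes
+    peb        = PEB()
+    bytes_read = ctypes.c_size_t(0)
+    ok = ctypes.windll.kernel32.ReadProcessMemory(
+        hProcess, ctypes.c_void_p(peb_address), ctypes.byref(peb),
+        ctypes.sizeof(peb), ctypes.byref(bytes_read))
+    if not ok:
+        raise ctypes.WinError()
+    return peb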
+
+# PEB structure for WOW64 processes.
+class PEB_32(Structure):
+    _pack_ = 8
+    if os == 'Windows NT':
+        _pack_   = _PEB_NT._pack_
+        _fields_ = _PEB_NT._fields_
+    elif os == 'Windows 2000':
+        _pack_   = _PEB_2000._pack_
+        _fields_ = _PEB_2000._fields_
+    elif os.startswith('Windows XP'):
+        _fields_ = _PEB_XP._fields_
+    elif os.startswith('Windows 2003 R2'):
+        _fields_ = _PEB_2003_R2._fields_
+    elif os.startswith('Windows 2003'):
+        _fields_ = _PEB_2003._fields_
+    elif os.startswith('Windows 2008 R2'):
+        _fields_ = _PEB_2008_R2._fields_
+    elif os.startswith('Windows 2008'):
+        _fields_ = _PEB_2008._fields_
+    elif os.startswith('Windows Vista'):
+        _fields_ = _PEB_Vista._fields_
+    else: #if os.startswith('Windows 7'):
+        _fields_ = _PEB_W7._fields_
+
+# from https://vmexplorer.svn.codeplex.com/svn/VMExplorer/src/Win32/Threads.cs
+#
+# [StructLayout (LayoutKind.Sequential, Size = 0x0C)]
+# public struct Wx86ThreadState
+# {
+# 	public IntPtr  CallBx86Eip; // Ptr32 to Uint4B
+# 	public IntPtr  DeallocationCpu; // Ptr32 to Void
+# 	public Byte  UseKnownWx86Dll; // UChar
+# 	public Byte  OleStubInvoked; // Char
+# };
+class Wx86ThreadState(Structure):
+    _fields_ = [
+        ("CallBx86Eip",             PVOID),
+        ("DeallocationCpu",         PVOID),
+        ("UseKnownWx86Dll",         UCHAR),
+        ("OleStubInvoked",          CHAR),
+]
+
+# ntdll!_RTL_ACTIVATION_CONTEXT_STACK_FRAME
+#    +0x000 Previous         : Ptr64 _RTL_ACTIVATION_CONTEXT_STACK_FRAME
+#    +0x008 ActivationContext : Ptr64 _ACTIVATION_CONTEXT
+#    +0x010 Flags            : Uint4B
+class RTL_ACTIVATION_CONTEXT_STACK_FRAME(Structure):
+    _fields_ = [
+        ("Previous",                    PVOID),
+        ("ActivationContext",           PVOID),
+        ("Flags",                       DWORD),
+]
+
+# ntdll!_ACTIVATION_CONTEXT_STACK
+#    +0x000 ActiveFrame      : Ptr64 _RTL_ACTIVATION_CONTEXT_STACK_FRAME
+#    +0x008 FrameListCache   : _LIST_ENTRY
+#    +0x018 Flags            : Uint4B
+#    +0x01c NextCookieSequenceNumber : Uint4B
+#    +0x020 StackId          : Uint4B
+class ACTIVATION_CONTEXT_STACK(Structure):
+    _fields_ = [
+        ("ActiveFrame",                 PVOID),
+        ("FrameListCache",              LIST_ENTRY),
+        ("Flags",                       DWORD),
+        ("NextCookieSequenceNumber",    DWORD),
+        ("StackId",                     DWORD),
+]
+
+# typedef struct _PROCESSOR_NUMBER {
+#   WORD Group;
+#   BYTE Number;
+#   BYTE Reserved;
+# }PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
+class PROCESSOR_NUMBER(Structure):
+    _fields_ = [
+        ("Group",       WORD),
+        ("Number",      BYTE),
+        ("Reserved",    BYTE),
+]
+
+# from http://www.nirsoft.net/kernel_struct/vista/NT_TIB.html
+#
+# typedef struct _NT_TIB
+# {
+#      PEXCEPTION_REGISTRATION_RECORD ExceptionList;
+#      PVOID StackBase;
+#      PVOID StackLimit;
+#      PVOID SubSystemTib;
+#      union
+#      {
+#           PVOID FiberData;
+#           ULONG Version;
+#      };
+#      PVOID ArbitraryUserPointer;
+#      PNT_TIB Self;
+# } NT_TIB, *PNT_TIB;
+class _NT_TIB_UNION(Union):
+    _fields_ = [
+        ("FiberData",   PVOID),
+        ("Version",     ULONG),
+    ]
+class NT_TIB(Structure):
+    _fields_ = [
+        ("ExceptionList",           PVOID), # PEXCEPTION_REGISTRATION_RECORD
+        ("StackBase",               PVOID),
+        ("StackLimit",              PVOID),
+        ("SubSystemTib",            PVOID),
+        ("u",                       _NT_TIB_UNION),
+        ("ArbitraryUserPointer",    PVOID),
+        ("Self",                    PVOID), # PNTTIB
+    ]
+
+    def __get_FiberData(self):
+        return self.u.FiberData
+    def __set_FiberData(self, value):
+        self.u.FiberData = value
+    FiberData = property(__get_FiberData, __set_FiberData)
+
+    def __get_Version(self):
+        return self.u.Version
+    def __set_Version(self, value):
+        self.u.Version = value
+    Version = property(__get_Version, __set_Version)
+
+PNTTIB = POINTER(NT_TIB)
+
+# From http://www.nirsoft.net/kernel_struct/vista/EXCEPTION_REGISTRATION_RECORD.html
+#
+# typedef struct _EXCEPTION_REGISTRATION_RECORD
+# {
+#      PEXCEPTION_REGISTRATION_RECORD Next;
+#      PEXCEPTION_DISPOSITION Handler;
+# } EXCEPTION_REGISTRATION_RECORD, *PEXCEPTION_REGISTRATION_RECORD;
+class EXCEPTION_REGISTRATION_RECORD(Structure):
+    pass
+
+EXCEPTION_DISPOSITION           = DWORD
+##PEXCEPTION_DISPOSITION          = POINTER(EXCEPTION_DISPOSITION)
+##PEXCEPTION_REGISTRATION_RECORD  = POINTER(EXCEPTION_REGISTRATION_RECORD)
+PEXCEPTION_DISPOSITION          = PVOID
+PEXCEPTION_REGISTRATION_RECORD  = PVOID
+
+EXCEPTION_REGISTRATION_RECORD._fields_ = [
+        ("Next",    PEXCEPTION_REGISTRATION_RECORD),
+        ("Handler", PEXCEPTION_DISPOSITION),
+]
+
+##PPEB = POINTER(PEB)
+PPEB = PVOID
+
+# From http://www.nirsoft.net/kernel_struct/vista/GDI_TEB_BATCH.html
+#
+# typedef struct _GDI_TEB_BATCH
+# {
+#      ULONG Offset;
+#      ULONG HDC;
+#      ULONG Buffer[310];
+# } GDI_TEB_BATCH, *PGDI_TEB_BATCH;
+class GDI_TEB_BATCH(Structure):
+    _fields_ = [
+        ("Offset",  ULONG),
+        ("HDC",     ULONG),
+        ("Buffer",  ULONG * 310),
+]
+
+# ntdll!_TEB_ACTIVE_FRAME_CONTEXT
+#    +0x000 Flags            : Uint4B
+#    +0x008 FrameName        : Ptr64 Char
+class TEB_ACTIVE_FRAME_CONTEXT(Structure):
+    _fields_ = [
+        ("Flags",       DWORD),
+        ("FrameName",   LPVOID),    # LPCHAR
+]
+PTEB_ACTIVE_FRAME_CONTEXT = POINTER(TEB_ACTIVE_FRAME_CONTEXT)
+
+# ntdll!_TEB_ACTIVE_FRAME
+#    +0x000 Flags            : Uint4B
+#    +0x008 Previous         : Ptr64 _TEB_ACTIVE_FRAME
+#    +0x010 Context          : Ptr64 _TEB_ACTIVE_FRAME_CONTEXT
+class TEB_ACTIVE_FRAME(Structure):
+    _fields_ = [
+        ("Flags",       DWORD),
+        ("Previous",    LPVOID),    # PTEB_ACTIVE_FRAME
+        ("Context",     LPVOID),    # PTEB_ACTIVE_FRAME_CONTEXT
+]
+PTEB_ACTIVE_FRAME = POINTER(TEB_ACTIVE_FRAME)
+
+# SameTebFlags
+DbgSafeThunkCall        = 1 << 0
+DbgInDebugPrint         = 1 << 1
+DbgHasFiberData         = 1 << 2
+DbgSkipThreadAttach     = 1 << 3
+DbgWerInShipAssertCode  = 1 << 4
+DbgRanProcessInit       = 1 << 5
+DbgClonedThread         = 1 << 6
+DbgSuppressDebugMsg     = 1 << 7
+RtlDisableUserStackWalk = 1 << 8
+RtlExceptionAttached    = 1 << 9
+RtlInitialThread        = 1 << 10
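+
+# Editor's sketch (not part of the original module): these masks are meant for
+# the 16-bit SameTebFlags field found in Vista and later TEB layouts. Assuming
+# ``teb`` is a parsed TEB structure that exposes a SameTebFlags field:
+def _example_thread_was_cloned(teb):
+    return bool(teb.SameTebFlags & DbgClonedThread)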
+
+# XXX This is quite wrong :P
+class _TEB_NT(Structure):
+    _pack_ = 4
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PPEB),
+        ("LastErrorValue",                  ULONG),
+        ("CountOfOwnedCriticalSections",    ULONG),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  ULONG * 26),
+        ("UserReserved",                    ULONG * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   ULONG),
+        ("FpSoftwareStatusRegister",        ULONG),
+        ("SystemReserved1",                 PVOID * 54),
+        ("Spare1",                          PVOID),
+        ("ExceptionCode",                   ULONG),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     ULONG * 36),
+        ("TxFsContext",                     ULONG),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          PVOID),
+        ("GdiClientPID",                    ULONG),
+        ("GdiClientTID",                    ULONG),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 PVOID * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     ULONG * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorDisabled",               ULONG),
+        ("Instrumentation",                 PVOID * 9),
+        ("ActivityId",                      GUID),
+        ("SubProcessTag",                   PVOID),
+        ("EtwLocalData",                    PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   ULONG),
+        ("SpareBool0",                      BOOLEAN),
+        ("SpareBool1",                      BOOLEAN),
+        ("SpareBool2",                      BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            ULONG),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             ULONG),
+        ("StackCommit",                     PVOID),
+        ("StackCommitMax",                  PVOID),
+        ("StackReserved",                   PVOID),
+]
+
+# not really, but "dt _TEB" in w2k isn't working for me :(
+_TEB_2000 = _TEB_NT
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x01c EnvironmentPointer : Ptr32 Void
+#    +0x020 ClientId         : _CLIENT_ID
+#    +0x028 ActiveRpcHandle  : Ptr32 Void
+#    +0x02c ThreadLocalStoragePointer : Ptr32 Void
+#    +0x030 ProcessEnvironmentBlock : Ptr32 _PEB
+#    +0x034 LastErrorValue   : Uint4B
+#    +0x038 CountOfOwnedCriticalSections : Uint4B
+#    +0x03c CsrClientThread  : Ptr32 Void
+#    +0x040 Win32ThreadInfo  : Ptr32 Void
+#    +0x044 User32Reserved   : [26] Uint4B
+#    +0x0ac UserReserved     : [5] Uint4B
+#    +0x0c0 WOW32Reserved    : Ptr32 Void
+#    +0x0c4 CurrentLocale    : Uint4B
+#    +0x0c8 FpSoftwareStatusRegister : Uint4B
+#    +0x0cc SystemReserved1  : [54] Ptr32 Void
+#    +0x1a4 ExceptionCode    : Int4B
+#    +0x1a8 ActivationContextStack : _ACTIVATION_CONTEXT_STACK
+#    +0x1bc SpareBytes1      : [24] UChar
+#    +0x1d4 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x6b4 RealClientId     : _CLIENT_ID
+#    +0x6bc GdiCachedProcessHandle : Ptr32 Void
+#    +0x6c0 GdiClientPID     : Uint4B
+#    +0x6c4 GdiClientTID     : Uint4B
+#    +0x6c8 GdiThreadLocalInfo : Ptr32 Void
+#    +0x6cc Win32ClientInfo  : [62] Uint4B
+#    +0x7c4 glDispatchTable  : [233] Ptr32 Void
+#    +0xb68 glReserved1      : [29] Uint4B
+#    +0xbdc glReserved2      : Ptr32 Void
+#    +0xbe0 glSectionInfo    : Ptr32 Void
+#    +0xbe4 glSection        : Ptr32 Void
+#    +0xbe8 glTable          : Ptr32 Void
+#    +0xbec glCurrentRC      : Ptr32 Void
+#    +0xbf0 glContext        : Ptr32 Void
+#    +0xbf4 LastStatusValue  : Uint4B
+#    +0xbf8 StaticUnicodeString : _UNICODE_STRING
+#    +0xc00 StaticUnicodeBuffer : [261] Uint2B
+#    +0xe0c DeallocationStack : Ptr32 Void
+#    +0xe10 TlsSlots         : [64] Ptr32 Void
+#    +0xf10 TlsLinks         : _LIST_ENTRY
+#    +0xf18 Vdm              : Ptr32 Void
+#    +0xf1c ReservedForNtRpc : Ptr32 Void
+#    +0xf20 DbgSsReserved    : [2] Ptr32 Void
+#    +0xf28 HardErrorsAreDisabled : Uint4B
+#    +0xf2c Instrumentation  : [16] Ptr32 Void
+#    +0xf6c WinSockData      : Ptr32 Void
+#    +0xf70 GdiBatchCount    : Uint4B
+#    +0xf74 InDbgPrint       : UChar
+#    +0xf75 FreeStackOnTermination : UChar
+#    +0xf76 HasFiberData     : UChar
+#    +0xf77 IdealProcessor   : UChar
+#    +0xf78 Spare3           : Uint4B
+#    +0xf7c ReservedForPerf  : Ptr32 Void
+#    +0xf80 ReservedForOle   : Ptr32 Void
+#    +0xf84 WaitingOnLoaderLock : Uint4B
+#    +0xf88 Wx86Thread       : _Wx86ThreadState
+#    +0xf94 TlsExpansionSlots : Ptr32 Ptr32 Void
+#    +0xf98 ImpersonationLocale : Uint4B
+#    +0xf9c IsImpersonating  : Uint4B
+#    +0xfa0 NlsCache         : Ptr32 Void
+#    +0xfa4 pShimData        : Ptr32 Void
+#    +0xfa8 HeapVirtualAffinity : Uint4B
+#    +0xfac CurrentTransactionHandle : Ptr32 Void
+#    +0xfb0 ActiveFrame      : Ptr32 _TEB_ACTIVE_FRAME
+#    +0xfb4 SafeThunkCall    : UChar
+#    +0xfb5 BooleanSpare     : [3] UChar
+class _TEB_XP(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     UCHAR * 24),
+        ("TxFsContext",                     DWORD),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 DWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     DWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorsAreDisabled",           DWORD),
+        ("Instrumentation",                 PVOID * 16),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("InDbgPrint",                      BOOLEAN),
+        ("FreeStackOnTermination",          BOOLEAN),
+        ("HasFiberData",                    BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("Spare3",                          DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("Wx86Thread",                      Wx86ThreadState),
+        ("TlsExpansionSlots",               PVOID), # Ptr32 Ptr32 Void
+        ("ImpersonationLocale",             DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("SafeThunkCall",                   BOOLEAN),
+        ("BooleanSpare",                    BOOLEAN * 3),
+]
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x038 EnvironmentPointer : Ptr64 Void
+#    +0x040 ClientId         : _CLIENT_ID
+#    +0x050 ActiveRpcHandle  : Ptr64 Void
+#    +0x058 ThreadLocalStoragePointer : Ptr64 Void
+#    +0x060 ProcessEnvironmentBlock : Ptr64 _PEB
+#    +0x068 LastErrorValue   : Uint4B
+#    +0x06c CountOfOwnedCriticalSections : Uint4B
+#    +0x070 CsrClientThread  : Ptr64 Void
+#    +0x078 Win32ThreadInfo  : Ptr64 Void
+#    +0x080 User32Reserved   : [26] Uint4B
+#    +0x0e8 UserReserved     : [5] Uint4B
+#    +0x100 WOW32Reserved    : Ptr64 Void
+#    +0x108 CurrentLocale    : Uint4B
+#    +0x10c FpSoftwareStatusRegister : Uint4B
+#    +0x110 SystemReserved1  : [54] Ptr64 Void
+#    +0x2c0 ExceptionCode    : Int4B
+#    +0x2c8 ActivationContextStackPointer : Ptr64 _ACTIVATION_CONTEXT_STACK
+#    +0x2d0 SpareBytes1      : [28] UChar
+#    +0x2f0 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x7d8 RealClientId     : _CLIENT_ID
+#    +0x7e8 GdiCachedProcessHandle : Ptr64 Void
+#    +0x7f0 GdiClientPID     : Uint4B
+#    +0x7f4 GdiClientTID     : Uint4B
+#    +0x7f8 GdiThreadLocalInfo : Ptr64 Void
+#    +0x800 Win32ClientInfo  : [62] Uint8B
+#    +0x9f0 glDispatchTable  : [233] Ptr64 Void
+#    +0x1138 glReserved1      : [29] Uint8B
+#    +0x1220 glReserved2      : Ptr64 Void
+#    +0x1228 glSectionInfo    : Ptr64 Void
+#    +0x1230 glSection        : Ptr64 Void
+#    +0x1238 glTable          : Ptr64 Void
+#    +0x1240 glCurrentRC      : Ptr64 Void
+#    +0x1248 glContext        : Ptr64 Void
+#    +0x1250 LastStatusValue  : Uint4B
+#    +0x1258 StaticUnicodeString : _UNICODE_STRING
+#    +0x1268 StaticUnicodeBuffer : [261] Uint2B
+#    +0x1478 DeallocationStack : Ptr64 Void
+#    +0x1480 TlsSlots         : [64] Ptr64 Void
+#    +0x1680 TlsLinks         : _LIST_ENTRY
+#    +0x1690 Vdm              : Ptr64 Void
+#    +0x1698 ReservedForNtRpc : Ptr64 Void
+#    +0x16a0 DbgSsReserved    : [2] Ptr64 Void
+#    +0x16b0 HardErrorMode    : Uint4B
+#    +0x16b8 Instrumentation  : [14] Ptr64 Void
+#    +0x1728 SubProcessTag    : Ptr64 Void
+#    +0x1730 EtwTraceData     : Ptr64 Void
+#    +0x1738 WinSockData      : Ptr64 Void
+#    +0x1740 GdiBatchCount    : Uint4B
+#    +0x1744 InDbgPrint       : UChar
+#    +0x1745 FreeStackOnTermination : UChar
+#    +0x1746 HasFiberData     : UChar
+#    +0x1747 IdealProcessor   : UChar
+#    +0x1748 GuaranteedStackBytes : Uint4B
+#    +0x1750 ReservedForPerf  : Ptr64 Void
+#    +0x1758 ReservedForOle   : Ptr64 Void
+#    +0x1760 WaitingOnLoaderLock : Uint4B
+#    +0x1768 SparePointer1    : Uint8B
+#    +0x1770 SoftPatchPtr1    : Uint8B
+#    +0x1778 SoftPatchPtr2    : Uint8B
+#    +0x1780 TlsExpansionSlots : Ptr64 Ptr64 Void
+#    +0x1788 DeallocationBStore : Ptr64 Void
+#    +0x1790 BStoreLimit      : Ptr64 Void
+#    +0x1798 ImpersonationLocale : Uint4B
+#    +0x179c IsImpersonating  : Uint4B
+#    +0x17a0 NlsCache         : Ptr64 Void
+#    +0x17a8 pShimData        : Ptr64 Void
+#    +0x17b0 HeapVirtualAffinity : Uint4B
+#    +0x17b8 CurrentTransactionHandle : Ptr64 Void
+#    +0x17c0 ActiveFrame      : Ptr64 _TEB_ACTIVE_FRAME
+#    +0x17c8 FlsData          : Ptr64 Void
+#    +0x17d0 SafeThunkCall    : UChar
+#    +0x17d1 BooleanSpare     : [3] UChar
+class _TEB_XP_64(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 PVOID),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     UCHAR * 28),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 QWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     QWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 14),
+        ("SubProcessTag",                   PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("InDbgPrint",                      BOOLEAN),
+        ("FreeStackOnTermination",          BOOLEAN),
+        ("HasFiberData",                    BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SparePointer1",                   PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("SoftPatchPtr2",                   PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr64 Ptr64 Void
+        ("DeallocationBStore",              PVOID),
+        ("BStoreLimit",                     PVOID),
+        ("ImpersonationLocale",             DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("SafeThunkCall",                   BOOLEAN),
+        ("BooleanSpare",                    BOOLEAN * 3),
+]
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x01c EnvironmentPointer : Ptr32 Void
+#    +0x020 ClientId         : _CLIENT_ID
+#    +0x028 ActiveRpcHandle  : Ptr32 Void
+#    +0x02c ThreadLocalStoragePointer : Ptr32 Void
+#    +0x030 ProcessEnvironmentBlock : Ptr32 _PEB
+#    +0x034 LastErrorValue   : Uint4B
+#    +0x038 CountOfOwnedCriticalSections : Uint4B
+#    +0x03c CsrClientThread  : Ptr32 Void
+#    +0x040 Win32ThreadInfo  : Ptr32 Void
+#    +0x044 User32Reserved   : [26] Uint4B
+#    +0x0ac UserReserved     : [5] Uint4B
+#    +0x0c0 WOW32Reserved    : Ptr32 Void
+#    +0x0c4 CurrentLocale    : Uint4B
+#    +0x0c8 FpSoftwareStatusRegister : Uint4B
+#    +0x0cc SystemReserved1  : [54] Ptr32 Void
+#    +0x1a4 ExceptionCode    : Int4B
+#    +0x1a8 ActivationContextStackPointer : Ptr32 _ACTIVATION_CONTEXT_STACK
+#    +0x1ac SpareBytes1      : [40] UChar
+#    +0x1d4 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x6b4 RealClientId     : _CLIENT_ID
+#    +0x6bc GdiCachedProcessHandle : Ptr32 Void
+#    +0x6c0 GdiClientPID     : Uint4B
+#    +0x6c4 GdiClientTID     : Uint4B
+#    +0x6c8 GdiThreadLocalInfo : Ptr32 Void
+#    +0x6cc Win32ClientInfo  : [62] Uint4B
+#    +0x7c4 glDispatchTable  : [233] Ptr32 Void
+#    +0xb68 glReserved1      : [29] Uint4B
+#    +0xbdc glReserved2      : Ptr32 Void
+#    +0xbe0 glSectionInfo    : Ptr32 Void
+#    +0xbe4 glSection        : Ptr32 Void
+#    +0xbe8 glTable          : Ptr32 Void
+#    +0xbec glCurrentRC      : Ptr32 Void
+#    +0xbf0 glContext        : Ptr32 Void
+#    +0xbf4 LastStatusValue  : Uint4B
+#    +0xbf8 StaticUnicodeString : _UNICODE_STRING
+#    +0xc00 StaticUnicodeBuffer : [261] Uint2B
+#    +0xe0c DeallocationStack : Ptr32 Void
+#    +0xe10 TlsSlots         : [64] Ptr32 Void
+#    +0xf10 TlsLinks         : _LIST_ENTRY
+#    +0xf18 Vdm              : Ptr32 Void
+#    +0xf1c ReservedForNtRpc : Ptr32 Void
+#    +0xf20 DbgSsReserved    : [2] Ptr32 Void
+#    +0xf28 HardErrorMode    : Uint4B
+#    +0xf2c Instrumentation  : [14] Ptr32 Void
+#    +0xf64 SubProcessTag    : Ptr32 Void
+#    +0xf68 EtwTraceData     : Ptr32 Void
+#    +0xf6c WinSockData      : Ptr32 Void
+#    +0xf70 GdiBatchCount    : Uint4B
+#    +0xf74 InDbgPrint       : UChar
+#    +0xf75 FreeStackOnTermination : UChar
+#    +0xf76 HasFiberData     : UChar
+#    +0xf77 IdealProcessor   : UChar
+#    +0xf78 GuaranteedStackBytes : Uint4B
+#    +0xf7c ReservedForPerf  : Ptr32 Void
+#    +0xf80 ReservedForOle   : Ptr32 Void
+#    +0xf84 WaitingOnLoaderLock : Uint4B
+#    +0xf88 SparePointer1    : Uint4B
+#    +0xf8c SoftPatchPtr1    : Uint4B
+#    +0xf90 SoftPatchPtr2    : Uint4B
+#    +0xf94 TlsExpansionSlots : Ptr32 Ptr32 Void
+#    +0xf98 ImpersonationLocale : Uint4B
+#    +0xf9c IsImpersonating  : Uint4B
+#    +0xfa0 NlsCache         : Ptr32 Void
+#    +0xfa4 pShimData        : Ptr32 Void
+#    +0xfa8 HeapVirtualAffinity : Uint4B
+#    +0xfac CurrentTransactionHandle : Ptr32 Void
+#    +0xfb0 ActiveFrame      : Ptr32 _TEB_ACTIVE_FRAME
+#    +0xfb4 FlsData          : Ptr32 Void
+#    +0xfb8 SafeThunkCall    : UChar
+#    +0xfb9 BooleanSpare     : [3] UChar
+class _TEB_2003(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     UCHAR * 40),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 DWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     DWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 14),
+        ("SubProcessTag",                   PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("InDbgPrint",                      BOOLEAN),
+        ("FreeStackOnTermination",          BOOLEAN),
+        ("HasFiberData",                    BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SparePointer1",                   PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("SoftPatchPtr2",                   PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr32 Ptr32 Void
+        ("ImpersonationLocale",             DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("SafeThunkCall",                   BOOLEAN),
+        ("BooleanSpare",                    BOOLEAN * 3),
+]
+
+_TEB_2003_64    = _TEB_XP_64
+_TEB_2003_R2    = _TEB_2003
+_TEB_2003_R2_64 = _TEB_2003_64
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x01c EnvironmentPointer : Ptr32 Void
+#    +0x020 ClientId         : _CLIENT_ID
+#    +0x028 ActiveRpcHandle  : Ptr32 Void
+#    +0x02c ThreadLocalStoragePointer : Ptr32 Void
+#    +0x030 ProcessEnvironmentBlock : Ptr32 _PEB
+#    +0x034 LastErrorValue   : Uint4B
+#    +0x038 CountOfOwnedCriticalSections : Uint4B
+#    +0x03c CsrClientThread  : Ptr32 Void
+#    +0x040 Win32ThreadInfo  : Ptr32 Void
+#    +0x044 User32Reserved   : [26] Uint4B
+#    +0x0ac UserReserved     : [5] Uint4B
+#    +0x0c0 WOW32Reserved    : Ptr32 Void
+#    +0x0c4 CurrentLocale    : Uint4B
+#    +0x0c8 FpSoftwareStatusRegister : Uint4B
+#    +0x0cc SystemReserved1  : [54] Ptr32 Void
+#    +0x1a4 ExceptionCode    : Int4B
+#    +0x1a8 ActivationContextStackPointer : Ptr32 _ACTIVATION_CONTEXT_STACK
+#    +0x1ac SpareBytes1      : [36] UChar
+#    +0x1d0 TxFsContext      : Uint4B
+#    +0x1d4 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x6b4 RealClientId     : _CLIENT_ID
+#    +0x6bc GdiCachedProcessHandle : Ptr32 Void
+#    +0x6c0 GdiClientPID     : Uint4B
+#    +0x6c4 GdiClientTID     : Uint4B
+#    +0x6c8 GdiThreadLocalInfo : Ptr32 Void
+#    +0x6cc Win32ClientInfo  : [62] Uint4B
+#    +0x7c4 glDispatchTable  : [233] Ptr32 Void
+#    +0xb68 glReserved1      : [29] Uint4B
+#    +0xbdc glReserved2      : Ptr32 Void
+#    +0xbe0 glSectionInfo    : Ptr32 Void
+#    +0xbe4 glSection        : Ptr32 Void
+#    +0xbe8 glTable          : Ptr32 Void
+#    +0xbec glCurrentRC      : Ptr32 Void
+#    +0xbf0 glContext        : Ptr32 Void
+#    +0xbf4 LastStatusValue  : Uint4B
+#    +0xbf8 StaticUnicodeString : _UNICODE_STRING
+#    +0xc00 StaticUnicodeBuffer : [261] Wchar
+#    +0xe0c DeallocationStack : Ptr32 Void
+#    +0xe10 TlsSlots         : [64] Ptr32 Void
+#    +0xf10 TlsLinks         : _LIST_ENTRY
+#    +0xf18 Vdm              : Ptr32 Void
+#    +0xf1c ReservedForNtRpc : Ptr32 Void
+#    +0xf20 DbgSsReserved    : [2] Ptr32 Void
+#    +0xf28 HardErrorMode    : Uint4B
+#    +0xf2c Instrumentation  : [9] Ptr32 Void
+#    +0xf50 ActivityId       : _GUID
+#    +0xf60 SubProcessTag    : Ptr32 Void
+#    +0xf64 EtwLocalData     : Ptr32 Void
+#    +0xf68 EtwTraceData     : Ptr32 Void
+#    +0xf6c WinSockData      : Ptr32 Void
+#    +0xf70 GdiBatchCount    : Uint4B
+#    +0xf74 SpareBool0       : UChar
+#    +0xf75 SpareBool1       : UChar
+#    +0xf76 SpareBool2       : UChar
+#    +0xf77 IdealProcessor   : UChar
+#    +0xf78 GuaranteedStackBytes : Uint4B
+#    +0xf7c ReservedForPerf  : Ptr32 Void
+#    +0xf80 ReservedForOle   : Ptr32 Void
+#    +0xf84 WaitingOnLoaderLock : Uint4B
+#    +0xf88 SavedPriorityState : Ptr32 Void
+#    +0xf8c SoftPatchPtr1    : Uint4B
+#    +0xf90 ThreadPoolData   : Ptr32 Void
+#    +0xf94 TlsExpansionSlots : Ptr32 Ptr32 Void
+#    +0xf98 ImpersonationLocale : Uint4B
+#    +0xf9c IsImpersonating  : Uint4B
+#    +0xfa0 NlsCache         : Ptr32 Void
+#    +0xfa4 pShimData        : Ptr32 Void
+#    +0xfa8 HeapVirtualAffinity : Uint4B
+#    +0xfac CurrentTransactionHandle : Ptr32 Void
+#    +0xfb0 ActiveFrame      : Ptr32 _TEB_ACTIVE_FRAME
+#    +0xfb4 FlsData          : Ptr32 Void
+#    +0xfb8 PreferredLanguages : Ptr32 Void
+#    +0xfbc UserPrefLanguages : Ptr32 Void
+#    +0xfc0 MergedPrefLanguages : Ptr32 Void
+#    +0xfc4 MuiImpersonation : Uint4B
+#    +0xfc8 CrossTebFlags    : Uint2B
+#    +0xfc8 SpareCrossTebBits : Pos 0, 16 Bits
+#    +0xfca SameTebFlags     : Uint2B
+#    +0xfca DbgSafeThunkCall : Pos 0, 1 Bit
+#    +0xfca DbgInDebugPrint  : Pos 1, 1 Bit
+#    +0xfca DbgHasFiberData  : Pos 2, 1 Bit
+#    +0xfca DbgSkipThreadAttach : Pos 3, 1 Bit
+#    +0xfca DbgWerInShipAssertCode : Pos 4, 1 Bit
+#    +0xfca DbgRanProcessInit : Pos 5, 1 Bit
+#    +0xfca DbgClonedThread  : Pos 6, 1 Bit
+#    +0xfca DbgSuppressDebugMsg : Pos 7, 1 Bit
+#    +0xfca RtlDisableUserStackWalk : Pos 8, 1 Bit
+#    +0xfca RtlExceptionAttached : Pos 9, 1 Bit
+#    +0xfca SpareSameTebBits : Pos 10, 6 Bits
+#    +0xfcc TxnScopeEnterCallback : Ptr32 Void
+#    +0xfd0 TxnScopeExitCallback : Ptr32 Void
+#    +0xfd4 TxnScopeContext  : Ptr32 Void
+#    +0xfd8 LockCount        : Uint4B
+#    +0xfdc ProcessRundown   : Uint4B
+#    +0xfe0 LastSwitchTime   : Uint8B
+#    +0xfe8 TotalSwitchOutTime : Uint8B
+#    +0xff0 WaitReasonBitMap : _LARGE_INTEGER
+class _TEB_2008(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     UCHAR * 36),
+        ("TxFsContext",                     DWORD),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 DWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     DWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 9),
+        ("ActivityId",                      GUID),
+        ("SubProcessTag",                   PVOID),
+        ("EtwLocalData",                    PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("SpareBool0",                      BOOLEAN),
+        ("SpareBool1",                      BOOLEAN),
+        ("SpareBool2",                      BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SavedPriorityState",              PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("ThreadPoolData",                  PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr32 Ptr32 Void
+        ("ImpersonationLocale",             DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("PreferredLanguages",              PVOID),
+        ("UserPrefLanguages",               PVOID),
+        ("MergedPrefLanguages",             PVOID),
+        ("MuiImpersonation",                BOOL),
+        ("CrossTebFlags",                   WORD),
+        ("SameTebFlags",                    WORD),
+        ("TxnScopeEnterCallback",           PVOID),
+        ("TxnScopeExitCallback",            PVOID),
+        ("TxnScopeContext",                 PVOID),
+        ("LockCount",                       DWORD),
+        ("ProcessRundown",                  DWORD),
+        ("LastSwitchTime",                  QWORD),
+        ("TotalSwitchOutTime",              QWORD),
+        ("WaitReasonBitMap",                LONGLONG),  # LARGE_INTEGER
+]
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x038 EnvironmentPointer : Ptr64 Void
+#    +0x040 ClientId         : _CLIENT_ID
+#    +0x050 ActiveRpcHandle  : Ptr64 Void
+#    +0x058 ThreadLocalStoragePointer : Ptr64 Void
+#    +0x060 ProcessEnvironmentBlock : Ptr64 _PEB
+#    +0x068 LastErrorValue   : Uint4B
+#    +0x06c CountOfOwnedCriticalSections : Uint4B
+#    +0x070 CsrClientThread  : Ptr64 Void
+#    +0x078 Win32ThreadInfo  : Ptr64 Void
+#    +0x080 User32Reserved   : [26] Uint4B
+#    +0x0e8 UserReserved     : [5] Uint4B
+#    +0x100 WOW32Reserved    : Ptr64 Void
+#    +0x108 CurrentLocale    : Uint4B
+#    +0x10c FpSoftwareStatusRegister : Uint4B
+#    +0x110 SystemReserved1  : [54] Ptr64 Void
+#    +0x2c0 ExceptionCode    : Int4B
+#    +0x2c8 ActivationContextStackPointer : Ptr64 _ACTIVATION_CONTEXT_STACK
+#    +0x2d0 SpareBytes1      : [24] UChar
+#    +0x2e8 TxFsContext      : Uint4B
+#    +0x2f0 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x7d8 RealClientId     : _CLIENT_ID
+#    +0x7e8 GdiCachedProcessHandle : Ptr64 Void
+#    +0x7f0 GdiClientPID     : Uint4B
+#    +0x7f4 GdiClientTID     : Uint4B
+#    +0x7f8 GdiThreadLocalInfo : Ptr64 Void
+#    +0x800 Win32ClientInfo  : [62] Uint8B
+#    +0x9f0 glDispatchTable  : [233] Ptr64 Void
+#    +0x1138 glReserved1      : [29] Uint8B
+#    +0x1220 glReserved2      : Ptr64 Void
+#    +0x1228 glSectionInfo    : Ptr64 Void
+#    +0x1230 glSection        : Ptr64 Void
+#    +0x1238 glTable          : Ptr64 Void
+#    +0x1240 glCurrentRC      : Ptr64 Void
+#    +0x1248 glContext        : Ptr64 Void
+#    +0x1250 LastStatusValue  : Uint4B
+#    +0x1258 StaticUnicodeString : _UNICODE_STRING
+#    +0x1268 StaticUnicodeBuffer : [261] Wchar
+#    +0x1478 DeallocationStack : Ptr64 Void
+#    +0x1480 TlsSlots         : [64] Ptr64 Void
+#    +0x1680 TlsLinks         : _LIST_ENTRY
+#    +0x1690 Vdm              : Ptr64 Void
+#    +0x1698 ReservedForNtRpc : Ptr64 Void
+#    +0x16a0 DbgSsReserved    : [2] Ptr64 Void
+#    +0x16b0 HardErrorMode    : Uint4B
+#    +0x16b8 Instrumentation  : [11] Ptr64 Void
+#    +0x1710 ActivityId       : _GUID
+#    +0x1720 SubProcessTag    : Ptr64 Void
+#    +0x1728 EtwLocalData     : Ptr64 Void
+#    +0x1730 EtwTraceData     : Ptr64 Void
+#    +0x1738 WinSockData      : Ptr64 Void
+#    +0x1740 GdiBatchCount    : Uint4B
+#    +0x1744 SpareBool0       : UChar
+#    +0x1745 SpareBool1       : UChar
+#    +0x1746 SpareBool2       : UChar
+#    +0x1747 IdealProcessor   : UChar
+#    +0x1748 GuaranteedStackBytes : Uint4B
+#    +0x1750 ReservedForPerf  : Ptr64 Void
+#    +0x1758 ReservedForOle   : Ptr64 Void
+#    +0x1760 WaitingOnLoaderLock : Uint4B
+#    +0x1768 SavedPriorityState : Ptr64 Void
+#    +0x1770 SoftPatchPtr1    : Uint8B
+#    +0x1778 ThreadPoolData   : Ptr64 Void
+#    +0x1780 TlsExpansionSlots : Ptr64 Ptr64 Void
+#    +0x1788 DeallocationBStore : Ptr64 Void
+#    +0x1790 BStoreLimit      : Ptr64 Void
+#    +0x1798 ImpersonationLocale : Uint4B
+#    +0x179c IsImpersonating  : Uint4B
+#    +0x17a0 NlsCache         : Ptr64 Void
+#    +0x17a8 pShimData        : Ptr64 Void
+#    +0x17b0 HeapVirtualAffinity : Uint4B
+#    +0x17b8 CurrentTransactionHandle : Ptr64 Void
+#    +0x17c0 ActiveFrame      : Ptr64 _TEB_ACTIVE_FRAME
+#    +0x17c8 FlsData          : Ptr64 Void
+#    +0x17d0 PreferredLanguages : Ptr64 Void
+#    +0x17d8 UserPrefLanguages : Ptr64 Void
+#    +0x17e0 MergedPrefLanguages : Ptr64 Void
+#    +0x17e8 MuiImpersonation : Uint4B
+#    +0x17ec CrossTebFlags    : Uint2B
+#    +0x17ec SpareCrossTebBits : Pos 0, 16 Bits
+#    +0x17ee SameTebFlags     : Uint2B
+#    +0x17ee DbgSafeThunkCall : Pos 0, 1 Bit
+#    +0x17ee DbgInDebugPrint  : Pos 1, 1 Bit
+#    +0x17ee DbgHasFiberData  : Pos 2, 1 Bit
+#    +0x17ee DbgSkipThreadAttach : Pos 3, 1 Bit
+#    +0x17ee DbgWerInShipAssertCode : Pos 4, 1 Bit
+#    +0x17ee DbgRanProcessInit : Pos 5, 1 Bit
+#    +0x17ee DbgClonedThread  : Pos 6, 1 Bit
+#    +0x17ee DbgSuppressDebugMsg : Pos 7, 1 Bit
+#    +0x17ee RtlDisableUserStackWalk : Pos 8, 1 Bit
+#    +0x17ee RtlExceptionAttached : Pos 9, 1 Bit
+#    +0x17ee SpareSameTebBits : Pos 10, 6 Bits
+#    +0x17f0 TxnScopeEnterCallback : Ptr64 Void
+#    +0x17f8 TxnScopeExitCallback : Ptr64 Void
+#    +0x1800 TxnScopeContext  : Ptr64 Void
+#    +0x1808 LockCount        : Uint4B
+#    +0x180c ProcessRundown   : Uint4B
+#    +0x1810 LastSwitchTime   : Uint8B
+#    +0x1818 TotalSwitchOutTime : Uint8B
+#    +0x1820 WaitReasonBitMap : _LARGE_INTEGER
+class _TEB_2008_64(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes1",                     UCHAR * 24),
+        ("TxFsContext",                     DWORD),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 QWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     QWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 11),
+        ("ActivityId",                      GUID),
+        ("SubProcessTag",                   PVOID),
+        ("EtwLocalData",                    PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("SpareBool0",                      BOOLEAN),
+        ("SpareBool1",                      BOOLEAN),
+        ("SpareBool2",                      BOOLEAN),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SavedPriorityState",              PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("ThreadPoolData",                  PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr64 Ptr64 Void
+        ("DeallocationBStore",              PVOID),
+        ("BStoreLimit",                     PVOID),
+        ("ImpersonationLocale",             DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("PreferredLanguages",              PVOID),
+        ("UserPrefLanguages",               PVOID),
+        ("MergedPrefLanguages",             PVOID),
+        ("MuiImpersonation",                BOOL),
+        ("CrossTebFlags",                   WORD),
+        ("SameTebFlags",                    WORD),
+        ("TxnScopeEnterCallback",           PVOID),
+        ("TxnScopeExitCallback",            PVOID),
+        ("TxnScopeContext",                 PVOID),
+        ("LockCount",                       DWORD),
+        ("ProcessRundown",                  DWORD),
+        ("LastSwitchTime",                  QWORD),
+        ("TotalSwitchOutTime",              QWORD),
+        ("WaitReasonBitMap",                LONGLONG),  # LARGE_INTEGER
+]
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x01c EnvironmentPointer : Ptr32 Void
+#    +0x020 ClientId         : _CLIENT_ID
+#    +0x028 ActiveRpcHandle  : Ptr32 Void
+#    +0x02c ThreadLocalStoragePointer : Ptr32 Void
+#    +0x030 ProcessEnvironmentBlock : Ptr32 _PEB
+#    +0x034 LastErrorValue   : Uint4B
+#    +0x038 CountOfOwnedCriticalSections : Uint4B
+#    +0x03c CsrClientThread  : Ptr32 Void
+#    +0x040 Win32ThreadInfo  : Ptr32 Void
+#    +0x044 User32Reserved   : [26] Uint4B
+#    +0x0ac UserReserved     : [5] Uint4B
+#    +0x0c0 WOW32Reserved    : Ptr32 Void
+#    +0x0c4 CurrentLocale    : Uint4B
+#    +0x0c8 FpSoftwareStatusRegister : Uint4B
+#    +0x0cc SystemReserved1  : [54] Ptr32 Void
+#    +0x1a4 ExceptionCode    : Int4B
+#    +0x1a8 ActivationContextStackPointer : Ptr32 _ACTIVATION_CONTEXT_STACK
+#    +0x1ac SpareBytes       : [36] UChar
+#    +0x1d0 TxFsContext      : Uint4B
+#    +0x1d4 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x6b4 RealClientId     : _CLIENT_ID
+#    +0x6bc GdiCachedProcessHandle : Ptr32 Void
+#    +0x6c0 GdiClientPID     : Uint4B
+#    +0x6c4 GdiClientTID     : Uint4B
+#    +0x6c8 GdiThreadLocalInfo : Ptr32 Void
+#    +0x6cc Win32ClientInfo  : [62] Uint4B
+#    +0x7c4 glDispatchTable  : [233] Ptr32 Void
+#    +0xb68 glReserved1      : [29] Uint4B
+#    +0xbdc glReserved2      : Ptr32 Void
+#    +0xbe0 glSectionInfo    : Ptr32 Void
+#    +0xbe4 glSection        : Ptr32 Void
+#    +0xbe8 glTable          : Ptr32 Void
+#    +0xbec glCurrentRC      : Ptr32 Void
+#    +0xbf0 glContext        : Ptr32 Void
+#    +0xbf4 LastStatusValue  : Uint4B
+#    +0xbf8 StaticUnicodeString : _UNICODE_STRING
+#    +0xc00 StaticUnicodeBuffer : [261] Wchar
+#    +0xe0c DeallocationStack : Ptr32 Void
+#    +0xe10 TlsSlots         : [64] Ptr32 Void
+#    +0xf10 TlsLinks         : _LIST_ENTRY
+#    +0xf18 Vdm              : Ptr32 Void
+#    +0xf1c ReservedForNtRpc : Ptr32 Void
+#    +0xf20 DbgSsReserved    : [2] Ptr32 Void
+#    +0xf28 HardErrorMode    : Uint4B
+#    +0xf2c Instrumentation  : [9] Ptr32 Void
+#    +0xf50 ActivityId       : _GUID
+#    +0xf60 SubProcessTag    : Ptr32 Void
+#    +0xf64 EtwLocalData     : Ptr32 Void
+#    +0xf68 EtwTraceData     : Ptr32 Void
+#    +0xf6c WinSockData      : Ptr32 Void
+#    +0xf70 GdiBatchCount    : Uint4B
+#    +0xf74 CurrentIdealProcessor : _PROCESSOR_NUMBER
+#    +0xf74 IdealProcessorValue : Uint4B
+#    +0xf74 ReservedPad0     : UChar
+#    +0xf75 ReservedPad1     : UChar
+#    +0xf76 ReservedPad2     : UChar
+#    +0xf77 IdealProcessor   : UChar
+#    +0xf78 GuaranteedStackBytes : Uint4B
+#    +0xf7c ReservedForPerf  : Ptr32 Void
+#    +0xf80 ReservedForOle   : Ptr32 Void
+#    +0xf84 WaitingOnLoaderLock : Uint4B
+#    +0xf88 SavedPriorityState : Ptr32 Void
+#    +0xf8c SoftPatchPtr1    : Uint4B
+#    +0xf90 ThreadPoolData   : Ptr32 Void
+#    +0xf94 TlsExpansionSlots : Ptr32 Ptr32 Void
+#    +0xf98 MuiGeneration    : Uint4B
+#    +0xf9c IsImpersonating  : Uint4B
+#    +0xfa0 NlsCache         : Ptr32 Void
+#    +0xfa4 pShimData        : Ptr32 Void
+#    +0xfa8 HeapVirtualAffinity : Uint4B
+#    +0xfac CurrentTransactionHandle : Ptr32 Void
+#    +0xfb0 ActiveFrame      : Ptr32 _TEB_ACTIVE_FRAME
+#    +0xfb4 FlsData          : Ptr32 Void
+#    +0xfb8 PreferredLanguages : Ptr32 Void
+#    +0xfbc UserPrefLanguages : Ptr32 Void
+#    +0xfc0 MergedPrefLanguages : Ptr32 Void
+#    +0xfc4 MuiImpersonation : Uint4B
+#    +0xfc8 CrossTebFlags    : Uint2B
+#    +0xfc8 SpareCrossTebBits : Pos 0, 16 Bits
+#    +0xfca SameTebFlags     : Uint2B
+#    +0xfca SafeThunkCall    : Pos 0, 1 Bit
+#    +0xfca InDebugPrint     : Pos 1, 1 Bit
+#    +0xfca HasFiberData     : Pos 2, 1 Bit
+#    +0xfca SkipThreadAttach : Pos 3, 1 Bit
+#    +0xfca WerInShipAssertCode : Pos 4, 1 Bit
+#    +0xfca RanProcessInit   : Pos 5, 1 Bit
+#    +0xfca ClonedThread     : Pos 6, 1 Bit
+#    +0xfca SuppressDebugMsg : Pos 7, 1 Bit
+#    +0xfca DisableUserStackWalk : Pos 8, 1 Bit
+#    +0xfca RtlExceptionAttached : Pos 9, 1 Bit
+#    +0xfca InitialThread    : Pos 10, 1 Bit
+#    +0xfca SpareSameTebBits : Pos 11, 5 Bits
+#    +0xfcc TxnScopeEnterCallback : Ptr32 Void
+#    +0xfd0 TxnScopeExitCallback : Ptr32 Void
+#    +0xfd4 TxnScopeContext  : Ptr32 Void
+#    +0xfd8 LockCount        : Uint4B
+#    +0xfdc SpareUlong0      : Uint4B
+#    +0xfe0 ResourceRetValue : Ptr32 Void
+class _TEB_2008_R2(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes",                      UCHAR * 36),
+        ("TxFsContext",                     DWORD),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 DWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     DWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 9),
+        ("ActivityId",                      GUID),
+        ("SubProcessTag",                   PVOID),
+        ("EtwLocalData",                    PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("CurrentIdealProcessor",           PROCESSOR_NUMBER),
+        ("IdealProcessorValue",             DWORD),
+        ("ReservedPad0",                    UCHAR),
+        ("ReservedPad1",                    UCHAR),
+        ("ReservedPad2",                    UCHAR),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SavedPriorityState",              PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("ThreadPoolData",                  PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr32 Ptr32 Void
+        ("MuiGeneration",                   DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("PreferredLanguages",              PVOID),
+        ("UserPrefLanguages",               PVOID),
+        ("MergedPrefLanguages",             PVOID),
+        ("MuiImpersonation",                BOOL),
+        ("CrossTebFlags",                   WORD),
+        ("SameTebFlags",                    WORD),
+        ("TxnScopeEnterCallback",           PVOID),
+        ("TxnScopeExitCallback",            PVOID),
+        ("TxnScopeContext",                 PVOID),
+        ("LockCount",                       DWORD),
+        ("SpareUlong0",                     ULONG),
+        ("ResourceRetValue",                PVOID),
+]
+
+#    +0x000 NtTib            : _NT_TIB
+#    +0x038 EnvironmentPointer : Ptr64 Void
+#    +0x040 ClientId         : _CLIENT_ID
+#    +0x050 ActiveRpcHandle  : Ptr64 Void
+#    +0x058 ThreadLocalStoragePointer : Ptr64 Void
+#    +0x060 ProcessEnvironmentBlock : Ptr64 _PEB
+#    +0x068 LastErrorValue   : Uint4B
+#    +0x06c CountOfOwnedCriticalSections : Uint4B
+#    +0x070 CsrClientThread  : Ptr64 Void
+#    +0x078 Win32ThreadInfo  : Ptr64 Void
+#    +0x080 User32Reserved   : [26] Uint4B
+#    +0x0e8 UserReserved     : [5] Uint4B
+#    +0x100 WOW32Reserved    : Ptr64 Void
+#    +0x108 CurrentLocale    : Uint4B
+#    +0x10c FpSoftwareStatusRegister : Uint4B
+#    +0x110 SystemReserved1  : [54] Ptr64 Void
+#    +0x2c0 ExceptionCode    : Int4B
+#    +0x2c8 ActivationContextStackPointer : Ptr64 _ACTIVATION_CONTEXT_STACK
+#    +0x2d0 SpareBytes       : [24] UChar
+#    +0x2e8 TxFsContext      : Uint4B
+#    +0x2f0 GdiTebBatch      : _GDI_TEB_BATCH
+#    +0x7d8 RealClientId     : _CLIENT_ID
+#    +0x7e8 GdiCachedProcessHandle : Ptr64 Void
+#    +0x7f0 GdiClientPID     : Uint4B
+#    +0x7f4 GdiClientTID     : Uint4B
+#    +0x7f8 GdiThreadLocalInfo : Ptr64 Void
+#    +0x800 Win32ClientInfo  : [62] Uint8B
+#    +0x9f0 glDispatchTable  : [233] Ptr64 Void
+#    +0x1138 glReserved1      : [29] Uint8B
+#    +0x1220 glReserved2      : Ptr64 Void
+#    +0x1228 glSectionInfo    : Ptr64 Void
+#    +0x1230 glSection        : Ptr64 Void
+#    +0x1238 glTable          : Ptr64 Void
+#    +0x1240 glCurrentRC      : Ptr64 Void
+#    +0x1248 glContext        : Ptr64 Void
+#    +0x1250 LastStatusValue  : Uint4B
+#    +0x1258 StaticUnicodeString : _UNICODE_STRING
+#    +0x1268 StaticUnicodeBuffer : [261] Wchar
+#    +0x1478 DeallocationStack : Ptr64 Void
+#    +0x1480 TlsSlots         : [64] Ptr64 Void
+#    +0x1680 TlsLinks         : _LIST_ENTRY
+#    +0x1690 Vdm              : Ptr64 Void
+#    +0x1698 ReservedForNtRpc : Ptr64 Void
+#    +0x16a0 DbgSsReserved    : [2] Ptr64 Void
+#    +0x16b0 HardErrorMode    : Uint4B
+#    +0x16b8 Instrumentation  : [11] Ptr64 Void
+#    +0x1710 ActivityId       : _GUID
+#    +0x1720 SubProcessTag    : Ptr64 Void
+#    +0x1728 EtwLocalData     : Ptr64 Void
+#    +0x1730 EtwTraceData     : Ptr64 Void
+#    +0x1738 WinSockData      : Ptr64 Void
+#    +0x1740 GdiBatchCount    : Uint4B
+#    +0x1744 CurrentIdealProcessor : _PROCESSOR_NUMBER
+#    +0x1744 IdealProcessorValue : Uint4B
+#    +0x1744 ReservedPad0     : UChar
+#    +0x1745 ReservedPad1     : UChar
+#    +0x1746 ReservedPad2     : UChar
+#    +0x1747 IdealProcessor   : UChar
+#    +0x1748 GuaranteedStackBytes : Uint4B
+#    +0x1750 ReservedForPerf  : Ptr64 Void
+#    +0x1758 ReservedForOle   : Ptr64 Void
+#    +0x1760 WaitingOnLoaderLock : Uint4B
+#    +0x1768 SavedPriorityState : Ptr64 Void
+#    +0x1770 SoftPatchPtr1    : Uint8B
+#    +0x1778 ThreadPoolData   : Ptr64 Void
+#    +0x1780 TlsExpansionSlots : Ptr64 Ptr64 Void
+#    +0x1788 DeallocationBStore : Ptr64 Void
+#    +0x1790 BStoreLimit      : Ptr64 Void
+#    +0x1798 MuiGeneration    : Uint4B
+#    +0x179c IsImpersonating  : Uint4B
+#    +0x17a0 NlsCache         : Ptr64 Void
+#    +0x17a8 pShimData        : Ptr64 Void
+#    +0x17b0 HeapVirtualAffinity : Uint4B
+#    +0x17b8 CurrentTransactionHandle : Ptr64 Void
+#    +0x17c0 ActiveFrame      : Ptr64 _TEB_ACTIVE_FRAME
+#    +0x17c8 FlsData          : Ptr64 Void
+#    +0x17d0 PreferredLanguages : Ptr64 Void
+#    +0x17d8 UserPrefLanguages : Ptr64 Void
+#    +0x17e0 MergedPrefLanguages : Ptr64 Void
+#    +0x17e8 MuiImpersonation : Uint4B
+#    +0x17ec CrossTebFlags    : Uint2B
+#    +0x17ec SpareCrossTebBits : Pos 0, 16 Bits
+#    +0x17ee SameTebFlags     : Uint2B
+#    +0x17ee SafeThunkCall    : Pos 0, 1 Bit
+#    +0x17ee InDebugPrint     : Pos 1, 1 Bit
+#    +0x17ee HasFiberData     : Pos 2, 1 Bit
+#    +0x17ee SkipThreadAttach : Pos 3, 1 Bit
+#    +0x17ee WerInShipAssertCode : Pos 4, 1 Bit
+#    +0x17ee RanProcessInit   : Pos 5, 1 Bit
+#    +0x17ee ClonedThread     : Pos 6, 1 Bit
+#    +0x17ee SuppressDebugMsg : Pos 7, 1 Bit
+#    +0x17ee DisableUserStackWalk : Pos 8, 1 Bit
+#    +0x17ee RtlExceptionAttached : Pos 9, 1 Bit
+#    +0x17ee InitialThread    : Pos 10, 1 Bit
+#    +0x17ee SpareSameTebBits : Pos 11, 5 Bits
+#    +0x17f0 TxnScopeEnterCallback : Ptr64 Void
+#    +0x17f8 TxnScopeExitCallback : Ptr64 Void
+#    +0x1800 TxnScopeContext  : Ptr64 Void
+#    +0x1808 LockCount        : Uint4B
+#    +0x180c SpareUlong0      : Uint4B
+#    +0x1810 ResourceRetValue : Ptr64 Void
+class _TEB_2008_R2_64(Structure):
+    _pack_ = 8
+    _fields_ = [
+        ("NtTib",                           NT_TIB),
+        ("EnvironmentPointer",              PVOID),
+        ("ClientId",                        CLIENT_ID),
+        ("ActiveRpcHandle",                 HANDLE),
+        ("ThreadLocalStoragePointer",       PVOID),
+        ("ProcessEnvironmentBlock",         PVOID), # PPEB
+        ("LastErrorValue",                  DWORD),
+        ("CountOfOwnedCriticalSections",    DWORD),
+        ("CsrClientThread",                 PVOID),
+        ("Win32ThreadInfo",                 PVOID),
+        ("User32Reserved",                  DWORD * 26),
+        ("UserReserved",                    DWORD * 5),
+        ("WOW32Reserved",                   PVOID), # ptr to wow64cpu!X86SwitchTo64BitMode
+        ("CurrentLocale",                   DWORD),
+        ("FpSoftwareStatusRegister",        DWORD),
+        ("SystemReserved1",                 PVOID * 54),
+        ("ExceptionCode",                   SDWORD),
+        ("ActivationContextStackPointer",   PVOID), # PACTIVATION_CONTEXT_STACK
+        ("SpareBytes",                      UCHAR * 24),
+        ("TxFsContext",                     DWORD),
+        ("GdiTebBatch",                     GDI_TEB_BATCH),
+        ("RealClientId",                    CLIENT_ID),
+        ("GdiCachedProcessHandle",          HANDLE),
+        ("GdiClientPID",                    DWORD),
+        ("GdiClientTID",                    DWORD),
+        ("GdiThreadLocalInfo",              PVOID),
+        ("Win32ClientInfo",                 DWORD * 62),
+        ("glDispatchTable",                 PVOID * 233),
+        ("glReserved1",                     QWORD * 29),
+        ("glReserved2",                     PVOID),
+        ("glSectionInfo",                   PVOID),
+        ("glSection",                       PVOID),
+        ("glTable",                         PVOID),
+        ("glCurrentRC",                     PVOID),
+        ("glContext",                       PVOID),
+        ("LastStatusValue",                 NTSTATUS),
+        ("StaticUnicodeString",             UNICODE_STRING),
+        ("StaticUnicodeBuffer",             WCHAR * 261),
+        ("DeallocationStack",               PVOID),
+        ("TlsSlots",                        PVOID * 64),
+        ("TlsLinks",                        LIST_ENTRY),
+        ("Vdm",                             PVOID),
+        ("ReservedForNtRpc",                PVOID),
+        ("DbgSsReserved",                   PVOID * 2),
+        ("HardErrorMode",                   DWORD),
+        ("Instrumentation",                 PVOID * 11),
+        ("ActivityId",                      GUID),
+        ("SubProcessTag",                   PVOID),
+        ("EtwLocalData",                    PVOID),
+        ("EtwTraceData",                    PVOID),
+        ("WinSockData",                     PVOID),
+        ("GdiBatchCount",                   DWORD),
+        ("CurrentIdealProcessor",           PROCESSOR_NUMBER),
+        ("IdealProcessorValue",             DWORD),
+        ("ReservedPad0",                    UCHAR),
+        ("ReservedPad1",                    UCHAR),
+        ("ReservedPad2",                    UCHAR),
+        ("IdealProcessor",                  UCHAR),
+        ("GuaranteedStackBytes",            DWORD),
+        ("ReservedForPerf",                 PVOID),
+        ("ReservedForOle",                  PVOID),
+        ("WaitingOnLoaderLock",             DWORD),
+        ("SavedPriorityState",              PVOID),
+        ("SoftPatchPtr1",                   PVOID),
+        ("ThreadPoolData",                  PVOID),
+        ("TlsExpansionSlots",               PVOID), # Ptr64 Ptr64 Void
+        ("DeallocationBStore",              PVOID),
+        ("BStoreLimit",                     PVOID),
+        ("MuiGeneration",                   DWORD),
+        ("IsImpersonating",                 BOOL),
+        ("NlsCache",                        PVOID),
+        ("pShimData",                       PVOID),
+        ("HeapVirtualAffinity",             DWORD),
+        ("CurrentTransactionHandle",        HANDLE),
+        ("ActiveFrame",                     PVOID), # PTEB_ACTIVE_FRAME
+        ("FlsData",                         PVOID),
+        ("PreferredLanguages",              PVOID),
+        ("UserPrefLanguages",               PVOID),
+        ("MergedPrefLanguages",             PVOID),
+        ("MuiImpersonation",                BOOL),
+        ("CrossTebFlags",                   WORD),
+        ("SameTebFlags",                    WORD),
+        ("TxnScopeEnterCallback",           PVOID),
+        ("TxnScopeExitCallback",            PVOID),
+        ("TxnScopeContext",                 PVOID),
+        ("LockCount",                       DWORD),
+        ("SpareUlong0",                     ULONG),
+        ("ResourceRetValue",                PVOID),
+]
+
+_TEB_Vista      = _TEB_2008
+_TEB_Vista_64   = _TEB_2008_64
+_TEB_W7         = _TEB_2008_R2
+_TEB_W7_64      = _TEB_2008_R2_64
+
+# Use the correct TEB structure definition.
+# Defaults to the latest Windows version.
+class TEB(Structure):
+    _pack_ = 8
+    if os == 'Windows NT':
+        _pack_   = _TEB_NT._pack_
+        _fields_ = _TEB_NT._fields_
+    elif os == 'Windows 2000':
+        _pack_   = _TEB_2000._pack_
+        _fields_ = _TEB_2000._fields_
+    elif os == 'Windows XP':
+        _fields_ = _TEB_XP._fields_
+    elif os == 'Windows XP (64 bits)':
+        _fields_ = _TEB_XP_64._fields_
+    elif os == 'Windows 2003':
+        _fields_ = _TEB_2003._fields_
+    elif os == 'Windows 2003 (64 bits)':
+        _fields_ = _TEB_2003_64._fields_
+    elif os == 'Windows 2008':
+        _fields_ = _TEB_2008._fields_
+    elif os == 'Windows 2008 (64 bits)':
+        _fields_ = _TEB_2008_64._fields_
+    elif os == 'Windows 2003 R2':
+        _fields_ = _TEB_2003_R2._fields_
+    elif os == 'Windows 2003 R2 (64 bits)':
+        _fields_ = _TEB_2003_R2_64._fields_
+    elif os == 'Windows 2008 R2':
+        _fields_ = _TEB_2008_R2._fields_
+    elif os == 'Windows 2008 R2 (64 bits)':
+        _fields_ = _TEB_2008_R2_64._fields_
+    elif os == 'Windows Vista':
+        _fields_ = _TEB_Vista._fields_
+    elif os == 'Windows Vista (64 bits)':
+        _fields_ = _TEB_Vista_64._fields_
+    elif os == 'Windows 7':
+        _fields_ = _TEB_W7._fields_
+    elif os == 'Windows 7 (64 bits)':
+        _fields_ = _TEB_W7_64._fields_
+    elif sizeof(SIZE_T) == sizeof(DWORD):
+        _fields_ = _TEB_W7._fields_
+    else:
+        _fields_ = _TEB_W7_64._fields_
+PTEB = POINTER(TEB)
+
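+# Usage sketch (illustrative): the TEB class above freezes whichever _fields_
+# layout matches the detected OS string at import time, so its size and field
+# offsets can be inspected directly with plain ctypes:
+#
+#     print(sizeof(TEB))                              # size of the selected layout
+#     print(TEB.ProcessEnvironmentBlock.offset)       # offset of the PEB pointer
+#
+# Reading a live TEB out of another process additionally needs the kernel32
+# wrappers (OpenProcess / ReadProcessMemory), which are not part of this file.
+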
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/psapi.py b/scripts/win32/psapi.py
new file mode 100644
index 0000000..bb5efd9
--- /dev/null
+++ b/scripts/win32/psapi.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for psapi.dll in ctypes.
+"""
+
+from .defines import *  # NOQA
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- PSAPI structures and constants -------------------------------------------
+
+LIST_MODULES_DEFAULT    = 0x00
+LIST_MODULES_32BIT      = 0x01
+LIST_MODULES_64BIT      = 0x02
+LIST_MODULES_ALL        = 0x03
+
+# typedef struct _MODULEINFO {
+#   LPVOID lpBaseOfDll;
+#   DWORD  SizeOfImage;
+#   LPVOID EntryPoint;
+# } MODULEINFO, *LPMODULEINFO;
+class MODULEINFO(Structure):
+    _fields_ = [
+        ("lpBaseOfDll",     LPVOID),    # remote pointer
+        ("SizeOfImage",     DWORD),
+        ("EntryPoint",      LPVOID),    # remote pointer
+]
+LPMODULEINFO = POINTER(MODULEINFO)
+
+#--- psapi.dll ----------------------------------------------------------------
+
+# BOOL WINAPI EnumDeviceDrivers(
+#   __out  LPVOID *lpImageBase,
+#   __in   DWORD cb,
+#   __out  LPDWORD lpcbNeeded
+# );
+def EnumDeviceDrivers():
+    _EnumDeviceDrivers = windll.psapi.EnumDeviceDrivers
+    _EnumDeviceDrivers.argtypes = [LPVOID, DWORD, LPDWORD]
+    _EnumDeviceDrivers.restype = bool
+    _EnumDeviceDrivers.errcheck = RaiseIfZero
+
+    size       = 0x1000
+    lpcbNeeded = DWORD(size)
+    unit       = sizeof(LPVOID)
+    while 1:
+        lpImageBase = (LPVOID * (size // unit))()
+        _EnumDeviceDrivers(byref(lpImageBase), lpcbNeeded, byref(lpcbNeeded))
+        needed = lpcbNeeded.value
+        if needed <= size:
+            break
+        size = needed
+    return [ lpImageBase[index] for index in range(0, (needed // unit)) ]
+
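+# Usage sketch (illustrative), with this module's names in scope:
+#
+#     for ImageBase in EnumDeviceDrivers():
+#         print("%#x %s" % (ImageBase, GetDeviceDriverBaseName(ImageBase)))
+#
+# Each entry is the kernel-mode load address of a driver image; the name is
+# resolved with GetDeviceDriverBaseName, defined further below.
+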
+# BOOL WINAPI EnumProcesses(
+#   __out  DWORD *pProcessIds,
+#   __in   DWORD cb,
+#   __out  DWORD *pBytesReturned
+# );
+def EnumProcesses():
+    _EnumProcesses = windll.psapi.EnumProcesses
+    _EnumProcesses.argtypes = [LPVOID, DWORD, LPDWORD]
+    _EnumProcesses.restype = bool
+    _EnumProcesses.errcheck = RaiseIfZero
+
+    size            = 0x1000
+    cbBytesReturned = DWORD()
+    unit            = sizeof(DWORD)
+    while 1:
+        ProcessIds = (DWORD * (size // unit))()
+        cbBytesReturned.value = size
+        _EnumProcesses(byref(ProcessIds), cbBytesReturned, byref(cbBytesReturned))
+        returned = cbBytesReturned.value
+        if returned < size:
+            break
+        size = size + 0x1000
+    # Only the first (returned // unit) entries were actually written by the
+    # API; the remainder of the buffer is unused space.
+    return [ ProcessIds[index] for index in range(0, (returned // unit)) ]
+
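+# Usage sketch (illustrative): EnumProcesses only yields process IDs; turning
+# a PID into something inspectable still requires kernel32.OpenProcess with
+# suitable access rights (not wrapped in this module).
+#
+#     for dwProcessId in EnumProcesses():
+#         print(dwProcessId)
+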
+# BOOL WINAPI EnumProcessModules(
+#   __in   HANDLE hProcess,
+#   __out  HMODULE *lphModule,
+#   __in   DWORD cb,
+#   __out  LPDWORD lpcbNeeded
+# );
+def EnumProcessModules(hProcess):
+    _EnumProcessModules = windll.psapi.EnumProcessModules
+    _EnumProcessModules.argtypes = [HANDLE, LPVOID, DWORD, LPDWORD]
+    _EnumProcessModules.restype = bool
+    _EnumProcessModules.errcheck = RaiseIfZero
+
+    size = 0x1000
+    lpcbNeeded = DWORD(size)
+    unit = sizeof(HMODULE)
+    while 1:
+        lphModule = (HMODULE * (size // unit))()
+        _EnumProcessModules(hProcess, byref(lphModule), lpcbNeeded, byref(lpcbNeeded))
+        needed = lpcbNeeded.value
+        if needed <= size:
+            break
+        size = needed
+    return [ lphModule[index] for index in range(0, int(needed // unit)) ]
+
+# BOOL WINAPI EnumProcessModulesEx(
+#   __in   HANDLE hProcess,
+#   __out  HMODULE *lphModule,
+#   __in   DWORD cb,
+#   __out  LPDWORD lpcbNeeded,
+#   __in   DWORD dwFilterFlag
+# );
+def EnumProcessModulesEx(hProcess, dwFilterFlag = LIST_MODULES_DEFAULT):
+    _EnumProcessModulesEx = windll.psapi.EnumProcessModulesEx
+    _EnumProcessModulesEx.argtypes = [HANDLE, LPVOID, DWORD, LPDWORD, DWORD]
+    _EnumProcessModulesEx.restype = bool
+    _EnumProcessModulesEx.errcheck = RaiseIfZero
+
+    size = 0x1000
+    lpcbNeeded = DWORD(size)
+    unit = sizeof(HMODULE)
+    while 1:
+        lphModule = (HMODULE * (size // unit))()
+        _EnumProcessModulesEx(hProcess, byref(lphModule), lpcbNeeded, byref(lpcbNeeded), dwFilterFlag)
+        needed = lpcbNeeded.value
+        if needed <= size:
+            break
+        size = needed
+    return [ lphModule[index] for index in range(0, (needed // unit)) ]
+
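+# Usage sketch (illustrative): dwFilterFlag controls which modules of a WOW64
+# target are reported.  hProcess is assumed to come from kernel32.OpenProcess
+# with PROCESS_QUERY_INFORMATION | PROCESS_VM_READ access (not shown here).
+#
+#     lphModule = EnumProcessModulesEx(hProcess, LIST_MODULES_ALL)
+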
+# DWORD WINAPI GetDeviceDriverBaseName(
+#   __in   LPVOID ImageBase,
+#   __out  LPTSTR lpBaseName,
+#   __in   DWORD nSize
+# );
+def GetDeviceDriverBaseNameA(ImageBase):
+    _GetDeviceDriverBaseNameA = windll.psapi.GetDeviceDriverBaseNameA
+    _GetDeviceDriverBaseNameA.argtypes = [LPVOID, LPSTR, DWORD]
+    _GetDeviceDriverBaseNameA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpBaseName = ctypes.create_string_buffer(b"", nSize)   # bytes literal: valid on both Python 2 and 3
+        nCopied = _GetDeviceDriverBaseNameA(ImageBase, lpBaseName, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpBaseName.value
+
+def GetDeviceDriverBaseNameW(ImageBase):
+    _GetDeviceDriverBaseNameW = windll.psapi.GetDeviceDriverBaseNameW
+    _GetDeviceDriverBaseNameW.argtypes = [LPVOID, LPWSTR, DWORD]
+    _GetDeviceDriverBaseNameW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpBaseName = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetDeviceDriverBaseNameW(ImageBase, lpBaseName, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpBaseName.value
+
+GetDeviceDriverBaseName = GuessStringType(GetDeviceDriverBaseNameA, GetDeviceDriverBaseNameW)
+
+# DWORD WINAPI GetDeviceDriverFileName(
+#   __in   LPVOID ImageBase,
+#   __out  LPTSTR lpFilename,
+#   __in   DWORD nSize
+# );
+def GetDeviceDriverFileNameA(ImageBase):
+    _GetDeviceDriverFileNameA = windll.psapi.GetDeviceDriverFileNameA
+    _GetDeviceDriverFileNameA.argtypes = [LPVOID, LPSTR, DWORD]
+    _GetDeviceDriverFileNameA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_string_buffer(b"", nSize)
+        nCopied = _GetDeviceDriverFileNameA(ImageBase, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+def GetDeviceDriverFileNameW(ImageBase):
+    _GetDeviceDriverFileNameW = windll.psapi.GetDeviceDriverFileNameW
+    _GetDeviceDriverFileNameW.argtypes = [LPVOID, LPWSTR, DWORD]
+    _GetDeviceDriverFileNameW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetDeviceDriverFileNameW(ImageBase, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+GetDeviceDriverFileName = GuessStringType(GetDeviceDriverFileNameA, GetDeviceDriverFileNameW)
+
+# DWORD WINAPI GetMappedFileName(
+#   __in   HANDLE hProcess,
+#   __in   LPVOID lpv,
+#   __out  LPTSTR lpFilename,
+#   __in   DWORD nSize
+# );
+def GetMappedFileNameA(hProcess, lpv):
+    _GetMappedFileNameA = ctypes.windll.psapi.GetMappedFileNameA
+    _GetMappedFileNameA.argtypes = [HANDLE, LPVOID, LPSTR, DWORD]
+    _GetMappedFileNameA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_string_buffer(b"", nSize)
+        nCopied = _GetMappedFileNameA(hProcess, lpv, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+def GetMappedFileNameW(hProcess, lpv):
+    _GetMappedFileNameW = ctypes.windll.psapi.GetMappedFileNameW
+    _GetMappedFileNameW.argtypes = [HANDLE, LPVOID, LPWSTR, DWORD]
+    _GetMappedFileNameW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetMappedFileNameW(hProcess, lpv, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+GetMappedFileName = GuessStringType(GetMappedFileNameA, GetMappedFileNameW)
+
+# DWORD WINAPI GetModuleFileNameEx(
+#   __in      HANDLE hProcess,
+#   __in_opt  HMODULE hModule,
+#   __out     LPTSTR lpFilename,
+#   __in      DWORD nSize
+# );
+def GetModuleFileNameExA(hProcess, hModule = None):
+    _GetModuleFileNameExA = ctypes.windll.psapi.GetModuleFileNameExA
+    _GetModuleFileNameExA.argtypes = [HANDLE, HMODULE, LPSTR, DWORD]
+    _GetModuleFileNameExA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_string_buffer(b"", nSize)
+        nCopied = _GetModuleFileNameExA(hProcess, hModule, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+def GetModuleFileNameExW(hProcess, hModule = None):
+    _GetModuleFileNameExW = ctypes.windll.psapi.GetModuleFileNameExW
+    _GetModuleFileNameExW.argtypes = [HANDLE, HMODULE, LPWSTR, DWORD]
+    _GetModuleFileNameExW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetModuleFileNameExW(hProcess, hModule, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+GetModuleFileNameEx = GuessStringType(GetModuleFileNameExA, GetModuleFileNameExW)
+
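+# Usage sketch (illustrative): resolving the path of every module loaded in a
+# target process, assuming hProcess was opened elsewhere with
+# PROCESS_QUERY_INFORMATION | PROCESS_VM_READ access.
+#
+#     for hModule in EnumProcessModules(hProcess):
+#         print(GetModuleFileNameEx(hProcess, hModule))
+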
+# BOOL WINAPI GetModuleInformation(
+#   __in   HANDLE hProcess,
+#   __in   HMODULE hModule,
+#   __out  LPMODULEINFO lpmodinfo,
+#   __in   DWORD cb
+# );
+def GetModuleInformation(hProcess, hModule, lpmodinfo = None):
+    _GetModuleInformation = windll.psapi.GetModuleInformation
+    _GetModuleInformation.argtypes = [HANDLE, HMODULE, LPMODULEINFO, DWORD]
+    _GetModuleInformation.restype = bool
+    _GetModuleInformation.errcheck = RaiseIfZero
+
+    if lpmodinfo is None:
+        lpmodinfo = MODULEINFO()
+    _GetModuleInformation(hProcess, hModule, byref(lpmodinfo), sizeof(lpmodinfo))
+    return lpmodinfo
+
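+# Usage sketch (illustrative): base address and size of the first module
+# returned by EnumProcessModules (normally the main executable).
+#
+#     hModule = EnumProcessModules(hProcess)[0]
+#     modinfo = GetModuleInformation(hProcess, hModule)
+#     print("%#x %d" % (modinfo.lpBaseOfDll, modinfo.SizeOfImage))
+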
+# DWORD WINAPI GetProcessImageFileName(
+#   __in   HANDLE hProcess,
+#   __out  LPTSTR lpImageFileName,
+#   __in   DWORD nSize
+# );
+def GetProcessImageFileNameA(hProcess):
+    _GetProcessImageFileNameA = windll.psapi.GetProcessImageFileNameA
+    _GetProcessImageFileNameA.argtypes = [HANDLE, LPSTR, DWORD]
+    _GetProcessImageFileNameA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_string_buffer(b"", nSize)
+        nCopied = _GetProcessImageFileNameA(hProcess, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+def GetProcessImageFileNameW(hProcess):
+    _GetProcessImageFileNameW = windll.psapi.GetProcessImageFileNameW
+    _GetProcessImageFileNameW.argtypes = [HANDLE, LPWSTR, DWORD]
+    _GetProcessImageFileNameW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetProcessImageFileNameW(hProcess, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+GetProcessImageFileName = GuessStringType(GetProcessImageFileNameA, GetProcessImageFileNameW)
+
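+# Usage sketch (illustrative): GetProcessImageFileName returns the native
+# device path (\Device\HarddiskVolumeN\...), not a drive-letter path; use
+# GetModuleFileNameEx above when a Win32 path is needed.
+#
+#     print(GetProcessImageFileName(hProcess))
+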
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/shell32.py b/scripts/win32/shell32.py
new file mode 100644
index 0000000..3f284cc
--- /dev/null
+++ b/scripts/win32/shell32.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for shell32.dll in ctypes.
+"""
+
+# TODO
+# * Add a class wrapper to SHELLEXECUTEINFO
+# * More logic into ShellExecuteEx
+
+from .defines import *
+from .kernel32 import LocalFree
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- Constants ----------------------------------------------------------------
+
+SEE_MASK_DEFAULT            = 0x00000000
+SEE_MASK_CLASSNAME          = 0x00000001
+SEE_MASK_CLASSKEY           = 0x00000003
+SEE_MASK_IDLIST             = 0x00000004
+SEE_MASK_INVOKEIDLIST       = 0x0000000C
+SEE_MASK_ICON               = 0x00000010
+SEE_MASK_HOTKEY             = 0x00000020
+SEE_MASK_NOCLOSEPROCESS     = 0x00000040
+SEE_MASK_CONNECTNETDRV      = 0x00000080
+SEE_MASK_NOASYNC            = 0x00000100
+SEE_MASK_DOENVSUBST         = 0x00000200
+SEE_MASK_FLAG_NO_UI         = 0x00000400
+SEE_MASK_UNICODE            = 0x00004000
+SEE_MASK_NO_CONSOLE         = 0x00008000
+SEE_MASK_ASYNCOK            = 0x00100000
+SEE_MASK_HMONITOR           = 0x00200000
+SEE_MASK_NOZONECHECKS       = 0x00800000
+SEE_MASK_WAITFORINPUTIDLE   = 0x02000000
+SEE_MASK_FLAG_LOG_USAGE     = 0x04000000
+
+SE_ERR_FNF              = 2
+SE_ERR_PNF              = 3
+SE_ERR_ACCESSDENIED     = 5
+SE_ERR_OOM              = 8
+SE_ERR_DLLNOTFOUND      = 32
+SE_ERR_SHARE            = 26
+SE_ERR_ASSOCINCOMPLETE  = 27
+SE_ERR_DDETIMEOUT       = 28
+SE_ERR_DDEFAIL          = 29
+SE_ERR_DDEBUSY          = 30
+SE_ERR_NOASSOC          = 31
+
+SHGFP_TYPE_CURRENT = 0
+SHGFP_TYPE_DEFAULT = 1
+
+CSIDL_DESKTOP                   = 0x0000
+CSIDL_INTERNET                  = 0x0001
+CSIDL_PROGRAMS                  = 0x0002
+CSIDL_CONTROLS                  = 0x0003
+CSIDL_PRINTERS                  = 0x0004
+CSIDL_PERSONAL                  = 0x0005
+CSIDL_FAVORITES                 = 0x0006
+CSIDL_STARTUP                   = 0x0007
+CSIDL_RECENT                    = 0x0008
+CSIDL_SENDTO                    = 0x0009
+CSIDL_BITBUCKET                 = 0x000a
+CSIDL_STARTMENU                 = 0x000b
+CSIDL_MYDOCUMENTS               = CSIDL_PERSONAL
+CSIDL_MYMUSIC                   = 0x000d
+CSIDL_MYVIDEO                   = 0x000e
+CSIDL_DESKTOPDIRECTORY          = 0x0010
+CSIDL_DRIVES                    = 0x0011
+CSIDL_NETWORK                   = 0x0012
+CSIDL_NETHOOD                   = 0x0013
+CSIDL_FONTS                     = 0x0014
+CSIDL_TEMPLATES                 = 0x0015
+CSIDL_COMMON_STARTMENU          = 0x0016
+CSIDL_COMMON_PROGRAMS           = 0x0017
+CSIDL_COMMON_STARTUP            = 0x0018
+CSIDL_COMMON_DESKTOPDIRECTORY   = 0x0019
+CSIDL_APPDATA                   = 0x001a
+CSIDL_PRINTHOOD                 = 0x001b
+CSIDL_LOCAL_APPDATA             = 0x001c
+CSIDL_ALTSTARTUP                = 0x001d
+CSIDL_COMMON_ALTSTARTUP         = 0x001e
+CSIDL_COMMON_FAVORITES          = 0x001f
+CSIDL_INTERNET_CACHE            = 0x0020
+CSIDL_COOKIES                   = 0x0021
+CSIDL_HISTORY                   = 0x0022
+CSIDL_COMMON_APPDATA            = 0x0023
+CSIDL_WINDOWS                   = 0x0024
+CSIDL_SYSTEM                    = 0x0025
+CSIDL_PROGRAM_FILES             = 0x0026
+CSIDL_MYPICTURES                = 0x0027
+CSIDL_PROFILE                   = 0x0028
+CSIDL_SYSTEMX86                 = 0x0029
+CSIDL_PROGRAM_FILESX86          = 0x002a
+CSIDL_PROGRAM_FILES_COMMON      = 0x002b
+CSIDL_PROGRAM_FILES_COMMONX86   = 0x002c
+CSIDL_COMMON_TEMPLATES          = 0x002d
+CSIDL_COMMON_DOCUMENTS          = 0x002e
+CSIDL_COMMON_ADMINTOOLS         = 0x002f
+CSIDL_ADMINTOOLS                = 0x0030
+CSIDL_CONNECTIONS               = 0x0031
+CSIDL_COMMON_MUSIC              = 0x0035
+CSIDL_COMMON_PICTURES           = 0x0036
+CSIDL_COMMON_VIDEO              = 0x0037
+CSIDL_RESOURCES                 = 0x0038
+CSIDL_RESOURCES_LOCALIZED       = 0x0039
+CSIDL_COMMON_OEM_LINKS          = 0x003a
+CSIDL_CDBURN_AREA               = 0x003b
+CSIDL_COMPUTERSNEARME           = 0x003d
+CSIDL_PROFILES                  = 0x003e
+
+CSIDL_FOLDER_MASK               = 0x00ff
+
+CSIDL_FLAG_PER_USER_INIT        = 0x0800
+CSIDL_FLAG_NO_ALIAS             = 0x1000
+CSIDL_FLAG_DONT_VERIFY          = 0x4000
+CSIDL_FLAG_CREATE               = 0x8000
+
+CSIDL_FLAG_MASK                 = 0xff00
+
+#--- Structures ---------------------------------------------------------------
+
+# typedef struct _SHELLEXECUTEINFO {
+#   DWORD     cbSize;
+#   ULONG     fMask;
+#   HWND      hwnd;
+#   LPCTSTR   lpVerb;
+#   LPCTSTR   lpFile;
+#   LPCTSTR   lpParameters;
+#   LPCTSTR   lpDirectory;
+#   int       nShow;
+#   HINSTANCE hInstApp;
+#   LPVOID    lpIDList;
+#   LPCTSTR   lpClass;
+#   HKEY      hkeyClass;
+#   DWORD     dwHotKey;
+#   union {
+#     HANDLE hIcon;
+#     HANDLE hMonitor;
+#   } DUMMYUNIONNAME;
+#   HANDLE    hProcess;
+# } SHELLEXECUTEINFO, *LPSHELLEXECUTEINFO;
+
+class SHELLEXECUTEINFOA(Structure):
+    _fields_ = [
+        ("cbSize",       DWORD),
+        ("fMask",        ULONG),
+        ("hwnd",         HWND),
+        ("lpVerb",       LPSTR),
+        ("lpFile",       LPSTR),
+        ("lpParameters", LPSTR),
+        ("lpDirectory",  LPSTR),
+        ("nShow",        ctypes.c_int),
+        ("hInstApp",     HINSTANCE),
+        ("lpIDList",     LPVOID),
+        ("lpClass",      LPSTR),
+        ("hkeyClass",    HKEY),
+        ("dwHotKey",     DWORD),
+        ("hIcon",        HANDLE),
+        ("hProcess",     HANDLE),
+    ]
+
+    def __get_hMonitor(self):
+        return self.hIcon
+    def __set_hMonitor(self, hMonitor):
+        self.hIcon = hMonitor
+    hMonitor = property(__get_hMonitor, __set_hMonitor)
+
+class SHELLEXECUTEINFOW(Structure):
+    _fields_ = [
+        ("cbSize",       DWORD),
+        ("fMask",        ULONG),
+        ("hwnd",         HWND),
+        ("lpVerb",       LPWSTR),
+        ("lpFile",       LPWSTR),
+        ("lpParameters", LPWSTR),
+        ("lpDirectory",  LPWSTR),
+        ("nShow",        ctypes.c_int),
+        ("hInstApp",     HINSTANCE),
+        ("lpIDList",     LPVOID),
+        ("lpClass",      LPWSTR),
+        ("hkeyClass",    HKEY),
+        ("dwHotKey",     DWORD),
+        ("hIcon",        HANDLE),
+        ("hProcess",     HANDLE),
+    ]
+
+    def __get_hMonitor(self):
+        return self.hIcon
+    def __set_hMonitor(self, hMonitor):
+        self.hIcon = hMonitor
+    hMonitor = property(__get_hMonitor, __set_hMonitor)
+
+LPSHELLEXECUTEINFOA = POINTER(SHELLEXECUTEINFOA)
+LPSHELLEXECUTEINFOW = POINTER(SHELLEXECUTEINFOW)
+
+#--- shell32.dll --------------------------------------------------------------
+
+# LPWSTR *CommandLineToArgvW(
+#     LPCWSTR lpCmdLine,
+#     int *pNumArgs
+# );
+def CommandLineToArgvW(lpCmdLine):
+    _CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+    _CommandLineToArgvW.argtypes = [LPVOID, POINTER(ctypes.c_int)]
+    _CommandLineToArgvW.restype  = LPVOID
+
+    if not lpCmdLine:
+        lpCmdLine = None
+    argc = ctypes.c_int(0)
+    vptr = _CommandLineToArgvW(lpCmdLine, byref(argc))
+    if vptr == NULL:
+        raise ctypes.WinError()
+    argv = vptr
+    try:
+        argc = argc.value
+        if argc <= 0:
+            raise ctypes.WinError()
+        argv = ctypes.cast(argv, ctypes.POINTER(LPWSTR * argc) )
+        argv = [ argv.contents[i] for i in range(0, argc) ]
+    finally:
+        if vptr is not None:
+            LocalFree(vptr)
+    return argv
+
+def CommandLineToArgvA(lpCmdLine):
+    t_ansi = GuessStringType.t_ansi
+    t_unicode = GuessStringType.t_unicode
+    if isinstance(lpCmdLine, t_ansi):
+        cmdline = t_unicode(lpCmdLine)
+    else:
+        cmdline = lpCmdLine
+    return [t_ansi(x) for x in CommandLineToArgvW(cmdline)]
+
+CommandLineToArgv = GuessStringType(CommandLineToArgvA, CommandLineToArgvW)
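+
+# Illustrative usage sketch (not part of the original wrapper; the command
+# line below is made up). CommandLineToArgv dispatches to the ANSI or Unicode
+# variant depending on the argument type:
+#
+#     CommandLineToArgv(u'program.exe "hello world" --verbose')
+#     # -> [u'program.exe', u'hello world', u'--verbose']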
+
+# HINSTANCE ShellExecute(
+#     HWND hwnd,
+#     LPCTSTR lpOperation,
+#     LPCTSTR lpFile,
+#     LPCTSTR lpParameters,
+#     LPCTSTR lpDirectory,
+#     INT nShowCmd
+# );
+def ShellExecuteA(hwnd = None, lpOperation = None, lpFile = None, lpParameters = None, lpDirectory = None, nShowCmd = None):
+    _ShellExecuteA = windll.shell32.ShellExecuteA
+    _ShellExecuteA.argtypes = [HWND, LPSTR, LPSTR, LPSTR, LPSTR, INT]
+    _ShellExecuteA.restype  = HINSTANCE
+
+    if not nShowCmd:
+        nShowCmd = 0
+    success = _ShellExecuteA(hwnd, lpOperation, lpFile, lpParameters, lpDirectory, nShowCmd)
+    success = ctypes.cast(success, ctypes.c_void_p)
+    success = success.value
+    if not success > 32:    # ShellExecute returns a value greater than 32 on success
+        raise ctypes.WinError(success)
+
+def ShellExecuteW(hwnd = None, lpOperation = None, lpFile = None, lpParameters = None, lpDirectory = None, nShowCmd = None):
+    _ShellExecuteW = windll.shell32.ShellExecuteW
+    _ShellExecuteW.argtypes = [HWND, LPWSTR, LPWSTR, LPWSTR, LPWSTR, INT]
+    _ShellExecuteW.restype  = HINSTANCE
+
+    if not nShowCmd:
+        nShowCmd = 0
+    success = _ShellExecuteW(hwnd, lpOperation, lpFile, lpParameters, lpDirectory, nShowCmd)
+    success = ctypes.cast(success, ctypes.c_void_p)
+    success = success.value
+    if not success > 32:    # ShellExecute returns a value greater than 32 on success
+        raise ctypes.WinError(success)
+
+ShellExecute = GuessStringType(ShellExecuteA, ShellExecuteW)
+
+# BOOL ShellExecuteEx(
+#   __inout  LPSHELLEXECUTEINFO lpExecInfo
+# );
+def ShellExecuteEx(lpExecInfo):
+    if isinstance(lpExecInfo, SHELLEXECUTEINFOA):
+        ShellExecuteExA(lpExecInfo)
+    elif isinstance(lpExecInfo, SHELLEXECUTEINFOW):
+        ShellExecuteExW(lpExecInfo)
+    else:
+        raise TypeError("Expected SHELLEXECUTEINFOA or SHELLEXECUTEINFOW, got %s instead" % type(lpExecInfo))
+
+def ShellExecuteExA(lpExecInfo):
+    _ShellExecuteExA = windll.shell32.ShellExecuteExA
+    _ShellExecuteExA.argtypes = [LPSHELLEXECUTEINFOA]
+    _ShellExecuteExA.restype  = BOOL
+    _ShellExecuteExA.errcheck = RaiseIfZero
+    _ShellExecuteExA(byref(lpExecInfo))
+
+def ShellExecuteExW(lpExecInfo):
+    _ShellExecuteExW = windll.shell32.ShellExecuteExW
+    _ShellExecuteExW.argtypes = [LPSHELLEXECUTEINFOW]
+    _ShellExecuteExW.restype  = BOOL
+    _ShellExecuteExW.errcheck = RaiseIfZero
+    _ShellExecuteExW(byref(lpExecInfo))
+
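+# Illustrative usage sketch (not part of the original wrapper; the file name
+# and flags below are made up). A SHELLEXECUTEINFOW structure is filled in by
+# hand and passed to ShellExecuteEx, which dispatches on the structure type:
+#
+#     sei = SHELLEXECUTEINFOW()
+#     sei.cbSize = ctypes.sizeof(SHELLEXECUTEINFOW)
+#     sei.fMask  = SEE_MASK_NOCLOSEPROCESS
+#     sei.lpVerb = u"open"
+#     sei.lpFile = u"notepad.exe"
+#     sei.nShow  = 1                       # SW_SHOWNORMAL
+#     ShellExecuteEx(sei)                  # raises on failure (RaiseIfZero)
+#     # sei.hProcess now holds a handle to the new process
+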
+# HINSTANCE FindExecutable(
+#   __in      LPCTSTR lpFile,
+#   __in_opt  LPCTSTR lpDirectory,
+#   __out     LPTSTR lpResult
+# );
+def FindExecutableA(lpFile, lpDirectory = None):
+    _FindExecutableA = windll.shell32.FindExecutableA
+    _FindExecutableA.argtypes = [LPSTR, LPSTR, LPSTR]
+    _FindExecutableA.restype  = HINSTANCE
+
+    lpResult = ctypes.create_string_buffer(MAX_PATH)
+    success = _FindExecutableA(lpFile, lpDirectory, lpResult)
+    success = ctypes.cast(success, ctypes.c_void_p)
+    success = success.value
+    if not success > 32:    # FindExecutable returns a value greater than 32 on success
+        raise ctypes.WinError(success)
+    return lpResult.value
+
+def FindExecutableW(lpFile, lpDirectory = None):
+    _FindExecutableW = windll.shell32.FindExecutableW
+    _FindExecutableW.argtypes = [LPWSTR, LPWSTR, LPWSTR]
+    _FindExecutableW.restype  = HINSTANCE
+
+    lpResult = ctypes.create_unicode_buffer(MAX_PATH)
+    success = _FindExecutableW(lpFile, lpDirectory, lpResult)
+    success = ctypes.cast(success, ctypes.c_void_p)
+    success = success.value
+    if not success > 32:    # FindExecutable returns a value greater than 32 on success
+        raise ctypes.WinError(success)
+    return lpResult.value
+
+FindExecutable = GuessStringType(FindExecutableA, FindExecutableW)
+
+# HRESULT SHGetFolderPath(
+#   __in   HWND hwndOwner,
+#   __in   int nFolder,
+#   __in   HANDLE hToken,
+#   __in   DWORD dwFlags,
+#   __out  LPTSTR pszPath
+# );
+def SHGetFolderPathA(nFolder, hToken = None, dwFlags = SHGFP_TYPE_CURRENT):
+    _SHGetFolderPathA = windll.shell32.SHGetFolderPathA     # shfolder.dll in older win versions
+    _SHGetFolderPathA.argtypes = [HWND, ctypes.c_int, HANDLE, DWORD, LPSTR]
+    _SHGetFolderPathA.restype  = HRESULT
+    _SHGetFolderPathA.errcheck = RaiseIfNotZero # S_OK == 0
+
+    pszPath = ctypes.create_string_buffer(MAX_PATH + 1)
+    _SHGetFolderPathA(None, nFolder, hToken, dwFlags, pszPath)
+    return pszPath.value
+
+def SHGetFolderPathW(nFolder, hToken = None, dwFlags = SHGFP_TYPE_CURRENT):
+    _SHGetFolderPathW = windll.shell32.SHGetFolderPathW     # shfolder.dll in older win versions
+    _SHGetFolderPathW.argtypes = [HWND, ctypes.c_int, HANDLE, DWORD, LPWSTR]
+    _SHGetFolderPathW.restype  = HRESULT
+    _SHGetFolderPathW.errcheck = RaiseIfNotZero # S_OK == 0
+
+    pszPath = ctypes.create_unicode_buffer(MAX_PATH + 1)
+    _SHGetFolderPathW(None, nFolder, hToken, dwFlags, pszPath)
+    return pszPath.value
+
+SHGetFolderPath = DefaultStringType(SHGetFolderPathA, SHGetFolderPathW)
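+
+# Illustrative usage sketch (not part of the original wrapper; the exact path
+# depends on the user profile):
+#
+#     SHGetFolderPath(CSIDL_LOCAL_APPDATA)
+#     # -> something like u'C:\\Users\\<user>\\AppData\\Local'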
+
+# BOOL IsUserAnAdmin(void);
+def IsUserAnAdmin():
+    # Supposedly, IsUserAnAdmin() is deprecated in Vista.
+    # But I tried it on Windows 7 and it works just fine.
+    _IsUserAnAdmin = windll.shell32.IsUserAnAdmin
+    _IsUserAnAdmin.argtypes = []
+    _IsUserAnAdmin.restype  = bool
+    return _IsUserAnAdmin()
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/shlwapi.py b/scripts/win32/shlwapi.py
new file mode 100644
index 0000000..5655eae
--- /dev/null
+++ b/scripts/win32/shlwapi.py
@@ -0,0 +1,754 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for shlwapi.dll in ctypes.
+"""
+
+from .defines import *
+from .kernel32 import *
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+OS_WINDOWS                  = 0
+OS_NT                       = 1
+OS_WIN95ORGREATER           = 2
+OS_NT4ORGREATER             = 3
+OS_WIN98ORGREATER           = 5
+OS_WIN98_GOLD               = 6
+OS_WIN2000ORGREATER         = 7
+OS_WIN2000PRO               = 8
+OS_WIN2000SERVER            = 9
+OS_WIN2000ADVSERVER         = 10
+OS_WIN2000DATACENTER        = 11
+OS_WIN2000TERMINAL          = 12
+OS_EMBEDDED                 = 13
+OS_TERMINALCLIENT           = 14
+OS_TERMINALREMOTEADMIN      = 15
+OS_WIN95_GOLD               = 16
+OS_MEORGREATER              = 17
+OS_XPORGREATER              = 18
+OS_HOME                     = 19
+OS_PROFESSIONAL             = 20
+OS_DATACENTER               = 21
+OS_ADVSERVER                = 22
+OS_SERVER                   = 23
+OS_TERMINALSERVER           = 24
+OS_PERSONALTERMINALSERVER   = 25
+OS_FASTUSERSWITCHING        = 26
+OS_WELCOMELOGONUI           = 27
+OS_DOMAINMEMBER             = 28
+OS_ANYSERVER                = 29
+OS_WOW6432                  = 30
+OS_WEBSERVER                = 31
+OS_SMALLBUSINESSSERVER      = 32
+OS_TABLETPC                 = 33
+OS_SERVERADMINUI            = 34
+OS_MEDIACENTER              = 35
+OS_APPLIANCE                = 36
+
+#--- shlwapi.dll --------------------------------------------------------------
+
+# BOOL IsOS(
+#     DWORD dwOS
+# );
+def IsOS(dwOS):
+    try:
+        _IsOS = windll.shlwapi.IsOS
+        _IsOS.argtypes = [DWORD]
+        _IsOS.restype  = bool
+    except AttributeError:
+        # According to MSDN, on Windows versions prior to Vista
+        # this function is exported only by ordinal number 437.
+        # http://msdn.microsoft.com/en-us/library/bb773795%28VS.85%29.aspx
+        _GetProcAddress = windll.kernel32.GetProcAddress
+        _GetProcAddress.argtypes = [HINSTANCE, DWORD]
+        _GetProcAddress.restype  = LPVOID
+        _IsOS = _GetProcAddress(windll.shlwapi._handle, 437)
+        _IsOS = WINFUNCTYPE(bool, DWORD)(_IsOS)
+    return _IsOS(dwOS)
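+
+# Illustrative usage sketch (not part of the original wrapper): IsOS answers
+# simple yes/no questions about the running system, for example whether the
+# current 32-bit process runs under WOW64 on 64-bit Windows:
+#
+#     if IsOS(OS_WOW6432):
+#         print("running under WOW64")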
+
+# LPTSTR PathAddBackslash(
+#     LPTSTR lpszPath
+# );
+def PathAddBackslashA(lpszPath):
+    _PathAddBackslashA = windll.shlwapi.PathAddBackslashA
+    _PathAddBackslashA.argtypes = [LPSTR]
+    _PathAddBackslashA.restype  = LPSTR
+
+    lpszPath = ctypes.create_string_buffer(lpszPath, MAX_PATH)
+    retval = _PathAddBackslashA(lpszPath)
+    if retval == NULL:
+        raise ctypes.WinError()
+    return lpszPath.value
+
+def PathAddBackslashW(lpszPath):
+    _PathAddBackslashW = windll.shlwapi.PathAddBackslashW
+    _PathAddBackslashW.argtypes = [LPWSTR]
+    _PathAddBackslashW.restype  = LPWSTR
+
+    lpszPath = ctypes.create_unicode_buffer(lpszPath, MAX_PATH)
+    retval = _PathAddBackslashW(lpszPath)
+    if retval == NULL:
+        raise ctypes.WinError()
+    return lpszPath.value
+
+PathAddBackslash = GuessStringType(PathAddBackslashA, PathAddBackslashW)
+
+# BOOL PathAddExtension(
+#     LPTSTR pszPath,
+#     LPCTSTR pszExtension
+# );
+def PathAddExtensionA(lpszPath, pszExtension = None):
+    _PathAddExtensionA = windll.shlwapi.PathAddExtensionA
+    _PathAddExtensionA.argtypes = [LPSTR, LPSTR]
+    _PathAddExtensionA.restype  = bool
+    _PathAddExtensionA.errcheck = RaiseIfZero
+
+    if not pszExtension:
+        pszExtension = None
+    lpszPath = ctypes.create_string_buffer(lpszPath, MAX_PATH)
+    _PathAddExtensionA(lpszPath, pszExtension)
+    return lpszPath.value
+
+def PathAddExtensionW(lpszPath, pszExtension = None):
+    _PathAddExtensionW = windll.shlwapi.PathAddExtensionW
+    _PathAddExtensionW.argtypes = [LPWSTR, LPWSTR]
+    _PathAddExtensionW.restype  = bool
+    _PathAddExtensionW.errcheck = RaiseIfZero
+
+    if not pszExtension:
+        pszExtension = None
+    lpszPath = ctypes.create_unicode_buffer(lpszPath, MAX_PATH)
+    _PathAddExtensionW(lpszPath, pszExtension)
+    return lpszPath.value
+
+PathAddExtension = GuessStringType(PathAddExtensionA, PathAddExtensionW)
+
+# BOOL PathAppend(
+#     LPTSTR pszPath,
+#     LPCTSTR pszMore
+# );
+def PathAppendA(lpszPath, pszMore = None):
+    _PathAppendA = windll.shlwapi.PathAppendA
+    _PathAppendA.argtypes = [LPSTR, LPSTR]
+    _PathAppendA.restype  = bool
+    _PathAppendA.errcheck = RaiseIfZero
+
+    if not pszMore:
+        pszMore = None
+    lpszPath = ctypes.create_string_buffer(lpszPath, MAX_PATH)
+    _PathAppendA(lpszPath, pszMore)
+    return lpszPath.value
+
+def PathAppendW(lpszPath, pszMore = None):
+    _PathAppendW = windll.shlwapi.PathAppendW
+    _PathAppendW.argtypes = [LPWSTR, LPWSTR]
+    _PathAppendW.restype  = bool
+    _PathAppendW.errcheck = RaiseIfZero
+
+    if not pszMore:
+        pszMore = None
+    lpszPath = ctypes.create_unicode_buffer(lpszPath, MAX_PATH)
+    _PathAppendW(lpszPath, pszMore)
+    return lpszPath.value
+
+PathAppend = GuessStringType(PathAppendA, PathAppendW)
+
+# LPTSTR PathCombine(
+#     LPTSTR lpszDest,
+#     LPCTSTR lpszDir,
+#     LPCTSTR lpszFile
+# );
+def PathCombineA(lpszDir, lpszFile):
+    _PathCombineA = windll.shlwapi.PathCombineA
+    _PathCombineA.argtypes = [LPSTR, LPSTR, LPSTR]
+    _PathCombineA.restype  = LPSTR
+
+    lpszDest = ctypes.create_string_buffer("", max(MAX_PATH, len(lpszDir) + len(lpszFile) + 1))
+    retval = _PathCombineA(lpszDest, lpszDir, lpszFile)
+    if retval == NULL:
+        return None
+    return lpszDest.value
+
+def PathCombineW(lpszDir, lpszFile):
+    _PathCombineW = windll.shlwapi.PathCombineW
+    _PathCombineW.argtypes = [LPWSTR, LPWSTR, LPWSTR]
+    _PathCombineW.restype  = LPWSTR
+
+    lpszDest = ctypes.create_unicode_buffer(u"", max(MAX_PATH, len(lpszDir) + len(lpszFile) + 1))
+    retval = _PathCombineW(lpszDest, lpszDir, lpszFile)
+    if retval == NULL:
+        return None
+    return lpszDest.value
+
+PathCombine = GuessStringType(PathCombineA, PathCombineW)
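+
+# Illustrative usage sketch (not part of the original wrapper; paths are made
+# up). These helpers are pure string operations, the paths need not exist:
+#
+#     PathCombine(u"C:\\Windows", u"system32\\kernel32.dll")
+#     # -> u'C:\\Windows\\system32\\kernel32.dll'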
+
+# BOOL PathCanonicalize(
+#     LPTSTR lpszDst,
+#     LPCTSTR lpszSrc
+# );
+def PathCanonicalizeA(lpszSrc):
+    _PathCanonicalizeA = windll.shlwapi.PathCanonicalizeA
+    _PathCanonicalizeA.argtypes = [LPSTR, LPSTR]
+    _PathCanonicalizeA.restype  = bool
+    _PathCanonicalizeA.errcheck = RaiseIfZero
+
+    lpszDst = ctypes.create_string_buffer("", MAX_PATH)
+    _PathCanonicalizeA(lpszDst, lpszSrc)
+    return lpszDst.value
+
+def PathCanonicalizeW(lpszSrc):
+    _PathCanonicalizeW = windll.shlwapi.PathCanonicalizeW
+    _PathCanonicalizeW.argtypes = [LPWSTR, LPWSTR]
+    _PathCanonicalizeW.restype  = bool
+    _PathCanonicalizeW.errcheck = RaiseIfZero
+
+    lpszDst = ctypes.create_unicode_buffer(u"", MAX_PATH)
+    _PathCanonicalizeW(lpszDst, lpszSrc)
+    return lpszDst.value
+
+PathCanonicalize = GuessStringType(PathCanonicalizeA, PathCanonicalizeW)
+
+# BOOL PathRelativePathTo(
+#   _Out_  LPTSTR pszPath,
+#   _In_   LPCTSTR pszFrom,
+#   _In_   DWORD dwAttrFrom,
+#   _In_   LPCTSTR pszTo,
+#   _In_   DWORD dwAttrTo
+# );
+def PathRelativePathToA(pszFrom = None, dwAttrFrom = FILE_ATTRIBUTE_DIRECTORY, pszTo = None, dwAttrTo = FILE_ATTRIBUTE_DIRECTORY):
+    _PathRelativePathToA = windll.shlwapi.PathRelativePathToA
+    _PathRelativePathToA.argtypes = [LPSTR, LPSTR, DWORD, LPSTR, DWORD]
+    _PathRelativePathToA.restype  = bool
+    _PathRelativePathToA.errcheck = RaiseIfZero
+
+    # Make the paths absolute or the function fails.
+    if pszFrom:
+        pszFrom = GetFullPathNameA(pszFrom)[0]
+    else:
+        pszFrom = GetCurrentDirectoryA()
+    if pszTo:
+        pszTo = GetFullPathNameA(pszTo)[0]
+    else:
+        pszTo = GetCurrentDirectoryA()
+
+    # Argh, this function doesn't receive an output buffer size!
+    # We'll try to guess the maximum possible buffer size.
+    dwPath = max((len(pszFrom) + len(pszTo)) * 2 + 1, MAX_PATH + 1)
+    pszPath = ctypes.create_string_buffer('', dwPath)
+
+    # Also, it doesn't set the last error value.
+    # Whoever coded it must have been drunk or tripping on acid. Or both.
+    # The only failure conditions I've seen were invalid paths, paths not
+    # on the same drive, or the path is not absolute.
+    SetLastError(ERROR_INVALID_PARAMETER)
+
+    _PathRelativePathToA(pszPath, pszFrom, dwAttrFrom, pszTo, dwAttrTo)
+    return pszPath.value
+
+def PathRelativePathToW(pszFrom = None, dwAttrFrom = FILE_ATTRIBUTE_DIRECTORY, pszTo = None, dwAttrTo = FILE_ATTRIBUTE_DIRECTORY):
+    _PathRelativePathToW = windll.shlwapi.PathRelativePathToW
+    _PathRelativePathToW.argtypes = [LPWSTR, LPWSTR, DWORD, LPWSTR, DWORD]
+    _PathRelativePathToW.restype  = bool
+    _PathRelativePathToW.errcheck = RaiseIfZero
+
+    # Refer to PathRelativePathToA to know why this code is so ugly.
+    if pszFrom:
+        pszFrom = GetFullPathNameW(pszFrom)[0]
+    else:
+        pszFrom = GetCurrentDirectoryW()
+    if pszTo:
+        pszTo = GetFullPathNameW(pszTo)[0]
+    else:
+        pszTo = GetCurrentDirectoryW()
+    dwPath = max((len(pszFrom) + len(pszTo)) * 2 + 1, MAX_PATH + 1)
+    pszPath = ctypes.create_unicode_buffer(u'', dwPath)
+    SetLastError(ERROR_INVALID_PARAMETER)
+    _PathRelativePathToW(pszPath, pszFrom, dwAttrFrom, pszTo, dwAttrTo)
+    return pszPath.value
+
+PathRelativePathTo = GuessStringType(PathRelativePathToA, PathRelativePathToW)
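+
+# Illustrative usage sketch (not part of the original wrapper; paths are made
+# up and must live on the same drive):
+#
+#     PathRelativePathTo(pszFrom = u"C:\\Windows",
+#                        pszTo   = u"C:\\Windows\\system32\\notepad.exe")
+#     # -> roughly u'.\\system32\\notepad.exe'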
+
+# BOOL PathFileExists(
+#     LPCTSTR pszPath
+# );
+def PathFileExistsA(pszPath):
+    _PathFileExistsA = windll.shlwapi.PathFileExistsA
+    _PathFileExistsA.argtypes = [LPSTR]
+    _PathFileExistsA.restype  = bool
+    return _PathFileExistsA(pszPath)
+
+def PathFileExistsW(pszPath):
+    _PathFileExistsW = windll.shlwapi.PathFileExistsW
+    _PathFileExistsW.argtypes = [LPWSTR]
+    _PathFileExistsW.restype  = bool
+    return _PathFileExistsW(pszPath)
+
+PathFileExists = GuessStringType(PathFileExistsA, PathFileExistsW)
+
+# LPTSTR PathFindExtension(
+#     LPCTSTR pszPath
+# );
+def PathFindExtensionA(pszPath):
+    _PathFindExtensionA = windll.shlwapi.PathFindExtensionA
+    _PathFindExtensionA.argtypes = [LPSTR]
+    _PathFindExtensionA.restype  = LPSTR
+    pszPath = ctypes.create_string_buffer(pszPath)
+    return _PathFindExtensionA(pszPath)
+
+def PathFindExtensionW(pszPath):
+    _PathFindExtensionW = windll.shlwapi.PathFindExtensionW
+    _PathFindExtensionW.argtypes = [LPWSTR]
+    _PathFindExtensionW.restype  = LPWSTR
+    pszPath = ctypes.create_unicode_buffer(pszPath)
+    return _PathFindExtensionW(pszPath)
+
+PathFindExtension = GuessStringType(PathFindExtensionA, PathFindExtensionW)
+
+# LPTSTR PathFindFileName(
+#     LPCTSTR pszPath
+# );
+def PathFindFileNameA(pszPath):
+    _PathFindFileNameA = windll.shlwapi.PathFindFileNameA
+    _PathFindFileNameA.argtypes = [LPSTR]
+    _PathFindFileNameA.restype  = LPSTR
+    pszPath = ctypes.create_string_buffer(pszPath)
+    return _PathFindFileNameA(pszPath)
+
+def PathFindFileNameW(pszPath):
+    _PathFindFileNameW = windll.shlwapi.PathFindFileNameW
+    _PathFindFileNameW.argtypes = [LPWSTR]
+    _PathFindFileNameW.restype  = LPWSTR
+    pszPath = ctypes.create_unicode_buffer(pszPath)
+    return _PathFindFileNameW(pszPath)
+
+PathFindFileName = GuessStringType(PathFindFileNameA, PathFindFileNameW)
+
+# LPTSTR PathFindNextComponent(
+#     LPCTSTR pszPath
+# );
+def PathFindNextComponentA(pszPath):
+    _PathFindNextComponentA = windll.shlwapi.PathFindNextComponentA
+    _PathFindNextComponentA.argtypes = [LPSTR]
+    _PathFindNextComponentA.restype  = LPSTR
+    pszPath = ctypes.create_string_buffer(pszPath)
+    return _PathFindNextComponentA(pszPath)
+
+def PathFindNextComponentW(pszPath):
+    _PathFindNextComponentW = windll.shlwapi.PathFindNextComponentW
+    _PathFindNextComponentW.argtypes = [LPWSTR]
+    _PathFindNextComponentW.restype  = LPWSTR
+    pszPath = ctypes.create_unicode_buffer(pszPath)
+    return _PathFindNextComponentW(pszPath)
+
+PathFindNextComponent = GuessStringType(PathFindNextComponentA, PathFindNextComponentW)
+
+# BOOL PathFindOnPath(
+#     LPTSTR pszFile,
+#     LPCTSTR *ppszOtherDirs
+# );
+def PathFindOnPathA(pszFile, ppszOtherDirs = None):
+    _PathFindOnPathA = windll.shlwapi.PathFindOnPathA
+    _PathFindOnPathA.argtypes = [LPSTR, LPSTR]
+    _PathFindOnPathA.restype  = bool
+
+    pszFile = ctypes.create_string_buffer(pszFile, MAX_PATH)
+    if not ppszOtherDirs:
+        ppszOtherDirs = None
+    else:
+        szArray = ""
+        for pszOtherDirs in ppszOtherDirs:
+            if pszOtherDirs:
+                szArray = "%s%s\0" % (szArray, pszOtherDirs)
+        szArray = szArray + "\0"
+        pszOtherDirs = ctypes.create_string_buffer(szArray)
+        ppszOtherDirs = ctypes.pointer(pszOtherDirs)
+    if _PathFindOnPathA(pszFile, ppszOtherDirs):
+        return pszFile.value
+    return None
+
+def PathFindOnPathW(pszFile, ppszOtherDirs = None):
+    _PathFindOnPathW = windll.shlwapi.PathFindOnPathW
+    _PathFindOnPathW.argtypes = [LPWSTR, LPWSTR]
+    _PathFindOnPathW.restype  = bool
+
+    pszFile = ctypes.create_unicode_buffer(pszFile, MAX_PATH)
+    if not ppszOtherDirs:
+        ppszOtherDirs = None
+    else:
+        szArray = u""
+        for pszOtherDirs in ppszOtherDirs:
+            if pszOtherDirs:
+                szArray = u"%s%s\0" % (szArray, pszOtherDirs)
+        szArray = szArray + u"\0"
+        pszOtherDirs = ctypes.create_unicode_buffer(szArray)
+        ppszOtherDirs = ctypes.pointer(pszOtherDirs)
+    if _PathFindOnPathW(pszFile, ppszOtherDirs):
+        return pszFile.value
+    return None
+
+PathFindOnPath = GuessStringType(PathFindOnPathA, PathFindOnPathW)
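+
+# Illustrative usage sketch (not part of the original wrapper): with no
+# ppszOtherDirs the search covers the usual default locations (application
+# directory, current directory, system directories, %PATH%):
+#
+#     PathFindOnPath(u"notepad.exe")
+#     # -> a full path such as u'C:\\Windows\\system32\\notepad.exe',
+#     #    or None if the file cannot be found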
+
+# LPTSTR PathGetArgs(
+#     LPCTSTR pszPath
+# );
+def PathGetArgsA(pszPath):
+    _PathGetArgsA = windll.shlwapi.PathGetArgsA
+    _PathGetArgsA.argtypes = [LPSTR]
+    _PathGetArgsA.restype  = LPSTR
+    pszPath = ctypes.create_string_buffer(pszPath)
+    return _PathGetArgsA(pszPath)
+
+def PathGetArgsW(pszPath):
+    _PathGetArgsW = windll.shlwapi.PathGetArgsW
+    _PathGetArgsW.argtypes = [LPWSTR]
+    _PathGetArgsW.restype  = LPWSTR
+    pszPath = ctypes.create_unicode_buffer(pszPath)
+    return _PathGetArgsW(pszPath)
+
+PathGetArgs = GuessStringType(PathGetArgsA, PathGetArgsW)
+
+# BOOL PathIsContentType(
+#     LPCTSTR pszPath,
+#     LPCTSTR pszContentType
+# );
+def PathIsContentTypeA(pszPath, pszContentType):
+    _PathIsContentTypeA = windll.shlwapi.PathIsContentTypeA
+    _PathIsContentTypeA.argtypes = [LPSTR, LPSTR]
+    _PathIsContentTypeA.restype  = bool
+    return _PathIsContentTypeA(pszPath, pszContentType)
+
+def PathIsContentTypeW(pszPath, pszContentType):
+    _PathIsContentTypeW = windll.shlwapi.PathIsContentTypeW
+    _PathIsContentTypeW.argtypes = [LPWSTR, LPWSTR]
+    _PathIsContentTypeW.restype  = bool
+    return _PathIsContentTypeW(pszPath, pszContentType)
+
+PathIsContentType = GuessStringType(PathIsContentTypeA, PathIsContentTypeW)
+
+# BOOL PathIsDirectory(
+#     LPCTSTR pszPath
+# );
+def PathIsDirectoryA(pszPath):
+    _PathIsDirectoryA = windll.shlwapi.PathIsDirectoryA
+    _PathIsDirectoryA.argtypes = [LPSTR]
+    _PathIsDirectoryA.restype  = bool
+    return _PathIsDirectoryA(pszPath)
+
+def PathIsDirectoryW(pszPath):
+    _PathIsDirectoryW = windll.shlwapi.PathIsDirectoryW
+    _PathIsDirectoryW.argtypes = [LPWSTR]
+    _PathIsDirectoryW.restype  = bool
+    return _PathIsDirectoryW(pszPath)
+
+PathIsDirectory = GuessStringType(PathIsDirectoryA, PathIsDirectoryW)
+
+# BOOL PathIsDirectoryEmpty(
+#     LPCTSTR pszPath
+# );
+def PathIsDirectoryEmptyA(pszPath):
+    _PathIsDirectoryEmptyA = windll.shlwapi.PathIsDirectoryEmptyA
+    _PathIsDirectoryEmptyA.argtypes = [LPSTR]
+    _PathIsDirectoryEmptyA.restype  = bool
+    return _PathIsDirectoryEmptyA(pszPath)
+
+def PathIsDirectoryEmptyW(pszPath):
+    _PathIsDirectoryEmptyW = windll.shlwapi.PathIsDirectoryEmptyW
+    _PathIsDirectoryEmptyW.argtypes = [LPWSTR]
+    _PathIsDirectoryEmptyW.restype  = bool
+    return _PathIsDirectoryEmptyW(pszPath)
+
+PathIsDirectoryEmpty = GuessStringType(PathIsDirectoryEmptyA, PathIsDirectoryEmptyW)
+
+# BOOL PathIsNetworkPath(
+#     LPCTSTR pszPath
+# );
+def PathIsNetworkPathA(pszPath):
+    _PathIsNetworkPathA = windll.shlwapi.PathIsNetworkPathA
+    _PathIsNetworkPathA.argtypes = [LPSTR]
+    _PathIsNetworkPathA.restype  = bool
+    return _PathIsNetworkPathA(pszPath)
+
+def PathIsNetworkPathW(pszPath):
+    _PathIsNetworkPathW = windll.shlwapi.PathIsNetworkPathW
+    _PathIsNetworkPathW.argtypes = [LPWSTR]
+    _PathIsNetworkPathW.restype  = bool
+    return _PathIsNetworkPathW(pszPath)
+
+PathIsNetworkPath = GuessStringType(PathIsNetworkPathA, PathIsNetworkPathW)
+
+# BOOL PathIsRelative(
+#     LPCTSTR lpszPath
+# );
+def PathIsRelativeA(pszPath):
+    _PathIsRelativeA = windll.shlwapi.PathIsRelativeA
+    _PathIsRelativeA.argtypes = [LPSTR]
+    _PathIsRelativeA.restype  = bool
+    return _PathIsRelativeA(pszPath)
+
+def PathIsRelativeW(pszPath):
+    _PathIsRelativeW = windll.shlwapi.PathIsRelativeW
+    _PathIsRelativeW.argtypes = [LPWSTR]
+    _PathIsRelativeW.restype  = bool
+    return _PathIsRelativeW(pszPath)
+
+PathIsRelative = GuessStringType(PathIsRelativeA, PathIsRelativeW)
+
+# BOOL PathIsRoot(
+#     LPCTSTR pPath
+# );
+def PathIsRootA(pszPath):
+    _PathIsRootA = windll.shlwapi.PathIsRootA
+    _PathIsRootA.argtypes = [LPSTR]
+    _PathIsRootA.restype  = bool
+    return _PathIsRootA(pszPath)
+
+def PathIsRootW(pszPath):
+    _PathIsRootW = windll.shlwapi.PathIsRootW
+    _PathIsRootW.argtypes = [LPWSTR]
+    _PathIsRootW.restype  = bool
+    return _PathIsRootW(pszPath)
+
+PathIsRoot = GuessStringType(PathIsRootA, PathIsRootW)
+
+# BOOL PathIsSameRoot(
+#     LPCTSTR pszPath1,
+#     LPCTSTR pszPath2
+# );
+def PathIsSameRootA(pszPath1, pszPath2):
+    _PathIsSameRootA = windll.shlwapi.PathIsSameRootA
+    _PathIsSameRootA.argtypes = [LPSTR, LPSTR]
+    _PathIsSameRootA.restype  = bool
+    return _PathIsSameRootA(pszPath1, pszPath2)
+
+def PathIsSameRootW(pszPath1, pszPath2):
+    _PathIsSameRootW = windll.shlwapi.PathIsSameRootW
+    _PathIsSameRootW.argtypes = [LPWSTR, LPWSTR]
+    _PathIsSameRootW.restype  = bool
+    return _PathIsSameRootW(pszPath1, pszPath2)
+
+PathIsSameRoot = GuessStringType(PathIsSameRootA, PathIsSameRootW)
+
+# BOOL PathIsUNC(
+#     LPCTSTR pszPath
+# );
+def PathIsUNCA(pszPath):
+    _PathIsUNCA = windll.shlwapi.PathIsUNCA
+    _PathIsUNCA.argtypes = [LPSTR]
+    _PathIsUNCA.restype  = bool
+    return _PathIsUNCA(pszPath)
+
+def PathIsUNCW(pszPath):
+    _PathIsUNCW = windll.shlwapi.PathIsUNCW
+    _PathIsUNCW.argtypes = [LPWSTR]
+    _PathIsUNCW.restype  = bool
+    return _PathIsUNCW(pszPath)
+
+PathIsUNC = GuessStringType(PathIsUNCA, PathIsUNCW)
+
+# XXX WARNING
+# PathMakePretty turns filenames into all lowercase.
+# I'm not sure how well that might work on Wine.
+
+# BOOL PathMakePretty(
+#     LPCTSTR pszPath
+# );
+def PathMakePrettyA(pszPath):
+    _PathMakePrettyA = windll.shlwapi.PathMakePrettyA
+    _PathMakePrettyA.argtypes = [LPSTR]
+    _PathMakePrettyA.restype  = bool
+    _PathMakePrettyA.errcheck = RaiseIfZero
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    _PathMakePrettyA(pszPath)
+    return pszPath.value
+
+def PathMakePrettyW(pszPath):
+    _PathMakePrettyW = windll.shlwapi.PathMakePrettyW
+    _PathMakePrettyW.argtypes = [LPWSTR]
+    _PathMakePrettyW.restype  = bool
+    _PathMakePrettyW.errcheck = RaiseIfZero
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    _PathMakePrettyW(pszPath)
+    return pszPath.value
+
+PathMakePretty = GuessStringType(PathMakePrettyA, PathMakePrettyW)
+
+# void PathRemoveArgs(
+#     LPTSTR pszPath
+# );
+def PathRemoveArgsA(pszPath):
+    _PathRemoveArgsA = windll.shlwapi.PathRemoveArgsA
+    _PathRemoveArgsA.argtypes = [LPSTR]
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    _PathRemoveArgsA(pszPath)
+    return pszPath.value
+
+def PathRemoveArgsW(pszPath):
+    _PathRemoveArgsW = windll.shlwapi.PathRemoveArgsW
+    _PathRemoveArgsW.argtypes = [LPWSTR]
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    _PathRemoveArgsW(pszPath)
+    return pszPath.value
+
+PathRemoveArgs = GuessStringType(PathRemoveArgsA, PathRemoveArgsW)
+
+# void PathRemoveBackslash(
+#     LPTSTR pszPath
+# );
+def PathRemoveBackslashA(pszPath):
+    _PathRemoveBackslashA = windll.shlwapi.PathRemoveBackslashA
+    _PathRemoveBackslashA.argtypes = [LPSTR]
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    _PathRemoveBackslashA(pszPath)
+    return pszPath.value
+
+def PathRemoveBackslashW(pszPath):
+    _PathRemoveBackslashW = windll.shlwapi.PathRemoveBackslashW
+    _PathRemoveBackslashW.argtypes = [LPWSTR]
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    _PathRemoveBackslashW(pszPath)
+    return pszPath.value
+
+PathRemoveBackslash = GuessStringType(PathRemoveBackslashA, PathRemoveBackslashW)
+
+# void PathRemoveExtension(
+#     LPTSTR pszPath
+# );
+def PathRemoveExtensionA(pszPath):
+    _PathRemoveExtensionA = windll.shlwapi.PathRemoveExtensionA
+    _PathRemoveExtensionA.argtypes = [LPSTR]
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    _PathRemoveExtensionA(pszPath)
+    return pszPath.value
+
+def PathRemoveExtensionW(pszPath):
+    _PathRemoveExtensionW = windll.shlwapi.PathRemoveExtensionW
+    _PathRemoveExtensionW.argtypes = [LPWSTR]
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    _PathRemoveExtensionW(pszPath)
+    return pszPath.value
+
+PathRemoveExtension = GuessStringType(PathRemoveExtensionA, PathRemoveExtensionW)
+
+# void PathRemoveFileSpec(
+#     LPTSTR pszPath
+# );
+def PathRemoveFileSpecA(pszPath):
+    _PathRemoveFileSpecA = windll.shlwapi.PathRemoveFileSpecA
+    _PathRemoveFileSpecA.argtypes = [LPSTR]
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    _PathRemoveFileSpecA(pszPath)
+    return pszPath.value
+
+def PathRemoveFileSpecW(pszPath):
+    _PathRemoveFileSpecW = windll.shlwapi.PathRemoveFileSpecW
+    _PathRemoveFileSpecW.argtypes = [LPWSTR]
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    _PathRemoveFileSpecW(pszPath)
+    return pszPath.value
+
+PathRemoveFileSpec = GuessStringType(PathRemoveFileSpecA, PathRemoveFileSpecW)
+
+# BOOL PathRenameExtension(
+#     LPTSTR pszPath,
+#     LPCTSTR pszExt
+# );
+def PathRenameExtensionA(pszPath, pszExt):
+    _PathRenameExtensionA = windll.shlwapi.PathRenameExtensionA
+    _PathRenameExtensionA.argtypes = [LPSTR, LPSTR]
+    _PathRenameExtensionA.restype  = bool
+
+    pszPath = ctypes.create_string_buffer(pszPath, MAX_PATH)
+    if _PathRenameExtensionA(pszPath, pszExt):
+        return pszPath.value
+    return None
+
+def PathRenameExtensionW(pszPath, pszExt):
+    _PathRenameExtensionW = windll.shlwapi.PathRenameExtensionW
+    _PathRenameExtensionW.argtypes = [LPWSTR, LPWSTR]
+    _PathRenameExtensionW.restype  = bool
+
+    pszPath = ctypes.create_unicode_buffer(pszPath, MAX_PATH)
+    if _PathRenameExtensionW(pszPath, pszExt):
+        return pszPath.value
+    return None
+
+PathRenameExtension = GuessStringType(PathRenameExtensionA, PathRenameExtensionW)
+
+# BOOL PathUnExpandEnvStrings(
+#     LPCTSTR pszPath,
+#     LPTSTR pszBuf,
+#     UINT cchBuf
+# );
+def PathUnExpandEnvStringsA(pszPath):
+    _PathUnExpandEnvStringsA = windll.shlwapi.PathUnExpandEnvStringsA
+    _PathUnExpandEnvStringsA.argtypes = [LPSTR, LPSTR]
+    _PathUnExpandEnvStringsA.restype  = bool
+    _PathUnExpandEnvStringsA.errcheck = RaiseIfZero
+
+    cchBuf = MAX_PATH
+    pszBuf = ctypes.create_string_buffer("", cchBuf)
+    _PathUnExpandEnvStringsA(pszPath, pszBuf, cchBuf)
+    return pszBuf.value
+
+def PathUnExpandEnvStringsW(pszPath):
+    _PathUnExpandEnvStringsW = windll.shlwapi.PathUnExpandEnvStringsW
+    _PathUnExpandEnvStringsW.argtypes = [LPWSTR, LPWSTR]
+    _PathUnExpandEnvStringsW.restype  = bool
+    _PathUnExpandEnvStringsW.errcheck = RaiseIfZero
+
+    cchBuf = MAX_PATH
+    pszBuf = ctypes.create_unicode_buffer(u"", cchBuf)
+    _PathUnExpandEnvStringsW(pszPath, pszBuf, cchBuf)
+    return pszBuf.value
+
+PathUnExpandEnvStrings = GuessStringType(PathUnExpandEnvStringsA, PathUnExpandEnvStringsW)
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/user32.py b/scripts/win32/user32.py
new file mode 100644
index 0000000..5a75196
--- /dev/null
+++ b/scripts/win32/user32.py
@@ -0,0 +1,1756 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for user32.dll in ctypes.
+"""
+
+from .defines import *
+from .version import bits
+from .kernel32 import GetLastError, SetLastError
+from .gdi32 import POINT, LPPOINT, RECT, LPRECT
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- Helpers ------------------------------------------------------------------
+
+def MAKE_WPARAM(wParam):
+    """
+    Convert arguments to the WPARAM type.
+    Used automatically by SendMessage, PostMessage, etc.
+    You shouldn't need to call this function.
+    """
+    wParam = ctypes.cast(wParam, LPVOID).value
+    if wParam is None:
+        wParam = 0
+    return wParam
+
+def MAKE_LPARAM(lParam):
+    """
+    Convert arguments to the LPARAM type.
+    Used automatically by SendMessage, PostMessage, etc.
+    You shouldn't need to call this function.
+    """
+    return ctypes.cast(lParam, LPARAM)
+
+class __WindowEnumerator (object):
+    """
+    Window enumerator class. Used internally by the window enumeration APIs.
+    """
+    def __init__(self):
+        self.hwnd = list()
+    def __call__(self, hwnd, lParam):
+##        print hwnd  # XXX DEBUG
+        self.hwnd.append(hwnd)
+        return TRUE
+
+#--- Types --------------------------------------------------------------------
+
+WNDENUMPROC = WINFUNCTYPE(BOOL, HWND, PVOID)
+
+#--- Constants ----------------------------------------------------------------
+
+HWND_DESKTOP    = 0
+HWND_TOP        = 0
+HWND_BOTTOM     = 1
+HWND_TOPMOST    = -1
+HWND_NOTOPMOST  = -2
+HWND_MESSAGE    = -3
+
+# GetWindowLong / SetWindowLong
+GWL_WNDPROC                          = -4
+GWL_HINSTANCE                        = -6
+GWL_HWNDPARENT                       = -8
+GWL_ID                               = -12
+GWL_STYLE                            = -16
+GWL_EXSTYLE                          = -20
+GWL_USERDATA                         = -21
+
+# GetWindowLongPtr / SetWindowLongPtr
+GWLP_WNDPROC                         = GWL_WNDPROC
+GWLP_HINSTANCE                       = GWL_HINSTANCE
+GWLP_HWNDPARENT                      = GWL_HWNDPARENT
+GWLP_STYLE                           = GWL_STYLE
+GWLP_EXSTYLE                         = GWL_EXSTYLE
+GWLP_USERDATA                        = GWL_USERDATA
+GWLP_ID                              = GWL_ID
+
+# ShowWindow
+SW_HIDE                             = 0
+SW_SHOWNORMAL                       = 1
+SW_NORMAL                           = 1
+SW_SHOWMINIMIZED                    = 2
+SW_SHOWMAXIMIZED                    = 3
+SW_MAXIMIZE                         = 3
+SW_SHOWNOACTIVATE                   = 4
+SW_SHOW                             = 5
+SW_MINIMIZE                         = 6
+SW_SHOWMINNOACTIVE                  = 7
+SW_SHOWNA                           = 8
+SW_RESTORE                          = 9
+SW_SHOWDEFAULT                      = 10
+SW_FORCEMINIMIZE                    = 11
+
+# SendMessageTimeout flags
+SMTO_NORMAL                         = 0
+SMTO_BLOCK                          = 1
+SMTO_ABORTIFHUNG                    = 2
+SMTO_NOTIMEOUTIFNOTHUNG             = 8
+SMTO_ERRORONEXIT                    = 0x20
+
+# WINDOWPLACEMENT flags
+WPF_SETMINPOSITION                  = 1
+WPF_RESTORETOMAXIMIZED              = 2
+WPF_ASYNCWINDOWPLACEMENT            = 4
+
+# GetAncestor flags
+GA_PARENT                           = 1
+GA_ROOT                             = 2
+GA_ROOTOWNER                        = 3
+
+# GetWindow flags
+GW_HWNDFIRST                        = 0
+GW_HWNDLAST                         = 1
+GW_HWNDNEXT                         = 2
+GW_HWNDPREV                         = 3
+GW_OWNER                            = 4
+GW_CHILD                            = 5
+GW_ENABLEDPOPUP                     = 6
+
+#--- Window messages ----------------------------------------------------------
+
+WM_USER                              = 0x400
+WM_APP                               = 0x800
+
+WM_NULL                              = 0
+WM_CREATE                            = 1
+WM_DESTROY                           = 2
+WM_MOVE                              = 3
+WM_SIZE                              = 5
+WM_ACTIVATE                          = 6
+WA_INACTIVE                          = 0
+WA_ACTIVE                            = 1
+WA_CLICKACTIVE                       = 2
+WM_SETFOCUS                          = 7
+WM_KILLFOCUS                         = 8
+WM_ENABLE                            = 0x0A
+WM_SETREDRAW                         = 0x0B
+WM_SETTEXT                           = 0x0C
+WM_GETTEXT                           = 0x0D
+WM_GETTEXTLENGTH                     = 0x0E
+WM_PAINT                             = 0x0F
+WM_CLOSE                             = 0x10
+WM_QUERYENDSESSION                   = 0x11
+WM_QUIT                              = 0x12
+WM_QUERYOPEN                         = 0x13
+WM_ERASEBKGND                        = 0x14
+WM_SYSCOLORCHANGE                    = 0x15
+WM_ENDSESSION                        = 0x16
+WM_SHOWWINDOW                        = 0x18
+WM_WININICHANGE                      = 0x1A
+WM_SETTINGCHANGE                     = WM_WININICHANGE
+WM_DEVMODECHANGE                     = 0x1B
+WM_ACTIVATEAPP                       = 0x1C
+WM_FONTCHANGE                        = 0x1D
+WM_TIMECHANGE                        = 0x1E
+WM_CANCELMODE                        = 0x1F
+WM_SETCURSOR                         = 0x20
+WM_MOUSEACTIVATE                     = 0x21
+WM_CHILDACTIVATE                     = 0x22
+WM_QUEUESYNC                         = 0x23
+WM_GETMINMAXINFO                     = 0x24
+WM_PAINTICON                         = 0x26
+WM_ICONERASEBKGND                    = 0x27
+WM_NEXTDLGCTL                        = 0x28
+WM_SPOOLERSTATUS                     = 0x2A
+WM_DRAWITEM                          = 0x2B
+WM_MEASUREITEM                       = 0x2C
+WM_DELETEITEM                        = 0x2D
+WM_VKEYTOITEM                        = 0x2E
+WM_CHARTOITEM                        = 0x2F
+WM_SETFONT                           = 0x30
+WM_GETFONT                           = 0x31
+WM_SETHOTKEY                         = 0x32
+WM_GETHOTKEY                         = 0x33
+WM_QUERYDRAGICON                     = 0x37
+WM_COMPAREITEM                       = 0x39
+WM_GETOBJECT                         = 0x3D
+WM_COMPACTING                        = 0x41
+WM_OTHERWINDOWCREATED                = 0x42
+WM_OTHERWINDOWDESTROYED              = 0x43
+WM_COMMNOTIFY                        = 0x44
+
+CN_RECEIVE                           = 0x1
+CN_TRANSMIT                          = 0x2
+CN_EVENT                             = 0x4
+
+WM_WINDOWPOSCHANGING                 = 0x46
+WM_WINDOWPOSCHANGED                  = 0x47
+WM_POWER                             = 0x48
+
+PWR_OK                               = 1
+PWR_FAIL                             = -1
+PWR_SUSPENDREQUEST                   = 1
+PWR_SUSPENDRESUME                    = 2
+PWR_CRITICALRESUME                   = 3
+
+WM_COPYDATA                          = 0x4A
+WM_CANCELJOURNAL                     = 0x4B
+WM_NOTIFY                            = 0x4E
+WM_INPUTLANGCHANGEREQUEST            = 0x50
+WM_INPUTLANGCHANGE                   = 0x51
+WM_TCARD                             = 0x52
+WM_HELP                              = 0x53
+WM_USERCHANGED                       = 0x54
+WM_NOTIFYFORMAT                      = 0x55
+WM_CONTEXTMENU                       = 0x7B
+WM_STYLECHANGING                     = 0x7C
+WM_STYLECHANGED                      = 0x7D
+WM_DISPLAYCHANGE                     = 0x7E
+WM_GETICON                           = 0x7F
+WM_SETICON                           = 0x80
+WM_NCCREATE                          = 0x81
+WM_NCDESTROY                         = 0x82
+WM_NCCALCSIZE                        = 0x83
+WM_NCHITTEST                         = 0x84
+WM_NCPAINT                           = 0x85
+WM_NCACTIVATE                        = 0x86
+WM_GETDLGCODE                        = 0x87
+WM_SYNCPAINT                         = 0x88
+WM_NCMOUSEMOVE                       = 0x0A0
+WM_NCLBUTTONDOWN                     = 0x0A1
+WM_NCLBUTTONUP                       = 0x0A2
+WM_NCLBUTTONDBLCLK                   = 0x0A3
+WM_NCRBUTTONDOWN                     = 0x0A4
+WM_NCRBUTTONUP                       = 0x0A5
+WM_NCRBUTTONDBLCLK                   = 0x0A6
+WM_NCMBUTTONDOWN                     = 0x0A7
+WM_NCMBUTTONUP                       = 0x0A8
+WM_NCMBUTTONDBLCLK                   = 0x0A9
+WM_KEYFIRST                          = 0x100
+WM_KEYDOWN                           = 0x100
+WM_KEYUP                             = 0x101
+WM_CHAR                              = 0x102
+WM_DEADCHAR                          = 0x103
+WM_SYSKEYDOWN                        = 0x104
+WM_SYSKEYUP                          = 0x105
+WM_SYSCHAR                           = 0x106
+WM_SYSDEADCHAR                       = 0x107
+WM_KEYLAST                           = 0x108
+WM_INITDIALOG                        = 0x110
+WM_COMMAND                           = 0x111
+WM_SYSCOMMAND                        = 0x112
+WM_TIMER                             = 0x113
+WM_HSCROLL                           = 0x114
+WM_VSCROLL                           = 0x115
+WM_INITMENU                          = 0x116
+WM_INITMENUPOPUP                     = 0x117
+WM_MENUSELECT                        = 0x11F
+WM_MENUCHAR                          = 0x120
+WM_ENTERIDLE                         = 0x121
+WM_CTLCOLORMSGBOX                    = 0x132
+WM_CTLCOLOREDIT                      = 0x133
+WM_CTLCOLORLISTBOX                   = 0x134
+WM_CTLCOLORBTN                       = 0x135
+WM_CTLCOLORDLG                       = 0x136
+WM_CTLCOLORSCROLLBAR                 = 0x137
+WM_CTLCOLORSTATIC                    = 0x138
+WM_MOUSEFIRST                        = 0x200
+WM_MOUSEMOVE                         = 0x200
+WM_LBUTTONDOWN                       = 0x201
+WM_LBUTTONUP                         = 0x202
+WM_LBUTTONDBLCLK                     = 0x203
+WM_RBUTTONDOWN                       = 0x204
+WM_RBUTTONUP                         = 0x205
+WM_RBUTTONDBLCLK                     = 0x206
+WM_MBUTTONDOWN                       = 0x207
+WM_MBUTTONUP                         = 0x208
+WM_MBUTTONDBLCLK                     = 0x209
+WM_MOUSELAST                         = 0x209
+WM_PARENTNOTIFY                      = 0x210
+WM_ENTERMENULOOP                     = 0x211
+WM_EXITMENULOOP                      = 0x212
+WM_MDICREATE                         = 0x220
+WM_MDIDESTROY                        = 0x221
+WM_MDIACTIVATE                       = 0x222
+WM_MDIRESTORE                        = 0x223
+WM_MDINEXT                           = 0x224
+WM_MDIMAXIMIZE                       = 0x225
+WM_MDITILE                           = 0x226
+WM_MDICASCADE                        = 0x227
+WM_MDIICONARRANGE                    = 0x228
+WM_MDIGETACTIVE                      = 0x229
+WM_MDISETMENU                        = 0x230
+WM_DROPFILES                         = 0x233
+WM_MDIREFRESHMENU                    = 0x234
+WM_CUT                               = 0x300
+WM_COPY                              = 0x301
+WM_PASTE                             = 0x302
+WM_CLEAR                             = 0x303
+WM_UNDO                              = 0x304
+WM_RENDERFORMAT                      = 0x305
+WM_RENDERALLFORMATS                  = 0x306
+WM_DESTROYCLIPBOARD                  = 0x307
+WM_DRAWCLIPBOARD                     = 0x308
+WM_PAINTCLIPBOARD                    = 0x309
+WM_VSCROLLCLIPBOARD                  = 0x30A
+WM_SIZECLIPBOARD                     = 0x30B
+WM_ASKCBFORMATNAME                   = 0x30C
+WM_CHANGECBCHAIN                     = 0x30D
+WM_HSCROLLCLIPBOARD                  = 0x30E
+WM_QUERYNEWPALETTE                   = 0x30F
+WM_PALETTEISCHANGING                 = 0x310
+WM_PALETTECHANGED                    = 0x311
+WM_HOTKEY                            = 0x312
+WM_PRINT                             = 0x317
+WM_PRINTCLIENT                       = 0x318
+WM_PENWINFIRST                       = 0x380
+WM_PENWINLAST                        = 0x38F
+
+#--- Structures ---------------------------------------------------------------
+
+# typedef struct _WINDOWPLACEMENT {
+#     UINT length;
+#     UINT flags;
+#     UINT showCmd;
+#     POINT ptMinPosition;
+#     POINT ptMaxPosition;
+#     RECT rcNormalPosition;
+# } WINDOWPLACEMENT;
+class WINDOWPLACEMENT(Structure):
+    _fields_ = [
+        ('length',              UINT),
+        ('flags',               UINT),
+        ('showCmd',             UINT),
+        ('ptMinPosition',       POINT),
+        ('ptMaxPosition',       POINT),
+        ('rcNormalPosition',    RECT),
+    ]
+PWINDOWPLACEMENT  = POINTER(WINDOWPLACEMENT)
+LPWINDOWPLACEMENT = PWINDOWPLACEMENT
+
+# typedef struct tagGUITHREADINFO {
+#     DWORD cbSize;
+#     DWORD flags;
+#     HWND hwndActive;
+#     HWND hwndFocus;
+#     HWND hwndCapture;
+#     HWND hwndMenuOwner;
+#     HWND hwndMoveSize;
+#     HWND hwndCaret;
+#     RECT rcCaret;
+# } GUITHREADINFO, *PGUITHREADINFO;
+class GUITHREADINFO(Structure):
+    _fields_ = [
+        ('cbSize',          DWORD),
+        ('flags',           DWORD),
+        ('hwndActive',      HWND),
+        ('hwndFocus',       HWND),
+        ('hwndCapture',     HWND),
+        ('hwndMenuOwner',   HWND),
+        ('hwndMoveSize',    HWND),
+        ('hwndCaret',       HWND),
+        ('rcCaret',         RECT),
+    ]
+PGUITHREADINFO  = POINTER(GUITHREADINFO)
+LPGUITHREADINFO = PGUITHREADINFO
+
+#--- High level classes -------------------------------------------------------
+
+# Point() and Rect() are here instead of gdi32.py because they were mainly
+# created to handle window coordinates rather than drawing on the screen.
+
+# XXX not sure if these classes should be psyco-optimized,
+# it may not work if the user wants to serialize them for some reason
+
+class Point(object):
+    """
+    Python wrapper over the L{POINT} class.
+
+    @type x: int
+    @ivar x: Horizontal coordinate
+    @type y: int
+    @ivar y: Vertical coordinate
+    """
+
+    def __init__(self, x = 0, y = 0):
+        """
+        @see: L{POINT}
+        @type  x: int
+        @param x: Horizontal coordinate
+        @type  y: int
+        @param y: Vertical coordinate
+        """
+        self.x = x
+        self.y = y
+
+    def __iter__(self):
+        return (self.x, self.y).__iter__()
+
+    def __len__(self):
+        return 2
+
+    def __getitem__(self, index):
+        return (self.x, self.y) [index]
+
+    def __setitem__(self, index, value):
+        if   index == 0:
+            self.x = value
+        elif index == 1:
+            self.y = value
+        else:
+            raise IndexError("index out of range")
+
+    @property
+    def _as_parameter_(self):
+        """
+        Compatibility with ctypes.
+        Allows passing transparently a Point object to an API call.
+        """
+        return POINT(self.x, self.y)
+
+    def screen_to_client(self, hWnd):
+        """
+        Translates window screen coordinates to client coordinates.
+
+        @see: L{client_to_screen}, L{translate}
+
+        @type  hWnd: int or L{HWND} or L{system.Window}
+        @param hWnd: Window handle.
+
+        @rtype:  L{Point}
+        @return: New object containing the translated coordinates.
+        """
+        return ScreenToClient(hWnd, self)
+
+    def client_to_screen(self, hWnd):
+        """
+        Translates window client coordinates to screen coordinates.
+
+        @see: L{screen_to_client}, L{translate}
+
+        @type  hWnd: int or L{HWND} or L{system.Window}
+        @param hWnd: Window handle.
+
+        @rtype:  L{Point}
+        @return: New object containing the translated coordinates.
+        """
+        return ClientToScreen(hWnd, self)
+
+    def translate(self, hWndFrom = HWND_DESKTOP, hWndTo = HWND_DESKTOP):
+        """
+        Translate coordinates from one window to another.
+
+        @note: To translate multiple points it's more efficient to use the
+            L{MapWindowPoints} function instead.
+
+        @see: L{client_to_screen}, L{screen_to_client}
+
+        @type  hWndFrom: int or L{HWND} or L{system.Window}
+        @param hWndFrom: Window handle to translate from.
+            Use C{HWND_DESKTOP} for screen coordinates.
+
+        @type  hWndTo: int or L{HWND} or L{system.Window}
+        @param hWndTo: Window handle to translate to.
+            Use C{HWND_DESKTOP} for screen coordinates.
+
+        @rtype:  L{Point}
+        @return: New object containing the translated coordinates.
+        """
+        return MapWindowPoints(hWndFrom, hWndTo, [self])
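+
+# Illustrative usage sketch (not part of the original wrapper; hwnd stands for
+# a window handle obtained elsewhere, e.g. from an EnumWindows call):
+#
+#     pt = Point(10, 20)
+#     x, y = pt                            # unpacks like a tuple
+#     pt_screen = pt.client_to_screen(hwnd)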
+
+class Rect(object):
+    """
+    Python wrapper over the L{RECT} class.
+
+    @type   left: int
+    @ivar   left: Horizontal coordinate for the top left corner.
+    @type    top: int
+    @ivar    top: Vertical coordinate for the top left corner.
+    @type  right: int
+    @ivar  right: Horizontal coordinate for the bottom right corner.
+    @type bottom: int
+    @ivar bottom: Vertical coordinate for the bottom right corner.
+
+    @type  width: int
+    @ivar  width: Width in pixels. Same as C{right - left}.
+    @type height: int
+    @ivar height: Height in pixels. Same as C{bottom - top}.
+    """
+
+    def __init__(self, left = 0, top = 0, right = 0, bottom = 0):
+        """
+        @see: L{RECT}
+        @type    left: int
+        @param   left: Horizontal coordinate for the top left corner.
+        @type     top: int
+        @param    top: Vertical coordinate for the top left corner.
+        @type   right: int
+        @param  right: Horizontal coordinate for the bottom right corner.
+        @type  bottom: int
+        @param bottom: Vertical coordinate for the bottom right corner.
+        """
+        self.left   = left
+        self.top    = top
+        self.right  = right
+        self.bottom = bottom
+
+    def __iter__(self):
+        return (self.left, self.top, self.right, self.bottom).__iter__()
+
+    def __len__(self):
+        return 4
+
+    def __getitem__(self, index):
+        return (self.left, self.top, self.right, self.bottom) [index]
+
+    def __setitem__(self, index, value):
+        if   index == 0:
+            self.left   = value
+        elif index == 1:
+            self.top    = value
+        elif index == 2:
+            self.right  = value
+        elif index == 3:
+            self.bottom = value
+        else:
+            raise IndexError("index out of range")
+
+    @property
+    def _as_parameter_(self):
+        """
+        Compatibility with ctypes.
+        Allows a Rect object to be passed transparently to an API call.
+        """
+        return RECT(self.left, self.top, self.right, self.bottom)
+
+    def __get_width(self):
+        return self.right - self.left
+
+    def __get_height(self):
+        return self.bottom - self.top
+
+    def __set_width(self, value):
+        self.right = self.left + value
+
+    def __set_height(self, value):
+        self.bottom = self.top + value
+
+    width  = property(__get_width, __set_width)
+    height = property(__get_height, __set_height)
+
+    def screen_to_client(self, hWnd):
+        """
+        Translates window screen coordinates to client coordinates.
+
+        @see: L{client_to_screen}, L{translate}
+
+        @type  hWnd: int or L{HWND} or L{system.Window}
+        @param hWnd: Window handle.
+
+        @rtype:  L{Rect}
+        @return: New object containing the translated coordinates.
+        """
+        topleft     = ScreenToClient(hWnd, (self.left,   self.top))
+        bottomright = ScreenToClient(hWnd, (self.right, self.bottom))
+        return Rect( topleft.x, topleft.y, bottomright.x, bottomright.y )
+
+    def client_to_screen(self, hWnd):
+        """
+        Translates window client coordinates to screen coordinates.
+
+        @see: L{screen_to_client}, L{translate}
+
+        @type  hWnd: int or L{HWND} or L{system.Window}
+        @param hWnd: Window handle.
+
+        @rtype:  L{Rect}
+        @return: New object containing the translated coordinates.
+        """
+        topleft     = ClientToScreen(hWnd, (self.left,   self.top))
+        bottomright = ClientToScreen(hWnd, (self.right, self.bottom))
+        return Rect( topleft.x, topleft.y, bottomright.x, bottomright.y )
+
+    def translate(self, hWndFrom = HWND_DESKTOP, hWndTo = HWND_DESKTOP):
+        """
+        Translate coordinates from one window to another.
+
+        @see: L{client_to_screen}, L{screen_to_client}
+
+        @type  hWndFrom: int or L{HWND} or L{system.Window}
+        @param hWndFrom: Window handle to translate from.
+            Use C{HWND_DESKTOP} for screen coordinates.
+
+        @type  hWndTo: int or L{HWND} or L{system.Window}
+        @param hWndTo: Window handle to translate to.
+            Use C{HWND_DESKTOP} for screen coordinates.
+
+        @rtype:  L{Rect}
+        @return: New object containing the translated coordinates.
+        """
+        points = [ (self.left, self.top), (self.right, self.bottom) ]
+        return MapWindowPoints(hWndFrom, hWndTo, points)
+
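+# Illustrative sketch (not part of the wrapped API): a Rect can be built
+# directly, and its width/height properties derive from the corner coordinates.
+#
+#     r = Rect(10, 20, 110, 220)
+#     assert r.width == 100 and r.height == 200
+#     r.width = 50    # moves r.right to r.left + 50
+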
+class WindowPlacement(object):
+    """
+    Python wrapper over the L{WINDOWPLACEMENT} class.
+    """
+
+    def __init__(self, wp = None):
+        """
+        @type  wp: L{WindowPlacement} or L{WINDOWPLACEMENT}
+        @param wp: Another window placement object.
+        """
+
+        # Initialize all properties with empty values.
+        self.flags            = 0
+        self.showCmd          = 0
+        self.ptMinPosition    = Point()
+        self.ptMaxPosition    = Point()
+        self.rcNormalPosition = Rect()
+
+        # If a window placement was given, copy its properties.
+        if wp:
+            self.flags            = wp.flags
+            self.showCmd          = wp.showCmd
+            self.ptMinPosition    = Point( wp.ptMinPosition.x, wp.ptMinPosition.y )
+            self.ptMaxPosition    = Point( wp.ptMaxPosition.x, wp.ptMaxPosition.y )
+            self.rcNormalPosition = Rect(
+                                        wp.rcNormalPosition.left,
+                                        wp.rcNormalPosition.top,
+                                        wp.rcNormalPosition.right,
+                                        wp.rcNormalPosition.bottom,
+                                        )
+
+    @property
+    def _as_parameter_(self):
+        """
+        Compatibility with ctypes.
+        Allows passing a WindowPlacement object transparently to an API call.
+        """
+        wp                          = WINDOWPLACEMENT()
+        wp.length                   = sizeof(wp)
+        wp.flags                    = self.flags
+        wp.showCmd                  = self.showCmd
+        wp.ptMinPosition.x          = self.ptMinPosition.x
+        wp.ptMinPosition.y          = self.ptMinPosition.y
+        wp.ptMaxPosition.x          = self.ptMaxPosition.x
+        wp.ptMaxPosition.y          = self.ptMaxPosition.y
+        wp.rcNormalPosition.left    = self.rcNormalPosition.left
+        wp.rcNormalPosition.top     = self.rcNormalPosition.top
+        wp.rcNormalPosition.right   = self.rcNormalPosition.right
+        wp.rcNormalPosition.bottom  = self.rcNormalPosition.bottom
+        return wp
+
+#--- user32.dll ---------------------------------------------------------------
+
+# void WINAPI SetLastErrorEx(
+#   __in  DWORD dwErrCode,
+#   __in  DWORD dwType
+# );
+def SetLastErrorEx(dwErrCode, dwType = 0):
+    _SetLastErrorEx = windll.user32.SetLastErrorEx
+    _SetLastErrorEx.argtypes = [DWORD, DWORD]
+    _SetLastErrorEx.restype  = None
+    _SetLastErrorEx(dwErrCode, dwType)
+
+# HDC GetDC(
+#   __in  HWND hWnd
+# );
+def GetDC(hWnd):
+    _GetDC = windll.user32.GetDC
+    _GetDC.argtypes = [HWND]
+    _GetDC.restype  = HDC
+    _GetDC.errcheck = RaiseIfZero
+    return _GetDC(hWnd)
+
+# HDC GetWindowDC(
+#   __in  HWND hWnd
+# );
+def GetWindowDC(hWnd):
+    _GetWindowDC = windll.user32.GetWindowDC
+    _GetWindowDC.argtypes = [HWND]
+    _GetWindowDC.restype  = HDC
+    _GetWindowDC.errcheck = RaiseIfZero
+    return _GetWindowDC(hWnd)
+
+# int ReleaseDC(
+#   __in  HWND hWnd,
+#   __in  HDC hDC
+# );
+def ReleaseDC(hWnd, hDC):
+    _ReleaseDC = windll.user32.ReleaseDC
+    _ReleaseDC.argtypes = [HWND, HDC]
+    _ReleaseDC.restype  = ctypes.c_int
+    _ReleaseDC.errcheck = RaiseIfZero
+    _ReleaseDC(hWnd, hDC)
+
+# HWND FindWindow(
+#     LPCTSTR lpClassName,
+#     LPCTSTR lpWindowName
+# );
+def FindWindowA(lpClassName = None, lpWindowName = None):
+    _FindWindowA = windll.user32.FindWindowA
+    _FindWindowA.argtypes = [LPSTR, LPSTR]
+    _FindWindowA.restype  = HWND
+
+    hWnd = _FindWindowA(lpClassName, lpWindowName)
+    if not hWnd:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return hWnd
+
+def FindWindowW(lpClassName = None, lpWindowName = None):
+    _FindWindowW = windll.user32.FindWindowW
+    _FindWindowW.argtypes = [LPWSTR, LPWSTR]
+    _FindWindowW.restype  = HWND
+
+    hWnd = _FindWindowW(lpClassName, lpWindowName)
+    if not hWnd:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return hWnd
+
+FindWindow = GuessStringType(FindWindowA, FindWindowW)
+
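+# Usage sketch (the class name and window title below are examples only, not
+# values this module defines):
+#
+#     hWnd = FindWindow("Notepad", None)              # match by class name
+#     hWnd = FindWindow(None, u"Untitled - Notepad")  # match by window title
+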
+# HWND WINAPI FindWindowEx(
+#   __in_opt  HWND hwndParent,
+#   __in_opt  HWND hwndChildAfter,
+#   __in_opt  LPCTSTR lpszClass,
+#   __in_opt  LPCTSTR lpszWindow
+# );
+def FindWindowExA(hwndParent = None, hwndChildAfter = None, lpClassName = None, lpWindowName = None):
+    _FindWindowExA = windll.user32.FindWindowExA
+    _FindWindowExA.argtypes = [HWND, HWND, LPSTR, LPSTR]
+    _FindWindowExA.restype  = HWND
+
+    hWnd = _FindWindowExA(hwndParent, hwndChildAfter, lpClassName, lpWindowName)
+    if not hWnd:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return hWnd
+
+def FindWindowExW(hwndParent = None, hwndChildAfter = None, lpClassName = None, lpWindowName = None):
+    _FindWindowExW = windll.user32.FindWindowExW
+    _FindWindowExW.argtypes = [HWND, HWND, LPWSTR, LPWSTR]
+    _FindWindowExW.restype  = HWND
+
+    hWnd = _FindWindowExW(hwndParent, hwndChildAfter, lpClassName, lpWindowName)
+    if not hWnd:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return hWnd
+
+FindWindowEx = GuessStringType(FindWindowExA, FindWindowExW)
+
+# int GetClassName(
+#     HWND hWnd,
+#     LPTSTR lpClassName,
+#     int nMaxCount
+# );
+def GetClassNameA(hWnd):
+    _GetClassNameA = windll.user32.GetClassNameA
+    _GetClassNameA.argtypes = [HWND, LPSTR, ctypes.c_int]
+    _GetClassNameA.restype = ctypes.c_int
+
+    nMaxCount = 0x1000
+    dwCharSize = sizeof(CHAR)
+    while 1:
+        lpClassName = ctypes.create_string_buffer(b"", nMaxCount)
+        nCount = _GetClassNameA(hWnd, lpClassName, nMaxCount)
+        if nCount == 0:
+            raise ctypes.WinError()
+        if nCount < nMaxCount - dwCharSize:
+            break
+        nMaxCount += 0x1000
+    return lpClassName.value
+
+def GetClassNameW(hWnd):
+    _GetClassNameW = windll.user32.GetClassNameW
+    _GetClassNameW.argtypes = [HWND, LPWSTR, ctypes.c_int]
+    _GetClassNameW.restype = ctypes.c_int
+
+    nMaxCount = 0x1000
+    dwCharSize = sizeof(WCHAR)
+    while 1:
+        lpClassName = ctypes.create_unicode_buffer(u"", nMaxCount)
+        nCount = _GetClassNameW(hWnd, lpClassName, nMaxCount)
+        if nCount == 0:
+            raise ctypes.WinError()
+        if nCount < nMaxCount - dwCharSize:
+            break
+        nMaxCount += 0x1000
+    return lpClassName.value
+
+GetClassName = GuessStringType(GetClassNameA, GetClassNameW)
+
+# int WINAPI GetWindowText(
+#   __in   HWND hWnd,
+#   __out  LPTSTR lpString,
+#   __in   int nMaxCount
+# );
+def GetWindowTextA(hWnd):
+    _GetWindowTextA = windll.user32.GetWindowTextA
+    _GetWindowTextA.argtypes = [HWND, LPSTR, ctypes.c_int]
+    _GetWindowTextA.restype = ctypes.c_int
+
+    nMaxCount = 0x1000
+    dwCharSize = sizeof(CHAR)
+    while 1:
+        lpString = ctypes.create_string_buffer(b"", nMaxCount)
+        nCount = _GetWindowTextA(hWnd, lpString, nMaxCount)
+        if nCount == 0:
+            raise ctypes.WinError()
+        if nCount < nMaxCount - dwCharSize:
+            break
+        nMaxCount += 0x1000
+    return lpString.value
+
+def GetWindowTextW(hWnd):
+    _GetWindowTextW = windll.user32.GetWindowTextW
+    _GetWindowTextW.argtypes = [HWND, LPWSTR, ctypes.c_int]
+    _GetWindowTextW.restype = ctypes.c_int
+
+    nMaxCount = 0x1000
+    dwCharSize = sizeof(WCHAR)
+    while 1:
+        lpString = ctypes.create_unicode_buffer(u"", nMaxCount)
+        nCount = _GetWindowTextW(hWnd, lpString, nMaxCount)
+        if nCount == 0:
+            raise ctypes.WinError()
+        if nCount < nMaxCount - dwCharSize:
+            break
+        nMaxCount += 0x1000
+    return lpString.value
+
+GetWindowText = GuessStringType(GetWindowTextA, GetWindowTextW)
+
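+# Usage sketch (hWnd is assumed to be a valid window handle, e.g. one returned
+# by FindWindow above):
+#
+#     print("%s: %s" % (GetClassName(hWnd), GetWindowText(hWnd)))
+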
+# BOOL WINAPI SetWindowText(
+#   __in      HWND hWnd,
+#   __in_opt  LPCTSTR lpString
+# );
+def SetWindowTextA(hWnd, lpString = None):
+    _SetWindowTextA = windll.user32.SetWindowTextA
+    _SetWindowTextA.argtypes = [HWND, LPSTR]
+    _SetWindowTextA.restype  = bool
+    _SetWindowTextA.errcheck = RaiseIfZero
+    _SetWindowTextA(hWnd, lpString)
+
+def SetWindowTextW(hWnd, lpString = None):
+    _SetWindowTextW = windll.user32.SetWindowTextW
+    _SetWindowTextW.argtypes = [HWND, LPWSTR]
+    _SetWindowTextW.restype  = bool
+    _SetWindowTextW.errcheck = RaiseIfZero
+    _SetWindowTextW(hWnd, lpString)
+
+SetWindowText = GuessStringType(SetWindowTextA, SetWindowTextW)
+
+# LONG GetWindowLong(
+#     HWND hWnd,
+#     int nIndex
+# );
+def GetWindowLongA(hWnd, nIndex = 0):
+    _GetWindowLongA = windll.user32.GetWindowLongA
+    _GetWindowLongA.argtypes = [HWND, ctypes.c_int]
+    _GetWindowLongA.restype  = DWORD
+
+    SetLastError(ERROR_SUCCESS)
+    retval = _GetWindowLongA(hWnd, nIndex)
+    if retval == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return retval
+
+def GetWindowLongW(hWnd, nIndex = 0):
+    _GetWindowLongW = windll.user32.GetWindowLongW
+    _GetWindowLongW.argtypes = [HWND, ctypes.c_int]
+    _GetWindowLongW.restype  = DWORD
+
+    SetLastError(ERROR_SUCCESS)
+    retval = _GetWindowLongW(hWnd, nIndex)
+    if retval == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return retval
+
+GetWindowLong = DefaultStringType(GetWindowLongA, GetWindowLongW)
+
+# LONG_PTR WINAPI GetWindowLongPtr(
+#   _In_  HWND hWnd,
+#   _In_  int nIndex
+# );
+
+if bits == 32:
+
+    GetWindowLongPtrA = GetWindowLongA
+    GetWindowLongPtrW = GetWindowLongW
+    GetWindowLongPtr  = GetWindowLong
+
+else:
+
+    def GetWindowLongPtrA(hWnd, nIndex = 0):
+        _GetWindowLongPtrA = windll.user32.GetWindowLongPtrA
+        _GetWindowLongPtrA.argtypes = [HWND, ctypes.c_int]
+        _GetWindowLongPtrA.restype  = SIZE_T
+
+        SetLastError(ERROR_SUCCESS)
+        retval = _GetWindowLongPtrA(hWnd, nIndex)
+        if retval == 0:
+            errcode = GetLastError()
+            if errcode != ERROR_SUCCESS:
+                raise ctypes.WinError(errcode)
+        return retval
+
+    def GetWindowLongPtrW(hWnd, nIndex = 0):
+        _GetWindowLongPtrW = windll.user32.GetWindowLongPtrW
+        _GetWindowLongPtrW.argtypes = [HWND, ctypes.c_int]
+        _GetWindowLongPtrW.restype  = SIZE_T
+
+        SetLastError(ERROR_SUCCESS)
+        retval = _GetWindowLongPtrW(hWnd, nIndex)
+        if retval == 0:
+            errcode = GetLastError()
+            if errcode != ERROR_SUCCESS:
+                raise ctypes.WinError(errcode)
+        return retval
+
+    GetWindowLongPtr = DefaultStringType(GetWindowLongPtrA, GetWindowLongPtrW)
+
+# LONG WINAPI SetWindowLong(
+#   _In_  HWND hWnd,
+#   _In_  int nIndex,
+#   _In_  LONG dwNewLong
+# );
+
+def SetWindowLongA(hWnd, nIndex, dwNewLong):
+    _SetWindowLongA = windll.user32.SetWindowLongA
+    _SetWindowLongA.argtypes = [HWND, ctypes.c_int, DWORD]
+    _SetWindowLongA.restype  = DWORD
+
+    SetLastError(ERROR_SUCCESS)
+    retval = _SetWindowLongA(hWnd, nIndex, dwNewLong)
+    if retval == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return retval
+
+def SetWindowLongW(hWnd, nIndex, dwNewLong):
+    _SetWindowLongW = windll.user32.SetWindowLongW
+    _SetWindowLongW.argtypes = [HWND, ctypes.c_int, DWORD]
+    _SetWindowLongW.restype  = DWORD
+
+    SetLastError(ERROR_SUCCESS)
+    retval = _SetWindowLongW(hWnd, nIndex, dwNewLong)
+    if retval == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    return retval
+
+SetWindowLong = DefaultStringType(SetWindowLongA, SetWindowLongW)
+
+# LONG_PTR WINAPI SetWindowLongPtr(
+#   _In_  HWND hWnd,
+#   _In_  int nIndex,
+#   _In_  LONG_PTR dwNewLong
+# );
+
+if bits == 32:
+
+    SetWindowLongPtrA = SetWindowLongA
+    SetWindowLongPtrW = SetWindowLongW
+    SetWindowLongPtr  = SetWindowLong
+
+else:
+
+    def SetWindowLongPtrA(hWnd, nIndex, dwNewLong):
+        _SetWindowLongPtrA = windll.user32.SetWindowLongPtrA
+        _SetWindowLongPtrA.argtypes = [HWND, ctypes.c_int, SIZE_T]
+        _SetWindowLongPtrA.restype  = SIZE_T
+
+        SetLastError(ERROR_SUCCESS)
+        retval = _SetWindowLongPtrA(hWnd, nIndex, dwNewLong)
+        if retval == 0:
+            errcode = GetLastError()
+            if errcode != ERROR_SUCCESS:
+                raise ctypes.WinError(errcode)
+        return retval
+
+    def SetWindowLongPtrW(hWnd, nIndex, dwNewLong):
+        _SetWindowLongPtrW = windll.user32.SetWindowLongPtrW
+        _SetWindowLongPtrW.argtypes = [HWND, ctypes.c_int, SIZE_T]
+        _SetWindowLongPtrW.restype  = SIZE_T
+
+        SetLastError(ERROR_SUCCESS)
+        retval = _SetWindowLongPtrW(hWnd, nIndex, dwNewLong)
+        if retval == 0:
+            errcode = GetLastError()
+            if errcode != ERROR_SUCCESS:
+                raise ctypes.WinError(errcode)
+        return retval
+
+    SetWindowLongPtr = DefaultStringType(SetWindowLongPtrA, SetWindowLongPtrW)
+
+# HWND GetShellWindow(VOID);
+def GetShellWindow():
+    _GetShellWindow = windll.user32.GetShellWindow
+    _GetShellWindow.argtypes = []
+    _GetShellWindow.restype  = HWND
+    _GetShellWindow.errcheck = RaiseIfZero
+    return _GetShellWindow()
+
+# DWORD GetWindowThreadProcessId(
+#     HWND hWnd,
+#     LPDWORD lpdwProcessId
+# );
+def GetWindowThreadProcessId(hWnd):
+    _GetWindowThreadProcessId = windll.user32.GetWindowThreadProcessId
+    _GetWindowThreadProcessId.argtypes = [HWND, LPDWORD]
+    _GetWindowThreadProcessId.restype  = DWORD
+    _GetWindowThreadProcessId.errcheck = RaiseIfZero
+
+    dwProcessId = DWORD(0)
+    dwThreadId = _GetWindowThreadProcessId(hWnd, byref(dwProcessId))
+    return (dwThreadId, dwProcessId.value)
+
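+# Usage sketch (hWnd is an assumed valid handle): the wrapper returns both the
+# creator thread ID and the owning process ID in a single call.
+#
+#     dwThreadId, dwProcessId = GetWindowThreadProcessId(hWnd)
+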
+# HWND WINAPI GetWindow(
+#   __in  HWND hwnd,
+#   __in  UINT uCmd
+# );
+def GetWindow(hWnd, uCmd):
+    _GetWindow = windll.user32.GetWindow
+    _GetWindow.argtypes = [HWND, UINT]
+    _GetWindow.restype  = HWND
+
+    SetLastError(ERROR_SUCCESS)
+    hWndTarget = _GetWindow(hWnd, uCmd)
+    if not hWndTarget:
+        winerr = GetLastError()
+        if winerr != ERROR_SUCCESS:
+            raise ctypes.WinError(winerr)
+    return hWndTarget
+
+# HWND GetParent(
+#       HWND hWnd
+# );
+def GetParent(hWnd):
+    _GetParent = windll.user32.GetParent
+    _GetParent.argtypes = [HWND]
+    _GetParent.restype  = HWND
+
+    SetLastError(ERROR_SUCCESS)
+    hWndParent = _GetParent(hWnd)
+    if not hWndParent:
+        winerr = GetLastError()
+        if winerr != ERROR_SUCCESS:
+            raise ctypes.WinError(winerr)
+    return hWndParent
+
+# HWND WINAPI GetAncestor(
+#   __in  HWND hwnd,
+#   __in  UINT gaFlags
+# );
+def GetAncestor(hWnd, gaFlags = GA_PARENT):
+    _GetAncestor = windll.user32.GetAncestor
+    _GetAncestor.argtypes = [HWND, UINT]
+    _GetAncestor.restype  = HWND
+
+    SetLastError(ERROR_SUCCESS)
+    hWndParent = _GetAncestor(hWnd, gaFlags)
+    if not hWndParent:
+        winerr = GetLastError()
+        if winerr != ERROR_SUCCESS:
+            raise ctypes.WinError(winerr)
+    return hWndParent
+
+# BOOL EnableWindow(
+#     HWND hWnd,
+#     BOOL bEnable
+# );
+def EnableWindow(hWnd, bEnable = True):
+    _EnableWindow = windll.user32.EnableWindow
+    _EnableWindow.argtypes = [HWND, BOOL]
+    _EnableWindow.restype  = bool
+    return _EnableWindow(hWnd, bool(bEnable))
+
+# BOOL ShowWindow(
+#     HWND hWnd,
+#     int nCmdShow
+# );
+def ShowWindow(hWnd, nCmdShow = SW_SHOW):
+    _ShowWindow = windll.user32.ShowWindow
+    _ShowWindow.argtypes = [HWND, ctypes.c_int]
+    _ShowWindow.restype  = bool
+    return _ShowWindow(hWnd, nCmdShow)
+
+# BOOL ShowWindowAsync(
+#     HWND hWnd,
+#     int nCmdShow
+# );
+def ShowWindowAsync(hWnd, nCmdShow = SW_SHOW):
+    _ShowWindowAsync = windll.user32.ShowWindowAsync
+    _ShowWindowAsync.argtypes = [HWND, ctypes.c_int]
+    _ShowWindowAsync.restype  = bool
+    return _ShowWindowAsync(hWnd, nCmdShow)
+
+# HWND GetDesktopWindow(VOID);
+def GetDesktopWindow():
+    _GetDesktopWindow = windll.user32.GetDesktopWindow
+    _GetDesktopWindow.argtypes = []
+    _GetDesktopWindow.restype  = HWND
+    _GetDesktopWindow.errcheck = RaiseIfZero
+    return _GetDesktopWindow()
+
+# HWND GetForegroundWindow(VOID);
+def GetForegroundWindow():
+    _GetForegroundWindow = windll.user32.GetForegroundWindow
+    _GetForegroundWindow.argtypes = []
+    _GetForegroundWindow.restype  = HWND
+    _GetForegroundWindow.errcheck = RaiseIfZero
+    return _GetForegroundWindow()
+
+# BOOL IsWindow(
+#     HWND hWnd
+# );
+def IsWindow(hWnd):
+    _IsWindow = windll.user32.IsWindow
+    _IsWindow.argtypes = [HWND]
+    _IsWindow.restype  = bool
+    return _IsWindow(hWnd)
+
+# BOOL IsWindowVisible(
+#     HWND hWnd
+# );
+def IsWindowVisible(hWnd):
+    _IsWindowVisible = windll.user32.IsWindowVisible
+    _IsWindowVisible.argtypes = [HWND]
+    _IsWindowVisible.restype  = bool
+    return _IsWindowVisible(hWnd)
+
+# BOOL IsWindowEnabled(
+#     HWND hWnd
+# );
+def IsWindowEnabled(hWnd):
+    _IsWindowEnabled = windll.user32.IsWindowEnabled
+    _IsWindowEnabled.argtypes = [HWND]
+    _IsWindowEnabled.restype  = bool
+    return _IsWindowEnabled(hWnd)
+
+# BOOL IsZoomed(
+#     HWND hWnd
+# );
+def IsZoomed(hWnd):
+    _IsZoomed = windll.user32.IsZoomed
+    _IsZoomed.argtypes = [HWND]
+    _IsZoomed.restype  = bool
+    return _IsZoomed(hWnd)
+
+# BOOL IsIconic(
+#     HWND hWnd
+# );
+def IsIconic(hWnd):
+    _IsIconic = windll.user32.IsIconic
+    _IsIconic.argtypes = [HWND]
+    _IsIconic.restype  = bool
+    return _IsIconic(hWnd)
+
+# BOOL IsChild(
+#     HWND hWnd
+# );
+def IsChild(hWnd):
+    _IsChild = windll.user32.IsChild
+    _IsChild.argtypes = [HWND]
+    _IsChild.restype  = bool
+    return _IsChild(hWnd)
+
+# HWND WindowFromPoint(
+#     POINT Point
+# );
+def WindowFromPoint(point):
+    _WindowFromPoint = windll.user32.WindowFromPoint
+    _WindowFromPoint.argtypes = [POINT]
+    _WindowFromPoint.restype  = HWND
+    _WindowFromPoint.errcheck = RaiseIfZero
+    if isinstance(point, tuple):
+        point = POINT(*point)
+    return _WindowFromPoint(point)
+
+# HWND ChildWindowFromPoint(
+#     HWND hWndParent,
+#     POINT Point
+# );
+def ChildWindowFromPoint(hWndParent, point):
+    _ChildWindowFromPoint = windll.user32.ChildWindowFromPoint
+    _ChildWindowFromPoint.argtypes = [HWND, POINT]
+    _ChildWindowFromPoint.restype  = HWND
+    _ChildWindowFromPoint.errcheck = RaiseIfZero
+    if isinstance(point, tuple):
+        point = POINT(*point)
+    return _ChildWindowFromPoint(hWndParent, point)
+
+#HWND RealChildWindowFromPoint(
+#    HWND hwndParent,
+#    POINT ptParentClientCoords
+#);
+def RealChildWindowFromPoint(hWndParent, ptParentClientCoords):
+    _RealChildWindowFromPoint = windll.user32.RealChildWindowFromPoint
+    _RealChildWindowFromPoint.argtypes = [HWND, POINT]
+    _RealChildWindowFromPoint.restype  = HWND
+    _RealChildWindowFromPoint.errcheck = RaiseIfZero
+    if isinstance(ptParentClientCoords, tuple):
+        ptParentClientCoords = POINT(*ptParentClientCoords)
+    return _RealChildWindowFromPoint(hWndParent, ptParentClientCoords)
+
+# BOOL ScreenToClient(
+#   __in  HWND hWnd,
+#         LPPOINT lpPoint
+# );
+def ScreenToClient(hWnd, lpPoint):
+    _ScreenToClient = windll.user32.ScreenToClient
+    _ScreenToClient.argtypes = [HWND, LPPOINT]
+    _ScreenToClient.restype  = bool
+    _ScreenToClient.errcheck = RaiseIfZero
+
+    if isinstance(lpPoint, tuple):
+        lpPoint = POINT(*lpPoint)
+    else:
+        lpPoint = POINT(lpPoint.x, lpPoint.y)
+    _ScreenToClient(hWnd, byref(lpPoint))
+    return Point(lpPoint.x, lpPoint.y)
+
+# BOOL ClientToScreen(
+#   HWND hWnd,
+#   LPPOINT lpPoint
+# );
+def ClientToScreen(hWnd, lpPoint):
+    _ClientToScreen = windll.user32.ClientToScreen
+    _ClientToScreen.argtypes = [HWND, LPPOINT]
+    _ClientToScreen.restype  = bool
+    _ClientToScreen.errcheck = RaiseIfZero
+
+    if isinstance(lpPoint, tuple):
+        lpPoint = POINT(*lpPoint)
+    else:
+        lpPoint = POINT(lpPoint.x, lpPoint.y)
+    _ClientToScreen(hWnd, byref(lpPoint))
+    return Point(lpPoint.x, lpPoint.y)
+
+# int MapWindowPoints(
+#   __in     HWND hWndFrom,
+#   __in     HWND hWndTo,
+#   __inout  LPPOINT lpPoints,
+#   __in     UINT cPoints
+# );
+def MapWindowPoints(hWndFrom, hWndTo, lpPoints):
+    _MapWindowPoints = windll.user32.MapWindowPoints
+    _MapWindowPoints.argtypes = [HWND, HWND, LPPOINT, UINT]
+    _MapWindowPoints.restype  = ctypes.c_int
+
+    cPoints  = len(lpPoints)
+    lpPoints = (POINT * cPoints)(* lpPoints)
+    SetLastError(ERROR_SUCCESS)
+    number   = _MapWindowPoints(hWndFrom, hWndTo, byref(lpPoints), cPoints)
+    if number == 0:
+        errcode = GetLastError()
+        if errcode != ERROR_SUCCESS:
+            raise ctypes.WinError(errcode)
+    x_delta = number & 0xFFFF
+    y_delta = (number >> 16) & 0xFFFF
+    return x_delta, y_delta, [ (pt.x, pt.y) for pt in lpPoints ]
+
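+# Usage sketch: unlike the raw API, this wrapper returns the X/Y deltas plus
+# the list of translated points (hWndFrom and hWndTo are assumed handles).
+#
+#     dx, dy, points = MapWindowPoints(hWndFrom, hWndTo, [(0, 0), (100, 100)])
+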
+#BOOL SetForegroundWindow(
+#    HWND hWnd
+#);
+def SetForegroundWindow(hWnd):
+    _SetForegroundWindow = windll.user32.SetForegroundWindow
+    _SetForegroundWindow.argtypes = [HWND]
+    _SetForegroundWindow.restype  = bool
+    _SetForegroundWindow.errcheck = RaiseIfZero
+    return _SetForegroundWindow(hWnd)
+
+# BOOL GetWindowPlacement(
+#     HWND hWnd,
+#     WINDOWPLACEMENT *lpwndpl
+# );
+def GetWindowPlacement(hWnd):
+    _GetWindowPlacement = windll.user32.GetWindowPlacement
+    _GetWindowPlacement.argtypes = [HWND, PWINDOWPLACEMENT]
+    _GetWindowPlacement.restype  = bool
+    _GetWindowPlacement.errcheck = RaiseIfZero
+
+    lpwndpl = WINDOWPLACEMENT()
+    lpwndpl.length = sizeof(lpwndpl)
+    _GetWindowPlacement(hWnd, byref(lpwndpl))
+    return WindowPlacement(lpwndpl)
+
+# BOOL SetWindowPlacement(
+#     HWND hWnd,
+#     WINDOWPLACEMENT *lpwndpl
+# );
+def SetWindowPlacement(hWnd, lpwndpl):
+    _SetWindowPlacement = windll.user32.SetWindowPlacement
+    _SetWindowPlacement.argtypes = [HWND, PWINDOWPLACEMENT]
+    _SetWindowPlacement.restype  = bool
+    _SetWindowPlacement.errcheck = RaiseIfZero
+
+    if isinstance(lpwndpl, WINDOWPLACEMENT):
+        lpwndpl.length = sizeof(lpwndpl)
+    _SetWindowPlacement(hWnd, byref(lpwndpl))
+
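+# Usage sketch (hWnd is an assumed handle; SW_SHOWMAXIMIZED is expected to come
+# from this package's constants and is shown here only for illustration):
+#
+#     wp = GetWindowPlacement(hWnd)
+#     wp.showCmd = SW_SHOWMAXIMIZED
+#     SetWindowPlacement(hWnd, wp._as_parameter_)
+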
+# BOOL WINAPI GetWindowRect(
+#   __in   HWND hWnd,
+#   __out  LPRECT lpRect
+# );
+def GetWindowRect(hWnd):
+    _GetWindowRect = windll.user32.GetWindowRect
+    _GetWindowRect.argtypes = [HWND, LPRECT]
+    _GetWindowRect.restype  = bool
+    _GetWindowRect.errcheck = RaiseIfZero
+
+    lpRect = RECT()
+    _GetWindowRect(hWnd, byref(lpRect))
+    return Rect(lpRect.left, lpRect.top, lpRect.right, lpRect.bottom)
+
+# BOOL WINAPI GetClientRect(
+#   __in   HWND hWnd,
+#   __out  LPRECT lpRect
+# );
+def GetClientRect(hWnd):
+    _GetClientRect = windll.user32.GetClientRect
+    _GetClientRect.argtypes = [HWND, LPRECT]
+    _GetClientRect.restype  = bool
+    _GetClientRect.errcheck = RaiseIfZero
+
+    lpRect = RECT()
+    _GetClientRect(hWnd, byref(lpRect))
+    return Rect(lpRect.left, lpRect.top, lpRect.right, lpRect.bottom)
+
+#BOOL MoveWindow(
+#    HWND hWnd,
+#    int X,
+#    int Y,
+#    int nWidth,
+#    int nHeight,
+#    BOOL bRepaint
+#);
+def MoveWindow(hWnd, X, Y, nWidth, nHeight, bRepaint = True):
+    _MoveWindow = windll.user32.MoveWindow
+    _MoveWindow.argtypes = [HWND, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, BOOL]
+    _MoveWindow.restype  = bool
+    _MoveWindow.errcheck = RaiseIfZero
+    _MoveWindow(hWnd, X, Y, nWidth, nHeight, bool(bRepaint))
+
+# BOOL GetGUIThreadInfo(
+#     DWORD idThread,
+#     LPGUITHREADINFO lpgui
+# );
+def GetGUIThreadInfo(idThread):
+    _GetGUIThreadInfo = windll.user32.GetGUIThreadInfo
+    _GetGUIThreadInfo.argtypes = [DWORD, LPGUITHREADINFO]
+    _GetGUIThreadInfo.restype  = bool
+    _GetGUIThreadInfo.errcheck = RaiseIfZero
+
+    gui = GUITHREADINFO()
+    gui.cbSize = sizeof(gui)
+    _GetGUIThreadInfo(idThread, byref(gui))
+    return gui
+
+# BOOL CALLBACK EnumWndProc(
+#     HWND hwnd,
+#     LPARAM lParam
+# );
+class __EnumWndProc (__WindowEnumerator):
+    pass
+
+# BOOL EnumWindows(
+#     WNDENUMPROC lpEnumFunc,
+#     LPARAM lParam
+# );
+def EnumWindows():
+    _EnumWindows = windll.user32.EnumWindows
+    _EnumWindows.argtypes = [WNDENUMPROC, LPARAM]
+    _EnumWindows.restype  = bool
+
+    EnumFunc = __EnumWndProc()
+    lpEnumFunc = WNDENUMPROC(EnumFunc)
+    if not _EnumWindows(lpEnumFunc, NULL):
+        errcode = GetLastError()
+        if errcode not in (ERROR_NO_MORE_FILES, ERROR_SUCCESS):
+            raise ctypes.WinError(errcode)
+    return EnumFunc.hwnd
+
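+# Usage sketch: the enumerator callback collects every top-level window handle
+# and the wrapper returns the collected handles.
+#
+#     for hWnd in EnumWindows():
+#         print(hex(hWnd))
+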
+# BOOL CALLBACK EnumThreadWndProc(
+#     HWND hwnd,
+#     LPARAM lParam
+# );
+class __EnumThreadWndProc (__WindowEnumerator):
+    pass
+
+# BOOL EnumThreadWindows(
+#     DWORD dwThreadId,
+#     WNDENUMPROC lpfn,
+#     LPARAM lParam
+# );
+def EnumThreadWindows(dwThreadId):
+    _EnumThreadWindows = windll.user32.EnumThreadWindows
+    _EnumThreadWindows.argtypes = [DWORD, WNDENUMPROC, LPARAM]
+    _EnumThreadWindows.restype  = bool
+
+    fn = __EnumThreadWndProc()
+    lpfn = WNDENUMPROC(fn)
+    if not _EnumThreadWindows(dwThreadId, lpfn, NULL):
+        errcode = GetLastError()
+        if errcode not in (ERROR_NO_MORE_FILES, ERROR_SUCCESS):
+            raise ctypes.WinError(errcode)
+    return fn.hwnd
+
+# BOOL CALLBACK EnumChildProc(
+#     HWND hwnd,
+#     LPARAM lParam
+# );
+class __EnumChildProc (__WindowEnumerator):
+    pass
+
+# BOOL EnumChildWindows(
+#     HWND hWndParent,
+#     WNDENUMPROC lpEnumFunc,
+#     LPARAM lParam
+# );
+def EnumChildWindows(hWndParent = NULL):
+    _EnumChildWindows = windll.user32.EnumChildWindows
+    _EnumChildWindows.argtypes = [HWND, WNDENUMPROC, LPARAM]
+    _EnumChildWindows.restype  = bool
+
+    EnumFunc = __EnumChildProc()
+    lpEnumFunc = WNDENUMPROC(EnumFunc)
+    SetLastError(ERROR_SUCCESS)
+    _EnumChildWindows(hWndParent, lpEnumFunc, NULL)
+    errcode = GetLastError()
+    if errcode not in (ERROR_NO_MORE_FILES, ERROR_SUCCESS):
+        raise ctypes.WinError(errcode)
+    return EnumFunc.hwnd
+
+# LRESULT SendMessage(
+#     HWND hWnd,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam
+# );
+def SendMessageA(hWnd, Msg, wParam = 0, lParam = 0):
+    _SendMessageA = windll.user32.SendMessageA
+    _SendMessageA.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _SendMessageA.restype  = LRESULT
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    return _SendMessageA(hWnd, Msg, wParam, lParam)
+
+def SendMessageW(hWnd, Msg, wParam = 0, lParam = 0):
+    _SendMessageW = windll.user32.SendMessageW
+    _SendMessageW.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _SendMessageW.restype  = LRESULT
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    return _SendMessageW(hWnd, Msg, wParam, lParam)
+
+SendMessage = GuessStringType(SendMessageA, SendMessageW)
+
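+# Usage sketch (hWnd is an assumed handle; WM_CLOSE is spelled out as its
+# literal value 0x0010 purely for illustration):
+#
+#     SendMessage(hWnd, 0x0010)    # ask the window to close
+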
+# BOOL PostMessage(
+#     HWND hWnd,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam
+# );
+def PostMessageA(hWnd, Msg, wParam = 0, lParam = 0):
+    _PostMessageA = windll.user32.PostMessageA
+    _PostMessageA.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _PostMessageA.restype  = bool
+    _PostMessageA.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _PostMessageA(hWnd, Msg, wParam, lParam)
+
+def PostMessageW(hWnd, Msg, wParam = 0, lParam = 0):
+    _PostMessageW = windll.user32.PostMessageW
+    _PostMessageW.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _PostMessageW.restype  = bool
+    _PostMessageW.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _PostMessageW(hWnd, Msg, wParam, lParam)
+
+PostMessage = GuessStringType(PostMessageA, PostMessageW)
+
+# BOOL PostThreadMessage(
+#     DWORD idThread,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam
+# );
+def PostThreadMessageA(idThread, Msg, wParam = 0, lParam = 0):
+    _PostThreadMessageA = windll.user32.PostThreadMessageA
+    _PostThreadMessageA.argtypes = [DWORD, UINT, WPARAM, LPARAM]
+    _PostThreadMessageA.restype  = bool
+    _PostThreadMessageA.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _PostThreadMessageA(idThread, Msg, wParam, lParam)
+
+def PostThreadMessageW(idThread, Msg, wParam = 0, lParam = 0):
+    _PostThreadMessageW = windll.user32.PostThreadMessageW
+    _PostThreadMessageW.argtypes = [DWORD, UINT, WPARAM, LPARAM]
+    _PostThreadMessageW.restype  = bool
+    _PostThreadMessageW.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _PostThreadMessageW(idThread, Msg, wParam, lParam)
+
+PostThreadMessage = GuessStringType(PostThreadMessageA, PostThreadMessageW)
+
+# LRESULT SendMessageTimeout(
+#     HWND hWnd,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam,
+#     UINT fuFlags,
+#     UINT uTimeout,
+#     PDWORD_PTR lpdwResult
+# );
+def SendMessageTimeoutA(hWnd, Msg, wParam = 0, lParam = 0, fuFlags = 0, uTimeout = 0):
+    _SendMessageTimeoutA = windll.user32.SendMessageTimeoutA
+    _SendMessageTimeoutA.argtypes = [HWND, UINT, WPARAM, LPARAM, UINT, UINT, PDWORD_PTR]
+    _SendMessageTimeoutA.restype  = LRESULT
+    _SendMessageTimeoutA.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    dwResult = DWORD(0)
+    _SendMessageTimeoutA(hWnd, Msg, wParam, lParam, fuFlags, uTimeout, byref(dwResult))
+    return dwResult.value
+
+def SendMessageTimeoutW(hWnd, Msg, wParam = 0, lParam = 0, fuFlags = 0, uTimeout = 0):
+    _SendMessageTimeoutW = windll.user32.SendMessageTimeoutW
+    _SendMessageTimeoutW.argtypes = [HWND, UINT, WPARAM, LPARAM, UINT, UINT, PDWORD_PTR]
+    _SendMessageTimeoutW.restype  = LRESULT
+    _SendMessageTimeoutW.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    dwResult = DWORD(0)
+    _SendMessageTimeoutW(hWnd, Msg, wParam, lParam, fuFlags, uTimeout, byref(dwResult))
+    return dwResult.value
+
+SendMessageTimeout = GuessStringType(SendMessageTimeoutA, SendMessageTimeoutW)
+
+# BOOL SendNotifyMessage(
+#     HWND hWnd,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam
+# );
+def SendNotifyMessageA(hWnd, Msg, wParam = 0, lParam = 0):
+    _SendNotifyMessageA = windll.user32.SendNotifyMessageA
+    _SendNotifyMessageA.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _SendNotifyMessageA.restype  = bool
+    _SendNotifyMessageA.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _SendNotifyMessageA(hWnd, Msg, wParam, lParam)
+
+def SendNotifyMessageW(hWnd, Msg, wParam = 0, lParam = 0):
+    _SendNotifyMessageW = windll.user32.SendNotifyMessageW
+    _SendNotifyMessageW.argtypes = [HWND, UINT, WPARAM, LPARAM]
+    _SendNotifyMessageW.restype  = bool
+    _SendNotifyMessageW.errcheck = RaiseIfZero
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    _SendNotifyMessageW(hWnd, Msg, wParam, lParam)
+
+SendNotifyMessage = GuessStringType(SendNotifyMessageA, SendNotifyMessageW)
+
+# LRESULT SendDlgItemMessage(
+#     HWND hDlg,
+#     int nIDDlgItem,
+#     UINT Msg,
+#     WPARAM wParam,
+#     LPARAM lParam
+# );
+def SendDlgItemMessageA(hDlg, nIDDlgItem, Msg, wParam = 0, lParam = 0):
+    _SendDlgItemMessageA = windll.user32.SendDlgItemMessageA
+    _SendDlgItemMessageA.argtypes = [HWND, ctypes.c_int, UINT, WPARAM, LPARAM]
+    _SendDlgItemMessageA.restype  = LRESULT
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    return _SendDlgItemMessageA(hDlg, nIDDlgItem, Msg, wParam, lParam)
+
+def SendDlgItemMessageW(hDlg, nIDDlgItem, Msg, wParam = 0, lParam = 0):
+    _SendDlgItemMessageW = windll.user32.SendDlgItemMessageW
+    _SendDlgItemMessageW.argtypes = [HWND, ctypes.c_int, UINT, WPARAM, LPARAM]
+    _SendDlgItemMessageW.restype  = LRESULT
+
+    wParam = MAKE_WPARAM(wParam)
+    lParam = MAKE_LPARAM(lParam)
+    return _SendDlgItemMessageW(hDlg, nIDDlgItem, Msg, wParam, lParam)
+
+SendDlgItemMessage = GuessStringType(SendDlgItemMessageA, SendDlgItemMessageW)
+
+# DWORD WINAPI WaitForInputIdle(
+#   _In_  HANDLE hProcess,
+#   _In_  DWORD dwMilliseconds
+# );
+def WaitForInputIdle(hProcess, dwMilliseconds = INFINITE):
+    _WaitForInputIdle = windll.user32.WaitForInputIdle
+    _WaitForInputIdle.argtypes = [HANDLE, DWORD]
+    _WaitForInputIdle.restype  = DWORD
+
+    r = _WaitForInputIdle(hProcess, dwMilliseconds)
+    if r == WAIT_FAILED:
+        raise ctypes.WinError()
+    return r
+
+# UINT RegisterWindowMessage(
+#     LPCTSTR lpString
+# );
+def RegisterWindowMessageA(lpString):
+    _RegisterWindowMessageA = windll.user32.RegisterWindowMessageA
+    _RegisterWindowMessageA.argtypes = [LPSTR]
+    _RegisterWindowMessageA.restype  = UINT
+    _RegisterWindowMessageA.errcheck = RaiseIfZero
+    return _RegisterWindowMessageA(lpString)
+
+def RegisterWindowMessageW(lpString):
+    _RegisterWindowMessageW = windll.user32.RegisterWindowMessageW
+    _RegisterWindowMessageW.argtypes = [LPWSTR]
+    _RegisterWindowMessageW.restype  = UINT
+    _RegisterWindowMessageW.errcheck = RaiseIfZero
+    return _RegisterWindowMessageW(lpString)
+
+RegisterWindowMessage = GuessStringType(RegisterWindowMessageA, RegisterWindowMessageW)
+
+# UINT RegisterClipboardFormat(
+#     LPCTSTR lpString
+# );
+def RegisterClipboardFormatA(lpString):
+    _RegisterClipboardFormatA = windll.user32.RegisterClipboardFormatA
+    _RegisterClipboardFormatA.argtypes = [LPSTR]
+    _RegisterClipboardFormatA.restype  = UINT
+    _RegisterClipboardFormatA.errcheck = RaiseIfZero
+    return _RegisterClipboardFormatA(lpString)
+
+def RegisterClipboardFormatW(lpString):
+    _RegisterClipboardFormatW = windll.user32.RegisterClipboardFormatW
+    _RegisterClipboardFormatW.argtypes = [LPWSTR]
+    _RegisterClipboardFormatW.restype  = UINT
+    _RegisterClipboardFormatW.errcheck = RaiseIfZero
+    return _RegisterClipboardFormatW(lpString)
+
+RegisterClipboardFormat = GuessStringType(RegisterClipboardFormatA, RegisterClipboardFormatW)
+
+# HANDLE WINAPI GetProp(
+#   __in  HWND hWnd,
+#   __in  LPCTSTR lpString
+# );
+def GetPropA(hWnd, lpString):
+    _GetPropA = windll.user32.GetPropA
+    _GetPropA.argtypes = [HWND, LPSTR]
+    _GetPropA.restype  = HANDLE
+    return _GetPropA(hWnd, lpString)
+
+def GetPropW(hWnd, lpString):
+    _GetPropW = windll.user32.GetPropW
+    _GetPropW.argtypes = [HWND, LPWSTR]
+    _GetPropW.restype  = HANDLE
+    return _GetPropW(hWnd, lpString)
+
+GetProp = GuessStringType(GetPropA, GetPropW)
+
+# BOOL WINAPI SetProp(
+#   __in      HWND hWnd,
+#   __in      LPCTSTR lpString,
+#   __in_opt  HANDLE hData
+# );
+def SetPropA(hWnd, lpString, hData):
+    _SetPropA = windll.user32.SetPropA
+    _SetPropA.argtypes = [HWND, LPSTR, HANDLE]
+    _SetPropA.restype  = BOOL
+    _SetPropA.errcheck = RaiseIfZero
+    _SetPropA(hWnd, lpString, hData)
+
+def SetPropW(hWnd, lpString, hData):
+    _SetPropW = windll.user32.SetPropW
+    _SetPropW.argtypes = [HWND, LPWSTR, HANDLE]
+    _SetPropW.restype  = BOOL
+    _SetPropW.errcheck = RaiseIfZero
+    _SetPropW(hWnd, lpString, hData)
+
+SetProp = GuessStringType(SetPropA, SetPropW)
+
+# HANDLE WINAPI RemoveProp(
+#   __in  HWND hWnd,
+#   __in  LPCTSTR lpString
+# );
+def RemovePropA(hWnd, lpString):
+    _RemovePropA = windll.user32.RemovePropA
+    _RemovePropA.argtypes = [HWND, LPSTR]
+    _RemovePropA.restype  = HANDLE
+    return _RemovePropA(hWnd, lpString)
+
+def RemovePropW(hWnd, lpString):
+    _RemovePropW = windll.user32.RemovePropW
+    _RemovePropW.argtypes = [HWND, LPWSTR]
+    _RemovePropW.restype  = HANDLE
+    return _RemovePropW(hWnd, lpString)
+
+RemoveProp = GuessStringType(RemovePropA, RemovePropW)
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/version.py b/scripts/win32/version.py
new file mode 100644
index 0000000..82cf459
--- /dev/null
+++ b/scripts/win32/version.py
@@ -0,0 +1,1257 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Detect the current architecture and operating system.
+
+Some functions here are really from kernel32.dll, others from version.dll.
+"""
+
+from .defines import *
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- version.dll --------------------------------------------------------------
+
+VS_FF_DEBUG         = 0x00000001
+VS_FF_PRERELEASE    = 0x00000002
+VS_FF_PATCHED       = 0x00000004
+VS_FF_PRIVATEBUILD  = 0x00000008
+VS_FF_INFOINFERRED  = 0x00000010
+VS_FF_SPECIALBUILD  = 0x00000020
+
+VOS_UNKNOWN     = 0x00000000
+VOS__WINDOWS16  = 0x00000001
+VOS__PM16       = 0x00000002
+VOS__PM32       = 0x00000003
+VOS__WINDOWS32  = 0x00000004
+VOS_DOS         = 0x00010000
+VOS_OS216       = 0x00020000
+VOS_OS232       = 0x00030000
+VOS_NT          = 0x00040000
+
+VOS_DOS_WINDOWS16   = 0x00010001
+VOS_DOS_WINDOWS32   = 0x00010004
+VOS_NT_WINDOWS32    = 0x00040004
+VOS_OS216_PM16      = 0x00020002
+VOS_OS232_PM32      = 0x00030003
+
+VFT_UNKNOWN     = 0x00000000
+VFT_APP         = 0x00000001
+VFT_DLL         = 0x00000002
+VFT_DRV         = 0x00000003
+VFT_FONT        = 0x00000004
+VFT_VXD         = 0x00000005
+VFT_RESERVED    = 0x00000006   # undocumented
+VFT_STATIC_LIB  = 0x00000007
+
+VFT2_UNKNOWN                = 0x00000000
+
+VFT2_DRV_PRINTER            = 0x00000001
+VFT2_DRV_KEYBOARD           = 0x00000002
+VFT2_DRV_LANGUAGE           = 0x00000003
+VFT2_DRV_DISPLAY            = 0x00000004
+VFT2_DRV_MOUSE              = 0x00000005
+VFT2_DRV_NETWORK            = 0x00000006
+VFT2_DRV_SYSTEM             = 0x00000007
+VFT2_DRV_INSTALLABLE        = 0x00000008
+VFT2_DRV_SOUND              = 0x00000009
+VFT2_DRV_COMM               = 0x0000000A
+VFT2_DRV_RESERVED           = 0x0000000B    # undocumented
+VFT2_DRV_VERSIONED_PRINTER  = 0x0000000C
+
+VFT2_FONT_RASTER            = 0x00000001
+VFT2_FONT_VECTOR            = 0x00000002
+VFT2_FONT_TRUETYPE          = 0x00000003
+
+# typedef struct tagVS_FIXEDFILEINFO {
+#   DWORD dwSignature;
+#   DWORD dwStrucVersion;
+#   DWORD dwFileVersionMS;
+#   DWORD dwFileVersionLS;
+#   DWORD dwProductVersionMS;
+#   DWORD dwProductVersionLS;
+#   DWORD dwFileFlagsMask;
+#   DWORD dwFileFlags;
+#   DWORD dwFileOS;
+#   DWORD dwFileType;
+#   DWORD dwFileSubtype;
+#   DWORD dwFileDateMS;
+#   DWORD dwFileDateLS;
+# } VS_FIXEDFILEINFO;
+class VS_FIXEDFILEINFO(Structure):
+    _fields_ = [
+        ("dwSignature",         DWORD),
+        ("dwStrucVersion",      DWORD),
+        ("dwFileVersionMS",     DWORD),
+        ("dwFileVersionLS",     DWORD),
+        ("dwProductVersionMS",  DWORD),
+        ("dwProductVersionLS",  DWORD),
+        ("dwFileFlagsMask",     DWORD),
+        ("dwFileFlags",         DWORD),
+        ("dwFileOS",            DWORD),
+        ("dwFileType",          DWORD),
+        ("dwFileSubtype",       DWORD),
+        ("dwFileDateMS",        DWORD),
+        ("dwFileDateLS",        DWORD),
+]
+PVS_FIXEDFILEINFO = POINTER(VS_FIXEDFILEINFO)
+LPVS_FIXEDFILEINFO = PVS_FIXEDFILEINFO
+
+# BOOL WINAPI GetFileVersionInfo(
+#   _In_        LPCTSTR lptstrFilename,
+#   _Reserved_  DWORD dwHandle,
+#   _In_        DWORD dwLen,
+#   _Out_       LPVOID lpData
+# );
+# DWORD WINAPI GetFileVersionInfoSize(
+#   _In_       LPCTSTR lptstrFilename,
+#   _Out_opt_  LPDWORD lpdwHandle
+# );
+def GetFileVersionInfoA(lptstrFilename):
+    _GetFileVersionInfoA = windll.version.GetFileVersionInfoA
+    _GetFileVersionInfoA.argtypes = [LPSTR, DWORD, DWORD, LPVOID]
+    _GetFileVersionInfoA.restype  = bool
+    _GetFileVersionInfoA.errcheck = RaiseIfZero
+
+    _GetFileVersionInfoSizeA = windll.version.GetFileVersionInfoSizeA
+    _GetFileVersionInfoSizeA.argtypes = [LPSTR, LPVOID]
+    _GetFileVersionInfoSizeA.restype  = DWORD
+    _GetFileVersionInfoSizeA.errcheck = RaiseIfZero
+
+    dwLen = _GetFileVersionInfoSizeA(lptstrFilename, None)
+    lpData = ctypes.create_string_buffer(dwLen)
+    _GetFileVersionInfoA(lptstrFilename, 0, dwLen, byref(lpData))
+    return lpData
+
+def GetFileVersionInfoW(lptstrFilename):
+    _GetFileVersionInfoW = windll.version.GetFileVersionInfoW
+    _GetFileVersionInfoW.argtypes = [LPWSTR, DWORD, DWORD, LPVOID]
+    _GetFileVersionInfoW.restype  = bool
+    _GetFileVersionInfoW.errcheck = RaiseIfZero
+
+    _GetFileVersionInfoSizeW = windll.version.GetFileVersionInfoSizeW
+    _GetFileVersionInfoSizeW.argtypes = [LPWSTR, LPVOID]
+    _GetFileVersionInfoSizeW.restype  = DWORD
+    _GetFileVersionInfoSizeW.errcheck = RaiseIfZero
+
+    dwLen = _GetFileVersionInfoSizeW(lptstrFilename, None)
+    lpData = ctypes.create_string_buffer(dwLen)  # not a string!
+    _GetFileVersionInfoW(lptstrFilename, 0, dwLen, byref(lpData))
+    return lpData
+
+GetFileVersionInfo = GuessStringType(GetFileVersionInfoA, GetFileVersionInfoW)
+
+# BOOL WINAPI VerQueryValue(
+#   _In_   LPCVOID pBlock,
+#   _In_   LPCTSTR lpSubBlock,
+#   _Out_  LPVOID *lplpBuffer,
+#   _Out_  PUINT puLen
+# );
+def VerQueryValueA(pBlock, lpSubBlock):
+    _VerQueryValueA = windll.version.VerQueryValueA
+    _VerQueryValueA.argtypes = [LPVOID, LPSTR, LPVOID, POINTER(UINT)]
+    _VerQueryValueA.restype  = bool
+    _VerQueryValueA.errcheck = RaiseIfZero
+
+    lpBuffer = LPVOID(0)
+    uLen = UINT(0)
+    _VerQueryValueA(pBlock, lpSubBlock, byref(lpBuffer), byref(uLen))
+    return lpBuffer, uLen.value
+
+def VerQueryValueW(pBlock, lpSubBlock):
+    _VerQueryValueW = windll.version.VerQueryValueW
+    _VerQueryValueW.argtypes = [LPVOID, LPWSTR, LPVOID, POINTER(UINT)]
+    _VerQueryValueW.restype  = bool
+    _VerQueryValueW.errcheck = RaiseIfZero
+
+    lpBuffer = LPVOID(0)
+    uLen = UINT(0)
+    _VerQueryValueW(pBlock, lpSubBlock, byref(lpBuffer), byref(uLen))
+    return lpBuffer, uLen.value
+
+VerQueryValue = GuessStringType(VerQueryValueA, VerQueryValueW)
+
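+# Usage sketch: reading the fixed file version of a module. The file name is
+# an example only; the ctypes.cast shows how the returned LPVOID is meant to
+# be consumed together with PVS_FIXEDFILEINFO defined above.
+#
+#     pBlock = GetFileVersionInfo(u"C:\\Windows\\System32\\kernel32.dll")
+#     lpBuffer, uLen = VerQueryValue(pBlock, u"\\")
+#     ffi = ctypes.cast(lpBuffer, PVS_FIXEDFILEINFO).contents
+#     major, minor = ffi.dwFileVersionMS >> 16, ffi.dwFileVersionMS & 0xFFFF
+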
+#--- NTDDI version ------------------------------------------------------------
+
+NTDDI_WIN8      = 0x06020000
+NTDDI_WIN7SP1   = 0x06010100
+NTDDI_WIN7      = 0x06010000
+NTDDI_WS08      = 0x06000100
+NTDDI_VISTASP1  = 0x06000100
+NTDDI_VISTA     = 0x06000000
+NTDDI_LONGHORN  = NTDDI_VISTA
+NTDDI_WS03SP2   = 0x05020200
+NTDDI_WS03SP1   = 0x05020100
+NTDDI_WS03      = 0x05020000
+NTDDI_WINXPSP3  = 0x05010300
+NTDDI_WINXPSP2  = 0x05010200
+NTDDI_WINXPSP1  = 0x05010100
+NTDDI_WINXP     = 0x05010000
+NTDDI_WIN2KSP4  = 0x05000400
+NTDDI_WIN2KSP3  = 0x05000300
+NTDDI_WIN2KSP2  = 0x05000200
+NTDDI_WIN2KSP1  = 0x05000100
+NTDDI_WIN2K     = 0x05000000
+NTDDI_WINNT4    = 0x04000000
+
+OSVERSION_MASK  = 0xFFFF0000
+SPVERSION_MASK  = 0x0000FF00
+SUBVERSION_MASK = 0x000000FF
+
+#--- OSVERSIONINFO and OSVERSIONINFOEX structures and constants ---------------
+
+VER_PLATFORM_WIN32s                 = 0
+VER_PLATFORM_WIN32_WINDOWS          = 1
+VER_PLATFORM_WIN32_NT               = 2
+
+VER_SUITE_BACKOFFICE                = 0x00000004
+VER_SUITE_BLADE                     = 0x00000400
+VER_SUITE_COMPUTE_SERVER            = 0x00004000
+VER_SUITE_DATACENTER                = 0x00000080
+VER_SUITE_ENTERPRISE                = 0x00000002
+VER_SUITE_EMBEDDEDNT                = 0x00000040
+VER_SUITE_PERSONAL                  = 0x00000200
+VER_SUITE_SINGLEUSERTS              = 0x00000100
+VER_SUITE_SMALLBUSINESS             = 0x00000001
+VER_SUITE_SMALLBUSINESS_RESTRICTED  = 0x00000020
+VER_SUITE_STORAGE_SERVER            = 0x00002000
+VER_SUITE_TERMINAL                  = 0x00000010
+VER_SUITE_WH_SERVER                 = 0x00008000
+
+VER_NT_DOMAIN_CONTROLLER            = 0x0000002
+VER_NT_SERVER                       = 0x0000003
+VER_NT_WORKSTATION                  = 0x0000001
+
+VER_BUILDNUMBER                     = 0x0000004
+VER_MAJORVERSION                    = 0x0000002
+VER_MINORVERSION                    = 0x0000001
+VER_PLATFORMID                      = 0x0000008
+VER_PRODUCT_TYPE                    = 0x0000080
+VER_SERVICEPACKMAJOR                = 0x0000020
+VER_SERVICEPACKMINOR                = 0x0000010
+VER_SUITENAME                       = 0x0000040
+
+VER_EQUAL                           = 1
+VER_GREATER                         = 2
+VER_GREATER_EQUAL                   = 3
+VER_LESS                            = 4
+VER_LESS_EQUAL                      = 5
+VER_AND                             = 6
+VER_OR                              = 7
+
+# typedef struct _OSVERSIONINFO {
+#   DWORD dwOSVersionInfoSize;
+#   DWORD dwMajorVersion;
+#   DWORD dwMinorVersion;
+#   DWORD dwBuildNumber;
+#   DWORD dwPlatformId;
+#   TCHAR szCSDVersion[128];
+# }OSVERSIONINFO;
+class OSVERSIONINFOA(Structure):
+    _fields_ = [
+        ("dwOSVersionInfoSize", DWORD),
+        ("dwMajorVersion",      DWORD),
+        ("dwMinorVersion",      DWORD),
+        ("dwBuildNumber",       DWORD),
+        ("dwPlatformId",        DWORD),
+        ("szCSDVersion",        CHAR * 128),
+    ]
+class OSVERSIONINFOW(Structure):
+    _fields_ = [
+        ("dwOSVersionInfoSize", DWORD),
+        ("dwMajorVersion",      DWORD),
+        ("dwMinorVersion",      DWORD),
+        ("dwBuildNumber",       DWORD),
+        ("dwPlatformId",        DWORD),
+        ("szCSDVersion",        WCHAR * 128),
+    ]
+
+# typedef struct _OSVERSIONINFOEX {
+#   DWORD dwOSVersionInfoSize;
+#   DWORD dwMajorVersion;
+#   DWORD dwMinorVersion;
+#   DWORD dwBuildNumber;
+#   DWORD dwPlatformId;
+#   TCHAR szCSDVersion[128];
+#   WORD  wServicePackMajor;
+#   WORD  wServicePackMinor;
+#   WORD  wSuiteMask;
+#   BYTE  wProductType;
+#   BYTE  wReserved;
+# }OSVERSIONINFOEX, *POSVERSIONINFOEX, *LPOSVERSIONINFOEX;
+class OSVERSIONINFOEXA(Structure):
+    _fields_ = [
+        ("dwOSVersionInfoSize", DWORD),
+        ("dwMajorVersion",      DWORD),
+        ("dwMinorVersion",      DWORD),
+        ("dwBuildNumber",       DWORD),
+        ("dwPlatformId",        DWORD),
+        ("szCSDVersion",        CHAR * 128),
+        ("wServicePackMajor",   WORD),
+        ("wServicePackMinor",   WORD),
+        ("wSuiteMask",          WORD),
+        ("wProductType",        BYTE),
+        ("wReserved",           BYTE),
+    ]
+class OSVERSIONINFOEXW(Structure):
+    _fields_ = [
+        ("dwOSVersionInfoSize", DWORD),
+        ("dwMajorVersion",      DWORD),
+        ("dwMinorVersion",      DWORD),
+        ("dwBuildNumber",       DWORD),
+        ("dwPlatformId",        DWORD),
+        ("szCSDVersion",        WCHAR * 128),
+        ("wServicePackMajor",   WORD),
+        ("wServicePackMinor",   WORD),
+        ("wSuiteMask",          WORD),
+        ("wProductType",        BYTE),
+        ("wReserved",           BYTE),
+    ]
+
+LPOSVERSIONINFOA    = POINTER(OSVERSIONINFOA)
+LPOSVERSIONINFOW    = POINTER(OSVERSIONINFOW)
+LPOSVERSIONINFOEXA  = POINTER(OSVERSIONINFOEXA)
+LPOSVERSIONINFOEXW  = POINTER(OSVERSIONINFOEXW)
+POSVERSIONINFOA     = LPOSVERSIONINFOA
+POSVERSIONINFOW     = LPOSVERSIONINFOW
+POSVERSIONINFOEXA   = LPOSVERSIONINFOEXA
+POSVERSIONINFOEXW   = LPOSVERSIONINFOEXW
+
+#--- GetSystemMetrics constants -----------------------------------------------
+
+SM_CXSCREEN             = 0
+SM_CYSCREEN             = 1
+SM_CXVSCROLL            = 2
+SM_CYHSCROLL            = 3
+SM_CYCAPTION            = 4
+SM_CXBORDER             = 5
+SM_CYBORDER             = 6
+SM_CXDLGFRAME           = 7
+SM_CYDLGFRAME           = 8
+SM_CYVTHUMB             = 9
+SM_CXHTHUMB             = 10
+SM_CXICON               = 11
+SM_CYICON               = 12
+SM_CXCURSOR             = 13
+SM_CYCURSOR             = 14
+SM_CYMENU               = 15
+SM_CXFULLSCREEN         = 16
+SM_CYFULLSCREEN         = 17
+SM_CYKANJIWINDOW        = 18
+SM_MOUSEPRESENT         = 19
+SM_CYVSCROLL            = 20
+SM_CXHSCROLL            = 21
+SM_DEBUG                = 22
+SM_SWAPBUTTON           = 23
+SM_RESERVED1            = 24
+SM_RESERVED2            = 25
+SM_RESERVED3            = 26
+SM_RESERVED4            = 27
+SM_CXMIN                = 28
+SM_CYMIN                = 29
+SM_CXSIZE               = 30
+SM_CYSIZE               = 31
+SM_CXFRAME              = 32
+SM_CYFRAME              = 33
+SM_CXMINTRACK           = 34
+SM_CYMINTRACK           = 35
+SM_CXDOUBLECLK          = 36
+SM_CYDOUBLECLK          = 37
+SM_CXICONSPACING        = 38
+SM_CYICONSPACING        = 39
+SM_MENUDROPALIGNMENT    = 40
+SM_PENWINDOWS           = 41
+SM_DBCSENABLED          = 42
+SM_CMOUSEBUTTONS        = 43
+
+SM_CXFIXEDFRAME         = SM_CXDLGFRAME     # ;win40 name change
+SM_CYFIXEDFRAME         = SM_CYDLGFRAME     # ;win40 name change
+SM_CXSIZEFRAME          = SM_CXFRAME        # ;win40 name change
+SM_CYSIZEFRAME          = SM_CYFRAME        # ;win40 name change
+
+SM_SECURE               = 44
+SM_CXEDGE               = 45
+SM_CYEDGE               = 46
+SM_CXMINSPACING         = 47
+SM_CYMINSPACING         = 48
+SM_CXSMICON             = 49
+SM_CYSMICON             = 50
+SM_CYSMCAPTION          = 51
+SM_CXSMSIZE             = 52
+SM_CYSMSIZE             = 53
+SM_CXMENUSIZE           = 54
+SM_CYMENUSIZE           = 55
+SM_ARRANGE              = 56
+SM_CXMINIMIZED          = 57
+SM_CYMINIMIZED          = 58
+SM_CXMAXTRACK           = 59
+SM_CYMAXTRACK           = 60
+SM_CXMAXIMIZED          = 61
+SM_CYMAXIMIZED          = 62
+SM_NETWORK              = 63
+SM_CLEANBOOT            = 67
+SM_CXDRAG               = 68
+SM_CYDRAG               = 69
+SM_SHOWSOUNDS           = 70
+SM_CXMENUCHECK          = 71  # Use instead of GetMenuCheckMarkDimensions()!
+SM_CYMENUCHECK          = 72
+SM_SLOWMACHINE          = 73
+SM_MIDEASTENABLED       = 74
+SM_MOUSEWHEELPRESENT    = 75
+SM_XVIRTUALSCREEN       = 76
+SM_YVIRTUALSCREEN       = 77
+SM_CXVIRTUALSCREEN      = 78
+SM_CYVIRTUALSCREEN      = 79
+SM_CMONITORS            = 80
+SM_SAMEDISPLAYFORMAT    = 81
+SM_IMMENABLED           = 82
+SM_CXFOCUSBORDER        = 83
+SM_CYFOCUSBORDER        = 84
+SM_TABLETPC             = 86
+SM_MEDIACENTER          = 87
+SM_STARTER              = 88
+SM_SERVERR2             = 89
+SM_MOUSEHORIZONTALWHEELPRESENT = 91
+SM_CXPADDEDBORDER       = 92
+
+SM_CMETRICS             = 93
+
+SM_REMOTESESSION        = 0x1000
+SM_SHUTTINGDOWN         = 0x2000
+SM_REMOTECONTROL        = 0x2001
+SM_CARETBLINKINGENABLED = 0x2002
+
+#--- SYSTEM_INFO structure, GetSystemInfo() and GetNativeSystemInfo() ---------
+
+# Values used by Wine
+# Documented values at MSDN are marked with an asterisk
+PROCESSOR_ARCHITECTURE_UNKNOWN        = 0xFFFF  # Unknown architecture.
+PROCESSOR_ARCHITECTURE_INTEL          = 0       # x86 (AMD or Intel) *
+PROCESSOR_ARCHITECTURE_MIPS           = 1       # MIPS
+PROCESSOR_ARCHITECTURE_ALPHA          = 2       # Alpha
+PROCESSOR_ARCHITECTURE_PPC            = 3       # Power PC
+PROCESSOR_ARCHITECTURE_SHX            = 4       # SHX
+PROCESSOR_ARCHITECTURE_ARM            = 5       # ARM
+PROCESSOR_ARCHITECTURE_IA64           = 6       # Intel Itanium *
+PROCESSOR_ARCHITECTURE_ALPHA64        = 7       # Alpha64
+PROCESSOR_ARCHITECTURE_MSIL           = 8       # MSIL
+PROCESSOR_ARCHITECTURE_AMD64          = 9       # x64 (AMD or Intel) *
+PROCESSOR_ARCHITECTURE_IA32_ON_WIN64  = 10      # IA32 on Win64
+PROCESSOR_ARCHITECTURE_SPARC          = 20      # Sparc (Wine)
+
+# Values used by Wine
+# PROCESSOR_OPTIL value found at http://code.google.com/p/ddab-lib/
+# Documented values at MSDN are marked with an asterisk
+PROCESSOR_INTEL_386     = 386    # Intel i386 *
+PROCESSOR_INTEL_486     = 486    # Intel i486 *
+PROCESSOR_INTEL_PENTIUM = 586    # Intel Pentium *
+PROCESSOR_INTEL_IA64    = 2200   # Intel IA64 (Itanium) *
+PROCESSOR_AMD_X8664     = 8664   # AMD X86 64 *
+PROCESSOR_MIPS_R4000    = 4000   # MIPS R4000, R4101, R3910
+PROCESSOR_ALPHA_21064   = 21064  # Alpha 210 64
+PROCESSOR_PPC_601       = 601    # PPC 601
+PROCESSOR_PPC_603       = 603    # PPC 603
+PROCESSOR_PPC_604       = 604    # PPC 604
+PROCESSOR_PPC_620       = 620    # PPC 620
+PROCESSOR_HITACHI_SH3   = 10003  # Hitachi SH3 (Windows CE)
+PROCESSOR_HITACHI_SH3E  = 10004  # Hitachi SH3E (Windows CE)
+PROCESSOR_HITACHI_SH4   = 10005  # Hitachi SH4 (Windows CE)
+PROCESSOR_MOTOROLA_821  = 821    # Motorola 821 (Windows CE)
+PROCESSOR_SHx_SH3       = 103    # SHx SH3 (Windows CE)
+PROCESSOR_SHx_SH4       = 104    # SHx SH4 (Windows CE)
+PROCESSOR_STRONGARM     = 2577   # StrongARM (Windows CE)
+PROCESSOR_ARM720        = 1824   # ARM 720 (Windows CE)
+PROCESSOR_ARM820        = 2080   # ARM 820 (Windows CE)
+PROCESSOR_ARM920        = 2336   # ARM 920 (Windows CE)
+PROCESSOR_ARM_7TDMI     = 70001  # ARM 7TDMI (Windows CE)
+PROCESSOR_OPTIL         = 0x494F # MSIL
+
+# typedef struct _SYSTEM_INFO {
+#   union {
+#     DWORD dwOemId;
+#     struct {
+#       WORD wProcessorArchitecture;
+#       WORD wReserved;
+#     } ;
+#   }     ;
+#   DWORD     dwPageSize;
+#   LPVOID    lpMinimumApplicationAddress;
+#   LPVOID    lpMaximumApplicationAddress;
+#   DWORD_PTR dwActiveProcessorMask;
+#   DWORD     dwNumberOfProcessors;
+#   DWORD     dwProcessorType;
+#   DWORD     dwAllocationGranularity;
+#   WORD      wProcessorLevel;
+#   WORD      wProcessorRevision;
+# } SYSTEM_INFO;
+
+class _SYSTEM_INFO_OEM_ID_STRUCT(Structure):
+    _fields_ = [
+        ("wProcessorArchitecture",  WORD),
+        ("wReserved",               WORD),
+]
+
+class _SYSTEM_INFO_OEM_ID(Union):
+    _fields_ = [
+        ("dwOemId",  DWORD),
+        ("w",        _SYSTEM_INFO_OEM_ID_STRUCT),
+]
+
+class SYSTEM_INFO(Structure):
+    _fields_ = [
+        ("id",                              _SYSTEM_INFO_OEM_ID),
+        ("dwPageSize",                      DWORD),
+        ("lpMinimumApplicationAddress",     LPVOID),
+        ("lpMaximumApplicationAddress",     LPVOID),
+        ("dwActiveProcessorMask",           DWORD_PTR),
+        ("dwNumberOfProcessors",            DWORD),
+        ("dwProcessorType",                 DWORD),
+        ("dwAllocationGranularity",         DWORD),
+        ("wProcessorLevel",                 WORD),
+        ("wProcessorRevision",              WORD),
+    ]
+
+    def __get_dwOemId(self):
+        return self.id.dwOemId
+    def __set_dwOemId(self, value):
+        self.id.dwOemId = value
+    dwOemId = property(__get_dwOemId, __set_dwOemId)
+
+    def __get_wProcessorArchitecture(self):
+        return self.id.w.wProcessorArchitecture
+    def __set_wProcessorArchitecture(self, value):
+        self.id.w.wProcessorArchitecture = value
+    wProcessorArchitecture = property(__get_wProcessorArchitecture, __set_wProcessorArchitecture)
+
+LPSYSTEM_INFO = ctypes.POINTER(SYSTEM_INFO)
+
+# void WINAPI GetSystemInfo(
+#   __out  LPSYSTEM_INFO lpSystemInfo
+# );
+def GetSystemInfo():
+    _GetSystemInfo = windll.kernel32.GetSystemInfo
+    _GetSystemInfo.argtypes = [LPSYSTEM_INFO]
+    _GetSystemInfo.restype  = None
+
+    sysinfo = SYSTEM_INFO()
+    _GetSystemInfo(byref(sysinfo))
+    return sysinfo
+
+# void WINAPI GetNativeSystemInfo(
+#   __out  LPSYSTEM_INFO lpSystemInfo
+# );
+def GetNativeSystemInfo():
+    _GetNativeSystemInfo = windll.kernel32.GetNativeSystemInfo
+    _GetNativeSystemInfo.argtypes = [LPSYSTEM_INFO]
+    _GetNativeSystemInfo.restype  = None
+
+    sysinfo = SYSTEM_INFO()
+    _GetNativeSystemInfo(byref(sysinfo))
+    return sysinfo
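+
+# Illustrative usage sketch (not part of the WinAppDbg API): shows how the
+# SYSTEM_INFO wrapper above is typically consumed through the convenience
+# property defined on the structure. The leading underscore keeps the helper
+# out of this module's __all__ list.
+def _example_processor_architecture():
+    si = GetNativeSystemInfo()
+    # wProcessorArchitecture reads through the nested union defined above.
+    return si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64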
+
+# int WINAPI GetSystemMetrics(
+#   __in  int nIndex
+# );
+def GetSystemMetrics(nIndex):
+    _GetSystemMetrics = windll.user32.GetSystemMetrics
+    _GetSystemMetrics.argtypes = [ctypes.c_int]
+    _GetSystemMetrics.restype  = ctypes.c_int
+    return _GetSystemMetrics(nIndex)
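+
+# Illustrative usage sketch (not part of the WinAppDbg API): querying a couple
+# of the SM_* metrics defined at the top of this module.
+def _example_system_metrics():
+    monitors  = GetSystemMetrics(SM_CMONITORS)      # number of display monitors
+    is_remote = GetSystemMetrics(SM_REMOTESESSION)  # nonzero inside a remote (RDP) session
+    return monitors, bool(is_remote)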
+
+# SIZE_T WINAPI GetLargePageMinimum(void);
+def GetLargePageMinimum():
+    _GetLargePageMinimum = windll.kernel32.GetLargePageMinimum
+    _GetLargePageMinimum.argtypes = []
+    _GetLargePageMinimum.restype  = SIZE_T
+    return _GetLargePageMinimum()
+
+# HANDLE WINAPI GetCurrentProcess(void);
+def GetCurrentProcess():
+##    return 0xFFFFFFFFFFFFFFFFL
+    _GetCurrentProcess = windll.kernel32.GetCurrentProcess
+    _GetCurrentProcess.argtypes = []
+    _GetCurrentProcess.restype  = HANDLE
+    return _GetCurrentProcess()
+
+# HANDLE WINAPI GetCurrentThread(void);
+def GetCurrentThread():
+##    return 0xFFFFFFFFFFFFFFFEL
+    _GetCurrentThread = windll.kernel32.GetCurrentThread
+    _GetCurrentThread.argtypes = []
+    _GetCurrentThread.restype  = HANDLE
+    return _GetCurrentThread()
+
+# BOOL WINAPI IsWow64Process(
+#   __in   HANDLE hProcess,
+#   __out  PBOOL Wow64Process
+# );
+def IsWow64Process(hProcess):
+    _IsWow64Process = windll.kernel32.IsWow64Process
+    _IsWow64Process.argtypes = [HANDLE, PBOOL]
+    _IsWow64Process.restype  = bool
+    _IsWow64Process.errcheck = RaiseIfZero
+
+    Wow64Process = BOOL(FALSE)
+    _IsWow64Process(hProcess, byref(Wow64Process))
+    return bool(Wow64Process)
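+
+# Illustrative usage sketch (not part of the WinAppDbg API): the typical WOW64
+# check for the calling process, combining the two wrappers above.
+def _example_is_current_process_wow64():
+    return IsWow64Process(GetCurrentProcess())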
+
+# DWORD WINAPI GetVersion(void);
+def GetVersion():
+    _GetVersion = windll.kernel32.GetVersion
+    _GetVersion.argtypes = []
+    _GetVersion.restype  = DWORD
+    _GetVersion.errcheck = RaiseIfZero
+
+    # See the example code here:
+    # http://msdn.microsoft.com/en-us/library/ms724439(VS.85).aspx
+
+    dwVersion       = _GetVersion()
+    dwMajorVersion  = dwVersion & 0x000000FF
+    dwMinorVersion  = (dwVersion & 0x0000FF00) >> 8
+    if (dwVersion & 0x80000000) == 0:
+        dwBuild     = int((dwVersion & 0x7FFF0000) >> 16)
+    else:
+        dwBuild     = None    # build number not available on non-NT platforms
+    return int(dwMajorVersion), int(dwMinorVersion), dwBuild
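+
+# Illustrative usage sketch (not part of the WinAppDbg API): GetVersion() packs
+# the version into a single DWORD; the wrapper above splits it into a
+# (major, minor, build) tuple, where build may be None.
+def _example_version_tuple():
+    major, minor, build = GetVersion()
+    return (major, minor) >= (6, 1)   # e.g. Windows 7 / Server 2008 R2 or later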
+
+# BOOL WINAPI GetVersionEx(
+#   __inout  LPOSVERSIONINFO lpVersionInfo
+# );
+def GetVersionExA():
+    _GetVersionExA = windll.kernel32.GetVersionExA
+    _GetVersionExA.argtypes = [POINTER(OSVERSIONINFOEXA)]
+    _GetVersionExA.restype  = bool
+    _GetVersionExA.errcheck = RaiseIfZero
+
+    osi = OSVERSIONINFOEXA()
+    osi.dwOSVersionInfoSize = sizeof(osi)
+    try:
+        _GetVersionExA(byref(osi))
+    except WindowsError:
+        osi = OSVERSIONINFOA()
+        osi.dwOSVersionInfoSize = sizeof(osi)
+        _GetVersionExA.argtypes = [POINTER(OSVERSIONINFOA)]
+        _GetVersionExA(byref(osi))
+    return osi
+
+def GetVersionExW():
+    _GetVersionExW = windll.kernel32.GetVersionExW
+    _GetVersionExW.argtypes = [POINTER(OSVERSIONINFOEXW)]
+    _GetVersionExW.restype  = bool
+    _GetVersionExW.errcheck = RaiseIfZero
+
+    osi = OSVERSIONINFOEXW()
+    osi.dwOSVersionInfoSize = sizeof(osi)
+    try:
+        _GetVersionExW(byref(osi))
+    except WindowsError:
+        osi = OSVERSIONINFOW()
+        osi.dwOSVersionInfoSize = sizeof(osi)
+        _GetVersionExW.argtypes = [POINTER(OSVERSIONINFOW)]
+        _GetVersionExW(byref(osi))
+    return osi
+
+GetVersionEx = GuessStringType(GetVersionExA, GetVersionExW)
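+
+# Illustrative usage sketch (not part of the WinAppDbg API): reading a few
+# fields from the OSVERSIONINFOEX structure returned by the wrapper above.
+# Note the caveat in _get_os() below: since Windows 8 the reported version
+# follows the python.exe manifest rather than the real OS version.
+def _example_versionex_fields():
+    osvi = GetVersionEx()
+    return (osvi.dwMajorVersion, osvi.dwMinorVersion,
+            osvi.wProductType == VER_NT_WORKSTATION)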
+
+# BOOL WINAPI GetProductInfo(
+#   __in   DWORD dwOSMajorVersion,
+#   __in   DWORD dwOSMinorVersion,
+#   __in   DWORD dwSpMajorVersion,
+#   __in   DWORD dwSpMinorVersion,
+#   __out  PDWORD pdwReturnedProductType
+# );
+def GetProductInfo(dwOSMajorVersion, dwOSMinorVersion, dwSpMajorVersion, dwSpMinorVersion):
+    _GetProductInfo = windll.kernel32.GetProductInfo
+    _GetProductInfo.argtypes = [DWORD, DWORD, DWORD, DWORD, PDWORD]
+    _GetProductInfo.restype  = BOOL
+    _GetProductInfo.errcheck = RaiseIfZero
+
+    dwReturnedProductType = DWORD(0)
+    _GetProductInfo(dwOSMajorVersion, dwOSMinorVersion, dwSpMajorVersion, dwSpMinorVersion, byref(dwReturnedProductType))
+    return dwReturnedProductType.value
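+
+# Illustrative usage sketch (not part of the WinAppDbg API): GetProductInfo()
+# returns one of the PRODUCT_* values describing the installed edition
+# (e.g. 0x30 for Professional editions); the service pack arguments may be
+# left as zero when only the base edition matters.
+def _example_product_info():
+    return GetProductInfo(6, 1, 0, 0)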
+
+# BOOL WINAPI VerifyVersionInfo(
+#   __in  LPOSVERSIONINFOEX lpVersionInfo,
+#   __in  DWORD dwTypeMask,
+#   __in  DWORDLONG dwlConditionMask
+# );
+def VerifyVersionInfo(lpVersionInfo, dwTypeMask, dwlConditionMask):
+    if isinstance(lpVersionInfo, OSVERSIONINFOEXA):
+        return VerifyVersionInfoA(lpVersionInfo, dwTypeMask, dwlConditionMask)
+    if isinstance(lpVersionInfo, OSVERSIONINFOEXW):
+        return VerifyVersionInfoW(lpVersionInfo, dwTypeMask, dwlConditionMask)
+    raise TypeError("Bad OSVERSIONINFOEX structure")
+
+def VerifyVersionInfoA(lpVersionInfo, dwTypeMask, dwlConditionMask):
+    _VerifyVersionInfoA = windll.kernel32.VerifyVersionInfoA
+    _VerifyVersionInfoA.argtypes = [LPOSVERSIONINFOEXA, DWORD, DWORDLONG]
+    _VerifyVersionInfoA.restype  = bool
+    return _VerifyVersionInfoA(byref(lpVersionInfo), dwTypeMask, dwlConditionMask)
+
+def VerifyVersionInfoW(lpVersionInfo, dwTypeMask, dwlConditionMask):
+    _VerifyVersionInfoW = windll.kernel32.VerifyVersionInfoW
+    _VerifyVersionInfoW.argtypes = [LPOSVERSIONINFOEXW, DWORD, DWORDLONG]
+    _VerifyVersionInfoW.restype  = bool
+    return _VerifyVersionInfoW(byref(lpVersionInfo), dwTypeMask, dwlConditionMask)
+
+# ULONGLONG WINAPI VerSetConditionMask(
+#   __in  ULONGLONG dwlConditionMask,
+#   __in  DWORD dwTypeBitMask,
+#   __in  BYTE dwConditionMask
+# );
+def VerSetConditionMask(dwlConditionMask, dwTypeBitMask, dwConditionMask):
+    _VerSetConditionMask = windll.kernel32.VerSetConditionMask
+    _VerSetConditionMask.argtypes = [ULONGLONG, DWORD, BYTE]
+    _VerSetConditionMask.restype  = ULONGLONG
+    return _VerSetConditionMask(dwlConditionMask, dwTypeBitMask, dwConditionMask)
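+
+# Illustrative usage sketch (not part of the WinAppDbg API): the usual
+# VerifyVersionInfo idiom, building the condition mask with
+# VerSetConditionMask. Assumes the standard VER_MAJORVERSION, VER_MINORVERSION
+# and VER_GREATER_EQUAL constants defined earlier in this module.
+def _example_is_vista_or_later():
+    osvi = OSVERSIONINFOEXW()
+    osvi.dwOSVersionInfoSize = sizeof(osvi)
+    osvi.dwMajorVersion = 6
+    osvi.dwMinorVersion = 0
+    mask = VerSetConditionMask(0, VER_MAJORVERSION, VER_GREATER_EQUAL)
+    mask = VerSetConditionMask(mask, VER_MINORVERSION, VER_GREATER_EQUAL)
+    return VerifyVersionInfoW(osvi, VER_MAJORVERSION | VER_MINORVERSION, mask)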
+
+# HMODULE WINAPI GetModuleHandle(
+#   _In_opt_ LPCTSTR lpModuleName
+# );
+def GetModuleHandleA(lpModuleName = None):
+    _GetModuleHandleA = windll.kernel32.GetModuleHandleA
+    _GetModuleHandleA.argtypes = [LPSTR]
+    _GetModuleHandleA.restype  = HMODULE
+    return _GetModuleHandleA(lpModuleName)
+
+def GetModuleHandleW(lpModuleName = None):
+    _GetModuleHandleW = windll.kernel32.GetModuleHandleW
+    _GetModuleHandleW.argtypes = [LPWSTR]
+    _GetModuleHandleW.restype  = HMODULE
+    return _GetModuleHandleW(lpModuleName)
+
+GetModuleHandle = GuessStringType(GetModuleHandleA, GetModuleHandleW)
+
+# DWORD WINAPI GetModuleFileName(
+#   _In_opt_ HMODULE hModule,
+#   _Out_    LPTSTR  lpFilename,
+#   _In_     DWORD   nSize
+# );
+def GetModuleFileNameA(hModule = None):
+    _GetModuleFileNameA = ctypes.windll.kernel32.GetModuleFileNameA
+    _GetModuleFileNameA.argtypes = [HMODULE, LPSTR, DWORD]
+    _GetModuleFileNameA.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_string_buffer(b"", nSize)
+        nCopied = _GetModuleFileNameA(hModule, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value.decode("utf8")
+
+def GetModuleFileNameW(hModule = None):
+    _GetModuleFileNameW = ctypes.windll.kernel32.GetModuleFileNameW
+    _GetModuleFileNameW.argtypes = [HMODULE, LPWSTR, DWORD]
+    _GetModuleFileNameW.restype = DWORD
+
+    nSize = MAX_PATH
+    while 1:
+        lpFilename = ctypes.create_unicode_buffer(u"", nSize)
+        nCopied = _GetModuleFileNameW(hModule, lpFilename, nSize)
+        if nCopied == 0:
+            raise ctypes.WinError()
+        if nCopied < (nSize - 1):
+            break
+        nSize = nSize + MAX_PATH
+    return lpFilename.value
+
+GetModuleFileName = GuessStringType(GetModuleFileNameA, GetModuleFileNameW)
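+
+# Illustrative usage sketch (not part of the WinAppDbg API): with hModule left
+# as None the wrapper returns the path of the executable of the calling
+# process.
+def _example_current_executable_path():
+    return GetModuleFileNameW()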
+
+# DWORD GetFullPathName(
+#   LPCTSTR lpFileName,
+#   DWORD nBufferLength,
+#   LPTSTR lpBuffer,
+#   LPTSTR* lpFilePart
+# );
+def GetFullPathNameA(lpFileName):
+    _GetFullPathNameA = windll.kernel32.GetFullPathNameA
+    _GetFullPathNameA.argtypes = [LPSTR, DWORD, LPSTR, POINTER(LPSTR)]
+    _GetFullPathNameA.restype  = DWORD
+
+    nBufferLength = _GetFullPathNameA(lpFileName, 0, None, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer   = ctypes.create_string_buffer(b'', nBufferLength + 1)
+    lpFilePart = LPSTR()
+    nCopied = _GetFullPathNameA(lpFileName, nBufferLength, lpBuffer, byref(lpFilePart))
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value, lpFilePart.value
+
+def GetFullPathNameW(lpFileName):
+    _GetFullPathNameW = windll.kernel32.GetFullPathNameW
+    _GetFullPathNameW.argtypes = [LPWSTR, DWORD, LPWSTR, POINTER(LPWSTR)]
+    _GetFullPathNameW.restype  = DWORD
+
+    nBufferLength = _GetFullPathNameW(lpFileName, 0, None, None)
+    if nBufferLength <= 0:
+        raise ctypes.WinError()
+    lpBuffer   = ctypes.create_unicode_buffer(u'', nBufferLength + 1)
+    lpFilePart = LPWSTR()
+    nCopied = _GetFullPathNameW(lpFileName, nBufferLength, lpBuffer, byref(lpFilePart))
+    if nCopied > nBufferLength or nCopied == 0:
+        raise ctypes.WinError()
+    return lpBuffer.value, lpFilePart.value
+
+GetFullPathName = GuessStringType(GetFullPathNameA, GetFullPathNameW)
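+
+# Illustrative usage sketch (not part of the WinAppDbg API): GetFullPathName
+# resolves a relative path against the current directory and also returns the
+# file-name component of the result.
+def _example_full_path(filename = u"example.txt"):
+    full_path, file_part = GetFullPathNameW(filename)
+    return full_path, file_part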
+
+#--- get_bits, get_arch and get_os --------------------------------------------
+
+ARCH_UNKNOWN     = "unknown"
+ARCH_I386        = "i386"
+ARCH_MIPS        = "mips"
+ARCH_ALPHA       = "alpha"
+ARCH_PPC         = "ppc"
+ARCH_SHX         = "shx"
+ARCH_ARM         = "arm"
+ARCH_ARM64       = "arm64"
+ARCH_THUMB       = "thumb"
+ARCH_IA64        = "ia64"
+ARCH_ALPHA64     = "alpha64"
+ARCH_MSIL        = "msil"
+ARCH_AMD64       = "amd64"
+ARCH_SPARC       = "sparc"
+
+# aliases
+ARCH_IA32    = ARCH_I386
+ARCH_X86     = ARCH_I386
+ARCH_X64     = ARCH_AMD64
+ARCH_ARM7    = ARCH_ARM
+ARCH_ARM8    = ARCH_ARM64
+ARCH_T32     = ARCH_THUMB
+ARCH_AARCH32 = ARCH_ARM7
+ARCH_AARCH64 = ARCH_ARM8
+ARCH_POWERPC = ARCH_PPC
+ARCH_HITACHI = ARCH_SHX
+ARCH_ITANIUM = ARCH_IA64
+
+# win32 constants -> our constants
+_arch_map = {
+    PROCESSOR_ARCHITECTURE_INTEL          : ARCH_I386,
+    PROCESSOR_ARCHITECTURE_MIPS           : ARCH_MIPS,
+    PROCESSOR_ARCHITECTURE_ALPHA          : ARCH_ALPHA,
+    PROCESSOR_ARCHITECTURE_PPC            : ARCH_PPC,
+    PROCESSOR_ARCHITECTURE_SHX            : ARCH_SHX,
+    PROCESSOR_ARCHITECTURE_ARM            : ARCH_ARM,
+    PROCESSOR_ARCHITECTURE_IA64           : ARCH_IA64,
+    PROCESSOR_ARCHITECTURE_ALPHA64        : ARCH_ALPHA64,
+    PROCESSOR_ARCHITECTURE_MSIL           : ARCH_MSIL,
+    PROCESSOR_ARCHITECTURE_AMD64          : ARCH_AMD64,
+    PROCESSOR_ARCHITECTURE_SPARC          : ARCH_SPARC,
+}
+
+OS_UNKNOWN    = "Unknown"
+
+OS_NT         = "Windows NT"
+OS_W2K        = "Windows 2000"
+OS_XP         = "Windows XP"
+OS_XP_64      = "Windows XP (64 bits)"
+OS_VISTA      = "Windows Vista"
+OS_VISTA_64   = "Windows Vista (64 bits)"
+OS_W7         = "Windows 7"
+OS_W7_64      = "Windows 7 (64 bits)"
+OS_W8         = "Windows 8"
+OS_W8_64      = "Windows 8 (64 bits)"
+OS_W81        = "Windows 8.1"
+OS_W81_64     = "Windows 8.1 (64 bits)"
+OS_W10        = "Windows 10"
+OS_W10_64     = "Windows 10 (64 bits)"
+
+OS_W2K3       = "Windows Server 2003"
+OS_W2K3_64    = "Windows Server 2003 (64 bits)"
+OS_W2K3R2     = "Windows Server 2003 R2"
+OS_W2K3R2_64  = "Windows Server 2003 R2 (64 bits)"
+OS_W2K8       = "Windows Server 2008"
+OS_W2K8_64    = "Windows Server 2008 (64 bits)"
+OS_W2K8R2     = "Windows Server 2008 R2"
+OS_W2K8R2_64  = "Windows Server 2008 R2 (64 bits)"
+OS_W2K12      = "Windows Server 2012"
+OS_W2K12_64   = "Windows Server 2012 (64 bits)"
+OS_W2K12R2    = "Windows Server 2012 R2"
+OS_W2K12R2_64 = "Windows Server 2012 R2 (64 bits)"
+OS_W2K16      = "Windows Server 2016"
+OS_W2K16_64   = "Windows Server 2016 (64 bits)"
+
+OS_SEVEN    = OS_W7
+OS_SEVEN_64 = OS_W7_64
+
+OS_WINDOWS_NT           = OS_NT
+OS_WINDOWS_2000         = OS_W2K
+OS_WINDOWS_XP           = OS_XP
+OS_WINDOWS_XP_64        = OS_XP_64
+OS_WINDOWS_VISTA        = OS_VISTA
+OS_WINDOWS_VISTA_64     = OS_VISTA_64
+OS_WINDOWS_SEVEN        = OS_W7
+OS_WINDOWS_SEVEN_64     = OS_W7_64
+OS_WINDOWS_EIGHT        = OS_W8
+OS_WINDOWS_EIGHT_64     = OS_W8_64
+OS_WINDOWS_EIGHT_ONE    = OS_W81
+OS_WINDOWS_EIGHT_ONE_64 = OS_W81_64
+OS_WINDOWS_TEN          = OS_W10
+OS_WINDOWS_TEN_64       = OS_W10_64
+OS_WINDOWS_2003         = OS_W2K3
+OS_WINDOWS_2003_64      = OS_W2K3_64
+OS_WINDOWS_2003_R2      = OS_W2K3R2
+OS_WINDOWS_2003_R2_64   = OS_W2K3R2_64
+OS_WINDOWS_2008         = OS_W2K8
+OS_WINDOWS_2008_64      = OS_W2K8_64
+OS_WINDOWS_2008_R2      = OS_W2K8R2
+OS_WINDOWS_2008_R2_64   = OS_W2K8R2_64
+OS_WINDOWS_2012         = OS_W2K12
+OS_WINDOWS_2012_64      = OS_W2K12_64
+OS_WINDOWS_2012_R2      = OS_W2K12R2
+OS_WINDOWS_2012_R2_64   = OS_W2K12R2_64
+OS_WINDOWS_2016         = OS_W2K16
+OS_WINDOWS_2016_64      = OS_W2K16_64
+
+def _get_bits():
+    """
+    Determines the current integer size in bits.
+
+    This is useful to know whether we're running on a 32-bit or a 64-bit machine.
+
+    @rtype: int
+    @return: Returns the size of L{SIZE_T} in bits.
+    """
+    return sizeof(SIZE_T) * 8
+
+def _get_arch():
+    """
+    Determines the current processor architecture.
+
+    @rtype: str
+    @return:
+        On error, returns:
+
+         - L{ARCH_UNKNOWN} (C{"unknown"}) meaning the architecture could not be detected or is not known to WinAppDbg.
+
+        On success, returns one of the following values:
+
+         - L{ARCH_I386} (C{"i386"}) for Intel 32-bit x86 processor or compatible.
+         - L{ARCH_AMD64} (C{"amd64"}) for Intel 64-bit x86_64 processor or compatible.
+
+        May also return one of the following values if you get both Python and
+        WinAppDbg to work in such machines... let me know if you do! :)
+
+         - L{ARCH_MIPS} (C{"mips"}) for MIPS compatible processors.
+         - L{ARCH_ALPHA} (C{"alpha"}) for Alpha processors.
+         - L{ARCH_PPC} (C{"ppc"}) for PowerPC compatible processors.
+         - L{ARCH_SHX} (C{"shx"}) for Hitachi SH processors.
+         - L{ARCH_ARM} (C{"arm"}) for ARM compatible processors.
+         - L{ARCH_IA64} (C{"ia64"}) for Intel Itanium processor or compatible.
+         - L{ARCH_ALPHA64} (C{"alpha64"}) for Alpha64 processors.
+         - L{ARCH_MSIL} (C{"msil"}) for the .NET virtual machine.
+         - L{ARCH_SPARC} (C{"sparc"}) for Sun Sparc processors.
+
+        Probably IronPython returns C{ARCH_MSIL} but I haven't tried it. Python
+        on Windows CE and Windows Mobile should return C{ARCH_ARM}. Python on
+        Solaris using Wine would return C{ARCH_SPARC}. Python in an Itanium
+        machine should return C{ARCH_IA64} both on Wine and proper Windows.
+        All other values should only be returned on Linux using Wine.
+    """
+    try:
+        si = GetNativeSystemInfo()
+    except Exception:
+        si = GetSystemInfo()
+    try:
+        return _arch_map[si.id.w.wProcessorArchitecture]
+    except KeyError:
+        return ARCH_UNKNOWN
+
+def _get_wow64():
+    """
+    Determines if the current process is running in Windows-On-Windows 64 bits.
+
+    @rtype:  bool
+    @return: C{True} if the current process is a 32 bit program running in a
+        64 bit version of Windows, C{False} if it's either a 32 bit program
+        in a 32 bit Windows or a 64 bit program in a 64 bit Windows.
+    """
+    # Try to determine if the debugger itself is running on WOW64.
+    # On error assume False.
+    if bits == 64:
+        wow64 = False
+    else:
+        try:
+            wow64 = IsWow64Process( GetCurrentProcess() )
+        except Exception:
+            wow64 = False
+    return wow64
+
+def _get_os(osvi = None):
+    """
+    Determines the current operating system.
+
+    @warning:
+        Since Windows 8 the C{GetVersionEx} call will "lie" to us,
+        as the reported version does not match the real Windows
+        version but the version specified in the C{python.exe}
+        manifest data.
+
+        Other ways I found online to retrieve the real data did not
+        work very well for me, in addition to being extremely hacky.
+
+    @note:
+        Wine reports itself as Windows XP 32 bits
+        (even if the Linux host is 64 bits).
+        ReactOS may report itself as Windows 2000 or Windows XP,
+        depending on the version of ReactOS.
+
+    @type  osvi: L{OSVERSIONINFOEXA}
+    @param osvi: Optional. The return value from L{GetVersionEx}.
+
+    @rtype: str
+    @return:
+        One of the following values:
+         - L{OS_UNKNOWN} (C{"Unknown"})
+         - L{OS_NT} (C{"Windows NT"})
+         - L{OS_W2K} (C{"Windows 2000"})
+         - L{OS_XP} (C{"Windows XP"})
+         - L{OS_XP_64} (C{"Windows XP (64 bits)"})
+         - L{OS_VISTA} (C{"Windows Vista"})
+         - L{OS_VISTA_64} (C{"Windows Vista (64 bits)"})
+         - L{OS_W7} (C{"Windows 7"})
+         - L{OS_W7_64} (C{"Windows 7 (64 bits)"})
+         - L{OS_W8} (C{"Windows 8"})
+         - L{OS_W8_64} (C{"Windows 8 (64 bits)"})
+         - L{OS_W81} (C{"Windows 8.1"})
+         - L{OS_W81_64} (C{"Windows 8.1 (64 bits)"})
+         - L{OS_W10} (C{"Windows 10"})
+         - L{OS_W10_64} (C{"Windows 10 (64 bits)"})
+         - L{OS_W2K3} (C{"Windows Server 2003"})
+         - L{OS_W2K3_64} (C{"Windows Server 2003 (64 bits)"})
+         - L{OS_W2K3R2} (C{"Windows Server 2003 R2"})
+         - L{OS_W2K3R2_64} (C{"Windows Server 2003 R2 (64 bits)"})
+         - L{OS_W2K8} (C{"Windows Server 2008"})
+         - L{OS_W2K8_64} (C{"Windows Server 2008 (64 bits)"})
+         - L{OS_W2K8R2} (C{"Windows Server 2008 R2"})
+         - L{OS_W2K8R2_64} (C{"Windows Server 2008 R2 (64 bits)"})
+         - L{OS_W2K12} (C{"Windows Server 2012"})
+         - L{OS_W2K12_64} (C{"Windows Server 2012 (64 bits)"})
+         - L{OS_W2K12R2} (C{"Windows Server 2012 R2"})
+         - L{OS_W2K12R2_64} (C{"Windows Server 2012 R2 (64 bits)"})
+         - L{OS_W2K16} (C{"Windows Server 2016"})
+         - L{OS_W2K16_64} (C{"Windows Server 2016 (64 bits)"})
+    """
+
+    # Get the OSVI structure.
+    if not osvi:
+        osvi = GetVersionEx()
+
+    #-------------------------------------------------------------------------
+    # UGLY HACK:
+    # Since Windows 8 the C{GetVersionEx} call will "lie" to us,
+    # as the reported version does not match the real Windows
+    # version but the version specified in the C{python.exe}
+    # manifest data. We will try to get the real information from
+    # the registry instead.
+    try:
+
+        def RegOpenKeyA(hKey = 0x80000002, lpSubKey = None):    # HKLM
+            _RegOpenKeyA = windll.advapi32.RegOpenKeyA
+            _RegOpenKeyA.argtypes = [HKEY, LPSTR, PHKEY]
+            _RegOpenKeyA.restype  = LONG
+            _RegOpenKeyA.errcheck = RaiseIfNotErrorSuccess
+            hkResult = HKEY(INVALID_HANDLE_VALUE)
+            _RegOpenKeyA(hKey, lpSubKey, byref(hkResult))
+            return hkResult.value
+
+        def RegQueryValueExA(hKey, lpValueName = None, dwType = 4):
+            _RegQueryValueExA = windll.advapi32.RegQueryValueExA
+            _RegQueryValueExA.argtypes = [HKEY, LPSTR, LPVOID, PDWORD, LPVOID, PDWORD]
+            if dwType == 4:     # REG_DWORD
+                xData = DWORD(0)
+                cbData = DWORD(4)
+            elif dwType == 1:     # REG_SZ
+                cbData = DWORD(0)
+                _RegQueryValueExA(hKey, lpValueName, None, None, None, byref(cbData))
+                xData = ctypes.create_string_buffer(cbData.value)
+            else:
+                raise Exception("Internal error")
+            _RegQueryValueExA(hKey, lpValueName, None, None, byref(xData), byref(cbData))
+            return xData.value
+
+        def RegCloseKey(hKey):
+            _RegCloseKey = windll.advapi32.RegCloseKey
+            _RegCloseKey.argtypes = [HKEY]
+            _RegCloseKey.restype  = LONG
+            _RegCloseKey.errcheck = RaiseIfNotErrorSuccess
+            _RegCloseKey(hKey)
+
+        # The registry names must be byte strings, since the ANSI API is used here.
+        hKey = RegOpenKeyA(lpSubKey = b'SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion')
+        try:
+            osvi.dwMajorVersion = RegQueryValueExA(hKey, b"CurrentMajorVersionNumber", 4)
+            osvi.dwMinorVersion = RegQueryValueExA(hKey, b"CurrentMinorVersionNumber", 4)
+            if RegQueryValueExA(hKey, b"InstallationType", 1) == b"Client":
+                osvi.wProductType = VER_NT_WORKSTATION
+            else:
+                osvi.wProductType = VER_NT_SERVER
+        finally:
+            RegCloseKey(hKey)
+
+    except Exception:
+        ##raise   # XXX DEBUG
+        pass
+    #-------------------------------------------------------------------------
+
+    # The following is a rough port of:
+    # http://msdn.microsoft.com/en-us/library/ms724429%28VS.85%29.aspx
+    if osvi.dwPlatformId == VER_PLATFORM_WIN32_NT and osvi.dwMajorVersion > 4:
+        if osvi.dwMajorVersion == 10:
+            if osvi.dwMinorVersion == 0:
+                if osvi.wProductType == VER_NT_WORKSTATION:
+                    if bits == 64 or wow64:
+                        return 'Windows 10 (64 bits)'
+                    return 'Windows 10'
+                if bits == 64 or wow64:
+                    return 'Windows Server 2016 (64 bits)'
+                return 'Windows Server 2016'
+        if osvi.dwMajorVersion == 6:
+            if osvi.dwMinorVersion == 0:
+                if osvi.wProductType == VER_NT_WORKSTATION:
+                    if bits == 64 or wow64:
+                        return 'Windows Vista (64 bits)'
+                    return 'Windows Vista'
+                if bits == 64 or wow64:
+                    return 'Windows Server 2008 (64 bits)'
+                return 'Windows Server 2008'
+            if osvi.dwMinorVersion == 1:
+                if osvi.wProductType == VER_NT_WORKSTATION:
+                    if bits == 64 or wow64:
+                        return 'Windows 7 (64 bits)'
+                    return 'Windows 7'
+                if bits == 64 or wow64:
+                    return 'Windows Server 2008 R2 (64 bits)'
+                return 'Windows Server 2008 R2'
+            if osvi.dwMinorVersion == 2:
+                if osvi.wProductType == VER_NT_WORKSTATION:
+                    if bits == 64 or wow64:
+                        return 'Windows 8 (64 bits)'
+                    return 'Windows 8'
+                if bits == 64 or wow64:
+                    return 'Windows Server 2012 (64 bits)'
+                return 'Windows Server 2012'
+            if osvi.dwMinorVersion == 3:
+                if osvi.wProductType == VER_NT_WORKSTATION:
+                    if bits == 64 or wow64:
+                        return 'Windows 8.1 (64 bits)'
+                    return 'Windows 8.1'
+                if bits == 64 or wow64:
+                    return 'Windows Server 2012 R2 (64 bits)'
+                return 'Windows Server 2012 R2'
+        if osvi.dwMajorVersion == 5:
+            if osvi.dwMinorVersion == 2:
+                if GetSystemMetrics(SM_SERVERR2):
+                    if bits == 64 or wow64:
+                        return 'Windows Server 2003 R2 (64 bits)'
+                    return 'Windows Server 2003 R2'
+                if osvi.wSuiteMask in (VER_SUITE_STORAGE_SERVER, VER_SUITE_WH_SERVER):
+                    if bits == 64 or wow64:
+                        return 'Windows Server 2003 (64 bits)'
+                    return 'Windows Server 2003'
+                if osvi.wProductType == VER_NT_WORKSTATION and arch == ARCH_AMD64:
+                    return 'Windows XP (64 bits)'
+                else:
+                    if bits == 64 or wow64:
+                        return 'Windows Server 2003 (64 bits)'
+                    return 'Windows Server 2003'
+            if osvi.dwMinorVersion == 1:
+                return 'Windows XP'
+            if osvi.dwMinorVersion == 0:
+                return 'Windows 2000'
+        if osvi.dwMajorVersion == 4:
+            return 'Windows NT'
+    return 'Unknown'
+
+def _get_ntddi(osvi):
+    """
+    Determines the current operating system version as an NTDDI constant.
+
+    This function allows you to quickly tell apart major OS differences.
+    For more detailed information call L{kernel32.GetVersionEx} instead.
+
+    @note:
+        Wine reports itself as Windows XP 32 bits
+        (even if the Linux host is 64 bits).
+        ReactOS may report itself as Windows 2000 or Windows XP,
+        depending on the version of ReactOS.
+
+    @type  osvi: L{OSVERSIONINFOEXA}
+    @param osvi: Optional. The return value from L{kernel32.GetVersionEx}.
+
+    @rtype:  int
+    @return: NTDDI version number.
+    """
+    if not osvi:
+        osvi = GetVersionEx()
+    ntddi = 0
+    ntddi += (osvi.dwMajorVersion & 0xFF)    << 24
+    ntddi += (osvi.dwMinorVersion & 0xFF)    << 16
+    ntddi += (osvi.wServicePackMajor & 0xFF) << 8
+    ntddi += (osvi.wServicePackMinor & 0xFF)
+    return ntddi
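+
+# Worked example (illustrative only): on Windows XP SP2 the structure reports
+# dwMajorVersion = 5, dwMinorVersion = 1, wServicePackMajor = 2 and
+# wServicePackMinor = 0, so the computation above yields
+#   (5 << 24) + (1 << 16) + (2 << 8) + 0 = 0x05010200  (NTDDI_WINXPSP2).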
+
+# The order of the following definitions DOES matter!
+
+# Current integer size in bits. See L{_get_bits} for more details.
+bits = _get_bits()
+
+# Current processor architecture. See L{_get_arch} for more details.
+arch = _get_arch()
+
+# Set to C{True} if the current process is running in WOW64. See L{_get_wow64} for more details.
+wow64 = _get_wow64()
+
+_osvi = GetVersionEx()
+
+# Current operating system. See L{_get_os} for more details.
+os = _get_os(_osvi)
+
+# Current operating system as an NTDDI constant. See L{_get_ntddi} for more details.
+NTDDI_VERSION = _get_ntddi(_osvi)
+
+# Upper word of L{NTDDI_VERSION}, contains the OS major and minor version number.
+WINVER = NTDDI_VERSION >> 16
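+
+# Usage sketch (illustrative only): code importing this module typically just
+# compares these precomputed values, for example:
+#
+#     if bits == 32 and wow64:
+#         pass  # 32-bit Python running on a 64-bit Windows (WOW64)
+#     if NTDDI_VERSION >= 0x06000000:
+#         pass  # Windows Vista / Server 2008 or later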
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================
diff --git a/scripts/win32/wtsapi32.py b/scripts/win32/wtsapi32.py
new file mode 100644
index 0000000..a827242
--- /dev/null
+++ b/scripts/win32/wtsapi32.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009-2020, Mario Vilas
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice,this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#     * Neither the name of the copyright holder nor the names of its
+#       contributors may be used to endorse or promote products derived from
+#       this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""
+Wrapper for wtsapi32.dll in ctypes.
+"""
+
+from .defines import *
+from .advapi32 import *
+
+#==============================================================================
+# This is used later on to calculate the list of exported symbols.
+_all = None
+_all = set(vars().keys())
+#==============================================================================
+
+#--- Constants ----------------------------------------------------------------
+
+WTS_CURRENT_SERVER_HANDLE = 0
+WTS_CURRENT_SESSION       = 0xFFFFFFFF  # ((DWORD)-1) in the SDK headers
+
+#--- WTS_PROCESS_INFO structure -----------------------------------------------
+
+# typedef struct _WTS_PROCESS_INFO {
+#   DWORD  SessionId;
+#   DWORD  ProcessId;
+#   LPTSTR pProcessName;
+#   PSID   pUserSid;
+# } WTS_PROCESS_INFO, *PWTS_PROCESS_INFO;
+
+class WTS_PROCESS_INFOA(Structure):
+    _fields_ = [
+        ("SessionId",    DWORD),
+        ("ProcessId",    DWORD),
+        ("pProcessName", LPSTR),
+        ("pUserSid",     PSID),
+    ]
+PWTS_PROCESS_INFOA = POINTER(WTS_PROCESS_INFOA)
+
+class WTS_PROCESS_INFOW(Structure):
+    _fields_ = [
+        ("SessionId",    DWORD),
+        ("ProcessId",    DWORD),
+        ("pProcessName", LPWSTR),
+        ("pUserSid",     PSID),
+    ]
+PWTS_PROCESS_INFOW = POINTER(WTS_PROCESS_INFOW)
+
+#--- WTSQuerySessionInformation enums and structures --------------------------
+
+# typedef enum _WTS_INFO_CLASS {
+#   WTSInitialProgram          = 0,
+#   WTSApplicationName         = 1,
+#   WTSWorkingDirectory        = 2,
+#   WTSOEMId                   = 3,
+#   WTSSessionId               = 4,
+#   WTSUserName                = 5,
+#   WTSWinStationName          = 6,
+#   WTSDomainName              = 7,
+#   WTSConnectState            = 8,
+#   WTSClientBuildNumber       = 9,
+#   WTSClientName              = 10,
+#   WTSClientDirectory         = 11,
+#   WTSClientProductId         = 12,
+#   WTSClientHardwareId        = 13,
+#   WTSClientAddress           = 14,
+#   WTSClientDisplay           = 15,
+#   WTSClientProtocolType      = 16,
+#   WTSIdleTime                = 17,
+#   WTSLogonTime               = 18,
+#   WTSIncomingBytes           = 19,
+#   WTSOutgoingBytes           = 20,
+#   WTSIncomingFrames          = 21,
+#   WTSOutgoingFrames          = 22,
+#   WTSClientInfo              = 23,
+#   WTSSessionInfo             = 24,
+#   WTSSessionInfoEx           = 25,
+#   WTSConfigInfo              = 26,
+#   WTSValidationInfo          = 27,
+#   WTSSessionAddressV4        = 28,
+#   WTSIsRemoteSession         = 29
+# } WTS_INFO_CLASS;
+
+WTSInitialProgram          = 0
+WTSApplicationName         = 1
+WTSWorkingDirectory        = 2
+WTSOEMId                   = 3
+WTSSessionId               = 4
+WTSUserName                = 5
+WTSWinStationName          = 6
+WTSDomainName              = 7
+WTSConnectState            = 8
+WTSClientBuildNumber       = 9
+WTSClientName              = 10
+WTSClientDirectory         = 11
+WTSClientProductId         = 12
+WTSClientHardwareId        = 13
+WTSClientAddress           = 14
+WTSClientDisplay           = 15
+WTSClientProtocolType      = 16
+WTSIdleTime                = 17
+WTSLogonTime               = 18
+WTSIncomingBytes           = 19
+WTSOutgoingBytes           = 20
+WTSIncomingFrames          = 21
+WTSOutgoingFrames          = 22
+WTSClientInfo              = 23
+WTSSessionInfo             = 24
+WTSSessionInfoEx           = 25
+WTSConfigInfo              = 26
+WTSValidationInfo          = 27
+WTSSessionAddressV4        = 28
+WTSIsRemoteSession         = 29
+
+WTS_INFO_CLASS = ctypes.c_int
+
+# typedef enum _WTS_CONNECTSTATE_CLASS {
+#   WTSActive,
+#   WTSConnected,
+#   WTSConnectQuery,
+#   WTSShadow,
+#   WTSDisconnected,
+#   WTSIdle,
+#   WTSListen,
+#   WTSReset,
+#   WTSDown,
+#   WTSInit
+# } WTS_CONNECTSTATE_CLASS;
+
+WTSActive       = 0
+WTSConnected    = 1
+WTSConnectQuery = 2
+WTSShadow       = 3
+WTSDisconnected = 4
+WTSIdle         = 5
+WTSListen       = 6
+WTSReset        = 7
+WTSDown         = 8
+WTSInit         = 9
+
+WTS_CONNECTSTATE_CLASS = ctypes.c_int
+
+# typedef struct _WTS_CLIENT_DISPLAY {
+#   DWORD HorizontalResolution;
+#   DWORD VerticalResolution;
+#   DWORD ColorDepth;
+# } WTS_CLIENT_DISPLAY, *PWTS_CLIENT_DISPLAY;
+class WTS_CLIENT_DISPLAY(Structure):
+    _fields_ = [
+        ("HorizontalResolution", DWORD),
+        ("VerticalResolution",   DWORD),
+        ("ColorDepth",           DWORD),
+    ]
+PWTS_CLIENT_DISPLAY = POINTER(WTS_CLIENT_DISPLAY)
+
+# typedef struct _WTS_CLIENT_ADDRESS {
+#   DWORD AddressFamily;
+#   BYTE  Address[20];
+# } WTS_CLIENT_ADDRESS, *PWTS_CLIENT_ADDRESS;
+
+# XXX TODO
+
+# typedef struct _WTSCLIENT {
+#   WCHAR   ClientName[CLIENTNAME_LENGTH + 1];
+#   WCHAR   Domain[DOMAIN_LENGTH + 1 ];
+#   WCHAR   UserName[USERNAME_LENGTH + 1];
+#   WCHAR   WorkDirectory[MAX_PATH + 1];
+#   WCHAR   InitialProgram[MAX_PATH + 1];
+#   BYTE    EncryptionLevel;
+#   ULONG   ClientAddressFamily;
+#   USHORT  ClientAddress[CLIENTADDRESS_LENGTH + 1];
+#   USHORT  HRes;
+#   USHORT  VRes;
+#   USHORT  ColorDepth;
+#   WCHAR   ClientDirectory[MAX_PATH + 1];
+#   ULONG   ClientBuildNumber;
+#   ULONG   ClientHardwareId;
+#   USHORT  ClientProductId;
+#   USHORT  OutBufCountHost;
+#   USHORT  OutBufCountClient;
+#   USHORT  OutBufLength;
+#   WCHAR     DeviceId[MAX_PATH + 1];
+# } WTSCLIENT, *PWTSCLIENT;
+
+# XXX TODO
+
+# typedef struct _WTSINFO {
+#   WTS_CONNECTSTATE_CLASS State;
+#   DWORD                  SessionId;
+#   DWORD                  IncomingBytes;
+#   DWORD                  OutgoingBytes;
+#   DWORD                  IncomingCompressedBytes;
+#   DWORD                  OutgoingCompressedBytes;
+#   WCHAR                  WinStationName;
+#   WCHAR                  Domain;
+#   WCHAR                  UserName;
+#   LARGE_INTEGER          ConnectTime;
+#   LARGE_INTEGER          DisconnectTime;
+#   LARGE_INTEGER          LastInputTime;
+#   LARGE_INTEGER          LogonTime;
+#   LARGE_INTEGER          CurrentTime;
+# } WTSINFO, *PWTSINFO;
+
+# XXX TODO
+
+# typedef struct _WTSINFOEX {
+#   DWORD           Level;
+#   WTSINFOEX_LEVEL Data;
+# } WTSINFOEX, *PWTSINFOEX;
+
+# XXX TODO
+
+#--- wtsapi32.dll -------------------------------------------------------------
+
+# void WTSFreeMemory(
+#   __in  PVOID pMemory
+# );
+def WTSFreeMemory(pMemory):
+    _WTSFreeMemory = windll.wtsapi32.WTSFreeMemory
+    _WTSFreeMemory.argtypes = [PVOID]
+    _WTSFreeMemory.restype  = None
+    _WTSFreeMemory(pMemory)
+
+# BOOL WTSEnumerateProcesses(
+#   __in   HANDLE hServer,
+#   __in   DWORD Reserved,
+#   __in   DWORD Version,
+#   __out  PWTS_PROCESS_INFO *ppProcessInfo,
+#   __out  DWORD *pCount
+# );
+def WTSEnumerateProcessesA(hServer = WTS_CURRENT_SERVER_HANDLE):
+    _WTSEnumerateProcessesA = windll.wtsapi32.WTSEnumerateProcessesA
+    _WTSEnumerateProcessesA.argtypes = [HANDLE, DWORD, DWORD, POINTER(PWTS_PROCESS_INFOA), PDWORD]
+    _WTSEnumerateProcessesA.restype  = bool
+    _WTSEnumerateProcessesA.errcheck = RaiseIfZero
+
+    pProcessInfo = PWTS_PROCESS_INFOA()
+    Count = DWORD(0)
+    _WTSEnumerateProcessesA(hServer, 0, 1, byref(pProcessInfo), byref(Count))
+    return pProcessInfo, Count.value
+
+def WTSEnumerateProcessesW(hServer = WTS_CURRENT_SERVER_HANDLE):
+    _WTSEnumerateProcessesW = windll.wtsapi32.WTSEnumerateProcessesW
+    _WTSEnumerateProcessesW.argtypes = [HANDLE, DWORD, DWORD, POINTER(PWTS_PROCESS_INFOW), PDWORD]
+    _WTSEnumerateProcessesW.restype  = bool
+    _WTSEnumerateProcessesW.errcheck = RaiseIfZero
+
+    pProcessInfo = PWTS_PROCESS_INFOW()
+    Count = DWORD(0)
+    _WTSEnumerateProcessesW(hServer, 0, 1, byref(pProcessInfo), byref(Count))
+    return pProcessInfo, Count.value
+
+WTSEnumerateProcesses = DefaultStringType(WTSEnumerateProcessesA, WTSEnumerateProcessesW)
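+
+# Illustrative usage sketch (not part of the WinAppDbg API): the returned
+# pointer behaves like an array of Count WTS_PROCESS_INFO structures and must
+# be released with WTSFreeMemory once it is no longer needed.
+def _example_list_session_processes():
+    pProcessInfo, Count = WTSEnumerateProcessesW()
+    try:
+        return [(pProcessInfo[i].ProcessId, pProcessInfo[i].pProcessName)
+                for i in range(Count)]
+    finally:
+        WTSFreeMemory(pProcessInfo)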
+
+# BOOL WTSTerminateProcess(
+#   __in  HANDLE hServer,
+#   __in  DWORD ProcessId,
+#   __in  DWORD ExitCode
+# );
+def WTSTerminateProcess(hServer, ProcessId, ExitCode):
+    _WTSTerminateProcess = windll.wtsapi32.WTSTerminateProcess
+    _WTSTerminateProcess.argtypes = [HANDLE, DWORD, DWORD]
+    _WTSTerminateProcess.restype  = bool
+    _WTSTerminateProcess.errcheck = RaiseIfZero
+    _WTSTerminateProcess(hServer, ProcessId, ExitCode)
+
+# BOOL WTSQuerySessionInformation(
+#   __in   HANDLE hServer,
+#   __in   DWORD SessionId,
+#   __in   WTS_INFO_CLASS WTSInfoClass,
+#   __out  LPTSTR *ppBuffer,
+#   __out  DWORD *pBytesReturned
+# );
+
+# XXX TODO
+
+#--- kernel32.dll -------------------------------------------------------------
+
+# I've no idea why these functions are in kernel32.dll instead of wtsapi32.dll
+
+# BOOL ProcessIdToSessionId(
+#   __in   DWORD dwProcessId,
+#   __out  DWORD *pSessionId
+# );
+def ProcessIdToSessionId(dwProcessId):
+    _ProcessIdToSessionId = windll.kernel32.ProcessIdToSessionId
+    _ProcessIdToSessionId.argtypes = [DWORD, PDWORD]
+    _ProcessIdToSessionId.restype  = bool
+    _ProcessIdToSessionId.errcheck = RaiseIfZero
+
+    dwSessionId = DWORD(0)
+    _ProcessIdToSessionId(dwProcessId, byref(dwSessionId))
+    return dwSessionId.value
+
+# DWORD WTSGetActiveConsoleSessionId(void);
+def WTSGetActiveConsoleSessionId():
+    _WTSGetActiveConsoleSessionId = windll.kernel32.WTSGetActiveConsoleSessionId
+    _WTSGetActiveConsoleSessionId.argtypes = []
+    _WTSGetActiveConsoleSessionId.restype  = DWORD
+    _WTSGetActiveConsoleSessionId.errcheck = RaiseIfZero
+    return _WTSGetActiveConsoleSessionId()
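+
+# Illustrative usage sketch (not part of the WinAppDbg API): mapping the
+# current process to its terminal services session and comparing it with the
+# session attached to the physical console.
+def _example_running_on_console():
+    import os
+    session = ProcessIdToSessionId(os.getpid())
+    return session == WTSGetActiveConsoleSessionId()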
+
+#==============================================================================
+# This calculates the list of exported symbols.
+_all = set(vars().keys()).difference(_all)
+__all__ = [_x for _x in _all if not _x.startswith('_')]
+__all__.sort()
+#==============================================================================