diff --git a/Makefile b/Makefile index 5a5cd0189260..5d23cfd197f7 100644 --- a/Makefile +++ b/Makefile @@ -828,6 +828,8 @@ RUNTIME_CPP_COMPONENTS = \ hexagon_dma_pool \ hexagon_host \ ios_io \ + linux_aarch64_cpu_features \ + linux_arm_cpu_features \ linux_clock \ linux_host_cpu_count \ linux_yield \ @@ -839,6 +841,8 @@ RUNTIME_CPP_COMPONENTS = \ msan \ msan_stubs \ opencl \ + osx_aarch64_cpu_features \ + osx_arm_cpu_features \ osx_clock \ osx_get_symbol \ osx_host_cpu_count \ @@ -873,6 +877,7 @@ RUNTIME_CPP_COMPONENTS = \ wasm_cpu_features \ webgpu_dawn \ webgpu_emscripten \ + windows_aarch64_cpu_features_arm \ windows_clock \ windows_cuda \ windows_d3d12compute_arm \ diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index 09e58bea894b..e601a3325ce6 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -46,20 +46,31 @@ std::unique_ptr parse_bitcode_file(llvm::StringRef buf, llvm::LLVM return std::unique_ptr(); \ } +#define DECLARE_CPP_INITMOD_LOOKUP_BITS(mod, bits) \ + do { \ + if (debug) { \ + return get_initmod_##mod##_##bits##_debug(context); \ + } else { \ + return get_initmod_##mod##_##bits(context); \ + } \ + } while (0) + #define DECLARE_CPP_INITMOD_LOOKUP(mod) \ std::unique_ptr get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \ if (bits_64) { \ - if (debug) { \ - return get_initmod_##mod##_64_debug(context); \ - } else { \ - return get_initmod_##mod##_64(context); \ - } \ + DECLARE_CPP_INITMOD_LOOKUP_BITS(mod, 64); \ + } else { \ + DECLARE_CPP_INITMOD_LOOKUP_BITS(mod, 32); \ + } \ + } + +#define DECLARE_CPP_INITMOD_LOOKUP_64(mod) \ + std::unique_ptr get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \ + if (bits_64) { \ + DECLARE_CPP_INITMOD_LOOKUP_BITS(mod, 64); \ } else { \ - if (debug) { \ - return get_initmod_##mod##_32_debug(context); \ - } else { \ - return get_initmod_##mod##_32(context); \ - } \ + internal_error << "No support for 32-bit initmod: " #mod; \ 
+ return nullptr; /* appease warnings */ \ } \ } @@ -70,6 +81,11 @@ std::unique_ptr parse_bitcode_file(llvm::StringRef buf, llvm::LLVM DECLARE_INITMOD(mod##_64) \ DECLARE_CPP_INITMOD_LOOKUP(mod) +#define DECLARE_CPP_INITMOD_64(mod) \ + DECLARE_INITMOD(mod##_64_debug) \ + DECLARE_INITMOD(mod##_64) \ + DECLARE_CPP_INITMOD_LOOKUP_64(mod) + #define DECLARE_LL_INITMOD(mod) \ DECLARE_INITMOD(mod##_ll) @@ -183,18 +199,28 @@ DECLARE_NO_INITMOD(metal_objc_x86) DECLARE_LL_INITMOD(arm) DECLARE_LL_INITMOD(arm_no_neon) DECLARE_CPP_INITMOD(arm_cpu_features) +DECLARE_CPP_INITMOD(linux_arm_cpu_features) +DECLARE_CPP_INITMOD(osx_arm_cpu_features) #else DECLARE_NO_INITMOD(arm) DECLARE_NO_INITMOD(arm_no_neon) DECLARE_NO_INITMOD(arm_cpu_features) +DECLARE_NO_INITMOD(linux_arm_cpu_features) +DECLARE_NO_INITMOD(osx_arm_cpu_features) #endif // WITH_ARM #ifdef WITH_AARCH64 DECLARE_LL_INITMOD(aarch64) DECLARE_CPP_INITMOD(aarch64_cpu_features) +DECLARE_CPP_INITMOD(linux_aarch64_cpu_features) +DECLARE_CPP_INITMOD(osx_aarch64_cpu_features) +DECLARE_CPP_INITMOD_64(windows_aarch64_cpu_features_arm) #else DECLARE_NO_INITMOD(aarch64) DECLARE_NO_INITMOD(aarch64_cpu_features) +DECLARE_NO_INITMOD(linux_aarch64_cpu_features) +DECLARE_NO_INITMOD(osx_aarch64_cpu_features) +DECLARE_NO_INITMOD(windows_aarch64_cpu_features_arm) #endif // WITH_AARCH64 #ifdef WITH_NVPTX @@ -1206,9 +1232,23 @@ std::unique_ptr get_initial_module_for_target(Target t, llvm::LLVM } if (t.arch == Target::ARM) { if (t.bits == 64) { - modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug)); + if (t.os == Target::Android || t.os == Target::Linux) { + modules.push_back(get_initmod_linux_aarch64_cpu_features(c, bits_64, debug)); + } else if (t.os == Target::OSX || t.os == Target::IOS) { + modules.push_back(get_initmod_osx_aarch64_cpu_features(c, bits_64, debug)); + } else if (t.os == Target::Windows) { + modules.push_back(get_initmod_windows_aarch64_cpu_features_arm(c, bits_64, debug)); + } else { + 
modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug)); + } } else { - modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug)); + if (t.os == Target::Android || t.os == Target::Linux) { + modules.push_back(get_initmod_linux_arm_cpu_features(c, bits_64, debug)); + } else if (t.os == Target::OSX || t.os == Target::IOS) { + modules.push_back(get_initmod_osx_arm_cpu_features(c, bits_64, debug)); + } else { + modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug)); + } } } if (t.arch == Target::POWERPC) { diff --git a/src/Target.cpp b/src/Target.cpp index 53e85196dae3..c0cd3e9bab3a 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -21,9 +21,35 @@ #endif #ifdef _MSC_VER +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include +#include #endif // _MSC_VER +#ifdef __APPLE__ +#include +#include +#include +#endif + +#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) +#include +#include +#ifndef HWCAP_ASIMDHP +#define HWCAP_ASIMDHP 0 +#endif +#ifndef HWCAP_ASIMDDP +#define HWCAP_ASIMDDP 0 +#endif +#ifndef HWCAP_SVE +#define HWCAP_SVE 0 +#endif +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 0 +#endif +#endif + namespace Halide { using std::string; @@ -31,13 +57,14 @@ using std::vector; namespace { -#ifdef _MSC_VER -static void cpuid(int info[4], int infoType, int extra) { +#if defined(_M_IX86) || defined(_M_AMD64) + +void cpuid(int info[4], int infoType, int extra) { __cpuidex(info, infoType, extra); } -#else -#if defined(__x86_64__) || defined(__i386__) +#elif defined(__x86_64__) || defined(__i386__) + // CPU feature detection code taken from ispc // (https://github.com/ispc/ispc/blob/master/builtins/dispatch.ll) @@ -47,10 +74,10 @@ void cpuid(int info[4], int infoType, int extra) { : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType), "2"(extra)); } -#endif + #endif -#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) +#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || 
defined(_M_AMD64) enum class VendorSignatures { Unknown, @@ -143,6 +170,29 @@ Target::Processor get_amd_processor(unsigned family, unsigned model, bool have_s #endif // defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) +#ifdef __APPLE__ + +template<typename T> +std::optional<T> getsysctl(const char *name) { + T value{}; + size_t size = sizeof(value); + if (sysctlbyname(name, &value, &size, nullptr, 0)) { + return std::nullopt; + } + return std::make_optional(value); +} + +bool sysctl_is_set(const char *name) { + return getsysctl<int>(name).value_or(0); +} + +bool is_armv7s() { + return getsysctl<cpu_type_t>("hw.cputype") == CPU_TYPE_ARM && + getsysctl<cpu_subtype_t>("hw.cpusubtype") == CPU_SUBTYPE_ARM_V7S; +} + +#endif // __APPLE__ + Target calculate_host_target() { Target::OS os = Target::OSUnknown; #ifdef __linux__ @@ -164,8 +214,66 @@ Target calculate_host_target() { #if __riscv Target::Arch arch = Target::RISCV; #else -#if defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) Target::Arch arch = Target::ARM; + +#ifdef __APPLE__ + if (is_armv7s()) { + initial_features.push_back(Target::ARMv7s); + } + + if (sysctl_is_set("hw.optional.arm.FEAT_DotProd")) { + initial_features.push_back(Target::ARMDotProd); + } + + if (sysctl_is_set("hw.optional.arm.FEAT_FP16")) { + initial_features.push_back(Target::ARMFp16); + } +#endif + +#ifdef __linux__ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); + + if (hwcaps & HWCAP_ASIMDDP) { + initial_features.push_back(Target::ARMDotProd); + } + + if (hwcaps & HWCAP_ASIMDHP) { + initial_features.push_back(Target::ARMFp16); + } + + if (hwcaps & HWCAP_SVE) { + initial_features.push_back(Target::SVE); + } + + if (hwcaps2 & HWCAP2_SVE2) { + initial_features.push_back(Target::SVE2); + } +#endif + +#ifdef _MSC_VER + + // Magic value from: https://github.com/dotnet/runtime/blob/7e977dcbe5efaeec2c75ed0c3e200c85b2e55522/src/native/minipal/cpufeatures.c#L19 
+#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE (46) + + // This is the strategy used by Google's cpuinfo library for + // detecting fp16 arithmetic support on Windows. + if (!IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED) && + IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE)) { + initial_features.push_back(Target::ARMFp16); + } + + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + initial_features.push_back(Target::ARMDotProd); + } + + if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) { + initial_features.push_back(Target::SVE); + } + +#endif + #else #if defined(__powerpc__) && (defined(__FreeBSD__) || defined(__linux__)) Target::Arch arch = Target::POWERPC; diff --git a/src/Type.cpp b/src/Type.cpp index 1cd95e0a6b01..48d7d90e80bf 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -35,7 +35,7 @@ Halide::Expr Type::max() const { } else { internal_assert(is_float()); if (bits() == 16) { - return Internal::FloatImm::make(*this, 65504.0); + return Internal::FloatImm::make(*this, (double)float16_t::make_infinity()); } else if (bits() == 32) { return Internal::FloatImm::make(*this, std::numeric_limits::infinity()); } else if (bits() == 64) { @@ -59,7 +59,7 @@ Halide::Expr Type::min() const { } else { internal_assert(is_float()); if (bits() == 16) { - return Internal::FloatImm::make(*this, -65504.0); + return Internal::FloatImm::make(*this, (double)float16_t::make_negative_infinity()); } else if (bits() == 32) { return Internal::FloatImm::make(*this, -std::numeric_limits::infinity()); } else if (bits() == 64) { diff --git a/src/Util.cpp b/src/Util.cpp index b266efeda55e..3ff261a7b24c 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -859,7 +859,14 @@ void run_with_large_stack(const std::function &action) { // Portable bit-counting methods int popcount64(uint64_t x) { #ifdef _MSC_VER -#if defined(_WIN64) +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + int popcnt = 0; + while (x) { + x &= x - 1; + 
popcnt++; + } + return popcnt; +#elif defined(_WIN64) return __popcnt64(x); #else return __popcnt((uint32_t)(x >> 32)) + __popcnt((uint32_t)(x & 0xffffffff)); diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 3366f2113969..5426c355823c 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -32,6 +32,8 @@ set(RUNTIME_CPP hexagon_dma_pool hexagon_host ios_io + linux_aarch64_cpu_features + linux_arm_cpu_features linux_clock linux_host_cpu_count linux_yield @@ -43,6 +45,8 @@ set(RUNTIME_CPP msan msan_stubs opencl + osx_aarch64_cpu_features + osx_arm_cpu_features osx_clock osx_get_symbol osx_host_cpu_count @@ -80,6 +84,7 @@ set(RUNTIME_CPP # webgpu webgpu_dawn webgpu_emscripten + windows_aarch64_cpu_features_arm windows_clock windows_cuda windows_d3d12compute_arm diff --git a/src/runtime/aarch64_cpu_features.cpp b/src/runtime/aarch64_cpu_features.cpp index 90f54a17b3ea..385dc5ed6b76 100644 --- a/src/runtime/aarch64_cpu_features.cpp +++ b/src/runtime/aarch64_cpu_features.cpp @@ -5,9 +5,130 @@ namespace Halide { namespace Runtime { namespace Internal { +#if LINUX + +extern "C" unsigned long getauxval(unsigned long type); + +#define AT_HWCAP 16 +#define AT_HWCAP2 26 + +// https://cs.android.com/android/platform/superproject/main/+/main:bionic/libc/kernel/uapi/asm-arm64/asm/hwcap.h +// https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +#define HWCAP_ASIMDHP (1 << 10) +#define HWCAP_ASIMDDP (1 << 20) +#define HWCAP_SVE (1 << 22) +#define HWCAP2_SVE2 (1 << 1) + +namespace { + +void set_platform_features(CpuFeatures &features) { + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); + + if (hwcaps & HWCAP_ASIMDDP) { + features.set_available(halide_target_feature_arm_dot_prod); + } + + if (hwcaps & HWCAP_ASIMDHP) { + features.set_available(halide_target_feature_arm_fp16); + } + + if (hwcaps & HWCAP_SVE) { + features.set_available(halide_target_feature_sve); + } + 
+ if (hwcaps2 & HWCAP2_SVE2) { + features.set_available(halide_target_feature_sve2); + } +} + +} // namespace + +#elif OSX + +extern "C" int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); + +namespace { + +bool sysctl_is_set(const char *name) { + int enabled = 0; + size_t enabled_len = sizeof(enabled); + return sysctlbyname(name, &enabled, &enabled_len, nullptr, 0) == 0 && enabled; +} + +void set_platform_features(CpuFeatures &features) { + if (sysctl_is_set("hw.optional.arm.FEAT_DotProd")) { + features.set_available(halide_target_feature_arm_dot_prod); + } + + if (sysctl_is_set("hw.optional.arm.FEAT_FP16")) { + features.set_available(halide_target_feature_arm_fp16); + } +} + +} // namespace + +#elif WINDOWS + +typedef int BOOL; +typedef unsigned long DWORD; + +extern "C" BOOL IsProcessorFeaturePresent(DWORD feature); + +#define PF_FLOATING_POINT_EMULATED (1) +#define PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE (27) +#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE (43) + +// Magic value from: https://github.com/dotnet/runtime/blob/7e977dcbe5efaeec2c75ed0c3e200c85b2e55522/src/native/minipal/cpufeatures.c#L19 +#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE (46) + +namespace { + +void set_platform_features(CpuFeatures &features) { + // This is the strategy used by Google's cpuinfo library for + // detecting fp16 arithmetic support on Windows. 
+ if (!IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED) && + IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE)) { + features.set_available(halide_target_feature_arm_fp16); + } + + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + features.set_available(halide_target_feature_arm_dot_prod); + } + + if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) { + features.set_available(halide_target_feature_sve); + } +} + +} // namespace + +#else + +namespace { + +void set_platform_features(CpuFeatures &) { +} + +} // namespace + +#endif + WEAK CpuFeatures halide_get_cpu_features() { - // AArch64 has no CPU-specific Features. - return CpuFeatures(); + CpuFeatures features; + features.set_known(halide_target_feature_arm_dot_prod); + features.set_known(halide_target_feature_arm_fp16); + features.set_known(halide_target_feature_armv7s); + features.set_known(halide_target_feature_no_neon); + features.set_known(halide_target_feature_sve); + features.set_known(halide_target_feature_sve2); + + // All ARM architectures support "No Neon". 
+ features.set_available(halide_target_feature_no_neon); + + set_platform_features(features); + + return features; } } // namespace Internal diff --git a/src/runtime/arm_cpu_features.cpp b/src/runtime/arm_cpu_features.cpp index b6361279fa48..d5be501c0f62 100644 --- a/src/runtime/arm_cpu_features.cpp +++ b/src/runtime/arm_cpu_features.cpp @@ -5,26 +5,110 @@ namespace Halide { namespace Runtime { namespace Internal { +#if LINUX + +extern "C" unsigned long getauxval(unsigned long type); + +#define AT_HWCAP 16 + +// https://cs.android.com/android/platform/superproject/+/master:bionic/libc/kernel/uapi/asm-arm/asm/hwcap.h +// https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h +#define HWCAP_ASIMDHP (1 << 23) +#define HWCAP_ASIMDDP (1 << 24) + +namespace { + +void set_platform_features(CpuFeatures &features) { + unsigned long hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_ASIMDDP) { + features.set_available(halide_target_feature_arm_dot_prod); + } + + if (hwcaps & HWCAP_ASIMDHP) { + features.set_available(halide_target_feature_arm_fp16); + } +} + +} // namespace + +#elif OSX + +typedef int integer_t; + +typedef integer_t cpu_type_t; +typedef integer_t cpu_subtype_t; + +#define CPU_TYPE_ARM ((cpu_type_t)12) +#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */ + +extern "C" int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); + +namespace { + +bool sysctl_is_set(const char *name) { + int enabled = 0; + size_t enabled_len = sizeof(enabled); + return sysctlbyname(name, &enabled, &enabled_len, nullptr, 0) == 0 && enabled; +} + +bool is_armv7s() { + cpu_type_t type; + size_t type_len = sizeof(type); + if (sysctlbyname("hw.cputype", &type, &type_len, nullptr, 0)) { + return false; + } + + cpu_subtype_t subtype; + size_t subtype_len = sizeof(subtype); + if (sysctlbyname("hw.cpusubtype", &subtype, &subtype_len, nullptr, 0)) { + return false; + } + + return type == CPU_TYPE_ARM && subtype == 
CPU_SUBTYPE_ARM_V7S; +} + +void set_platform_features(CpuFeatures &features) { + if (is_armv7s()) { + features.set_available(halide_target_feature_armv7s); + } + + if (sysctl_is_set("hw.optional.arm.FEAT_DotProd")) { + features.set_available(halide_target_feature_arm_dot_prod); + } + + if (sysctl_is_set("hw.optional.arm.FEAT_FP16")) { + features.set_available(halide_target_feature_arm_fp16); + } +} + +} // namespace + +#else + +namespace { + +void set_platform_features(CpuFeatures &) { +} + +} // namespace + +#endif + WEAK CpuFeatures halide_get_cpu_features() { CpuFeatures features; - // All ARM architectures support "No Neon". + features.set_known(halide_target_feature_arm_dot_prod); + features.set_known(halide_target_feature_arm_fp16); + features.set_known(halide_target_feature_armv7s); features.set_known(halide_target_feature_no_neon); - features.set_available(halide_target_feature_no_neon); + features.set_known(halide_target_feature_sve); + features.set_known(halide_target_feature_sve2); - // TODO: add runtime detection for ARMv7s. AFAICT Apple doesn't - // provide an Officially Approved Way to detect this at runtime. - // Could probably use some variant of sysctl() to detect, but would - // need some experimentation and testing to get right. - // features.set_known(halide_target_feature_armv7s); - // if () { - // features.set_available(halide_target_feature_armv7s); - // } + // All ARM architectures support "No Neon". 
+ features.set_available(halide_target_feature_no_neon); - // TODO: add runtime detection for ARMDotProd extension - // https://github.com/halide/Halide/issues/4727 + set_platform_features(features); - // TODO: add runtime detection for ARMFp16 extension - // https://github.com/halide/Halide/issues/6106 return features; } diff --git a/src/runtime/linux_aarch64_cpu_features.cpp b/src/runtime/linux_aarch64_cpu_features.cpp new file mode 100644 index 000000000000..dd92cd5e940a --- /dev/null +++ b/src/runtime/linux_aarch64_cpu_features.cpp @@ -0,0 +1,2 @@ +#define LINUX 1 +#include "aarch64_cpu_features.cpp" diff --git a/src/runtime/linux_arm_cpu_features.cpp b/src/runtime/linux_arm_cpu_features.cpp new file mode 100644 index 000000000000..924f6f0fe9d4 --- /dev/null +++ b/src/runtime/linux_arm_cpu_features.cpp @@ -0,0 +1,2 @@ +#define LINUX 1 +#include "arm_cpu_features.cpp" diff --git a/src/runtime/osx_aarch64_cpu_features.cpp b/src/runtime/osx_aarch64_cpu_features.cpp new file mode 100644 index 000000000000..cf1db4c84de3 --- /dev/null +++ b/src/runtime/osx_aarch64_cpu_features.cpp @@ -0,0 +1,2 @@ +#define OSX 1 +#include "aarch64_cpu_features.cpp" diff --git a/src/runtime/osx_arm_cpu_features.cpp b/src/runtime/osx_arm_cpu_features.cpp new file mode 100644 index 000000000000..5ad3be0c5995 --- /dev/null +++ b/src/runtime/osx_arm_cpu_features.cpp @@ -0,0 +1,2 @@ +#define OSX 1 +#include "arm_cpu_features.cpp" diff --git a/src/runtime/windows_aarch64_cpu_features_arm.cpp b/src/runtime/windows_aarch64_cpu_features_arm.cpp new file mode 100644 index 000000000000..5266c0772c5f --- /dev/null +++ b/src/runtime/windows_aarch64_cpu_features_arm.cpp @@ -0,0 +1,2 @@ +#define WINDOWS 1 +#include "aarch64_cpu_features.cpp" diff --git a/tutorial/lesson_15_generators_usage.sh b/tutorial/lesson_15_generators_usage.sh index f8bf34eebbdf..a3a36f32753a 100755 --- a/tutorial/lesson_15_generators_usage.sh +++ b/tutorial/lesson_15_generators_usage.sh @@ -194,21 +194,21 @@ check_no_runtime() 
-f my_first_generator_basic \ -e object,c_header\ -o . \ - target=host-x86-64-no_runtime + target=x86-64-linux-no_runtime ./lesson_15_generate \ -g my_first_generator \ -f my_first_generator_sse41 \ -e object,c_header\ -o . \ - target=host-x86-64-sse41-no_runtime + target=x86-64-linux-sse41-no_runtime ./lesson_15_generate \ -g my_first_generator \ -f my_first_generator_avx \ -e object,c_header\ -o . \ - target=host-x86-64-avx-no_runtime + target=x86-64-linux-avx-no_runtime # These files don't contain the runtime check_no_runtime my_first_generator_basic.o @@ -223,7 +223,7 @@ check_symbol my_first_generator_avx.o my_first_generator_avx -r halide_runtime_x86 \ -e object,c_header\ -o . \ - target=host-x86-64 + target=x86-64-linux check_runtime halide_runtime_x86.o # Linking the standalone runtime with the three generated object files