From 4125a3dfc1bc3c75ea8ff2a8084a355c1ff79205 Mon Sep 17 00:00:00 2001 From: Zuyu ZHANG Date: Wed, 11 Dec 2024 14:34:06 -0800 Subject: [PATCH] refactor: Enable Parquet and Arrow by default --- .github/workflows/build-metrics.yml | 2 - .github/workflows/experimental.yml | 4 +- .github/workflows/linux-build-base.yml | 3 - .github/workflows/macos.yml | 1 - .github/workflows/scheduled.yml | 2 +- .../arrow/CMakeLists.txt | 145 +++++++++--------- CMakeLists.txt | 19 +-- docker-compose.yml | 3 +- velox/connectors/hive/HiveConnectorUtil.cpp | 8 - velox/connectors/hive/tests/CMakeLists.txt | 18 +-- .../hive/tests/HiveConnectorUtilTest.cpp | 8 +- .../hive/tests/HiveDataSinkTest.cpp | 6 - velox/dwio/common/tests/CMakeLists.txt | 28 ++-- velox/dwio/parquet/CMakeLists.txt | 23 ++- velox/dwio/parquet/RegisterParquetReader.cpp | 7 - velox/dwio/parquet/RegisterParquetWriter.cpp | 7 - .../dwio/parquet/tests/reader/CMakeLists.txt | 26 ++-- .../tests/writer/ParquetWriterTest.cpp | 2 - velox/exec/benchmarks/CMakeLists.txt | 22 ++- velox/vector/arrow/CMakeLists.txt | 2 +- 20 files changed, 131 insertions(+), 205 deletions(-) diff --git a/.github/workflows/build-metrics.yml b/.github/workflows/build-metrics.yml index 2a3285e2f6ac2..7e880707fabda 100644 --- a/.github/workflows/build-metrics.yml +++ b/.github/workflows/build-metrics.yml @@ -67,8 +67,6 @@ jobs: run: | EXTRA_CMAKE_FLAGS=( "-DVELOX_ENABLE_BENCHMARKS=ON" - "-DVELOX_ENABLE_ARROW=ON" - "-DVELOX_ENABLE_PARQUET=ON" "-DVELOX_ENABLE_HDFS=ON" "-DVELOX_ENABLE_S3=ON" "-DVELOX_ENABLE_GCS=ON" diff --git a/.github/workflows/experimental.yml b/.github/workflows/experimental.yml index fbca400247ec4..e67fc211d258f 100644 --- a/.github/workflows/experimental.yml +++ b/.github/workflows/experimental.yml @@ -76,7 +76,7 @@ jobs: - name: "Build" run: | cd velox - make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}" + make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="${{ inputs.extraCMakeFlags }}" ccache -s - name: Upload aggregation fuzzer @@ -146,7 +146,7 @@ jobs: run: | cd velox source /opt/rh/gcc-toolset-12/enable - make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}" + make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="${{ inputs.extraCMakeFlags }}" ccache -s - name: "Run Aggregate Fuzzer" diff --git a/.github/workflows/linux-build-base.yml b/.github/workflows/linux-build-base.yml index 01ab4bae950df..cc77b7ad760eb 100644 --- a/.github/workflows/linux-build-base.yml +++ b/.github/workflows/linux-build-base.yml @@ -86,8 +86,6 @@ jobs: run: | EXTRA_CMAKE_FLAGS=( "-DVELOX_ENABLE_BENCHMARKS=ON" - "-DVELOX_ENABLE_ARROW=ON" - "-DVELOX_ENABLE_PARQUET=ON" "-DVELOX_ENABLE_HDFS=ON" "-DVELOX_ENABLE_S3=ON" "-DVELOX_ENABLE_GCS=ON" @@ -160,7 +158,6 @@ jobs: VELOX_DEPENDENCY_SOURCE: BUNDLED ICU_SOURCE: SYSTEM MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=3" - EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_ENABLE_PARQUET=ON" run: | if [[ "${USE_CLANG}" = "true" ]]; then export CC=/usr/bin/clang-15; export CXX=/usr/bin/clang++-15; fi make debug diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e40ec0b4173a8..19d30ef3ccc95 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -92,7 +92,6 @@ jobs: -GNinja \ -DTREAT_WARNINGS_AS_ERRORS=1 \ -DENABLE_ALL_WARNINGS=1 \ - -DVELOX_ENABLE_PARQUET=ON \ -DVELOX_MONO_LIBRARY=ON \ -DVELOX_BUILD_SHARED=ON \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 7c3ee8aa4dcf7..d157c37c537c3 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -198,7 +198,7 @@ jobs: - name: Build env: - EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" + EXTRA_CMAKE_FLAGS: "-DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS" make debug diff --git a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt index ce69fdb295d95..3b0e82c17eba3 100644 --- a/CMake/resolve_dependency_modules/arrow/CMakeLists.txt +++ b/CMake/resolve_dependency_modules/arrow/CMakeLists.txt @@ -13,84 +13,83 @@ # limitations under the License. project(Arrow) -if(VELOX_ENABLE_ARROW) - find_package(Thrift) - if(Thrift_FOUND) - set(THRIFT_SOURCE "SYSTEM") - else() - set(THRIFT_SOURCE "BUNDLED") - endif() - - set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep") - set(ARROW_CMAKE_ARGS - -DARROW_PARQUET=OFF - -DARROW_DEPENDENCY_SOURCE=AUTO - -DARROW_WITH_THRIFT=ON - -DARROW_WITH_LZ4=ON - -DARROW_WITH_SNAPPY=ON - -DARROW_WITH_ZLIB=ON - -DARROW_WITH_ZSTD=ON - -DARROW_JEMALLOC=OFF - -DARROW_SIMD_LEVEL=NONE - -DARROW_RUNTIME_SIMD_LEVEL=NONE - -DARROW_WITH_UTF8PROC=OFF - -DARROW_TESTING=ON - -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DARROW_BUILD_STATIC=ON - -DThrift_SOURCE=${THRIFT_SOURCE} - -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}) - set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR}) +find_package(Thrift) +if(Thrift_FOUND) + set(THRIFT_SOURCE "SYSTEM") +else() + set(THRIFT_SOURCE "BUNDLED") +endif() - add_library(thrift STATIC IMPORTED GLOBAL) - if(NOT Thrift_FOUND) - set(THRIFT_ROOT ${ARROW_PREFIX}/src/arrow_ep-build/thrift_ep-install) - set(THRIFT_LIB ${THRIFT_ROOT}/lib/libthrift.a) +set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep") +set(ARROW_CMAKE_ARGS + -DARROW_PARQUET=OFF + -DARROW_DEPENDENCY_SOURCE=AUTO + -DARROW_WITH_THRIFT=ON + -DARROW_WITH_LZ4=ON + -DARROW_WITH_SNAPPY=ON + -DARROW_WITH_ZLIB=ON + -DARROW_WITH_ZSTD=ON + -DARROW_JEMALLOC=OFF + -DARROW_SIMD_LEVEL=NONE + -DARROW_RUNTIME_SIMD_LEVEL=NONE + -DARROW_WITH_UTF8PROC=OFF + -DARROW_TESTING=ON + -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DARROW_BUILD_STATIC=ON + -DThrift_SOURCE=${THRIFT_SOURCE} + -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}) +set(ARROW_LIBDIR ${ARROW_PREFIX}/install/${CMAKE_INSTALL_LIBDIR}) - file(MAKE_DIRECTORY ${THRIFT_ROOT}/include) - set(THRIFT_INCLUDE_DIR ${THRIFT_ROOT}/include) - endif() +add_library(thrift STATIC IMPORTED GLOBAL) +if(NOT Thrift_FOUND) + set(THRIFT_ROOT ${ARROW_PREFIX}/src/arrow_ep-build/thrift_ep-install) + set(THRIFT_LIB ${THRIFT_ROOT}/lib/libthrift.a) - set_property(TARGET thrift PROPERTY INTERFACE_INCLUDE_DIRECTORIES - ${THRIFT_INCLUDE_DIR}) - set_property(TARGET thrift PROPERTY IMPORTED_LOCATION ${THRIFT_LIB}) + file(MAKE_DIRECTORY ${THRIFT_ROOT}/include) + set(THRIFT_INCLUDE_DIR ${THRIFT_ROOT}/include) +endif() - set(VELOX_ARROW_BUILD_VERSION 15.0.0) - set(VELOX_ARROW_BUILD_SHA256_CHECKSUM - 01dd3f70e85d9b5b933ec92c0db8a4ef504a5105f78d2d8622e84279fb45c25d) - set(VELOX_ARROW_SOURCE_URL - "https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz" - ) +set_property(TARGET thrift PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${THRIFT_INCLUDE_DIR}) +set_property(TARGET thrift PROPERTY IMPORTED_LOCATION ${THRIFT_LIB}) - velox_resolve_dependency_url(ARROW) +set(VELOX_ARROW_BUILD_VERSION 15.0.0) +set(VELOX_ARROW_BUILD_SHA256_CHECKSUM + 01dd3f70e85d9b5b933ec92c0db8a4ef504a5105f78d2d8622e84279fb45c25d) +set(VELOX_ARROW_SOURCE_URL + "https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz" +) - ExternalProject_Add( - arrow_ep - PREFIX ${ARROW_PREFIX} - URL ${VELOX_ARROW_SOURCE_URL} - URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM} - SOURCE_SUBDIR cpp - CMAKE_ARGS ${ARROW_CMAKE_ARGS} - BUILD_BYPRODUCTS ${ARROW_LIBDIR}/libarrow.a ${ARROW_LIBDIR}/libparquet.a - ${ARROW_LIBDIR}/libarrow_testing.a ${THRIFT_LIB}) +velox_resolve_dependency_url(ARROW) - add_library(arrow STATIC IMPORTED GLOBAL) - add_library(arrow_testing STATIC IMPORTED GLOBAL) - add_library(parquet STATIC IMPORTED GLOBAL) - add_dependencies(arrow arrow_ep) - add_dependencies(arrow_testing arrow) - add_dependencies(parquet arrow) - file(MAKE_DIRECTORY ${ARROW_PREFIX}/install/include) - set_target_properties( - arrow arrow_testing parquet PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - ${ARROW_PREFIX}/install/include) - set_target_properties(arrow PROPERTIES IMPORTED_LOCATION - ${ARROW_LIBDIR}/libarrow.a) - set_property(TARGET arrow PROPERTY INTERFACE_LINK_LIBRARIES ${RE2} thrift) - set_target_properties( - arrow_testing PROPERTIES IMPORTED_LOCATION - ${ARROW_LIBDIR}/libarrow_testing.a) - set_target_properties(parquet PROPERTIES IMPORTED_LOCATION - ${ARROW_LIBDIR}/libparquet.a) +ExternalProject_Add( + arrow_ep + PREFIX ${ARROW_PREFIX} + URL ${VELOX_ARROW_SOURCE_URL} + URL_HASH ${VELOX_ARROW_BUILD_SHA256_CHECKSUM} + SOURCE_SUBDIR cpp + CMAKE_ARGS ${ARROW_CMAKE_ARGS} + BUILD_BYPRODUCTS ${ARROW_LIBDIR}/libarrow.a ${ARROW_LIBDIR}/libparquet.a + ${ARROW_LIBDIR}/libarrow_testing.a ${THRIFT_LIB}) -endif() +add_library(arrow STATIC IMPORTED GLOBAL) +add_library(arrow_testing STATIC IMPORTED GLOBAL) +add_library(parquet STATIC IMPORTED GLOBAL) +add_dependencies(arrow arrow_ep) +add_dependencies(arrow_testing arrow) +add_dependencies(parquet arrow) +file(MAKE_DIRECTORY ${ARROW_PREFIX}/install/include) +set_target_properties( + arrow arrow_testing parquet + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${ARROW_PREFIX}/install/include) +set_target_properties( + arrow + PROPERTIES IMPORTED_LOCATION ${ARROW_LIBDIR}/libarrow.a) +set_property(TARGET arrow PROPERTY INTERFACE_LINK_LIBRARIES ${RE2} thrift) +set_target_properties( + arrow_testing + PROPERTIES IMPORTED_LOCATION ${ARROW_LIBDIR}/libarrow_testing.a) +set_target_properties( + parquet + PROPERTIES IMPORTED_LOCATION ${ARROW_LIBDIR}/libparquet.a) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9de9f5a08e481..75333fc10f5bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,8 +136,6 @@ option(VELOX_ENABLE_S3 "Build S3 Connector" OFF) option(VELOX_ENABLE_GCS "Build GCS Connector" OFF) option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF) option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF) -option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF) -option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF) option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF) option(VELOX_ENABLE_CCACHE "Use ccache if installed." ON) @@ -183,7 +181,6 @@ if(${VELOX_BUILD_TESTING}) set(VELOX_ENABLE_TPCH_CONNECTOR ON) set(VELOX_ENABLE_SPARK_FUNCTIONS ON) set(VELOX_ENABLE_EXAMPLES ON) - set(VELOX_ENABLE_PARQUET ON) endif() if(${VELOX_ENABLE_BENCHMARKS}) @@ -280,14 +277,6 @@ endif() if(VELOX_ENABLE_HDFS) add_definitions(-DVELOX_ENABLE_HDFS) # JVM libhdfs requires arrow dependency. - set(VELOX_ENABLE_ARROW ON) -endif() - -if(VELOX_ENABLE_PARQUET) - add_definitions(-DVELOX_ENABLE_PARQUET) - # Native Parquet reader requires Apache Thrift and Arrow Parquet writer, which - # are included in Arrow. - set(VELOX_ENABLE_ARROW ON) endif() # make buildPartitionBounds_ a vector int64 instead of int32 to avoid integer @@ -633,9 +622,9 @@ if("${TREAT_WARNINGS_AS_ERRORS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() -if(VELOX_ENABLE_ARROW) - velox_set_source(Arrow) - velox_resolve_dependency(Arrow) -endif() +# Native Parquet reader requires Apache Thrift and Arrow Parquet writer, which +# are included in Arrow. +velox_set_source(Arrow) +velox_resolve_dependency(Arrow) add_subdirectory(velox) diff --git a/docker-compose.yml b/docker-compose.yml index 6904baa543eea..d7b5ae5e647b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -52,8 +52,7 @@ services: environment: NUM_THREADS: 8 # default value for NUM_THREADS CCACHE_DIR: "/velox/.ccache" - EXTRA_CMAKE_FLAGS: -DVELOX_ENABLE_PARQUET=ON - -DVELOX_ENABLE_S3=ON + EXTRA_CMAKE_FLAGS: -DVELOX_ENABLE_S3=ON volumes: - .:/velox:delegated working_dir: /velox diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index fb64001db0d0d..7f3764f7c2e02 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -26,11 +26,7 @@ #include "velox/dwio/common/Reader.h" #include "velox/dwio/dwrf/common/Config.h" #include "velox/dwio/dwrf/writer/Writer.h" - -#ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/writer/Writer.h" // @manual -#endif - #include "velox/expression/Expr.h" #include "velox/expression/ExprToSubfieldFilter.h" #include "velox/type/TimestampConversion.h" @@ -898,7 +894,6 @@ core::TypedExprPtr extractFiltersFromRemainingFilter( namespace { -#ifdef VELOX_ENABLE_PARQUET std::optional getTimestampUnit( const config::ConfigBase& config, const char* configKey) { @@ -959,7 +954,6 @@ void updateParquetWriterOptions( writerOptions = std::move(parquetWriterOptions); } -#endif void updateDWRFWriterOptions( const std::shared_ptr& hiveConfig, @@ -1031,9 +1025,7 @@ void updateWriterOptionsFromHiveConfig( updateDWRFWriterOptions(hiveConfig, sessionProperties, writerOptions); break; case dwio::common::FileFormat::PARQUET: -#ifdef VELOX_ENABLE_PARQUET updateParquetWriterOptions(hiveConfig, sessionProperties, writerOptions); -#endif break; case dwio::common::FileFormat::NIMBLE: // No-op for now. diff --git a/velox/connectors/hive/tests/CMakeLists.txt b/velox/connectors/hive/tests/CMakeLists.txt index ad3a373dab119..f6cf03ad6ae35 100644 --- a/velox/connectors/hive/tests/CMakeLists.txt +++ b/velox/connectors/hive/tests/CMakeLists.txt @@ -26,23 +26,17 @@ add_executable( TableHandleTest.cpp) add_test(velox_hive_connector_test velox_hive_connector_test) +target_include_directories(velox_hive_connector_test + PUBLIC ${ARROW_PREFIX}/install/include) target_link_libraries( velox_hive_connector_test + velox_dwio_common_exception + velox_dwio_native_parquet_reader + velox_exec + velox_exec_test_lib velox_hive_connector velox_hive_partition_function - velox_dwio_common_exception velox_vector_fuzzer velox_vector_test_lib - velox_exec - velox_exec_test_lib GTest::gtest GTest::gtest_main) - -if(VELOX_ENABLE_PARQUET) - - target_include_directories(velox_hive_connector_test - PUBLIC ${ARROW_PREFIX}/install/include) - target_link_libraries(velox_hive_connector_test - velox_dwio_native_parquet_reader) - -endif() diff --git a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp index b7a7ba335c12d..61dfe23d2921d 100644 --- a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp @@ -19,13 +19,9 @@ #include "velox/connectors/hive/HiveConfig.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/TableHandle.h" -#include "velox/exec/tests/utils/HiveConnectorTestBase.h" - #include "velox/dwio/dwrf/writer/Writer.h" - -#ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/writer/Writer.h" -#endif +#include "velox/exec/tests/utils/HiveConnectorTestBase.h" namespace facebook::velox::connector { @@ -403,7 +399,6 @@ TEST_F( "2"); } -#ifdef VELOX_ENABLE_PARQUET TEST_F(HiveConnectorUtilTest, updateWriterOptionsFromHiveConfigParquet) { auto fileFormat = dwio::common::FileFormat::PARQUET; std::unordered_map connectorConfig = { @@ -427,6 +422,5 @@ TEST_F(HiveConnectorUtilTest, updateWriterOptionsFromHiveConfigParquet) { parquetOptions->parquetWriteTimestampUnit.value(), TimestampUnit::kMilli); ASSERT_EQ(parquetOptions->parquetWriteTimestampTimeZone.value(), "UTC"); } -#endif } // namespace facebook::velox::connector diff --git a/velox/connectors/hive/tests/HiveDataSinkTest.cpp b/velox/connectors/hive/tests/HiveDataSinkTest.cpp index 36e806d8dae83..0c10b3d8f1002 100644 --- a/velox/connectors/hive/tests/HiveDataSinkTest.cpp +++ b/velox/connectors/hive/tests/HiveDataSinkTest.cpp @@ -27,12 +27,8 @@ #include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/writer/FlushPolicy.h" #include "velox/dwio/dwrf/writer/Writer.h" - -#ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/reader/ParquetReader.h" #include "velox/dwio/parquet/writer/Writer.h" -#endif - #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/exec/tests/utils/TempDirectoryPath.h" #include "velox/vector/fuzzer/VectorFuzzer.h" @@ -1131,7 +1127,6 @@ TEST_F(HiveDataSinkTest, insertTableHandleToString) { "HiveInsertTableHandle [dwrf zstd], [inputColumns: [ HiveColumnHandle [name: c0, columnType: Regular, dataType: BIGINT, requiredSubfields: [ ]] HiveColumnHandle [name: c1, columnType: Regular, dataType: INTEGER, requiredSubfields: [ ]] HiveColumnHandle [name: c2, columnType: Regular, dataType: SMALLINT, requiredSubfields: [ ]] HiveColumnHandle [name: c3, columnType: Regular, dataType: REAL, requiredSubfields: [ ]] HiveColumnHandle [name: c4, columnType: Regular, dataType: DOUBLE, requiredSubfields: [ ]] HiveColumnHandle [name: c5, columnType: PartitionKey, dataType: VARCHAR, requiredSubfields: [ ]] HiveColumnHandle [name: c6, columnType: PartitionKey, dataType: BOOLEAN, requiredSubfields: [ ]] ], locationHandle: LocationHandle [targetPath: /path/to/test, writePath: /path/to/test, tableType: kNew,, bucketProperty: \nHiveBucketProperty[\n\tBucket Columns:\n\t\tc5\n\tBucket Types:\n\t\tVARCHAR\n\tSortedBy Columns:\n\t\t[COLUMN[c5] ORDER[DESC NULLS LAST]]\n]\n]"); } -#ifdef VELOX_ENABLE_PARQUET TEST_F(HiveDataSinkTest, flushPolicyWithParquet) { const auto outputDirectory = TempDirectoryPath::create(); auto flushPolicyFactory = []() { @@ -1167,7 +1162,6 @@ TEST_F(HiveDataSinkTest, flushPolicyWithParquet) { EXPECT_EQ(fileMeta.numRowGroups(), 10); EXPECT_EQ(fileMeta.rowGroup(0).numRows(), 500); } -#endif TEST_F(HiveDataSinkTest, flushPolicyWithDWRF) { const auto outputDirectory = TempDirectoryPath::create(); diff --git a/velox/dwio/common/tests/CMakeLists.txt b/velox/dwio/common/tests/CMakeLists.txt index 6f3a36273c6db..2b9f52242b832 100644 --- a/velox/dwio/common/tests/CMakeLists.txt +++ b/velox/dwio/common/tests/CMakeLists.txt @@ -97,21 +97,19 @@ if(VELOX_ENABLE_BENCHMARKS) Folly::folly Folly::follybenchmark) - if(VELOX_ENABLE_ARROW) - add_subdirectory(Lemire/FastPFor) - add_executable(velox_dwio_common_bitpack_decoder_benchmark - BitPackDecoderBenchmark.cpp) + add_subdirectory(Lemire/FastPFor) + add_executable(velox_dwio_common_bitpack_decoder_benchmark + BitPackDecoderBenchmark.cpp) - target_compile_options(velox_dwio_common_bitpack_decoder_benchmark - PRIVATE -Wno-deprecated-declarations) + target_compile_options(velox_dwio_common_bitpack_decoder_benchmark + PRIVATE -Wno-deprecated-declarations) - target_link_libraries( - velox_dwio_common_bitpack_decoder_benchmark - velox_dwio_common - arrow - velox_fastpforlib - duckdb_static - Folly::folly - Folly::follybenchmark) - endif() + target_link_libraries( + velox_dwio_common_bitpack_decoder_benchmark + velox_dwio_common + arrow + velox_fastpforlib + duckdb_static + Folly::folly + Folly::follybenchmark) endif() diff --git a/velox/dwio/parquet/CMakeLists.txt b/velox/dwio/parquet/CMakeLists.txt index 2baaaeee11f58..b1403937f873d 100644 --- a/velox/dwio/parquet/CMakeLists.txt +++ b/velox/dwio/parquet/CMakeLists.txt @@ -12,23 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -if(VELOX_ENABLE_PARQUET) - add_subdirectory(common) - add_subdirectory(reader) - add_subdirectory(thrift) - add_subdirectory(writer) +add_subdirectory(common) +add_subdirectory(reader) +add_subdirectory(thrift) +add_subdirectory(writer) - if(${VELOX_BUILD_TESTING}) - add_subdirectory(tests) - endif() +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) endif() velox_add_library(velox_dwio_parquet_reader RegisterParquetReader.cpp) velox_add_library(velox_dwio_parquet_writer RegisterParquetWriter.cpp) -if(VELOX_ENABLE_PARQUET) - velox_link_libraries(velox_dwio_parquet_reader - velox_dwio_native_parquet_reader xsimd) - velox_link_libraries(velox_dwio_parquet_writer - velox_dwio_arrow_parquet_writer) -endif() +velox_link_libraries(velox_dwio_parquet_reader velox_dwio_native_parquet_reader + xsimd) +velox_link_libraries(velox_dwio_parquet_writer velox_dwio_arrow_parquet_writer) diff --git a/velox/dwio/parquet/RegisterParquetReader.cpp b/velox/dwio/parquet/RegisterParquetReader.cpp index 2f4cc94b81fec..0451a11d029a2 100644 --- a/velox/dwio/parquet/RegisterParquetReader.cpp +++ b/velox/dwio/parquet/RegisterParquetReader.cpp @@ -13,23 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/reader/ParquetReader.h" // @manual -#endif namespace facebook::velox::parquet { void registerParquetReaderFactory() { -#ifdef VELOX_ENABLE_PARQUET dwio::common::registerReaderFactory(std::make_shared()); -#endif } void unregisterParquetReaderFactory() { -#ifdef VELOX_ENABLE_PARQUET dwio::common::unregisterReaderFactory(dwio::common::FileFormat::PARQUET); -#endif } } // namespace facebook::velox::parquet diff --git a/velox/dwio/parquet/RegisterParquetWriter.cpp b/velox/dwio/parquet/RegisterParquetWriter.cpp index 1d6c0e0333572..5a94034007917 100644 --- a/velox/dwio/parquet/RegisterParquetWriter.cpp +++ b/velox/dwio/parquet/RegisterParquetWriter.cpp @@ -13,23 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/writer/Writer.h" // @manual -#endif namespace facebook::velox::parquet { void registerParquetWriterFactory() { -#ifdef VELOX_ENABLE_PARQUET dwio::common::registerWriterFactory(std::make_shared()); -#endif } void unregisterParquetWriterFactory() { -#ifdef VELOX_ENABLE_PARQUET dwio::common::unregisterWriterFactory(dwio::common::FileFormat::PARQUET); -#endif } } // namespace facebook::velox::parquet diff --git a/velox/dwio/parquet/tests/reader/CMakeLists.txt b/velox/dwio/parquet/tests/reader/CMakeLists.txt index 43e76708316df..690f9f9c11371 100644 --- a/velox/dwio/parquet/tests/reader/CMakeLists.txt +++ b/velox/dwio/parquet/tests/reader/CMakeLists.txt @@ -97,18 +97,14 @@ target_link_libraries( velox_type_tz ${TEST_LINK_LIBS}) -if(${VELOX_ENABLE_ARROW}) - - add_executable(velox_dwio_parquet_rlebp_decoder_test RleBpDecoderTest.cpp) - add_test( - NAME velox_dwio_parquet_rlebp_decoder_test - COMMAND velox_dwio_parquet_rlebp_decoder_test - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - target_link_libraries( - velox_dwio_parquet_rlebp_decoder_test - velox_dwio_native_parquet_reader - arrow - velox_link_libs - ${TEST_LINK_LIBS}) - -endif() +add_executable(velox_dwio_parquet_rlebp_decoder_test RleBpDecoderTest.cpp) +add_test( + NAME velox_dwio_parquet_rlebp_decoder_test + COMMAND velox_dwio_parquet_rlebp_decoder_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries( + velox_dwio_parquet_rlebp_decoder_test + velox_dwio_native_parquet_reader + arrow + velox_link_libs + ${TEST_LINK_LIBS}) diff --git a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp index c7510d27bff81..57b52ef731ffe 100644 --- a/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp +++ b/velox/dwio/parquet/tests/writer/ParquetWriterTest.cpp @@ -203,7 +203,6 @@ TEST_F(ParquetWriterTest, parquetWriteTimestampTimeZoneWithDefault) { writer->close(); }; -#ifdef VELOX_ENABLE_PARQUET DEBUG_ONLY_TEST_F(ParquetWriterTest, unitFromHiveConfig) { SCOPED_TESTVALUE_SET( "facebook::velox::parquet::Writer::write", @@ -232,7 +231,6 @@ DEBUG_ONLY_TEST_F(ParquetWriterTest, unitFromHiveConfig) { .planNode(); AssertQueryBuilder(plan).copyResults(pool_.get()); } -#endif } // namespace diff --git a/velox/exec/benchmarks/CMakeLists.txt b/velox/exec/benchmarks/CMakeLists.txt index b3a198a11de71..d9d69e7f88a1f 100644 --- a/velox/exec/benchmarks/CMakeLists.txt +++ b/velox/exec/benchmarks/CMakeLists.txt @@ -74,18 +74,16 @@ target_link_libraries( velox_vector_test_lib Folly::follybenchmark) -if(${VELOX_ENABLE_PARQUET}) - add_executable(velox_sort_benchmark RowContainerSortBenchmark.cpp) - - target_link_libraries( - velox_sort_benchmark - velox_exec - velox_exec_test_lib - velox_vector_test_lib - Folly::follybenchmark - arrow - thrift) -endif() +add_executable(velox_sort_benchmark RowContainerSortBenchmark.cpp) + +target_link_libraries( + velox_sort_benchmark + velox_exec + velox_exec_test_lib + velox_vector_test_lib + Folly::follybenchmark + arrow + thrift) add_executable(velox_prefixsort_benchmark PrefixSortBenchmark.cpp) diff --git a/velox/vector/arrow/CMakeLists.txt b/velox/vector/arrow/CMakeLists.txt index ed3733205fbf2..37424d372021a 100644 --- a/velox/vector/arrow/CMakeLists.txt +++ b/velox/vector/arrow/CMakeLists.txt @@ -20,6 +20,6 @@ velox_link_libraries( velox_buffer velox_exception) -if(VELOX_BUILD_TESTING AND VELOX_ENABLE_ARROW) +if(VELOX_BUILD_TESTING) add_subdirectory(tests) endif()