Skip to content

Commit

Permalink
Add stats report daemon to Velox (#9653)
Browse files Browse the repository at this point in the history
Summary:
The stats report daemon periodically exports Velox metrics. Currently, only memory-related metrics are supported; additional metrics will be added in follow-ups.

Pull Request resolved: #9653

Reviewed By: xiaoxmeng

Differential Revision: D56690811

Pulled By: tanjialiang

fbshipit-source-id: e6f7236df9ea1445355f72b6f94b52704e0e1f4e
  • Loading branch information
tanjialiang authored and facebook-github-bot committed May 2, 2024
1 parent 15780e0 commit ebcbec7
Show file tree
Hide file tree
Showing 10 changed files with 286 additions and 73 deletions.
1 change: 1 addition & 0 deletions velox/common/base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ add_library(
BitUtil.cpp
Counters.cpp
Fs.cpp
PeriodicStatsReporter.cpp
RandomUtil.cpp
RawVector.cpp
RuntimeMetrics.cpp
Expand Down
36 changes: 18 additions & 18 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ void registerVeloxMetrics() {

/// ================== Memory Arbitration Counters =================

// The number of arbitration requests.
DEFINE_METRIC(
kMetricArbitratorRequestsCount, facebook::velox::StatType::COUNT);

// The number of times a query-level memory pool is aborted as a result of a
// memory arbitration process. Aborting the memory pool eventually results in
// the cancellation of the original query.
DEFINE_METRIC(
kMetricArbitratorAbortedCount, facebook::velox::StatType::COUNT);

// The number of times a memory arbitration request failed. This may occur
// either because the requester was terminated during the processing of its
// request, the arbitration request would surpass the maximum allowed capacity
// for the requester, or the arbitration process couldn't release the
// requested amount of memory.
DEFINE_METRIC(
kMetricArbitratorFailuresCount, facebook::velox::StatType::COUNT);

// Tracks the memory reclaim count on an operator.
DEFINE_METRIC(kMetricMemoryReclaimCount, facebook::velox::StatType::COUNT);

Expand Down Expand Up @@ -82,10 +100,6 @@ void registerVeloxMetrics() {
DEFINE_METRIC(
kMetricMemoryNonReclaimableCount, facebook::velox::StatType::COUNT);

// The number of arbitration requests.
DEFINE_METRIC(
kMetricArbitratorRequestsCount, facebook::velox::StatType::COUNT);

// The number of arbitration that reclaims the used memory from the query
// which initiates the memory arbitration request itself. It ensures the
// memory arbitration request won't exceed its per-query memory capacity
Expand All @@ -103,20 +117,6 @@ void registerVeloxMetrics() {
kMetricArbitratorGlobalArbitrationCount,
facebook::velox::StatType::COUNT);

// The number of times a query level memory pool is aborted as a result of a
// memory arbitration process. The memory pool aborted will eventually result
// in a cancelling the original query.
DEFINE_METRIC(
kMetricArbitratorAbortedCount, facebook::velox::StatType::COUNT);

// The number of times a memory arbitration request failed. This may occur
// either because the requester was terminated during the processing of its
// request, the arbitration request would surpass the maximum allowed capacity
// for the requester, or the arbitration process couldn't release the
// requested amount of memory.
DEFINE_METRIC(
kMetricArbitratorFailuresCount, facebook::velox::StatType::COUNT);

// The distribution of the amount of time an arbitration request stays queued
// in range of [0, 600s] with 20 buckets. It is configured to report the
// latency at P50, P90, P99, and P100 percentiles.
Expand Down
18 changes: 9 additions & 9 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,12 @@ constexpr folly::StringPiece kMetricMemoryPoolReservationLeakBytes{
constexpr folly::StringPiece kMetricMemoryAllocatorDoubleFreeCount{
"velox.memory_allocator_double_free_count"};

constexpr folly::StringPiece kMetricArbitratorRequestsCount{
"velox.arbitrator_requests_count"};

constexpr folly::StringPiece kMetricArbitratorLocalArbitrationCount{
"velox.arbitrator_local_arbitration_count"};

constexpr folly::StringPiece kMetricArbitratorGlobalArbitrationCount{
"velox.arbitrator_global_arbitration_count"};

constexpr folly::StringPiece kMetricArbitratorAbortedCount{
"velox.arbitrator_aborted_count"};

constexpr folly::StringPiece kMetricArbitratorFailuresCount{
"velox.arbitrator_failures_count"};

constexpr folly::StringPiece kMetricArbitratorQueueTimeMs{
"velox.arbitrator_queue_time_ms"};

Expand Down Expand Up @@ -128,4 +119,13 @@ constexpr folly::StringPiece kMetricSpillWriteTimeMs{

constexpr folly::StringPiece kMetricFileWriterEarlyFlushedRawBytes{
"velox.file_writer_early_flushed_raw_bytes"};

// Metric key for the number of memory arbitration requests.
constexpr folly::StringPiece kMetricArbitratorRequestsCount{
"velox.arbitrator_requests_count"};

// Metric key for the number of times a query-level memory pool is aborted as
// a result of a memory arbitration process.
constexpr folly::StringPiece kMetricArbitratorAbortedCount{
"velox.arbitrator_aborted_count"};

// Metric key for the number of times a memory arbitration request failed.
constexpr folly::StringPiece kMetricArbitratorFailuresCount{
"velox.arbitrator_failures_count"};
} // namespace facebook::velox
64 changes: 64 additions & 0 deletions velox/common/base/PeriodicStatsReporter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/common/base/PeriodicStatsReporter.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/common/memory/Memory.h"

namespace facebook::velox {

namespace {
// Records 'counter' under metric 'name' through RECORD_METRIC_VALUE, but only
// when 'counter' is non-zero.
//
// The expansion is wrapped in do { ... } while (0) so that the macro behaves
// like a single statement: it must be followed by ';' and can safely be used
// as the unbraced branch of an if/else without stealing the 'else' or
// producing a stray empty statement.
//
// NOTE: 'counter' is evaluated twice when it is non-zero, so do not pass an
// expression with side effects. Being a preprocessor macro, the definition is
// not actually scoped by the enclosing anonymous namespace.
#define REPORT_IF_NOT_ZERO(name, counter)     \
  do {                                        \
    if ((counter) != 0) {                     \
      RECORD_METRIC_VALUE((name), (counter)); \
    }                                         \
  } while (0)
} // namespace

PeriodicStatsReporter::PeriodicStatsReporter(
const velox::memory::MemoryArbitrator* arbitrator,
const Options& options)
: arbitrator_(arbitrator), options_(options) {}

void PeriodicStatsReporter::start() {
LOG(INFO) << "Starting PeriodicStatsReporter with options "
<< options_.toString();
addTask(
"report_arbitrator_stats",
[this]() { reportArbitratorStats(); },
options_.arbitratorStatsIntervalMs);
}

// Logs the shutdown and then stops the underlying 'scheduler_'.
// NOTE(review): presumably scheduler_.stop() blocks until the background task
// threads have exited -- confirm against the folly documentation.
void PeriodicStatsReporter::stop() {
LOG(INFO) << "Stopping PeriodicStatsReporter";
scheduler_.stop();
}

void PeriodicStatsReporter::reportArbitratorStats() {
if (arbitrator_ == nullptr) {
return;
}

const auto stats = arbitrator_->stats();
RECORD_METRIC_VALUE(
kMetricArbitratorFreeCapacityBytes,
stats.freeCapacityBytes + stats.freeReservedCapacityBytes);
RECORD_METRIC_VALUE(
kMetricArbitratorFreeReservedCapacityBytes,
stats.freeReservedCapacityBytes);
}

} // namespace facebook::velox
86 changes: 86 additions & 0 deletions velox/common/base/PeriodicStatsReporter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <folly/experimental/ThreadedRepeatingFunctionRunner.h>
#include "velox/common/memory/MemoryArbitrator.h"

namespace folly {
class CPUThreadPoolExecutor;
}

namespace facebook::velox {

namespace memory {
class MemoryAllocator;
}

namespace cache {
class AsyncDataCache;
}

/// Manages a background daemon thread to report stats through 'StatsReporter'.
class PeriodicStatsReporter {
public:
struct Options {
Options() {}

uint64_t arbitratorStatsIntervalMs{60'000};

std::string toString() const {
return fmt::format(
"arbitratorStatsIntervalMs:{}", arbitratorStatsIntervalMs);
}
};

PeriodicStatsReporter(
const velox::memory::MemoryArbitrator* arbitrator,
const Options& options = Options());

/// Invoked to start the report daemon in background.
void start();

/// Invoked to stop the report daemon in background.
void stop();

private:
// Add a task to run periodically.
template <typename TFunc>
void addTask(const std::string& taskName, TFunc&& func, size_t intervalMs) {
scheduler_.add(
taskName,
[taskName,
intervalMs,
func = std::forward<TFunc>(func)]() mutable noexcept {
try {
func();
} catch (const std::exception& e) {
LOG(ERROR) << "Error running periodic task " << taskName << ": "
<< e.what();
}
return std::chrono::milliseconds(intervalMs);
});
}

void reportArbitratorStats();

const velox::memory::MemoryArbitrator* const arbitrator_{nullptr};
const Options options_;

folly::ThreadedRepeatingFunctionRunner scheduler_;
};
} // namespace facebook::velox
6 changes: 4 additions & 2 deletions velox/common/base/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ add_executable(
FsTest.cpp
RangeTest.cpp
RawVectorTest.cpp
ScratchTest.cpp
RuntimeMetricsTest.cpp
ScopedLockTest.cpp
ScratchTest.cpp
SemaphoreTest.cpp
SimdUtilTest.cpp
SpillConfigTest.cpp
Expand All @@ -38,7 +38,9 @@ add_test(velox_base_test velox_base_test)

target_link_libraries(
velox_base_test
PRIVATE velox_common_base
PRIVATE velox_caching
velox_common_base
velox_memory
velox_time
velox_status
velox_exception
Expand Down
Loading

0 comments on commit ebcbec7

Please sign in to comment.