Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[do-not-merge] report capacity in metrics #4569

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ COPY --from=bin-builder /quickwit/config/quickwit.yaml /quickwit/config/quickwit
ENV QW_CONFIG=/quickwit/config/quickwit.yaml
ENV QW_DATA_DIR=/quickwit/qwdata
ENV QW_LISTEN_ADDRESS=0.0.0.0
ENV MALLOC_CONF=prof:true

RUN quickwit --version

Expand Down
49 changes: 48 additions & 1 deletion quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ chrono = { version = "0.4", default-features = false, features = [
] }
clap = { version = "4.4.1", features = ["env", "string"] }
colored = "2.1.0"
common-mem-prof = { git = "https://github.com/GreptimeTeam/greptimedb", rev = "fcff66e03904d80aacb91b8edd4e15240161d264" }
console-subscriber = "0.1.8"
criterion = { version = "0.5", features = ["async_tokio"] }
cron = "0.12.0"
Expand Down Expand Up @@ -149,7 +150,7 @@ matches = "0.1.9"
md5 = "0.7"
mime_guess = "2.0.4"
mockall = "0.11"
mrecordlog = { git = "https://github.com/quickwit-oss/mrecordlog", rev = "2914cad" }
mrecordlog = { git = "https://github.com/quickwit-oss/mrecordlog", rev = "e5cd7dedea568c271af715057119fb7b1022a479" }
new_string_template = "1.4.0"
nom = "7.1.3"
num_cpus = "1"
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-ingest/src/ingest_api_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ impl IngestApiService {
.await?;

let memory_usage = self.queues.memory_usage();
let new_capacity = self.memory_limit - memory_usage;
let new_capacity = self.memory_limit - memory_usage.0;
self.memory_capacity.reset_capacity(new_capacity);

Ok(())
Expand Down
10 changes: 8 additions & 2 deletions quickwit/quickwit-ingest/src/ingest_v2/ingester.rs
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,10 @@ impl Ingester {
.set(state_guard.mrecordlog.disk_usage() as i64);
INGEST_V2_METRICS
.wal_memory_usage_bytes
.set(state_guard.mrecordlog.memory_usage() as i64);
.set(state_guard.mrecordlog.memory_usage().0 as i64);
INGEST_V2_METRICS
.wal_memory_capacity_bytes
.set(state_guard.mrecordlog.memory_usage().1 as i64);

drop(state_guard);

Expand Down Expand Up @@ -791,7 +794,10 @@ impl Ingester {
.set(current_disk_usage as i64);
INGEST_V2_METRICS
.wal_memory_usage_bytes
.set(current_memory_usage as i64);
.set(current_memory_usage.0 as i64);
INGEST_V2_METRICS
.wal_memory_capacity_bytes
.set(state_guard.mrecordlog.memory_usage().1 as i64);

self.check_decommissioning_status(&mut state_guard);
let truncate_response = TruncateShardsResponse {};
Expand Down
8 changes: 7 additions & 1 deletion quickwit/quickwit-ingest/src/ingest_v2/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub(super) struct IngestV2Metrics {
pub wal_acquire_lock_request_duration_secs: HistogramVec<2>,
pub wal_disk_usage_bytes: IntGauge,
pub wal_memory_usage_bytes: IntGauge,
pub wal_memory_capacity_bytes: IntGauge,
}

impl Default for IngestV2Metrics {
Expand Down Expand Up @@ -80,7 +81,12 @@ impl Default for IngestV2Metrics {
),
wal_memory_usage_bytes: new_gauge(
"wal_memory_usage_bytes",
"Memory usage of the write-ahead log in bytes.",
"Memory usage of the write-ahead log in bytes (used).",
"quickwit_ingest",
),
wal_memory_capacity_bytes: new_gauge(
"wal_memory_capacity_bytes",
"Memory usage of the write-ahead log in bytes (allocated).",
"quickwit_ingest",
),
}
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-ingest/src/ingest_v2/mrecordlog_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub(super) fn check_enough_capacity(
requested: requested_capacity,
});
}
let memory_usage = ByteSize(mrecordlog.memory_usage() as u64);
let memory_usage = ByteSize(mrecordlog.memory_usage().0 as u64);

if memory_usage + requested_capacity > memory_capacity {
return Err(NotEnoughCapacityError::Memory {
Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-ingest/src/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl Queues {
self.record_log.disk_usage()
}

pub(crate) fn memory_usage(&self) -> usize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to fix the mrecordlog API. There is no way anyone can understand what this couple is about.

Copy link
Contributor Author

@trinity-1686a trinity-1686a Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as is, it is meant for debugging. having 3 function doing similar things seems to much. If this is to get merged, I plan on having a single one returning a struct

pub(crate) fn memory_usage(&self) -> (usize, usize) {
self.record_log.memory_usage()
}
}
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-serve/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ async-trait = { workspace = true }
base64 = { workspace = true }
bytes = { workspace = true }
bytesize = { workspace = true }
common-mem-prof = { workspace = true }
elasticsearch-dsl = "0.4.15"
flate2 = { workspace = true }
futures = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions quickwit/quickwit-serve/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,9 @@ pub async fn serve_quickwit(
if let Some(metastore_server) = &metastore_server_opt {
metastore_server.clone()
} else {
// Wait for a metastore service to be available for at most 10 seconds.
// Wait for a metastore service to be available for at most 60 seconds.
if cluster
.wait_for_ready_members(has_node_with_metastore_service, Duration::from_secs(10))
.wait_for_ready_members(has_node_with_metastore_service, Duration::from_secs(60))
.await
.is_err()
{
Expand Down
17 changes: 16 additions & 1 deletion quickwit/quickwit-serve/src/node_info_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ pub fn node_info_handler(
runtime_info: &'static RuntimeInfo,
config: Arc<NodeConfig>,
) -> impl Filter<Extract = (impl warp::Reply,), Error = Rejection> + Clone {
node_version_handler(build_info, runtime_info).or(node_config_handler(config))
node_version_handler(build_info, runtime_info)
.or(node_config_handler(config))
.or(memory_map_handler())
}

#[utoipa::path(get, tag = "Node Info", path = "/version")]
Expand Down Expand Up @@ -76,6 +78,19 @@ async fn get_config(config: Arc<NodeConfig>) -> impl warp::Reply {
warp::reply::json(&config)
}

#[utoipa::path(get, tag = "Node Info", path = "/memory-map")]
fn memory_map_handler() -> impl Filter<Extract = (impl warp::Reply,), Error = Rejection> + Clone {
warp::path("memory-map")
.and(warp::path::end())
.then(get_memory_map)
}

async fn get_memory_map() -> impl warp::Reply {
common_mem_prof::dump_profile()
.await
.map_err(|e| e.to_string())
}

#[cfg(test)]
mod tests {
use assert_json_diff::assert_json_include;
Expand Down