New chunkified latest-at APIs and caches #306

Open · wants to merge 3 commits into base: main

Changes from all commits
1 change: 1 addition & 0 deletions ARCHITECTURE.md
@@ -161,6 +161,7 @@ Update instructions:
|----------------------|--------------------------------------------------------------------------|
| re_entity_db | In-memory storage of Rerun entities |
| re_query | Querying data in the re_chunk_store |
| re_query2 | Querying data in the re_chunk_store |
| re_types | The built-in Rerun data types, component types, and archetypes. |
| re_types_blueprint | The core traits and types that power Rerun's Blueprint sub-system. |
| re_log_encoding | Helpers for encoding and transporting Rerun log messages |
33 changes: 33 additions & 0 deletions Cargo.lock
@@ -4715,6 +4715,39 @@ dependencies = [
"thiserror",
]

[[package]]
name = "re_query2"
version = "0.18.0-alpha.1+dev"
dependencies = [
"ahash",
"anyhow",
"backtrace",
"criterion",
"indent",
"indexmap 2.1.0",
"itertools 0.13.0",
"mimalloc",
"nohash-hasher",
"parking_lot",
"paste",
"rand",
"re_arrow2",
"re_chunk",
"re_chunk_store",
"re_error",
"re_format",
"re_log",
"re_log_types",
"re_tracing",
"re_tuid",
"re_types",
"re_types_core",
"seq-macro",
"similar-asserts",
"static_assertions",
"thiserror",
]

[[package]]
name = "re_renderer"
version = "0.18.0-alpha.1+dev"
1 change: 1 addition & 0 deletions Cargo.toml
@@ -49,6 +49,7 @@ re_format_arrow = { path = "crates/store/re_format_arrow", version = "=0.18.0-al
re_log_encoding = { path = "crates/store/re_log_encoding", version = "=0.18.0-alpha.1", default-features = false }
re_log_types = { path = "crates/store/re_log_types", version = "=0.18.0-alpha.1", default-features = false }
re_query = { path = "crates/store/re_query", version = "=0.18.0-alpha.1", default-features = false }
re_query2 = { path = "crates/store/re_query2", version = "=0.18.0-alpha.1", default-features = false }
re_sdk_comms = { path = "crates/store/re_sdk_comms", version = "=0.18.0-alpha.1", default-features = false }
re_types = { path = "crates/store/re_types", version = "=0.18.0-alpha.1", default-features = false }
re_types_blueprint = { path = "crates/store/re_types_blueprint", version = "=0.18.0-alpha.1", default-features = false }
2 changes: 2 additions & 0 deletions crates/store/re_query/Cargo.toml
@@ -69,11 +69,13 @@ bench = false
name = "clamped_zip"
required-features = ["codegen"]
bench = false
doc = false # we're already documenting the one in `re_query2`

[[bin]]
name = "range_zip"
required-features = ["codegen"]
bench = false
doc = false # we're already documenting the one in `re_query2`


[[bench]]
36 changes: 35 additions & 1 deletion crates/store/re_query/src/cache.rs
@@ -4,11 +4,12 @@ use std::{
};

use ahash::{HashMap, HashSet};
use nohash_hasher::IntSet;
use parking_lot::RwLock;

use re_chunk_store::{ChunkStore, ChunkStoreDiff, ChunkStoreEvent, ChunkStoreSubscriber};
use re_log_types::{EntityPath, ResolvedTimeRange, StoreId, TimeInt, Timeline};
use re_types_core::ComponentName;
use re_types_core::{components::ClearIsRecursive, ComponentName, Loggable as _};

use crate::{LatestAtCache, RangeCache};

@@ -70,6 +71,14 @@ pub struct Caches {
/// The [`StoreId`] of the associated [`ChunkStore`].
pub(crate) store_id: StoreId,

/// Keeps track of which entities have had any `Clear`-related data on any timeline at any
/// point in time.
///
/// This is used to optimize read-time clears, so that we don't unnecessarily pay for the fixed
/// overhead of all the query layers when we know for a fact that there won't be any data there.
/// This is a huge performance improvement in practice, especially in recordings with many entities.
pub(crate) might_require_clearing: RwLock<IntSet<EntityPath>>,

// NOTE: `Arc` so we can cheaply free the top-level lock early when needed.
pub(crate) latest_at_per_cache_key: RwLock<HashMap<CacheKey, Arc<RwLock<LatestAtCache>>>>,

@@ -81,12 +90,25 @@ impl std::fmt::Debug for Caches {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
store_id,
might_require_clearing,
latest_at_per_cache_key,
range_per_cache_key,
} = self;

let mut strings = Vec::new();

strings.push(format!(
"[Entities that must be checked for clears @ {store_id}]\n"
));
{
let sorted: BTreeSet<EntityPath> =
might_require_clearing.read().iter().cloned().collect();
for entity_path in sorted {
strings.push(format!(" * {entity_path}\n"));
}
strings.push("\n".to_owned());
}

strings.push(format!("[LatestAt @ {store_id}]"));
{
let latest_at_per_cache_key = latest_at_per_cache_key.read();
@@ -130,6 +152,7 @@
pub fn new(store: &ChunkStore) -> Self {
Self {
store_id: store.id().clone(),
might_require_clearing: Default::default(),
latest_at_per_cache_key: Default::default(),
range_per_cache_key: Default::default(),
}
@@ -139,10 +162,12 @@
pub fn clear(&self) {
let Self {
store_id: _,
might_require_clearing,
latest_at_per_cache_key,
range_per_cache_key,
} = self;

might_require_clearing.write().clear();
latest_at_per_cache_key.write().clear();
range_per_cache_key.write().clear();
}
@@ -223,6 +248,7 @@
}
}

let mut might_require_clearing = self.might_require_clearing.write();
let caches_latest_at = self.latest_at_per_cache_key.write();
let caches_range = self.range_per_cache_key.write();
// NOTE: Don't release the top-level locks -- even though this cannot happen yet with
@@ -237,6 +263,10 @@
// But since this pretty much never happens in practice, let's not go there until we
// have metrics showing that show we need to.
for (entity_path, component_name) in compacted.static_ {
if component_name == ClearIsRecursive::name() {
might_require_clearing.insert(entity_path.clone());
}

for (key, cache) in caches_latest_at.iter() {
if key.entity_path == entity_path && key.component_name == component_name {
cache.write().pending_invalidations.insert(TimeInt::STATIC);
@@ -255,6 +285,10 @@
re_tracing::profile_scope!("temporal");

for (key, times) in compacted.temporal {
if key.component_name == ClearIsRecursive::name() {
might_require_clearing.insert(key.entity_path.clone());
}

if let Some(cache) = caches_latest_at.get(&key) {
cache
.write()
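Taken together, the `cache.rs` changes amount to a simple write-side index: the store subscriber records every entity that has ever carried `Clear`-related data. A minimal, self-contained sketch of that bookkeeping — plain `std` types stand in for `EntityPath`, `IntSet`, and the `ChunkStoreSubscriber` plumbing, and the fully-qualified component name string is an assumption:

```rust
use std::collections::HashSet;

/// Stand-in for `EntityPath`: a slash-separated path such as "world/robot/arm".
type EntityPath = String;

#[derive(Default)]
struct Caches {
    /// Entities that have ever had `Clear`-related data, on any timeline.
    might_require_clearing: HashSet<EntityPath>,
}

impl Caches {
    /// Write side: called from the store-event handler for every component write.
    fn on_component_written(&mut self, entity_path: &str, component_name: &str) {
        // Only `Clear`-related components are recorded; everything else is ignored.
        if component_name == "rerun.components.ClearIsRecursive" {
            self.might_require_clearing.insert(entity_path.to_owned());
        }
    }
}

fn main() {
    let mut caches = Caches::default();
    caches.on_component_written("world", "rerun.components.ClearIsRecursive");
    caches.on_component_written("world/points", "rerun.components.Position3D");
    assert_eq!(caches.might_require_clearing.len(), 1); // only the clear is tracked
    println!("ok");
}
```

The set is only ever appended to at write time, which is why the read path below can consult it with a single lock acquisition per query.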
21 changes: 21 additions & 0 deletions crates/store/re_query/src/latest_at/query.rs
@@ -49,6 +49,13 @@ impl Caches {

let mut results = LatestAtResults::default();

// NOTE: This pre-filtering is extremely important: going through all these query layers
// has non-negligible overhead even if the final result ends up being nothing, and our
// number of queries for a frame grows linearly with the number of entity paths.
let component_names = component_names.into_iter().filter(|component_name| {
store.entity_has_component_on_timeline(&query.timeline(), entity_path, component_name)
});

// Query-time clears
// -----------------
//
@@ -70,8 +77,22 @@
{
re_tracing::profile_scope!("clears");

let potential_clears = self.might_require_clearing.read();

let mut clear_entity_path = entity_path.clone();
loop {
if !potential_clears.contains(&clear_entity_path) {
// This entity does not contain any `Clear`-related data at all; there's no
// point in running actual queries.

let Some(parent_entity_path) = clear_entity_path.parent() else {
break;
};
clear_entity_path = parent_entity_path;

continue;
}

let key = CacheKey::new(
clear_entity_path.clone(),
query.timeline(),
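Continuing the sketch above, the read side pairs the two new optimizations: component names are filtered against the store before any per-component query runs, and the clear-walk up the entity tree consults `might_require_clearing` so ancestors that never logged a `Clear` cost nothing. A hypothetical, self-contained rendering — the `has_component` closure stands in for `ChunkStore::entity_has_component_on_timeline`:

```rust
use std::collections::HashSet;

type EntityPath = String; // stand-in, as in the previous sketch

struct Caches {
    might_require_clearing: HashSet<EntityPath>,
}

/// Keep only component names the store has actually seen for this entity on
/// this timeline, so the per-component query stack never runs on empty results.
fn prefilter<'a>(
    component_names: &'a [&'a str],
    has_component: impl Fn(&str) -> bool + 'a,
) -> impl Iterator<Item = &'a str> + 'a {
    component_names
        .iter()
        .copied()
        .filter(move |name| has_component(name))
}

impl Caches {
    /// Does `entity_path` or any of its ancestors possibly carry a clear?
    fn may_be_cleared(&self, entity_path: &str) -> bool {
        let mut path = entity_path;
        loop {
            if self.might_require_clearing.contains(path) {
                return true; // only now is an actual clear query worth running
            }
            match path.rsplit_once('/') {
                Some((parent, _)) => path = parent,
                None => return false, // reached the root without a single hit
            }
        }
    }
}

fn main() {
    let caches = Caches {
        might_require_clearing: std::iter::once("world".to_owned()).collect(),
    };
    let logged: HashSet<&str> = std::iter::once("rerun.components.Position3D").collect();

    let requested = ["rerun.components.Position3D", "rerun.components.Color"];
    let queried: Vec<_> = prefilter(&requested, |name| logged.contains(name)).collect();
    assert_eq!(queried, vec!["rerun.components.Position3D"]); // `Color` was never logged

    assert!(caches.may_be_cleared("world/robot/arm")); // ancestor "world" saw a clear
    assert!(!caches.may_be_cleared("other")); // no clears anywhere on this path
    println!("ok");
}
```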
7 changes: 7 additions & 0 deletions crates/store/re_query/src/range/query.rs
@@ -30,6 +30,13 @@ impl Caches {

let mut results = RangeResults::new(query.clone());

// NOTE: This pre-filtering is extremely important: going through all these query layers
// has non-negligible overhead even if the final result ends up being nothing, and our
// number of queries for a frame grows linearly with the number of entity paths.
let component_names = component_names.into_iter().filter(|component_name| {
store.entity_has_component_on_timeline(&query.timeline(), entity_path, component_name)
});

for component_name in component_names {
let key = CacheKey::new(entity_path.clone(), query.timeline(), component_name);

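The same pre-filter guards the range path. For it to be a net win, `entity_has_component_on_timeline` has to be far cheaper than the query stack it short-circuits. One plausible way to get there — an assumption about the design, not necessarily how `re_chunk_store` actually implements it — is a per-(timeline, entity) set of logged component names maintained at write time:

```rust
use std::collections::{HashMap, HashSet};

/// Hypothetical write-time index: (timeline, entity path) -> logged components.
#[derive(Default)]
struct ComponentIndex {
    per_timeline_entity: HashMap<(String, String), HashSet<String>>,
}

impl ComponentIndex {
    /// Maintained on every write, so reads never have to scan chunks.
    fn on_write(&mut self, timeline: &str, entity_path: &str, component_name: &str) {
        self.per_timeline_entity
            .entry((timeline.to_owned(), entity_path.to_owned()))
            .or_default()
            .insert(component_name.to_owned());
    }

    /// Expected O(1): cheap enough to call once per (entity, component) pair
    /// at the top of every latest-at or range query.
    fn entity_has_component_on_timeline(
        &self,
        timeline: &str,
        entity_path: &str,
        component_name: &str,
    ) -> bool {
        self.per_timeline_entity
            .get(&(timeline.to_owned(), entity_path.to_owned()))
            .map_or(false, |components| components.contains(component_name))
    }
}

fn main() {
    let mut index = ComponentIndex::default();
    index.on_write("frame", "world/points", "rerun.components.Position3D");

    assert!(index.entity_has_component_on_timeline(
        "frame",
        "world/points",
        "rerun.components.Position3D"
    ));
    assert!(!index.entity_has_component_on_timeline(
        "frame",
        "world/points",
        "rerun.components.Color"
    ));
    println!("ok");
}
```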
81 changes: 81 additions & 0 deletions crates/store/re_query2/Cargo.toml
@@ -0,0 +1,81 @@
[package]
name = "re_query2"
authors.workspace = true
description = "High-level query APIs"
edition.workspace = true
homepage.workspace = true
include.workspace = true
license.workspace = true
publish = true
readme = "README.md"
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[lints]
workspace = true

[package.metadata.docs.rs]
all-features = true


[features]
default = []

## Enable codegen helper binaries (generates ClampedZip & RangeZip implementations).
codegen = []


[dependencies]
# Rerun dependencies:
re_chunk.workspace = true
re_chunk_store.workspace = true
re_error.workspace = true
re_format.workspace = true
re_log.workspace = true
re_log_types.workspace = true
re_tracing.workspace = true
re_tuid.workspace = true
re_types_core.workspace = true

# External dependencies:
ahash.workspace = true
anyhow.workspace = true
arrow2.workspace = true
backtrace.workspace = true
indent.workspace = true
indexmap.workspace = true
itertools.workspace = true
nohash-hasher.workspace = true
parking_lot.workspace = true
paste.workspace = true
seq-macro.workspace = true
static_assertions.workspace = true
thiserror.workspace = true


[dev-dependencies]
criterion.workspace = true
mimalloc.workspace = true
rand = { workspace = true, features = ["std", "std_rng"] }
re_types.workspace = true
similar-asserts.workspace = true

[lib]
bench = false


[[bin]]
name = "clamped_zip"
required-features = ["codegen"]
bench = false

[[bin]]
name = "range_zip"
required-features = ["codegen"]
bench = false


[[bench]]
name = "latest_at"
harness = false
10 changes: 10 additions & 0 deletions crates/store/re_query2/README.md
@@ -0,0 +1,10 @@
# re_query2

Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates.

[![Latest version](https://img.shields.io/crates/v/re_query2.svg)](https://crates.io/crates/re_query2)
[![Documentation](https://docs.rs/re_query2/badge.svg)](https://docs.rs/re_query2)
![MIT](https://img.shields.io/badge/license-MIT-blue.svg)
![Apache](https://img.shields.io/badge/license-Apache-blue.svg)

High-level query APIs.