Skip to content

Commit

Permalink
Add accounts hash pubkey bins to cli (solana-labs#3578)
Browse files Browse the repository at this point in the history
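Add a hidden CLI option (`--accounts-db-hash-calculation-pubkey-bins`) to ledger-tool and the validator for setting the number of pubkey bins used in accounts hash calculation.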

Co-authored-by: HaoranYi <[email protected]>
  • Loading branch information
HaoranYi and HaoranYi authored Nov 11, 2024
1 parent 7037f88 commit 26dd4ad
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 18 deletions.
22 changes: 17 additions & 5 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ const DEFAULT_NUM_DIRS: u32 = 4;

// When calculating hashes, it is helpful to break the pubkeys found into bins based on the pubkey value.
// More bins means smaller vectors to sort, copy, etc.
pub const PUBKEY_BINS_FOR_CALCULATING_HASHES: usize = 65536;
pub const DEFAULT_HASH_CALCULATION_PUBKEY_BINS: usize = 65536;

// Without chunks, we end up with 1 output vec for each outer snapshot storage.
// This results in too many vectors to be efficient.
Expand Down Expand Up @@ -514,6 +514,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig {
num_clean_threads: None,
num_foreground_threads: None,
num_hash_threads: None,
hash_calculation_pubkey_bins: None,
};
pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig {
index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS),
Expand All @@ -538,6 +539,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig
num_clean_threads: None,
num_foreground_threads: None,
num_hash_threads: None,
hash_calculation_pubkey_bins: None,
};

pub type BinnedHashData = Vec<Vec<CalculateHashIntermediate>>;
Expand Down Expand Up @@ -651,6 +653,7 @@ pub struct AccountsDbConfig {
pub ancient_append_vec_offset: Option<i64>,
pub ancient_storage_ideal_size: Option<u64>,
pub max_ancient_storages: Option<usize>,
pub hash_calculation_pubkey_bins: Option<usize>,
pub test_skip_rewrites_but_include_in_bank_hash: bool,
pub skip_initial_hash_calc: bool,
pub exhaustively_verify_refcounts: bool,
Expand Down Expand Up @@ -1474,6 +1477,9 @@ pub struct AccountsDb {
/// true iff we want to skip the initial hash calculation on startup
pub skip_initial_hash_calc: bool,

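/// The number of pubkey bins used for accounts hash calculation.
/// Set from `AccountsDbConfig::hash_calculation_pubkey_bins`, falling back to
/// `DEFAULT_HASH_CALCULATION_PUBKEY_BINS` when the config leaves it unset.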
pub hash_calculation_pubkey_bins: usize,

pub storage: AccountStorage,

/// from AccountsDbConfig
Expand Down Expand Up @@ -2011,6 +2017,9 @@ impl AccountsDb {
max_ancient_storages: accounts_db_config
.max_ancient_storages
.unwrap_or(DEFAULT_MAX_ANCIENT_STORAGES),
hash_calculation_pubkey_bins: accounts_db_config
.hash_calculation_pubkey_bins
.unwrap_or(DEFAULT_HASH_CALCULATION_PUBKEY_BINS),
account_indexes: accounts_db_config.account_indexes.unwrap_or_default(),
shrink_ratio: accounts_db_config.shrink_ratio,
accounts_update_notifier,
Expand Down Expand Up @@ -7258,7 +7267,7 @@ impl AccountsDb {

let bounds = Range {
start: 0,
end: PUBKEY_BINS_FOR_CALCULATING_HASHES,
end: self.hash_calculation_pubkey_bins,
};

let accounts_hasher = AccountsHasher {
Expand All @@ -7272,7 +7281,7 @@ impl AccountsDb {
&cache_hash_data,
storages,
&mut stats,
PUBKEY_BINS_FOR_CALCULATING_HASHES,
self.hash_calculation_pubkey_bins,
&bounds,
config,
);
Expand All @@ -7297,8 +7306,11 @@ impl AccountsDb {
.collect::<Vec<_>>();

// turn raw data into merkle tree hashes and sum of lamports
let (accounts_hash, capitalization) =
accounts_hasher.rest_of_hash_calculation(&cache_hash_intermediates, &mut stats);
let (accounts_hash, capitalization) = accounts_hasher.rest_of_hash_calculation(
&cache_hash_intermediates,
self.hash_calculation_pubkey_bins,
&mut stats,
);
let accounts_hash = match kind {
CalcAccountsHashKind::Full => AccountsHashKind::Full(AccountsHash(accounts_hash)),
CalcAccountsHashKind::Incremental => {
Expand Down
35 changes: 22 additions & 13 deletions accounts-db/src/accounts_hash.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use {
crate::{
accounts_db::{AccountStorageEntry, PUBKEY_BINS_FOR_CALCULATING_HASHES},
accounts_db::AccountStorageEntry,
active_stats::{ActiveStatItem, ActiveStats},
ancestors::Ancestors,
pubkey_bins::PubkeyBinCalculator24,
Expand Down Expand Up @@ -1213,13 +1213,10 @@ impl<'a> AccountsHasher<'a> {
pub fn rest_of_hash_calculation(
&self,
sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
bins: usize,
stats: &mut HashStats,
) -> (Hash, u64) {
let (hashes, total_lamports) = self.de_dup_accounts(
sorted_data_by_pubkey,
stats,
PUBKEY_BINS_FOR_CALCULATING_HASHES,
);
let (hashes, total_lamports) = self.de_dup_accounts(sorted_data_by_pubkey, stats, bins);

let cumulative = CumulativeHashesFromFiles::from_files(hashes);

Expand Down Expand Up @@ -1360,7 +1357,10 @@ impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {

#[cfg(test)]
mod tests {
use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};
use {
super::*, crate::accounts_db::DEFAULT_HASH_CALCULATION_PUBKEY_BINS, itertools::Itertools,
std::str::FromStr, tempfile::tempdir,
};

lazy_static! {
static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
Expand Down Expand Up @@ -1606,8 +1606,11 @@ mod tests {

let dir_for_temp_cache_files = tempdir().unwrap();
let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
let result = accounts_hash
.rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
let result = accounts_hash.rest_of_hash_calculation(
&for_rest(&account_maps),
DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
&mut HashStats::default(),
);
let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
assert_eq!((result.0, result.1), (expected_hash, 88));

Expand All @@ -1621,8 +1624,11 @@ mod tests {
};
account_maps.insert(0, val);

let result = accounts_hash
.rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
let result = accounts_hash.rest_of_hash_calculation(
&for_rest(&account_maps),
DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
&mut HashStats::default(),
);
let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
assert_eq!((result.0, result.1), (expected_hash, 108));

Expand All @@ -1636,8 +1642,11 @@ mod tests {
};
account_maps.insert(1, val);

let result = accounts_hash
.rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
let result = accounts_hash.rest_of_hash_calculation(
&for_rest(&account_maps),
DEFAULT_HASH_CALCULATION_PUBKEY_BINS,
&mut HashStats::default(),
);
let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
assert_eq!((result.0, result.1), (expected_hash, 118));
}
Expand Down
13 changes: 13 additions & 0 deletions ledger-tool/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@ pub fn accounts_db_args<'a, 'b>() -> Box<[Arg<'a, 'b>]> {
.takes_value(true)
.help("The number of ancient storages the ancient slot combining should converge to.")
.hidden(hidden_unless_forced()),
Arg::with_name("accounts_db_hash_calculation_pubkey_bins")
.long("accounts-db-hash-calculation-pubkey-bins")
.value_name("USIZE")
.validator(is_parsable::<usize>)
.takes_value(true)
.help("The number of pubkey bins used for accounts hash calculation.")
.hidden(hidden_unless_forced()),
]
.into_boxed_slice()
}
Expand Down Expand Up @@ -370,6 +377,12 @@ pub fn get_accounts_db_config(
)
.ok(),
max_ancient_storages: value_t!(arg_matches, "accounts_db_max_ancient_storages", usize).ok(),
hash_calculation_pubkey_bins: value_t!(
arg_matches,
"accounts_db_hash_calculation_pubkey_bins",
usize
)
.ok(),
exhaustively_verify_refcounts: arg_matches.is_present("accounts_db_verify_refcounts"),
skip_initial_hash_calc: arg_matches.is_present("accounts_db_skip_initial_hash_calculation"),
test_partitioned_epoch_rewards,
Expand Down
9 changes: 9 additions & 0 deletions validator/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,15 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
.help("The number of ancient storages the ancient slot combining should converge to.")
.hidden(hidden_unless_forced()),
)
.arg(
Arg::with_name("accounts_db_hash_calculation_pubkey_bins")
.long("accounts-db-hash-calculation-pubkey-bins")
.value_name("USIZE")
.validator(is_parsable::<usize>)
.takes_value(true)
.help("The number of pubkey bins used for accounts hash calculation.")
.hidden(hidden_unless_forced()),
)
.arg(
Arg::with_name("accounts_db_cache_limit_mb")
.long("accounts-db-cache-limit-mb")
Expand Down
6 changes: 6 additions & 0 deletions validator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,12 @@ pub fn main() {
)
.ok(),
max_ancient_storages: value_t!(matches, "accounts_db_max_ancient_storages", usize).ok(),
hash_calculation_pubkey_bins: value_t!(
matches,
"accounts_db_hash_calculation_pubkey_bins",
usize
)
.ok(),
exhaustively_verify_refcounts: matches.is_present("accounts_db_verify_refcounts"),
create_ancient_storage,
test_partitioned_epoch_rewards,
Expand Down

0 comments on commit 26dd4ad

Please sign in to comment.