From 26dd4ad11c416a2c212bb60d40f5262cbb0645b5 Mon Sep 17 00:00:00 2001 From: HaoranYi <219428+HaoranYi@users.noreply.github.com> Date: Mon, 11 Nov 2024 13:37:13 -0600 Subject: [PATCH] Add accounts hash pubkey bins to cli (#3578) add accounts hash pubkey bins to cli Co-authored-by: HaoranYi --- accounts-db/src/accounts_db.rs | 22 +++++++++++++++----- accounts-db/src/accounts_hash.rs | 35 ++++++++++++++++++++------------ ledger-tool/src/args.rs | 13 ++++++++++++ validator/src/cli.rs | 9 ++++++++ validator/src/main.rs | 6 ++++++ 5 files changed, 67 insertions(+), 18 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 2620521d4286fe..35eb2efdb97416 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -127,7 +127,7 @@ const DEFAULT_NUM_DIRS: u32 = 4; // When calculating hashes, it is helpful to break the pubkeys found into bins based on the pubkey value. // More bins means smaller vectors to sort, copy, etc. -pub const PUBKEY_BINS_FOR_CALCULATING_HASHES: usize = 65536; +pub const DEFAULT_HASH_CALCULATION_PUBKEY_BINS: usize = 65536; // Without chunks, we end up with 1 output vec for each outer snapshot storage. // This results in too many vectors to be efficient. 
@@ -514,6 +514,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig { num_clean_threads: None, num_foreground_threads: None, num_hash_threads: None, + hash_calculation_pubkey_bins: None, }; pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig { index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS), @@ -538,6 +539,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_BENCHMARKS: AccountsDbConfig = AccountsDbConfig num_clean_threads: None, num_foreground_threads: None, num_hash_threads: None, + hash_calculation_pubkey_bins: None, }; pub type BinnedHashData = Vec>; @@ -651,6 +653,7 @@ pub struct AccountsDbConfig { pub ancient_append_vec_offset: Option, pub ancient_storage_ideal_size: Option, pub max_ancient_storages: Option, + pub hash_calculation_pubkey_bins: Option, pub test_skip_rewrites_but_include_in_bank_hash: bool, pub skip_initial_hash_calc: bool, pub exhaustively_verify_refcounts: bool, @@ -1474,6 +1477,9 @@ pub struct AccountsDb { /// true iff we want to skip the initial hash calculation on startup pub skip_initial_hash_calc: bool, + /// The number of pubkey bins used for accounts hash calculation + pub hash_calculation_pubkey_bins: usize, + pub storage: AccountStorage, /// from AccountsDbConfig @@ -2011,6 +2017,9 @@ impl AccountsDb { max_ancient_storages: accounts_db_config .max_ancient_storages .unwrap_or(DEFAULT_MAX_ANCIENT_STORAGES), + hash_calculation_pubkey_bins: accounts_db_config + .hash_calculation_pubkey_bins + .unwrap_or(DEFAULT_HASH_CALCULATION_PUBKEY_BINS), account_indexes: accounts_db_config.account_indexes.unwrap_or_default(), shrink_ratio: accounts_db_config.shrink_ratio, accounts_update_notifier, @@ -7258,7 +7267,7 @@ impl AccountsDb { let bounds = Range { start: 0, - end: PUBKEY_BINS_FOR_CALCULATING_HASHES, + end: self.hash_calculation_pubkey_bins, }; let accounts_hasher = AccountsHasher { @@ -7272,7 +7281,7 @@ impl AccountsDb { &cache_hash_data, storages, &mut stats, - 
PUBKEY_BINS_FOR_CALCULATING_HASHES, + self.hash_calculation_pubkey_bins, &bounds, config, ); @@ -7297,8 +7306,11 @@ impl AccountsDb { .collect::>(); // turn raw data into merkle tree hashes and sum of lamports - let (accounts_hash, capitalization) = - accounts_hasher.rest_of_hash_calculation(&cache_hash_intermediates, &mut stats); + let (accounts_hash, capitalization) = accounts_hasher.rest_of_hash_calculation( + &cache_hash_intermediates, + self.hash_calculation_pubkey_bins, + &mut stats, + ); let accounts_hash = match kind { CalcAccountsHashKind::Full => AccountsHashKind::Full(AccountsHash(accounts_hash)), CalcAccountsHashKind::Incremental => { diff --git a/accounts-db/src/accounts_hash.rs b/accounts-db/src/accounts_hash.rs index bf03e03bdcd493..5dc99b3a63fc19 100644 --- a/accounts-db/src/accounts_hash.rs +++ b/accounts-db/src/accounts_hash.rs @@ -1,6 +1,6 @@ use { crate::{ - accounts_db::{AccountStorageEntry, PUBKEY_BINS_FOR_CALCULATING_HASHES}, + accounts_db::AccountStorageEntry, active_stats::{ActiveStatItem, ActiveStats}, ancestors::Ancestors, pubkey_bins::PubkeyBinCalculator24, @@ -1213,13 +1213,10 @@ impl<'a> AccountsHasher<'a> { pub fn rest_of_hash_calculation( &self, sorted_data_by_pubkey: &[&[CalculateHashIntermediate]], + bins: usize, stats: &mut HashStats, ) -> (Hash, u64) { - let (hashes, total_lamports) = self.de_dup_accounts( - sorted_data_by_pubkey, - stats, - PUBKEY_BINS_FOR_CALCULATING_HASHES, - ); + let (hashes, total_lamports) = self.de_dup_accounts(sorted_data_by_pubkey, stats, bins); let cumulative = CumulativeHashesFromFiles::from_files(hashes); @@ -1360,7 +1357,10 @@ impl From for SerdeIncrementalAccountsHash { #[cfg(test)] mod tests { - use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir}; + use { + super::*, crate::accounts_db::DEFAULT_HASH_CALCULATION_PUBKEY_BINS, itertools::Itertools, + std::str::FromStr, tempfile::tempdir, + }; lazy_static! 
{ static ref ACTIVE_STATS: ActiveStats = ActiveStats::default(); @@ -1606,8 +1606,11 @@ mod tests { let dir_for_temp_cache_files = tempdir().unwrap(); let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf()); - let result = accounts_hash - .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default()); + let result = accounts_hash.rest_of_hash_calculation( + &for_rest(&account_maps), + DEFAULT_HASH_CALCULATION_PUBKEY_BINS, + &mut HashStats::default(), + ); let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap(); assert_eq!((result.0, result.1), (expected_hash, 88)); @@ -1621,8 +1624,11 @@ mod tests { }; account_maps.insert(0, val); - let result = accounts_hash - .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default()); + let result = accounts_hash.rest_of_hash_calculation( + &for_rest(&account_maps), + DEFAULT_HASH_CALCULATION_PUBKEY_BINS, + &mut HashStats::default(), + ); let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap(); assert_eq!((result.0, result.1), (expected_hash, 108)); @@ -1636,8 +1642,11 @@ mod tests { }; account_maps.insert(1, val); - let result = accounts_hash - .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default()); + let result = accounts_hash.rest_of_hash_calculation( + &for_rest(&account_maps), + DEFAULT_HASH_CALCULATION_PUBKEY_BINS, + &mut HashStats::default(), + ); let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap(); assert_eq!((result.0, result.1), (expected_hash, 118)); } diff --git a/ledger-tool/src/args.rs b/ledger-tool/src/args.rs index 38c7303acca410..9b7229d2ae23da 100644 --- a/ledger-tool/src/args.rs +++ b/ledger-tool/src/args.rs @@ -153,6 +153,13 @@ pub fn accounts_db_args<'a, 'b>() -> Box<[Arg<'a, 'b>]> { .takes_value(true) .help("The number of ancient storages the ancient slot combining should converge to.") 
.hidden(hidden_unless_forced()), + Arg::with_name("accounts_db_hash_calculation_pubkey_bins") + .long("accounts-db-hash-calculation-pubkey-bins") + .value_name("USIZE") + .validator(is_parsable::<usize>) + .takes_value(true) + .help("The number of pubkey bins used for accounts hash calculation.") + .hidden(hidden_unless_forced()), ] .into_boxed_slice() } @@ -370,6 +377,12 @@ pub fn get_accounts_db_config( ) .ok(), max_ancient_storages: value_t!(arg_matches, "accounts_db_max_ancient_storages", usize).ok(), + hash_calculation_pubkey_bins: value_t!( + arg_matches, + "accounts_db_hash_calculation_pubkey_bins", + usize + ) + .ok(), exhaustively_verify_refcounts: arg_matches.is_present("accounts_db_verify_refcounts"), skip_initial_hash_calc: arg_matches.is_present("accounts_db_skip_initial_hash_calculation"), test_partitioned_epoch_rewards, diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 851ce6bcc31573..eb237e1d4f90de 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -1391,6 +1391,15 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { .help("The number of ancient storages the ancient slot combining should converge to.") .hidden(hidden_unless_forced()), ) + .arg( + Arg::with_name("accounts_db_hash_calculation_pubkey_bins") + .long("accounts-db-hash-calculation-pubkey-bins") + .value_name("USIZE") + .validator(is_parsable::<usize>) + .takes_value(true) + .help("The number of pubkey bins used for accounts hash calculation.") + .hidden(hidden_unless_forced()), + ) .arg( Arg::with_name("accounts_db_cache_limit_mb") .long("accounts-db-cache-limit-mb") diff --git a/validator/src/main.rs b/validator/src/main.rs index 877081ae125235..34fd5e1227bd16 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1349,6 +1349,12 @@ pub fn main() { ) .ok(), max_ancient_storages: value_t!(matches, "accounts_db_max_ancient_storages", usize).ok(), + hash_calculation_pubkey_bins: value_t!( + matches, +
"accounts_db_hash_calculation_pubkey_bins", + usize + ) + .ok(), exhaustively_verify_refcounts: matches.is_present("accounts_db_verify_refcounts"), create_ancient_storage, test_partitioned_epoch_rewards,