From 642976cce93bd97cd9b5eb384de6a7fa7a569fa5 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 4 Dec 2024 00:36:24 +0100 Subject: [PATCH] Tweak tuning database plot and comparison scripts (#2883) * Customize tuning sol and compare scripts * Mark compare as executable * Plot and compare each sub bench * Avoid confusing use of 'bw' * Don't filter the runs * Show relative change compared to first series * Print speedup table --- benchmarks/scripts/cccl/bench/bench.py | 4 +- benchmarks/scripts/compare.py | 75 ++++++++++++++++---------- benchmarks/scripts/sol.py | 67 ++++++++++++----------- 3 files changed, 85 insertions(+), 61 deletions(-) mode change 100644 => 100755 benchmarks/scripts/compare.py mode change 100644 => 100755 benchmarks/scripts/sol.py diff --git a/benchmarks/scripts/cccl/bench/bench.py b/benchmarks/scripts/cccl/bench/bench.py index 049dcbb6014..296d68d2aa8 100644 --- a/benchmarks/scripts/cccl/bench/bench.py +++ b/benchmarks/scripts/cccl/bench/bench.py @@ -242,10 +242,10 @@ def device_json(algname): def get_device_name(device): gpu_name = device["name"] - bw = device["global_memory_bus_width"] + bus_width = device["global_memory_bus_width"] sms = device["number_of_sms"] ecc = "eccon" if device["ecc_state"] else "eccoff" - name = "{} ({}, {}, {})".format(gpu_name, bw, sms, ecc) + name = "{} ({}, {}, {})".format(gpu_name, bus_width, sms, ecc) return name.replace('NVIDIA ', '') diff --git a/benchmarks/scripts/compare.py b/benchmarks/scripts/compare.py old mode 100644 new mode 100755 index 56a59f535ce..56567eb1604 --- a/benchmarks/scripts/compare.py +++ b/benchmarks/scripts/compare.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd +from colorama import Fore + def get_filenames_map(arr): if not arr: @@ -56,28 +58,21 @@ def alg_dfs(file): result = {} storage = cccl.bench.StorageBase(file) for algname in storage.algnames(): - subbench_df = None for subbench in storage.subbenches(algname): df = storage.alg_to_df(algname, subbench) df = df.map(lambda x: x if is_finite(x) else np.nan) df = df.dropna(subset=['center'], how='all') - df = filter_by_type(filter_by_offset_type(filter_by_problem_size(df))) + #TODO(bgruber): maybe expose the filters under a -p0, or --short flag + #df = filter_by_type(filter_by_offset_type(filter_by_problem_size(df))) df['Noise'] = df['samples'].apply(lambda x: np.std(x) / np.mean(x)) * 100 df['Mean'] = df['samples'].apply(lambda x: np.mean(x)) - df = df.drop(columns=['samples', 'center', 'bw', 'elapsed']) - if subbench_df is None: - subbench_df = df - else: - subbench_df = pd.concat([subbench_df, df]) - fused_algname = algname + '.' + subbench - if fused_algname in result: - result[fused_algname] = pd.concat([result[fused_algname], subbench_df]) - else: - result[fused_algname] = subbench_df + df = df.drop(columns=['samples', 'center', 'bw', 'elapsed', 'variant']) + fused_algname = algname.removeprefix("cub.bench.").removeprefix("thrust.bench.") + '.' + subbench + result[fused_algname] = df for algname in result: if result[algname]['cccl'].nunique() != 1: - raise ValueError(f"Multiple CCCL versions in one db '{algname}'") + print(f"WARNING: Multiple CCCL versions in one db '{algname}'") result[algname] = result[algname].drop(columns=['cccl']) return result @@ -96,29 +91,53 @@ def parse_args(): return parser.parse_args() +config_count = 0 +pass_count = 0 +faster_count = 0 +slower_count = 0 + + +def status(frac_diff, noise_ref, noise_cmp): + global config_count + global pass_count + global faster_count + global slower_count + config_count += 1 + min_noise = min(noise_ref, noise_cmp) + if abs(frac_diff) <= min_noise: + pass_count += 1 + return Fore.BLUE + "SAME" + Fore.RESET + if frac_diff < 0: + faster_count += 1 + return Fore.GREEN + "FAST" + Fore.RESET + if frac_diff > 0: + slower_count += 1 + return Fore.RED + "SLOW" + Fore.RESET + + def compare(): args = parse_args() reference_df = alg_dfs(args.reference) compare_df = alg_dfs(args.compare) - for alg in reference_df.keys() & compare_df.keys(): + for alg in sorted(reference_df.keys() & compare_df.keys()): print() print() print(f'# {alg}') - merge_columns = [col for col in reference_df[alg].columns if col not in ['Noise', 'Mean']] + # use every column except 'Noise', 'Mean', 'ctk', 'gpu' to match runs between reference and comparison file + merge_columns = [col for col in reference_df[alg].columns if col not in ['Noise', 'Mean', 'ctk', 'gpu']] df = pd.merge(reference_df[alg], compare_df[alg], on=merge_columns, suffixes=('Ref', 'Cmp')) - df['Diff'] = df['MeanCmp'] - df['MeanRef'] - df['FDiff'] = (df['Diff'] / df['MeanRef']) * 100 - - for _, row in df[['ctk', 'gpu', 'variant']].drop_duplicates().iterrows(): - ctk_version = row['ctk'] - variant = row['variant'] - gpu = row['gpu'] - case_df = df[(df['ctk'] == ctk_version) & (df['gpu'] == gpu) & (df['variant'] == variant)] - case_df = case_df.drop(columns=['ctk', 'gpu', 'variant']) - print() - print(f'## CTK {ctk_version} GPU {gpu} ({variant})') - print() - print(case_df.to_markdown(index=False)) + df['Abs. Diff'] = df['MeanCmp'] - df['MeanRef'] + df['Rel. Diff'] = (df['Abs. Diff'] / df['MeanRef']) * 100 + df['Status'] = list(map(status, df['Rel. Diff'], df['NoiseRef'], df['NoiseCmp'])) + df = df.drop(columns=['ctkRef', 'ctkCmp', 'gpuRef', 'gpuCmp']) + print() + print(df.to_markdown(index=False)) + + print("# Summary\n") + print("- Total Matches: %d" % config_count) + print(" - Pass (diff <= min_noise): %d" % pass_count) + print(" - Faster (diff > min_noise): %d" % faster_count) + print(" - Slower (diff > min_noise): %d" % slower_count) if __name__ == "__main__": diff --git a/benchmarks/scripts/sol.py b/benchmarks/scripts/sol.py old mode 100644 new mode 100755 index 9903e109405..8af2ece499c --- a/benchmarks/scripts/sol.py +++ b/benchmarks/scripts/sol.py @@ -10,20 +10,6 @@ import matplotlib.pyplot as plt -def get_filenames_map(arr): - if not arr: - return [] - - prefix = arr[0] - for string in arr: - while not string.startswith(prefix): - prefix = prefix[:-1] - if not prefix: - break - - return {string: string[len(prefix):] for string in arr} - - def is_finite(x): if isinstance(x, float): return x != np.inf and x != -np.inf @@ -60,28 +46,23 @@ def alg_dfs(files): storage = cccl.bench.StorageBase(file) for algname in storage.algnames(): for subbench in storage.subbenches(algname): - subbench_df = None df = storage.alg_to_df(algname, subbench) df = df.map(lambda x: x if is_finite(x) else np.nan) df = df.dropna(subset=['center'], how='all') df = filter_by_type(filter_by_offset_type(filter_by_problem_size(df))) df = df.filter(items=['ctk', 'cccl', 'gpu', 'variant', 'bw']) - df['variant'] = df['variant'].astype(str) + " ({})".format(file) + df['variant'] = df['variant'].astype(str) df['bw'] = df['bw'] * 100 - if subbench_df is None: - subbench_df = df + fused_algname = algname.removeprefix("cub.bench.").removeprefix("thrust.bench.") + '.' + subbench + if fused_algname in result: + result[fused_algname] = pd.concat([result[fused_algname], df]) else: - subbench_df = pd.concat([subbench_df, df]) - fused_algname = algname + '.' + subbench - if fused_algname in result: - result[fused_algname] = pd.concat([result[fused_algname], subbench_df]) - else: - result[fused_algname] = subbench_df + result[fused_algname] = df return result -def alg_bws(dfs): +def alg_bws(dfs, verbose): medians = None for algname in dfs: df = dfs[algname] @@ -90,8 +71,17 @@ def alg_bws(dfs): medians = df else: medians = pd.concat([medians, df]) - medians['hue'] = medians['ctk'].astype(str) + ' ' + medians['cccl'].astype( - str) + ' ' + medians['gpu'].astype(str) + ' ' + medians['variant'] + # print more information if it's not unique across all runs or when requested (verbose) + medians['hue'] = '' + if verbose or medians['cccl'].unique().size > 1: + medians['hue'] = medians['hue'] + 'CCCL ' + medians['cccl'].astype(str) + ' ' + gpuname = medians['gpu'] if verbose else medians['gpu'].astype(str).map(lambda x: x[:x.find('(') - 1]) + medians['hue'] = medians['hue'] + gpuname + ' ' + if medians['variant'].unique().size > 1: + variant = medians['variant'].astype(str).map(lambda x : (' ' + x if x != 'base' else '')) + medians['hue'] = medians['hue'] + variant + ' ' + if verbose or medians['ctk'].unique().size > 1: + medians['hue'] = medians['hue'] + 'CTK ' + medians['ctk'].astype(str) return medians.drop(columns=['ctk', 'cccl', 'gpu', 'variant']) @@ -106,22 +96,37 @@ def plot_sol(medians, box): ax = sns.boxenplot(data=medians, x='alg', y='bw', hue='hue') else: ax = sns.barplot(data=medians, x='alg', y='bw', hue='hue', errorbar=lambda x: (x.min(), x.max())) - for container in ax.containers: - ax.bar_label(container, fmt='%.1f') - ax.set_xticklabels(ax.get_xticklabels(), rotation=15, rotation_mode='anchor', ha='right') + ax.bar_label(ax.containers[0], fmt='%.1f') + for container in ax.containers[1:]: + labels = [f'{c:.1f}\n({(c/f)*100:.0f}%)' for f, c in zip(ax.containers[0].datavalues, container.datavalues)] + ax.bar_label(container, labels=labels) + + ax.legend(title=None) + ax.set_xlabel('Algorithm') + ax.set_ylabel('Bandwidth (%SOL)') + ax.set_xticklabels(ax.get_xticklabels(), rotation=30, rotation_mode='anchor', ha='right') plt.show() +def print_speedup(medians): + m = medians.groupby(['alg', 'hue'], sort=False).mean() + m['speedup'] = (m['bw'] / m.groupby(['alg'])['bw'].transform('first')) + print('# Speedups:') + print() + print(m.drop(columns='bw').sort_values(by='speedup', ascending=False).to_markdown()) def parse_args(): parser = argparse.ArgumentParser(description="Analyze benchmark results.") parser.add_argument('files', type=file_exists, nargs='+', help='At least one file is required.') parser.add_argument('--box', action='store_true', help='Plot box instead of bar.') + parser.add_argument('-v', action='store_true', help='Verbose legend.') return parser.parse_args() def sol(): args = parse_args() - plot_sol(alg_bws(alg_dfs(args.files)), args.box) + medians = alg_bws(alg_dfs(args.files), args.v) + print_speedup(medians) + plot_sol(medians, args.box) if __name__ == "__main__":