Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Tweak tuning database plot and comparison scripts #2883

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/scripts/cccl/bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,10 @@ def device_json(algname):

def get_device_name(device):
    """Return a human-readable device label, e.g. "A100 (5120, 108, eccon)".

    Formats the GPU name together with the global memory bus width, the
    number of SMs, and the ECC state, and strips the redundant 'NVIDIA '
    vendor prefix from the name.

    The scraped diff residue duplicated the pre-rename assignment (`bw`) and
    the first `format` call; this resolves the block to the post-diff version.
    """
    gpu_name = device["name"]
    bus_width = device["global_memory_bus_width"]
    sms = device["number_of_sms"]
    ecc = "eccon" if device["ecc_state"] else "eccoff"
    name = "{} ({}, {}, {})".format(gpu_name, bus_width, sms, ecc)
    return name.replace('NVIDIA ', '')


Expand Down
75 changes: 47 additions & 28 deletions benchmarks/scripts/compare.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numpy as np
import pandas as pd

from colorama import Fore


def get_filenames_map(arr):
if not arr:
def alg_dfs(file):
    """Load one benchmark database into a dict of DataFrames keyed by 'alg.subbench'.

    For every algorithm/subbench in the storage, replaces non-finite cells
    with NaN, drops rows without a 'center' measurement, derives per-run
    'Noise' (relative standard deviation in percent) and 'Mean' from the raw
    sample lists, and strips columns not needed for comparison. The
    'cub.bench.'/'thrust.bench.' prefixes are removed from the key for
    readability. Warns (instead of failing) if one database mixes CCCL
    versions, then drops the 'cccl' column.

    NOTE(review): reconstructed post-diff version of this function; the
    scraped page interleaved the removed pre-diff lines with the additions.
    """
    result = {}
    storage = cccl.bench.StorageBase(file)
    for algname in storage.algnames():
        for subbench in storage.subbenches(algname):
            df = storage.alg_to_df(algname, subbench)
            df = df.map(lambda x: x if is_finite(x) else np.nan)
            df = df.dropna(subset=['center'], how='all')
            # TODO(bgruber): maybe expose the filters under a -p0, or --short flag
            # df = filter_by_type(filter_by_offset_type(filter_by_problem_size(df)))
            df['Noise'] = df['samples'].apply(lambda x: np.std(x) / np.mean(x)) * 100
            df['Mean'] = df['samples'].apply(lambda x: np.mean(x))
            df = df.drop(columns=['samples', 'center', 'bw', 'elapsed', 'variant'])
            fused_algname = algname.removeprefix("cub.bench.").removeprefix("thrust.bench.") + '.' + subbench
            result[fused_algname] = df

    for algname in result:
        if result[algname]['cccl'].nunique() != 1:
            print(f"WARNING: Multiple CCCL versions in one db '{algname}'")
        result[algname] = result[algname].drop(columns=['cccl'])

    return result
Expand All @@ -96,29 +91,53 @@ def parse_args():
return parser.parse_args()


# Module-level tallies updated by status(); reported in compare()'s summary.
config_count = 0
pass_count = 0
faster_count = 0
slower_count = 0


def status(frac_diff, noise_ref, noise_cmp):
    """Classify a single matched run and tally it in the module counters.

    A relative difference within the smaller of the two noise levels counts
    as a pass ("SAME"); otherwise the sign of the difference decides between
    "FAST" (comparison is faster) and "SLOW". Returns the colorized tag.
    """
    global config_count
    global pass_count
    global faster_count
    global slower_count
    config_count += 1
    threshold = min(noise_ref, noise_cmp)
    if abs(frac_diff) <= threshold:
        pass_count += 1
        return f"{Fore.BLUE}SAME{Fore.RESET}"
    if frac_diff < 0:
        faster_count += 1
        return f"{Fore.GREEN}FAST{Fore.RESET}"
    if frac_diff > 0:
        slower_count += 1
        return f"{Fore.RED}SLOW{Fore.RESET}"


def compare():
    """Compare two benchmark databases and print per-algorithm markdown tables.

    For every algorithm present in both files, matches runs on all columns
    except the measurements ('Noise', 'Mean') and the environment ('ctk',
    'gpu'), so runs from different CTK versions/GPUs line up. Each matched
    row gets an absolute/relative difference and a colorized status; a
    summary of the tallies is printed at the end.

    NOTE(review): reconstructed post-diff version; the scraped page
    interleaved the removed per-CTK/GPU/variant loop with the new flat
    table output.
    """
    args = parse_args()
    reference_df = alg_dfs(args.reference)
    compare_df = alg_dfs(args.compare)
    for alg in sorted(reference_df.keys() & compare_df.keys()):
        print()
        print()
        print(f'# {alg}')
        # use every column except 'Noise', 'Mean', 'ctk', 'gpu' to match runs between reference and comparison file
        merge_columns = [col for col in reference_df[alg].columns if col not in ['Noise', 'Mean', 'ctk', 'gpu']]
        df = pd.merge(reference_df[alg], compare_df[alg], on=merge_columns, suffixes=('Ref', 'Cmp'))
        df['Abs. Diff'] = df['MeanCmp'] - df['MeanRef']
        df['Rel. Diff'] = (df['Abs. Diff'] / df['MeanRef']) * 100
        df['Status'] = list(map(status, df['Rel. Diff'], df['NoiseRef'], df['NoiseCmp']))
        df = df.drop(columns=['ctkRef', 'ctkCmp', 'gpuRef', 'gpuCmp'])
        print()
        print(df.to_markdown(index=False))

    print("# Summary\n")
    print("- Total Matches: %d" % config_count)
    print("  - Pass (diff <= min_noise): %d" % pass_count)
    print("  - Faster (diff > min_noise): %d" % faster_count)
    print("  - Slower (diff > min_noise): %d" % slower_count)


if __name__ == "__main__":
Expand Down
67 changes: 36 additions & 31 deletions benchmarks/scripts/sol.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,6 @@
import matplotlib.pyplot as plt


def get_filenames_map(arr):
if not arr:
return []

prefix = arr[0]
for string in arr:
while not string.startswith(prefix):
prefix = prefix[:-1]
if not prefix:
break

return {string: string[len(prefix):] for string in arr}


def is_finite(x):
if isinstance(x, float):
return x != np.inf and x != -np.inf
Expand Down Expand Up @@ -60,28 +46,23 @@ def alg_dfs(files):
storage = cccl.bench.StorageBase(file)
for algname in storage.algnames():
for subbench in storage.subbenches(algname):
subbench_df = None
df = storage.alg_to_df(algname, subbench)
df = df.map(lambda x: x if is_finite(x) else np.nan)
df = df.dropna(subset=['center'], how='all')
df = filter_by_type(filter_by_offset_type(filter_by_problem_size(df)))
df = df.filter(items=['ctk', 'cccl', 'gpu', 'variant', 'bw'])
df['variant'] = df['variant'].astype(str) + " ({})".format(file)
df['variant'] = df['variant'].astype(str)
df['bw'] = df['bw'] * 100
if subbench_df is None:
subbench_df = df
fused_algname = algname.removeprefix("cub.bench.").removeprefix("thrust.bench.") + '.' + subbench
if fused_algname in result:
result[fused_algname] = pd.concat([result[fused_algname], df])
else:
subbench_df = pd.concat([subbench_df, df])
fused_algname = algname + '.' + subbench
if fused_algname in result:
result[fused_algname] = pd.concat([result[fused_algname], subbench_df])
else:
result[fused_algname] = subbench_df
result[fused_algname] = df

return result


def alg_bws(dfs):
def alg_bws(dfs, verbose):
medians = None
for algname in dfs:
df = dfs[algname]
Expand All @@ -90,8 +71,17 @@ def alg_bws(dfs):
medians = df
else:
medians = pd.concat([medians, df])
medians['hue'] = medians['ctk'].astype(str) + ' ' + medians['cccl'].astype(
str) + ' ' + medians['gpu'].astype(str) + ' ' + medians['variant']
# print more information if it's not unique across all runs or when requested (verbose)
medians['hue'] = ''
if verbose or medians['cccl'].unique().size > 1:
medians['hue'] = medians['hue'] + 'CCCL ' + medians['cccl'].astype(str) + ' '
gpuname = medians['gpu'] if verbose else medians['gpu'].astype(str).map(lambda x: x[:x.find('(') - 1])
medians['hue'] = medians['hue'] + gpuname + ' '
if medians['variant'].unique().size > 1:
variant = medians['variant'].astype(str).map(lambda x : (' ' + x if x != 'base' else ''))
medians['hue'] = medians['hue'] + variant + ' '
if verbose or medians['ctk'].unique().size > 1:
medians['hue'] = medians['hue'] + 'CTK ' + medians['ctk'].astype(str)
return medians.drop(columns=['ctk', 'cccl', 'gpu', 'variant'])


Expand All @@ -106,22 +96,37 @@ def plot_sol(medians, box):
ax = sns.boxenplot(data=medians, x='alg', y='bw', hue='hue')
else:
ax = sns.barplot(data=medians, x='alg', y='bw', hue='hue', errorbar=lambda x: (x.min(), x.max()))
for container in ax.containers:
ax.bar_label(container, fmt='%.1f')
ax.set_xticklabels(ax.get_xticklabels(), rotation=15, rotation_mode='anchor', ha='right')
ax.bar_label(ax.containers[0], fmt='%.1f')
for container in ax.containers[1:]:
labels = [f'{c:.1f}\n({(c/f)*100:.0f}%)' for f, c in zip(ax.containers[0].datavalues, container.datavalues)]
ax.bar_label(container, labels=labels)

ax.legend(title=None)
ax.set_xlabel('Algorithm')
ax.set_ylabel('Bandwidth (%SOL)')
ax.set_xticklabels(ax.get_xticklabels(), rotation=30, rotation_mode='anchor', ha='right')
plt.show()

def print_speedup(medians):
    """Print a markdown table of speedups per (alg, hue) group.

    Averages the bandwidth within each (alg, hue) group and reports each
    group's speedup relative to the first hue of its algorithm, sorted from
    fastest to slowest.
    """
    grouped = medians.groupby(['alg', 'hue'], sort=False).mean()
    baseline = grouped.groupby(['alg'])['bw'].transform('first')
    grouped['speedup'] = grouped['bw'] / baseline
    table = grouped.drop(columns='bw').sort_values(by='speedup', ascending=False)
    print('# Speedups:')
    print()
    print(table.to_markdown())
Comment on lines +110 to +115
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels a bit misplaced here, but it's great to have the speedups from the plot as a copyable table as well, since compare.py works on a much more detailed level.


def parse_args():
    """Parse command-line arguments: one or more result files plus display flags."""
    ap = argparse.ArgumentParser(description="Analyze benchmark results.")
    ap.add_argument('files', type=file_exists, nargs='+',
                    help='At least one file is required.')
    ap.add_argument('--box', action='store_true',
                    help='Plot box instead of bar.')
    ap.add_argument('-v', action='store_true',
                    help='Verbose legend.')
    return ap.parse_args()


def sol():
    """Entry point: load result files, print the speedup table, then show the plot.

    The scraped diff residue kept the stale pre-diff call
    `plot_sol(alg_bws(alg_dfs(args.files)), args.box)` (old one-argument
    `alg_bws` signature) next to the new flow; this resolves the block to
    the post-diff version.
    """
    args = parse_args()
    medians = alg_bws(alg_dfs(args.files), args.v)
    print_speedup(medians)
    plot_sol(medians, args.box)


if __name__ == "__main__":
Expand Down
Loading