diff --git a/Makefile b/Makefile index 41363118b..172e6c855 100644 --- a/Makefile +++ b/Makefile @@ -100,6 +100,18 @@ OBJ := $(filter-out $(BUILD_DIR)/access_x86_translate.o,$(OBJ)) else OBJ := $(filter-out $(BUILD_DIR)/loadDataARM.o,$(OBJ)) endif +ifeq ($(COMPILER), GCCARM) +OBJ := $(filter-out $(BUILD_DIR)/topology_cpuid.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/loadData.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_msr.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_pci.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_rdpmc.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_clientmem.o,$(OBJ)) +OBJ := $(filter-out $(BUILD_DIR)/access_x86_translate.o,$(OBJ)) +else +OBJ := $(filter-out $(BUILD_DIR)/loadDataARM.o,$(OBJ)) +endif ifeq ($(COMPILER), FCC) OBJ := $(filter-out $(BUILD_DIR)/topology_cpuid.o,$(OBJ)) OBJ := $(filter-out $(BUILD_DIR)/loadData.o,$(OBJ)) diff --git a/bench/Makefile b/bench/Makefile index ea97d7dd3..596ce04f6 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -63,6 +63,9 @@ endif ifeq ($(COMPILER),GCCARMv8) BENCH_DIR = ./armv8 endif +ifeq ($(COMPILER),GCCARM) +BENCH_DIR = ./armv8 +endif ifeq ($(COMPILER),ARMCLANG) BENCH_DIR = ./armv8 endif diff --git a/config.mk b/config.mk index 3927e6a62..2f2a1ae89 100644 --- a/config.mk +++ b/config.mk @@ -8,6 +8,7 @@ # configuration options setup steps. 
# Supported: GCC, CLANG, ICC, MIC (ICC), GCCX86 (for 32bit systems) # GCCARMv8, GCCARMv7 and GCCPOWER +# Since 5.3, there is a generic GCCARM target COMPILER = GCC#NO SPACE # Absolute path were to install likwid diff --git a/ext/hwloc/Makefile b/ext/hwloc/Makefile index 9ee1f1f36..10269282b 100644 --- a/ext/hwloc/Makefile +++ b/ext/hwloc/Makefile @@ -39,6 +39,9 @@ endif ifeq ($(strip $(COMPILER)), GCCARMv8) OBJ := $(filter-out $(BUILD_DIR)/topology-x86.o, $(OBJ)) endif +ifeq ($(strip $(COMPILER)), GCCARM) +OBJ := $(filter-out $(BUILD_DIR)/topology-x86.o, $(OBJ)) +endif ifeq ($(strip $(COMPILER)), ARMCLANG) OBJ := $(filter-out $(BUILD_DIR)/topology-x86.o, $(OBJ)) endif diff --git a/groups/nvidia_grace/BRANCH.txt b/groups/nvidia_grace/BRANCH.txt new file mode 100644 index 000000000..c35f37084 --- /dev/null +++ b/groups/nvidia_grace/BRANCH.txt @@ -0,0 +1,30 @@ +SHORT Branch prediction miss rate/ratio + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 BR_RETIRED +PMC3 BR_MIS_PRED_RETIRED + + +METRICS +Runtime (RDTSC) [s] time +CPI PMC1/PMC0 +Branch rate PMC2/PMC0 +Branch misprediction rate PMC3/PMC0 +Branch misprediction ratio PMC3/PMC2 +Instructions per branch PMC0/PMC2 + +LONG +Formulas: +CPI = CPU_CYCLES/INST_RETIRED +Branch rate = BR_RETIRED/INST_RETIRED +Branch misprediction rate = BR_MIS_PRED_RETIRED/INST_RETIRED +Branch misprediction ratio = BR_MIS_PRED_RETIRED/BR_RETIRED +Instructions per branch = INST_RETIRED/BR_RETIRED +- +The rates state how often on average a branch or a mispredicted branch occurred +per instruction retired in total. The Branch misprediction ratio sets directly +into relation what ratio of all branch instructions were mispredicted. +Instructions per branch is 1/Branch rate.
+ diff --git a/groups/nvidia_grace/DATA.txt b/groups/nvidia_grace/DATA.txt new file mode 100644 index 000000000..40f9cb38b --- /dev/null +++ b/groups/nvidia_grace/DATA.txt @@ -0,0 +1,24 @@ +SHORT Load to store ratio + +EVENTSET +PMC0 INST_SPEC +PMC1 CPU_CYCLES +PMC2 LD_SPEC +PMC3 ST_SPEC + +METRICS +Runtime (RDTSC) [s] time +CPI PMC1/PMC0 +Load to store ratio PMC2/PMC3 +Load ratio PMC2/PMC0 +Store ratio PMC3/PMC0 + +LONG +Formulas: +CPI = CPU_CYCLES/INST_SPEC +Load to store ratio = LD_SPEC / ST_SPEC +Load ratio = LD_SPEC / INST_SPEC +Store ratio = ST_SPEC / INST_SPEC +- +This is a metric to determine your load to store ratio. + diff --git a/groups/nvidia_grace/FLOPS.txt b/groups/nvidia_grace/FLOPS.txt new file mode 100644 index 000000000..08bfcc7e8 --- /dev/null +++ b/groups/nvidia_grace/FLOPS.txt @@ -0,0 +1,19 @@ +SHORT MFLOP/s + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC3 FP_FIXED_OPS_SPEC +PMC4 FP_SCALE_OPS_SPEC + +METRICS +Runtime (RDTSC) [s] time +Clock [MHz] 1.E-06*PMC1/time +CPI PMC1/PMC0 +FP rate [MFLOP/s] 1E-06*(PMC3+PMC4)/time + +LONG +Formulas: +FP rate [MFLOP/s] = 1E-06*(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC)/time +- +FP rate in any precision for scalar and SVE vector operations.
diff --git a/groups/nvidia_grace/L2.txt b/groups/nvidia_grace/L2.txt new file mode 100644 index 000000000..be4758533 --- /dev/null +++ b/groups/nvidia_grace/L2.txt @@ -0,0 +1,40 @@ +SHORT L2 cache bandwidth in MBytes/s + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 L1D_CACHE_REFILL +PMC3 L1D_CACHE_WB +PMC4 L1I_CACHE_REFILL + + +METRICS +Runtime (RDTSC) [s] time +CPI PMC1/PMC0 +L1D<-L2 load bandwidth [MBytes/s] 1.0E-06*(PMC2)*256.0/time +L1D<-L2 load data volume [GBytes] 1.0E-09*(PMC2)*256.0 +L1D->L2 evict bandwidth [MBytes/s] 1.0E-06*PMC3*256.0/time +L1D->L2 evict data volume [GBytes] 1.0E-09*PMC3*256.0 +L1I<-L2 load bandwidth [MBytes/s] 1.0E-06*PMC4*256.0/time +L1I<-L2 load data volume [GBytes] 1.0E-09*PMC4*256.0 +L1<->L2 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3+PMC4)*256.0/time +L1<->L2 data volume [GBytes] 1.0E-09*(PMC2+PMC3+PMC4)*256.0 + +LONG +Formulas: +CPI = CPU_CYCLES/INST_RETIRED +L1D<-L2 load bandwidth [MBytes/s] = 1.0E-06*L1D_CACHE_REFILL*256.0/time +L1D<-L2 load data volume [GBytes] = 1.0E-09*L1D_CACHE_REFILL*256.0 +L1D->L2 evict bandwidth [MBytes/s] = 1.0E-06*L1D_CACHE_WB*256.0/time +L1D->L2 evict data volume [GBytes] = 1.0E-09*L1D_CACHE_WB*256.0 +L1I<-L2 load bandwidth [MBytes/s] = 1.0E-06*L1I_CACHE_REFILL*256.0/time +L1I<-L2 load data volume [GBytes] = 1.0E-09*L1I_CACHE_REFILL*256.0 +L1<->L2 bandwidth [MBytes/s] = 1.0E-06*(L1D_CACHE_REFILL+L1D_CACHE_WB+L1I_CACHE_REFILL)*256.0/time +L1<->L2 data volume [GBytes] = 1.0E-09*(L1D_CACHE_REFILL+L1D_CACHE_WB+L1I_CACHE_REFILL)*256.0 +- +Profiling group to measure L2 cache bandwidth. The bandwidth is computed by the +number of cachelines loaded from the L2 to the L1 data cache and the writebacks from +the L1 data cache to the L2 cache. The group also outputs total data volume transferred between +L2 and L1. Note that this bandwidth also includes data transfers due to a write +allocate load on a store miss in L1 and cachelines transferred into the L1 instruction +cache.
diff --git a/groups/nvidia_grace/L3.txt b/groups/nvidia_grace/L3.txt new file mode 100644 index 000000000..93ac39100 --- /dev/null +++ b/groups/nvidia_grace/L3.txt @@ -0,0 +1,34 @@ +SHORT L3 cache bandwidth in MBytes/s + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 L2D_CACHE_REFILL +PMC3 L2D_CACHE_WB + + +METRICS +Runtime (RDTSC) [s] time +CPI PMC1/PMC0 +L2D<-L3 load bandwidth [MBytes/s] 1.0E-06*(PMC2)*256.0/time +L2D<-L3 load data volume [GBytes] 1.0E-09*(PMC2)*256.0 +L2D->L3 evict bandwidth [MBytes/s] 1.0E-06*PMC3*256.0/time +L2D->L3 evict data volume [GBytes] 1.0E-09*PMC3*256.0 +L2<->L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*256.0/time +L2<->L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*256.0 + +LONG +Formulas: +CPI = CPU_CYCLES/INST_RETIRED +L2D<-L3 load bandwidth [MBytes/s] = 1.0E-06*L2D_CACHE_REFILL*256.0/time +L2D<-L3 load data volume [GBytes] = 1.0E-09*L2D_CACHE_REFILL*256.0 +L2D->L3 evict bandwidth [MBytes/s] = 1.0E-06*L2D_CACHE_WB*256.0/time +L2D->L3 evict data volume [GBytes] = 1.0E-09*L2D_CACHE_WB*256.0 +L2<->L3 bandwidth [MBytes/s] = 1.0E-06*(L2D_CACHE_REFILL+L2D_CACHE_WB)*256.0/time +L2<->L3 data volume [GBytes] = 1.0E-09*(L2D_CACHE_REFILL+L2D_CACHE_WB)*256.0 +- +Profiling group to measure L3 cache bandwidth. The bandwidth is computed by the +number of cachelines loaded from the L3 to the L2 data cache and the writebacks from +the L2 data cache to the L3 cache. The group also outputs total data volume transferred between +L3 and L2. Note that this bandwidth also includes data transfers due to a write +allocate load on a store miss in L2.
diff --git a/groups/nvidia_grace/MEM.txt b/groups/nvidia_grace/MEM.txt new file mode 100644 index 000000000..1efbed67a --- /dev/null +++ b/groups/nvidia_grace/MEM.txt @@ -0,0 +1,30 @@ +SHORT Main memory bandwidth in MBytes/s + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +SCF0 CMEM_RD_DATA +SCF1 CMEM_WR_TOTAL_BYTES + +METRICS +Runtime (RDTSC) [s] time +Clock [MHz] 1.E-06*PMC1/time +CPI PMC1/PMC0 +Memory read bandwidth [MBytes/s] 1.0E-06*(SCF0)*32.0/time +Memory read data volume [GBytes] 1.0E-09*(SCF0)*32.0 +Memory write bandwidth [MBytes/s] 1.0E-06*(SCF1)/time +Memory write data volume [GBytes] 1.0E-09*(SCF1) +Memory bandwidth [MBytes/s] 1.0E-06*((SCF0*32.0)+SCF1)/time +Memory data volume [GBytes] 1.0E-09*((SCF0*32.0)+SCF1) + +LONG +Formulas: +Memory read bandwidth [MBytes/s] = 1.0E-06*CMEM_RD_DATA*32.0/runtime +Memory read data volume [GBytes] = 1.0E-09*CMEM_RD_DATA*32.0 +Memory write bandwidth [MBytes/s] = 1.0E-06*CMEM_WR_TOTAL_BYTES/runtime +Memory write data volume [GBytes] = 1.0E-09*CMEM_WR_TOTAL_BYTES +Memory bandwidth [MBytes/s] = 1.0E-06*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES)/runtime +Memory data volume [GBytes] = 1.0E-09*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES) +- +Profiling group to measure memory bandwidth for CPU memory. The transfer unit +'beats' is 32 Bytes.
diff --git a/groups/nvidia_grace/MEM_DP.txt b/groups/nvidia_grace/MEM_DP.txt new file mode 100644 index 000000000..e470ddd9d --- /dev/null +++ b/groups/nvidia_grace/MEM_DP.txt @@ -0,0 +1,49 @@ +SHORT Main memory bandwidth and FLOPs + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 FP_FIXED_OPS_SPEC +PMC3 FP_SCALE_OPS_SPEC +PMC4 FP_DP_SPEC +PMC5 SVE_INST_SPEC +SCF0 CMEM_RD_DATA +SCF1 CMEM_WR_TOTAL_BYTES + +METRICS +Runtime (RDTSC) [s] time +Clock [MHz] 1.E-06*PMC1/time +CPI PMC1/PMC0 +FP rate [MFLOP/s] 1.0E-06*(PMC2+PMC3)/time +SVE FP rate [MFLOP/s] 1.0E-06*(PMC3)/time +Scalar/NEON FP rate [MFLOP/s] 1.0E-06*(PMC2)/time +SVE ratio 100*(PMC3)/(PMC2+PMC3) +Flops per DP instr PMC3/PMC4 +Arithmetic ratio SVE 100*(PMC4)/(PMC5) +Memory read bandwidth [MBytes/s] 1.0E-06*(SCF0)*32.0/time +Memory read data volume [GBytes] 1.0E-09*(SCF0)*32.0 +Memory write bandwidth [MBytes/s] 1.0E-06*(SCF1)/time +Memory write data volume [GBytes] 1.0E-09*SCF1 +Memory bandwidth [MBytes/s] 1.0E-06*((SCF0*32.0)+SCF1)/time +Memory data volume [GBytes] 1.0E-09*((SCF0*32.0)+SCF1) + + +LONG +Formulas: +FP rate [MFLOP/s] = 1E-06*(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC)/time +SVE FP rate [MFLOP/s] = 1E-06*FP_SCALE_OPS_SPEC/time +Scalar/NEON FP rate [MFLOP/s] = 1E-06*FP_FIXED_OPS_SPEC/time +SVE ratio = 100*FP_SCALE_OPS_SPEC/(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC) +Flops per DP instr = FP_SCALE_OPS_SPEC/FP_DP_SPEC +Arithmetic ratio SVE = 100*FP_DP_SPEC/SVE_INST_SPEC +Memory read bandwidth [MBytes/s] = 1.0E-06*CMEM_RD_DATA*32.0/runtime +Memory read data volume [GBytes] = 1.0E-09*CMEM_RD_DATA*32.0 +Memory write bandwidth [MBytes/s] = 1.0E-06*CMEM_WR_TOTAL_BYTES/runtime +Memory write data volume [GBytes] = 1.0E-09*CMEM_WR_TOTAL_BYTES +Memory bandwidth [MBytes/s] = 1.0E-06*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES)/runtime +Memory data volume [GBytes] = 1.0E-09*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES) +- +Profiling group to measure memory bandwidth for CPU memory and +FP rate in any precision for
scalar and SVE vector operations with additional +insight into DP instructions. +The transfer unit 'beats' is 32 Bytes. diff --git a/groups/nvidia_grace/MEM_HP.txt b/groups/nvidia_grace/MEM_HP.txt new file mode 100644 index 000000000..5979fa998 --- /dev/null +++ b/groups/nvidia_grace/MEM_HP.txt @@ -0,0 +1,49 @@ +SHORT Main memory bandwidth and FLOPs + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 FP_FIXED_OPS_SPEC +PMC3 FP_SCALE_OPS_SPEC +PMC4 FP_HP_SPEC +PMC5 SVE_INST_SPEC +SCF0 CMEM_RD_DATA +SCF1 CMEM_WR_TOTAL_BYTES + +METRICS +Runtime (RDTSC) [s] time +Clock [MHz] 1.E-06*PMC1/time +CPI PMC1/PMC0 +FP rate [MFLOP/s] 1.0E-06*(PMC2+PMC3)/time +SVE FP rate [MFLOP/s] 1.0E-06*(PMC3)/time +Scalar/NEON FP rate [MFLOP/s] 1.0E-06*(PMC2)/time +SVE ratio 100*(PMC3)/(PMC2+PMC3) +Flops per HP instr PMC3/PMC4 +Arithmetic ratio SVE 100*(PMC4)/(PMC5) +Memory read bandwidth [MBytes/s] 1.0E-06*(SCF0)*32.0/time +Memory read data volume [GBytes] 1.0E-09*(SCF0)*32.0 +Memory write bandwidth [MBytes/s] 1.0E-06*(SCF1)/time +Memory write data volume [GBytes] 1.0E-09*SCF1 +Memory bandwidth [MBytes/s] 1.0E-06*((SCF0*32.0)+SCF1)/time +Memory data volume [GBytes] 1.0E-09*((SCF0*32.0)+SCF1) + + +LONG +Formulas: +FP rate [MFLOP/s] = 1E-06*(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC)/time +SVE FP rate [MFLOP/s] = 1E-06*FP_SCALE_OPS_SPEC/time +Scalar/NEON FP rate [MFLOP/s] = 1E-06*FP_FIXED_OPS_SPEC/time +SVE ratio = 100*FP_SCALE_OPS_SPEC/(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC) +Flops per HP instr = FP_SCALE_OPS_SPEC/FP_HP_SPEC +Arithmetic ratio SVE = 100*FP_HP_SPEC/SVE_INST_SPEC +Memory read bandwidth [MBytes/s] = 1.0E-06*CMEM_RD_DATA*32.0/runtime +Memory read data volume [GBytes] = 1.0E-09*CMEM_RD_DATA*32.0 +Memory write bandwidth [MBytes/s] = 1.0E-06*CMEM_WR_TOTAL_BYTES/runtime +Memory write data volume [GBytes] = 1.0E-09*CMEM_WR_TOTAL_BYTES +Memory bandwidth [MBytes/s] = 1.0E-06*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES)/runtime +Memory data volume [GBytes] =
1.0E-09*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES) +- +Profiling group to measure memory bandwidth for CPU memory and +FP rate in any precision for scalar and SVE vector operations with additional +insight into HP instructions. +The transfer unit 'beats' is 32 Bytes. diff --git a/groups/nvidia_grace/MEM_SP.txt b/groups/nvidia_grace/MEM_SP.txt new file mode 100644 index 000000000..7efd52edf --- /dev/null +++ b/groups/nvidia_grace/MEM_SP.txt @@ -0,0 +1,49 @@ +SHORT Main memory bandwidth and FLOPs + +EVENTSET +PMC0 INST_RETIRED +PMC1 CPU_CYCLES +PMC2 FP_FIXED_OPS_SPEC +PMC3 FP_SCALE_OPS_SPEC +PMC4 FP_SP_SPEC +PMC5 SVE_INST_SPEC +SCF0 CMEM_RD_DATA +SCF1 CMEM_WR_TOTAL_BYTES + +METRICS +Runtime (RDTSC) [s] time +Clock [MHz] 1.E-06*PMC1/time +CPI PMC1/PMC0 +FP rate [MFLOP/s] 1.0E-06*(PMC2+PMC3)/time +SVE FP rate [MFLOP/s] 1.0E-06*(PMC3)/time +Scalar/NEON FP rate [MFLOP/s] 1.0E-06*(PMC2)/time +SVE ratio 100*(PMC3)/(PMC2+PMC3) +Flops per SP instr PMC3/PMC4 +Arithmetic ratio SVE 100*(PMC4)/(PMC5) +Memory read bandwidth [MBytes/s] 1.0E-06*(SCF0)*32.0/time +Memory read data volume [GBytes] 1.0E-09*(SCF0)*32.0 +Memory write bandwidth [MBytes/s] 1.0E-06*(SCF1)/time +Memory write data volume [GBytes] 1.0E-09*SCF1 +Memory bandwidth [MBytes/s] 1.0E-06*((SCF0*32.0)+SCF1)/time +Memory data volume [GBytes] 1.0E-09*((SCF0*32.0)+SCF1) + + +LONG +Formulas: +FP rate [MFLOP/s] = 1E-06*(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC)/time +SVE FP rate [MFLOP/s] = 1E-06*FP_SCALE_OPS_SPEC/time +Scalar/NEON FP rate [MFLOP/s] = 1E-06*FP_FIXED_OPS_SPEC/time +SVE ratio = 100*FP_SCALE_OPS_SPEC/(FP_FIXED_OPS_SPEC+FP_SCALE_OPS_SPEC) +Flops per SP instr = FP_SCALE_OPS_SPEC/FP_SP_SPEC +Arithmetic ratio SVE = 100*FP_SP_SPEC/SVE_INST_SPEC +Memory read bandwidth [MBytes/s] = 1.0E-06*CMEM_RD_DATA*32.0/runtime +Memory read data volume [GBytes] = 1.0E-09*CMEM_RD_DATA*32.0 +Memory write bandwidth [MBytes/s] = 1.0E-06*CMEM_WR_TOTAL_BYTES/runtime +Memory write data volume [GBytes] = 1.0E-09*CMEM_WR_TOTAL_BYTES
+Memory bandwidth [MBytes/s] = 1.0E-06*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES)/runtime +Memory data volume [GBytes] = 1.0E-09*((CMEM_RD_DATA*32.0)+CMEM_WR_TOTAL_BYTES) +- +Profiling group to measure memory bandwidth for CPU memory and +FP rate in any precision for scalar and SVE vector operations with additional +insight into SP instructions. +The transfer unit 'beats' is 32 Bytes. diff --git a/make/config_defines.mk b/make/config_defines.mk index dfeb739ff..273fc1303 100644 --- a/make/config_defines.mk +++ b/make/config_defines.mk @@ -178,6 +178,34 @@ ifeq ($(strip $(COMPILER)),GCCARMv8) BUILDFREQ := false endif endif +ifeq ($(strip $(COMPILER)),GCCARM) + ifeq ($(strip $(ACCESSMODE)),sysdaemon) + $(info Info: Compiling for ARM architecture. Changing accessmode to perf_event.) + ACCESSMODE := perf_event + DEFINES += -DLIKWID_USE_PERFEVENT + BUILDDAEMON := false + BUILDFREQ := false + endif + ifeq ($(strip $(ACCESSMODE)),accessdaemon) + $(info Info: Compiling for ARM architecture. Changing accessmode to perf_event.) + ACCESSMODE := perf_event + DEFINES += -DLIKWID_USE_PERFEVENT + BUILDDAEMON := false + BUILDFREQ := false + endif + ifeq ($(strip $(ACCESSMODE)),direct) + $(info Info: Compiling for ARM architecture. Changing accessmode to perf_event.) + ACCESSMODE := perf_event + DEFINES += -DLIKWID_USE_PERFEVENT + BUILDDAEMON := false + BUILDFREQ := false + endif + ifeq ($(strip $(ACCESSMODE)),perf_event) + DEFINES += -DLIKWID_USE_PERFEVENT + BUILDDAEMON := false + BUILDFREQ := false + endif +endif ifeq ($(strip $(COMPILER)),ARMCLANG) ifeq ($(strip $(ACCESSMODE)),sysdaemon) $(info Info: Compiling for ARMv8 architecture. Changing accessmode to perf_event.) 
diff --git a/src/affinity.c b/src/affinity.c index 46a599503..2124e0cb0 100644 --- a/src/affinity.c +++ b/src/affinity.c @@ -182,6 +182,8 @@ static int create_lookups() int do_cache = 1; int cachelimit = 0; int cacheIdx = -1; + int * tmp = NULL; + int num_tmp = 0; topology_init(); numa_init(); CpuTopology_t cputopo = get_cpuTopology(); @@ -246,7 +248,23 @@ static int create_lookups() sharedl3_lock = malloc(cputopo->numHWThreads * sizeof(int)); memset(sharedl3_lock, LOCK_INIT, cputopo->numHWThreads*sizeof(int)); } - + tmp = malloc(cputopo->numHWThreads * sizeof(int)); + if (!tmp) + { + if (affinity_thread2core_lookup) {free(affinity_thread2core_lookup); affinity_thread2core_lookup = NULL;} + if (affinity_thread2socket_lookup) {free(affinity_thread2socket_lookup); affinity_thread2socket_lookup = NULL;} + if (affinity_thread2sharedl3_lookup) {free(affinity_thread2sharedl3_lookup); affinity_thread2sharedl3_lookup = NULL;} + if (affinity_thread2numa_lookup) {free(affinity_thread2numa_lookup); affinity_thread2numa_lookup = NULL;} + if (affinity_thread2die_lookup) {free(affinity_thread2die_lookup); affinity_thread2die_lookup = NULL;} + if (socket_lock) {free(socket_lock); socket_lock = NULL;} + if (die_lock) {free(die_lock); die_lock = NULL;} + if (numa_lock) {free(numa_lock); numa_lock = NULL;} + if (core_lock) {free(core_lock); core_lock = NULL;} + if (tile_lock) {free(tile_lock); tile_lock = NULL;} + if (sharedl2_lock) {free(sharedl2_lock); sharedl2_lock = NULL;} + if (sharedl3_lock) {free(sharedl3_lock); sharedl3_lock = NULL;} + return -ENOMEM; + } int num_pu = cputopo->numHWThreads; if (cputopo->numCacheLevels == 0) @@ -259,6 +277,23 @@ static int create_lookups() cacheIdx = -1; } for (int pu_idx = 0; pu_idx < num_pu; pu_idx++) + { + HWThread* t = &cputopo->threadPool[pu_idx]; + int found = 0; + for (int j = 0; j < num_tmp; j++) + { + if (tmp[j] == t->packageId) + { + found = 1; + break; + } + } + if (!found) + { + tmp[num_tmp++] = t->packageId; + } + } + for (int 
pu_idx = 0; pu_idx < num_pu; pu_idx++) { HWThread* t = &cputopo->threadPool[pu_idx]; int hwthreadid = t->apicId; @@ -268,6 +303,17 @@ static int create_lookups() int dies_per_socket = MAX(cputopo->numDies/cputopo->numSockets, 1); affinity_thread2core_lookup[hwthreadid] = coreid; affinity_thread2socket_lookup[hwthreadid] = sockid; + for (int j = 0; j < num_tmp; j++) + { + if (affinity_thread2socket_lookup[hwthreadid] == tmp[j]) + { + /* Remap the package ID to its index in tmp (first-seen order). Stop at the + * first match so the remapped index is not compared with later entries. */ + affinity_thread2socket_lookup[hwthreadid] = j; + sockid = j; + break; + } + } affinity_thread2die_lookup[hwthreadid] = (sockid * dies_per_socket) + dieid; int memid = 0; for (int n = 0; n < ntopo->numberOfNodes; n++) @@ -294,7 +340,7 @@ static int create_lookups() affinity_thread2sharedl3_lookup[hwthreadid], affinity_thread2numa_lookup[hwthreadid]); } - + free(tmp); return 0; } diff --git a/src/includes/perfmon_nvidiagrace.h b/src/includes/perfmon_nvidiagrace.h new file mode 100644 index 000000000..e0f26ff70 --- /dev/null +++ b/src/includes/perfmon_nvidiagrace.h @@ -0,0 +1,43 @@ +/* + * ======================================================================================= + * + * Filename: perfmon_nvidiagrace.h + * + * Description: Header File of perfmon module for Nvidia Grace CPU + * + * Version: + * Released: + * + * Author: Thomas Gruber (tr), thomas.roehl@googlemail.com + * Project: likwid + * + * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + * + * ======================================================================================= + */ +#include +#include + +#include +#include +#include +#include +#include + +static int perfmon_numCountersNvidiaGrace = NUM_COUNTERS_NVIDIAGRACE; +static int perfmon_numArchEventsNvidiaGrace = NUM_ARCH_EVENTS_NVIDIAGRACE; + + + diff --git a/src/includes/perfmon_nvidiagrace_counters.h b/src/includes/perfmon_nvidiagrace_counters.h new file mode 100644 index 000000000..66c6074df --- /dev/null +++ b/src/includes/perfmon_nvidiagrace_counters.h @@ -0,0 +1,105 @@ +/* + * ======================================================================================= + * + * Filename: perfmon_nvidiagrace_counters.h + * + * Description: Counter Header File of perfmon module for Nvidia Grace CPU. + * + * Version: + * Released: + * + * Author: Thomas Gruber (tr), thomas.roehl@googlemail.com + * Project: likwid + * + * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ * + * ======================================================================================= + */ + + +#define NUM_COUNTERS_NVIDIAGRACE 41 + +static RegisterMap nvidiagrace_counter_map[NUM_COUNTERS_NVIDIAGRACE] = { + {"PMC0", PMC0, PMC, A57_PERFEVTSEL0, A57_PMC0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PMC1", PMC1, PMC, A57_PERFEVTSEL1, A57_PMC1, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PMC2", PMC2, PMC, A57_PERFEVTSEL2, A57_PMC2, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PMC3", PMC3, PMC, A57_PERFEVTSEL3, A57_PMC3, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PMC4", PMC4, PMC, A57_PERFEVTSEL4, A57_PMC4, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PMC5", PMC5, PMC, A57_PERFEVTSEL5, A57_PMC5, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCFFIX", PMC6,MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF0", PMC7, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF1", PMC8, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF2", PMC9, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF3", PMC10, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF4", PMC11, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"SCF5", PMC12, MBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV0", PMC13, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV1", PMC14, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV2", PMC15, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV3", PMC16, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV4", PMC17, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNV5", PMC18, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"CNVFIX", PMC19, BBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C0", PMC20, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C1", PMC21, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C2", PMC22, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C3", PMC23, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C4", PMC24, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0C5", PMC25, QBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV0FIX", PMC26, QBOX0, 0, 0, 0, 0, 
EVENT_OPTION_NONE_MASK}, + {"NV1C0", PMC27, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1C1", PMC28, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1C2", PMC29, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1C3", PMC30, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1C4", PMC31, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1C5", PMC32, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"NV1FIX", PMC33, SBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE0", PMC34, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE1", PMC35, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE2", PMC36, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE3", PMC37, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE4", PMC38, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIE5", PMC39, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, + {"PCIEFIX", PMC40, PBOX0, 0, 0, 0, 0, EVENT_OPTION_NONE_MASK}, +}; + +static BoxMap nvidiagrace_box_map[NUM_UNITS] = { + [PMC] = {A57_PERF_CONTROL_CTRL, A57_OVERFLOW_STATUS, A57_OVERFLOW_FLAGS, 0, 0, 0, 32}, + [MBOX0] = {0, 0, 0, 0, 0, 0, 32}, + [BBOX0] = {0, 0, 0, 0, 0, 0, 32}, + [QBOX0] = {0, 0, 0, 0, 0, 0, 32}, + [SBOX0] = {0, 0, 0, 0, 0, 0, 32}, + [PBOX0] = {0, 0, 0, 0, 0, 0, 32}, + [MBOX1] = {0, 0, 0, 0, 0, 0, 32}, + [BBOX1] = {0, 0, 0, 0, 0, 0, 32}, + [QBOX1] = {0, 0, 0, 0, 0, 0, 32}, + [SBOX1] = {0, 0, 0, 0, 0, 0, 32}, + [PBOX1] = {0, 0, 0, 0, 0, 0, 32}, +}; + +static char* nvidiagrace_translate_types[NUM_UNITS] = { + [PMC] = "/sys/bus/event_source/devices/armv8_pmuv3_0", + [BBOX0] = "/sys/bus/event_source/devices/nvidia_cnvlink_pmu_0", + [BBOX1] = "/sys/bus/event_source/devices/nvidia_cnvlink_pmu_1", + [QBOX0] = "/sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_0", + [QBOX1] = "/sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_1", + [SBOX0] = "/sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_0", + [SBOX1] = "/sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_1", + [PBOX0] = 
"/sys/bus/event_source/devices/nvidia_pcie_pmu_0", + [PBOX1] = "/sys/bus/event_source/devices/nvidia_pcie_pmu_1", + [MBOX0] = "/sys/bus/event_source/devices/nvidia_scf_pmu_0", + [MBOX1] = "/sys/bus/event_source/devices/nvidia_scf_pmu_1", +}; + diff --git a/src/includes/perfmon_nvidiagrace_events.txt b/src/includes/perfmon_nvidiagrace_events.txt new file mode 100644 index 000000000..0811e9fd1 --- /dev/null +++ b/src/includes/perfmon_nvidiagrace_events.txt @@ -0,0 +1,1368 @@ +# ======================================================================================= +# +# Filename: perfmon_nvidiagrace_events.txt +# +# Description: Event list for Nvidia Grace CPU +# +# Version: +# Released: +# +# Author: Thomas Gruber (tr), thomas.roehl@googlemail.com +# Project: likwid +# +# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg +# +# This program is free software: you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see . 
+# +# ======================================================================================= + +EVENT_SW_INCR 0x00 PMC +UMASK_SW_INCR 0x00 + +EVENT_L1I_CACHE_REFILL 0x01 PMC +UMASK_L1I_CACHE_REFILL 0x00 + +EVENT_L1I_TLB_REFILL 0x02 PMC +UMASK_L1I_TLB_REFILL 0x00 + +EVENT_L1D_CACHE_REFILL 0x03 PMC +UMASK_L1D_CACHE_REFILL 0x00 + +EVENT_L1D_CACHE 0x04 PMC +UMASK_L1D_CACHE 0x00 + +EVENT_L1D_TLB_REFILL 0x05 PMC +UMASK_L1D_TLB_REFILL 0x00 + +EVENT_INST_RETIRED 0x08 PMC +UMASK_INST_RETIRED 0x00 + +EVENT_EXC_TAKEN 0x09 PMC +UMASK_EXC_TAKEN 0x00 + +EVENT_EXC_RETURN 0x0A PMC +UMASK_EXC_RETURN 0x00 + +EVENT_CID_WRITE_RETIRED 0x0B PMC +UMASK_CID_WRITE_RETIRED 0x00 + +EVENT_BR_MIS_PRED 0x10 PMC +UMASK_BR_MIS_PRED 0x00 + +EVENT_CPU_CYCLES 0x11 PMC +UMASK_CPU_CYCLES 0x00 + +EVENT_BR_PRED 0x12 PMC +UMASK_BR_PRED 0x00 + +EVENT_MEM_ACCESS 0x13 PMC +UMASK_MEM_ACCESS 0x00 + +EVENT_L1I_CACHE 0x14 PMC +UMASK_L1I_CACHE 0x00 + +EVENT_L1D_CACHE_WB 0x15 PMC +UMASK_L1D_CACHE_WB 0x00 + +EVENT_L2D_CACHE 0x16 PMC +UMASK_L2D_CACHE 0x00 + +EVENT_L2D_CACHE_REFILL 0x17 PMC +UMASK_L2D_CACHE_REFILL 0x00 + +EVENT_L2D_CACHE_WB 0x18 PMC +UMASK_L2D_CACHE_WB 0x00 + +EVENT_BUS_ACCESS 0x19 PMC +UMASK_BUS_ACCESS 0x00 + +EVENT_MEMORY_ERROR 0x1A PMC +UMASK_MEMORY_ERROR 0x00 + +EVENT_INST_SPEC 0x1B PMC +UMASK_INST_SPEC 0x00 + +EVENT_TTBR_WRITE_RETIRED 0x1C PMC +UMASK_TTBR_WRITE_RETIRED 0x00 + +EVENT_BUS_CYCLES 0x1D PMC +UMASK_BUS_CYCLES 0x00 + +EVENT_COUNTER_OVERFLOW 0x1E PMC +UMASK_COUNTER_OVERFLOW 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_ALLOCATE 0x20 PMC +UMASK_L2D_CACHE_ALLOCATE 0x00 + +EVENT_CACHE_ALLOCATE 0x20 PMC +UMASK_CACHE_ALLOCATE 0x00 + +EVENT_BR_RETIRED 0x21 PMC +UMASK_BR_RETIRED 0x00 + +EVENT_BR_MIS_PRED_RETIRED 0x22 PMC +UMASK_BR_MIS_PRED_RETIRED 0x00 + +EVENT_STALL_FRONTEND 0x23 PMC +UMASK_STALL_FRONTEND 0x00 + +EVENT_STALL_BACKEND 0x24 PMC +UMASK_STALL_BACKEND 0x00 + +EVENT_L1D_TLB 0x25 PMC +UMASK_L1D_TLB 0x00 + +EVENT_L1I_TLB 0x26 PMC +UMASK_L1I_TLB 0x00 
+ +EVENT_L3D_CACHE_ALLOCATE 0x29 PMC +UMASK_L3D_CACHE_ALLOCATE 0x00 + +EVENT_L3D_CACHE_REFILL 0x2A PMC +UMASK_L3D_CACHE_REFILL 0x00 + +EVENT_L3D_CACHE 0x2B PMC +UMASK_L3D_CACHE 0x00 + +EVENT_L2D_TLB_REFILL 0x2D PMC +UMASK_L2D_TLB_REFILL 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB 0x2F PMC +UMASK_L2D_TLB 0x00 + +EVENT_L2TLB_REQ 0x2F PMC +UMASK_L2TLB_REQ 0x00 + +EVENT_REMOTE_ACCESS 0x31 PMC +UMASK_REMOTE_ACCESS 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_DTLB_WALK 0x34 PMC +UMASK_DTLB_WALK 0x00 + +EVENT_DTLB_WLK 0x34 PMC +UMASK_DTLB_WLK 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_ITLB_WALK 0x35 PMC +UMASK_ITLB_WALK 0x00 + +EVENT_ITLB_WLK 0x35 PMC +UMASK_ITLB_WLK 0x00 + +EVENT_LL_CACHE_RD 0x36 PMC +UMASK_LL_CACHE_RD 0x00 + +EVENT_LL_CACHE_MISS_RD 0x37 PMC +UMASK_LL_CACHE_MISS_RD 0x00 + +EVENT_L1D_CACHE_LMISS_RD 0x39 PMC +UMASK_L1D_CACHE_LMISS_RD 0x00 + +EVENT_OP_RETIRED 0x3A PMC +UMASK_OP_RETIRED 0x00 + +EVENT_OP_SPEC 0x3B PMC +UMASK_OP_SPEC 0x00 + +EVENT_STALL 0x3C PMC +UMASK_STALL 0x00 + +EVENT_STALL_SLOT_BACKEND 0x3D PMC +UMASK_STALL_SLOT_BACKEND 0x00 + +EVENT_STALL_SLOT_FRONTEND 0x3E PMC +UMASK_STALL_SLOT_FRONTEND 0x00 + +EVENT_STALL_SLOT 0x3F PMC +UMASK_STALL_SLOT 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_CACHE_LD 0x40 PMC +UMASK_L1D_CACHE_LD 0x00 + +EVENT_L1D_CACHE_RD 0x40 PMC +UMASK_L1D_CACHE_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_CACHE_ST 0x41 PMC +UMASK_L1D_CACHE_ST 0x00 + +EVENT_L1D_CACHE_WR 0x41 PMC +UMASK_L1D_CACHE_WR 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_CACHE_REFILL_LD 0x42 PMC +UMASK_L1D_CACHE_REFILL_LD 0x00 + +EVENT_L1D_CACHE_REFILL_RD 0x42 PMC +UMASK_L1D_CACHE_REFILL_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_CACHE_REFILL_ST 0x43 PMC +UMASK_L1D_CACHE_REFILL_ST 0x00 + +EVENT_L1D_CACHE_REFILL_WR 0x43 PMC +UMASK_L1D_CACHE_REFILL_WR 
0x00 + +EVENT_L1D_CACHE_REFILL_INNER 0x44 PMC +UMASK_L1D_CACHE_REFILL_INNER 0x00 + +EVENT_L1D_CACHE_REFILL_OUTER 0x45 PMC +UMASK_L1D_CACHE_REFILL_OUTER 0x00 + +EVENT_L1D_CACHE_WB_VICTIM 0x46 PMC +UMASK_L1D_CACHE_WB_VICTIM 0x00 + +EVENT_L1D_CACHE_WB_CLEAN 0x47 PMC +UMASK_L1D_CACHE_WB_CLEAN 0x00 + +EVENT_L1D_CACHE_INVAL 0x48 PMC +UMASK_L1D_CACHE_INVAL 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_TLB_REFILL_LD 0x4C PMC +UMASK_L1D_TLB_REFILL_LD 0x00 + +EVENT_L1D_TLB_REFILL_RD 0x4C PMC +UMASK_L1D_TLB_REFILL_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_TLB_REFILL_ST 0x4D PMC +UMASK_L1D_TLB_REFILL_ST 0x00 + +EVENT_L1D_TLB_REFILL_WR 0x4D PMC +UMASK_L1D_TLB_REFILL_WR 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_TLB_LD 0x4E PMC +UMASK_L1D_TLB_LD 0x00 + +EVENT_L1D_TLB_RD 0x4E PMC +UMASK_L1D_TLB_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L1D_TLB_ST 0x4F PMC +UMASK_L1D_TLB_ST 0x00 + +EVENT_L1D_TLB_WR 0x4F PMC +UMASK_L1D_TLB_WR 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_LD 0x50 PMC +UMASK_L2D_CACHE_LD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_RD 0x50 PMC +UMASK_L2D_CACHE_RD 0x00 + +EVENT_CACHE_ACCESS_RD 0x50 PMC +UMASK_CACHE_ACCESS_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_ST 0x51 PMC +UMASK_L2D_CACHE_ST 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_WR 0x51 PMC +UMASK_L2D_CACHE_WR 0x00 + +EVENT_CACHE_ACCESS_WR 0x51 PMC +UMASK_CACHE_ACCESS_WR 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_REFILL_LD 0x52 PMC +UMASK_L2D_CACHE_REFILL_LD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_REFILL_RD 0x52 PMC +UMASK_L2D_CACHE_REFILL_RD 0x00 + +EVENT_CACHE_RD_REFILL 0x52 PMC +UMASK_CACHE_RD_REFILL 0x00 + +# Added by Thomas Gruber to fit previous architectures 
+EVENT_L2D_CACHE_REFILL_ST 0x53 PMC +UMASK_L2D_CACHE_REFILL_ST 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_REFILL_WR 0x53 PMC +UMASK_L2D_CACHE_REFILL_WR 0x00 + +EVENT_CACHE_WR_REFILL 0x53 PMC +UMASK_CACHE_WR_REFILL 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_WB_VICTIM 0x56 PMC +UMASK_L2D_CACHE_WB_VICTIM 0x00 + +EVENT_CACHE_WRITEBACK_VICTIM 0x56 PMC +UMASK_CACHE_WRITEBACK_VICTIM 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_WB_CLEAN 0x57 PMC +UMASK_L2D_CACHE_WB_CLEAN 0x00 + +EVENT_CACHE_WRITEBACK_CLEAN_COH 0x57 PMC +UMASK_CACHE_WRITEBACK_CLEAN_COH 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_INVAL 0x58 PMC +UMASK_L2D_CACHE_INVAL 0x00 + +EVENT_L2CACHE_INV 0x58 PMC +UMASK_L2CACHE_INV 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_REFILL_LD 0x5C PMC +UMASK_L2D_TLB_REFILL_LD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_REFILL_RD 0x5C PMC +UMASK_L2D_TLB_REFILL_RD 0x00 + +EVENT_L2TLB_RD_REFILL 0x5C PMC +UMASK_L2TLB_RD_REFILL 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_REFILL_ST 0x5D PMC +UMASK_L2D_TLB_REFILL_ST 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_REFILL_WR 0x5D PMC +UMASK_L2D_TLB_REFILL_WR 0x00 + +EVENT_L2TLB_WR_REFILL 0x5D PMC +UMASK_L2TLB_WR_REFILL 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_LD 0x5E PMC +UMASK_L2D_TLB_LD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_RD 0x5E PMC +UMASK_L2D_TLB_RD 0x00 + +EVENT_L2TLB_RD_REQ 0x5E PMC +UMASK_L2TLB_RD_REQ 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_ST 0x5F PMC +UMASK_L2D_TLB_ST 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_TLB_WR 0x5F PMC +UMASK_L2D_TLB_WR 0x00 + +EVENT_L2TLB_WR_REQ 0x5F PMC +UMASK_L2TLB_WR_REQ 0x00 + +# 
Added by Thomas Gruber to fit previous architectures +EVENT_BUS_ACCESS_LD 0x60 PMC +UMASK_BUS_ACCESS_LD 0x00 + +EVENT_BUS_ACCESS_RD 0x60 PMC +UMASK_BUS_ACCESS_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_BUS_ACCESS_ST 0x61 PMC +UMASK_BUS_ACCESS_ST 0x00 + +EVENT_BUS_ACCESS_WR 0x61 PMC +UMASK_BUS_ACCESS_WR 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_MEM_ACCESS_LD 0x66 PMC +UMASK_MEM_ACCESS_LD 0x00 + +EVENT_MEM_ACCESS_RD 0x66 PMC +UMASK_MEM_ACCESS_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_MEM_ACCESS_ST 0x67 PMC +UMASK_MEM_ACCESS_ST 0x00 + +EVENT_MEM_ACCESS_WR 0x67 PMC +UMASK_MEM_ACCESS_WR 0x00 + +EVENT_UNALIGNED_LD_SPEC 0x68 PMC +UMASK_UNALIGNED_LD_SPEC 0x00 + +EVENT_UNALIGNED_ST_SPEC 0x69 PMC +UMASK_UNALIGNED_ST_SPEC 0x00 + +EVENT_UNALIGNED_LDST_SPEC 0x6A PMC +UMASK_UNALIGNED_LDST_SPEC 0x00 + +EVENT_LDREX_SPEC 0x6C PMC +UMASK_LDREX_SPEC 0x00 + +EVENT_STREX_PASS_SPEC 0x6D PMC +UMASK_STREX_PASS_SPEC 0x00 + +EVENT_STREX_FAIL_SPEC 0x6E PMC +UMASK_STREX_FAIL_SPEC 0x00 + +EVENT_STREX_SPEC 0x6F PMC +UMASK_STREX_SPEC 0x00 + +EVENT_LD_SPEC 0x70 PMC +UMASK_LD_SPEC 0x00 + +EVENT_ST_SPEC 0x71 PMC +UMASK_ST_SPEC 0x00 + +EVENT_DP_SPEC 0x73 PMC +UMASK_DP_SPEC 0x00 + +EVENT_ASE_SPEC 0x74 PMC +UMASK_ASE_SPEC 0x00 + +EVENT_VFP_SPEC 0x75 PMC +UMASK_VFP_SPEC 0x00 + +EVENT_PC_WRITE_SPEC 0x76 PMC +UMASK_PC_WRITE_SPEC 0x00 + +EVENT_CRYPTO_SPEC 0x77 PMC +UMASK_CRYPTO_SPEC 0x00 + +EVENT_BR_IMMED_SPEC 0x78 PMC +UMASK_BR_IMMED_SPEC 0x00 + +EVENT_BR_RETURN_SPEC 0x79 PMC +UMASK_BR_RETURN_SPEC 0x00 + +EVENT_BR_INDIRECT_SPEC 0x7A PMC +UMASK_BR_INDIRECT_SPEC 0x00 + +EVENT_ISB_SPEC 0x7C PMC +UMASK_ISB_SPEC 0x00 + +EVENT_DSB_SPEC 0x7D PMC +UMASK_DSB_SPEC 0x00 + +EVENT_DMB_SPEC 0x7E PMC +UMASK_DMB_SPEC 0x00 + +EVENT_EXC_UNDEF 0x81 PMC +UMASK_EXC_UNDEF 0x00 + +EVENT_EXC_SVC 0x82 PMC +UMASK_EXC_SVC 0x00 + +EVENT_EXC_PABORT 0x83 PMC +UMASK_EXC_PABORT 0x00 + +EVENT_EXC_DABORT 0x84 PMC +UMASK_EXC_DABORT 0x00 + 
+EVENT_EXC_IRQ 0x86 PMC +UMASK_EXC_IRQ 0x00 + +EVENT_EXC_FIQ 0x87 PMC +UMASK_EXC_FIQ 0x00 + +EVENT_EXC_SMC 0x88 PMC +UMASK_EXC_SMC 0x00 + +EVENT_EXC_HVC 0x8A PMC +UMASK_EXC_HVC 0x00 + +EVENT_EXC_TRAP_PABORT 0x8B PMC +UMASK_EXC_TRAP_PABORT 0x00 + +EVENT_EXC_TRAP_DABORT 0x8C PMC +UMASK_EXC_TRAP_DABORT 0x00 + +EVENT_EXC_TRAP_OTHER 0x8D PMC +UMASK_EXC_TRAP_OTHER 0x00 + +EVENT_EXC_TRAP_IRQ 0x8E PMC +UMASK_EXC_TRAP_IRQ 0x00 + +EVENT_EXC_TRAP_FIQ 0x8F PMC +UMASK_EXC_TRAP_FIQ 0x00 + +EVENT_RC_LD_SPEC 0x90 PMC +UMASK_RC_LD_SPEC 0x00 + +EVENT_RC_ST_SPEC 0x91 PMC +UMASK_RC_ST_SPEC 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L3_CACHE_LD 0xA0 PMC +UMASK_L3_CACHE_LD 0x00 + +EVENT_L3_CACHE_RD 0xA0 PMC +UMASK_L3_CACHE_RD 0x00 + +EVENT_CNT_CYCLES 0x4004 PMC +UMASK_CNT_CYCLES 0x00 + +EVENT_STALL_BACKEND_MEM 0x4005 PMC +UMASK_STALL_BACKEND_MEM 0x00 + +EVENT_L1I_CACHE_LMISS 0x4006 PMC +UMASK_L1I_CACHE_LMISS 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L2D_CACHE_LMISS_LD 0x4009 PMC +UMASK_L2D_CACHE_LMISS_LD 0x00 + +EVENT_L2D_CACHE_LMISS_RD 0x4009 PMC +UMASK_L2D_CACHE_LMISS_RD 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_L3D_CACHE_LMISS_LD 0x400B PMC +UMASK_L3D_CACHE_LMISS_LD 0x00 + +EVENT_L3D_CACHE_LMISS_RD 0x400B PMC +UMASK_L3D_CACHE_LMISS_RD 0x00 + +EVENT_TRB_WRAP 0x400C PMC +UMASK_TRB_WRAP 0x00 + +EVENT_TRCEXTOUT0 0x4010 PMC +UMASK_TRCEXTOUT0 0x00 + +EVENT_TRCEXTOUT1 0x4011 PMC +UMASK_TRCEXTOUT1 0x00 + +EVENT_TRCEXTOUT2 0x4012 PMC +UMASK_TRCEXTOUT2 0x00 + +EVENT_TRCEXTOUT3 0x4013 PMC +UMASK_TRCEXTOUT3 0x00 + +EVENT_CTI_TRIGOUT4 0x4018 PMC +UMASK_CTI_TRIGOUT4 0x00 + +EVENT_CTI_TRIGOUT5 0x4019 PMC +UMASK_CTI_TRIGOUT5 0x00 + +EVENT_CTI_TRIGOUT6 0x401A PMC +UMASK_CTI_TRIGOUT6 0x00 + +EVENT_CTI_TRIGOUT7 0x401B PMC +UMASK_CTI_TRIGOUT7 0x00 + +EVENT_LDST_ALIGN_LAT 0x4020 PMC +UMASK_LDST_ALIGN_LAT 0x00 + +EVENT_LD_ALIGN_LAT 0x4021 PMC +UMASK_LD_ALIGN_LAT 0x00 + +EVENT_ST_ALIGN_LAT 0x4022 PMC +UMASK_ST_ALIGN_LAT 0x00 + 
+EVENT_MEM_ACCESS_CHECKED 0x4024 PMC +UMASK_MEM_ACCESS_CHECKED 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_MEM_ACCESS_LD_CHECKED 0x4025 PMC +UMASK_MEM_ACCESS_LD_CHECKED 0x00 + +EVENT_MEM_ACCESS_RD_CHECKED 0x4025 PMC +UMASK_MEM_ACCESS_RD_CHECKED 0x00 + +# Added by Thomas Gruber to fit previous architectures +EVENT_MEM_ACCESS_ST_CHECKED 0x4026 PMC +UMASK_MEM_ACCESS_ST_CHECKED 0x00 + +EVENT_MEM_ACCESS_WR_CHECKED 0x4026 PMC +UMASK_MEM_ACCESS_WR_CHECKED 0x00 + +EVENT_ASE_INST_SPEC 0x8005 PMC +UMASK_ASE_INST_SPEC 0x00 + +EVENT_SVE_INST_SPEC 0x8006 PMC +UMASK_SVE_INST_SPEC 0x00 + +EVENT_FP_HP_SPEC 0x8014 PMC +UMASK_FP_HP_SPEC 0x00 + +EVENT_FP_SP_SPEC 0x8018 PMC +UMASK_FP_SP_SPEC 0x00 + +EVENT_FP_DP_SPEC 0x801C PMC +UMASK_FP_DP_SPEC 0x00 + +EVENT_SVE_PRED_SPEC 0x8074 PMC +UMASK_SVE_PRED_SPEC 0x00 + +EVENT_SVE_PRED_EMPTY_SPEC 0x8075 PMC +UMASK_SVE_PRED_EMPTY_SPEC 0x00 + +EVENT_SVE_PRED_FULL_SPEC 0x8076 PMC +UMASK_SVE_PRED_FULL_SPEC 0x00 + +EVENT_SVE_PRED_PARTIAL_SPEC 0x8077 PMC +UMASK_SVE_PRED_PARTIAL_SPEC 0x00 + +EVENT_SVE_PRED_NOT_FULL_SPEC 0x8079 PMC +UMASK_SVE_PRED_NOT_FULL_SPEC 0x00 + +EVENT_SVE_LDFF_SPEC 0x80BC PMC +UMASK_SVE_LDFF_SPEC 0x00 + +EVENT_SVE_LDFF_FAULT_SPEC 0x80BD PMC +UMASK_SVE_LDFF_FAULT_SPEC 0x00 + +EVENT_FP_SCALE_OPS_SPEC 0x80C0 PMC +UMASK_FP_SCALE_OPS_SPEC 0x00 + +EVENT_FP_FIXED_OPS_SPEC 0x80C1 PMC +UMASK_FP_FIXED_OPS_SPEC 0x00 + +EVENT_ASE_SVE_INT8_SPEC 0x80E3 PMC +UMASK_ASE_SVE_INT8_SPEC 0x00 + +EVENT_ASE_SVE_INT16_SPEC 0x80E7 PMC +UMASK_ASE_SVE_INT16_SPEC 0x00 + +EVENT_ASE_SVE_INT32_SPEC 0x80EB PMC +UMASK_ASE_SVE_INT32_SPEC 0x00 + +EVENT_ASE_SVE_INT64_SPEC 0x80EF PMC +UMASK_ASE_SVE_INT64_SPEC 0x00 + + +######### SCF events ##################### + +EVENT_BUS_CYCLES 0x1D SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_BUS_CYCLES 0x00 + +EVENT_CYCLES 0x100000000 SCFFIX +UMASK_CYCLES 0x00 + +EVENT_CMEM_DL_ACCESS 0x1A9 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_DL_ACCESS 0x00 + +EVENT_CMEM_DL_OUTSTANDING 0x1AA SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 
+UMASK_CMEM_DL_OUTSTANDING 0x00 + +EVENT_CMEM_DL_RSP 0x1A8 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_DL_RSP 0x00 + +EVENT_CMEM_EV_ACCESS 0x1AF SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_EV_ACCESS 0x00 + +EVENT_CMEM_EV_OUTSTANDING 0x1B0 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_EV_OUTSTANDING 0x00 + +EVENT_CMEM_EV_RSP 0x1AE SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_EV_RSP 0x00 + +EVENT_CMEM_RD_ACCESS 0x1A6 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_RD_ACCESS 0x00 + +EVENT_CMEM_RD_DATA 0x1A5 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_RD_DATA 0x00 + +EVENT_CMEM_RD_OUTSTANDING 0x1A7 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_RD_OUTSTANDING 0x00 + +EVENT_CMEM_WB_ACCESS 0x1AC SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WB_ACCESS 0x00 + +EVENT_CMEM_WB_DATA 0x1AB SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WB_DATA 0x00 + +EVENT_CMEM_WB_OUTSTANDING 0x1AD SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WB_OUTSTANDING 0x00 + +EVENT_CMEM_WR_ACCESS 0x1CA SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WR_ACCESS 0x00 + +EVENT_CMEM_WR_DATA 0x1B1 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WR_DATA 0x00 + +EVENT_CMEM_WR_OUTSTANDING 0x1B2 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WR_OUTSTANDING 0x00 + +EVENT_CMEM_WR_TOTAL_BYTES 0x1DB SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_CMEM_WR_TOTAL_BYTES 0x00 + +EVENT_GMEM_DL_ACCESS 0x171 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_DL_ACCESS 0x00 + +EVENT_GMEM_DL_OUTSTANDING 0x172 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_DL_OUTSTANDING 0x00 + +EVENT_GMEM_DL_RSP 0x170 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_DL_RSP 0x00 + +EVENT_GMEM_EV_ACCESS 0x177 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_EV_ACCESS 0x00 + +EVENT_GMEM_EV_OUTSTANDING 0x178 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_EV_OUTSTANDING 0x00 + +EVENT_GMEM_EV_RSP 0x176 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_EV_RSP 0x00 + +EVENT_GMEM_RD_ACCESS 0x16e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_RD_ACCESS 0x00 + +EVENT_GMEM_RD_DATA 0x16d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_RD_DATA 0x00 + 
+EVENT_GMEM_RD_OUTSTANDING 0x16f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_RD_OUTSTANDING 0x00 + +EVENT_GMEM_WB_ACCESS 0x174 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WB_ACCESS 0x00 + +EVENT_GMEM_WB_DATA 0x173 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WB_DATA 0x00 + +EVENT_GMEM_WB_OUTSTANDING 0x175 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WB_OUTSTANDING 0x00 + +EVENT_GMEM_WR_ACCESS 0x17b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WR_ACCESS 0x00 + +EVENT_GMEM_WR_DATA 0x179 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WR_DATA 0x00 + +EVENT_GMEM_WR_OUTSTANDING 0x17a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WR_OUTSTANDING 0x00 + +EVENT_GMEM_WR_TOTAL_BYTES 0x1a0 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_GMEM_WR_TOTAL_BYTES 0x00 + +EVENT_OCU_0_CMEM_RD_ACCESS 0x1b7 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_RD_ACCESS 0x00 + +EVENT_OCU_0_CMEM_RD_DATA 0x1b3 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_RD_DATA 0x00 + +EVENT_OCU_0_CMEM_RD_OUTSTANDING 0x1bf SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_0_CMEM_WB_ACCESS 0x1bb SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WB_ACCESS 0x00 + +EVENT_OCU_0_CMEM_WB_DATA 0x1cf SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WB_DATA 0x00 + +EVENT_OCU_0_CMEM_WB_OUTSTANDING 0x1d7 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_0_CMEM_WR_ACCESS 0x1cb SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WR_ACCESS 0x00 + +EVENT_OCU_0_CMEM_WR_DATA 0x1d3 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WR_DATA 0x00 + +EVENT_OCU_0_CMEM_WR_OUTSTANDING 0x1c3 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_CMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_0_GMEM_RD_ACCESS 0x149 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_RD_ACCESS 0x00 + +EVENT_OCU_0_GMEM_RD_DATA 0x145 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_RD_DATA 0x00 + +EVENT_OCU_0_GMEM_RD_OUTSTANDING 0x151 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_0_GMEM_WB_ACCESS 0x14d 
SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WB_ACCESS 0x00 + +EVENT_OCU_0_GMEM_WB_DATA 0x184 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WB_DATA 0x00 + +EVENT_OCU_0_GMEM_WB_OUTSTANDING 0x18c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_0_GMEM_WR_ACCESS 0x188 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WR_ACCESS 0x00 + +EVENT_OCU_0_GMEM_WR_DATA 0x180 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WR_DATA 0x00 + +EVENT_OCU_0_GMEM_WR_OUTSTANDING 0x155 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_GMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_0_REM_RD_ACCESS 0x15d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_RD_ACCESS 0x00 + +EVENT_OCU_0_REM_RD_DATA 0x159 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_RD_DATA 0x00 + +EVENT_OCU_0_REM_RD_OUTSTANDING 0x165 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_RD_OUTSTANDING 0x00 + +EVENT_OCU_0_REM_WB_ACCESS 0x161 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WB_ACCESS 0x00 + +EVENT_OCU_0_REM_WB_DATA 0x194 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WB_DATA 0x00 + +EVENT_OCU_0_REM_WB_OUTSTANDING 0x19c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WB_OUTSTANDING 0x00 + +EVENT_OCU_0_REM_WR_ACCESS 0x198 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WR_ACCESS 0x00 + +EVENT_OCU_0_REM_WR_DATA 0x190 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WR_DATA 0x00 + +EVENT_OCU_0_REM_WR_OUTSTANDING 0x169 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_0_REM_WR_OUTSTANDING 0x00 + +EVENT_OCU_1_CMEM_RD_ACCESS 0x1b8 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_RD_ACCESS 0x00 + +EVENT_OCU_1_CMEM_RD_DATA 0x1b4 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_RD_DATA 0x00 + +EVENT_OCU_1_CMEM_RD_OUTSTANDING 0x1c0 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_1_CMEM_WB_ACCESS 0x1bc SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WB_ACCESS 0x00 + +EVENT_OCU_1_CMEM_WB_DATA 0x1d0 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WB_DATA 0x00 + +EVENT_OCU_1_CMEM_WB_OUTSTANDING 0x1d8 
SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_1_CMEM_WR_ACCESS 0x1cc SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WR_ACCESS 0x00 + +EVENT_OCU_1_CMEM_WR_DATA 0x1d4 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WR_DATA 0x00 + +EVENT_OCU_1_CMEM_WR_OUTSTANDING 0x1c4 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_CMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_1_GMEM_RD_ACCESS 0x14a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_RD_ACCESS 0x00 + +EVENT_OCU_1_GMEM_RD_DATA 0x146 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_RD_DATA 0x00 + +EVENT_OCU_1_GMEM_RD_OUTSTANDING 0x152 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_1_GMEM_WB_ACCESS 0x14e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WB_ACCESS 0x00 + +EVENT_OCU_1_GMEM_WB_DATA 0x185 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WB_DATA 0x00 + +EVENT_OCU_1_GMEM_WB_OUTSTANDING 0x18d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_1_GMEM_WR_ACCESS 0x189 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WR_ACCESS 0x00 + +EVENT_OCU_1_GMEM_WR_DATA 0x181 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WR_DATA 0x00 + +EVENT_OCU_1_GMEM_WR_OUTSTANDING 0x156 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_GMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_1_REM_RD_ACCESS 0x15e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_RD_ACCESS 0x00 + +EVENT_OCU_1_REM_RD_DATA 0x15a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_RD_DATA 0x00 + +EVENT_OCU_1_REM_RD_OUTSTANDING 0x166 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_RD_OUTSTANDING 0x00 + +EVENT_OCU_1_REM_WB_ACCESS 0x162 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WB_ACCESS 0x00 + +EVENT_OCU_1_REM_WB_DATA 0x195 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WB_DATA 0x00 + +EVENT_OCU_1_REM_WB_OUTSTANDING 0x19d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WB_OUTSTANDING 0x00 + +EVENT_OCU_1_REM_WR_ACCESS 0x199 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WR_ACCESS 0x00 + +EVENT_OCU_1_REM_WR_DATA 
0x191 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WR_DATA 0x00 + +EVENT_OCU_1_REM_WR_OUTSTANDING 0x16a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_1_REM_WR_OUTSTANDING 0x00 + +EVENT_OCU_2_CMEM_RD_ACCESS 0x1b9 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_RD_ACCESS 0x00 + +EVENT_OCU_2_CMEM_RD_DATA 0x1b5 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_RD_DATA 0x00 + +EVENT_OCU_2_CMEM_RD_OUTSTANDING 0x1c1 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_2_CMEM_WB_ACCESS 0x1bd SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WB_ACCESS 0x00 + +EVENT_OCU_2_CMEM_WB_DATA 0x1d1 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WB_DATA 0x00 + +EVENT_OCU_2_CMEM_WB_OUTSTANDING 0x1d9 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_2_CMEM_WR_ACCESS 0x1cd SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WR_ACCESS 0x00 + +EVENT_OCU_2_CMEM_WR_DATA 0x1d5 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WR_DATA 0x00 + +EVENT_OCU_2_CMEM_WR_OUTSTANDING 0x1c5 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_CMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_2_GMEM_RD_ACCESS 0x14b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_RD_ACCESS 0x00 + +EVENT_OCU_2_GMEM_RD_DATA 0x147 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_RD_DATA 0x00 + +EVENT_OCU_2_GMEM_RD_OUTSTANDING 0x153 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_2_GMEM_WB_ACCESS 0x14f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WB_ACCESS 0x00 + +EVENT_OCU_2_GMEM_WB_DATA 0x186 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WB_DATA 0x00 + +EVENT_OCU_2_GMEM_WB_OUTSTANDING 0x18e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_2_GMEM_WR_ACCESS 0x18a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WR_ACCESS 0x00 + +EVENT_OCU_2_GMEM_WR_DATA 0x182 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WR_DATA 0x00 + +EVENT_OCU_2_GMEM_WR_OUTSTANDING 0x157 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_GMEM_WR_OUTSTANDING 0x00 + 
+EVENT_OCU_2_REM_RD_ACCESS 0x15f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_RD_ACCESS 0x00 + +EVENT_OCU_2_REM_RD_DATA 0x15b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_RD_DATA 0x00 + +EVENT_OCU_2_REM_RD_OUTSTANDING 0x167 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_RD_OUTSTANDING 0x00 + +EVENT_OCU_2_REM_WB_ACCESS 0x163 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WB_ACCESS 0x00 + +EVENT_OCU_2_REM_WB_DATA 0x196 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WB_DATA 0x00 + +EVENT_OCU_2_REM_WB_OUTSTANDING 0x19e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WB_OUTSTANDING 0x00 + +EVENT_OCU_2_REM_WR_ACCESS 0x19a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WR_ACCESS 0x00 + +EVENT_OCU_2_REM_WR_DATA 0x192 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WR_DATA 0x00 + +EVENT_OCU_2_REM_WR_OUTSTANDING 0x16b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_2_REM_WR_OUTSTANDING 0x00 + +EVENT_OCU_3_CMEM_RD_ACCESS 0x1ba SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_RD_ACCESS 0x00 + +EVENT_OCU_3_CMEM_RD_DATA 0x1b6 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_RD_DATA 0x00 + +EVENT_OCU_3_CMEM_RD_OUTSTANDING 0x1c2 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_3_CMEM_WB_ACCESS 0x1be SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WB_ACCESS 0x00 + +EVENT_OCU_3_CMEM_WB_DATA 0x1d2 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WB_DATA 0x00 + +EVENT_OCU_3_CMEM_WB_OUTSTANDING 0x1da SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_3_CMEM_WR_ACCESS 0x1ce SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WR_ACCESS 0x00 + +EVENT_OCU_3_CMEM_WR_DATA 0x1d6 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WR_DATA 0x00 + +EVENT_OCU_3_CMEM_WR_OUTSTANDING 0x1c6 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_CMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_3_GMEM_RD_ACCESS 0x14c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_RD_ACCESS 0x00 + +EVENT_OCU_3_GMEM_RD_DATA 0x148 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_RD_DATA 0x00 + 
+EVENT_OCU_3_GMEM_RD_OUTSTANDING 0x154 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_RD_OUTSTANDING 0x00 + +EVENT_OCU_3_GMEM_WB_ACCESS 0x150 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WB_ACCESS 0x00 + +EVENT_OCU_3_GMEM_WB_DATA 0x187 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WB_DATA 0x00 + +EVENT_OCU_3_GMEM_WB_OUTSTANDING 0x18f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WB_OUTSTANDING 0x00 + +EVENT_OCU_3_GMEM_WR_ACCESS 0x18b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WR_ACCESS 0x00 + +EVENT_OCU_3_GMEM_WR_DATA 0x183 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WR_DATA 0x00 + +EVENT_OCU_3_GMEM_WR_OUTSTANDING 0x158 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_GMEM_WR_OUTSTANDING 0x00 + +EVENT_OCU_3_REM_RD_ACCESS 0x160 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_RD_ACCESS 0x00 + +EVENT_OCU_3_REM_RD_DATA 0x15c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_RD_DATA 0x00 + +EVENT_OCU_3_REM_RD_OUTSTANDING 0x168 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_RD_OUTSTANDING 0x00 + +EVENT_OCU_3_REM_WB_ACCESS 0x164 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WB_ACCESS 0x00 + +EVENT_OCU_3_REM_WB_DATA 0x197 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WB_DATA 0x00 + +EVENT_OCU_3_REM_WB_OUTSTANDING 0x19f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WB_OUTSTANDING 0x00 + +EVENT_OCU_3_REM_WR_ACCESS 0x19b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WR_ACCESS 0x00 + +EVENT_OCU_3_REM_WR_DATA 0x193 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WR_DATA 0x00 + +EVENT_OCU_3_REM_WR_OUTSTANDING 0x16c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_3_REM_WR_OUTSTANDING 0x00 + +EVENT_OCU_PRB_ACCESS 0x1c7 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_PRB_ACCESS 0x00 + +EVENT_OCU_PRB_DATA 0x1c8 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_PRB_DATA 0x00 + +EVENT_OCU_PRB_OUTSTANDING 0x1c9 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_OCU_PRB_OUTSTANDING 0x00 + +EVENT_REMOTE_SOCKET_RD_ACCESS 0x1a4 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_REMOTE_SOCKET_RD_ACCESS 0x00 + 
+EVENT_REMOTE_SOCKET_RD_DATA 0x1a2 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_REMOTE_SOCKET_RD_DATA 0x00 + +EVENT_REMOTE_SOCKET_RD_OUTSTANDING 0x1a3 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_REMOTE_SOCKET_RD_OUTSTANDING 0x00 + +EVENT_REMOTE_SOCKET_WR_TOTAL_BYTES 0x1a1 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_REMOTE_SOCKET_WR_TOTAL_BYTES 0x00 + +EVENT_SCF_CACHE 0xf2 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SCF_CACHE 0x00 + +EVENT_SCF_CACHE_ALLOCATE 0xf0 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SCF_CACHE_ALLOCATE 0x00 + +EVENT_SCF_CACHE_REFILL 0xf1 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SCF_CACHE_REFILL 0x00 + +EVENT_SCF_CACHE_WB 0xf3 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SCF_CACHE_WB 0x00 + +EVENT_SOCKET_0_DL_ACCESS 0x131 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_DL_ACCESS 0x00 + +EVENT_SOCKET_0_DL_OUTSTANDING 0x119 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_DL_OUTSTANDING 0x00 + +EVENT_SOCKET_0_DL_RSP 0x105 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_DL_RSP 0x00 + +EVENT_SOCKET_0_EV_ACCESS 0x13d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_EV_ACCESS 0x00 + +EVENT_SOCKET_0_EV_OUTSTANDING 0x125 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_EV_OUTSTANDING 0x00 + +EVENT_SOCKET_0_EV_RSP 0x10d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_EV_RSP 0x00 + +EVENT_SOCKET_0_PRB_ACCESS 0x141 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_PRB_ACCESS 0x00 + +EVENT_SOCKET_0_PRB_DATA 0x111 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_PRB_DATA 0x00 + +EVENT_SOCKET_0_PRB_OUTSTANDING 0x129 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_PRB_OUTSTANDING 0x00 + +EVENT_SOCKET_0_RD_ACCESS 0x12d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_RD_ACCESS 0x00 + +EVENT_SOCKET_0_RD_DATA 0x101 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_RD_DATA 0x00 + +EVENT_SOCKET_0_RD_OUTSTANDING 0x115 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_RD_OUTSTANDING 0x00 + +EVENT_SOCKET_0_WB_ACCESS 0x135 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_WB_ACCESS 0x00 + +EVENT_SOCKET_0_WB_DATA 0x109 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 
+UMASK_SOCKET_0_WB_DATA 0x00 + +EVENT_SOCKET_0_WB_OUTSTANDING 0x11d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_WB_OUTSTANDING 0x00 + +EVENT_SOCKET_0_WR_ACCESS 0x139 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_WR_ACCESS 0x00 + +EVENT_SOCKET_0_WR_DATA 0x17c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_WR_DATA 0x00 + +EVENT_SOCKET_0_WR_OUTSTANDING 0x121 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_0_WR_OUTSTANDING 0x00 + +EVENT_SOCKET_1_DL_ACCESS 0x132 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_DL_ACCESS 0x00 + +EVENT_SOCKET_1_DL_OUTSTANDING 0x11a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_DL_OUTSTANDING 0x00 + +EVENT_SOCKET_1_DL_RSP 0x106 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_DL_RSP 0x00 + +EVENT_SOCKET_1_EV_ACCESS 0x13e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_EV_ACCESS 0x00 + +EVENT_SOCKET_1_EV_OUTSTANDING 0x126 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_EV_OUTSTANDING 0x00 + +EVENT_SOCKET_1_EV_RSP 0x10e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_EV_RSP 0x00 + +EVENT_SOCKET_1_PRB_ACCESS 0x142 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_PRB_ACCESS 0x00 + +EVENT_SOCKET_1_PRB_DATA 0x112 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_PRB_DATA 0x00 + +EVENT_SOCKET_1_PRB_OUTSTANDING 0x12a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_PRB_OUTSTANDING 0x00 + +EVENT_SOCKET_1_RD_ACCESS 0x12e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_RD_ACCESS 0x00 + +EVENT_SOCKET_1_RD_DATA 0x102 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_RD_DATA 0x00 + +EVENT_SOCKET_1_RD_OUTSTANDING 0x116 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_RD_OUTSTANDING 0x00 + +EVENT_SOCKET_1_WB_ACCESS 0x136 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WB_ACCESS 0x00 + +EVENT_SOCKET_1_WB_DATA 0x10a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WB_DATA 0x00 + +EVENT_SOCKET_1_WB_OUTSTANDING 0x11e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WB_OUTSTANDING 0x00 + +EVENT_SOCKET_1_WR_ACCESS 0x13a SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WR_ACCESS 0x00 + 
+EVENT_SOCKET_1_WR_DATA 0x17d SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WR_DATA 0x00 + +EVENT_SOCKET_1_WR_OUTSTANDING 0x122 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_1_WR_OUTSTANDING 0x00 + +EVENT_SOCKET_2_DL_ACCESS 0x133 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_DL_ACCESS 0x00 + +EVENT_SOCKET_2_DL_OUTSTANDING 0x11b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_DL_OUTSTANDING 0x00 + +EVENT_SOCKET_2_DL_RSP 0x107 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_DL_RSP 0x00 + +EVENT_SOCKET_2_EV_ACCESS 0x13f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_EV_ACCESS 0x00 + +EVENT_SOCKET_2_EV_OUTSTANDING 0x127 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_EV_OUTSTANDING 0x00 + +EVENT_SOCKET_2_EV_RSP 0x10f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_EV_RSP 0x00 + +EVENT_SOCKET_2_PRB_ACCESS 0x143 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_PRB_ACCESS 0x00 + +EVENT_SOCKET_2_PRB_DATA 0x113 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_PRB_DATA 0x00 + +EVENT_SOCKET_2_PRB_OUTSTANDING 0x12b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_PRB_OUTSTANDING 0x00 + +EVENT_SOCKET_2_RD_ACCESS 0x12f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_RD_ACCESS 0x00 + +EVENT_SOCKET_2_RD_DATA 0x103 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_RD_DATA 0x00 + +EVENT_SOCKET_2_RD_OUTSTANDING 0x117 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_RD_OUTSTANDING 0x00 + +EVENT_SOCKET_2_WB_ACCESS 0x137 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WB_ACCESS 0x00 + +EVENT_SOCKET_2_WB_DATA 0x10b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WB_DATA 0x00 + +EVENT_SOCKET_2_WB_OUTSTANDING 0x11f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WB_OUTSTANDING 0x00 + +EVENT_SOCKET_2_WR_ACCESS 0x13b SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WR_ACCESS 0x00 + +EVENT_SOCKET_2_WR_DATA 0x17e SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WR_DATA 0x00 + +EVENT_SOCKET_2_WR_OUTSTANDING 0x123 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_2_WR_OUTSTANDING 0x00 + +EVENT_SOCKET_3_DL_ACCESS 0x134 
SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_DL_ACCESS 0x00 + +EVENT_SOCKET_3_DL_OUTSTANDING 0x11c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_DL_OUTSTANDING 0x00 + +EVENT_SOCKET_3_DL_RSP 0x108 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_DL_RSP 0x00 + +EVENT_SOCKET_3_EV_ACCESS 0x140 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_EV_ACCESS 0x00 + +EVENT_SOCKET_3_EV_OUTSTANDING 0x128 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_EV_OUTSTANDING 0x00 + +EVENT_SOCKET_3_EV_RSP 0x110 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_EV_RSP 0x00 + +EVENT_SOCKET_3_PRB_ACCESS 0x144 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_PRB_ACCESS 0x00 + +EVENT_SOCKET_3_PRB_DATA 0x114 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_PRB_DATA 0x00 + +EVENT_SOCKET_3_PRB_OUTSTANDING 0x12c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_PRB_OUTSTANDING 0x00 + +EVENT_SOCKET_3_RD_ACCESS 0x130 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_RD_ACCESS 0x00 + +EVENT_SOCKET_3_RD_DATA 0x104 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_RD_DATA 0x00 + +EVENT_SOCKET_3_RD_OUTSTANDING 0x118 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_RD_OUTSTANDING 0x00 + +EVENT_SOCKET_3_WB_ACCESS 0x138 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WB_ACCESS 0x00 + +EVENT_SOCKET_3_WB_DATA 0x10c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WB_DATA 0x00 + +EVENT_SOCKET_3_WB_OUTSTANDING 0x120 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WB_OUTSTANDING 0x00 + +EVENT_SOCKET_3_WR_ACCESS 0x13c SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WR_ACCESS 0x00 + +EVENT_SOCKET_3_WR_DATA 0x17f SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WR_DATA 0x00 + +EVENT_SOCKET_3_WR_OUTSTANDING 0x124 SCF0|SCF1|SCF2|SCF3|SCF4|SCF5 +UMASK_SOCKET_3_WR_OUTSTANDING 0x00 + + +############## (C)NvLink events ################ + +EVENT_CYCLES 0x100000000 CNVFIX|NV0FIX|NV1FIX|PCIEFIX +UMASK_CYCLES 0x00 + +EVENT_RD_BYTES_LOC 0x00 CNV|NV0C|NV1C|PCIE +UMASK_RD_BYTES_LOC 0x00 + +EVENT_RD_BYTES_REM 0x01 CNV|NV0C|NV1C|PCIE +UMASK_RD_BYTES_REM 0x00 + 
+EVENT_RD_CUM_OUTS_LOC 0x0C CNV|NV0C|NV1C|PCIE +UMASK_RD_CUM_OUTS_LOC 0x00 + +EVENT_RD_CUM_OUTS_REM 0x0D CNV|NV0C|NV1C|PCIE +UMASK_RD_CUM_OUTS_REM 0x00 + +EVENT_RD_REQ_LOC 0x06 CNV|NV0C|NV1C|PCIE +UMASK_RD_REQ_LOC 0x00 + +EVENT_RD_REQ_REM 0x07 CNV|NV0C|NV1C|PCIE +UMASK_RD_REQ_REM 0x00 + +EVENT_TOTAL_BYTES_LOC 0x04 CNV|NV0C|NV1C|PCIE +UMASK_TOTAL_BYTES_LOC 0x00 + +EVENT_TOTAL_BYTES_REM 0x05 CNV|NV0C|NV1C|PCIE +UMASK_TOTAL_BYTES_REM 0x00 + +EVENT_TOTAL_REQ_LOC 0x0A CNV|NV0C|NV1C|PCIE +UMASK_TOTAL_REQ_LOC 0x00 + +EVENT_TOTAL_REQ_REM 0x0B CNV|NV0C|NV1C|PCIE +UMASK_TOTAL_REQ_REM 0x00 + +EVENT_WR_BYTES_LOC 0x02 CNV|NV0C|NV1C|PCIE +UMASK_WR_BYTES_LOC 0x00 + +EVENT_WR_BYTES_REM 0x03 CNV|NV0C|NV1C|PCIE +UMASK_WR_BYTES_REM 0x00 + +EVENT_WR_REQ_LOC 0x08 CNV|NV0C|NV1C|PCIE +UMASK_WR_REQ_LOC 0x00 + +EVENT_WR_REQ_REM 0x09 CNV|NV0C|NV1C|PCIE +UMASK_WR_REQ_REM 0x00 + + diff --git a/src/includes/perfmon_perfevent.h b/src/includes/perfmon_perfevent.h index 6daa9e17c..1794940a6 100644 --- a/src/includes/perfmon_perfevent.h +++ b/src/includes/perfmon_perfevent.h @@ -1125,6 +1125,14 @@ int perfmon_setupCountersThread_perfevent( case MBOX5: case MBOX6: case MBOX7: + case MBOX8: + case MBOX9: + case MBOX10: + case MBOX11: + case MBOX12: + case MBOX13: + case MBOX14: + case MBOX15: case CBOX0: case CBOX1: case CBOX2: @@ -1394,6 +1402,11 @@ int perfmon_setupCountersThread_perfevent( has_lock = 1; } } + if ((cpuid_info.family == ARMV8_FAMILY) && (cpuid_info.part == NVIDIA_GRACE) && cpuid_topology.numSockets > 1) + { + DEBUG_PRINT(DEBUGLEV_DEVELOP, Updating uncore type for socket %d on Nvidia Grace, affinity_thread2socket_lookup[cpu_id]); + type += affinity_thread2socket_lookup[cpu_id]; + } if (has_lock) { ret = perf_uncore_setup(&attr, type, event); diff --git a/src/includes/topology.h b/src/includes/topology.h index 5b1544453..33facb0f4 100644 --- a/src/includes/topology.h +++ b/src/includes/topology.h @@ -181,6 +181,7 @@ struct topology_functions { #define APPLE_M1_STUDIO 0x02U #define 
HUAWEI_TSV110 0xD01U #define AWS_GRAVITON3 0xD40U +#define NVIDIA_GRACE 0xD4FU /* ARM vendors */ #define DEFAULT_ARM 0x41U diff --git a/src/perfmon.c b/src/perfmon.c index ec724b64d..757be7fa4 100644 --- a/src/perfmon.c +++ b/src/perfmon.c @@ -88,6 +88,7 @@ #include #include #include +#include #ifdef LIKWID_USE_PERFEVENT #include @@ -1424,6 +1425,14 @@ perfmon_init_maps(void) perfmon_numCounters = perfmon_numCountersGraviton3; translate_types = graviton3_translate_types; break; + case NVIDIA_GRACE: + eventHash = nvidiagrace_arch_events; + perfmon_numArchEvents = perfmon_numArchEventsNvidiaGrace; + counter_map = nvidiagrace_counter_map; + box_map = nvidiagrace_box_map; + perfmon_numCounters = perfmon_numCountersNvidiaGrace; + translate_types = nvidiagrace_translate_types; + break; default: ERROR_PLAIN_PRINT(Unsupported ARMv8 Processor); err = -EINVAL; diff --git a/src/topology.c b/src/topology.c index 94155a007..750828cd7 100644 --- a/src/topology.c +++ b/src/topology.c @@ -128,6 +128,7 @@ static char* arm_cortex_a76 = "ARM Cortex A76"; static char* arm_neoverse_n1 = "ARM Neoverse N1"; static char* arm_neoverse_v1 = "ARM Neoverse V1"; static char* arm_huawei_tsv110 = "Huawei TSV110 (ARMv8)"; +static char* arm_nvidia_grace = "Nvidia Grace"; static char* fujitsu_a64fx = "Fujitsu A64FX"; static char* apple_m1_studio = "Apple M1"; static char* power7_str = "POWER7 architecture"; @@ -188,6 +189,7 @@ static char* short_arm8_neo_n1 = "arm8_n1"; static char* short_arm8_neo_v1 = "arm8_v1"; static char* short_a64fx = "arm64fx"; static char* short_apple_m1 = "apple_m1"; +static char* short_nvidia_grace = "nvidia_grace"; static char* short_power7 = "power7"; static char* short_power8 = "power8"; @@ -1216,6 +1218,10 @@ topology_setName(void) cpuid_info.name = arm_neoverse_v1; cpuid_info.short_name = short_arm8_neo_v1; break; + case NVIDIA_GRACE: + cpuid_info.name = arm_nvidia_grace; + cpuid_info.short_name = short_nvidia_grace; + break; default: return EXIT_FAILURE; break;