forked from RRZE-HPC/likwid
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:RRZE-HPC/likwid
- Loading branch information
Showing
61 changed files
with
988 additions
and
229 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
STREAMS 1
TYPE DOUBLE
FLOPS 28
BYTES 8
DESC Double-precision multiplications and additions with a single load, optimized for NEON
LOADS 1
STORES 0
INSTR_LOOP 29
UOPS 29
// Setup: seed v1-v28 with the same 128 bits (two doubles) from the start of
// the stream. The stream pointer is not advanced here.
// NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64 - confirm
// that the generated kernel wrapper preserves them.
ldr q1, [STR0]
ldr q2, [STR0]
ldr q3, [STR0]
ldr q4, [STR0]
ldr q5, [STR0]
ldr q6, [STR0]
ldr q7, [STR0]
ldr q8, [STR0]
ldr q9, [STR0]
ldr q10, [STR0]
ldr q11, [STR0]
ldr q12, [STR0]
ldr q13, [STR0]
ldr q14, [STR0]
ldr q15, [STR0]
ldr q16, [STR0]
ldr q17, [STR0]
ldr q18, [STR0]
ldr q19, [STR0]
ldr q20, [STR0]
ldr q21, [STR0]
ldr q22, [STR0]
ldr q23, [STR0]
ldr q24, [STR0]
ldr q25, [STR0]
ldr q26, [STR0]
ldr q27, [STR0]
ldr q28, [STR0]
LOOP 2
// One 128-bit read per iteration, post-incrementing the stream pointer by
// 16 bytes (two doubles) so the pointer advance matches the two-element
// iteration step. The fresh data lands in v16 and feeds its multiply below.
ldr q16, [STR0], #16
// 28 vector operations, each reading and writing only its own register, so
// the chains are independent of each other. Pattern: two adds, two multiplies.
fadd v1.2d, v1.2d, v1.2d
fadd v2.2d, v2.2d, v2.2d
fmul v3.2d, v3.2d, v3.2d
fmul v4.2d, v4.2d, v4.2d
fadd v5.2d, v5.2d, v5.2d
fadd v6.2d, v6.2d, v6.2d
fmul v7.2d, v7.2d, v7.2d
fmul v8.2d, v8.2d, v8.2d
fadd v9.2d, v9.2d, v9.2d
fadd v10.2d, v10.2d, v10.2d
fmul v11.2d, v11.2d, v11.2d
fmul v12.2d, v12.2d, v12.2d
fadd v13.2d, v13.2d, v13.2d
fadd v14.2d, v14.2d, v14.2d
fmul v15.2d, v15.2d, v15.2d
fmul v16.2d, v16.2d, v16.2d
fadd v17.2d, v17.2d, v17.2d
fadd v18.2d, v18.2d, v18.2d
fmul v19.2d, v19.2d, v19.2d
fmul v20.2d, v20.2d, v20.2d
fadd v21.2d, v21.2d, v21.2d
fadd v22.2d, v22.2d, v22.2d
fmul v23.2d, v23.2d, v23.2d
fmul v24.2d, v24.2d, v24.2d
fadd v25.2d, v25.2d, v25.2d
fadd v26.2d, v26.2d, v26.2d
fmul v27.2d, v27.2d, v27.2d
fmul v28.2d, v28.2d, v28.2d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
STREAMS 1
TYPE DOUBLE
FLOPS 56
BYTES 8
DESC Double-precision multiplications and additions with a single load, optimized for NEON FMAs
LOADS 1
STORES 0
INSTR_LOOP 29
UOPS 29
// Setup: seed v1-v28 with the same 128 bits (two doubles) from the start of
// the stream. The stream pointer is not advanced here.
// NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64 - confirm
// that the generated kernel wrapper preserves them.
ldr q1, [STR0]
ldr q2, [STR0]
ldr q3, [STR0]
ldr q4, [STR0]
ldr q5, [STR0]
ldr q6, [STR0]
ldr q7, [STR0]
ldr q8, [STR0]
ldr q9, [STR0]
ldr q10, [STR0]
ldr q11, [STR0]
ldr q12, [STR0]
ldr q13, [STR0]
ldr q14, [STR0]
ldr q15, [STR0]
ldr q16, [STR0]
ldr q17, [STR0]
ldr q18, [STR0]
ldr q19, [STR0]
ldr q20, [STR0]
ldr q21, [STR0]
ldr q22, [STR0]
ldr q23, [STR0]
ldr q24, [STR0]
ldr q25, [STR0]
ldr q26, [STR0]
ldr q27, [STR0]
ldr q28, [STR0]
LOOP 2
// One 128-bit read per iteration, post-incrementing the stream pointer by
// 16 bytes (two doubles) so the pointer advance matches the two-element
// iteration step. The fresh data lands in v16 and feeds its FMA below.
ldr q16, [STR0], #16
// 28 fused multiply-adds (vN = vN + vN * vN), each reading and writing only
// its own register, so the chains are independent of each other.
fmla v1.2d, v1.2d, v1.2d
fmla v2.2d, v2.2d, v2.2d
fmla v3.2d, v3.2d, v3.2d
fmla v4.2d, v4.2d, v4.2d
fmla v5.2d, v5.2d, v5.2d
fmla v6.2d, v6.2d, v6.2d
fmla v7.2d, v7.2d, v7.2d
fmla v8.2d, v8.2d, v8.2d
fmla v9.2d, v9.2d, v9.2d
fmla v10.2d, v10.2d, v10.2d
fmla v11.2d, v11.2d, v11.2d
fmla v12.2d, v12.2d, v12.2d
fmla v13.2d, v13.2d, v13.2d
fmla v14.2d, v14.2d, v14.2d
fmla v15.2d, v15.2d, v15.2d
fmla v16.2d, v16.2d, v16.2d
fmla v17.2d, v17.2d, v17.2d
fmla v18.2d, v18.2d, v18.2d
fmla v19.2d, v19.2d, v19.2d
fmla v20.2d, v20.2d, v20.2d
fmla v21.2d, v21.2d, v21.2d
fmla v22.2d, v22.2d, v22.2d
fmla v23.2d, v23.2d, v23.2d
fmla v24.2d, v24.2d, v24.2d
fmla v25.2d, v25.2d, v25.2d
fmla v26.2d, v26.2d, v26.2d
fmla v27.2d, v27.2d, v27.2d
fmla v28.2d, v28.2d, v28.2d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
STREAMS 1
TYPE SINGLE
FLOPS 28
BYTES 4
DESC Single-precision multiplications and additions with a single load, optimized for NEON
LOADS 1
STORES 0
INSTR_LOOP 29
UOPS 29
// Setup: seed v1-v28 with the same 128 bits (four floats) from the start of
// the stream. The stream pointer is not advanced here.
// NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64 - confirm
// that the generated kernel wrapper preserves them.
ldr q1, [STR0]
ldr q2, [STR0]
ldr q3, [STR0]
ldr q4, [STR0]
ldr q5, [STR0]
ldr q6, [STR0]
ldr q7, [STR0]
ldr q8, [STR0]
ldr q9, [STR0]
ldr q10, [STR0]
ldr q11, [STR0]
ldr q12, [STR0]
ldr q13, [STR0]
ldr q14, [STR0]
ldr q15, [STR0]
ldr q16, [STR0]
ldr q17, [STR0]
ldr q18, [STR0]
ldr q19, [STR0]
ldr q20, [STR0]
ldr q21, [STR0]
ldr q22, [STR0]
ldr q23, [STR0]
ldr q24, [STR0]
ldr q25, [STR0]
ldr q26, [STR0]
ldr q27, [STR0]
ldr q28, [STR0]
LOOP 4
// One 128-bit read per iteration, post-incrementing the stream pointer by
// 16 bytes (four floats) so the pointer advance matches the four-element
// iteration step. The fresh data lands in v16 and feeds its multiply below.
ldr q16, [STR0], #16
// 28 vector operations, each reading and writing only its own register, so
// the chains are independent of each other. Pattern: two adds, two multiplies.
fadd v1.4s, v1.4s, v1.4s
fadd v2.4s, v2.4s, v2.4s
fmul v3.4s, v3.4s, v3.4s
fmul v4.4s, v4.4s, v4.4s
fadd v5.4s, v5.4s, v5.4s
fadd v6.4s, v6.4s, v6.4s
fmul v7.4s, v7.4s, v7.4s
fmul v8.4s, v8.4s, v8.4s
fadd v9.4s, v9.4s, v9.4s
fadd v10.4s, v10.4s, v10.4s
fmul v11.4s, v11.4s, v11.4s
fmul v12.4s, v12.4s, v12.4s
fadd v13.4s, v13.4s, v13.4s
fadd v14.4s, v14.4s, v14.4s
fmul v15.4s, v15.4s, v15.4s
fmul v16.4s, v16.4s, v16.4s
fadd v17.4s, v17.4s, v17.4s
fadd v18.4s, v18.4s, v18.4s
fmul v19.4s, v19.4s, v19.4s
fmul v20.4s, v20.4s, v20.4s
fadd v21.4s, v21.4s, v21.4s
fadd v22.4s, v22.4s, v22.4s
fmul v23.4s, v23.4s, v23.4s
fmul v24.4s, v24.4s, v24.4s
fadd v25.4s, v25.4s, v25.4s
fadd v26.4s, v26.4s, v26.4s
fmul v27.4s, v27.4s, v27.4s
fmul v28.4s, v28.4s, v28.4s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
STREAMS 1
TYPE SINGLE
FLOPS 56
BYTES 4
DESC Single-precision multiplications and additions with a single load, optimized for NEON FMAs
LOADS 1
STORES 0
INSTR_LOOP 29
UOPS 29
// Setup: seed v1-v28 with the same 128 bits (four floats) from the start of
// the stream. The stream pointer is not advanced here.
// NOTE(review): v8-v15 are callee-saved (low 64 bits) under AAPCS64 - confirm
// that the generated kernel wrapper preserves them.
ldr q1, [STR0]
ldr q2, [STR0]
ldr q3, [STR0]
ldr q4, [STR0]
ldr q5, [STR0]
ldr q6, [STR0]
ldr q7, [STR0]
ldr q8, [STR0]
ldr q9, [STR0]
ldr q10, [STR0]
ldr q11, [STR0]
ldr q12, [STR0]
ldr q13, [STR0]
ldr q14, [STR0]
ldr q15, [STR0]
ldr q16, [STR0]
ldr q17, [STR0]
ldr q18, [STR0]
ldr q19, [STR0]
ldr q20, [STR0]
ldr q21, [STR0]
ldr q22, [STR0]
ldr q23, [STR0]
ldr q24, [STR0]
ldr q25, [STR0]
ldr q26, [STR0]
ldr q27, [STR0]
ldr q28, [STR0]
LOOP 4
// One 128-bit read per iteration, post-incrementing the stream pointer by
// 16 bytes (four floats) so the pointer advance matches the four-element
// iteration step. The fresh data lands in v16 and feeds its FMA below.
ldr q16, [STR0], #16
// 28 fused multiply-adds (vN = vN + vN * vN), each reading and writing only
// its own register, so the chains are independent of each other.
fmla v1.4s, v1.4s, v1.4s
fmla v2.4s, v2.4s, v2.4s
fmla v3.4s, v3.4s, v3.4s
fmla v4.4s, v4.4s, v4.4s
fmla v5.4s, v5.4s, v5.4s
fmla v6.4s, v6.4s, v6.4s
fmla v7.4s, v7.4s, v7.4s
fmla v8.4s, v8.4s, v8.4s
fmla v9.4s, v9.4s, v9.4s
fmla v10.4s, v10.4s, v10.4s
fmla v11.4s, v11.4s, v11.4s
fmla v12.4s, v12.4s, v12.4s
fmla v13.4s, v13.4s, v13.4s
fmla v14.4s, v14.4s, v14.4s
fmla v15.4s, v15.4s, v15.4s
fmla v16.4s, v16.4s, v16.4s
fmla v17.4s, v17.4s, v17.4s
fmla v18.4s, v18.4s, v18.4s
fmla v19.4s, v19.4s, v19.4s
fmla v20.4s, v20.4s, v20.4s
fmla v21.4s, v21.4s, v21.4s
fmla v22.4s, v22.4s, v22.4s
fmla v23.4s, v23.4s, v23.4s
fmla v24.4s, v24.4s, v24.4s
fmla v25.4s, v25.4s, v25.4s
fmla v26.4s, v26.4s, v26.4s
fmla v27.4s, v27.4s, v27.4s
fmla v28.4s, v28.4s, v28.4s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.