Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port OpenSSL assembly routines to Morello #2120

Open
wants to merge 19 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 69 additions & 56 deletions crypto/openssl/crypto/aes/asm/aesv8-armx.pl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,12 @@

#if __ARM_MAX_ARCH__>=7
___
$code.=".arch armv8-a+crypto\n.text\n" if ($flavour =~ /64/);
$code.=<<___ if ($flavour =~ /64/);
#ifndef __CHERI_PURE_CAPABILITY__
.arch armv8-a+crypto
#endif
.text
___
$code.=<<___ if ($flavour !~ /64/);
.arch armv7-a // don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
Expand All @@ -101,17 +106,20 @@
# transliterate common code to either flavour with regex vodoo.
#
{{{
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($inp,$bits,$out,$ptr,$rval,$rounds)=("PTR(0)","w1","PTR(2)","PTR(3)","x3","w12");

Check warning on line 109 in crypto/openssl/crypto/aes/asm/aesv8-armx.pl

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
my ($inpx,$outx)=("x0","x2");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));


$code.=<<___;
.align 5
.type .Lrcon,%object
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.size .Lrcon, . - .Lrcon

.globl ${prefix}_set_encrypt_key
.type ${prefix}_set_encrypt_key,%function
Expand All @@ -122,16 +130,16 @@
$code.=<<___ if ($flavour =~ /64/);
AARCH64_VALID_CALL_TARGET
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp PTR(29),PTR(30),[PTRN(sp),#-(2*PTR_WIDTH)]!
add PTR(29),PTRN(sp),#0
___
$code.=<<___;
mov $ptr,#-1
cmp $inp,#0
mov $rval,#-1
cmp $inpx,#0
b.eq .Lenc_key_abort
cmp $out,#0
cmp $outx,#0
b.eq .Lenc_key_abort
mov $ptr,#-2
mov $rval,#-2
cmp $bits,#128
b.lt .Lenc_key_abort
cmp $bits,#256
Expand Down Expand Up @@ -283,11 +291,11 @@

.Ldone:
str $rounds,[$out]
mov $ptr,#0
mov $rval,#0

.Lenc_key_abort:
mov x0,$ptr // return value
`"ldr x29,[sp],#16" if ($flavour =~ /64/)`
mov x0,$rval // return value
`"ldr PTR(29),[PTRN(sp)],#(2*PTR_WIDTH)" if ($flavour =~ /64/)`
ret
.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key

Expand All @@ -298,8 +306,8 @@
___
$code.=<<___ if ($flavour =~ /64/);
AARCH64_SIGN_LINK_REGISTER
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp PTR(29),PTR(30),[PTRN(sp),#-(2*PTR_WIDTH)]!
add PTR(29),PTRN(sp),#0
___
$code.=<<___ if ($flavour !~ /64/);
stmdb sp!,{r4,lr}
Expand Down Expand Up @@ -340,7 +348,7 @@
ldmia sp!,{r4,pc}
___
$code.=<<___ if ($flavour =~ /64/);
ldp x29,x30,[sp],#16
ldp PTR(29),PTR(30),[PTRN(sp)],#(2*PTR_WIDTH)
AARCH64_VALIDATE_LINK_REGISTER
ret
___
Expand All @@ -352,7 +360,7 @@
sub gen_block () {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my ($inp,$out,$key)=map("PTR($_)",(0..2));
my $rounds="w3";
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));

Expand Down Expand Up @@ -422,8 +430,8 @@
# store, just looks like what the original ECB implementation does.

{{{
my ($inp,$out,$len,$key)=map("x$_",(0..3));
my ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8");
my ($inp,$out,$len,$key)=("PTR(0)","PTR(1)","x2","PTR(3)");
my ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","PTR(7)","x8");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));

my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
Expand Down Expand Up @@ -541,8 +549,8 @@
.Lecb_big_size:
___
$code.=<<___ if ($flavour =~ /64/);
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp PTR(29),PTR(30),[PTRN(sp),#-(2*PTR_WIDTH)]!
add PTR(29),PTRN(sp),#0
___
$code.=<<___ if ($flavour !~ /64/);
mov ip,sp
Expand Down Expand Up @@ -1217,7 +1225,7 @@
ldmia sp!,{r4-r8,pc}
___
$code.=<<___ if ($flavour =~ /64/);
ldr x29,[sp],#16
ldr PTR(29),[PTRN(sp)],#(2*PTR_WIDTH)
___
$code.=<<___ if ($flavour =~ /64/);
.Lecb_Final_abort:
Expand All @@ -1228,12 +1236,13 @@
___
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
my ($inp,$out,$len,$key,$ivp)=("PTR(0)","PTR(1)","x2","PTR(3)","PTR(4)");
my $enc="w5";
my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","PTR(7)","x8","x12");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));

my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
my ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key);
my ($key4,$key5,$key6,$key7)=("PTR(6)","PTR(12)","PTR(14)",$key);

### q8-q15 preloaded key schedule

Expand All @@ -1246,8 +1255,8 @@
$code.=<<___ if ($flavour =~ /64/);
AARCH64_VALID_CALL_TARGET
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp PTR(29),PTR(30),[PTRN(sp),#-(2*PTR_WIDTH)]!
add PTR(29),PTRN(sp),#0
___
$code.=<<___ if ($flavour !~ /64/);
mov ip,sp
Expand Down Expand Up @@ -1744,16 +1753,16 @@
ldmia sp!,{r4-r8,pc}
___
$code.=<<___ if ($flavour =~ /64/);
ldr x29,[sp],#16
ldr PTR(29),[PTRN(sp)],#(2*PTR_WIDTH)
ret
___
$code.=<<___;
.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
___
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
my ($rounds,$cnt,$key_)=("w5","w6","x7");
my ($inp,$out,$len,$key,$ivp)=("PTR(0)","PTR(1)","x2","PTR(3)","PTR(4)");
my ($rounds,$cnt,$key_)=("w5","w6","PTR(7)");
my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
my $step="x12"; # aliases with $tctr2

Expand All @@ -1776,8 +1785,8 @@
$code.=<<___ if ($flavour =~ /64/);
AARCH64_VALID_CALL_TARGET
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp PTR(29),PTR(30),[PTRN(sp),#-(2*PTR_WIDTH)]!
add PTR(29),PTRN(sp),#0
___
$code.=<<___ if ($flavour !~ /64/);
mov ip,sp
Expand Down Expand Up @@ -2196,7 +2205,7 @@
ldmia sp!,{r4-r10,pc}
___
$code.=<<___ if ($flavour =~ /64/);
ldr x29,[sp],#16
ldr PTR(29),[PTRN(sp)],#(2*PTR_WIDTH)
ret
___
$code.=<<___;
Expand Down Expand Up @@ -2234,10 +2243,12 @@
# plain text block.

{{{
my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
my ($inp,$out,$len,$key1,$key2,$ivp)=("PTR(0)","PTR(1)","x2","PTR(3)","PTR(4)","PTR(5)");

Check warning on line 2246 in crypto/openssl/crypto/aes/asm/aesv8-armx.pl

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
my ($ivpx)=("x5");
my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","PTR(7)","x8","x9","x10");

Check warning on line 2248 in crypto/openssl/crypto/aes/asm/aesv8-armx.pl

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("PTR(13)","w14","w15","PTR(20)");
my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
my ($tailcntp,$midnump,$constnump)=("PTR(21)","PTR(22)","PTR(19)");
my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
my ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b");
Expand Down Expand Up @@ -2346,10 +2357,10 @@
.Lxts_enc_big_size:
___
$code.=<<___ if ($flavour =~ /64/);
stp $constnumx,$tmpinp,[sp,#-64]!
stp $tailcnt,$midnumx,[sp,#48]
stp $ivd10,$ivd20,[sp,#32]
stp $ivd30,$ivd40,[sp,#16]
bsdjhb marked this conversation as resolved.
Show resolved Hide resolved
stp $constnump,$tmpinp,[PTRN(sp),#-(4*PTR_WIDTH+32)]!
stp $tailcntp,$midnump,[PTRN(sp),#(2*PTR_WIDTH)]
stp $ivd10,$ivd20,[PTRN(sp),#(4*PTR_WIDTH)]
stp $ivd30,$ivd40,[PTRN(sp),#(4*PTR_WIDTH+16)]

// tailcnt store the tail value of length%16.
and $tailcnt,$len,#0xf
Expand Down Expand Up @@ -2400,7 +2411,7 @@

vld1.32 {q8-q9},[$key1] // load key schedule...
sub $rounds0,$rounds0,#6
add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys
add $key_,$key1,$ivpx,lsl#4 // pointer to last 7 round keys
sub $rounds0,$rounds0,#2
vld1.32 {q10-q11},[$key_],#32
vld1.32 {q12-q13},[$key_],#32
Expand Down Expand Up @@ -2901,21 +2912,23 @@
vst1.8 {$tmpin},[$out]

.Lxts_abort:
ldp $tailcnt,$midnumx,[sp,#48]
ldp $ivd10,$ivd20,[sp,#32]
ldp $ivd30,$ivd40,[sp,#16]
ldp $constnumx,$tmpinp,[sp],#64
ldp $tailcntp,$midnump,[PTRN(sp),#(2*PTR_WIDTH)]
ldp $ivd10,$ivd20,[PTRN(sp),#(4*PTR_WIDTH)]
ldp $ivd30,$ivd40,[PTRN(sp),#(4*PTR_WIDTH+16)]
ldp $constnump,$tmpinp,[PTRN(sp)],#(4*PTR_WIDTH+32)
.Lxts_enc_final_abort:
ret
.size ${prefix}_xts_encrypt,.-${prefix}_xts_encrypt
___

}}}
{{{
my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
my ($inp,$out,$len,$key1,$key2,$ivp)=("PTR(0)","PTR(1)","x2","PTR(3)","PTR(4)","PTR(5)");

Check warning on line 2926 in crypto/openssl/crypto/aes/asm/aesv8-armx.pl

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
my ($ivpx)=("x5");
my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","PTR(7)","x8","x9","x10");

Check warning on line 2928 in crypto/openssl/crypto/aes/asm/aesv8-armx.pl

View workflow job for this annotation

GitHub Actions / Style Checker

line over 80 characters
my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("PTR(13)","w14","w15","PTR(20)");
my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
my ($tailcntp,$midnump,$constnump)=("PTR(21)","PTR(22)","PTR(19)");
my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
my ($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b");
Expand Down Expand Up @@ -3021,10 +3034,10 @@
.Lxts_dec_big_size:
___
$code.=<<___ if ($flavour =~ /64/);
stp $constnumx,$tmpinp,[sp,#-64]!
stp $tailcnt,$midnumx,[sp,#48]
stp $ivd10,$ivd20,[sp,#32]
stp $ivd30,$ivd40,[sp,#16]
stp $constnump,$tmpinp,[PTRN(sp),#-(4*PTR_WIDTH+32)]!
stp $tailcntp,$midnump,[PTRN(sp),#(2*PTR_WIDTH)]
stp $ivd10,$ivd20,[PTRN(sp),#(4*PTR_WIDTH)]
stp $ivd30,$ivd40,[PTRN(sp),#(4*PTR_WIDTH+16)]

and $tailcnt,$len,#0xf
and $len,$len,#-16
Expand Down Expand Up @@ -3080,7 +3093,7 @@

vld1.32 {q8-q9},[$key1] // load key schedule...
sub $rounds0,$rounds0,#6
add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys
add $key_,$key1,$ivpx,lsl#4 // pointer to last 7 round keys
sub $rounds0,$rounds0,#2
vld1.32 {q10-q11},[$key_],#32 // load key schedule...
vld1.32 {q12-q13},[$key_],#32
Expand Down Expand Up @@ -3553,7 +3566,7 @@
tst $tailcnt,#0xf
b.eq .Lxts_dec_abort
// Processing the last two blocks with cipher stealing.
mov x7,x3
mov $key_,$key1
cbnz x2,.Lxts_dec_1st_done
vld1.8 {$dat0},[$inp],#16

Expand Down Expand Up @@ -3622,10 +3635,10 @@
vst1.8 {$tmpin},[$out]

.Lxts_dec_abort:
ldp $tailcnt,$midnumx,[sp,#48]
ldp $ivd10,$ivd20,[sp,#32]
ldp $ivd30,$ivd40,[sp,#16]
ldp $constnumx,$tmpinp,[sp],#64
ldp $tailcntp,$midnump,[PTRN(sp),#(2*PTR_WIDTH)]
ldp $ivd10,$ivd20,[PTRN(sp),#(4*PTR_WIDTH)]
ldp $ivd30,$ivd40,[PTRN(sp),#(4*PTR_WIDTH+16)]
ldp $constnump,$tmpinp,[PTRN(sp)],#(4*PTR_WIDTH+32)

.Lxts_dec_final_abort:
ret
Expand Down
Loading
Loading