public inbox for git-commits@fedoraproject.org
help / color / mirror / Atom feed
From: Tomas Mraz <tmraz@fedoraproject.org>
To: git-commits@fedoraproject.org
Subject: [rpms/openssl] rebase_40beta: Update to the 1.1.1i release fixing CVE-2020-1971
Date: Tue, 09 Jun 2026 12:44:52 GMT [thread overview]
Message-ID: <178100909242.1.579790369096856438.rpms-openssl-a07706cf0e50@fedoraproject.org> (raw)
A new commit has been pushed.
Repo : rpms/openssl
Branch : rebase_40beta
Commit : a07706cf0e50b02a61d3cb10ecad554d4ac4240c
Author : Tomas Mraz <tmraz@fedoraproject.org>
Date : 2020-12-09T10:49:38+01:00
Stats : +2573/-297 in 6 file(s)
URL : https://src.fedoraproject.org/rpms/openssl/c/a07706cf0e50b02a61d3cb10ecad554d4ac4240c?branch=rebase_40beta
Log:
Update to the 1.1.1i release fixing CVE-2020-1971
---
diff --git a/.gitignore b/.gitignore
index 3305a0f..d1abce3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,4 @@ openssl-1.0.0a-usa.tar.bz2
/openssl-1.1.1f-hobbled.tar.xz
/openssl-1.1.1g-hobbled.tar.xz
/openssl-1.1.1h-hobbled.tar.xz
+/openssl-1.1.1i-hobbled.tar.xz
diff --git a/openssl-1.1.1-arm-update.patch b/openssl-1.1.1-arm-update.patch
index 998905f..2b8c549 100644
--- a/openssl-1.1.1-arm-update.patch
+++ b/openssl-1.1.1-arm-update.patch
@@ -1,6 +1,6 @@
-diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl
---- openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl 2019-11-20 11:36:22.389506155 +0100
+diff -up openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl
+--- openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/aes/asm/aesv8-armx.pl 2020-12-09 10:39:50.645705385 +0100
@@ -27,44 +27,72 @@
# CBC encrypt case. On Cortex-A57 parallelizable mode performance
# seems to be limited by sheer amount of NEON instructions...
@@ -85,10 +85,49 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
___
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
-@@ -514,6 +542,13 @@ $code.=<<___;
- ___
- {
- my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+@@ -361,6 +389,836 @@ ___
+ &gen_block("en");
+ &gen_block("de");
+ }}}
++
++# Performance in cycles per byte.
++# Processed with AES-ECB using different key sizes.
++# It shows the value before and after optimization as below:
++# (before/after):
++#
++# AES-128-ECB AES-192-ECB AES-256-ECB
++# Cortex-A57 1.85/0.82 2.16/0.96 2.47/1.10
++# Cortex-A72 1.64/0.85 1.82/0.99 2.13/1.14
++
++# Optimization is implemented by loop unrolling and interleaving.
++# Commonly, we choose the unrolling factor as 5; if the input
++# data size is smaller than 5 blocks, but not smaller than 3 blocks,
++# we choose 3 as the unrolling factor.
++# If the input data size dsize >= 5*16 bytes, then take 5 blocks
++# as one iteration, every loop the left size lsize -= 5*16.
++# If 5*16 > lsize >= 3*16 bytes, take 3 blocks as one iteration,
++# every loop lsize -=3*16.
++# If lsize < 3*16 bytes, treat them as the tail, interleave the
++# two blocks AES instructions.
++# There is one special case, if the original input data size dsize
++# = 16 bytes, we will treat it separately to improve the
++# performance: one independent code block without LR, FP load and
++# store, just looks like what the original ECB implementation does.
++
++{{{
++my ($inp,$out,$len,$key)=map("x$_",(0..3));
++my ($enc,$rounds,$cnt,$key_,$step)=("w4","w5","w6","x7","x8");
++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
++
++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
++
++### q7 last round key
++### q10-q15 q7 Last 7 round keys
++### q8-q9 preloaded round keys except last 7 keys for big size
++### q5, q6, q8-q9 preloaded round keys except last 7 keys for only 16 byte
++
++{
++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+
+my ($dat3,$in3,$tmp3); # used only in 64-bit mode
+my ($dat4,$in4,$tmp4);
@@ -96,26 +135,478 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
+}
+
- $code.=<<___;
- .align 5
- .Lcbc_dec:
-@@ -530,7 +565,196 @@ $code.=<<___;
- vorr $in0,$dat,$dat
- vorr $in1,$dat1,$dat1
- vorr $in2,$dat2,$dat2
++$code.=<<___;
++.globl ${prefix}_ecb_encrypt
++.type ${prefix}_ecb_encrypt,%function
++.align 5
++${prefix}_ecb_encrypt:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ subs $len,$len,#16
++ // Original input data size bigger than 16, jump to big size processing.
++ b.ne .Lecb_big_size
++ vld1.8 {$dat0},[$inp]
++ cmp $enc,#0 // en- or decrypting?
++ ldr $rounds,[$key,#240]
++ vld1.32 {q5-q6},[$key],#32 // load key schedule...
++
++ b.eq .Lecb_small_dec
++ aese $dat0,q5
++ aesmc $dat0,$dat0
++ vld1.32 {q8-q9},[$key],#32 // load key schedule...
++ aese $dat0,q6
++ aesmc $dat0,$dat0
++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-ecb processing
++ b.eq .Lecb_128_enc
++.Lecb_round_loop:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ vld1.32 {q8},[$key],#16 // load key schedule...
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ vld1.32 {q9},[$key],#16 // load key schedule...
++ subs $rounds,$rounds,#2 // bias
++ b.gt .Lecb_round_loop
++.Lecb_128_enc:
++ vld1.32 {q10-q11},[$key],#32 // load key schedule...
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ vld1.32 {q12-q13},[$key],#32 // load key schedule...
++ aese $dat0,q10
++ aesmc $dat0,$dat0
++ aese $dat0,q11
++ aesmc $dat0,$dat0
++ vld1.32 {q14-q15},[$key],#32 // load key schedule...
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ vld1.32 {$rndlast},[$key]
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat0,q15
++ veor $dat0,$dat0,$rndlast
++ vst1.8 {$dat0},[$out]
++ b .Lecb_Final_abort
++.Lecb_small_dec:
++ aesd $dat0,q5
++ aesimc $dat0,$dat0
++ vld1.32 {q8-q9},[$key],#32 // load key schedule...
++ aesd $dat0,q6
++ aesimc $dat0,$dat0
++ subs $rounds,$rounds,#10 // bias
++ b.eq .Lecb_128_dec
++.Lecb_dec_round_loop:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ vld1.32 {q8},[$key],#16 // load key schedule...
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ vld1.32 {q9},[$key],#16 // load key schedule...
++ subs $rounds,$rounds,#2 // bias
++ b.gt .Lecb_dec_round_loop
++.Lecb_128_dec:
++ vld1.32 {q10-q11},[$key],#32 // load key schedule...
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ vld1.32 {q12-q13},[$key],#32 // load key schedule...
++ aesd $dat0,q10
++ aesimc $dat0,$dat0
++ aesd $dat0,q11
++ aesimc $dat0,$dat0
++ vld1.32 {q14-q15},[$key],#32 // load key schedule...
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ vld1.32 {$rndlast},[$key]
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat0,q15
++ veor $dat0,$dat0,$rndlast
++ vst1.8 {$dat0},[$out]
++ b .Lecb_Final_abort
++.Lecb_big_size:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp x29,x30,[sp,#-16]!
++ add x29,sp,#0
++___
++$code.=<<___ if ($flavour !~ /64/);
++ mov ip,sp
++ stmdb sp!,{r4-r8,lr}
++ vstmdb sp!,{d8-d15} @ ABI specification says so
++ ldmia ip,{r4-r5} @ load remaining args
++ subs $len,$len,#16
++___
++$code.=<<___;
++ mov $step,#16
++ b.lo .Lecb_done
++ cclr $step,eq
++
++ cmp $enc,#0 // en- or decrypting?
++ ldr $rounds,[$key,#240]
++ and $len,$len,#-16
++ vld1.8 {$dat},[$inp],$step
++
++ vld1.32 {q8-q9},[$key] // load key schedule...
++ sub $rounds,$rounds,#6
++ add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
++ sub $rounds,$rounds,#2
++ vld1.32 {q10-q11},[$key_],#32
++ vld1.32 {q12-q13},[$key_],#32
++ vld1.32 {q14-q15},[$key_],#32
++ vld1.32 {$rndlast},[$key_]
++
++ add $key_,$key,#32
++ mov $cnt,$rounds
++ b.eq .Lecb_dec
++
++ vld1.8 {$dat1},[$inp],#16
++ subs $len,$len,#32 // bias
++ add $cnt,$rounds,#2
++ vorr $in1,$dat1,$dat1
++ vorr $dat2,$dat1,$dat1
++ vorr $dat1,$dat,$dat
++ b.lo .Lecb_enc_tail
++
++ vorr $dat1,$in1,$in1
++ vld1.8 {$dat2},[$inp],#16
+___
+$code.=<<___ if ($flavour =~ /64/);
+ cmp $len,#32
-+ b.lo .Loop3x_cbc_dec
++ b.lo .Loop3x_ecb_enc
+
+ vld1.8 {$dat3},[$inp],#16
+ vld1.8 {$dat4},[$inp],#16
-+ sub $len,$len,#32 // bias
++ sub $len,$len,#32 // bias
+ mov $cnt,$rounds
-+ vorr $in3,$dat3,$dat3
-+ vorr $in4,$dat4,$dat4
+
-+.Loop5x_cbc_dec:
++.Loop5x_ecb_enc:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat3,q8
++ aesmc $dat3,$dat3
++ aese $dat4,q8
++ aesmc $dat4,$dat4
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat3,q9
++ aesmc $dat3,$dat3
++ aese $dat4,q9
++ aesmc $dat4,$dat4
++ vld1.32 {q9},[$key_],#16
++ b.gt .Loop5x_ecb_enc
++
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat3,q8
++ aesmc $dat3,$dat3
++ aese $dat4,q8
++ aesmc $dat4,$dat4
++ cmp $len,#0x40 // because .Lecb_enc_tail4x
++ sub $len,$len,#0x50
++
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat3,q9
++ aesmc $dat3,$dat3
++ aese $dat4,q9
++ aesmc $dat4,$dat4
++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo
++ mov $key_,$key
++
++ aese $dat0,q10
++ aesmc $dat0,$dat0
++ aese $dat1,q10
++ aesmc $dat1,$dat1
++ aese $dat2,q10
++ aesmc $dat2,$dat2
++ aese $dat3,q10
++ aesmc $dat3,$dat3
++ aese $dat4,q10
++ aesmc $dat4,$dat4
++ add $inp,$inp,x6 // $inp is adjusted in such way that
++ // at exit from the loop $dat1-$dat4
++ // are loaded with last "words"
++ add x6,$len,#0x60 // because .Lecb_enc_tail4x
++
++ aese $dat0,q11
++ aesmc $dat0,$dat0
++ aese $dat1,q11
++ aesmc $dat1,$dat1
++ aese $dat2,q11
++ aesmc $dat2,$dat2
++ aese $dat3,q11
++ aesmc $dat3,$dat3
++ aese $dat4,q11
++ aesmc $dat4,$dat4
++
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ aese $dat3,q12
++ aesmc $dat3,$dat3
++ aese $dat4,q12
++ aesmc $dat4,$dat4
++
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ aese $dat3,q13
++ aesmc $dat3,$dat3
++ aese $dat4,q13
++ aesmc $dat4,$dat4
++
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ aese $dat3,q14
++ aesmc $dat3,$dat3
++ aese $dat4,q14
++ aesmc $dat4,$dat4
++
++ aese $dat0,q15
++ vld1.8 {$in0},[$inp],#16
++ aese $dat1,q15
++ vld1.8 {$in1},[$inp],#16
++ aese $dat2,q15
++ vld1.8 {$in2},[$inp],#16
++ aese $dat3,q15
++ vld1.8 {$in3},[$inp],#16
++ aese $dat4,q15
++ vld1.8 {$in4},[$inp],#16
++ cbz x6,.Lecb_enc_tail4x
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$rndlast,$dat0
++ vorr $dat0,$in0,$in0
++ veor $tmp1,$rndlast,$dat1
++ vorr $dat1,$in1,$in1
++ veor $tmp2,$rndlast,$dat2
++ vorr $dat2,$in2,$in2
++ veor $tmp3,$rndlast,$dat3
++ vorr $dat3,$in3,$in3
++ veor $tmp4,$rndlast,$dat4
++ vst1.8 {$tmp0},[$out],#16
++ vorr $dat4,$in4,$in4
++ vst1.8 {$tmp1},[$out],#16
++ mov $cnt,$rounds
++ vst1.8 {$tmp2},[$out],#16
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++ b.hs .Loop5x_ecb_enc
++
++ add $len,$len,#0x50
++ cbz $len,.Lecb_done
++
++ add $cnt,$rounds,#2
++ subs $len,$len,#0x30
++ vorr $dat0,$in2,$in2
++ vorr $dat1,$in3,$in3
++ vorr $dat2,$in4,$in4
++ b.lo .Lecb_enc_tail
++
++ b .Loop3x_ecb_enc
++
++.align 4
++.Lecb_enc_tail4x:
++ veor $tmp1,$rndlast,$dat1
++ veor $tmp2,$rndlast,$dat2
++ veor $tmp3,$rndlast,$dat3
++ veor $tmp4,$rndlast,$dat4
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$tmp2},[$out],#16
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++
++ b .Lecb_done
++.align 4
++___
++$code.=<<___;
++.Loop3x_ecb_enc:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Loop3x_ecb_enc
++
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ subs $len,$len,#0x30
++ mov.lo x6,$len // x6, $cnt, is zero at this point
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ add $inp,$inp,x6 // $inp is adjusted in such way that
++ // at exit from the loop $dat1-$dat2
++ // are loaded with last "words"
++ mov $key_,$key
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ vld1.8 {$in0},[$inp],#16
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ vld1.8 {$in1},[$inp],#16
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ vld1.8 {$in2},[$inp],#16
++ aese $dat0,q15
++ aese $dat1,q15
++ aese $dat2,q15
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ add $cnt,$rounds,#2
++ veor $tmp0,$rndlast,$dat0
++ veor $tmp1,$rndlast,$dat1
++ veor $dat2,$dat2,$rndlast
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp0},[$out],#16
++ vorr $dat0,$in0,$in0
++ vst1.8 {$tmp1},[$out],#16
++ vorr $dat1,$in1,$in1
++ vst1.8 {$dat2},[$out],#16
++ vorr $dat2,$in2,$in2
++ b.hs .Loop3x_ecb_enc
++
++ cmn $len,#0x30
++ b.eq .Lecb_done
++ nop
++
++.Lecb_enc_tail:
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lecb_enc_tail
++
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ cmn $len,#0x20
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ aese $dat1,q15
++ aese $dat2,q15
++ b.eq .Lecb_enc_one
++ veor $tmp1,$rndlast,$dat1
++ veor $tmp2,$rndlast,$dat2
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$tmp2},[$out],#16
++ b .Lecb_done
++
++.Lecb_enc_one:
++ veor $tmp1,$rndlast,$dat2
++ vst1.8 {$tmp1},[$out],#16
++ b .Lecb_done
++___
++
++$code.=<<___;
++.align 5
++.Lecb_dec:
++ vld1.8 {$dat1},[$inp],#16
++ subs $len,$len,#32 // bias
++ add $cnt,$rounds,#2
++ vorr $in1,$dat1,$dat1
++ vorr $dat2,$dat1,$dat1
++ vorr $dat1,$dat,$dat
++ b.lo .Lecb_dec_tail
++
++ vorr $dat1,$in1,$in1
++ vld1.8 {$dat2},[$inp],#16
++___
++$code.=<<___ if ($flavour =~ /64/);
++ cmp $len,#32
++ b.lo .Loop3x_ecb_dec
++
++ vld1.8 {$dat3},[$inp],#16
++ vld1.8 {$dat4},[$inp],#16
++ sub $len,$len,#32 // bias
++ mov $cnt,$rounds
++
++.Loop5x_ecb_dec:
+ aesd $dat0,q8
+ aesimc $dat0,$dat0
+ aesd $dat1,q8
@@ -139,7 +630,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ aesd $dat4,q9
+ aesimc $dat4,$dat4
+ vld1.32 {q9},[$key_],#16
-+ b.gt .Loop5x_cbc_dec
++ b.gt .Loop5x_ecb_dec
+
+ aesd $dat0,q8
+ aesimc $dat0,$dat0
@@ -151,8 +642,8 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ aesimc $dat3,$dat3
+ aesd $dat4,q8
+ aesimc $dat4,$dat4
-+ cmp $len,#0x40 // because .Lcbc_tail4x
-+ sub $len,$len,#0x50
++ cmp $len,#0x40 // because .Lecb_tail4x
++ sub $len,$len,#0x50
+
+ aesd $dat0,q9
+ aesimc $dat0,$dat0
@@ -164,8 +655,8 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ aesimc $dat3,$dat3
+ aesd $dat4,q9
+ aesimc $dat4,$dat4
-+ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo
-+ mov $key_,$key
++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo
++ mov $key_,$key
+
+ aesd $dat0,q10
+ aesimc $dat0,$dat0
@@ -177,10 +668,10 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ aesimc $dat3,$dat3
+ aesd $dat4,q10
+ aesimc $dat4,$dat4
-+ add $inp,$inp,x6 // $inp is adjusted in such way that
-+ // at exit from the loop $dat1-$dat4
-+ // are loaded with last "words"
-+ add x6,$len,#0x60 // because .Lcbc_tail4x
++ add $inp,$inp,x6 // $inp is adjusted in such way that
++ // at exit from the loop $dat1-$dat4
++ // are loaded with last "words"
++ add x6,$len,#0x60 // because .Lecb_tail4x
+
+ aesd $dat0,q11
+ aesimc $dat0,$dat0
@@ -226,109 +717,455 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ aesd $dat4,q14
+ aesimc $dat4,$dat4
+
-+ veor $tmp0,$ivec,$rndlast
+ aesd $dat0,q15
-+ veor $tmp1,$in0,$rndlast
-+ vld1.8 {$in0},[$inp],#16
++ vld1.8 {$in0},[$inp],#16
+ aesd $dat1,q15
-+ veor $tmp2,$in1,$rndlast
-+ vld1.8 {$in1},[$inp],#16
++ vld1.8 {$in1},[$inp],#16
+ aesd $dat2,q15
-+ veor $tmp3,$in2,$rndlast
-+ vld1.8 {$in2},[$inp],#16
++ vld1.8 {$in2},[$inp],#16
+ aesd $dat3,q15
-+ veor $tmp4,$in3,$rndlast
-+ vld1.8 {$in3},[$inp],#16
++ vld1.8 {$in3},[$inp],#16
+ aesd $dat4,q15
-+ vorr $ivec,$in4,$in4
-+ vld1.8 {$in4},[$inp],#16
-+ cbz x6,.Lcbc_tail4x
-+ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
-+ veor $tmp0,$tmp0,$dat0
-+ vorr $dat0,$in0,$in0
-+ veor $tmp1,$tmp1,$dat1
-+ vorr $dat1,$in1,$in1
-+ veor $tmp2,$tmp2,$dat2
-+ vorr $dat2,$in2,$in2
-+ veor $tmp3,$tmp3,$dat3
-+ vorr $dat3,$in3,$in3
-+ veor $tmp4,$tmp4,$dat4
++ vld1.8 {$in4},[$inp],#16
++ cbz x6,.Lecb_tail4x
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$rndlast,$dat0
++ vorr $dat0,$in0,$in0
++ veor $tmp1,$rndlast,$dat1
++ vorr $dat1,$in1,$in1
++ veor $tmp2,$rndlast,$dat2
++ vorr $dat2,$in2,$in2
++ veor $tmp3,$rndlast,$dat3
++ vorr $dat3,$in3,$in3
++ veor $tmp4,$rndlast,$dat4
+ vst1.8 {$tmp0},[$out],#16
-+ vorr $dat4,$in4,$in4
++ vorr $dat4,$in4,$in4
+ vst1.8 {$tmp1},[$out],#16
-+ mov $cnt,$rounds
++ mov $cnt,$rounds
+ vst1.8 {$tmp2},[$out],#16
-+ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
+ vst1.8 {$tmp3},[$out],#16
+ vst1.8 {$tmp4},[$out],#16
-+ b.hs .Loop5x_cbc_dec
++ b.hs .Loop5x_ecb_dec
+
+ add $len,$len,#0x50
-+ cbz $len,.Lcbc_done
++ cbz $len,.Lecb_done
+
+ add $cnt,$rounds,#2
+ subs $len,$len,#0x30
+ vorr $dat0,$in2,$in2
-+ vorr $in0,$in2,$in2
+ vorr $dat1,$in3,$in3
-+ vorr $in1,$in3,$in3
+ vorr $dat2,$in4,$in4
-+ vorr $in2,$in4,$in4
-+ b.lo .Lcbc_dec_tail
++ b.lo .Lecb_dec_tail
++
++ b .Loop3x_ecb_dec
+
-+ b .Loop3x_cbc_dec
-
+.align 4
-+.Lcbc_tail4x:
-+ veor $tmp1,$tmp0,$dat1
-+ veor $tmp2,$tmp2,$dat2
-+ veor $tmp3,$tmp3,$dat3
-+ veor $tmp4,$tmp4,$dat4
++.Lecb_tail4x:
++ veor $tmp1,$rndlast,$dat1
++ veor $tmp2,$rndlast,$dat2
++ veor $tmp3,$rndlast,$dat3
++ veor $tmp4,$rndlast,$dat4
+ vst1.8 {$tmp1},[$out],#16
+ vst1.8 {$tmp2},[$out],#16
+ vst1.8 {$tmp3},[$out],#16
+ vst1.8 {$tmp4},[$out],#16
+
-+ b .Lcbc_done
++ b .Lecb_done
+.align 4
+___
+$code.=<<___;
- .Loop3x_cbc_dec:
- aesd $dat0,q8
- aesimc $dat0,$dat0
-@@ -691,6 +915,9 @@ my $step="x12"; # aliases with $tctr2
- my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
- my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
-
-+# used only in 64-bit mode...
-+my ($dat3,$dat4,$in3,$in4)=map("q$_",(16..23));
++.Loop3x_ecb_dec:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Loop3x_ecb_dec
+
- my ($dat,$tmp)=($dat0,$tmp0);
-
- ### q8-q15 preloaded key schedule
-@@ -743,6 +970,175 @@ $code.=<<___;
- rev $tctr2, $ctr
- sub $len,$len,#3 // bias
- vmov.32 ${dat2}[3],$tctr2
-+___
-+$code.=<<___ if ($flavour =~ /64/);
-+ cmp $len,#2
-+ b.lo .Loop3x_ctr32
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ subs $len,$len,#0x30
++ mov.lo x6,$len // x6, $cnt, is zero at this point
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ add $inp,$inp,x6 // $inp is adjusted in such way that
++ // at exit from the loop $dat1-$dat2
++ // are loaded with last "words"
++ mov $key_,$key
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ vld1.8 {$in0},[$inp],#16
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ vld1.8 {$in1},[$inp],#16
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ vld1.8 {$in2},[$inp],#16
++ aesd $dat0,q15
++ aesd $dat1,q15
++ aesd $dat2,q15
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ add $cnt,$rounds,#2
++ veor $tmp0,$rndlast,$dat0
++ veor $tmp1,$rndlast,$dat1
++ veor $dat2,$dat2,$rndlast
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp0},[$out],#16
++ vorr $dat0,$in0,$in0
++ vst1.8 {$tmp1},[$out],#16
++ vorr $dat1,$in1,$in1
++ vst1.8 {$dat2},[$out],#16
++ vorr $dat2,$in2,$in2
++ b.hs .Loop3x_ecb_dec
+
-+ add w13,$ctr,#1
-+ add w14,$ctr,#2
-+ vorr $dat3,$dat0,$dat0
-+ rev w13,w13
-+ vorr $dat4,$dat0,$dat0
-+ rev w14,w14
-+ vmov.32 ${dat3}[3],w13
-+ sub $len,$len,#2 // bias
-+ vmov.32 ${dat4}[3],w14
-+ add $ctr,$ctr,#2
-+ b .Loop5x_ctr32
++ cmn $len,#0x30
++ b.eq .Lecb_done
++ nop
+
-+.align 4
-+.Loop5x_ctr32:
++.Lecb_dec_tail:
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lecb_dec_tail
++
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ cmn $len,#0x20
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ aesd $dat1,q15
++ aesd $dat2,q15
++ b.eq .Lecb_dec_one
++ veor $tmp1,$rndlast,$dat1
++ veor $tmp2,$rndlast,$dat2
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$tmp2},[$out],#16
++ b .Lecb_done
++
++.Lecb_dec_one:
++ veor $tmp1,$rndlast,$dat2
++ vst1.8 {$tmp1},[$out],#16
++
++.Lecb_done:
++___
++}
++$code.=<<___ if ($flavour !~ /64/);
++ vldmia sp!,{d8-d15}
++ ldmia sp!,{r4-r8,pc}
++___
++$code.=<<___ if ($flavour =~ /64/);
++ ldr x29,[sp],#16
++___
++$code.=<<___ if ($flavour =~ /64/);
++.Lecb_Final_abort:
++ ret
++___
++$code.=<<___;
++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt
++___
++}}}
+ {{{
+ my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
+ my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
+@@ -519,6 +1377,13 @@ $code.=<<___;
+ ___
+ {
+ my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
++
++my ($dat3,$in3,$tmp3); # used only in 64-bit mode
++my ($dat4,$in4,$tmp4);
++if ($flavour =~ /64/) {
++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
++}
++
+ $code.=<<___;
+ .align 5
+ .Lcbc_dec:
+@@ -535,7 +1400,196 @@ $code.=<<___;
+ vorr $in0,$dat,$dat
+ vorr $in1,$dat1,$dat1
+ vorr $in2,$dat2,$dat2
++___
++$code.=<<___ if ($flavour =~ /64/);
++ cmp $len,#32
++ b.lo .Loop3x_cbc_dec
++
++ vld1.8 {$dat3},[$inp],#16
++ vld1.8 {$dat4},[$inp],#16
++ sub $len,$len,#32 // bias
++ mov $cnt,$rounds
++ vorr $in3,$dat3,$dat3
++ vorr $in4,$dat4,$dat4
++
++.Loop5x_cbc_dec:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat3,q8
++ aesimc $dat3,$dat3
++ aesd $dat4,q8
++ aesimc $dat4,$dat4
++ vld1.32 {q8},[$key_],#16
++ subs $cnt,$cnt,#2
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat3,q9
++ aesimc $dat3,$dat3
++ aesd $dat4,q9
++ aesimc $dat4,$dat4
++ vld1.32 {q9},[$key_],#16
++ b.gt .Loop5x_cbc_dec
++
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat3,q8
++ aesimc $dat3,$dat3
++ aesd $dat4,q8
++ aesimc $dat4,$dat4
++ cmp $len,#0x40 // because .Lcbc_tail4x
++ sub $len,$len,#0x50
++
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat3,q9
++ aesimc $dat3,$dat3
++ aesd $dat4,q9
++ aesimc $dat4,$dat4
++ csel x6,xzr,$len,gt // borrow x6, $cnt, "gt" is not typo
++ mov $key_,$key
++
++ aesd $dat0,q10
++ aesimc $dat0,$dat0
++ aesd $dat1,q10
++ aesimc $dat1,$dat1
++ aesd $dat2,q10
++ aesimc $dat2,$dat2
++ aesd $dat3,q10
++ aesimc $dat3,$dat3
++ aesd $dat4,q10
++ aesimc $dat4,$dat4
++ add $inp,$inp,x6 // $inp is adjusted in such way that
++ // at exit from the loop $dat1-$dat4
++ // are loaded with last "words"
++ add x6,$len,#0x60 // because .Lcbc_tail4x
++
++ aesd $dat0,q11
++ aesimc $dat0,$dat0
++ aesd $dat1,q11
++ aesimc $dat1,$dat1
++ aesd $dat2,q11
++ aesimc $dat2,$dat2
++ aesd $dat3,q11
++ aesimc $dat3,$dat3
++ aesd $dat4,q11
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ aesd $dat3,q12
++ aesimc $dat3,$dat3
++ aesd $dat4,q12
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ aesd $dat3,q13
++ aesimc $dat3,$dat3
++ aesd $dat4,q13
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ aesd $dat3,q14
++ aesimc $dat3,$dat3
++ aesd $dat4,q14
++ aesimc $dat4,$dat4
+
++ veor $tmp0,$ivec,$rndlast
++ aesd $dat0,q15
++ veor $tmp1,$in0,$rndlast
++ vld1.8 {$in0},[$inp],#16
++ aesd $dat1,q15
++ veor $tmp2,$in1,$rndlast
++ vld1.8 {$in1},[$inp],#16
++ aesd $dat2,q15
++ veor $tmp3,$in2,$rndlast
++ vld1.8 {$in2},[$inp],#16
++ aesd $dat3,q15
++ veor $tmp4,$in3,$rndlast
++ vld1.8 {$in3},[$inp],#16
++ aesd $dat4,q15
++ vorr $ivec,$in4,$in4
++ vld1.8 {$in4},[$inp],#16
++ cbz x6,.Lcbc_tail4x
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$tmp0,$dat0
++ vorr $dat0,$in0,$in0
++ veor $tmp1,$tmp1,$dat1
++ vorr $dat1,$in1,$in1
++ veor $tmp2,$tmp2,$dat2
++ vorr $dat2,$in2,$in2
++ veor $tmp3,$tmp3,$dat3
++ vorr $dat3,$in3,$in3
++ veor $tmp4,$tmp4,$dat4
++ vst1.8 {$tmp0},[$out],#16
++ vorr $dat4,$in4,$in4
++ vst1.8 {$tmp1},[$out],#16
++ mov $cnt,$rounds
++ vst1.8 {$tmp2},[$out],#16
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++ b.hs .Loop5x_cbc_dec
++
++ add $len,$len,#0x50
++ cbz $len,.Lcbc_done
++
++ add $cnt,$rounds,#2
++ subs $len,$len,#0x30
++ vorr $dat0,$in2,$in2
++ vorr $in0,$in2,$in2
++ vorr $dat1,$in3,$in3
++ vorr $in1,$in3,$in3
++ vorr $dat2,$in4,$in4
++ vorr $in2,$in4,$in4
++ b.lo .Lcbc_dec_tail
++
++ b .Loop3x_cbc_dec
++
++.align 4
++.Lcbc_tail4x:
++ veor $tmp1,$tmp0,$dat1
++ veor $tmp2,$tmp2,$dat2
++ veor $tmp3,$tmp3,$dat3
++ veor $tmp4,$tmp4,$dat4
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$tmp2},[$out],#16
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++
++ b .Lcbc_done
++.align 4
++___
++$code.=<<___;
+ .Loop3x_cbc_dec:
+ aesd $dat0,q8
+ aesimc $dat0,$dat0
+@@ -696,6 +1750,9 @@ my $step="x12"; # aliases with $tctr2
+ my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+ my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+
++# used only in 64-bit mode...
++my ($dat3,$dat4,$in3,$in4)=map("q$_",(16..23));
++
+ my ($dat,$tmp)=($dat0,$tmp0);
+
+ ### q8-q15 preloaded key schedule
+@@ -751,6 +1808,175 @@ $code.=<<___;
+ vmov.32 ${ivec}[3],$tctr2
+ sub $len,$len,#3 // bias
+ vorr $dat2,$ivec,$ivec
++___
++$code.=<<___ if ($flavour =~ /64/);
++ cmp $len,#2
++ b.lo .Loop3x_ctr32
++
++ add w13,$ctr,#1
++ add w14,$ctr,#2
++ vorr $dat3,$dat0,$dat0
++ rev w13,w13
++ vorr $dat4,$dat0,$dat0
++ rev w14,w14
++ vmov.32 ${dat3}[3],w13
++ sub $len,$len,#2 // bias
++ vmov.32 ${dat4}[3],w14
++ add $ctr,$ctr,#2
++ b .Loop5x_ctr32
++
++.align 4
++.Loop5x_ctr32:
+ aese $dat0,q8
+ aesmc $dat0,$dat0
+ aese $dat1,q8
@@ -354,135 +1191,1568 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
+ vld1.32 {q9},[$key_],#16
+ b.gt .Loop5x_ctr32
+
-+ mov $key_,$key
-+ aese $dat0,q8
-+ aesmc $dat0,$dat0
-+ aese $dat1,q8
-+ aesmc $dat1,$dat1
-+ aese $dat2,q8
-+ aesmc $dat2,$dat2
-+ aese $dat3,q8
-+ aesmc $dat3,$dat3
-+ aese $dat4,q8
-+ aesmc $dat4,$dat4
-+ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ mov $key_,$key
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat3,q8
++ aesmc $dat3,$dat3
++ aese $dat4,q8
++ aesmc $dat4,$dat4
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat3,q9
++ aesmc $dat3,$dat3
++ aese $dat4,q9
++ aesmc $dat4,$dat4
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ add $tctr0,$ctr,#1
++ add $tctr1,$ctr,#2
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ add $tctr2,$ctr,#3
++ add w13,$ctr,#4
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ add w14,$ctr,#5
++ rev $tctr0,$tctr0
++ aese $dat3,q12
++ aesmc $dat3,$dat3
++ rev $tctr1,$tctr1
++ rev $tctr2,$tctr2
++ aese $dat4,q12
++ aesmc $dat4,$dat4
++ rev w13,w13
++ rev w14,w14
++
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ aese $dat3,q13
++ aesmc $dat3,$dat3
++ aese $dat4,q13
++ aesmc $dat4,$dat4
++
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ vld1.8 {$in0},[$inp],#16
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ vld1.8 {$in1},[$inp],#16
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ vld1.8 {$in2},[$inp],#16
++ aese $dat3,q14
++ aesmc $dat3,$dat3
++ vld1.8 {$in3},[$inp],#16
++ aese $dat4,q14
++ aesmc $dat4,$dat4
++ vld1.8 {$in4},[$inp],#16
++
++ aese $dat0,q15
++ veor $in0,$in0,$rndlast
++ aese $dat1,q15
++ veor $in1,$in1,$rndlast
++ aese $dat2,q15
++ veor $in2,$in2,$rndlast
++ aese $dat3,q15
++ veor $in3,$in3,$rndlast
++ aese $dat4,q15
++ veor $in4,$in4,$rndlast
++
++ veor $in0,$in0,$dat0
++ vorr $dat0,$ivec,$ivec
++ veor $in1,$in1,$dat1
++ vorr $dat1,$ivec,$ivec
++ veor $in2,$in2,$dat2
++ vorr $dat2,$ivec,$ivec
++ veor $in3,$in3,$dat3
++ vorr $dat3,$ivec,$ivec
++ veor $in4,$in4,$dat4
++ vorr $dat4,$ivec,$ivec
++
++ vst1.8 {$in0},[$out],#16
++ vmov.32 ${dat0}[3],$tctr0
++ vst1.8 {$in1},[$out],#16
++ vmov.32 ${dat1}[3],$tctr1
++ vst1.8 {$in2},[$out],#16
++ vmov.32 ${dat2}[3],$tctr2
++ vst1.8 {$in3},[$out],#16
++ vmov.32 ${dat3}[3],w13
++ vst1.8 {$in4},[$out],#16
++ vmov.32 ${dat4}[3],w14
++
++ mov $cnt,$rounds
++ cbz $len,.Lctr32_done
++
++ add $ctr,$ctr,#5
++ subs $len,$len,#5
++ b.hs .Loop5x_ctr32
++
++ add $len,$len,#5
++ sub $ctr,$ctr,#5
++
++ cmp $len,#2
++ mov $step,#16
++ cclr $step,lo
++ b.ls .Lctr32_tail
++
++ sub $len,$len,#3 // bias
++ add $ctr,$ctr,#3
++___
++$code.=<<___;
+ b .Loop3x_ctr32
+
+ .align 4
+@@ -905,6 +2131,1432 @@ $code.=<<___;
+ .size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
+ ___
+ }}}
++# Performance in cycles per byte.
++# Measured for AES-XTS with different key sizes.
++# The values before and after optimization are shown below
++# (before/after):
++#
++# AES-128-XTS AES-256-XTS
++# Cortex-A57 3.36/1.09 4.02/1.37
++# Cortex-A72 3.03/1.02 3.28/1.33
++
++# Optimization is implemented by loop unrolling and interleaving.
++# Normally the unrolling factor is 5; if the input data size is
++# smaller than 5 blocks, but not smaller than 3 blocks, the
++# unrolling factor is 3.
++# If the input data size dsize >= 5*16 bytes, 5 blocks are processed
++# per iteration, and each iteration reduces the left size: lsize -= 5*16.
++# If lsize < 5*16 bytes, the remainder is treated as the tail. Note: a
++# remainder of 4*16 bytes is processed specially by being integrated
++# into the 5*16 bytes loop to improve efficiency.
++# There is one special case: if the original input data size dsize
++# = 16 bytes, it is treated separately to improve
++# performance: one independent code block without LR, FP load and
++# store.
++# Encryption will process the (length - tailcnt) bytes as described
++# above, then encrypt the composite block as the second-to-last
++# cipher block.
++# Decryption will process the (length - tailcnt - 1) bytes as described
++# above, then decrypt the second-to-last cipher block to get the
++# last plaintext block (the tail), and decrypt the composite block as
++# the second-to-last plaintext block.
++
++{{{
++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
++my ($iv0,$iv1,$iv2,$iv3,$iv4)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b");
++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]");
++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]");
++
++my ($tmpin)=("v26.16b");
++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
++
++# q7 last round key
++# q10-q15, q7 Last 7 round keys
++# q8-q9 preloaded round keys except last 7 keys for big size
++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte
++
++
++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
++
++my ($dat3,$in3,$tmp3); # used only in 64-bit mode
++my ($dat4,$in4,$tmp4);
++if ($flavour =~ /64/) {
++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
++}
++
++$code.=<<___ if ($flavour =~ /64/);
++.globl ${prefix}_xts_encrypt
++.type ${prefix}_xts_encrypt,%function
++.align 5
++${prefix}_xts_encrypt:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ cmp $len,#16
++ // Original input data size bigger than 16, jump to big size processing.
++ b.ne .Lxts_enc_big_size
++ // Encrypt the iv with key2, as the first XEX iv.
++ ldr $rounds,[$key2,#240]
++ vld1.8 {$dat},[$key2],#16
++ vld1.8 {$iv0},[$ivp]
++ sub $rounds,$rounds,#2
++ vld1.8 {$dat1},[$key2],#16
++
++.Loop_enc_iv_enc:
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2],#16
++ subs $rounds,$rounds,#2
++ aese $iv0,$dat1
++ aesmc $iv0,$iv0
++ vld1.32 {$dat1},[$key2],#16
++ b.gt .Loop_enc_iv_enc
++
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2]
++ aese $iv0,$dat1
++ veor $iv0,$iv0,$dat
++
++ vld1.8 {$dat0},[$inp]
++ veor $dat0,$iv0,$dat0
++
++ ldr $rounds,[$key1,#240]
++ vld1.32 {q20-q21},[$key1],#32 // load key schedule...
++
++ aese $dat0,q20
++ aesmc $dat0,$dat0
++ vld1.32 {q8-q9},[$key1],#32 // load key schedule...
++ aese $dat0,q21
++ aesmc $dat0,$dat0
++ subs $rounds,$rounds,#10 // if rounds==10, jump to aes-128-xts processing
++ b.eq .Lxts_128_enc
++.Lxts_enc_round_loop:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ vld1.32 {q8},[$key1],#16 // load key schedule...
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ vld1.32 {q9},[$key1],#16 // load key schedule...
++ subs $rounds,$rounds,#2 // bias
++ b.gt .Lxts_enc_round_loop
++.Lxts_128_enc:
++ vld1.32 {q10-q11},[$key1],#32 // load key schedule...
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ vld1.32 {q12-q13},[$key1],#32 // load key schedule...
++ aese $dat0,q10
++ aesmc $dat0,$dat0
++ aese $dat0,q11
++ aesmc $dat0,$dat0
++ vld1.32 {q14-q15},[$key1],#32 // load key schedule...
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ vld1.32 {$rndlast},[$key1]
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat0,q15
++ veor $dat0,$dat0,$rndlast
++ veor $dat0,$dat0,$iv0
++ vst1.8 {$dat0},[$out]
++ b .Lxts_enc_final_abort
++
++.align 4
++.Lxts_enc_big_size:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp $constnumx,$tmpinp,[sp,#-64]!
++ stp $tailcnt,$midnumx,[sp,#48]
++ stp $ivd10,$ivd20,[sp,#32]
++ stp $ivd30,$ivd40,[sp,#16]
++
++ // tailcnt store the tail value of length%16.
++ and $tailcnt,$len,#0xf
++ and $len,$len,#-16
++ subs $len,$len,#16
++ mov $step,#16
++ b.lo .Lxts_abort
++ csel $step,xzr,$step,eq
++
++ // Firstly, encrypt the iv with key2, as the first iv of XEX.
++ ldr $rounds,[$key2,#240]
++ vld1.32 {$dat},[$key2],#16
++ vld1.8 {$iv0},[$ivp]
++ sub $rounds,$rounds,#2
++ vld1.32 {$dat1},[$key2],#16
++
++.Loop_iv_enc:
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2],#16
++ subs $rounds,$rounds,#2
++ aese $iv0,$dat1
++ aesmc $iv0,$iv0
++ vld1.32 {$dat1},[$key2],#16
++ b.gt .Loop_iv_enc
++
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2]
++ aese $iv0,$dat1
++ veor $iv0,$iv0,$dat
++
++ // The iv for second block
++ // $ivl- iv(low), $ivh - iv(high)
++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4
++ fmov $ivl,$ivd00
++ fmov $ivh,$ivd01
++ mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd10,$ivl
++ fmov $ivd11,$ivh
++
++ ldr $rounds0,[$key1,#240] // next starting point
++ vld1.8 {$dat},[$inp],$step
++
++ vld1.32 {q8-q9},[$key1] // load key schedule...
++ sub $rounds0,$rounds0,#6
++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys
++ sub $rounds0,$rounds0,#2
++ vld1.32 {q10-q11},[$key_],#32
++ vld1.32 {q12-q13},[$key_],#32
++ vld1.32 {q14-q15},[$key_],#32
++ vld1.32 {$rndlast},[$key_]
++
++ add $key_,$key1,#32
++ mov $rounds,$rounds0
++
++ // Encryption
++.Lxts_enc:
++ vld1.8 {$dat2},[$inp],#16
++ subs $len,$len,#32 // bias
++ add $rounds,$rounds0,#2
++ vorr $in1,$dat,$dat
++ vorr $dat1,$dat,$dat
++ vorr $in3,$dat,$dat
++ vorr $in2,$dat2,$dat2
++ vorr $in4,$dat2,$dat2
++ b.lo .Lxts_inner_enc_tail
++ veor $dat,$dat,$iv0 // before encryption, xor with iv
++ veor $dat2,$dat2,$iv1
++
++ // The iv for third block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd20,$ivl
++ fmov $ivd21,$ivh
++
++
++ vorr $dat1,$dat2,$dat2
++ vld1.8 {$dat2},[$inp],#16
++ vorr $in0,$dat,$dat
++ vorr $in1,$dat1,$dat1
++ veor $in2,$dat2,$iv2 // the third block
++ veor $dat2,$dat2,$iv2
++ cmp $len,#32
++ b.lo .Lxts_outer_enc_tail
++
++ // The iv for fourth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd30,$ivl
++ fmov $ivd31,$ivh
++
++ vld1.8 {$dat3},[$inp],#16
++ // The iv for fifth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd40,$ivl
++ fmov $ivd41,$ivh
++
++ vld1.8 {$dat4},[$inp],#16
++ veor $dat3,$dat3,$iv3 // the fourth block
++ veor $dat4,$dat4,$iv4
++ sub $len,$len,#32 // bias
++ mov $rounds,$rounds0
++ b .Loop5x_xts_enc
++
++.align 4
++.Loop5x_xts_enc:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat3,q8
++ aesmc $dat3,$dat3
++ aese $dat4,q8
++ aesmc $dat4,$dat4
++ vld1.32 {q8},[$key_],#16
++ subs $rounds,$rounds,#2
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat3,q9
++ aesmc $dat3,$dat3
++ aese $dat4,q9
++ aesmc $dat4,$dat4
++ vld1.32 {q9},[$key_],#16
++ b.gt .Loop5x_xts_enc
++
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat3,q8
++ aesmc $dat3,$dat3
++ aese $dat4,q8
++ aesmc $dat4,$dat4
++ subs $len,$len,#0x50 // because .Lxts_enc_tail4x
++
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat3,q9
++ aesmc $dat3,$dat3
++ aese $dat4,q9
++ aesmc $dat4,$dat4
++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo
++ mov $key_,$key1
++
++ aese $dat0,q10
++ aesmc $dat0,$dat0
++ aese $dat1,q10
++ aesmc $dat1,$dat1
++ aese $dat2,q10
++ aesmc $dat2,$dat2
++ aese $dat3,q10
++ aesmc $dat3,$dat3
++ aese $dat4,q10
++ aesmc $dat4,$dat4
++ add $inp,$inp,$xoffset // x0 is adjusted in such way that
++ // at exit from the loop v1.16b-v26.16b
++ // are loaded with last "words"
++ add $xoffset,$len,#0x60 // because .Lxts_enc_tail4x
++
++ aese $dat0,q11
++ aesmc $dat0,$dat0
++ aese $dat1,q11
++ aesmc $dat1,$dat1
++ aese $dat2,q11
++ aesmc $dat2,$dat2
++ aese $dat3,q11
++ aesmc $dat3,$dat3
++ aese $dat4,q11
++ aesmc $dat4,$dat4
++
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ aese $dat3,q12
++ aesmc $dat3,$dat3
++ aese $dat4,q12
++ aesmc $dat4,$dat4
++
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ aese $dat3,q13
++ aesmc $dat3,$dat3
++ aese $dat4,q13
++ aesmc $dat4,$dat4
++
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ aese $dat3,q14
++ aesmc $dat3,$dat3
++ aese $dat4,q14
++ aesmc $dat4,$dat4
++
++ veor $tmp0,$rndlast,$iv0
++ aese $dat0,q15
++ // The iv for first block of one iteration
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ veor $tmp1,$rndlast,$iv1
++ vld1.8 {$in0},[$inp],#16
++ aese $dat1,q15
++ // The iv for second block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd10,$ivl
++ fmov $ivd11,$ivh
++ veor $tmp2,$rndlast,$iv2
++ vld1.8 {$in1},[$inp],#16
++ aese $dat2,q15
++ // The iv for third block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd20,$ivl
++ fmov $ivd21,$ivh
++ veor $tmp3,$rndlast,$iv3
++ vld1.8 {$in2},[$inp],#16
++ aese $dat3,q15
++ // The iv for fourth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd30,$ivl
++ fmov $ivd31,$ivh
++ veor $tmp4,$rndlast,$iv4
++ vld1.8 {$in3},[$inp],#16
++ aese $dat4,q15
++
++ // The iv for fifth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd40,$ivl
++ fmov $ivd41,$ivh
++
++ vld1.8 {$in4},[$inp],#16
++ cbz $xoffset,.Lxts_enc_tail4x
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$tmp0,$dat0
++ veor $dat0,$in0,$iv0
++ veor $tmp1,$tmp1,$dat1
++ veor $dat1,$in1,$iv1
++ veor $tmp2,$tmp2,$dat2
++ veor $dat2,$in2,$iv2
++ veor $tmp3,$tmp3,$dat3
++ veor $dat3,$in3,$iv3
++ veor $tmp4,$tmp4,$dat4
++ vst1.8 {$tmp0},[$out],#16
++ veor $dat4,$in4,$iv4
++ vst1.8 {$tmp1},[$out],#16
++ mov $rounds,$rounds0
++ vst1.8 {$tmp2},[$out],#16
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++ b.hs .Loop5x_xts_enc
++
++
++ // If left 4 blocks, borrow the five block's processing.
++ cmn $len,#0x10
++ b.ne .Loop5x_enc_after
++ vorr $iv4,$iv3,$iv3
++ vorr $iv3,$iv2,$iv2
++ vorr $iv2,$iv1,$iv1
++ vorr $iv1,$iv0,$iv0
++ fmov $ivl,$ivd40
++ fmov $ivh,$ivd41
++ veor $dat0,$iv0,$in0
++ veor $dat1,$iv1,$in1
++ veor $dat2,$in2,$iv2
++ veor $dat3,$in3,$iv3
++ veor $dat4,$in4,$iv4
++ b.eq .Loop5x_xts_enc
++
++.Loop5x_enc_after:
++ add $len,$len,#0x50
++ cbz $len,.Lxts_enc_done
++
++ add $rounds,$rounds0,#2
++ subs $len,$len,#0x30
++ b.lo .Lxts_inner_enc_tail
++
++ veor $dat0,$iv0,$in2
++ veor $dat1,$iv1,$in3
++ veor $dat2,$in4,$iv2
++ b .Lxts_outer_enc_tail
++
++.align 4
++.Lxts_enc_tail4x:
++ add $inp,$inp,#16
++ veor $tmp1,$dat1,$tmp1
++ vst1.8 {$tmp1},[$out],#16
++ veor $tmp2,$dat2,$tmp2
++ vst1.8 {$tmp2},[$out],#16
++ veor $tmp3,$dat3,$tmp3
++ veor $tmp4,$dat4,$tmp4
++ vst1.8 {$tmp3-$tmp4},[$out],#32
++
++ b .Lxts_enc_done
++.align 4
++.Lxts_outer_enc_tail:
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $rounds,$rounds,#2
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lxts_outer_enc_tail
++
++ aese $dat0,q8
++ aesmc $dat0,$dat0
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ veor $tmp0,$iv0,$rndlast
++ subs $len,$len,#0x30
++ // The iv for first block
++ fmov $ivl,$ivd20
++ fmov $ivh,$ivd21
++ //mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr#31
++ eor $ivl,$tmpmx,$ivl,lsl#1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ veor $tmp1,$iv1,$rndlast
++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point
++ aese $dat0,q9
++ aesmc $dat0,$dat0
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ veor $tmp2,$iv2,$rndlast
++
++ add $xoffset,$xoffset,#0x20
++ add $inp,$inp,$xoffset
++ mov $key_,$key1
++
++ aese $dat0,q12
++ aesmc $dat0,$dat0
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ aese $dat0,q13
++ aesmc $dat0,$dat0
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ aese $dat0,q14
++ aesmc $dat0,$dat0
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ aese $dat0,q15
++ aese $dat1,q15
++ aese $dat2,q15
++ vld1.8 {$in2},[$inp],#16
++ add $rounds,$rounds0,#2
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$tmp0,$dat0
++ veor $tmp1,$tmp1,$dat1
++ veor $dat2,$dat2,$tmp2
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp0},[$out],#16
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$dat2},[$out],#16
++ cmn $len,#0x30
++ b.eq .Lxts_enc_done
++.Lxts_encxor_one:
++ vorr $in3,$in1,$in1
++ vorr $in4,$in2,$in2
++ nop
++
++.Lxts_inner_enc_tail:
++ cmn $len,#0x10
++ veor $dat1,$in3,$iv0
++ veor $dat2,$in4,$iv1
++ b.eq .Lxts_enc_tail_loop
++ veor $dat2,$in4,$iv0
++.Lxts_enc_tail_loop:
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $rounds,$rounds,#2
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lxts_enc_tail_loop
++
++ aese $dat1,q8
++ aesmc $dat1,$dat1
++ aese $dat2,q8
++ aesmc $dat2,$dat2
++ aese $dat1,q9
++ aesmc $dat1,$dat1
++ aese $dat2,q9
++ aesmc $dat2,$dat2
++ aese $dat1,q12
++ aesmc $dat1,$dat1
++ aese $dat2,q12
++ aesmc $dat2,$dat2
++ cmn $len,#0x20
++ aese $dat1,q13
++ aesmc $dat1,$dat1
++ aese $dat2,q13
++ aesmc $dat2,$dat2
++ veor $tmp1,$iv0,$rndlast
++ aese $dat1,q14
++ aesmc $dat1,$dat1
++ aese $dat2,q14
++ aesmc $dat2,$dat2
++ veor $tmp2,$iv1,$rndlast
++ aese $dat1,q15
++ aese $dat2,q15
++ b.eq .Lxts_enc_one
++ veor $tmp1,$tmp1,$dat1
++ vst1.8 {$tmp1},[$out],#16
++ veor $tmp2,$tmp2,$dat2
++ vorr $iv0,$iv1,$iv1
++ vst1.8 {$tmp2},[$out],#16
++ fmov $ivl,$ivd10
++ fmov $ivh,$ivd11
++ mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ b .Lxts_enc_done
++
++.Lxts_enc_one:
++ veor $tmp1,$tmp1,$dat2
++ vorr $iv0,$iv0,$iv0
++ vst1.8 {$tmp1},[$out],#16
++ fmov $ivl,$ivd00
++ fmov $ivh,$ivd01
++ mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ b .Lxts_enc_done
++.align 5
++.Lxts_enc_done:
++ // Process the tail block with cipher stealing.
++ tst $tailcnt,#0xf
++ b.eq .Lxts_abort
++
++ mov $tmpinp,$inp
++ mov $tmpoutp,$out
++ sub $out,$out,#16
++.composite_enc_loop:
++ subs $tailcnt,$tailcnt,#1
++ ldrb $l2outp,[$out,$tailcnt]
++ ldrb $loutp,[$tmpinp,$tailcnt]
++ strb $l2outp,[$tmpoutp,$tailcnt]
++ strb $loutp,[$out,$tailcnt]
++ b.gt .composite_enc_loop
++.Lxts_enc_load_done:
++ vld1.8 {$tmpin},[$out]
++ veor $tmpin,$tmpin,$iv0
++
++ // Encrypt the composite block to get the last second encrypted text block
++ ldr $rounds,[$key1,#240] // load key schedule...
++ vld1.8 {$dat},[$key1],#16
++ sub $rounds,$rounds,#2
++ vld1.8 {$dat1},[$key1],#16 // load key schedule...
++.Loop_final_enc:
++ aese $tmpin,$dat0
++ aesmc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key1],#16
++ subs $rounds,$rounds,#2
++ aese $tmpin,$dat1
++ aesmc $tmpin,$tmpin
++ vld1.32 {$dat1},[$key1],#16
++ b.gt .Loop_final_enc
++
++ aese $tmpin,$dat0
++ aesmc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key1]
++ aese $tmpin,$dat1
++ veor $tmpin,$tmpin,$dat0
++ veor $tmpin,$tmpin,$iv0
++ vst1.8 {$tmpin},[$out]
++
++.Lxts_abort:
++ ldp $tailcnt,$midnumx,[sp,#48]
++ ldp $ivd10,$ivd20,[sp,#32]
++ ldp $ivd30,$ivd40,[sp,#16]
++ ldp $constnumx,$tmpinp,[sp],#64
++.Lxts_enc_final_abort:
++ ret
++.size ${prefix}_xts_encrypt,.-${prefix}_xts_encrypt
++___
++
++}}}
++{{{
++my ($inp,$out,$len,$key1,$key2,$ivp)=map("x$_",(0..5));
++my ($rounds0,$rounds,$key_,$step,$ivl,$ivh)=("w5","w6","x7","x8","x9","x10");
++my ($tmpoutp,$loutp,$l2outp,$tmpinp)=("x13","w14","w15","x20");
++my ($tailcnt,$midnum,$midnumx,$constnum,$constnumx)=("x21","w22","x22","w19","x19");
++my ($xoffset,$tmpmx,$tmpmw)=("x6","x11","w11");
++my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$tmp2,$rndlast)=map("q$_",(0..7));
++my ($iv0,$iv1,$iv2,$iv3,$iv4,$tmpin)=("v6.16b","v8.16b","v9.16b","v10.16b","v11.16b","v26.16b");
++my ($ivd00,$ivd01,$ivd20,$ivd21)=("d6","v6.d[1]","d9","v9.d[1]");
++my ($ivd10,$ivd11,$ivd30,$ivd31,$ivd40,$ivd41)=("d8","v8.d[1]","d10","v10.d[1]","d11","v11.d[1]");
++
++my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
++
++# q7 last round key
++# q10-q15, q7 Last 7 round keys
++# q8-q9 preloaded round keys except last 7 keys for big size
++# q20, q21, q8-q9 preloaded round keys except last 7 keys for only 16 byte
++
++{
++my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
++
++my ($dat3,$in3,$tmp3); # used only in 64-bit mode
++my ($dat4,$in4,$tmp4);
++if ($flavour =~ /64/) {
++ ($dat2,$dat3,$dat4,$in2,$in3,$in4,$tmp3,$tmp4)=map("q$_",(16..23));
++}
++
++$code.=<<___ if ($flavour =~ /64/);
++.globl ${prefix}_xts_decrypt
++.type ${prefix}_xts_decrypt,%function
++.align 5
++${prefix}_xts_decrypt:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ cmp $len,#16
++ // Original input data size bigger than 16, jump to big size processing.
++ b.ne .Lxts_dec_big_size
++ // Encrypt the iv with key2, as the first XEX iv.
++ ldr $rounds,[$key2,#240]
++ vld1.8 {$dat},[$key2],#16
++ vld1.8 {$iv0},[$ivp]
++ sub $rounds,$rounds,#2
++ vld1.8 {$dat1},[$key2],#16
++
++.Loop_dec_small_iv_enc:
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2],#16
++ subs $rounds,$rounds,#2
++ aese $iv0,$dat1
++ aesmc $iv0,$iv0
++ vld1.32 {$dat1},[$key2],#16
++ b.gt .Loop_dec_small_iv_enc
++
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2]
++ aese $iv0,$dat1
++ veor $iv0,$iv0,$dat
++
++ vld1.8 {$dat0},[$inp]
++ veor $dat0,$iv0,$dat0
++
++ ldr $rounds,[$key1,#240]
++ vld1.32 {q20-q21},[$key1],#32 // load key schedule...
++
++ aesd $dat0,q20
++ aesimc $dat0,$dat0
++ vld1.32 {q8-q9},[$key1],#32 // load key schedule...
++ aesd $dat0,q21
++ aesimc $dat0,$dat0
++ subs $rounds,$rounds,#10 // bias
++ b.eq .Lxts_128_dec
++.Lxts_dec_round_loop:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ vld1.32 {q8},[$key1],#16 // load key schedule...
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ vld1.32 {q9},[$key1],#16 // load key schedule...
++ subs $rounds,$rounds,#2 // bias
++ b.gt .Lxts_dec_round_loop
++.Lxts_128_dec:
++ vld1.32 {q10-q11},[$key1],#32 // load key schedule...
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ vld1.32 {q12-q13},[$key1],#32 // load key schedule...
++ aesd $dat0,q10
++ aesimc $dat0,$dat0
++ aesd $dat0,q11
++ aesimc $dat0,$dat0
++ vld1.32 {q14-q15},[$key1],#32 // load key schedule...
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ vld1.32 {$rndlast},[$key1]
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat0,q15
++ veor $dat0,$dat0,$rndlast
++ veor $dat0,$iv0,$dat0
++ vst1.8 {$dat0},[$out]
++ b .Lxts_dec_final_abort
++.Lxts_dec_big_size:
++___
++$code.=<<___ if ($flavour =~ /64/);
++ stp $constnumx,$tmpinp,[sp,#-64]!
++ stp $tailcnt,$midnumx,[sp,#48]
++ stp $ivd10,$ivd20,[sp,#32]
++ stp $ivd30,$ivd40,[sp,#16]
++
++ and $tailcnt,$len,#0xf
++ and $len,$len,#-16
++ subs $len,$len,#16
++ mov $step,#16
++ b.lo .Lxts_dec_abort
++
++ // Encrypt the iv with key2, as the first XEX iv
++ ldr $rounds,[$key2,#240]
++ vld1.8 {$dat},[$key2],#16
++ vld1.8 {$iv0},[$ivp]
++ sub $rounds,$rounds,#2
++ vld1.8 {$dat1},[$key2],#16
++
++.Loop_dec_iv_enc:
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2],#16
++ subs $rounds,$rounds,#2
++ aese $iv0,$dat1
++ aesmc $iv0,$iv0
++ vld1.32 {$dat1},[$key2],#16
++ b.gt .Loop_dec_iv_enc
++
++ aese $iv0,$dat
++ aesmc $iv0,$iv0
++ vld1.32 {$dat},[$key2]
++ aese $iv0,$dat1
++ veor $iv0,$iv0,$dat
++
++ // The iv for second block
++ // $ivl- iv(low), $ivh - iv(high)
++ // the five ivs stored into, $iv0,$iv1,$iv2,$iv3,$iv4
++ fmov $ivl,$ivd00
++ fmov $ivh,$ivd01
++ mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd10,$ivl
++ fmov $ivd11,$ivh
++
++ ldr $rounds0,[$key1,#240] // load rounds number
++
++ // The iv for third block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd20,$ivl
++ fmov $ivd21,$ivh
++
++ vld1.32 {q8-q9},[$key1] // load key schedule...
++ sub $rounds0,$rounds0,#6
++ add $key_,$key1,$ivp,lsl#4 // pointer to last 7 round keys
++ sub $rounds0,$rounds0,#2
++ vld1.32 {q10-q11},[$key_],#32 // load key schedule...
++ vld1.32 {q12-q13},[$key_],#32
++ vld1.32 {q14-q15},[$key_],#32
++ vld1.32 {$rndlast},[$key_]
++
++ // The iv for fourth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd30,$ivl
++ fmov $ivd31,$ivh
++
++ add $key_,$key1,#32
++ mov $rounds,$rounds0
++ b .Lxts_dec
++
++ // Decryption
++.align 5
++.Lxts_dec:
++ tst $tailcnt,#0xf
++ b.eq .Lxts_dec_begin
++ subs $len,$len,#16
++ csel $step,xzr,$step,eq
++ vld1.8 {$dat},[$inp],#16
++ b.lo .Lxts_done
++ sub $inp,$inp,#16
++.Lxts_dec_begin:
++ vld1.8 {$dat},[$inp],$step
++ subs $len,$len,#32 // bias
++ add $rounds,$rounds0,#2
++ vorr $in1,$dat,$dat
++ vorr $dat1,$dat,$dat
++ vorr $in3,$dat,$dat
++ vld1.8 {$dat2},[$inp],#16
++ vorr $in2,$dat2,$dat2
++ vorr $in4,$dat2,$dat2
++ b.lo .Lxts_inner_dec_tail
++ veor $dat,$dat,$iv0 // before decryt, xor with iv
++ veor $dat2,$dat2,$iv1
++
++ vorr $dat1,$dat2,$dat2
++ vld1.8 {$dat2},[$inp],#16
++ vorr $in0,$dat,$dat
++ vorr $in1,$dat1,$dat1
++ veor $in2,$dat2,$iv2 // third block xox with third iv
++ veor $dat2,$dat2,$iv2
++ cmp $len,#32
++ b.lo .Lxts_outer_dec_tail
++
++ vld1.8 {$dat3},[$inp],#16
++
++ // The iv for fifth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd40,$ivl
++ fmov $ivd41,$ivh
++
++ vld1.8 {$dat4},[$inp],#16
++ veor $dat3,$dat3,$iv3 // the fourth block
++ veor $dat4,$dat4,$iv4
++ sub $len,$len,#32 // bias
++ mov $rounds,$rounds0
++ b .Loop5x_xts_dec
++
++.align 4
++.Loop5x_xts_dec:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat3,q8
++ aesimc $dat3,$dat3
++ aesd $dat4,q8
++ aesimc $dat4,$dat4
++ vld1.32 {q8},[$key_],#16 // load key schedule...
++ subs $rounds,$rounds,#2
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat3,q9
++ aesimc $dat3,$dat3
++ aesd $dat4,q9
++ aesimc $dat4,$dat4
++ vld1.32 {q9},[$key_],#16 // load key schedule...
++ b.gt .Loop5x_xts_dec
++
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat3,q8
++ aesimc $dat3,$dat3
++ aesd $dat4,q8
++ aesimc $dat4,$dat4
++ subs $len,$len,#0x50 // because .Lxts_dec_tail4x
++
++ aesd $dat0,q9
++ aesimc $dat0,$dat
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat3,q9
++ aesimc $dat3,$dat3
++ aesd $dat4,q9
++ aesimc $dat4,$dat4
++ csel $xoffset,xzr,$len,gt // borrow x6, w6, "gt" is not typo
++ mov $key_,$key1
++
++ aesd $dat0,q10
++ aesimc $dat0,$dat0
++ aesd $dat1,q10
++ aesimc $dat1,$dat1
++ aesd $dat2,q10
++ aesimc $dat2,$dat2
++ aesd $dat3,q10
++ aesimc $dat3,$dat3
++ aesd $dat4,q10
++ aesimc $dat4,$dat4
++ add $inp,$inp,$xoffset // x0 is adjusted in such way that
++ // at exit from the loop v1.16b-v26.16b
++ // are loaded with last "words"
++ add $xoffset,$len,#0x60 // because .Lxts_dec_tail4x
++
++ aesd $dat0,q11
++ aesimc $dat0,$dat0
++ aesd $dat1,q11
++ aesimc $dat1,$dat1
++ aesd $dat2,q11
++ aesimc $dat2,$dat2
++ aesd $dat3,q11
++ aesimc $dat3,$dat3
++ aesd $dat4,q11
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ aesd $dat3,q12
++ aesimc $dat3,$dat3
++ aesd $dat4,q12
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ aesd $dat3,q13
++ aesimc $dat3,$dat3
++ aesd $dat4,q13
++ aesimc $dat4,$dat4
++
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ aesd $dat3,q14
++ aesimc $dat3,$dat3
++ aesd $dat4,q14
++ aesimc $dat4,$dat4
++
++ veor $tmp0,$rndlast,$iv0
++ aesd $dat0,q15
++ // The iv for first block of next iteration.
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ veor $tmp1,$rndlast,$iv1
++ vld1.8 {$in0},[$inp],#16
++ aesd $dat1,q15
++ // The iv for second block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd10,$ivl
++ fmov $ivd11,$ivh
++ veor $tmp2,$rndlast,$iv2
++ vld1.8 {$in1},[$inp],#16
++ aesd $dat2,q15
++ // The iv for third block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd20,$ivl
++ fmov $ivd21,$ivh
++ veor $tmp3,$rndlast,$iv3
++ vld1.8 {$in2},[$inp],#16
++ aesd $dat3,q15
++ // The iv for fourth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd30,$ivl
++ fmov $ivd31,$ivh
++ veor $tmp4,$rndlast,$iv4
++ vld1.8 {$in3},[$inp],#16
++ aesd $dat4,q15
+
-+ aese $dat0,q9
-+ aesmc $dat0,$dat0
-+ aese $dat1,q9
-+ aesmc $dat1,$dat1
-+ aese $dat2,q9
-+ aesmc $dat2,$dat2
-+ aese $dat3,q9
-+ aesmc $dat3,$dat3
-+ aese $dat4,q9
-+ aesmc $dat4,$dat4
-+ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ // The iv for fifth block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd40,$ivl
++ fmov $ivd41,$ivh
+
-+ aese $dat0,q12
-+ aesmc $dat0,$dat0
-+ add $tctr0,$ctr,#1
-+ add $tctr1,$ctr,#2
-+ aese $dat1,q12
-+ aesmc $dat1,$dat1
-+ add $tctr2,$ctr,#3
-+ add w13,$ctr,#4
-+ aese $dat2,q12
-+ aesmc $dat2,$dat2
-+ add w14,$ctr,#5
-+ rev $tctr0,$tctr0
-+ aese $dat3,q12
-+ aesmc $dat3,$dat3
-+ rev $tctr1,$tctr1
-+ rev $tctr2,$tctr2
-+ aese $dat4,q12
-+ aesmc $dat4,$dat4
-+ rev w13,w13
-+ rev w14,w14
++ vld1.8 {$in4},[$inp],#16
++ cbz $xoffset,.Lxts_dec_tail4x
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ veor $tmp0,$tmp0,$dat0
++ veor $dat0,$in0,$iv0
++ veor $tmp1,$tmp1,$dat1
++ veor $dat1,$in1,$iv1
++ veor $tmp2,$tmp2,$dat2
++ veor $dat2,$in2,$iv2
++ veor $tmp3,$tmp3,$dat3
++ veor $dat3,$in3,$iv3
++ veor $tmp4,$tmp4,$dat4
++ vst1.8 {$tmp0},[$out],#16
++ veor $dat4,$in4,$iv4
++ vst1.8 {$tmp1},[$out],#16
++ mov $rounds,$rounds0
++ vst1.8 {$tmp2},[$out],#16
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp3},[$out],#16
++ vst1.8 {$tmp4},[$out],#16
++ b.hs .Loop5x_xts_dec
+
-+ aese $dat0,q13
-+ aesmc $dat0,$dat0
-+ aese $dat1,q13
-+ aesmc $dat1,$dat1
-+ aese $dat2,q13
-+ aesmc $dat2,$dat2
-+ aese $dat3,q13
-+ aesmc $dat3,$dat3
-+ aese $dat4,q13
-+ aesmc $dat4,$dat4
++ cmn $len,#0x10
++ b.ne .Loop5x_dec_after
++ // If x2($len) is equal to -0x10, there are 4 blocks left.
++ // After this special handling, reuse the five-block processing path.
++ // It will use the following IVs: $iv0,$iv0,$iv1,$iv2,$iv3.
++ vorr $iv4,$iv3,$iv3
++ vorr $iv3,$iv2,$iv2
++ vorr $iv2,$iv1,$iv1
++ vorr $iv1,$iv0,$iv0
++ fmov $ivl,$ivd40
++ fmov $ivh,$ivd41
++ veor $dat0,$iv0,$in0
++ veor $dat1,$iv1,$in1
++ veor $dat2,$in2,$iv2
++ veor $dat3,$in3,$iv3
++ veor $dat4,$in4,$iv4
++ b.eq .Loop5x_xts_dec
+
-+ aese $dat0,q14
-+ aesmc $dat0,$dat0
-+ vld1.8 {$in0},[$inp],#16
-+ aese $dat1,q14
-+ aesmc $dat1,$dat1
-+ vld1.8 {$in1},[$inp],#16
-+ aese $dat2,q14
-+ aesmc $dat2,$dat2
-+ vld1.8 {$in2},[$inp],#16
-+ aese $dat3,q14
-+ aesmc $dat3,$dat3
-+ vld1.8 {$in3},[$inp],#16
-+ aese $dat4,q14
-+ aesmc $dat4,$dat4
-+ vld1.8 {$in4},[$inp],#16
++.Loop5x_dec_after:
++ add $len,$len,#0x50
++ cbz $len,.Lxts_done
+
-+ aese $dat0,q15
-+ veor $in0,$in0,$rndlast
-+ aese $dat1,q15
-+ veor $in1,$in1,$rndlast
-+ aese $dat2,q15
-+ veor $in2,$in2,$rndlast
-+ aese $dat3,q15
-+ veor $in3,$in3,$rndlast
-+ aese $dat4,q15
-+ veor $in4,$in4,$rndlast
++ add $rounds,$rounds0,#2
++ subs $len,$len,#0x30
++ b.lo .Lxts_inner_dec_tail
+
-+ veor $in0,$in0,$dat0
-+ vorr $dat0,$ivec,$ivec
-+ veor $in1,$in1,$dat1
-+ vorr $dat1,$ivec,$ivec
-+ veor $in2,$in2,$dat2
-+ vorr $dat2,$ivec,$ivec
-+ veor $in3,$in3,$dat3
-+ vorr $dat3,$ivec,$ivec
-+ veor $in4,$in4,$dat4
-+ vorr $dat4,$ivec,$ivec
++ veor $dat0,$iv0,$in2
++ veor $dat1,$iv1,$in3
++ veor $dat2,$in4,$iv2
++ b .Lxts_outer_dec_tail
+
-+ vst1.8 {$in0},[$out],#16
-+ vmov.32 ${dat0}[3],$tctr0
-+ vst1.8 {$in1},[$out],#16
-+ vmov.32 ${dat1}[3],$tctr1
-+ vst1.8 {$in2},[$out],#16
-+ vmov.32 ${dat2}[3],$tctr2
-+ vst1.8 {$in3},[$out],#16
-+ vmov.32 ${dat3}[3],w13
-+ vst1.8 {$in4},[$out],#16
-+ vmov.32 ${dat4}[3],w14
++.align 4
++.Lxts_dec_tail4x:
++ add $inp,$inp,#16
++ vld1.32 {$dat0},[$inp],#16
++ veor $tmp1,$dat1,$tmp0
++ vst1.8 {$tmp1},[$out],#16
++ veor $tmp2,$dat2,$tmp2
++ vst1.8 {$tmp2},[$out],#16
++ veor $tmp3,$dat3,$tmp3
++ veor $tmp4,$dat4,$tmp4
++ vst1.8 {$tmp3-$tmp4},[$out],#32
+
-+ mov $cnt,$rounds
-+ cbz $len,.Lctr32_done
++ b .Lxts_done
++.align 4
++.Lxts_outer_dec_tail:
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $rounds,$rounds,#2
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lxts_outer_dec_tail
+
-+ add $ctr,$ctr,#5
-+ subs $len,$len,#5
-+ b.hs .Loop5x_ctr32
++ aesd $dat0,q8
++ aesimc $dat0,$dat0
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ veor $tmp0,$iv0,$rndlast
++ subs $len,$len,#0x30
++ // The iv for first block
++ fmov $ivl,$ivd20
++ fmov $ivh,$ivd21
++ mov $constnum,#0x87
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd00,$ivl
++ fmov $ivd01,$ivh
++ veor $tmp1,$iv1,$rndlast
++ csel $xoffset,$len,$xoffset,lo // x6, w6, is zero at this point
++ aesd $dat0,q9
++ aesimc $dat0,$dat0
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ veor $tmp2,$iv2,$rndlast
++ // The iv for second block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd10,$ivl
++ fmov $ivd11,$ivh
+
-+ add $len,$len,#5
-+ sub $ctr,$ctr,#5
++ add $xoffset,$xoffset,#0x20
++ add $inp,$inp,$xoffset // $inp is adjusted to the last data
+
-+ cmp $len,#2
-+ mov $step,#16
-+ cclr $step,lo
-+ b.ls .Lctr32_tail
++ mov $key_,$key1
+
-+ sub $len,$len,#3 // bias
-+ add $ctr,$ctr,#3
++ // The iv for third block
++ extr $midnumx,$ivh,$ivh,#32
++ extr $ivh,$ivh,$ivl,#63
++ and $tmpmw,$constnum,$midnum,asr #31
++ eor $ivl,$tmpmx,$ivl,lsl #1
++ fmov $ivd20,$ivl
++ fmov $ivd21,$ivh
++
++ aesd $dat0,q12
++ aesimc $dat0,$dat0
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ aesd $dat0,q13
++ aesimc $dat0,$dat0
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ aesd $dat0,q14
++ aesimc $dat0,$dat0
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ vld1.8 {$in2},[$inp],#16
++ aesd $dat0,q15
++ aesd $dat1,q15
++ aesd $dat2,q15
++ vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
++ add $rounds,$rounds0,#2
++ veor $tmp0,$tmp0,$dat0
++ veor $tmp1,$tmp1,$dat1
++ veor $dat2,$dat2,$tmp2
++ vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
++ vst1.8 {$tmp0},[$out],#16
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$dat2},[$out],#16
++
++ cmn $len,#0x30
++ add $len,$len,#0x30
++ b.eq .Lxts_done
++ sub $len,$len,#0x30
++ vorr $in3,$in1,$in1
++ vorr $in4,$in2,$in2
++ nop
++
++.Lxts_inner_dec_tail:
++ // $len == -0x10 means two blocks left.
++ cmn $len,#0x10
++ veor $dat1,$in3,$iv0
++ veor $dat2,$in4,$iv1
++ b.eq .Lxts_dec_tail_loop
++ veor $dat2,$in4,$iv0
++.Lxts_dec_tail_loop:
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ vld1.32 {q8},[$key_],#16
++ subs $rounds,$rounds,#2
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ vld1.32 {q9},[$key_],#16
++ b.gt .Lxts_dec_tail_loop
++
++ aesd $dat1,q8
++ aesimc $dat1,$dat1
++ aesd $dat2,q8
++ aesimc $dat2,$dat2
++ aesd $dat1,q9
++ aesimc $dat1,$dat1
++ aesd $dat2,q9
++ aesimc $dat2,$dat2
++ aesd $dat1,q12
++ aesimc $dat1,$dat1
++ aesd $dat2,q12
++ aesimc $dat2,$dat2
++ cmn $len,#0x20
++ aesd $dat1,q13
++ aesimc $dat1,$dat1
++ aesd $dat2,q13
++ aesimc $dat2,$dat2
++ veor $tmp1,$iv0,$rndlast
++ aesd $dat1,q14
++ aesimc $dat1,$dat1
++ aesd $dat2,q14
++ aesimc $dat2,$dat2
++ veor $tmp2,$iv1,$rndlast
++ aesd $dat1,q15
++ aesd $dat2,q15
++ b.eq .Lxts_dec_one
++ veor $tmp1,$tmp1,$dat1
++ veor $tmp2,$tmp2,$dat2
++ vorr $iv0,$iv2,$iv2
++ vorr $iv1,$iv3,$iv3
++ vst1.8 {$tmp1},[$out],#16
++ vst1.8 {$tmp2},[$out],#16
++ add $len,$len,#16
++ b .Lxts_done
++
++.Lxts_dec_one:
++ veor $tmp1,$tmp1,$dat2
++ vorr $iv0,$iv1,$iv1
++ vorr $iv1,$iv2,$iv2
++ vst1.8 {$tmp1},[$out],#16
++ add $len,$len,#32
++
++.Lxts_done:
++ tst $tailcnt,#0xf
++ b.eq .Lxts_dec_abort
++ // Process the last two blocks with ciphertext stealing.
++ mov x7,x3
++ cbnz x2,.Lxts_dec_1st_done
++ vld1.32 {$dat0},[$inp],#16
++
++ // Decrypt the second-to-last block to get the last plain text block
++.Lxts_dec_1st_done:
++ eor $tmpin,$dat0,$iv1
++ ldr $rounds,[$key1,#240]
++ vld1.32 {$dat0},[$key1],#16
++ sub $rounds,$rounds,#2
++ vld1.32 {$dat1},[$key1],#16
++.Loop_final_2nd_dec:
++ aesd $tmpin,$dat0
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key1],#16 // load key schedule...
++ subs $rounds,$rounds,#2
++ aesd $tmpin,$dat1
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat1},[$key1],#16 // load key schedule...
++ b.gt .Loop_final_2nd_dec
++
++ aesd $tmpin,$dat0
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key1]
++ aesd $tmpin,$dat1
++ veor $tmpin,$tmpin,$dat0
++ veor $tmpin,$tmpin,$iv1
++ vst1.8 {$tmpin},[$out]
++
++ mov $tmpinp,$inp
++ add $tmpoutp,$out,#16
++
++ // Merge the tailcnt trailing bytes (the final block that is not 16-byte aligned) into
++ // the second-to-last plain text block to get the last encrypted block.
++.composite_dec_loop:
++ subs $tailcnt,$tailcnt,#1
++ ldrb $l2outp,[$out,$tailcnt]
++ ldrb $loutp,[$tmpinp,$tailcnt]
++ strb $l2outp,[$tmpoutp,$tailcnt]
++ strb $loutp,[$out,$tailcnt]
++ b.gt .composite_dec_loop
++.Lxts_dec_load_done:
++ vld1.8 {$tmpin},[$out]
++ veor $tmpin,$tmpin,$iv0
++
++ // Decrypt the composite block to get the second-to-last plain text block
++ ldr $rounds,[$key_,#240]
++ vld1.8 {$dat},[$key_],#16
++ sub $rounds,$rounds,#2
++ vld1.8 {$dat1},[$key_],#16
++.Loop_final_dec:
++ aesd $tmpin,$dat0
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key_],#16 // load key schedule...
++ subs $rounds,$rounds,#2
++ aesd $tmpin,$dat1
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat1},[$key_],#16 // load key schedule...
++ b.gt .Loop_final_dec
++
++ aesd $tmpin,$dat0
++ aesimc $tmpin,$tmpin
++ vld1.32 {$dat0},[$key_]
++ aesd $tmpin,$dat1
++ veor $tmpin,$tmpin,$dat0
++ veor $tmpin,$tmpin,$iv0
++ vst1.8 {$tmpin},[$out]
++
++.Lxts_dec_abort:
++ ldp $tailcnt,$midnumx,[sp,#48]
++ ldp $ivd10,$ivd20,[sp,#32]
++ ldp $ivd30,$ivd40,[sp,#16]
++ ldp $constnumx,$tmpinp,[sp],#64
++
++.Lxts_dec_final_abort:
++ ret
++.size ${prefix}_xts_decrypt,.-${prefix}_xts_decrypt
+___
-+$code.=<<___;
- b .Loop3x_ctr32
-
- .align 4
-@@ -955,7 +1351,7 @@ if ($flavour =~ /64/) { ######## 64-bi
++}
++}}}
+ $code.=<<___;
+ #endif
+ ___
+@@ -963,7 +3615,7 @@ if ($flavour =~ /64/) { ######## 64-bi
# since ARMv7 instructions are always encoded little-endian.
# correct solution is to use .inst directive, but older
# assemblers don't implement it:-(
@@ -491,7 +2761,7 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
$word&0xff,($word>>8)&0xff,
($word>>16)&0xff,($word>>24)&0xff,
$mnemonic,$arg;
-@@ -996,14 +1392,17 @@ if ($flavour =~ /64/) { ######## 64-bi
+@@ -1004,14 +3656,17 @@ if ($flavour =~ /64/) { ######## 64-bi
s/\],#[0-9]+/]!/o;
s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or
@@ -511,9 +2781,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/aesv8-armx.pl.arm-update openssl-1.1.1c/c
print $_,"\n";
}
}
-diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl
---- openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl 2019-11-20 11:36:22.389506155 +0100
+diff -up openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl
+--- openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/aes/asm/vpaes-armv8.pl 2020-12-09 10:37:38.405558929 +0100
@@ -30,6 +30,7 @@
# Denver(***) 16.6(**) 15.1/17.8(**) [8.80/9.93 ]
# Apple A7(***) 22.7(**) 10.9/14.3 [8.45/10.0 ]
@@ -522,9 +2792,9 @@ diff -up openssl-1.1.1c/crypto/aes/asm/vpaes-armv8.pl.arm-update openssl-1.1.1c/
#
# (*) ECB denotes approximate result for parallelizable modes
# such as CBC decrypt, CTR, etc.;
-diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl
---- openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl 2019-11-21 16:44:50.814651553 +0100
+diff -up openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl
+--- openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/chacha/asm/chacha-armv8.pl 2020-12-09 10:40:57.922288627 +0100
@@ -18,32 +18,44 @@
#
# ChaCha20 for ARMv8.
@@ -585,20 +2855,22 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
*STDOUT=*OUT;
sub AUTOLOAD() # thunk [simplified] x86-style perlasm
-@@ -120,41 +132,36 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)
+@@ -120,42 +132,37 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)
}
$code.=<<___;
-#include "arm_arch.h"
+-
+-.text
+-
+#ifndef __KERNEL__
+# include "arm_arch.h"
-+.extern OPENSSL_armcap_P
+ .extern OPENSSL_armcap_P
+ .hidden OPENSSL_armcap_P
+#endif
++
++.text
- .text
-
--.extern OPENSSL_armcap_P
--
.align 5
.Lsigma:
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
@@ -641,7 +2913,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
.Lshort:
.inst 0xd503233f // paciasp
-@@ -173,7 +180,7 @@ ChaCha20_ctr32:
+@@ -174,7 +181,7 @@ ChaCha20_ctr32:
ldp @d[2],@d[3],[$key] // load key
ldp @d[4],@d[5],[$key,#16]
ldp @d[6],@d[7],[$ctr] // load counter
@@ -650,7 +2922,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
ror @d[2],@d[2],#32
ror @d[3],@d[3],#32
ror @d[4],@d[4],#32
-@@ -242,7 +249,7 @@ $code.=<<___;
+@@ -243,7 +250,7 @@ $code.=<<___;
add @x[14],@x[14],@x[15],lsl#32
ldp @x[13],@x[15],[$inp,#48]
add $inp,$inp,#64
@@ -659,7 +2931,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -299,7 +306,7 @@ $code.=<<___;
+@@ -300,7 +307,7 @@ $code.=<<___;
add @x[10],@x[10],@x[11],lsl#32
add @x[12],@x[12],@x[13],lsl#32
add @x[14],@x[14],@x[15],lsl#32
@@ -668,7 +2940,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -340,46 +347,91 @@ $code.=<<___;
+@@ -341,46 +348,91 @@ $code.=<<___;
___
{{{
@@ -789,7 +3061,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
.inst 0xd503233f // paciasp
stp x29,x30,[sp,#-96]!
add x29,sp,#0
-@@ -402,8 +454,9 @@ ChaCha20_neon:
+@@ -403,8 +455,9 @@ ChaCha20_neon:
ld1 {@K[1],@K[2]},[$key]
ldp @d[6],@d[7],[$ctr] // load counter
ld1 {@K[3]},[$ctr]
@@ -801,7 +3073,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev64 @K[0],@K[0]
ror @d[2],@d[2],#32
ror @d[3],@d[3],#32
-@@ -412,115 +465,129 @@ ChaCha20_neon:
+@@ -413,115 +466,129 @@ ChaCha20_neon:
ror @d[6],@d[6],#32
ror @d[7],@d[7],#32
#endif
@@ -1013,7 +3285,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -530,48 +597,68 @@ $code.=<<___;
+@@ -531,48 +598,68 @@ $code.=<<___;
rev @x[12],@x[12]
rev @x[14],@x[14]
#endif
@@ -1106,7 +3378,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
ldp x19,x20,[x29,#16]
add sp,sp,#64
ldp x21,x22,[x29,#32]
-@@ -582,8 +669,10 @@ $code.=<<___;
+@@ -583,8 +670,10 @@ $code.=<<___;
.inst 0xd50323bf // autiasp
ret
@@ -1118,7 +3390,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
cmp $len,#64
b.lo .Less_than_64
-@@ -600,7 +689,7 @@ $code.=<<___;
+@@ -601,7 +690,7 @@ $code.=<<___;
add @x[14],@x[14],@x[15],lsl#32
ldp @x[13],@x[15],[$inp,#48]
add $inp,$inp,#64
@@ -1127,7 +3399,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -620,48 +709,68 @@ $code.=<<___;
+@@ -621,48 +710,68 @@ $code.=<<___;
eor @x[14],@x[14],@x[15]
stp @x[0],@x[2],[$out,#0] // store output
@@ -1220,7 +3492,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
sub $out,$out,#1
add $inp,$inp,$len
add $out,$out,$len
-@@ -694,9 +803,41 @@ $code.=<<___;
+@@ -695,9 +804,41 @@ $code.=<<___;
.size ChaCha20_neon,.-ChaCha20_neon
___
{
@@ -1263,7 +3535,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
$code.=<<___;
.type ChaCha20_512_neon,%function
-@@ -716,6 +857,7 @@ ChaCha20_512_neon:
+@@ -717,6 +858,7 @@ ChaCha20_512_neon:
.L512_or_more_neon:
sub sp,sp,#128+64
@@ -1271,7 +3543,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
ldp @d[0],@d[1],[@x[0]] // load sigma
ld1 {@K[0]},[@x[0]],#16
ldp @d[2],@d[3],[$key] // load key
-@@ -723,8 +865,9 @@ ChaCha20_512_neon:
+@@ -724,8 +866,9 @@ ChaCha20_512_neon:
ld1 {@K[1],@K[2]},[$key]
ldp @d[6],@d[7],[$ctr] // load counter
ld1 {@K[3]},[$ctr]
@@ -1283,7 +3555,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev64 @K[0],@K[0]
ror @d[2],@d[2],#32
ror @d[3],@d[3],#32
-@@ -791,9 +934,10 @@ ChaCha20_512_neon:
+@@ -792,9 +935,10 @@ ChaCha20_512_neon:
mov $C4,@K[2]
stp @K[3],@K[4],[sp,#48] // off-load key block, variable part
mov $C5,@K[2]
@@ -1295,7 +3567,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
subs $len,$len,#512
.Loop_upper_neon:
sub $ctr,$ctr,#1
-@@ -866,7 +1010,7 @@ $code.=<<___;
+@@ -867,7 +1011,7 @@ $code.=<<___;
add @x[14],@x[14],@x[15],lsl#32
ldp @x[13],@x[15],[$inp,#48]
add $inp,$inp,#64
@@ -1304,7 +3576,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -955,6 +1099,7 @@ $code.=<<___;
+@@ -956,6 +1100,7 @@ $code.=<<___;
add.32 @x[2],@x[2],@d[1]
ldp @K[4],@K[5],[sp,#64]
add @x[3],@x[3],@d[1],lsr#32
@@ -1312,7 +3584,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
add $A0,$A0,@K[0]
add.32 @x[4],@x[4],@d[2]
add $A1,$A1,@K[0]
-@@ -1007,7 +1152,7 @@ $code.=<<___;
+@@ -1008,7 +1153,7 @@ $code.=<<___;
add $inp,$inp,#64
add $B5,$B5,@K[1]
@@ -1321,7 +3593,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
rev @x[0],@x[0]
rev @x[2],@x[2]
rev @x[4],@x[4]
-@@ -1085,26 +1230,26 @@ $code.=<<___;
+@@ -1086,26 +1231,26 @@ $code.=<<___;
b.hs .Loop_outer_512_neon
adds $len,$len,#512
@@ -1356,7 +3628,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
eor @K[1],@K[1],@K[1]
eor @K[2],@K[2],@K[2]
eor @K[3],@K[3],@K[3]
-@@ -1114,6 +1259,7 @@ $code.=<<___;
+@@ -1115,6 +1260,7 @@ $code.=<<___;
b .Loop_outer
.Ldone_512_neon:
@@ -1364,7 +3636,7 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
ldp x19,x20,[x29,#16]
add sp,sp,#128+64
ldp x21,x22,[x29,#32]
-@@ -1132,9 +1278,11 @@ foreach (split("\n",$code)) {
+@@ -1133,9 +1279,11 @@ foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
(s/\b([a-z]+)\.32\b/$1/ and (s/x([0-9]+)/w$1/g or 1)) or
@@ -1377,9 +3649,9 @@ diff -up openssl-1.1.1c/crypto/chacha/asm/chacha-armv8.pl.arm-update openssl-1.1
(s/\brev32\.16\b/rev32/ and (s/\.4s/\.8h/g or 1));
#s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo;
-diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl
---- openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl 2019-11-20 11:36:22.389506155 +0100
+diff -up openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl
+--- openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/modes/asm/ghashv8-armx.pl 2020-12-09 10:37:38.408558954 +0100
@@ -42,6 +42,7 @@
# Denver 0.51 0.65 6.02
# Mongoose 0.65 1.10 8.06
@@ -1388,9 +3660,9 @@ diff -up openssl-1.1.1c/crypto/modes/asm/ghashv8-armx.pl.arm-update openssl-1.1.
#
# (*) presented for reference/comparison purposes;
-diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl
---- openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl 2019-11-20 11:36:22.390506137 +0100
+diff -up openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl
+--- openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/poly1305/asm/poly1305-armv8.pl 2020-12-09 10:37:38.408558954 +0100
@@ -29,6 +29,7 @@
# X-Gene 2.13/+68% 2.27
# Mongoose 1.77/+75% 1.12
@@ -1399,9 +3671,9 @@ diff -up openssl-1.1.1c/crypto/poly1305/asm/poly1305-armv8.pl.arm-update openssl
#
# (*) estimate based on resources availability is less than 1.0,
# i.e. measured result is worse than expected, presumably binary
-diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl
---- openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl 2019-11-20 11:36:22.390506137 +0100
+diff -up openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl
+--- openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/sha/asm/keccak1600-armv8.pl 2020-12-09 10:37:38.408558954 +0100
@@ -51,6 +51,7 @@
# Kryo 12
# Denver 7.8
@@ -1410,9 +3682,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/keccak1600-armv8.pl.arm-update openssl-1.
#
# (*) Corresponds to SHA3-256. No improvement coefficients are listed
# because they vary too much from compiler to compiler. Newer
-diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl
---- openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl 2019-11-20 11:36:22.390506137 +0100
+diff -up openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl
+--- openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/sha/asm/sha1-armv8.pl 2020-12-09 10:37:38.408558954 +0100
@@ -27,6 +27,7 @@
# X-Gene 8.80 (+200%)
# Mongoose 2.05 6.50 (+160%)
@@ -1421,9 +3693,9 @@ diff -up openssl-1.1.1c/crypto/sha/asm/sha1-armv8.pl.arm-update openssl-1.1.1c/c
#
# (*) Software results are presented mostly for reference purposes.
# (**) Keep in mind that Denver relies on binary translation, which
-diff -up openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl
---- openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl.arm-update 2019-05-28 15:12:21.000000000 +0200
-+++ openssl-1.1.1c/crypto/sha/asm/sha512-armv8.pl 2019-11-20 11:36:22.390506137 +0100
+diff -up openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl
+--- openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl.arm-update 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/sha/asm/sha512-armv8.pl 2020-12-09 10:37:38.408558954 +0100
@@ -28,6 +28,7 @@
# X-Gene 20.0 (+100%) 12.8 (+300%(***))
# Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
diff --git a/openssl-1.1.1-fips-post-rand.patch b/openssl-1.1.1-fips-post-rand.patch
index 18a01fe..027dc55 100644
--- a/openssl-1.1.1-fips-post-rand.patch
+++ b/openssl-1.1.1-fips-post-rand.patch
@@ -1,6 +1,6 @@
-diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/fips/fips.c
---- openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand 2020-03-17 18:06:16.822418854 +0100
-+++ openssl-1.1.1e/crypto/fips/fips.c 2020-03-17 18:06:16.861418172 +0100
+diff -up openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand openssl-1.1.1i/crypto/fips/fips.c
+--- openssl-1.1.1i/crypto/fips/fips.c.fips-post-rand 2020-12-09 10:26:41.634106328 +0100
++++ openssl-1.1.1i/crypto/fips/fips.c 2020-12-09 10:26:41.652106475 +0100
@@ -68,6 +68,7 @@
# include <openssl/fips.h>
@@ -51,10 +51,10 @@ diff -up openssl-1.1.1e/crypto/fips/fips.c.fips-post-rand openssl-1.1.1e/crypto/
ret = 1;
goto end;
}
-diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/crypto/rand/drbg_lib.c
---- openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100
-+++ openssl-1.1.1e/crypto/rand/drbg_lib.c 2020-03-17 18:07:35.305045521 +0100
-@@ -1009,6 +1009,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg
+diff -up openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1i/crypto/rand/drbg_lib.c
+--- openssl-1.1.1i/crypto/rand/drbg_lib.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/rand/drbg_lib.c 2020-12-09 10:26:41.652106475 +0100
+@@ -1005,6 +1005,20 @@ size_t rand_drbg_seedlen(RAND_DRBG *drbg
return min_entropy > min_entropylen ? min_entropy : min_entropylen;
}
@@ -75,9 +75,9 @@ diff -up openssl-1.1.1e/crypto/rand/drbg_lib.c.fips-post-rand openssl-1.1.1e/cry
/* Implements the default OpenSSL RAND_add() method */
static int drbg_add(const void *buf, int num, double randomness)
{
-diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/crypto/rand/rand_unix.c
---- openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand 2020-03-17 15:31:17.000000000 +0100
-+++ openssl-1.1.1e/crypto/rand/rand_unix.c 2020-03-17 18:09:01.503537189 +0100
+diff -up openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1i/crypto/rand/rand_unix.c
+--- openssl-1.1.1i/crypto/rand/rand_unix.c.fips-post-rand 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/crypto/rand/rand_unix.c 2020-12-09 10:36:59.531221903 +0100
@@ -17,10 +17,12 @@
#include <openssl/crypto.h>
#include "rand_local.h"
@@ -91,7 +91,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
# ifdef DEVRANDOM_WAIT
# include <sys/shm.h>
# include <sys/utsname.h>
-@@ -342,7 +344,7 @@ static ssize_t sysctl_random(char *buf,
+@@ -344,7 +346,7 @@ static ssize_t sysctl_random(char *buf,
* syscall_random(): Try to get random data using a system call
* returns the number of bytes returned in buf, or < 0 on error.
*/
@@ -100,15 +100,15 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
{
/*
* Note: 'buflen' equals the size of the buffer which is used by the
-@@ -364,6 +366,7 @@ static ssize_t syscall_random(void *buf,
- * - Linux since 3.17 with glibc 2.25
- * - FreeBSD since 12.0 (1200061)
+@@ -369,6 +371,7 @@ static ssize_t syscall_random(void *buf,
+ * Note: Sometimes getentropy() can be provided but not implemented
+ * internally. So we need to check errno for ENOSYS
*/
+# if 0
# if defined(__GNUC__) && __GNUC__>=2 && defined(__ELF__) && !defined(__hpux)
extern int getentropy(void *buffer, size_t length) __attribute__((weak));
-@@ -385,10 +388,10 @@ static ssize_t syscall_random(void *buf,
+@@ -394,10 +397,10 @@ static ssize_t syscall_random(void *buf,
if (p_getentropy.p != NULL)
return p_getentropy.f(buf, buflen) == 0 ? (ssize_t)buflen : -1;
# endif
@@ -122,7 +122,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
# elif (defined(__FreeBSD__) || defined(__NetBSD__)) && defined(KERN_ARND)
return sysctl_random(buf, buflen);
# else
-@@ -623,6 +626,9 @@ size_t rand_pool_acquire_entropy(RAND_PO
+@@ -633,6 +636,9 @@ size_t rand_pool_acquire_entropy(RAND_PO
size_t entropy_available;
# if defined(OPENSSL_RAND_SEED_GETRANDOM)
@@ -132,7 +132,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
{
size_t bytes_needed;
unsigned char *buffer;
-@@ -633,7 +639,7 @@ size_t rand_pool_acquire_entropy(RAND_PO
+@@ -643,7 +649,7 @@ size_t rand_pool_acquire_entropy(RAND_PO
bytes_needed = rand_pool_bytes_needed(pool, 1 /*entropy_factor*/);
while (bytes_needed != 0 && attempts-- > 0) {
buffer = rand_pool_add_begin(pool, bytes_needed);
@@ -141,7 +141,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
if (bytes > 0) {
rand_pool_add_end(pool, bytes, 8 * bytes);
bytes_needed -= bytes;
-@@ -668,8 +674,10 @@ size_t rand_pool_acquire_entropy(RAND_PO
+@@ -678,8 +684,10 @@ size_t rand_pool_acquire_entropy(RAND_PO
int attempts = 3;
const int fd = get_random_device(i);
@@ -153,7 +153,7 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
while (bytes_needed != 0 && attempts-- > 0) {
buffer = rand_pool_add_begin(pool, bytes_needed);
-@@ -732,7 +740,9 @@ size_t rand_pool_acquire_entropy(RAND_PO
+@@ -742,7 +750,9 @@ size_t rand_pool_acquire_entropy(RAND_PO
return entropy_available;
}
# endif
@@ -164,9 +164,9 @@ diff -up openssl-1.1.1e/crypto/rand/rand_unix.c.fips-post-rand openssl-1.1.1e/cr
return rand_pool_entropy_available(pool);
# endif
}
-diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/include/crypto/fips.h
---- openssl-1.1.1e/include/crypto/fips.h.fips-post-rand 2020-03-17 18:06:16.831418696 +0100
-+++ openssl-1.1.1e/include/crypto/fips.h 2020-03-17 18:06:16.861418172 +0100
+diff -up openssl-1.1.1i/include/crypto/fips.h.fips-post-rand openssl-1.1.1i/include/crypto/fips.h
+--- openssl-1.1.1i/include/crypto/fips.h.fips-post-rand 2020-12-09 10:26:41.639106369 +0100
++++ openssl-1.1.1i/include/crypto/fips.h 2020-12-09 10:26:41.657106516 +0100
@@ -77,6 +77,8 @@ int FIPS_selftest_hmac(void);
int FIPS_selftest_drbg(void);
int FIPS_selftest_cmac(void);
@@ -176,9 +176,9 @@ diff -up openssl-1.1.1e/include/crypto/fips.h.fips-post-rand openssl-1.1.1e/incl
int fips_pkey_signature_test(EVP_PKEY *pkey,
const unsigned char *tbs, int tbslen,
const unsigned char *kat,
-diff -up openssl-1.1.1e/include/crypto/rand.h.fips-post-rand openssl-1.1.1e/include/crypto/rand.h
---- openssl-1.1.1e/include/crypto/rand.h.fips-post-rand 2020-03-17 15:31:17.000000000 +0100
-+++ openssl-1.1.1e/include/crypto/rand.h 2020-03-17 18:07:35.303045555 +0100
+diff -up openssl-1.1.1i/include/crypto/rand.h.fips-post-rand openssl-1.1.1i/include/crypto/rand.h
+--- openssl-1.1.1i/include/crypto/rand.h.fips-post-rand 2020-12-08 14:20:59.000000000 +0100
++++ openssl-1.1.1i/include/crypto/rand.h 2020-12-09 10:26:41.657106516 +0100
@@ -24,6 +24,7 @@
typedef struct rand_pool_st RAND_POOL;
diff --git a/openssl-1.1.1-version-override.patch b/openssl-1.1.1-version-override.patch
index ff69bdb..727cc26 100644
--- a/openssl-1.1.1-version-override.patch
+++ b/openssl-1.1.1-version-override.patch
@@ -1,12 +1,12 @@
-diff -up openssl-1.1.1g/include/openssl/opensslv.h.version-override openssl-1.1.1g/include/openssl/opensslv.h
---- openssl-1.1.1g/include/openssl/opensslv.h.version-override 2020-04-23 13:29:37.802673513 +0200
-+++ openssl-1.1.1g/include/openssl/opensslv.h 2020-04-23 13:30:13.064008458 +0200
+diff -up openssl-1.1.1i/include/openssl/opensslv.h.version-override openssl-1.1.1i/include/openssl/opensslv.h
+--- openssl-1.1.1i/include/openssl/opensslv.h.version-override 2020-12-09 10:25:12.042374409 +0100
++++ openssl-1.1.1i/include/openssl/opensslv.h 2020-12-09 10:26:00.362769170 +0100
@@ -40,7 +40,7 @@ extern "C" {
* major minor fix final patch/beta)
*/
- # define OPENSSL_VERSION_NUMBER 0x1010108fL
--# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1h 22 Sep 2020"
-+# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1h FIPS 22 Sep 2020"
+ # define OPENSSL_VERSION_NUMBER 0x1010109fL
+-# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i 8 Dec 2020"
++# define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1i FIPS 8 Dec 2020"
/*-
* The macros below are to be used for shared library (.so, .dll, ...)
diff --git a/openssl.spec b/openssl.spec
index 3f6403c..2e26e49 100644
--- a/openssl.spec
+++ b/openssl.spec
@@ -21,7 +21,7 @@
Summary: Utilities from the general purpose cryptography library with TLS implementation
Name: openssl
-Version: 1.1.1h
+Version: 1.1.1i
Release: 1%{?dist}
Epoch: 1
# We have to remove certain patented algorithms from the openssl source
@@ -473,6 +473,9 @@ export LD_LIBRARY_PATH
%ldconfig_scriptlets libs
%changelog
+* Wed Dec 9 2020 Tomáš Mráz <tmraz@redhat.com> 1.1.1i-1
+- Update to the 1.1.1i release fixing CVE-2020-1971
+
* Mon Nov 9 2020 Sahana Prasad <sahana@redhat.com> - 1.1.1h-1
- Upgrade to version 1.1.1.h
diff --git a/sources b/sources
index 2bae151..4c1e648 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (openssl-1.1.1h-hobbled.tar.xz) = 75e1d3f34f93462b97db92aa6538fd4f2f091ad717438e51d147508738be720d7d0bf4a9b1fda3a1943a4c13aae2a39da3add05f7da833b3c6de40a97bc97908
+SHA512 (openssl-1.1.1i-hobbled.tar.xz) = e131a05e88690a7be7c3d74cbb26620130498ced2ce3d7fd55979aab5ea736ec8b268ba92268bd5bc347989325a3950a066883007cb20c2dd9739fd1eafc513f
reply other threads:[~2026-06-09 12:44 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=178100909242.1.579790369096856438.rpms-openssl-a07706cf0e50@fedoraproject.org \
--to=tmraz@fedoraproject.org \
--cc=git-commits@fedoraproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox