public inbox for git-commits@fedoraproject.org
help / color / mirror / Atom feed
* [rpms/openssl] rebase_40beta: - new upstream version fixing CVE-2010-3864 (#649304)
@ 2026-06-09 12:42 Tomas Mraz
0 siblings, 0 replies; 2+ messages in thread
From: Tomas Mraz @ 2026-06-09 12:42 UTC (permalink / raw)
To: git-commits
A new commit has been pushed.
Repo : rpms/openssl
Branch : rebase_40beta
Commit : 2b2423c26b793f9c140ab13186d47b88008fd526
Author : Tomas Mraz <tmraz@fedoraproject.org>
Date : 2010-11-16T18:25:23+01:00
Stats : +2/-1 in 2 file(s)
URL : https://src.fedoraproject.org/rpms/openssl/c/2b2423c26b793f9c140ab13186d47b88008fd526?branch=rebase_40beta
Log:
- new upstream version fixing CVE-2010-3864 (#649304)
---
diff --git a/.gitignore b/.gitignore
index f4623d7..7e9e24e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
openssl-1.0.0a-usa.tar.bz2
+/openssl-1.0.0b-usa.tar.bz2
diff --git a/sources b/sources
index f42b68d..e31429d 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-36a9936e1791566b205daa7cb4bea074 openssl-1.0.0a-usa.tar.bz2
+0efec5b9c8fc10356f4d52732f91f8ea openssl-1.0.0b-usa.tar.bz2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [rpms/openssl] rebase_40beta: - new upstream version fixing CVE-2010-3864 (#649304)
@ 2026-06-09 12:42 Tomas Mraz
0 siblings, 0 replies; 2+ messages in thread
From: Tomas Mraz @ 2026-06-09 12:42 UTC (permalink / raw)
To: git-commits
A new commit has been pushed.
Repo : rpms/openssl
Branch : rebase_40beta
Commit : 3ff2d49a83bc6aed93deba67ad602802de9e38e2
Author : Tomas Mraz <tmraz@fedoraproject.org>
Date : 2010-11-16T18:21:39+01:00
Stats : +2914/-2914 in 7 file(s)
URL : https://src.fedoraproject.org/rpms/openssl/c/3ff2d49a83bc6aed93deba67ad602802de9e38e2?branch=rebase_40beta
Log:
- new upstream version fixing CVE-2010-3864 (#649304)
---
diff --git a/openssl-1.0.0-beta4-aesni.patch b/openssl-1.0.0-beta4-aesni.patch
deleted file mode 100644
index f57918b..0000000
--- a/openssl-1.0.0-beta4-aesni.patch
+++ /dev/null
@@ -1,2388 +0,0 @@
-diff -up openssl-1.0.0-beta4/Configure.aesni openssl-1.0.0-beta4/Configure
---- openssl-1.0.0-beta4/Configure.aesni 2010-01-07 23:38:31.000000000 +0100
-+++ openssl-1.0.0-beta4/Configure 2010-01-12 22:18:06.000000000 +0100
-@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket";
- my $bits1="THIRTY_TWO_BIT ";
- my $bits2="SIXTY_FOUR_BIT ";
-
--my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
-+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
-
- my $x86_elf_asm="$x86_asm:elf";
-
--my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
-+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
- my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void";
- my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
- my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
-@@ -491,7 +491,7 @@ my %table=(
- #
- # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
- "VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32",
--"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32",
-+"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32",
- # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
- # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE'
- "VC-WIN32","cl:-W3 -WX -Gs0 -GF -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE:::WIN32::BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN ${x86_gcc_opts}:${x86_asm}:win32n:win32",
-@@ -1410,6 +1410,7 @@ if ($rmd160_obj =~ /\.o$/)
- if ($aes_obj =~ /\.o$/)
- {
- $cflags.=" -DAES_ASM";
-+ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2);
- }
- else {
- $aes_obj=$aes_enc;
-diff -up openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl.aesni openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl
---- openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl.aesni 2010-01-12 22:18:06.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl 2010-01-12 22:18:06.000000000 +0100
-@@ -0,0 +1,765 @@
-+#!/usr/bin/env perl
-+
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+#
-+# This module implements support for Intel AES-NI extension. In
-+# OpenSSL context it's used with Intel engine, but can also be used as
-+# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
-+# details].
-+
-+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
-+ # generates drop-in replacement for
-+ # crypto/aes/asm/aes-586.pl:-)
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-+push(@INC,"${dir}","${dir}../../perlasm");
-+require "x86asm.pl";
-+
-+&asm_init($ARGV[0],$0);
-+
-+$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups");
-+
-+$len="eax";
-+$rounds="ecx";
-+$key="edx";
-+$inp="esi";
-+$out="edi";
-+$rounds_="ebx"; # backup copy for $rounds
-+$key_="ebp"; # backup copy for $key
-+
-+$inout0="xmm0";
-+$inout1="xmm1";
-+$inout2="xmm2";
-+$rndkey0="xmm3";
-+$rndkey1="xmm4";
-+$ivec="xmm5";
-+$in0="xmm6";
-+$in1="xmm7"; $inout3="xmm7";
-+
-+# Inline version of internal aesni_[en|de]crypt1
-+sub aesni_inline_generate1
-+{ my $p=shift;
-+
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ &$movekey ($rndkey1,&QWP(16,$key));
-+ &lea ($key,&DWP(32,$key));
-+ &pxor ($inout0,$rndkey0);
-+ &set_label("${p}1_loop");
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &dec ($rounds);
-+ &$movekey ($rndkey1,&QWP(0,$key));
-+ &lea ($key,&DWP(16,$key));
-+ &jnz (&label("${p}1_loop"));
-+ eval"&aes${p}last ($inout0,$rndkey1)";
-+}
-+
-+sub aesni_generate1 # fully unrolled loop
-+{ my $p=shift;
-+
-+ &function_begin_B("_aesni_${p}rypt1");
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ &$movekey ($rndkey1,&QWP(0x10,$key));
-+ &cmp ($rounds,11);
-+ &pxor ($inout0,$rndkey0);
-+ &$movekey ($rndkey0,&QWP(0x20,$key));
-+ &lea ($key,&DWP(0x30,$key));
-+ &jb (&label("${p}128"));
-+ &lea ($key,&DWP(0x20,$key));
-+ &je (&label("${p}192"));
-+ &lea ($key,&DWP(0x20,$key));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(-0x40,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(-0x30,$key));
-+ &set_label("${p}192");
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(-0x20,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(-0x10,$key));
-+ &set_label("${p}128");
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(0,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(0x10,$key));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(0x20,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(0x30,$key));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(0x40,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(0x50,$key));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(0x60,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &$movekey ($rndkey0,&QWP(0x70,$key));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ eval"&aes${p}last ($inout0,$rndkey0)";
-+ &ret();
-+ &function_end_B("_aesni_${p}rypt1");
-+}
-+
-+# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
-+# &aesni_generate1("dec");
-+&function_begin_B("${PREFIX}_encrypt");
-+ &mov ("eax",&wparam(0));
-+ &mov ($key,&wparam(2));
-+ &movups ($inout0,&QWP(0,"eax"));
-+ &mov ($rounds,&DWP(240,$key));
-+ &mov ("eax",&wparam(1));
-+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1");
-+ &movups (&QWP(0,"eax"),$inout0);
-+ &ret ();
-+&function_end_B("${PREFIX}_encrypt");
-+
-+# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
-+# &aesni_generate1("dec");
-+&function_begin_B("${PREFIX}_decrypt");
-+ &mov ("eax",&wparam(0));
-+ &mov ($key,&wparam(2));
-+ &movups ($inout0,&QWP(0,"eax"));
-+ &mov ($rounds,&DWP(240,$key));
-+ &mov ("eax",&wparam(1));
-+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1");
-+ &movups (&QWP(0,"eax"),$inout0);
-+ &ret ();
-+&function_end_B("${PREFIX}_decrypt");
-+\f
-+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
-+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec]
-+# latency is 6, it turned out that it can be scheduled only every
-+# *second* cycle. Thus 3x interleave is the one providing optimal
-+# utilization, i.e. when subroutine's throughput is virtually same as
-+# of non-interleaved subroutine [for number of input blocks up to 3].
-+# This is why it makes no sense to implement 2x subroutine. As soon
-+# as/if Intel improves throughput by making it possible to schedule
-+# the instructions in question *every* cycles I would have to
-+# implement 6x interleave and use it in loop...
-+sub aesni_generate3
-+{ my $p=shift;
-+
-+ &function_begin_B("_aesni_${p}rypt3");
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ &shr ($rounds,1);
-+ &$movekey ($rndkey1,&QWP(16,$key));
-+ &lea ($key,&DWP(32,$key));
-+ &pxor ($inout0,$rndkey0);
-+ &pxor ($inout1,$rndkey0);
-+ &pxor ($inout2,$rndkey0);
-+ &jmp (&label("${p}3_loop"));
-+ &set_label("${p}3_loop",16);
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ eval"&aes${p} ($inout1,$rndkey1)";
-+ &dec ($rounds);
-+ eval"&aes${p} ($inout2,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(16,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &lea ($key,&DWP(32,$key));
-+ eval"&aes${p} ($inout1,$rndkey0)";
-+ eval"&aes${p} ($inout2,$rndkey0)";
-+ &jnz (&label("${p}3_loop"));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ eval"&aes${p} ($inout1,$rndkey1)";
-+ eval"&aes${p} ($inout2,$rndkey1)";
-+ eval"&aes${p}last ($inout0,$rndkey0)";
-+ eval"&aes${p}last ($inout1,$rndkey0)";
-+ eval"&aes${p}last ($inout2,$rndkey0)";
-+ &ret();
-+ &function_end_B("_aesni_${p}rypt3");
-+}
-+
-+# 4x interleave is implemented to improve small block performance,
-+# most notably [and naturally] 4 block by ~30%. One can argue that one
-+# should have implemented 5x as well, but improvement would be <20%,
-+# so it's not worth it...
-+sub aesni_generate4
-+{ my $p=shift;
-+
-+ &function_begin_B("_aesni_${p}rypt4");
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ &$movekey ($rndkey1,&QWP(16,$key));
-+ &shr ($rounds,1);
-+ &lea ($key,&DWP(32,$key));
-+ &pxor ($inout0,$rndkey0);
-+ &pxor ($inout1,$rndkey0);
-+ &pxor ($inout2,$rndkey0);
-+ &pxor ($inout3,$rndkey0);
-+ &jmp (&label("${p}3_loop"));
-+ &set_label("${p}3_loop",16);
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ eval"&aes${p} ($inout1,$rndkey1)";
-+ &dec ($rounds);
-+ eval"&aes${p} ($inout2,$rndkey1)";
-+ eval"&aes${p} ($inout3,$rndkey1)";
-+ &$movekey ($rndkey1,&QWP(16,$key));
-+ eval"&aes${p} ($inout0,$rndkey0)";
-+ &lea ($key,&DWP(32,$key));
-+ eval"&aes${p} ($inout1,$rndkey0)";
-+ eval"&aes${p} ($inout2,$rndkey0)";
-+ eval"&aes${p} ($inout3,$rndkey0)";
-+ &jnz (&label("${p}3_loop"));
-+ eval"&aes${p} ($inout0,$rndkey1)";
-+ &$movekey ($rndkey0,&QWP(0,$key));
-+ eval"&aes${p} ($inout1,$rndkey1)";
-+ eval"&aes${p} ($inout2,$rndkey1)";
-+ eval"&aes${p} ($inout3,$rndkey1)";
-+ eval"&aes${p}last ($inout0,$rndkey0)";
-+ eval"&aes${p}last ($inout1,$rndkey0)";
-+ eval"&aes${p}last ($inout2,$rndkey0)";
-+ eval"&aes${p}last ($inout3,$rndkey0)";
-+ &ret();
-+ &function_end_B("_aesni_${p}rypt4");
-+}
-+&aesni_generate3("enc") if ($PREFIX eq "aesni");
-+&aesni_generate3("dec");
-+&aesni_generate4("enc") if ($PREFIX eq "aesni");
-+&aesni_generate4("dec");
-+
-+if ($PREFIX eq "aesni") {
-+# void aesni_ecb_encrypt (const void *in, void *out,
-+# size_t length, const AES_KEY *key,
-+# int enc);
-+&function_begin("aesni_ecb_encrypt");
-+ &mov ($inp,&wparam(0));
-+ &mov ($out,&wparam(1));
-+ &mov ($len,&wparam(2));
-+ &mov ($key,&wparam(3));
-+ &mov ($rounds,&wparam(4));
-+ &cmp ($len,16);
-+ &jb (&label("ecb_ret"));
-+ &and ($len,-16);
-+ &test ($rounds,$rounds)
-+ &mov ($rounds,&DWP(240,$key));
-+ &mov ($key_,$key); # backup $key
-+ &mov ($rounds_,$rounds); # backup $rounds
-+ &jz (&label("ecb_decrypt"));
-+
-+ &sub ($len,0x40);
-+ &jbe (&label("ecb_enc_tail"));
-+ &jmp (&label("ecb_enc_loop3"));
-+
-+&set_label("ecb_enc_loop3",16);
-+ &movups ($inout0,&QWP(0,$inp));
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &call ("_aesni_encrypt3");
-+ &sub ($len,0x30);
-+ &lea ($inp,&DWP(0x30,$inp));
-+ &lea ($out,&DWP(0x30,$out));
-+ &movups (&QWP(-0x30,$out),$inout0);
-+ &mov ($key,$key_); # restore $key
-+ &movups (&QWP(-0x20,$out),$inout1);
-+ &mov ($rounds,$rounds_); # restore $rounds
-+ &movups (&QWP(-0x10,$out),$inout2);
-+ &ja (&label("ecb_enc_loop3"));
-+
-+&set_label("ecb_enc_tail");
-+ &add ($len,0x40);
-+ &jz (&label("ecb_ret"));
-+
-+ &cmp ($len,0x10);
-+ &movups ($inout0,&QWP(0,$inp));
-+ &je (&label("ecb_enc_one"));
-+ &cmp ($len,0x20);
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &je (&label("ecb_enc_two"));
-+ &cmp ($len,0x30);
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &je (&label("ecb_enc_three"));
-+ &movups ($inout3,&QWP(0x30,$inp));
-+ &call ("_aesni_encrypt4");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movups (&QWP(0x20,$out),$inout2);
-+ &movups (&QWP(0x30,$out),$inout3);
-+ jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_enc_one",16);
-+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1");
-+ &movups (&QWP(0,$out),$inout0);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_enc_two",16);
-+ &call ("_aesni_encrypt3");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_enc_three",16);
-+ &call ("_aesni_encrypt3");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movups (&QWP(0x20,$out),$inout2);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_decrypt",16);
-+ &sub ($len,0x40);
-+ &jbe (&label("ecb_dec_tail"));
-+ &jmp (&label("ecb_dec_loop3"));
-+
-+&set_label("ecb_dec_loop3",16);
-+ &movups ($inout0,&QWP(0,$inp));
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &call ("_aesni_decrypt3");
-+ &sub ($len,0x30);
-+ &lea ($inp,&DWP(0x30,$inp));
-+ &lea ($out,&DWP(0x30,$out));
-+ &movups (&QWP(-0x30,$out),$inout0);
-+ &mov ($key,$key_); # restore $key
-+ &movups (&QWP(-0x20,$out),$inout1);
-+ &mov ($rounds,$rounds_); # restore $rounds
-+ &movups (&QWP(-0x10,$out),$inout2);
-+ &ja (&label("ecb_dec_loop3"));
-+
-+&set_label("ecb_dec_tail");
-+ &add ($len,0x40);
-+ &jz (&label("ecb_ret"));
-+
-+ &cmp ($len,0x10);
-+ &movups ($inout0,&QWP(0,$inp));
-+ &je (&label("ecb_dec_one"));
-+ &cmp ($len,0x20);
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &je (&label("ecb_dec_two"));
-+ &cmp ($len,0x30);
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &je (&label("ecb_dec_three"));
-+ &movups ($inout3,&QWP(0x30,$inp));
-+ &call ("_aesni_decrypt4");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movups (&QWP(0x20,$out),$inout2);
-+ &movups (&QWP(0x30,$out),$inout3);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_dec_one",16);
-+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3");
-+ &movups (&QWP(0,$out),$inout0);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_dec_two",16);
-+ &call ("_aesni_decrypt3");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &jmp (&label("ecb_ret"));
-+
-+&set_label("ecb_dec_three",16);
-+ &call ("_aesni_decrypt3");
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movups (&QWP(0x20,$out),$inout2);
-+
-+&set_label("ecb_ret");
-+&function_end("aesni_ecb_encrypt");
-+}
-+
-+# void $PREFIX_cbc_encrypt (const void *inp, void *out,
-+# size_t length, const AES_KEY *key,
-+# unsigned char *ivp,const int enc);
-+&function_begin("${PREFIX}_cbc_encrypt");
-+ &mov ($inp,&wparam(0));
-+ &mov ($out,&wparam(1));
-+ &mov ($len,&wparam(2));
-+ &mov ($key,&wparam(3));
-+ &test ($len,$len);
-+ &mov ($key_,&wparam(4));
-+ &jz (&label("cbc_ret"));
-+
-+ &cmp (&wparam(5),0);
-+ &movups ($ivec,&QWP(0,$key_)); # load IV
-+ &mov ($rounds,&DWP(240,$key));
-+ &mov ($key_,$key); # backup $key
-+ &mov ($rounds_,$rounds); # backup $rounds
-+ &je (&label("cbc_decrypt"));
-+
-+ &movaps ($inout0,$ivec);
-+ &cmp ($len,16);
-+ &jb (&label("cbc_enc_tail"));
-+ &sub ($len,16);
-+ &jmp (&label("cbc_enc_loop"));
-+
-+&set_label("cbc_enc_loop",16);
-+ &movups ($ivec,&QWP(0,$inp));
-+ &lea ($inp,&DWP(16,$inp));
-+ &pxor ($inout0,$ivec);
-+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt3");
-+ &sub ($len,16);
-+ &lea ($out,&DWP(16,$out));
-+ &mov ($rounds,$rounds_); # restore $rounds
-+ &mov ($key,$key_); # restore $key
-+ &movups (&QWP(-16,$out),$inout0);
-+ &jnc (&label("cbc_enc_loop"));
-+ &add ($len,16);
-+ &jnz (&label("cbc_enc_tail"));
-+ &movaps ($ivec,$inout0);
-+ &jmp (&label("cbc_ret"));
-+
-+&set_label("cbc_enc_tail");
-+ &mov ("ecx",$len); # zaps $rounds
-+ &data_word(0xA4F3F689); # rep movsb
-+ &mov ("ecx",16); # zero tail
-+ &sub ("ecx",$len);
-+ &xor ("eax","eax"); # zaps $len
-+ &data_word(0xAAF3F689); # rep stosb
-+ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block
-+ &mov ($rounds,$rounds_); # restore $rounds
-+ &mov ($inp,$out); # $inp and $out are the same
-+ &mov ($key,$key_); # restore $key
-+ &jmp (&label("cbc_enc_loop"));
-+
-+&set_label("cbc_decrypt",16);
-+ &sub ($len,0x40);
-+ &jbe (&label("cbc_dec_tail"));
-+ &jmp (&label("cbc_dec_loop3"));
-+
-+&set_label("cbc_dec_loop3",16);
-+ &movups ($inout0,&QWP(0,$inp));
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &movaps ($in0,$inout0);
-+ &movaps ($in1,$inout1);
-+ &call ("_aesni_decrypt3");
-+ &sub ($len,0x30);
-+ &lea ($inp,&DWP(0x30,$inp));
-+ &lea ($out,&DWP(0x30,$out));
-+ &pxor ($inout0,$ivec);
-+ &pxor ($inout1,$in0);
-+ &movups ($ivec,&QWP(-0x10,$inp));
-+ &pxor ($inout2,$in1);
-+ &movups (&QWP(-0x30,$out),$inout0);
-+ &mov ($rounds,$rounds_) # restore $rounds
-+ &movups (&QWP(-0x20,$out),$inout1);
-+ &mov ($key,$key_); # restore $key
-+ &movups (&QWP(-0x10,$out),$inout2);
-+ &ja (&label("cbc_dec_loop3"));
-+
-+&set_label("cbc_dec_tail");
-+ &add ($len,0x40);
-+ &jz (&label("cbc_ret"));
-+
-+ &movups ($inout0,&QWP(0,$inp));
-+ &cmp ($len,0x10);
-+ &movaps ($in0,$inout0);
-+ &jbe (&label("cbc_dec_one"));
-+ &movups ($inout1,&QWP(0x10,$inp));
-+ &cmp ($len,0x20);
-+ &movaps ($in1,$inout1);
-+ &jbe (&label("cbc_dec_two"));
-+ &movups ($inout2,&QWP(0x20,$inp));
-+ &cmp ($len,0x30);
-+ &jbe (&label("cbc_dec_three"));
-+ &movups ($inout3,&QWP(0x30,$inp));
-+ &call ("_aesni_decrypt4");
-+ &movups ($rndkey0,&QWP(0x10,$inp));
-+ &movups ($rndkey1,&QWP(0x20,$inp));
-+ &pxor ($inout0,$ivec);
-+ &pxor ($inout1,$in0);
-+ &movups ($ivec,&QWP(0x30,$inp));
-+ &movups (&QWP(0,$out),$inout0);
-+ &pxor ($inout2,$rndkey0);
-+ &pxor ($inout3,$rndkey1);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movups (&QWP(0x20,$out),$inout2);
-+ &movaps ($inout0,$inout3);
-+ &lea ($out,&DWP(0x30,$out));
-+ &jmp (&label("cbc_dec_tail_collected"));
-+
-+&set_label("cbc_dec_one");
-+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3");
-+ &pxor ($inout0,$ivec);
-+ &movaps ($ivec,$in0);
-+ &jmp (&label("cbc_dec_tail_collected"));
-+
-+&set_label("cbc_dec_two");
-+ &call ("_aesni_decrypt3");
-+ &pxor ($inout0,$ivec);
-+ &pxor ($inout1,$in0);
-+ &movups (&QWP(0,$out),$inout0);
-+ &movaps ($inout0,$inout1);
-+ &movaps ($ivec,$in1);
-+ &lea ($out,&DWP(0x10,$out));
-+ &jmp (&label("cbc_dec_tail_collected"));
-+
-+&set_label("cbc_dec_three");
-+ &call ("_aesni_decrypt3");
-+ &pxor ($inout0,$ivec);
-+ &pxor ($inout1,$in0);
-+ &pxor ($inout2,$in1);
-+ &movups (&QWP(0,$out),$inout0);
-+ &movups (&QWP(0x10,$out),$inout1);
-+ &movaps ($inout0,$inout2);
-+ &movups ($ivec,&QWP(0x20,$inp));
-+ &lea ($out,&DWP(0x20,$out));
-+
-+&set_label("cbc_dec_tail_collected");
-+ &and ($len,15);
-+ &jnz (&label("cbc_dec_tail_partial"));
-+ &movups (&QWP(0,$out),$inout0);
-+ &jmp (&label("cbc_ret"));
-+
-+&set_label("cbc_dec_tail_partial");
-+ &mov ($key_,"esp");
-+ &sub ("esp",16);
-+ &and ("esp",-16);
-+ &movaps (&QWP(0,"esp"),$inout0);
-+ &mov ($inp,"esp");
-+ &mov ("ecx",$len);
-+ &data_word(0xA4F3F689); # rep movsb
-+ &mov ("esp",$key_);
-+
-+&set_label("cbc_ret");
-+ &mov ($key_,&wparam(4));
-+ &movups (&QWP(0,$key_),$ivec); # output IV
-+&function_end("${PREFIX}_cbc_encrypt");
-+
-+# Mechanical port from aesni-x86_64.pl.
-+#
-+# _aesni_set_encrypt_key is private interface,
-+# input:
-+# "eax" const unsigned char *userKey
-+# $rounds int bits
-+# $key AES_KEY *key
-+# output:
-+# "eax" return code
-+# $round rounds
-+
-+&function_begin_B("_aesni_set_encrypt_key");
-+ &test ("eax","eax");
-+ &jz (&label("bad_pointer"));
-+ &test ($key,$key);
-+ &jz (&label("bad_pointer"));
-+
-+ &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
-+ &pxor ("xmm4","xmm4"); # low dword of xmm4 is assumed 0
-+ &lea ($key,&DWP(16,$key));
-+ &cmp ($rounds,256);
-+ &je (&label("14rounds"));
-+ &cmp ($rounds,192);
-+ &je (&label("12rounds"));
-+ &cmp ($rounds,128);
-+ &jne (&label("bad_keybits"));
-+
-+&set_label("10rounds",16);
-+ &mov ($rounds,9);
-+ &$movekey (&QWP(-16,$key),"xmm0"); # round 0
-+ &aeskeygenassist("xmm1","xmm0",0x01); # round 1
-+ &call (&label("key_128_cold"));
-+ &aeskeygenassist("xmm1","xmm0",0x2); # round 2
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x04); # round 3
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x08); # round 4
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x10); # round 5
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x20); # round 6
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x40); # round 7
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x80); # round 8
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x1b); # round 9
-+ &call (&label("key_128"));
-+ &aeskeygenassist("xmm1","xmm0",0x36); # round 10
-+ &call (&label("key_128"));
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &mov (&DWP(80,$key),$rounds);
-+ &xor ("eax","eax");
-+ &ret();
-+
-+&set_label("key_128",16);
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &lea ($key,&DWP(16,$key));
-+&set_label("key_128_cold");
-+ &shufps ("xmm4","xmm0",0b00010000);
-+ &pxor ("xmm0","xmm4");
-+ &shufps ("xmm4","xmm0",0b10001100,);
-+ &pxor ("xmm0","xmm4");
-+ &pshufd ("xmm1","xmm1",0b11111111); # critical path
-+ &pxor ("xmm0","xmm1");
-+ &ret();
-+
-+&set_label("12rounds",16);
-+ &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
-+ &mov ($rounds,11);
-+ &$movekey (&QWP(-16,$key),"xmm0") # round 0
-+ &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2
-+ &call (&label("key_192a_cold"));
-+ &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3
-+ &call (&label("key_192b"));
-+ &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5
-+ &call (&label("key_192a"));
-+ &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6
-+ &call (&label("key_192b"));
-+ &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8
-+ &call (&label("key_192a"));
-+ &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9
-+ &call (&label("key_192b"));
-+ &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11
-+ &call (&label("key_192a"));
-+ &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12
-+ &call (&label("key_192b"));
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &mov (&DWP(48,$key),$rounds);
-+ &xor ("eax","eax");
-+ &ret();
-+
-+&set_label("key_192a",16);
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &lea ($key,&DWP(16,$key));
-+&set_label("key_192a_cold",16);
-+ &movaps ("xmm5","xmm2");
-+&set_label("key_192b_warm");
-+ &shufps ("xmm4","xmm0",0b00010000);
-+ &movaps ("xmm3","xmm2");
-+ &pxor ("xmm0","xmm4");
-+ &shufps ("xmm4","xmm0",0b10001100);
-+ &pslldq ("xmm3",4);
-+ &pxor ("xmm0","xmm4");
-+ &pshufd ("xmm1","xmm1",0b01010101); # critical path
-+ &pxor ("xmm2","xmm3");
-+ &pxor ("xmm0","xmm1");
-+ &pshufd ("xmm3","xmm0",0b11111111);
-+ &pxor ("xmm2","xmm3");
-+ &ret();
-+
-+&set_label("key_192b",16);
-+ &movaps ("xmm3","xmm0");
-+ &shufps ("xmm5","xmm0",0b01000100);
-+ &$movekey (&QWP(0,$key),"xmm5");
-+ &shufps ("xmm3","xmm2",0b01001110);
-+ &$movekey (&QWP(16,$key),"xmm3");
-+ &lea ($key,&DWP(32,$key));
-+ &jmp (&label("key_192b_warm"));
-+
-+&set_label("14rounds",16);
-+ &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
-+ &mov ($rounds,13);
-+ &lea ($key,&DWP(16,$key));
-+ &$movekey (&QWP(-32,$key),"xmm0"); # round 0
-+ &$movekey (&QWP(-16,$key),"xmm2"); # round 1
-+ &aeskeygenassist("xmm1","xmm2",0x01); # round 2
-+ &call (&label("key_256a_cold"));
-+ &aeskeygenassist("xmm1","xmm0",0x01); # round 3
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x02); # round 4
-+ &call (&label("key_256a"));
-+ &aeskeygenassist("xmm1","xmm0",0x02); # round 5
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x04); # round 6
-+ &call (&label("key_256a"));
-+ &aeskeygenassist("xmm1","xmm0",0x04); # round 7
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x08); # round 8
-+ &call (&label("key_256a"));
-+ &aeskeygenassist("xmm1","xmm0",0x08); # round 9
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x10); # round 10
-+ &call (&label("key_256a"));
-+ &aeskeygenassist("xmm1","xmm0",0x10); # round 11
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x20); # round 12
-+ &call (&label("key_256a"));
-+ &aeskeygenassist("xmm1","xmm0",0x20); # round 13
-+ &call (&label("key_256b"));
-+ &aeskeygenassist("xmm1","xmm2",0x40); # round 14
-+ &call (&label("key_256a"));
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &mov (&DWP(16,$key),$rounds);
-+ &xor ("eax","eax");
-+ &ret();
-+
-+&set_label("key_256a",16);
-+ &$movekey (&QWP(0,$key),"xmm2");
-+ &lea ($key,&DWP(16,$key));
-+&set_label("key_256a_cold");
-+ &shufps ("xmm4","xmm0",0b00010000);
-+ &pxor ("xmm0","xmm4");
-+ &shufps ("xmm4","xmm0",0b10001100);
-+ &pxor ("xmm0","xmm4");
-+ &pshufd ("xmm1","xmm1",0b11111111); # critical path
-+ &pxor ("xmm0","xmm1");
-+ &ret();
-+
-+&set_label("key_256b",16);
-+ &$movekey (&QWP(0,$key),"xmm0");
-+ &lea ($key,&DWP(16,$key));
-+
-+ &shufps ("xmm4","xmm2",0b00010000);
-+ &pxor ("xmm2","xmm4");
-+ &shufps ("xmm4","xmm2",0b10001100);
-+ &pxor ("xmm2","xmm4");
-+ &pshufd ("xmm1","xmm1",0b10101010); # critical path
-+ &pxor ("xmm2","xmm1");
-+ &ret();
-+
-+&set_label("bad_pointer",4);
-+ &mov ("eax",-1);
-+ &ret ();
-+&set_label("bad_keybits",4);
-+ &mov ("eax",-2);
-+ &ret ();
-+&function_end_B("_aesni_set_encrypt_key");
-+
-+# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
-+# AES_KEY *key)
-+&function_begin_B("${PREFIX}_set_encrypt_key");
-+ &mov ("eax",&wparam(0));
-+ &mov ($rounds,&wparam(1));
-+ &mov ($key,&wparam(2));
-+ &call ("_aesni_set_encrypt_key");
-+ &ret ();
-+&function_end_B("${PREFIX}_set_encrypt_key");
-+
-+# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
-+# AES_KEY *key)
-+&function_begin_B("${PREFIX}_set_decrypt_key");
-+ &mov ("eax",&wparam(0));
-+ &mov ($rounds,&wparam(1));
-+ &mov ($key,&wparam(2));
-+ &call ("_aesni_set_encrypt_key");
-+ &mov ($key,&wparam(2));
-+ &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key
-+ &test ("eax","eax");
-+ &jnz (&label("dec_key_ret"));
-+ &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
-+
-+ &$movekey ("xmm0",&QWP(0,$key)); # just swap
-+ &$movekey ("xmm1",&QWP(0,"eax"));
-+ &$movekey (&QWP(0,"eax"),"xmm0");
-+ &$movekey (&QWP(0,$key),"xmm1");
-+ &lea ($key,&DWP(16,$key));
-+ &lea ("eax",&DWP(-16,"eax"));
-+
-+&set_label("dec_key_inverse");
-+ &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
-+ &$movekey ("xmm1",&QWP(0,"eax"));
-+ &aesimc ("xmm0","xmm0");
-+ &aesimc ("xmm1","xmm1");
-+ &lea ($key,&DWP(16,$key));
-+ &lea ("eax",&DWP(-16,"eax"));
-+ &cmp ("eax",$key);
-+ &$movekey (&QWP(16,"eax"),"xmm0");
-+ &$movekey (&QWP(-16,$key),"xmm1");
-+ &ja (&label("dec_key_inverse"));
-+
-+ &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
-+ &aesimc ("xmm0","xmm0");
-+ &$movekey (&QWP(0,$key),"xmm0");
-+
-+ &xor ("eax","eax"); # return success
-+&set_label("dec_key_ret");
-+ &ret ();
-+&function_end_B("${PREFIX}_set_decrypt_key");
-+&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
-+
-+&asm_finish();
-diff -up openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl.aesni openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl
---- openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl.aesni 2010-01-12 22:18:06.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl 2010-01-12 22:18:06.000000000 +0100
-@@ -0,0 +1,991 @@
-+#!/usr/bin/env perl
-+#
-+# ====================================================================
-+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-+# project. The module is, however, dual licensed under OpenSSL and
-+# CRYPTOGAMS licenses depending on where you obtain it. For further
-+# details see http://www.openssl.org/~appro/cryptogams/.
-+# ====================================================================
-+#
-+# This module implements support for Intel AES-NI extension. In
-+# OpenSSL context it's used with Intel engine, but can also be used as
-+# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for
-+# details].
-+
-+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
-+ # generates drop-in replacement for
-+ # crypto/aes/asm/aes-x86_64.pl:-)
-+
-+$flavour = shift;
-+$output = shift;
-+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-+
-+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-+
-+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-+die "can't locate x86_64-xlate.pl";
-+
-+open STDOUT,"| $^X $xlate $flavour $output";
-+
-+$movkey = $PREFIX eq "aesni" ? "movaps" : "movups";
-+@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
-+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
-+
-+$code=".text\n";
-+
-+$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!!
-+# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ...
-+$inp="%rdi";
-+$out="%rsi";
-+$len="%rdx";
-+$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!!
-+$ivp="%r8"; # cbc
-+
-+$rnds_="%r10d"; # backup copy for $rounds
-+$key_="%r11"; # backup copy for $key
-+
-+# %xmm register layout
-+$inout0="%xmm0"; $inout1="%xmm1";
-+$inout2="%xmm2"; $inout3="%xmm3";
-+$rndkey0="%xmm4"; $rndkey1="%xmm5";
-+
-+$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt
-+$in1="%xmm8"; $in2="%xmm9";
-+\f
-+# Inline version of internal aesni_[en|de]crypt1.
-+#
-+# Why folded loop? Because aes[enc|dec] is slow enough to accommodate
-+# cycles which take care of loop variables...
-+{ my $sn;
-+sub aesni_generate1 {
-+my ($p,$key,$rounds)=@_;
-+++$sn;
-+$code.=<<___;
-+ $movkey ($key),$rndkey0
-+ $movkey 16($key),$rndkey1
-+ lea 32($key),$key
-+ pxor $rndkey0,$inout0
-+.Loop_${p}1_$sn:
-+ aes${p} $rndkey1,$inout0
-+ dec $rounds
-+ $movkey ($key),$rndkey1
-+ lea 16($key),$key
-+ jnz .Loop_${p}1_$sn # loop body is 16 bytes
-+ aes${p}last $rndkey1,$inout0
-+___
-+}}
-+# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key);
-+#
-+{ my ($inp,$out,$key) = @_4args;
-+
-+$code.=<<___;
-+.globl ${PREFIX}_encrypt
-+.type ${PREFIX}_encrypt,\@abi-omnipotent
-+.align 16
-+${PREFIX}_encrypt:
-+ movups ($inp),$inout0 # load input
-+ mov 240($key),$rounds # pull $rounds
-+___
-+ &aesni_generate1("enc",$key,$rounds);
-+$code.=<<___;
-+ movups $inout0,($out) # output
-+ ret
-+.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
-+
-+.globl ${PREFIX}_decrypt
-+.type ${PREFIX}_decrypt,\@abi-omnipotent
-+.align 16
-+${PREFIX}_decrypt:
-+ movups ($inp),$inout0 # load input
-+ mov 240($key),$rounds # pull $rounds
-+___
-+ &aesni_generate1("dec",$key,$rounds);
-+$code.=<<___;
-+ movups $inout0,($out) # output
-+ ret
-+.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt
-+___
-+}
-+\f
-+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
-+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec]
-+# latency is 6, it turned out that it can be scheduled only every
-+# *second* cycle. Thus 3x interleave is the one providing optimal
-+# utilization, i.e. when subroutine's throughput is virtually same as
-+# of non-interleaved subroutine [for number of input blocks up to 3].
-+# This is why it makes no sense to implement 2x subroutine. As soon
-+# as/if Intel improves throughput by making it possible to schedule
-+# the instructions in question *every* cycles I would have to
-+# implement 6x interleave and use it in loop...
-+sub aesni_generate3 {
-+my $dir=shift;
-+# As already mentioned it takes in $key and $rounds, which are *not*
-+# preserved. $inout[0-2] is cipher/clear text...
-+$code.=<<___;
-+.type _aesni_${dir}rypt3,\@abi-omnipotent
-+.align 16
-+_aesni_${dir}rypt3:
-+ $movkey ($key),$rndkey0
-+ shr \$1,$rounds
-+ $movkey 16($key),$rndkey1
-+ lea 32($key),$key
-+ pxor $rndkey0,$inout0
-+ pxor $rndkey0,$inout1
-+ pxor $rndkey0,$inout2
-+
-+.L${dir}_loop3:
-+ aes${dir} $rndkey1,$inout0
-+ $movkey ($key),$rndkey0
-+ aes${dir} $rndkey1,$inout1
-+ dec $rounds
-+ aes${dir} $rndkey1,$inout2
-+ aes${dir} $rndkey0,$inout0
-+ $movkey 16($key),$rndkey1
-+ aes${dir} $rndkey0,$inout1
-+ lea 32($key),$key
-+ aes${dir} $rndkey0,$inout2
-+ jnz .L${dir}_loop3
-+
-+ aes${dir} $rndkey1,$inout0
-+ $movkey ($key),$rndkey0
-+ aes${dir} $rndkey1,$inout1
-+ aes${dir} $rndkey1,$inout2
-+ aes${dir}last $rndkey0,$inout0
-+ aes${dir}last $rndkey0,$inout1
-+ aes${dir}last $rndkey0,$inout2
-+ ret
-+.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3
-+___
-+}
-+# 4x interleave is implemented to improve small block performance,
-+# most notably [and naturally] 4 block by ~30%. One can argue that one
-+# should have implemented 5x as well, but improvement would be <20%,
-+# so it's not worth it...
-+sub aesni_generate4 {
-+my $dir=shift;
-+# As already mentioned it takes in $key and $rounds, which are *not*
-+# preserved. $inout[0-3] is cipher/clear text...
-+$code.=<<___;
-+.type _aesni_${dir}rypt4,\@abi-omnipotent
-+.align 16
-+_aesni_${dir}rypt4:
-+ $movkey ($key),$rndkey0
-+ shr \$1,$rounds
-+ $movkey 16($key),$rndkey1
-+ lea 32($key),$key
-+ pxor $rndkey0,$inout0
-+ pxor $rndkey0,$inout1
-+ pxor $rndkey0,$inout2
-+ pxor $rndkey0,$inout3
-+
-+.L${dir}_loop4:
-+ aes${dir} $rndkey1,$inout0
-+ $movkey ($key),$rndkey0
-+ aes${dir} $rndkey1,$inout1
-+ dec $rounds
-+ aes${dir} $rndkey1,$inout2
-+ aes${dir} $rndkey1,$inout3
-+ aes${dir} $rndkey0,$inout0
-+ $movkey 16($key),$rndkey1
-+ aes${dir} $rndkey0,$inout1
-+ lea 32($key),$key
-+ aes${dir} $rndkey0,$inout2
-+ aes${dir} $rndkey0,$inout3
-+ jnz .L${dir}_loop4
-+
-+ aes${dir} $rndkey1,$inout0
-+ $movkey ($key),$rndkey0
-+ aes${dir} $rndkey1,$inout1
-+ aes${dir} $rndkey1,$inout2
-+ aes${dir} $rndkey1,$inout3
-+ aes${dir}last $rndkey0,$inout0
-+ aes${dir}last $rndkey0,$inout1
-+ aes${dir}last $rndkey0,$inout2
-+ aes${dir}last $rndkey0,$inout3
-+ ret
-+.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4
-+___
-+}
-+&aesni_generate3("enc") if ($PREFIX eq "aesni");
-+&aesni_generate3("dec");
-+&aesni_generate4("enc") if ($PREFIX eq "aesni");
-+&aesni_generate4("dec");
-+\f
-+if ($PREFIX eq "aesni") {
-+# void aesni_ecb_encrypt (const void *in, void *out,
-+# size_t length, const AES_KEY *key,
-+# int enc);
-+$code.=<<___;
-+.globl aesni_ecb_encrypt
-+.type aesni_ecb_encrypt,\@function,5
-+.align 16
-+aesni_ecb_encrypt:
-+ cmp \$16,$len # check length
-+ jb .Lecb_ret
-+
-+ mov 240($key),$rounds # pull $rounds
-+ and \$-16,$len
-+ mov $key,$key_ # backup $key
-+ test %r8d,%r8d # 5th argument
-+ mov $rounds,$rnds_ # backup $rounds
-+ jz .Lecb_decrypt
-+#--------------------------- ECB ENCRYPT ------------------------------#
-+ sub \$0x40,$len
-+ jbe .Lecb_enc_tail
-+ jmp .Lecb_enc_loop3
-+.align 16
-+.Lecb_enc_loop3:
-+ movups ($inp),$inout0
-+ movups 0x10($inp),$inout1
-+ movups 0x20($inp),$inout2
-+ call _aesni_encrypt3
-+ sub \$0x30,$len
-+ lea 0x30($inp),$inp
-+ lea 0x30($out),$out
-+ movups $inout0,-0x30($out)
-+ mov $rnds_,$rounds # restore $rounds
-+ movups $inout1,-0x20($out)
-+ mov $key_,$key # restore $key
-+ movups $inout2,-0x10($out)
-+ ja .Lecb_enc_loop3
-+
-+.Lecb_enc_tail:
-+ add \$0x40,$len
-+ jz .Lecb_ret
-+
-+ cmp \$0x10,$len
-+ movups ($inp),$inout0
-+ je .Lecb_enc_one
-+ cmp \$0x20,$len
-+ movups 0x10($inp),$inout1
-+ je .Lecb_enc_two
-+ cmp \$0x30,$len
-+ movups 0x20($inp),$inout2
-+ je .Lecb_enc_three
-+ movups 0x30($inp),$inout3
-+ call _aesni_encrypt4
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ movups $inout2,0x20($out)
-+ movups $inout3,0x30($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_enc_one:
-+___
-+ &aesni_generate1("enc",$key,$rounds);
-+$code.=<<___;
-+ movups $inout0,($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_enc_two:
-+ call _aesni_encrypt3
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_enc_three:
-+ call _aesni_encrypt3
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ movups $inout2,0x20($out)
-+ jmp .Lecb_ret
-+\f#--------------------------- ECB DECRYPT ------------------------------#
-+.align 16
-+.Lecb_decrypt:
-+ sub \$0x40,$len
-+ jbe .Lecb_dec_tail
-+ jmp .Lecb_dec_loop3
-+.align 16
-+.Lecb_dec_loop3:
-+ movups ($inp),$inout0
-+ movups 0x10($inp),$inout1
-+ movups 0x20($inp),$inout2
-+ call _aesni_decrypt3
-+ sub \$0x30,$len
-+ lea 0x30($inp),$inp
-+ lea 0x30($out),$out
-+ movups $inout0,-0x30($out)
-+ mov $rnds_,$rounds # restore $rounds
-+ movups $inout1,-0x20($out)
-+ mov $key_,$key # restore $key
-+ movups $inout2,-0x10($out)
-+ ja .Lecb_dec_loop3
-+
-+.Lecb_dec_tail:
-+ add \$0x40,$len
-+ jz .Lecb_ret
-+
-+ cmp \$0x10,$len
-+ movups ($inp),$inout0
-+ je .Lecb_dec_one
-+ cmp \$0x20,$len
-+ movups 0x10($inp),$inout1
-+ je .Lecb_dec_two
-+ cmp \$0x30,$len
-+ movups 0x20($inp),$inout2
-+ je .Lecb_dec_three
-+ movups 0x30($inp),$inout3
-+ call _aesni_decrypt4
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ movups $inout2,0x20($out)
-+ movups $inout3,0x30($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_dec_one:
-+___
-+ &aesni_generate1("dec",$key,$rounds);
-+$code.=<<___;
-+ movups $inout0,($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_dec_two:
-+ call _aesni_decrypt3
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ jmp .Lecb_ret
-+.align 16
-+.Lecb_dec_three:
-+ call _aesni_decrypt3
-+ movups $inout0,($out)
-+ movups $inout1,0x10($out)
-+ movups $inout2,0x20($out)
-+
-+.Lecb_ret:
-+ ret
-+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
-+___
-+}
-+\f
-+# void $PREFIX_cbc_encrypt (const void *inp, void *out,
-+# size_t length, const AES_KEY *key,
-+# unsigned char *ivp,const int enc);
-+$reserved = $win64?0x40:-0x18; # used in decrypt
-+$code.=<<___;
-+.globl ${PREFIX}_cbc_encrypt
-+.type ${PREFIX}_cbc_encrypt,\@function,6
-+.align 16
-+${PREFIX}_cbc_encrypt:
-+ test $len,$len # check length
-+ jz .Lcbc_ret
-+
-+ mov 240($key),$rnds_ # pull $rounds
-+ mov $key,$key_ # backup $key
-+ test %r9d,%r9d # 6th argument
-+ jz .Lcbc_decrypt
-+#--------------------------- CBC ENCRYPT ------------------------------#
-+ movups ($ivp),$inout0 # load iv as initial state
-+ cmp \$16,$len
-+ mov $rnds_,$rounds
-+ jb .Lcbc_enc_tail
-+ sub \$16,$len
-+ jmp .Lcbc_enc_loop
-+.align 16
-+.Lcbc_enc_loop:
-+ movups ($inp),$inout1 # load input
-+ lea 16($inp),$inp
-+ pxor $inout1,$inout0
-+___
-+ &aesni_generate1("enc",$key,$rounds);
-+$code.=<<___;
-+ sub \$16,$len
-+ lea 16($out),$out
-+ mov $rnds_,$rounds # restore $rounds
-+ mov $key_,$key # restore $key
-+ movups $inout0,-16($out) # store output
-+ jnc .Lcbc_enc_loop
-+ add \$16,$len
-+ jnz .Lcbc_enc_tail
-+ movups $inout0,($ivp)
-+ jmp .Lcbc_ret
-+
-+.Lcbc_enc_tail:
-+ mov $len,%rcx # zaps $key
-+ xchg $inp,$out # $inp is %rsi and $out is %rdi now
-+ .long 0x9066A4F3 # rep movsb
-+ mov \$16,%ecx # zero tail
-+ sub $len,%rcx
-+ xor %eax,%eax
-+ .long 0x9066AAF3 # rep stosb
-+ lea -16(%rdi),%rdi # rewind $out by 1 block
-+ mov $rnds_,$rounds # restore $rounds
-+ mov %rdi,%rsi # $inp and $out are the same
-+ mov $key_,$key # restore $key
-+ xor $len,$len # len=16
-+ jmp .Lcbc_enc_loop # one more spin
-+\f#--------------------------- CBC DECRYPT ------------------------------#
-+.align 16
-+.Lcbc_decrypt:
-+___
-+$code.=<<___ if ($win64);
-+ lea -0x58(%rsp),%rsp
-+ movaps %xmm6,(%rsp)
-+ movaps %xmm7,0x10(%rsp)
-+ movaps %xmm8,0x20(%rsp)
-+ movaps %xmm9,0x30(%rsp)
-+.Lcbc_decrypt_body:
-+___
-+$code.=<<___;
-+ movups ($ivp),$iv
-+ sub \$0x40,$len
-+ mov $rnds_,$rounds
-+ jbe .Lcbc_dec_tail
-+ jmp .Lcbc_dec_loop3
-+.align 16
-+.Lcbc_dec_loop3:
-+ movups ($inp),$inout0
-+ movups 0x10($inp),$inout1
-+ movups 0x20($inp),$inout2
-+ movaps $inout0,$in0
-+ movaps $inout1,$in1
-+ movaps $inout2,$in2
-+ call _aesni_decrypt3
-+ sub \$0x30,$len
-+ lea 0x30($inp),$inp
-+ lea 0x30($out),$out
-+ pxor $iv,$inout0
-+ pxor $in0,$inout1
-+ movaps $in2,$iv
-+ pxor $in1,$inout2
-+ movups $inout0,-0x30($out)
-+ mov $rnds_,$rounds # restore $rounds
-+ movups $inout1,-0x20($out)
-+ mov $key_,$key # restore $key
-+ movups $inout2,-0x10($out)
-+ ja .Lcbc_dec_loop3
-+
-+.Lcbc_dec_tail:
-+ add \$0x40,$len
-+ movups $iv,($ivp)
-+ jz .Lcbc_dec_ret
-+
-+ movups ($inp),$inout0
-+ cmp \$0x10,$len
-+ movaps $inout0,$in0
-+ jbe .Lcbc_dec_one
-+ movups 0x10($inp),$inout1
-+ cmp \$0x20,$len
-+ movaps $inout1,$in1
-+ jbe .Lcbc_dec_two
-+ movups 0x20($inp),$inout2
-+ cmp \$0x30,$len
-+ movaps $inout2,$in2
-+ jbe .Lcbc_dec_three
-+ movups 0x30($inp),$inout3
-+ call _aesni_decrypt4
-+ pxor $iv,$inout0
-+ movups 0x30($inp),$iv
-+ pxor $in0,$inout1
-+ movups $inout0,($out)
-+ pxor $in1,$inout2
-+ movups $inout1,0x10($out)
-+ pxor $in2,$inout3
-+ movups $inout2,0x20($out)
-+ movaps $inout3,$inout0
-+ lea 0x30($out),$out
-+ jmp .Lcbc_dec_tail_collected
-+.align 16
-+.Lcbc_dec_one:
-+___
-+ &aesni_generate1("dec",$key,$rounds);
-+$code.=<<___;
-+ pxor $iv,$inout0
-+ movaps $in0,$iv
-+ jmp .Lcbc_dec_tail_collected
-+.align 16
-+.Lcbc_dec_two:
-+ call _aesni_decrypt3
-+ pxor $iv,$inout0
-+ pxor $in0,$inout1
-+ movups $inout0,($out)
-+ movaps $in1,$iv
-+ movaps $inout1,$inout0
-+ lea 0x10($out),$out
-+ jmp .Lcbc_dec_tail_collected
-+.align 16
-+.Lcbc_dec_three:
-+ call _aesni_decrypt3
-+ pxor $iv,$inout0
-+ pxor $in0,$inout1
-+ movups $inout0,($out)
-+ pxor $in1,$inout2
-+ movups $inout1,0x10($out)
-+ movaps $in2,$iv
-+ movaps $inout2,$inout0
-+ lea 0x20($out),$out
-+ jmp .Lcbc_dec_tail_collected
-+.align 16
-+.Lcbc_dec_tail_collected:
-+ and \$15,$len
-+ movups $iv,($ivp)
-+ jnz .Lcbc_dec_tail_partial
-+ movups $inout0,($out)
-+ jmp .Lcbc_dec_ret
-+.Lcbc_dec_tail_partial:
-+ movaps $inout0,$reserved(%rsp)
-+ mov $out,%rdi
-+ mov $len,%rcx
-+ lea $reserved(%rsp),%rsi
-+ .long 0x9066A4F3 # rep movsb
-+
-+.Lcbc_dec_ret:
-+___
-+$code.=<<___ if ($win64);
-+ movaps (%rsp),%xmm6
-+ movaps 0x10(%rsp),%xmm7
-+ movaps 0x20(%rsp),%xmm8
-+ movaps 0x30(%rsp),%xmm9
-+ lea 0x58(%rsp),%rsp
-+___
-+$code.=<<___;
-+.Lcbc_ret:
-+ ret
-+.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
-+___
-+\f
-+# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey,
-+# int bits, AES_KEY *key)
-+{ my ($inp,$bits,$key) = @_4args;
-+ $bits =~ s/%r/%e/;
-+
-+$code.=<<___;
-+.globl ${PREFIX}_set_decrypt_key
-+.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent
-+.align 16
-+${PREFIX}_set_decrypt_key:
-+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
-+ call _aesni_set_encrypt_key
-+ shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key
-+ test %eax,%eax
-+ jnz .Ldec_key_ret
-+ lea 16($key,$bits),$inp # points at the end of key schedule
-+
-+ $movkey ($key),%xmm0 # just swap
-+ $movkey ($inp),%xmm1
-+ $movkey %xmm0,($inp)
-+ $movkey %xmm1,($key)
-+ lea 16($key),$key
-+ lea -16($inp),$inp
-+
-+.Ldec_key_inverse:
-+ $movkey ($key),%xmm0 # swap and inverse
-+ $movkey ($inp),%xmm1
-+ aesimc %xmm0,%xmm0
-+ aesimc %xmm1,%xmm1
-+ lea 16($key),$key
-+ lea -16($inp),$inp
-+ cmp $key,$inp
-+ $movkey %xmm0,16($inp)
-+ $movkey %xmm1,-16($key)
-+ ja .Ldec_key_inverse
-+
-+ $movkey ($key),%xmm0 # inverse middle
-+ aesimc %xmm0,%xmm0
-+ $movkey %xmm0,($inp)
-+.Ldec_key_ret:
-+ add \$8,%rsp
-+ ret
-+.LSEH_end_set_decrypt_key:
-+.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
-+___
-+\f
-+# This is based on submission by
-+#
-+# Huang Ying <ying.huang@intel.com>
-+# Vinodh Gopal <vinodh.gopal@intel.com>
-+# Kahraman Akdemir
-+#
-+# Agressively optimized in respect to aeskeygenassist's critical path
-+# and is contained in %xmm0-5 to meet Win64 ABI requirement.
-+#
-+$code.=<<___;
-+.globl ${PREFIX}_set_encrypt_key
-+.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent
-+.align 16
-+${PREFIX}_set_encrypt_key:
-+_aesni_set_encrypt_key:
-+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
-+ test $inp,$inp
-+ mov \$-1,%rax
-+ jz .Lenc_key_ret
-+ test $key,$key
-+ jz .Lenc_key_ret
-+
-+ movups ($inp),%xmm0 # pull first 128 bits of *userKey
-+ pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0
-+ lea 16($key),%rax
-+ cmp \$256,$bits
-+ je .L14rounds
-+ cmp \$192,$bits
-+ je .L12rounds
-+ cmp \$128,$bits
-+ jne .Lbad_keybits
-+
-+.L10rounds:
-+ mov \$9,$bits # 10 rounds for 128-bit key
-+ $movkey %xmm0,($key) # round 0
-+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1
-+ call .Lkey_expansion_128_cold
-+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9
-+ call .Lkey_expansion_128
-+ aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10
-+ call .Lkey_expansion_128
-+ $movkey %xmm0,(%rax)
-+ mov $bits,80(%rax) # 240(%rdx)
-+ xor %eax,%eax
-+ jmp .Lenc_key_ret
-+
-+.align 16
-+.L12rounds:
-+ movq 16($inp),%xmm2 # remaining 1/3 of *userKey
-+ mov \$11,$bits # 12 rounds for 192
-+ $movkey %xmm0,($key) # round 0
-+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2
-+ call .Lkey_expansion_192a_cold
-+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3
-+ call .Lkey_expansion_192b
-+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5
-+ call .Lkey_expansion_192a
-+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6
-+ call .Lkey_expansion_192b
-+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8
-+ call .Lkey_expansion_192a
-+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9
-+ call .Lkey_expansion_192b
-+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11
-+ call .Lkey_expansion_192a
-+ aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12
-+ call .Lkey_expansion_192b
-+ $movkey %xmm0,(%rax)
-+ mov $bits,48(%rax) # 240(%rdx)
-+ xor %rax, %rax
-+ jmp .Lenc_key_ret
-+
-+.align 16
-+.L14rounds:
-+ movups 16($inp),%xmm2 # remaning half of *userKey
-+ mov \$13,$bits # 14 rounds for 256
-+ lea 16(%rax),%rax
-+ $movkey %xmm0,($key) # round 0
-+ $movkey %xmm2,16($key) # round 1
-+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2
-+ call .Lkey_expansion_256a_cold
-+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4
-+ call .Lkey_expansion_256a
-+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6
-+ call .Lkey_expansion_256a
-+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8
-+ call .Lkey_expansion_256a
-+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10
-+ call .Lkey_expansion_256a
-+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12
-+ call .Lkey_expansion_256a
-+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13
-+ call .Lkey_expansion_256b
-+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14
-+ call .Lkey_expansion_256a
-+ $movkey %xmm0,(%rax)
-+ mov $bits,16(%rax) # 240(%rdx)
-+ xor %rax,%rax
-+ jmp .Lenc_key_ret
-+
-+.align 16
-+.Lbad_keybits:
-+ mov \$-2,%rax
-+.Lenc_key_ret:
-+ add \$8,%rsp
-+ ret
-+.LSEH_end_set_encrypt_key:
-+\f
-+.align 16
-+.Lkey_expansion_128:
-+ $movkey %xmm0,(%rax)
-+ lea 16(%rax),%rax
-+.Lkey_expansion_128_cold:
-+ shufps \$0b00010000,%xmm0,%xmm4
-+ pxor %xmm4, %xmm0
-+ shufps \$0b10001100,%xmm0,%xmm4
-+ pxor %xmm4, %xmm0
-+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path
-+ pxor %xmm1,%xmm0
-+ ret
-+
-+.align 16
-+.Lkey_expansion_192a:
-+ $movkey %xmm0,(%rax)
-+ lea 16(%rax),%rax
-+.Lkey_expansion_192a_cold:
-+ movaps %xmm2, %xmm5
-+.Lkey_expansion_192b_warm:
-+ shufps \$0b00010000,%xmm0,%xmm4
-+ movaps %xmm2,%xmm3
-+ pxor %xmm4,%xmm0
-+ shufps \$0b10001100,%xmm0,%xmm4
-+ pslldq \$4,%xmm3
-+ pxor %xmm4,%xmm0
-+ pshufd \$0b01010101,%xmm1,%xmm1 # critical path
-+ pxor %xmm3,%xmm2
-+ pxor %xmm1,%xmm0
-+ pshufd \$0b11111111,%xmm0,%xmm3
-+ pxor %xmm3,%xmm2
-+ ret
-+
-+.align 16
-+.Lkey_expansion_192b:
-+ movaps %xmm0,%xmm3
-+ shufps \$0b01000100,%xmm0,%xmm5
-+ $movkey %xmm5,(%rax)
-+ shufps \$0b01001110,%xmm2,%xmm3
-+ $movkey %xmm3,16(%rax)
-+ lea 32(%rax),%rax
-+ jmp .Lkey_expansion_192b_warm
-+
-+.align 16
-+.Lkey_expansion_256a:
-+ $movkey %xmm2,(%rax)
-+ lea 16(%rax),%rax
-+.Lkey_expansion_256a_cold:
-+ shufps \$0b00010000,%xmm0,%xmm4
-+ pxor %xmm4,%xmm0
-+ shufps \$0b10001100,%xmm0,%xmm4
-+ pxor %xmm4,%xmm0
-+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path
-+ pxor %xmm1,%xmm0
-+ ret
-+
-+.align 16
-+.Lkey_expansion_256b:
-+ $movkey %xmm0,(%rax)
-+ lea 16(%rax),%rax
-+
-+ shufps \$0b00010000,%xmm2,%xmm4
-+ pxor %xmm4,%xmm2
-+ shufps \$0b10001100,%xmm2,%xmm4
-+ pxor %xmm4,%xmm2
-+ pshufd \$0b10101010,%xmm1,%xmm1 # critical path
-+ pxor %xmm1,%xmm2
-+ ret
-+.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
-+___
-+}
-+\f
-+$code.=<<___;
-+.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
-+.align 64
-+___
-+
-+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-+if ($win64) {
-+$rec="%rcx";
-+$frame="%rdx";
-+$context="%r8";
-+$disp="%r9";
-+
-+$code.=<<___;
-+.extern __imp_RtlVirtualUnwind
-+.type cbc_se_handler,\@abi-omnipotent
-+.align 16
-+cbc_se_handler:
-+ push %rsi
-+ push %rdi
-+ push %rbx
-+ push %rbp
-+ push %r12
-+ push %r13
-+ push %r14
-+ push %r15
-+ pushfq
-+ sub \$64,%rsp
-+
-+ mov 152($context),%rax # pull context->Rsp
-+ mov 248($context),%rbx # pull context->Rip
-+
-+ lea .Lcbc_decrypt(%rip),%r10
-+ cmp %r10,%rbx # context->Rip<"prologue" label
-+ jb .Lin_prologue
-+
-+ lea .Lcbc_decrypt_body(%rip),%r10
-+ cmp %r10,%rbx # context->Rip<cbc_decrypt_body
-+ jb .Lrestore_rax
-+
-+ lea .Lcbc_ret(%rip),%r10
-+ cmp %r10,%rbx # context->Rip>="epilogue" label
-+ jae .Lin_prologue
-+
-+ lea 0(%rax),%rsi # top of stack
-+ lea 512($context),%rdi # &context.Xmm6
-+ mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
-+ .long 0xa548f3fc # cld; rep movsq
-+ lea 0x58(%rax),%rax # adjust stack pointer
-+ jmp .Lin_prologue
-+
-+.Lrestore_rax:
-+ mov 120($context),%rax
-+.Lin_prologue:
-+ mov 8(%rax),%rdi
-+ mov 16(%rax),%rsi
-+ mov %rax,152($context) # restore context->Rsp
-+ mov %rsi,168($context) # restore context->Rsi
-+ mov %rdi,176($context) # restore context->Rdi
-+
-+ jmp .Lcommon_seh_exit
-+.size cbc_se_handler,.-cbc_se_handler
-+
-+.type ecb_se_handler,\@abi-omnipotent
-+.align 16
-+ecb_se_handler:
-+ push %rsi
-+ push %rdi
-+ push %rbx
-+ push %rbp
-+ push %r12
-+ push %r13
-+ push %r14
-+ push %r15
-+ pushfq
-+ sub \$64,%rsp
-+
-+ mov 152($context),%rax # pull context->Rsp
-+ mov 8(%rax),%rdi
-+ mov 16(%rax),%rsi
-+ mov %rsi,168($context) # restore context->Rsi
-+ mov %rdi,176($context) # restore context->Rdi
-+
-+.Lcommon_seh_exit:
-+
-+ mov 40($disp),%rdi # disp->ContextRecord
-+ mov $context,%rsi # context
-+ mov \$154,%ecx # sizeof(CONTEXT)
-+ .long 0xa548f3fc # cld; rep movsq
-+
-+ mov $disp,%rsi
-+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
-+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
-+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
-+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
-+ mov 40(%rsi),%r10 # disp->ContextRecord
-+ lea 56(%rsi),%r11 # &disp->HandlerData
-+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
-+ mov %r10,32(%rsp) # arg5
-+ mov %r11,40(%rsp) # arg6
-+ mov %r12,48(%rsp) # arg7
-+ mov %rcx,56(%rsp) # arg8, (NULL)
-+ call *__imp_RtlVirtualUnwind(%rip)
-+
-+ mov \$1,%eax # ExceptionContinueSearch
-+ add \$64,%rsp
-+ popfq
-+ pop %r15
-+ pop %r14
-+ pop %r13
-+ pop %r12
-+ pop %rbp
-+ pop %rbx
-+ pop %rdi
-+ pop %rsi
-+ ret
-+.size cbc_se_handler,.-cbc_se_handler
-+
-+.section .pdata
-+.align 4
-+ .rva .LSEH_begin_${PREFIX}_ecb_encrypt
-+ .rva .LSEH_end_${PREFIX}_ecb_encrypt
-+ .rva .LSEH_info_ecb
-+
-+ .rva .LSEH_begin_${PREFIX}_cbc_encrypt
-+ .rva .LSEH_end_${PREFIX}_cbc_encrypt
-+ .rva .LSEH_info_cbc
-+
-+ .rva ${PREFIX}_set_decrypt_key
-+ .rva .LSEH_end_set_decrypt_key
-+ .rva .LSEH_info_key
-+
-+ .rva ${PREFIX}_set_encrypt_key
-+ .rva .LSEH_end_set_encrypt_key
-+ .rva .LSEH_info_key
-+.section .xdata
-+.align 8
-+.LSEH_info_ecb:
-+ .byte 9,0,0,0
-+ .rva ecb_se_handler
-+.LSEH_info_cbc:
-+ .byte 9,0,0,0
-+ .rva cbc_se_handler
-+.LSEH_info_key:
-+ .byte 0x01,0x04,0x01,0x00
-+ .byte 0x04,0x02,0x00,0x00
-+___
-+}
-+
-+sub rex {
-+ local *opcode=shift;
-+ my ($dst,$src)=@_;
-+
-+ if ($dst>=8 || $src>=8) {
-+ $rex=0x40;
-+ $rex|=0x04 if($dst>=8);
-+ $rex|=0x01 if($src>=8);
-+ push @opcode,$rex;
-+ }
-+}
-+
-+sub aesni {
-+ my $line=shift;
-+ my @opcode=(0x66);
-+
-+ if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
-+ rex(\@opcode,$4,$3);
-+ push @opcode,0x0f,0x3a,0xdf;
-+ push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M
-+ my $c=$2;
-+ push @opcode,$c=~/^0/?oct($c):$c;
-+ return ".byte\t".join(',',@opcode);
-+ }
-+ elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
-+ my %opcodelet = (
-+ "aesimc" => 0xdb,
-+ "aesenc" => 0xdc, "aesenclast" => 0xdd,
-+ "aesdec" => 0xde, "aesdeclast" => 0xdf
-+ );
-+ return undef if (!defined($opcodelet{$1}));
-+ rex(\@opcode,$3,$2);
-+ push @opcode,0x0f,0x38,$opcodelet{$1};
-+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
-+ return ".byte\t".join(',',@opcode);
-+ }
-+ return $line;
-+}
-+
-+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-+$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
-+
-+print $code;
-+
-+close STDOUT;
-diff -up openssl-1.0.0-beta4/crypto/aes/Makefile.aesni openssl-1.0.0-beta4/crypto/aes/Makefile
---- openssl-1.0.0-beta4/crypto/aes/Makefile.aesni 2008-12-23 12:33:00.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/aes/Makefile 2010-01-12 22:18:06.000000000 +0100
-@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S
-
- aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl
- $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
-+aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl
-+ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
-
- aes-x86_64.s: asm/aes-x86_64.pl
- $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@
-+aesni-x86_64.s: asm/aesni-x86_64.pl
-+ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@
-
- aes-sparcv9.s: asm/aes-sparcv9.pl
- $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
-diff -up openssl-1.0.0-beta4/crypto/engine/eng_aesni.c.aesni openssl-1.0.0-beta4/crypto/engine/eng_aesni.c
---- openssl-1.0.0-beta4/crypto/engine/eng_aesni.c.aesni 2010-01-12 22:18:06.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/engine/eng_aesni.c 2010-01-12 22:18:06.000000000 +0100
-@@ -0,0 +1,413 @@
-+/*
-+ * Support for Intel AES-NI intruction set
-+ * Author: Huang Ying <ying.huang@intel.com>
-+ *
-+ * Intel AES-NI is a new set of Single Instruction Multiple Data
-+ * (SIMD) instructions that are going to be introduced in the next
-+ * generation of Intel processor, as of 2009. These instructions
-+ * enable fast and secure data encryption and decryption, using the
-+ * Advanced Encryption Standard (AES), defined by FIPS Publication
-+ * number 197. The architecture introduces six instructions that
-+ * offer full hardware support for AES. Four of them support high
-+ * performance data encryption and decryption, and the other two
-+ * instructions support the AES key expansion procedure.
-+ *
-+ * The white paper can be downloaded from:
-+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
-+ *
-+ * This file is based on engines/e_padlock.c
-+ */
-+
-+/* ====================================================================
-+ * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * 1. Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ *
-+ * 2. Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in
-+ * the documentation and/or other materials provided with the
-+ * distribution.
-+ *
-+ * 3. All advertising materials mentioning features or use of this
-+ * software must display the following acknowledgment:
-+ * "This product includes software developed by the OpenSSL Project
-+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
-+ *
-+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
-+ * endorse or promote products derived from this software without
-+ * prior written permission. For written permission, please contact
-+ * licensing@OpenSSL.org.
-+ *
-+ * 5. Products derived from this software may not be called "OpenSSL"
-+ * nor may "OpenSSL" appear in their names without prior written
-+ * permission of the OpenSSL Project.
-+ *
-+ * 6. Redistributions of any form whatsoever must retain the following
-+ * acknowledgment:
-+ * "This product includes software developed by the OpenSSL Project
-+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
-+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
-+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-+ * OF THE POSSIBILITY OF SUCH DAMAGE.
-+ * ====================================================================
-+ *
-+ * This product includes cryptographic software written by Eric Young
-+ * (eay@cryptsoft.com). This product includes software written by Tim
-+ * Hudson (tjh@cryptsoft.com).
-+ *
-+ */
-+
-+
-+#include <openssl/opensslconf.h>
-+
-+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_NI) && !defined(OPENSSL_NO_AES)
-+
-+#include <stdio.h>
-+#include "cryptlib.h"
-+#include <openssl/dso.h>
-+#include <openssl/engine.h>
-+#include <openssl/evp.h>
-+#include <openssl/aes.h>
-+#include <openssl/err.h>
-+#include <openssl/modes.h>
-+
-+/* AES-NI is available *ONLY* on some x86 CPUs. Not only that it
-+ doesn't exist elsewhere, but it even can't be compiled on other
-+ platforms! */
-+#undef COMPILE_HW_AESNI
-+#if (defined(__x86_64) || defined(__x86_64__) || \
-+ defined(_M_AMD64) || defined(_M_X64) || \
-+ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM)
-+#define COMPILE_HW_AESNI
-+static ENGINE *ENGINE_aesni (void);
-+#endif
-+
-+void ENGINE_load_aesni (void)
-+{
-+/* On non-x86 CPUs it just returns. */
-+#ifdef COMPILE_HW_AESNI
-+ ENGINE *toadd = ENGINE_aesni();
-+ if (!toadd)
-+ return;
-+ ENGINE_add (toadd);
-+ ENGINE_register_complete (toadd);
-+ ENGINE_free (toadd);
-+ ERR_clear_error ();
-+#endif
-+}
-+
-+#ifdef COMPILE_HW_AESNI
-+int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
-+ AES_KEY *key);
-+int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
-+ AES_KEY *key);
-+
-+void aesni_encrypt(const unsigned char *in, unsigned char *out,
-+ const AES_KEY *key);
-+void aesni_decrypt(const unsigned char *in, unsigned char *out,
-+ const AES_KEY *key);
-+
-+void aesni_ecb_encrypt(const unsigned char *in,
-+ unsigned char *out,
-+ size_t length,
-+ const AES_KEY *key,
-+ int enc);
-+void aesni_cbc_encrypt(const unsigned char *in,
-+ unsigned char *out,
-+ size_t length,
-+ const AES_KEY *key,
-+ unsigned char *ivec, int enc);
-+
-+/* Function for ENGINE detection and control */
-+static int aesni_init(ENGINE *e);
-+
-+/* Cipher Stuff */
-+static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
-+ const int **nids, int nid);
-+
-+#define AESNI_MIN_ALIGN 16
-+#define AESNI_ALIGN(x) \
-+ ((void *)(((unsigned long)(x)+AESNI_MIN_ALIGN-1)&~(AESNI_MIN_ALIGN-1)))
-+
-+/* Engine names */
-+static const char aesni_id[] = "aesni",
-+ aesni_name[] = "Intel AES-NI engine",
-+ no_aesni_name[] = "Intel AES-NI engine (no-aesni)";
-+
-+/* ===== Engine "management" functions ===== */
-+
-+#if defined(_WIN32)
-+typedef unsigned __int64 IA32CAP;
-+#else
-+typedef unsigned long long IA32CAP;
-+#endif
-+
-+/* Prepare the ENGINE structure for registration */
-+static int
-+aesni_bind_helper(ENGINE *e)
-+{
-+ int engage;
-+ if (sizeof(OPENSSL_ia32cap_P) > 4) {
-+ engage = (OPENSSL_ia32cap_P >> 57) & 1;
-+ } else {
-+ IA32CAP OPENSSL_ia32_cpuid(void);
-+ engage = (OPENSSL_ia32_cpuid() >> 57) & 1;
-+ }
-+
-+ /* Register everything or return with an error */
-+ if (!ENGINE_set_id(e, aesni_id) ||
-+ !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) ||
-+
-+ !ENGINE_set_init_function(e, aesni_init) ||
-+ (engage && !ENGINE_set_ciphers (e, aesni_ciphers))
-+ )
-+ return 0;
-+
-+ /* Everything looks good */
-+ return 1;
-+}
-+
-+/* Constructor */
-+static ENGINE *
-+ENGINE_aesni(void)
-+{
-+ ENGINE *eng = ENGINE_new();
-+
-+ if (!eng) {
-+ return NULL;
-+ }
-+
-+ if (!aesni_bind_helper(eng)) {
-+ ENGINE_free(eng);
-+ return NULL;
-+ }
-+
-+ return eng;
-+}
-+
-+/* Check availability of the engine */
-+static int
-+aesni_init(ENGINE *e)
-+{
-+ return 1;
-+}
-+
-+#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
-+#define NID_aes_128_cfb NID_aes_128_cfb128
-+#endif
-+
-+#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
-+#define NID_aes_128_ofb NID_aes_128_ofb128
-+#endif
-+
-+#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
-+#define NID_aes_192_cfb NID_aes_192_cfb128
-+#endif
-+
-+#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
-+#define NID_aes_192_ofb NID_aes_192_ofb128
-+#endif
-+
-+#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
-+#define NID_aes_256_cfb NID_aes_256_cfb128
-+#endif
-+
-+#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
-+#define NID_aes_256_ofb NID_aes_256_ofb128
-+#endif
-+
-+/* List of supported ciphers. */
-+static int aesni_cipher_nids[] = {
-+ NID_aes_128_ecb,
-+ NID_aes_128_cbc,
-+ NID_aes_128_cfb,
-+ NID_aes_128_ofb,
-+
-+ NID_aes_192_ecb,
-+ NID_aes_192_cbc,
-+ NID_aes_192_cfb,
-+ NID_aes_192_ofb,
-+
-+ NID_aes_256_ecb,
-+ NID_aes_256_cbc,
-+ NID_aes_256_cfb,
-+ NID_aes_256_ofb,
-+};
-+static int aesni_cipher_nids_num =
-+ (sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0]));
-+
-+typedef struct
-+{
-+ AES_KEY ks;
-+ unsigned int _pad1[3];
-+} AESNI_KEY;
-+
-+static int
-+aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key,
-+ const unsigned char *iv, int enc)
-+{
-+ int ret;
-+ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
-+
-+ if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE
-+ || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE
-+ || enc)
-+ ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key);
-+ else
-+ ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key);
-+
-+ if(ret < 0) {
-+ EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
-+ return 0;
-+ }
-+
-+ return 1;
-+}
-+
-+static int aesni_cipher_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out,
-+ const unsigned char *in, size_t inl)
-+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
-+ aesni_ecb_encrypt(in, out, inl, key, ctx->encrypt);
-+ return 1;
-+}
-+static int aesni_cipher_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out,
-+ const unsigned char *in, size_t inl)
-+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
-+ aesni_cbc_encrypt(in, out, inl, key,
-+ ctx->iv, ctx->encrypt);
-+ return 1;
-+}
-+static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out,
-+ const unsigned char *in, size_t inl)
-+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
-+ CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv,
-+ &ctx->num, ctx->encrypt,
-+ (block128_f)aesni_encrypt);
-+ return 1;
-+}
-+static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out,
-+ const unsigned char *in, size_t inl)
-+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
-+ CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv,
-+ &ctx->num, (block128_f)aesni_encrypt);
-+ return 1;
-+}
-+
-+#define AES_BLOCK_SIZE 16
-+
-+#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
-+#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
-+#define EVP_CIPHER_block_size_OFB 1
-+#define EVP_CIPHER_block_size_CFB 1
-+
-+/* Declaring so many ciphers by hand would be a pain.
-+ Instead introduce a bit of preprocessor magic :-) */
-+#define DECLARE_AES_EVP(ksize,lmode,umode) \
-+static const EVP_CIPHER aesni_##ksize##_##lmode = { \
-+ NID_aes_##ksize##_##lmode, \
-+ EVP_CIPHER_block_size_##umode, \
-+ ksize / 8, \
-+ AES_BLOCK_SIZE, \
-+ 0 | EVP_CIPH_##umode##_MODE, \
-+ aesni_init_key, \
-+ aesni_cipher_##lmode, \
-+ NULL, \
-+ sizeof(AESNI_KEY), \
-+ EVP_CIPHER_set_asn1_iv, \
-+ EVP_CIPHER_get_asn1_iv, \
-+ NULL, \
-+ NULL \
-+}
-+
-+DECLARE_AES_EVP(128,ecb,ECB);
-+DECLARE_AES_EVP(128,cbc,CBC);
-+DECLARE_AES_EVP(128,cfb,CFB);
-+DECLARE_AES_EVP(128,ofb,OFB);
-+
-+DECLARE_AES_EVP(192,ecb,ECB);
-+DECLARE_AES_EVP(192,cbc,CBC);
-+DECLARE_AES_EVP(192,cfb,CFB);
-+DECLARE_AES_EVP(192,ofb,OFB);
-+
-+DECLARE_AES_EVP(256,ecb,ECB);
-+DECLARE_AES_EVP(256,cbc,CBC);
-+DECLARE_AES_EVP(256,cfb,CFB);
-+DECLARE_AES_EVP(256,ofb,OFB);
-+
-+static int
-+aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
-+ const int **nids, int nid)
-+{
-+ /* No specific cipher => return a list of supported nids ... */
-+ if (!cipher) {
-+ *nids = aesni_cipher_nids;
-+ return aesni_cipher_nids_num;
-+ }
-+
-+ /* ... or the requested "cipher" otherwise */
-+ switch (nid) {
-+ case NID_aes_128_ecb:
-+ *cipher = &aesni_128_ecb;
-+ break;
-+ case NID_aes_128_cbc:
-+ *cipher = &aesni_128_cbc;
-+ break;
-+ case NID_aes_128_cfb:
-+ *cipher = &aesni_128_cfb;
-+ break;
-+ case NID_aes_128_ofb:
-+ *cipher = &aesni_128_ofb;
-+ break;
-+
-+ case NID_aes_192_ecb:
-+ *cipher = &aesni_192_ecb;
-+ break;
-+ case NID_aes_192_cbc:
-+ *cipher = &aesni_192_cbc;
-+ break;
-+ case NID_aes_192_cfb:
-+ *cipher = &aesni_192_cfb;
-+ break;
-+ case NID_aes_192_ofb:
-+ *cipher = &aesni_192_ofb;
-+ break;
-+
-+ case NID_aes_256_ecb:
-+ *cipher = &aesni_256_ecb;
-+ break;
-+ case NID_aes_256_cbc:
-+ *cipher = &aesni_256_cbc;
-+ break;
-+ case NID_aes_256_cfb:
-+ *cipher = &aesni_256_cfb;
-+ break;
-+ case NID_aes_256_ofb:
-+ *cipher = &aesni_256_ofb;
-+ break;
-+
-+ default:
-+ /* Sorry, we don't support this NID */
-+ *cipher = NULL;
-+ return 0;
-+ }
-+
-+ return 1;
-+}
-+
-+#endif /* COMPILE_HW_AESNI */
-+#endif /* !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) && !defined(OPENSSL_NO_AES) */
-diff -up openssl-1.0.0-beta4/crypto/engine/eng_all.c.aesni openssl-1.0.0-beta4/crypto/engine/eng_all.c
---- openssl-1.0.0-beta4/crypto/engine/eng_all.c.aesni 2010-01-07 23:38:31.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/engine/eng_all.c 2010-01-12 22:18:06.000000000 +0100
-@@ -85,6 +85,9 @@ void ENGINE_load_builtin_engines(void)
- #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV))
- ENGINE_load_cryptodev();
- #endif
-+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI)
-+ ENGINE_load_aesni();
-+#endif
- ENGINE_load_dynamic();
- #ifndef OPENSSL_NO_STATIC_ENGINE
- #ifndef OPENSSL_NO_HW
-diff -up openssl-1.0.0-beta4/crypto/engine/engine.h.aesni openssl-1.0.0-beta4/crypto/engine/engine.h
---- openssl-1.0.0-beta4/crypto/engine/engine.h.aesni 2010-01-07 23:38:30.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/engine/engine.h 2010-01-12 22:18:06.000000000 +0100
-@@ -342,6 +342,7 @@ void ENGINE_load_gost(void);
- #endif
- #endif
- void ENGINE_load_cryptodev(void);
-+void ENGINE_load_aesni(void);
- void ENGINE_load_builtin_engines(void);
-
- /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation
-diff -up openssl-1.0.0-beta4/crypto/engine/Makefile.aesni openssl-1.0.0-beta4/crypto/engine/Makefile
---- openssl-1.0.0-beta4/crypto/engine/Makefile.aesni 2008-06-04 13:01:29.000000000 +0200
-+++ openssl-1.0.0-beta4/crypto/engine/Makefile 2010-01-12 22:18:06.000000000 +0100
-@@ -21,12 +21,14 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c e
- eng_table.c eng_pkey.c eng_fat.c eng_all.c \
- tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
- tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
-- eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c
-+ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
-+ eng_aesni.c
- LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \
- eng_table.o eng_pkey.o eng_fat.o eng_all.o \
- tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \
- tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \
-- eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o
-+ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \
-+ eng_aesni.o
-
- SRC= $(LIBSRC)
-
-diff -up openssl-1.0.0-beta4/crypto/evp/evp_err.c.aesni openssl-1.0.0-beta4/crypto/evp/evp_err.c
---- openssl-1.0.0-beta4/crypto/evp/evp_err.c.aesni 2010-01-07 23:38:31.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/evp/evp_err.c 2010-01-12 22:18:06.000000000 +0100
-@@ -1,6 +1,6 @@
- /* crypto/evp/evp_err.c */
- /* ====================================================================
-- * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved.
-+ * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
-@@ -70,6 +70,7 @@
-
- static ERR_STRING_DATA EVP_str_functs[]=
- {
-+{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
- {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"},
- {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"},
- {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"},
-@@ -85,7 +86,7 @@ static ERR_STRING_DATA EVP_str_functs[]=
- {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"},
- {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"},
- {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"},
--{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"},
-+{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"},
- {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"},
- {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"},
- {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"},
-diff -up openssl-1.0.0-beta4/crypto/evp/evp.h.aesni openssl-1.0.0-beta4/crypto/evp/evp.h
---- openssl-1.0.0-beta4/crypto/evp/evp.h.aesni 2010-01-07 23:38:31.000000000 +0100
-+++ openssl-1.0.0-beta4/crypto/evp/evp.h 2010-01-12 22:18:06.000000000 +0100
-@@ -1162,6 +1162,7 @@ void ERR_load_EVP_strings(void);
- /* Error codes for the EVP functions. */
-
- /* Function codes. */
-+#define EVP_F_AESNI_INIT_KEY 163
- #define EVP_F_AES_INIT_KEY 133
- #define EVP_F_CAMELLIA_INIT_KEY 159
- #define EVP_F_D2I_PKEY 100
-diff -up openssl-1.0.0-beta4/test/test_aesni.aesni openssl-1.0.0-beta4/test/test_aesni
---- openssl-1.0.0-beta4/test/test_aesni.aesni 2010-01-12 22:18:06.000000000 +0100
-+++ openssl-1.0.0-beta4/test/test_aesni 2010-01-12 22:18:06.000000000 +0100
-@@ -0,0 +1,69 @@
-+#!/bin/sh
-+
-+PROG=$1
-+
-+if [ -x $PROG ]; then
-+ if expr "x`$PROG version`" : "xOpenSSL" > /dev/null; then
-+ :
-+ else
-+ echo "$PROG is not OpenSSL executable"
-+ exit 1
-+ fi
-+else
-+ echo "$PROG is not executable"
-+ exit 1;
-+fi
-+
-+if $PROG engine aesni | grep -v no-aesni; then
-+
-+ HASH=`cat $PROG | $PROG dgst -hex`
-+
-+ AES_ALGS=" aes-128-ecb aes-192-ecb aes-256-ecb \
-+ aes-128-cbc aes-192-cbc aes-256-cbc \
-+ aes-128-cfb aes-192-cfb aes-256-cfb \
-+ aes-128-ofb aes-192-ofb aes-256-ofb"
-+ BUFSIZE="16 32 48 64 80 96 128 144 999"
-+
-+ nerr=0
-+
-+ for alg in $AES_ALGS; do
-+ echo $alg
-+ for bufsize in $BUFSIZE; do
-+ TEST=`( cat $PROG | \
-+ $PROG enc -e -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \
-+ $PROG enc -d -k "$HASH" -$alg | \
-+ $PROG dgst -hex ) 2>/dev/null`
-+ if [ "$TEST" != "$HASH" ]; then
-+ echo "-$alg/$bufsize encrypt test failed"
-+ nerr=`expr $nerr + 1`
-+ fi
-+ done
-+ for bufsize in $BUFSIZE; do
-+ TEST=`( cat $PROG | \
-+ $PROG enc -e -k "$HASH" -$alg | \
-+ $PROG enc -d -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \
-+ $PROG dgst -hex ) 2>/dev/null`
-+ if [ "$TEST" != "$HASH" ]; then
-+ echo "-$alg/$bufsize decrypt test failed"
-+ nerr=`expr $nerr + 1`
-+ fi
-+ done
-+ TEST=`( cat $PROG | \
-+ $PROG enc -e -k "$HASH" -$alg -engine aesni | \
-+ $PROG enc -d -k "$HASH" -$alg -engine aesni | \
-+ $PROG dgst -hex ) 2>/dev/null`
-+ if [ "$TEST" != "$HASH" ]; then
-+ echo "-$alg en/decrypt test failed"
-+ nerr=`expr $nerr + 1`
-+ fi
-+ done
-+
-+ if [ $nerr -gt 0 ]; then
-+ echo "AESNI engine test failed."
-+ exit 1;
-+ fi
-+else
-+ echo "AESNI engine is not available"
-+fi
-+
-+exit 0
diff --git a/openssl-1.0.0-beta5-ipv6-apps.patch b/openssl-1.0.0-beta5-ipv6-apps.patch
deleted file mode 100644
index 4304c01..0000000
--- a/openssl-1.0.0-beta5-ipv6-apps.patch
+++ /dev/null
@@ -1,499 +0,0 @@
-diff -up openssl-1.0.0-beta5/apps/s_apps.h.ipv6-apps openssl-1.0.0-beta5/apps/s_apps.h
---- openssl-1.0.0-beta5/apps/s_apps.h.ipv6-apps 2010-02-03 09:43:49.000000000 +0100
-+++ openssl-1.0.0-beta5/apps/s_apps.h 2010-02-03 09:43:49.000000000 +0100
-@@ -148,7 +148,7 @@ typedef fd_mask fd_set;
- #define PORT_STR "4433"
- #define PROTOCOL "tcp"
-
--int do_server(int port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context);
-+int do_server(char *port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context);
- #ifdef HEADER_X509_H
- int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx);
- #endif
-@@ -156,10 +156,9 @@ int MS_CALLBACK verify_callback(int ok,
- int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file);
- int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key);
- #endif
--int init_client(int *sock, char *server, int port, int type);
-+int init_client(int *sock, char *server, char *port, int type);
- int should_retry(int i);
--int extract_port(char *str, short *port_ptr);
--int extract_host_port(char *str,char **host_ptr,unsigned char *ip,short *p);
-+int extract_host_port(char *str,char **host_ptr,char **port_ptr);
-
- long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp,
- int argi, long argl, long ret);
-diff -up openssl-1.0.0-beta5/apps/s_client.c.ipv6-apps openssl-1.0.0-beta5/apps/s_client.c
---- openssl-1.0.0-beta5/apps/s_client.c.ipv6-apps 2010-02-03 09:43:49.000000000 +0100
-+++ openssl-1.0.0-beta5/apps/s_client.c 2010-02-03 09:43:49.000000000 +0100
-@@ -389,7 +389,7 @@ int MAIN(int argc, char **argv)
- int cbuf_len,cbuf_off;
- int sbuf_len,sbuf_off;
- fd_set readfds,writefds;
-- short port=PORT;
-+ char *port_str = PORT_STR;
- int full_log=1;
- char *host=SSL_HOST_NAME;
- char *cert_file=NULL,*key_file=NULL;
-@@ -488,13 +488,12 @@ int MAIN(int argc, char **argv)
- else if (strcmp(*argv,"-port") == 0)
- {
- if (--argc < 1) goto bad;
-- port=atoi(*(++argv));
-- if (port == 0) goto bad;
-+ port_str= *(++argv);
- }
- else if (strcmp(*argv,"-connect") == 0)
- {
- if (--argc < 1) goto bad;
-- if (!extract_host_port(*(++argv),&host,NULL,&port))
-+ if (!extract_host_port(*(++argv),&host,&port_str))
- goto bad;
- }
- else if (strcmp(*argv,"-verify") == 0)
-@@ -967,7 +966,7 @@ bad:
-
- re_start:
-
-- if (init_client(&s,host,port,socket_type) == 0)
-+ if (init_client(&s,host,port_str,socket_type) == 0)
- {
- BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error());
- SHUTDOWN(s);
-diff -up openssl-1.0.0-beta5/apps/s_server.c.ipv6-apps openssl-1.0.0-beta5/apps/s_server.c
---- openssl-1.0.0-beta5/apps/s_server.c.ipv6-apps 2010-02-03 09:43:49.000000000 +0100
-+++ openssl-1.0.0-beta5/apps/s_server.c 2010-02-03 09:43:49.000000000 +0100
-@@ -838,7 +838,7 @@ int MAIN(int argc, char *argv[])
- {
- X509_VERIFY_PARAM *vpm = NULL;
- int badarg = 0;
-- short port=PORT;
-+ char *port_str = PORT_STR;
- char *CApath=NULL,*CAfile=NULL;
- unsigned char *context = NULL;
- char *dhfile = NULL;
-@@ -909,8 +909,7 @@ int MAIN(int argc, char *argv[])
- (strcmp(*argv,"-accept") == 0))
- {
- if (--argc < 1) goto bad;
-- if (!extract_port(*(++argv),&port))
-- goto bad;
-+ port_str= *(++argv);
- }
- else if (strcmp(*argv,"-verify") == 0)
- {
-@@ -1700,9 +1699,9 @@ bad:
- BIO_printf(bio_s_out,"ACCEPT\n");
- (void)BIO_flush(bio_s_out);
- if (www)
-- do_server(port,socket_type,&accept_socket,www_body, context);
-+ do_server(port_str,socket_type,&accept_socket,www_body, context);
- else
-- do_server(port,socket_type,&accept_socket,sv_body, context);
-+ do_server(port_str,socket_type,&accept_socket,sv_body, context);
- print_stats(bio_s_out,ctx);
- ret=0;
- end:
-diff -up openssl-1.0.0-beta5/apps/s_socket.c.ipv6-apps openssl-1.0.0-beta5/apps/s_socket.c
---- openssl-1.0.0-beta5/apps/s_socket.c.ipv6-apps 2009-08-26 13:21:50.000000000 +0200
-+++ openssl-1.0.0-beta5/apps/s_socket.c 2010-02-03 10:00:30.000000000 +0100
-@@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha
- static void ssl_sock_cleanup(void);
- #endif
- static int ssl_sock_init(void);
--static int init_client_ip(int *sock,unsigned char ip[4], int port, int type);
--static int init_server(int *sock, int port, int type);
--static int init_server_long(int *sock, int port,char *ip, int type);
-+static int init_server(int *sock, char *port, int type);
- static int do_accept(int acc_sock, int *sock, char **host);
- static int host_ip(char *str, unsigned char ip[4]);
-
-@@ -234,58 +232,70 @@ static int ssl_sock_init(void)
- return(1);
- }
-
--int init_client(int *sock, char *host, int port, int type)
-+int init_client(int *sock, char *host, char *port, int type)
- {
-- unsigned char ip[4];
--
-- if (!host_ip(host,&(ip[0])))
-- {
-- return(0);
-- }
-- return(init_client_ip(sock,ip,port,type));
-- }
--
--static int init_client_ip(int *sock, unsigned char ip[4], int port, int type)
-- {
-- unsigned long addr;
-- struct sockaddr_in them;
-- int s,i;
-+ struct addrinfo *res, *res0, hints;
-+ char * failed_call = NULL;
-+ int s;
-+ int e;
-
- if (!ssl_sock_init()) return(0);
-
-- memset((char *)&them,0,sizeof(them));
-- them.sin_family=AF_INET;
-- them.sin_port=htons((unsigned short)port);
-- addr=(unsigned long)
-- ((unsigned long)ip[0]<<24L)|
-- ((unsigned long)ip[1]<<16L)|
-- ((unsigned long)ip[2]<< 8L)|
-- ((unsigned long)ip[3]);
-- them.sin_addr.s_addr=htonl(addr);
--
-- if (type == SOCK_STREAM)
-- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL);
-- else /* ( type == SOCK_DGRAM) */
-- s=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
--
-- if (s == INVALID_SOCKET) { perror("socket"); return(0); }
-+ memset(&hints, '\0', sizeof(hints));
-+ hints.ai_socktype = type;
-+ hints.ai_flags = AI_ADDRCONFIG;
-+
-+ e = getaddrinfo(host, port, &hints, &res);
-+ if (e)
-+ {
-+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e));
-+ if (e == EAI_SYSTEM)
-+ perror("getaddrinfo");
-+ return (0);
-+ }
-
-+ res0 = res;
-+ while (res)
-+ {
-+ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-+ if (s == INVALID_SOCKET)
-+ {
-+ failed_call = "socket";
-+ goto nextres;
-+ }
- #if defined(SO_KEEPALIVE) && !defined(OPENSSL_SYS_MPE)
- if (type == SOCK_STREAM)
- {
-- i=0;
-- i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i));
-- if (i < 0) { perror("keepalive"); return(0); }
-+ int i=0;
-+ i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,
-+ (char *)&i,sizeof(i));
-+ if (i < 0) {
-+ failed_call = "keepalive";
-+ goto nextres;
-+ }
- }
- #endif
--
-- if (connect(s,(struct sockaddr *)&them,sizeof(them)) == -1)
-- { closesocket(s); perror("connect"); return(0); }
-+ if (connect(s,(struct sockaddr *)res->ai_addr,
-+ res->ai_addrlen) == 0)
-+ {
-+ freeaddrinfo(res0);
- *sock=s;
- return(1);
- }
-
--int do_server(int port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context)
-+ failed_call = "socket";
-+nextres:
-+ if (s != INVALID_SOCKET)
-+ close(s);
-+ res = res->ai_next;
-+ }
-+ freeaddrinfo(res0);
-+
-+ perror(failed_call);
-+ return(0);
-+ }
-+
-+int do_server(char *port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context)
- {
- int sock;
- char *name = NULL;
-@@ -323,33 +333,38 @@ int do_server(int port, int type, int *r
- }
- }
-
--static int init_server_long(int *sock, int port, char *ip, int type)
-+static int init_server(int *sock, char *port, int type)
- {
-- int ret=0;
-- struct sockaddr_in server;
-- int s= -1,i;
-+ struct addrinfo *res, *res0, hints;
-+ char * failed_call = NULL;
-+ char port_name[8];
-+ int s;
-+ int e;
-
- if (!ssl_sock_init()) return(0);
-
-- memset((char *)&server,0,sizeof(server));
-- server.sin_family=AF_INET;
-- server.sin_port=htons((unsigned short)port);
-- if (ip == NULL)
-- server.sin_addr.s_addr=INADDR_ANY;
-- else
--/* Added for T3E, address-of fails on bit field (beckman@acl.lanl.gov) */
--#ifndef BIT_FIELD_LIMITS
-- memcpy(&server.sin_addr.s_addr,ip,4);
--#else
-- memcpy(&server.sin_addr,ip,4);
--#endif
-+ memset(&hints, '\0', sizeof(hints));
-+ hints.ai_socktype = type;
-+ hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
-
-- if (type == SOCK_STREAM)
-- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL);
-- else /* type == SOCK_DGRAM */
-- s=socket(AF_INET, SOCK_DGRAM,IPPROTO_UDP);
-+ e = getaddrinfo(NULL, port, &hints, &res);
-+ if (e)
-+ {
-+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e));
-+ if (e == EAI_SYSTEM)
-+ perror("getaddrinfo");
-+ return (0);
-+ }
-
-- if (s == INVALID_SOCKET) goto err;
-+ res0 = res;
-+ while (res)
-+ {
-+ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-+ if (s == INVALID_SOCKET)
-+ {
-+ failed_call = "socket";
-+ goto nextres;
-+ }
- #if defined SOL_SOCKET && defined SO_REUSEADDR
- {
- int j = 1;
-@@ -357,36 +372,39 @@ static int init_server_long(int *sock, i
- (void *) &j, sizeof j);
- }
- #endif
-- if (bind(s,(struct sockaddr *)&server,sizeof(server)) == -1)
-+
-+ if (bind(s,(struct sockaddr *)res->ai_addr, res->ai_addrlen) == -1)
- {
--#ifndef OPENSSL_SYS_WINDOWS
-- perror("bind");
--#endif
-- goto err;
-+ failed_call = "bind";
-+ goto nextres;
- }
-- /* Make it 128 for linux */
-- if (type==SOCK_STREAM && listen(s,128) == -1) goto err;
-- i=0;
-- *sock=s;
-- ret=1;
--err:
-- if ((ret == 0) && (s != -1))
-+ if (type==SOCK_STREAM && listen(s,128) == -1)
- {
-- SHUTDOWN(s);
-+ failed_call = "listen";
-+ goto nextres;
- }
-- return(ret);
-+
-+ *sock=s;
-+ return(1);
-+
-+nextres:
-+ if (s != INVALID_SOCKET)
-+ close(s);
-+ res = res->ai_next;
- }
-+ freeaddrinfo(res0);
-
--static int init_server(int *sock, int port, int type)
-- {
-- return(init_server_long(sock, port, NULL, type));
-+ if (s == INVALID_SOCKET) { perror("socket"); return(0); }
-+
-+ perror(failed_call);
-+ return(0);
- }
-
- static int do_accept(int acc_sock, int *sock, char **host)
- {
-- int ret,i;
-- struct hostent *h1,*h2;
-- static struct sockaddr_in from;
-+ static struct sockaddr_storage from;
-+ char buffer[NI_MAXHOST];
-+ int ret;
- int len;
- /* struct linger ling; */
-
-@@ -432,136 +450,58 @@ redoit:
- */
-
- if (host == NULL) goto end;
--#ifndef BIT_FIELD_LIMITS
-- /* I should use WSAAsyncGetHostByName() under windows */
-- h1=gethostbyaddr((char *)&from.sin_addr.s_addr,
-- sizeof(from.sin_addr.s_addr),AF_INET);
--#else
-- h1=gethostbyaddr((char *)&from.sin_addr,
-- sizeof(struct in_addr),AF_INET);
--#endif
-- if (h1 == NULL)
-+
-+ if (getnameinfo((struct sockaddr *)&from, sizeof(from),
-+ buffer, sizeof(buffer),
-+ NULL, 0, 0))
- {
-- BIO_printf(bio_err,"bad gethostbyaddr\n");
-+ BIO_printf(bio_err,"getnameinfo failed\n");
- *host=NULL;
- /* return(0); */
- }
- else
- {
-- if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL)
-+ if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL)
- {
- perror("OPENSSL_malloc");
- return(0);
- }
-- BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1);
--
-- h2=GetHostByName(*host);
-- if (h2 == NULL)
-- {
-- BIO_printf(bio_err,"gethostbyname failure\n");
-- return(0);
-- }
-- i=0;
-- if (h2->h_addrtype != AF_INET)
-- {
-- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
-- return(0);
-- }
-+ strcpy(*host, buffer);
- }
- end:
- *sock=ret;
- return(1);
- }
-
--int extract_host_port(char *str, char **host_ptr, unsigned char *ip,
-- short *port_ptr)
-+int extract_host_port(char *str, char **host_ptr,
-+ char **port_ptr)
- {
-- char *h,*p;
-+ char *h,*p,*x;
-
-- h=str;
-- p=strchr(str,':');
-+ x=h=str;
-+ if (*h == '[')
-+ {
-+ h++;
-+ p=strchr(h,']');
- if (p == NULL)
- {
-- BIO_printf(bio_err,"no port defined\n");
-+ BIO_printf(bio_err,"no ending bracket for IPv6 address\n");
- return(0);
- }
- *(p++)='\0';
--
-- if ((ip != NULL) && !host_ip(str,ip))
-- goto err;
-- if (host_ptr != NULL) *host_ptr=h;
--
-- if (!extract_port(p,port_ptr))
-- goto err;
-- return(1);
--err:
-- return(0);
-+ x = p;
- }
--
--static int host_ip(char *str, unsigned char ip[4])
-- {
-- unsigned int in[4];
-- int i;
--
-- if (sscanf(str,"%u.%u.%u.%u",&(in[0]),&(in[1]),&(in[2]),&(in[3])) == 4)
-- {
-- for (i=0; i<4; i++)
-- if (in[i] > 255)
-- {
-- BIO_printf(bio_err,"invalid IP address\n");
-- goto err;
-- }
-- ip[0]=in[0];
-- ip[1]=in[1];
-- ip[2]=in[2];
-- ip[3]=in[3];
-- }
-- else
-- { /* do a gethostbyname */
-- struct hostent *he;
--
-- if (!ssl_sock_init()) return(0);
--
-- he=GetHostByName(str);
-- if (he == NULL)
-- {
-- BIO_printf(bio_err,"gethostbyname failure\n");
-- goto err;
-- }
-- /* cast to short because of win16 winsock definition */
-- if ((short)he->h_addrtype != AF_INET)
-+ p=strchr(x,':');
-+ if (p == NULL)
- {
-- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
-- return(0);
-- }
-- ip[0]=he->h_addr_list[0][0];
-- ip[1]=he->h_addr_list[0][1];
-- ip[2]=he->h_addr_list[0][2];
-- ip[3]=he->h_addr_list[0][3];
-- }
-- return(1);
--err:
-+ BIO_printf(bio_err,"no port defined\n");
- return(0);
- }
-+ *(p++)='\0';
-
--int extract_port(char *str, short *port_ptr)
-- {
-- int i;
-- struct servent *s;
-+ if (host_ptr != NULL) *host_ptr=h;
-+ if (port_ptr != NULL) *port_ptr=p;
-
-- i=atoi(str);
-- if (i != 0)
-- *port_ptr=(unsigned short)i;
-- else
-- {
-- s=getservbyname(str,"tcp");
-- if (s == NULL)
-- {
-- BIO_printf(bio_err,"getservbyname failure for %s\n",str);
-- return(0);
-- }
-- *port_ptr=ntohs((unsigned short)s->s_port);
-- }
- return(1);
- }
-
diff --git a/openssl-1.0.0a-version.patch b/openssl-1.0.0a-version.patch
deleted file mode 100644
index 75003af..0000000
--- a/openssl-1.0.0a-version.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-diff -up openssl-1.0.0a/crypto/opensslv.h.version openssl-1.0.0a/crypto/opensslv.h
---- openssl-1.0.0a/crypto/opensslv.h.version 2010-08-13 12:40:00.000000000 +0200
-+++ openssl-1.0.0a/crypto/opensslv.h 2010-09-07 21:38:41.000000000 +0200
-@@ -25,7 +25,8 @@
- * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
- * major minor fix final patch/beta)
- */
--#define OPENSSL_VERSION_NUMBER 0x1000001fL
-+/* we have to keep the version number to not break the abi */
-+#define OPENSSL_VERSION_NUMBER 0x10000003L
- #ifdef OPENSSL_FIPS
- #define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0a-fips 1 Jun 2010"
- #else
-@@ -83,7 +84,7 @@
- * should only keep the versions that are binary compatible with the current.
- */
- #define SHLIB_VERSION_HISTORY ""
--#define SHLIB_VERSION_NUMBER "1.0.0"
-+#define SHLIB_VERSION_NUMBER "1.0.0a"
-
-
- #endif /* HEADER_OPENSSLV_H */
diff --git a/openssl-1.0.0b-aesni.patch b/openssl-1.0.0b-aesni.patch
new file mode 100644
index 0000000..1dda6bf
--- /dev/null
+++ b/openssl-1.0.0b-aesni.patch
@@ -0,0 +1,2388 @@
+diff -up openssl-1.0.0b/Configure.aesni openssl-1.0.0b/Configure
+--- openssl-1.0.0b/Configure.aesni 2010-11-16 17:33:22.000000000 +0100
++++ openssl-1.0.0b/Configure 2010-11-16 17:35:15.000000000 +0100
+@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket";
+ my $bits1="THIRTY_TWO_BIT ";
+ my $bits2="SIXTY_FOUR_BIT ";
+
+-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
++my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
+
+ my $x86_elf_asm="$x86_asm:elf";
+
+-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
++my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
+ my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void";
+ my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
+ my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
+@@ -491,7 +491,7 @@ my %table=(
+ #
+ # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
+ "VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32",
+-"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32",
++"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32",
+ "debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32",
+ "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32",
+ # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
+@@ -1419,6 +1419,7 @@ if ($rmd160_obj =~ /\.o$/)
+ if ($aes_obj =~ /\.o$/)
+ {
+ $cflags.=" -DAES_ASM";
++ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2);
+ }
+ else {
+ $aes_obj=$aes_enc;
+diff -up openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl.aesni openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl
+--- openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl.aesni 2010-11-16 17:33:23.000000000 +0100
++++ openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl 2010-11-16 17:33:23.000000000 +0100
+@@ -0,0 +1,765 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for Intel AES-NI extension. In
++# OpenSSL context it's used with Intel engine, but can also be used as
++# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
++# details].
++
++$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
++ # generates drop-in replacement for
++ # crypto/aes/asm/aes-586.pl:-)
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++push(@INC,"${dir}","${dir}../../perlasm");
++require "x86asm.pl";
++
++&asm_init($ARGV[0],$0);
++
++$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups"); # NOTE(review): $RREFIX is never assigned in this script (only $PREFIX is) and "aseni" looks like a typo for "aesni", so this always yields *movups; confirm key schedules are 16-byte aligned before correcting it to select movaps
++
++$len="eax";
++$rounds="ecx";
++$key="edx";
++$inp="esi";
++$out="edi";
++$rounds_="ebx"; # backup copy for $rounds
++$key_="ebp"; # backup copy for $key
++
++$inout0="xmm0";
++$inout1="xmm1";
++$inout2="xmm2";
++$rndkey0="xmm3";
++$rndkey1="xmm4";
++$ivec="xmm5";
++$in0="xmm6";
++$in1="xmm7"; $inout3="xmm7";
++
++# Inline version of internal aesni_[en|de]crypt1
++sub aesni_inline_generate1
++{ my $p=shift;
++
++ &$movekey ($rndkey0,&QWP(0,$key));
++ &$movekey ($rndkey1,&QWP(16,$key));
++ &lea ($key,&DWP(32,$key));
++ &pxor ($inout0,$rndkey0);
++ &set_label("${p}1_loop");
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &dec ($rounds);
++ &$movekey ($rndkey1,&QWP(0,$key));
++ &lea ($key,&DWP(16,$key));
++ &jnz (&label("${p}1_loop"));
++ eval"&aes${p}last ($inout0,$rndkey1)";
++}
++
++sub aesni_generate1 # fully unrolled loop
++{ my $p=shift;
++
++ &function_begin_B("_aesni_${p}rypt1");
++ &$movekey ($rndkey0,&QWP(0,$key));
++ &$movekey ($rndkey1,&QWP(0x10,$key));
++ &cmp ($rounds,11);
++ &pxor ($inout0,$rndkey0);
++ &$movekey ($rndkey0,&QWP(0x20,$key));
++ &lea ($key,&DWP(0x30,$key));
++ &jb (&label("${p}128"));
++ &lea ($key,&DWP(0x20,$key));
++ &je (&label("${p}192"));
++ &lea ($key,&DWP(0x20,$key));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(-0x40,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(-0x30,$key));
++ &set_label("${p}192");
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(-0x20,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(-0x10,$key));
++ &set_label("${p}128");
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(0,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(0x10,$key));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(0x20,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(0x30,$key));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(0x40,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(0x50,$key));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(0x60,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &$movekey ($rndkey0,&QWP(0x70,$key));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ eval"&aes${p}last ($inout0,$rndkey0)";
++ &ret();
++ &function_end_B("_aesni_${p}rypt1");
++}
++
++# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
++# &aesni_generate1("enc");
++&function_begin_B("${PREFIX}_encrypt");
++ &mov ("eax",&wparam(0));
++ &mov ($key,&wparam(2));
++ &movups ($inout0,&QWP(0,"eax"));
++ &mov ($rounds,&DWP(240,$key));
++ &mov ("eax",&wparam(1));
++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1");
++ &movups (&QWP(0,"eax"),$inout0);
++ &ret ();
++&function_end_B("${PREFIX}_encrypt");
++
++# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
++# &aesni_generate1("dec");
++&function_begin_B("${PREFIX}_decrypt");
++ &mov ("eax",&wparam(0));
++ &mov ($key,&wparam(2));
++ &movups ($inout0,&QWP(0,"eax"));
++ &mov ($rounds,&DWP(240,$key));
++ &mov ("eax",&wparam(1));
++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1");
++ &movups (&QWP(0,"eax"),$inout0);
++ &ret ();
++&function_end_B("${PREFIX}_decrypt");
++\f
++# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
++# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec]
++# latency is 6, it turned out that it can be scheduled only every
++# *second* cycle. Thus 3x interleave is the one providing optimal
++# utilization, i.e. when subroutine's throughput is virtually same as
++# of non-interleaved subroutine [for number of input blocks up to 3].
++# This is why it makes no sense to implement 2x subroutine. As soon
++# as/if Intel improves throughput by making it possible to schedule
++# the instructions in question *every* cycle, I would have to
++# implement 6x interleave and use it in loop...
++sub aesni_generate3
++{ my $p=shift;
++
++ &function_begin_B("_aesni_${p}rypt3");
++ &$movekey ($rndkey0,&QWP(0,$key));
++ &shr ($rounds,1);
++ &$movekey ($rndkey1,&QWP(16,$key));
++ &lea ($key,&DWP(32,$key));
++ &pxor ($inout0,$rndkey0);
++ &pxor ($inout1,$rndkey0);
++ &pxor ($inout2,$rndkey0);
++ &jmp (&label("${p}3_loop"));
++ &set_label("${p}3_loop",16);
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey0,&QWP(0,$key));
++ eval"&aes${p} ($inout1,$rndkey1)";
++ &dec ($rounds);
++ eval"&aes${p} ($inout2,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(16,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &lea ($key,&DWP(32,$key));
++ eval"&aes${p} ($inout1,$rndkey0)";
++ eval"&aes${p} ($inout2,$rndkey0)";
++ &jnz (&label("${p}3_loop"));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey0,&QWP(0,$key));
++ eval"&aes${p} ($inout1,$rndkey1)";
++ eval"&aes${p} ($inout2,$rndkey1)";
++ eval"&aes${p}last ($inout0,$rndkey0)";
++ eval"&aes${p}last ($inout1,$rndkey0)";
++ eval"&aes${p}last ($inout2,$rndkey0)";
++ &ret();
++ &function_end_B("_aesni_${p}rypt3");
++}
++
++# 4x interleave is implemented to improve small block performance,
++# most notably [and naturally] 4 block by ~30%. One can argue that one
++# should have implemented 5x as well, but improvement would be <20%,
++# so it's not worth it...
++sub aesni_generate4
++{ my $p=shift;
++
++ &function_begin_B("_aesni_${p}rypt4");
++ &$movekey ($rndkey0,&QWP(0,$key));
++ &$movekey ($rndkey1,&QWP(16,$key));
++ &shr ($rounds,1);
++ &lea ($key,&DWP(32,$key));
++ &pxor ($inout0,$rndkey0);
++ &pxor ($inout1,$rndkey0);
++ &pxor ($inout2,$rndkey0);
++ &pxor ($inout3,$rndkey0);
++ &jmp (&label("${p}3_loop"));
++ &set_label("${p}3_loop",16);
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey0,&QWP(0,$key));
++ eval"&aes${p} ($inout1,$rndkey1)";
++ &dec ($rounds);
++ eval"&aes${p} ($inout2,$rndkey1)";
++ eval"&aes${p} ($inout3,$rndkey1)";
++ &$movekey ($rndkey1,&QWP(16,$key));
++ eval"&aes${p} ($inout0,$rndkey0)";
++ &lea ($key,&DWP(32,$key));
++ eval"&aes${p} ($inout1,$rndkey0)";
++ eval"&aes${p} ($inout2,$rndkey0)";
++ eval"&aes${p} ($inout3,$rndkey0)";
++ &jnz (&label("${p}3_loop"));
++ eval"&aes${p} ($inout0,$rndkey1)";
++ &$movekey ($rndkey0,&QWP(0,$key));
++ eval"&aes${p} ($inout1,$rndkey1)";
++ eval"&aes${p} ($inout2,$rndkey1)";
++ eval"&aes${p} ($inout3,$rndkey1)";
++ eval"&aes${p}last ($inout0,$rndkey0)";
++ eval"&aes${p}last ($inout1,$rndkey0)";
++ eval"&aes${p}last ($inout2,$rndkey0)";
++ eval"&aes${p}last ($inout3,$rndkey0)";
++ &ret();
++ &function_end_B("_aesni_${p}rypt4");
++}
++&aesni_generate3("enc") if ($PREFIX eq "aesni");
++&aesni_generate3("dec");
++&aesni_generate4("enc") if ($PREFIX eq "aesni");
++&aesni_generate4("dec");
++
++if ($PREFIX eq "aesni") {
++# void aesni_ecb_encrypt (const void *in, void *out,
++# size_t length, const AES_KEY *key,
++# int enc);
++&function_begin("aesni_ecb_encrypt");
++ &mov ($inp,&wparam(0));
++ &mov ($out,&wparam(1));
++ &mov ($len,&wparam(2));
++ &mov ($key,&wparam(3));
++ &mov ($rounds,&wparam(4)); # holds the enc flag until the test below
++ &cmp ($len,16);
++ &jb (&label("ecb_ret")); # less than one block: nothing to do
++ &and ($len,-16); # process whole 16-byte blocks only
++ &test ($rounds,$rounds); # enc==0 selects decrypt; flags consumed by &jz below
++ &mov ($rounds,&DWP(240,$key)); # load round count from key+240 (mov leaves flags intact)
++ &mov ($key_,$key); # backup $key
++ &mov ($rounds_,$rounds); # backup $rounds
++ &jz (&label("ecb_decrypt"));
++
++ &sub ($len,0x40);
++ &jbe (&label("ecb_enc_tail"));
++ &jmp (&label("ecb_enc_loop3"));
++
++&set_label("ecb_enc_loop3",16);
++ &movups ($inout0,&QWP(0,$inp));
++ &movups ($inout1,&QWP(0x10,$inp));
++ &movups ($inout2,&QWP(0x20,$inp));
++ &call ("_aesni_encrypt3");
++ &sub ($len,0x30);
++ &lea ($inp,&DWP(0x30,$inp));
++ &lea ($out,&DWP(0x30,$out));
++ &movups (&QWP(-0x30,$out),$inout0);
++ &mov ($key,$key_); # restore $key
++ &movups (&QWP(-0x20,$out),$inout1);
++ &mov ($rounds,$rounds_); # restore $rounds
++ &movups (&QWP(-0x10,$out),$inout2);
++ &ja (&label("ecb_enc_loop3"));
++
++&set_label("ecb_enc_tail");
++ &add ($len,0x40); # remove the 0x40 bias applied before the 3x loop
++ &jz (&label("ecb_ret"));
++
++ &cmp ($len,0x10);
++ &movups ($inout0,&QWP(0,$inp)); # loads are interleaved with the compares; movups leaves flags intact
++ &je (&label("ecb_enc_one"));
++ &cmp ($len,0x20);
++ &movups ($inout1,&QWP(0x10,$inp));
++ &je (&label("ecb_enc_two"));
++ &cmp ($len,0x30);
++ &movups ($inout2,&QWP(0x20,$inp));
++ &je (&label("ecb_enc_three"));
++ &movups ($inout3,&QWP(0x30,$inp)); # fall through: four blocks remain
++ &call ("_aesni_encrypt4");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movups (&QWP(0x20,$out),$inout2);
++ &movups (&QWP(0x30,$out),$inout3);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_enc_one",16);
++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1");
++ &movups (&QWP(0,$out),$inout0);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_enc_two",16);
++ &call ("_aesni_encrypt3");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_enc_three",16);
++ &call ("_aesni_encrypt3");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movups (&QWP(0x20,$out),$inout2);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_decrypt",16);
++ &sub ($len,0x40);
++ &jbe (&label("ecb_dec_tail"));
++ &jmp (&label("ecb_dec_loop3"));
++
++&set_label("ecb_dec_loop3",16);
++ &movups ($inout0,&QWP(0,$inp));
++ &movups ($inout1,&QWP(0x10,$inp));
++ &movups ($inout2,&QWP(0x20,$inp));
++ &call ("_aesni_decrypt3");
++ &sub ($len,0x30);
++ &lea ($inp,&DWP(0x30,$inp));
++ &lea ($out,&DWP(0x30,$out));
++ &movups (&QWP(-0x30,$out),$inout0);
++ &mov ($key,$key_); # restore $key
++ &movups (&QWP(-0x20,$out),$inout1);
++ &mov ($rounds,$rounds_); # restore $rounds
++ &movups (&QWP(-0x10,$out),$inout2);
++ &ja (&label("ecb_dec_loop3"));
++
++&set_label("ecb_dec_tail");
++ &add ($len,0x40);
++ &jz (&label("ecb_ret"));
++
++ &cmp ($len,0x10);
++ &movups ($inout0,&QWP(0,$inp));
++ &je (&label("ecb_dec_one"));
++ &cmp ($len,0x20);
++ &movups ($inout1,&QWP(0x10,$inp));
++ &je (&label("ecb_dec_two"));
++ &cmp ($len,0x30);
++ &movups ($inout2,&QWP(0x20,$inp));
++ &je (&label("ecb_dec_three"));
++ &movups ($inout3,&QWP(0x30,$inp));
++ &call ("_aesni_decrypt4");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movups (&QWP(0x20,$out),$inout2);
++ &movups (&QWP(0x30,$out),$inout3);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_dec_one",16);
++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1");
++ &movups (&QWP(0,$out),$inout0);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_dec_two",16);
++ &call ("_aesni_decrypt3");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &jmp (&label("ecb_ret"));
++
++&set_label("ecb_dec_three",16);
++ &call ("_aesni_decrypt3");
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movups (&QWP(0x20,$out),$inout2);
++
++&set_label("ecb_ret");
++&function_end("aesni_ecb_encrypt");
++}
++
++# void $PREFIX_cbc_encrypt (const void *inp, void *out,
++# size_t length, const AES_KEY *key,
++# unsigned char *ivp,const int enc);
++&function_begin("${PREFIX}_cbc_encrypt");
++ &mov ($inp,&wparam(0));
++ &mov ($out,&wparam(1));
++ &mov ($len,&wparam(2));
++ &mov ($key,&wparam(3));
++ &test ($len,$len);
++ &mov ($key_,&wparam(4));
++ &jz (&label("cbc_ret"));
++
++ &cmp (&wparam(5),0);
++ &movups ($ivec,&QWP(0,$key_)); # load IV
++ &mov ($rounds,&DWP(240,$key));
++ &mov ($key_,$key); # backup $key
++ &mov ($rounds_,$rounds); # backup $rounds
++ &je (&label("cbc_decrypt"));
++
++ &movaps ($inout0,$ivec);
++ &cmp ($len,16);
++ &jb (&label("cbc_enc_tail"));
++ &sub ($len,16);
++ &jmp (&label("cbc_enc_loop"));
++
++&set_label("cbc_enc_loop",16);
++ &movups ($ivec,&QWP(0,$inp)); # next input block ($ivec doubles as scratch here)
++ &lea ($inp,&DWP(16,$inp));
++ &pxor ($inout0,$ivec); # xor input into previous ciphertext (or the IV on first pass)
++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1");
++ &sub ($len,16);
++ &lea ($out,&DWP(16,$out));
++ &mov ($rounds,$rounds_); # restore $rounds
++ &mov ($key,$key_); # restore $key
++ &movups (&QWP(-16,$out),$inout0);
++ &jnc (&label("cbc_enc_loop")); # carry from &sub above: loop while no borrow
++ &add ($len,16);
++ &jnz (&label("cbc_enc_tail")); # leftover bytes short of a full block
++ &movaps ($ivec,$inout0); # last ciphertext block is written back as the IV at cbc_ret
++ &jmp (&label("cbc_ret"));
++
++&set_label("cbc_enc_tail");
++ &mov ("ecx",$len); # zaps $rounds
++ &data_word(0xA4F3F689); # rep movsb
++ &mov ("ecx",16); # zero tail
++ &sub ("ecx",$len);
++ &xor ("eax","eax"); # zaps $len
++ &data_word(0xAAF3F689); # rep stosb
++ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block
++ &mov ($rounds,$rounds_); # restore $rounds
++ &mov ($inp,$out); # $inp and $out are the same
++ &mov ($key,$key_); # restore $key
++ &jmp (&label("cbc_enc_loop"));
++
++&set_label("cbc_decrypt",16);
++ &sub ($len,0x40);
++ &jbe (&label("cbc_dec_tail"));
++ &jmp (&label("cbc_dec_loop3"));
++
++&set_label("cbc_dec_loop3",16);
++ &movups ($inout0,&QWP(0,$inp));
++ &movups ($inout1,&QWP(0x10,$inp));
++ &movups ($inout2,&QWP(0x20,$inp));
++ &movaps ($in0,$inout0); # keep ciphertext copies for the chaining xors below
++ &movaps ($in1,$inout1);
++ &call ("_aesni_decrypt3");
++ &sub ($len,0x30); # sets the flags tested by &ja at the end of the loop
++ &lea ($inp,&DWP(0x30,$inp));
++ &lea ($out,&DWP(0x30,$out));
++ &pxor ($inout0,$ivec);
++ &pxor ($inout1,$in0);
++ &movups ($ivec,&QWP(-0x10,$inp)); # third ciphertext block becomes the next IV (read before the stores)
++ &pxor ($inout2,$in1);
++ &movups (&QWP(-0x30,$out),$inout0);
++ &mov ($rounds,$rounds_); # restore $rounds
++ &movups (&QWP(-0x20,$out),$inout1);
++ &mov ($key,$key_); # restore $key
++ &movups (&QWP(-0x10,$out),$inout2);
++ &ja (&label("cbc_dec_loop3"));
++
++&set_label("cbc_dec_tail");
++ &add ($len,0x40);
++ &jz (&label("cbc_ret"));
++
++ &movups ($inout0,&QWP(0,$inp));
++ &cmp ($len,0x10);
++ &movaps ($in0,$inout0);
++ &jbe (&label("cbc_dec_one"));
++ &movups ($inout1,&QWP(0x10,$inp));
++ &cmp ($len,0x20);
++ &movaps ($in1,$inout1);
++ &jbe (&label("cbc_dec_two"));
++ &movups ($inout2,&QWP(0x20,$inp));
++ &cmp ($len,0x30);
++ &jbe (&label("cbc_dec_three"));
++ &movups ($inout3,&QWP(0x30,$inp));
++ &call ("_aesni_decrypt4");
++ &movups ($rndkey0,&QWP(0x10,$inp));
++ &movups ($rndkey1,&QWP(0x20,$inp));
++ &pxor ($inout0,$ivec);
++ &pxor ($inout1,$in0);
++ &movups ($ivec,&QWP(0x30,$inp));
++ &movups (&QWP(0,$out),$inout0);
++ &pxor ($inout2,$rndkey0);
++ &pxor ($inout3,$rndkey1);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movups (&QWP(0x20,$out),$inout2);
++ &movaps ($inout0,$inout3);
++ &lea ($out,&DWP(0x30,$out));
++ &jmp (&label("cbc_dec_tail_collected"));
++
++&set_label("cbc_dec_one");
++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1");
++ &pxor ($inout0,$ivec);
++ &movaps ($ivec,$in0); # the consumed ciphertext block becomes the next IV
++ &jmp (&label("cbc_dec_tail_collected"));
++
++&set_label("cbc_dec_two");
++ &call ("_aesni_decrypt3");
++ &pxor ($inout0,$ivec);
++ &pxor ($inout1,$in0);
++ &movups (&QWP(0,$out),$inout0);
++ &movaps ($inout0,$inout1);
++ &movaps ($ivec,$in1);
++ &lea ($out,&DWP(0x10,$out));
++ &jmp (&label("cbc_dec_tail_collected"));
++
++&set_label("cbc_dec_three");
++ &call ("_aesni_decrypt3");
++ &pxor ($inout0,$ivec);
++ &pxor ($inout1,$in0);
++ &pxor ($inout2,$in1);
++ &movups (&QWP(0,$out),$inout0);
++ &movups (&QWP(0x10,$out),$inout1);
++ &movaps ($inout0,$inout2);
++ &movups ($ivec,&QWP(0x20,$inp));
++ &lea ($out,&DWP(0x20,$out));
++
++&set_label("cbc_dec_tail_collected");
++ &and ($len,15);
++ &jnz (&label("cbc_dec_tail_partial"));
++ &movups (&QWP(0,$out),$inout0);
++ &jmp (&label("cbc_ret"));
++
++&set_label("cbc_dec_tail_partial");
++ &mov ($key_,"esp");
++ &sub ("esp",16);
++ &and ("esp",-16);
++ &movaps (&QWP(0,"esp"),$inout0);
++ &mov ($inp,"esp");
++ &mov ("ecx",$len);
++ &data_word(0xA4F3F689); # rep movsb
++ &mov ("esp",$key_);
++
++&set_label("cbc_ret");
++ &mov ($key_,&wparam(4));
++ &movups (&QWP(0,$key_),$ivec); # output IV
++&function_end("${PREFIX}_cbc_encrypt");
++
++# Mechanical port from aesni-x86_64.pl.
++#
++# _aesni_set_encrypt_key is private interface,
++# input:
++# "eax" const unsigned char *userKey
++# $rounds int bits
++# $key AES_KEY *key
++# output:
++# "eax" return code
++# $rounds rounds-1 (9, 11 or 13)
++
++&function_begin_B("_aesni_set_encrypt_key");
++ &test ("eax","eax");
++ &jz (&label("bad_pointer"));
++ &test ($key,$key);
++ &jz (&label("bad_pointer"));
++
++ &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
++ &pxor ("xmm4","xmm4"); # low dword of xmm4 is assumed 0
++ &lea ($key,&DWP(16,$key));
++ &cmp ($rounds,256);
++ &je (&label("14rounds"));
++ &cmp ($rounds,192);
++ &je (&label("12rounds"));
++ &cmp ($rounds,128);
++ &jne (&label("bad_keybits"));
++
++&set_label("10rounds",16);
++ &mov ($rounds,9);
++ &$movekey (&QWP(-16,$key),"xmm0"); # round 0
++ &aeskeygenassist("xmm1","xmm0",0x01); # round 1
++ &call (&label("key_128_cold"));
++ &aeskeygenassist("xmm1","xmm0",0x2); # round 2
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x04); # round 3
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x08); # round 4
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x10); # round 5
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x20); # round 6
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x40); # round 7
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x80); # round 8
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x1b); # round 9
++ &call (&label("key_128"));
++ &aeskeygenassist("xmm1","xmm0",0x36); # round 10
++ &call (&label("key_128"));
++ &$movekey (&QWP(0,$key),"xmm0");
++ &mov (&DWP(80,$key),$rounds);
++ &xor ("eax","eax");
++ &ret();
++
++&set_label("key_128",16);
++ &$movekey (&QWP(0,$key),"xmm0"); # store the round key computed by the previous call
++ &lea ($key,&DWP(16,$key)); # advance to the next slot
++&set_label("key_128_cold"); # entry for round 1: round 0 was already stored by the caller
++ &shufps ("xmm4","xmm0",0b00010000);
++ &pxor ("xmm0","xmm4");
++ &shufps ("xmm4","xmm0",0b10001100);
++ &pxor ("xmm0","xmm4");
++ &pshufd ("xmm1","xmm1",0b11111111); # critical path
++ &pxor ("xmm0","xmm1");
++ &ret();
++
++&set_label("12rounds",16);
++ &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
++ &mov ($rounds,11); # rounds-1, stored into the key structure below
++ &$movekey (&QWP(-16,$key),"xmm0"); # round 0
++ &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2
++ &call (&label("key_192a_cold"));
++ &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3
++ &call (&label("key_192b"));
++ &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5
++ &call (&label("key_192a"));
++ &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6
++ &call (&label("key_192b"));
++ &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8
++ &call (&label("key_192a"));
++ &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9
++ &call (&label("key_192b"));
++ &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11
++ &call (&label("key_192a"));
++ &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12
++ &call (&label("key_192b"));
++ &$movekey (&QWP(0,$key),"xmm0"); # final round key
++ &mov (&DWP(48,$key),$rounds); # $key has advanced by 192 here, so this is offset 240 of the schedule
++ &xor ("eax","eax"); # return code 0
++ &ret();
++
++&set_label("key_192a",16);
++ &$movekey (&QWP(0,$key),"xmm0");
++ &lea ($key,&DWP(16,$key));
++&set_label("key_192a_cold",16);
++ &movaps ("xmm5","xmm2");
++&set_label("key_192b_warm");
++ &shufps ("xmm4","xmm0",0b00010000);
++ &movaps ("xmm3","xmm2");
++ &pxor ("xmm0","xmm4");
++ &shufps ("xmm4","xmm0",0b10001100);
++ &pslldq ("xmm3",4);
++ &pxor ("xmm0","xmm4");
++ &pshufd ("xmm1","xmm1",0b01010101); # critical path
++ &pxor ("xmm2","xmm3");
++ &pxor ("xmm0","xmm1");
++ &pshufd ("xmm3","xmm0",0b11111111);
++ &pxor ("xmm2","xmm3");
++ &ret();
++
++&set_label("key_192b",16);
++ &movaps ("xmm3","xmm0");
++ &shufps ("xmm5","xmm0",0b01000100);
++ &$movekey (&QWP(0,$key),"xmm5");
++ &shufps ("xmm3","xmm2",0b01001110);
++ &$movekey (&QWP(16,$key),"xmm3");
++ &lea ($key,&DWP(32,$key));
++ &jmp (&label("key_192b_warm"));
++
++&set_label("14rounds",16);
++ &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
++ &mov ($rounds,13);
++ &lea ($key,&DWP(16,$key));
++ &$movekey (&QWP(-32,$key),"xmm0"); # round 0
++ &$movekey (&QWP(-16,$key),"xmm2"); # round 1
++ &aeskeygenassist("xmm1","xmm2",0x01); # round 2
++ &call (&label("key_256a_cold"));
++ &aeskeygenassist("xmm1","xmm0",0x01); # round 3
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x02); # round 4
++ &call (&label("key_256a"));
++ &aeskeygenassist("xmm1","xmm0",0x02); # round 5
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x04); # round 6
++ &call (&label("key_256a"));
++ &aeskeygenassist("xmm1","xmm0",0x04); # round 7
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x08); # round 8
++ &call (&label("key_256a"));
++ &aeskeygenassist("xmm1","xmm0",0x08); # round 9
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x10); # round 10
++ &call (&label("key_256a"));
++ &aeskeygenassist("xmm1","xmm0",0x10); # round 11
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x20); # round 12
++ &call (&label("key_256a"));
++ &aeskeygenassist("xmm1","xmm0",0x20); # round 13
++ &call (&label("key_256b"));
++ &aeskeygenassist("xmm1","xmm2",0x40); # round 14
++ &call (&label("key_256a"));
++ &$movekey (&QWP(0,$key),"xmm0");
++ &mov (&DWP(16,$key),$rounds);
++ &xor ("eax","eax");
++ &ret();
++
++&set_label("key_256a",16);
++ &$movekey (&QWP(0,$key),"xmm2");
++ &lea ($key,&DWP(16,$key));
++&set_label("key_256a_cold");
++ &shufps ("xmm4","xmm0",0b00010000);
++ &pxor ("xmm0","xmm4");
++ &shufps ("xmm4","xmm0",0b10001100);
++ &pxor ("xmm0","xmm4");
++ &pshufd ("xmm1","xmm1",0b11111111); # critical path
++ &pxor ("xmm0","xmm1");
++ &ret();
++
++&set_label("key_256b",16);
++ &$movekey (&QWP(0,$key),"xmm0");
++ &lea ($key,&DWP(16,$key));
++
++ &shufps ("xmm4","xmm2",0b00010000);
++ &pxor ("xmm2","xmm4");
++ &shufps ("xmm4","xmm2",0b10001100);
++ &pxor ("xmm2","xmm4");
++ &pshufd ("xmm1","xmm1",0b10101010); # critical path
++ &pxor ("xmm2","xmm1");
++ &ret();
++
++&set_label("bad_pointer",4);
++ &mov ("eax",-1);
++ &ret ();
++&set_label("bad_keybits",4);
++ &mov ("eax",-2);
++ &ret ();
++&function_end_B("_aesni_set_encrypt_key");
++
++# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
++# AES_KEY *key)
++&function_begin_B("${PREFIX}_set_encrypt_key");
++ &mov ("eax",&wparam(0));
++ &mov ($rounds,&wparam(1));
++ &mov ($key,&wparam(2));
++ &call ("_aesni_set_encrypt_key");
++ &ret ();
++&function_end_B("${PREFIX}_set_encrypt_key");
++
++# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
++# AES_KEY *key)
++&function_begin_B("${PREFIX}_set_decrypt_key");
++ &mov ("eax",&wparam(0));
++ &mov ($rounds,&wparam(1));
++ &mov ($key,&wparam(2));
++ &call ("_aesni_set_encrypt_key"); # build the encryption schedule first
++ &mov ($key,&wparam(2)); # reload: the callee advances $key
++ &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key
++ &test ("eax","eax");
++ &jnz (&label("dec_key_ret")); # non-zero eax is the callee's error code; return it unchanged
++ &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
++
++ &$movekey ("xmm0",&QWP(0,$key)); # just swap
++ &$movekey ("xmm1",&QWP(0,"eax"));
++ &$movekey (&QWP(0,"eax"),"xmm0");
++ &$movekey (&QWP(0,$key),"xmm1");
++ &lea ($key,&DWP(16,$key));
++ &lea ("eax",&DWP(-16,"eax"));
++
++&set_label("dec_key_inverse");
++ &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
++ &$movekey ("xmm1",&QWP(0,"eax"));
++ &aesimc ("xmm0","xmm0");
++ &aesimc ("xmm1","xmm1");
++ &lea ($key,&DWP(16,$key));
++ &lea ("eax",&DWP(-16,"eax"));
++ &cmp ("eax",$key);
++ &$movekey (&QWP(16,"eax"),"xmm0");
++ &$movekey (&QWP(-16,$key),"xmm1");
++ &ja (&label("dec_key_inverse"));
++
++ &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
++ &aesimc ("xmm0","xmm0");
++ &$movekey (&QWP(0,$key),"xmm0");
++
++ &xor ("eax","eax"); # return success
++&set_label("dec_key_ret");
++ &ret ();
++&function_end_B("${PREFIX}_set_decrypt_key");
++&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
++
++&asm_finish();
+diff -up openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl.aesni openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl
+--- openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl.aesni 2010-11-16 17:33:23.000000000 +0100
++++ openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl 2010-11-16 17:33:23.000000000 +0100
+@@ -0,0 +1,991 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for Intel AES-NI extension. In
++# OpenSSL context it's used with Intel engine, but can also be used as
++# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for
++# details].
++
++$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
++ # generates drop-in replacement for
++ # crypto/aes/asm/aes-x86_64.pl:-)
++
++$flavour = shift;
++$output = shift;
++if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
++
++$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
++die "can't locate x86_64-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # pipe everything printed to STDOUT through the translator script
++
++$movkey = $PREFIX eq "aesni" ? "movaps" : "movups";
++@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
++ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
++
++$code=".text\n";
++
++$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!!
++# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ...
++$inp="%rdi";
++$out="%rsi";
++$len="%rdx";
++$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!!
++$ivp="%r8"; # cbc
++
++$rnds_="%r10d"; # backup copy for $rounds
++$key_="%r11"; # backup copy for $key
++
++# %xmm register layout
++$inout0="%xmm0"; $inout1="%xmm1";
++$inout2="%xmm2"; $inout3="%xmm3";
++$rndkey0="%xmm4"; $rndkey1="%xmm5";
++
++$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt
++$in1="%xmm8"; $in2="%xmm9";
++\f
++# Inline version of internal aesni_[en|de]crypt1.
++#
++# Why folded loop? Because aes[enc|dec] is slow enough to accommodate
++# cycles which take care of loop variables...
++{ my $sn;
++sub aesni_generate1 {
++my ($p,$key,$rounds)=@_;
++++$sn;
++$code.=<<___;
++ $movkey ($key),$rndkey0
++ $movkey 16($key),$rndkey1
++ lea 32($key),$key
++ pxor $rndkey0,$inout0
++.Loop_${p}1_$sn:
++ aes${p} $rndkey1,$inout0
++ dec $rounds
++ $movkey ($key),$rndkey1
++ lea 16($key),$key
++ jnz .Loop_${p}1_$sn # loop body is 16 bytes
++ aes${p}last $rndkey1,$inout0
++___
++}}
++# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key);
++#
++{ my ($inp,$out,$key) = @_4args;
++
++$code.=<<___;
++.globl ${PREFIX}_encrypt
++.type ${PREFIX}_encrypt,\@abi-omnipotent
++.align 16
++${PREFIX}_encrypt:
++ movups ($inp),$inout0 # load input
++ mov 240($key),$rounds # pull $rounds
++___
++ &aesni_generate1("enc",$key,$rounds);
++$code.=<<___;
++ movups $inout0,($out) # output
++ ret
++.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
++
++.globl ${PREFIX}_decrypt
++.type ${PREFIX}_decrypt,\@abi-omnipotent
++.align 16
++${PREFIX}_decrypt:
++ movups ($inp),$inout0 # load input
++ mov 240($key),$rounds # pull $rounds
++___
++ &aesni_generate1("dec",$key,$rounds);
++$code.=<<___;
++ movups $inout0,($out) # output
++ ret
++.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt
++___
++}
++\f
++# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave
++# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec]
++# latency is 6, it turned out that it can be scheduled only every
++# *second* cycle. Thus 3x interleave is the one providing optimal
++# utilization, i.e. when subroutine's throughput is virtually same as
++# of non-interleaved subroutine [for number of input blocks up to 3].
++# This is why it makes no sense to implement 2x subroutine. As soon
++# as/if Intel improves throughput by making it possible to schedule
++# the instructions in question *every* cycle, I would have to
++# implement 6x interleave and use it in loop...
++sub aesni_generate3 {
++my $dir=shift;
++# As already mentioned it takes in $key and $rounds, which are *not*
++# preserved. $inout[0-2] is cipher/clear text...
++$code.=<<___;
++.type _aesni_${dir}rypt3,\@abi-omnipotent
++.align 16
++_aesni_${dir}rypt3:
++ $movkey ($key),$rndkey0
++ shr \$1,$rounds
++ $movkey 16($key),$rndkey1
++ lea 32($key),$key
++ pxor $rndkey0,$inout0
++ pxor $rndkey0,$inout1
++ pxor $rndkey0,$inout2
++
++.L${dir}_loop3:
++ aes${dir} $rndkey1,$inout0
++ $movkey ($key),$rndkey0
++ aes${dir} $rndkey1,$inout1
++ dec $rounds
++ aes${dir} $rndkey1,$inout2
++ aes${dir} $rndkey0,$inout0
++ $movkey 16($key),$rndkey1
++ aes${dir} $rndkey0,$inout1
++ lea 32($key),$key
++ aes${dir} $rndkey0,$inout2
++ jnz .L${dir}_loop3
++
++ aes${dir} $rndkey1,$inout0
++ $movkey ($key),$rndkey0
++ aes${dir} $rndkey1,$inout1
++ aes${dir} $rndkey1,$inout2
++ aes${dir}last $rndkey0,$inout0
++ aes${dir}last $rndkey0,$inout1
++ aes${dir}last $rndkey0,$inout2
++ ret
++.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3
++___
++}
++# 4x interleave is implemented to improve small block performance,
++# most notably [and naturally] 4 block by ~30%. One can argue that one
++# should have implemented 5x as well, but improvement would be <20%,
++# so it's not worth it...
++sub aesni_generate4 {
++my $dir=shift;
++# As already mentioned it takes in $key and $rounds, which are *not*
++# preserved. $inout[0-3] is cipher/clear text...
++$code.=<<___;
++.type _aesni_${dir}rypt4,\@abi-omnipotent
++.align 16
++_aesni_${dir}rypt4:
++ $movkey ($key),$rndkey0
++ shr \$1,$rounds
++ $movkey 16($key),$rndkey1
++ lea 32($key),$key
++ pxor $rndkey0,$inout0
++ pxor $rndkey0,$inout1
++ pxor $rndkey0,$inout2
++ pxor $rndkey0,$inout3
++
++.L${dir}_loop4:
++ aes${dir} $rndkey1,$inout0
++ $movkey ($key),$rndkey0
++ aes${dir} $rndkey1,$inout1
++ dec $rounds
++ aes${dir} $rndkey1,$inout2
++ aes${dir} $rndkey1,$inout3
++ aes${dir} $rndkey0,$inout0
++ $movkey 16($key),$rndkey1
++ aes${dir} $rndkey0,$inout1
++ lea 32($key),$key
++ aes${dir} $rndkey0,$inout2
++ aes${dir} $rndkey0,$inout3
++ jnz .L${dir}_loop4
++
++ aes${dir} $rndkey1,$inout0
++ $movkey ($key),$rndkey0
++ aes${dir} $rndkey1,$inout1
++ aes${dir} $rndkey1,$inout2
++ aes${dir} $rndkey1,$inout3
++ aes${dir}last $rndkey0,$inout0
++ aes${dir}last $rndkey0,$inout1
++ aes${dir}last $rndkey0,$inout2
++ aes${dir}last $rndkey0,$inout3
++ ret
++.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4
++___
++}
++&aesni_generate3("enc") if ($PREFIX eq "aesni");
++&aesni_generate3("dec");
++&aesni_generate4("enc") if ($PREFIX eq "aesni");
++&aesni_generate4("dec");
++\f
++if ($PREFIX eq "aesni") {
++# void aesni_ecb_encrypt (const void *in, void *out,
++# size_t length, const AES_KEY *key,
++# int enc);
++$code.=<<___;
++.globl aesni_ecb_encrypt
++.type aesni_ecb_encrypt,\@function,5
++.align 16
++aesni_ecb_encrypt:
++ cmp \$16,$len # check length
++ jb .Lecb_ret
++
++ mov 240($key),$rounds # pull $rounds
++ and \$-16,$len
++ mov $key,$key_ # backup $key
++ test %r8d,%r8d # 5th argument
++ mov $rounds,$rnds_ # backup $rounds
++ jz .Lecb_decrypt
++#--------------------------- ECB ENCRYPT ------------------------------#
++ sub \$0x40,$len
++ jbe .Lecb_enc_tail
++ jmp .Lecb_enc_loop3
++.align 16
++.Lecb_enc_loop3:
++ movups ($inp),$inout0
++ movups 0x10($inp),$inout1
++ movups 0x20($inp),$inout2
++ call _aesni_encrypt3
++ sub \$0x30,$len
++ lea 0x30($inp),$inp
++ lea 0x30($out),$out
++ movups $inout0,-0x30($out)
++ mov $rnds_,$rounds # restore $rounds
++ movups $inout1,-0x20($out)
++ mov $key_,$key # restore $key
++ movups $inout2,-0x10($out)
++ ja .Lecb_enc_loop3
++
++.Lecb_enc_tail:
++ add \$0x40,$len
++ jz .Lecb_ret
++
++ cmp \$0x10,$len
++ movups ($inp),$inout0
++ je .Lecb_enc_one
++ cmp \$0x20,$len
++ movups 0x10($inp),$inout1
++ je .Lecb_enc_two
++ cmp \$0x30,$len
++ movups 0x20($inp),$inout2
++ je .Lecb_enc_three
++ movups 0x30($inp),$inout3
++ call _aesni_encrypt4
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ movups $inout2,0x20($out)
++ movups $inout3,0x30($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_enc_one:
++___
++ &aesni_generate1("enc",$key,$rounds);
++$code.=<<___;
++ movups $inout0,($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_enc_two:
++ call _aesni_encrypt3
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_enc_three:
++ call _aesni_encrypt3
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ movups $inout2,0x20($out)
++ jmp .Lecb_ret
++\f#--------------------------- ECB DECRYPT ------------------------------#
++.align 16
++.Lecb_decrypt:
++ sub \$0x40,$len
++ jbe .Lecb_dec_tail
++ jmp .Lecb_dec_loop3
++.align 16
++.Lecb_dec_loop3:
++ movups ($inp),$inout0
++ movups 0x10($inp),$inout1
++ movups 0x20($inp),$inout2
++ call _aesni_decrypt3
++ sub \$0x30,$len
++ lea 0x30($inp),$inp
++ lea 0x30($out),$out
++ movups $inout0,-0x30($out)
++ mov $rnds_,$rounds # restore $rounds
++ movups $inout1,-0x20($out)
++ mov $key_,$key # restore $key
++ movups $inout2,-0x10($out)
++ ja .Lecb_dec_loop3
++
++.Lecb_dec_tail:
++ add \$0x40,$len
++ jz .Lecb_ret
++
++ cmp \$0x10,$len
++ movups ($inp),$inout0
++ je .Lecb_dec_one
++ cmp \$0x20,$len
++ movups 0x10($inp),$inout1
++ je .Lecb_dec_two
++ cmp \$0x30,$len
++ movups 0x20($inp),$inout2
++ je .Lecb_dec_three
++ movups 0x30($inp),$inout3
++ call _aesni_decrypt4
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ movups $inout2,0x20($out)
++ movups $inout3,0x30($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_dec_one:
++___
++ &aesni_generate1("dec",$key,$rounds);
++$code.=<<___;
++ movups $inout0,($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_dec_two:
++ call _aesni_decrypt3
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ jmp .Lecb_ret
++.align 16
++.Lecb_dec_three:
++ call _aesni_decrypt3
++ movups $inout0,($out)
++ movups $inout1,0x10($out)
++ movups $inout2,0x20($out)
++
++.Lecb_ret:
++ ret
++.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
++___
++}
++\f
++# void $PREFIX_cbc_encrypt (const void *inp, void *out,
++# size_t length, const AES_KEY *key,
++# unsigned char *ivp,const int enc);
++$reserved = $win64?0x40:-0x18; # used in decrypt
++$code.=<<___;
++.globl ${PREFIX}_cbc_encrypt
++.type ${PREFIX}_cbc_encrypt,\@function,6
++.align 16
++${PREFIX}_cbc_encrypt:
++ test $len,$len # check length
++ jz .Lcbc_ret
++
++ mov 240($key),$rnds_ # pull $rounds
++ mov $key,$key_ # backup $key
++ test %r9d,%r9d # 6th argument
++ jz .Lcbc_decrypt
++#--------------------------- CBC ENCRYPT ------------------------------#
++ movups ($ivp),$inout0 # load iv as initial state
++ cmp \$16,$len
++ mov $rnds_,$rounds
++ jb .Lcbc_enc_tail
++ sub \$16,$len
++ jmp .Lcbc_enc_loop
++.align 16
++.Lcbc_enc_loop:
++ movups ($inp),$inout1 # load input
++ lea 16($inp),$inp
++ pxor $inout1,$inout0
++___
++ &aesni_generate1("enc",$key,$rounds);
++$code.=<<___;
++ sub \$16,$len
++ lea 16($out),$out
++ mov $rnds_,$rounds # restore $rounds
++ mov $key_,$key # restore $key
++ movups $inout0,-16($out) # store output
++ jnc .Lcbc_enc_loop
++ add \$16,$len
++ jnz .Lcbc_enc_tail
++ movups $inout0,($ivp)
++ jmp .Lcbc_ret
++
++.Lcbc_enc_tail:
++ mov $len,%rcx # zaps $key
++ xchg $inp,$out # $inp is %rsi and $out is %rdi now
++ .long 0x9066A4F3 # rep movsb
++ mov \$16,%ecx # zero tail
++ sub $len,%rcx
++ xor %eax,%eax
++ .long 0x9066AAF3 # rep stosb
++ lea -16(%rdi),%rdi # rewind $out by 1 block
++ mov $rnds_,$rounds # restore $rounds
++ mov %rdi,%rsi # $inp and $out are the same
++ mov $key_,$key # restore $key
++ xor $len,$len # len=16
++ jmp .Lcbc_enc_loop # one more spin
++\f#--------------------------- CBC DECRYPT ------------------------------#
++.align 16
++.Lcbc_decrypt:
++___
++$code.=<<___ if ($win64);
++ lea -0x58(%rsp),%rsp
++ movaps %xmm6,(%rsp)
++ movaps %xmm7,0x10(%rsp)
++ movaps %xmm8,0x20(%rsp)
++ movaps %xmm9,0x30(%rsp)
++.Lcbc_decrypt_body:
++___
++$code.=<<___;
++ movups ($ivp),$iv
++ sub \$0x40,$len
++ mov $rnds_,$rounds
++ jbe .Lcbc_dec_tail
++ jmp .Lcbc_dec_loop3
++.align 16
++.Lcbc_dec_loop3:
++ movups ($inp),$inout0
++ movups 0x10($inp),$inout1
++ movups 0x20($inp),$inout2
++ movaps $inout0,$in0
++ movaps $inout1,$in1
++ movaps $inout2,$in2
++ call _aesni_decrypt3
++ sub \$0x30,$len
++ lea 0x30($inp),$inp
++ lea 0x30($out),$out
++ pxor $iv,$inout0
++ pxor $in0,$inout1
++ movaps $in2,$iv
++ pxor $in1,$inout2
++ movups $inout0,-0x30($out)
++ mov $rnds_,$rounds # restore $rounds
++ movups $inout1,-0x20($out)
++ mov $key_,$key # restore $key
++ movups $inout2,-0x10($out)
++ ja .Lcbc_dec_loop3
++
++.Lcbc_dec_tail:
++ add \$0x40,$len
++ movups $iv,($ivp)
++ jz .Lcbc_dec_ret
++
++ movups ($inp),$inout0
++ cmp \$0x10,$len
++ movaps $inout0,$in0
++ jbe .Lcbc_dec_one
++ movups 0x10($inp),$inout1
++ cmp \$0x20,$len
++ movaps $inout1,$in1
++ jbe .Lcbc_dec_two
++ movups 0x20($inp),$inout2
++ cmp \$0x30,$len
++ movaps $inout2,$in2
++ jbe .Lcbc_dec_three
++ movups 0x30($inp),$inout3
++ call _aesni_decrypt4
++ pxor $iv,$inout0
++ movups 0x30($inp),$iv
++ pxor $in0,$inout1
++ movups $inout0,($out)
++ pxor $in1,$inout2
++ movups $inout1,0x10($out)
++ pxor $in2,$inout3
++ movups $inout2,0x20($out)
++ movaps $inout3,$inout0
++ lea 0x30($out),$out
++ jmp .Lcbc_dec_tail_collected
++.align 16
++.Lcbc_dec_one:
++___
++ &aesni_generate1("dec",$key,$rounds);
++$code.=<<___;
++ pxor $iv,$inout0
++ movaps $in0,$iv
++ jmp .Lcbc_dec_tail_collected
++.align 16
++.Lcbc_dec_two:
++ call _aesni_decrypt3
++ pxor $iv,$inout0
++ pxor $in0,$inout1
++ movups $inout0,($out)
++ movaps $in1,$iv
++ movaps $inout1,$inout0
++ lea 0x10($out),$out
++ jmp .Lcbc_dec_tail_collected
++.align 16
++.Lcbc_dec_three:
++ call _aesni_decrypt3
++ pxor $iv,$inout0
++ pxor $in0,$inout1
++ movups $inout0,($out)
++ pxor $in1,$inout2
++ movups $inout1,0x10($out)
++ movaps $in2,$iv
++ movaps $inout2,$inout0
++ lea 0x20($out),$out
++ jmp .Lcbc_dec_tail_collected
++.align 16
++.Lcbc_dec_tail_collected:
++ and \$15,$len
++ movups $iv,($ivp)
++ jnz .Lcbc_dec_tail_partial
++ movups $inout0,($out)
++ jmp .Lcbc_dec_ret
++.Lcbc_dec_tail_partial:
++ movaps $inout0,$reserved(%rsp)
++ mov $out,%rdi
++ mov $len,%rcx
++ lea $reserved(%rsp),%rsi
++ .long 0x9066A4F3 # rep movsb
++
++.Lcbc_dec_ret:
++___
++$code.=<<___ if ($win64);
++ movaps (%rsp),%xmm6
++ movaps 0x10(%rsp),%xmm7
++ movaps 0x20(%rsp),%xmm8
++ movaps 0x30(%rsp),%xmm9
++ lea 0x58(%rsp),%rsp
++___
++$code.=<<___;
++.Lcbc_ret:
++ ret
++.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
++___
++\f
++# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey,
++# int bits, AES_KEY *key)
++{ my ($inp,$bits,$key) = @_4args;
++ $bits =~ s/%r/%e/;
++
++$code.=<<___;
++.globl ${PREFIX}_set_decrypt_key
++.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent
++.align 16
++${PREFIX}_set_decrypt_key:
++ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
++ call _aesni_set_encrypt_key
++ shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key
++ test %eax,%eax
++ jnz .Ldec_key_ret
++ lea 16($key,$bits),$inp # points at the end of key schedule
++
++ $movkey ($key),%xmm0 # just swap
++ $movkey ($inp),%xmm1
++ $movkey %xmm0,($inp)
++ $movkey %xmm1,($key)
++ lea 16($key),$key
++ lea -16($inp),$inp
++
++.Ldec_key_inverse:
++ $movkey ($key),%xmm0 # swap and inverse
++ $movkey ($inp),%xmm1
++ aesimc %xmm0,%xmm0
++ aesimc %xmm1,%xmm1
++ lea 16($key),$key
++ lea -16($inp),$inp
++ cmp $key,$inp
++ $movkey %xmm0,16($inp)
++ $movkey %xmm1,-16($key)
++ ja .Ldec_key_inverse
++
++ $movkey ($key),%xmm0 # inverse middle
++ aesimc %xmm0,%xmm0
++ $movkey %xmm0,($inp)
++.Ldec_key_ret:
++ add \$8,%rsp
++ ret
++.LSEH_end_set_decrypt_key:
++.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
++___
++\f
++# This is based on submission by
++#
++# Huang Ying <ying.huang@intel.com>
++# Vinodh Gopal <vinodh.gopal@intel.com>
++# Kahraman Akdemir
++#
++# Aggressively optimized with respect to aeskeygenassist's critical path
++# and is contained in %xmm0-5 to meet Win64 ABI requirement.
++#
++$code.=<<___;
++.globl ${PREFIX}_set_encrypt_key
++.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent
++.align 16
++${PREFIX}_set_encrypt_key:
++_aesni_set_encrypt_key:
++ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8
++ test $inp,$inp
++ mov \$-1,%rax
++ jz .Lenc_key_ret
++ test $key,$key
++ jz .Lenc_key_ret
++
++ movups ($inp),%xmm0 # pull first 128 bits of *userKey
++ pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0
++ lea 16($key),%rax
++ cmp \$256,$bits
++ je .L14rounds
++ cmp \$192,$bits
++ je .L12rounds
++ cmp \$128,$bits
++ jne .Lbad_keybits
++
++.L10rounds:
++ mov \$9,$bits # 10 rounds for 128-bit key
++ $movkey %xmm0,($key) # round 0
++ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1
++ call .Lkey_expansion_128_cold
++ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9
++ call .Lkey_expansion_128
++ aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10
++ call .Lkey_expansion_128
++ $movkey %xmm0,(%rax)
++ mov $bits,80(%rax) # 240(%rdx)
++ xor %eax,%eax
++ jmp .Lenc_key_ret
++
++.align 16
++.L12rounds:
++ movq 16($inp),%xmm2 # remaining 1/3 of *userKey
++ mov \$11,$bits # 12 rounds for 192
++ $movkey %xmm0,($key) # round 0
++ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2
++ call .Lkey_expansion_192a_cold
++ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3
++ call .Lkey_expansion_192b
++ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5
++ call .Lkey_expansion_192a
++ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6
++ call .Lkey_expansion_192b
++ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8
++ call .Lkey_expansion_192a
++ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9
++ call .Lkey_expansion_192b
++ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11
++ call .Lkey_expansion_192a
++ aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12
++ call .Lkey_expansion_192b
++ $movkey %xmm0,(%rax)
++ mov $bits,48(%rax) # 240(%rdx)
++ xor %rax, %rax
++ jmp .Lenc_key_ret
++
++.align 16
++.L14rounds:
++ movups 16($inp),%xmm2 # remaning half of *userKey
++ mov \$13,$bits # 14 rounds for 256
++ lea 16(%rax),%rax
++ $movkey %xmm0,($key) # round 0
++ $movkey %xmm2,16($key) # round 1
++ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2
++ call .Lkey_expansion_256a_cold
++ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4
++ call .Lkey_expansion_256a
++ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6
++ call .Lkey_expansion_256a
++ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8
++ call .Lkey_expansion_256a
++ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10
++ call .Lkey_expansion_256a
++ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12
++ call .Lkey_expansion_256a
++ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13
++ call .Lkey_expansion_256b
++ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14
++ call .Lkey_expansion_256a
++ $movkey %xmm0,(%rax)
++ mov $bits,16(%rax) # 240(%rdx)
++ xor %rax,%rax
++ jmp .Lenc_key_ret
++
++.align 16
++.Lbad_keybits:
++ mov \$-2,%rax
++.Lenc_key_ret:
++ add \$8,%rsp
++ ret
++.LSEH_end_set_encrypt_key:
++\f
++.align 16
++.Lkey_expansion_128:
++ $movkey %xmm0,(%rax)
++ lea 16(%rax),%rax
++.Lkey_expansion_128_cold:
++ shufps \$0b00010000,%xmm0,%xmm4
++ pxor %xmm4, %xmm0
++ shufps \$0b10001100,%xmm0,%xmm4
++ pxor %xmm4, %xmm0
++ pshufd \$0b11111111,%xmm1,%xmm1 # critical path
++ pxor %xmm1,%xmm0
++ ret
++
++.align 16
++.Lkey_expansion_192a:
++ $movkey %xmm0,(%rax)
++ lea 16(%rax),%rax
++.Lkey_expansion_192a_cold:
++ movaps %xmm2, %xmm5
++.Lkey_expansion_192b_warm:
++ shufps \$0b00010000,%xmm0,%xmm4
++ movaps %xmm2,%xmm3
++ pxor %xmm4,%xmm0
++ shufps \$0b10001100,%xmm0,%xmm4
++ pslldq \$4,%xmm3
++ pxor %xmm4,%xmm0
++ pshufd \$0b01010101,%xmm1,%xmm1 # critical path
++ pxor %xmm3,%xmm2
++ pxor %xmm1,%xmm0
++ pshufd \$0b11111111,%xmm0,%xmm3
++ pxor %xmm3,%xmm2
++ ret
++
++.align 16
++.Lkey_expansion_192b:
++ movaps %xmm0,%xmm3
++ shufps \$0b01000100,%xmm0,%xmm5
++ $movkey %xmm5,(%rax)
++ shufps \$0b01001110,%xmm2,%xmm3
++ $movkey %xmm3,16(%rax)
++ lea 32(%rax),%rax
++ jmp .Lkey_expansion_192b_warm
++
++.align 16
++.Lkey_expansion_256a:
++ $movkey %xmm2,(%rax)
++ lea 16(%rax),%rax
++.Lkey_expansion_256a_cold:
++ shufps \$0b00010000,%xmm0,%xmm4
++ pxor %xmm4,%xmm0
++ shufps \$0b10001100,%xmm0,%xmm4
++ pxor %xmm4,%xmm0
++ pshufd \$0b11111111,%xmm1,%xmm1 # critical path
++ pxor %xmm1,%xmm0
++ ret
++
++.align 16
++.Lkey_expansion_256b:
++ $movkey %xmm0,(%rax)
++ lea 16(%rax),%rax
++
++ shufps \$0b00010000,%xmm2,%xmm4
++ pxor %xmm4,%xmm2
++ shufps \$0b10001100,%xmm2,%xmm4
++ pxor %xmm4,%xmm2
++ pshufd \$0b10101010,%xmm1,%xmm1 # critical path
++ pxor %xmm1,%xmm2
++ ret
++.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
++___
++}
++\f
++$code.=<<___;
++.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
++.align 64
++___
++
++# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
++# CONTEXT *context,DISPATCHER_CONTEXT *disp)
++if ($win64) {
++$rec="%rcx";
++$frame="%rdx";
++$context="%r8";
++$disp="%r9";
++
++$code.=<<___;
++.extern __imp_RtlVirtualUnwind
++.type cbc_se_handler,\@abi-omnipotent
++.align 16
++cbc_se_handler:
++ push %rsi
++ push %rdi
++ push %rbx
++ push %rbp
++ push %r12
++ push %r13
++ push %r14
++ push %r15
++ pushfq
++ sub \$64,%rsp
++
++ mov 152($context),%rax # pull context->Rsp
++ mov 248($context),%rbx # pull context->Rip
++
++ lea .Lcbc_decrypt(%rip),%r10
++ cmp %r10,%rbx # context->Rip<"prologue" label
++ jb .Lin_prologue
++
++ lea .Lcbc_decrypt_body(%rip),%r10
++ cmp %r10,%rbx # context->Rip<cbc_decrypt_body
++ jb .Lrestore_rax
++
++ lea .Lcbc_ret(%rip),%r10
++ cmp %r10,%rbx # context->Rip>="epilogue" label
++ jae .Lin_prologue
++
++ lea 0(%rax),%rsi # top of stack
++ lea 512($context),%rdi # &context.Xmm6
++ mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax)
++ .long 0xa548f3fc # cld; rep movsq
++ lea 0x58(%rax),%rax # adjust stack pointer
++ jmp .Lin_prologue
++
++.Lrestore_rax:
++ mov 120($context),%rax
++.Lin_prologue:
++ mov 8(%rax),%rdi
++ mov 16(%rax),%rsi
++ mov %rax,152($context) # restore context->Rsp
++ mov %rsi,168($context) # restore context->Rsi
++ mov %rdi,176($context) # restore context->Rdi
++
++ jmp .Lcommon_seh_exit
++.size cbc_se_handler,.-cbc_se_handler
++
++.type ecb_se_handler,\@abi-omnipotent
++.align 16
++ecb_se_handler:
++ push %rsi
++ push %rdi
++ push %rbx
++ push %rbp
++ push %r12
++ push %r13
++ push %r14
++ push %r15
++ pushfq
++ sub \$64,%rsp # room for the four stack arguments stored at 32..56(%rsp) below
++
++ mov 152($context),%rax # pull context->Rsp
++ mov 8(%rax),%rdi
++ mov 16(%rax),%rsi
++ mov %rsi,168($context) # restore context->Rsi
++ mov %rdi,176($context) # restore context->Rdi
++
++.Lcommon_seh_exit: # shared tail, also entered by a jmp from cbc_se_handler
++
++ mov 40($disp),%rdi # disp->ContextRecord
++ mov $context,%rsi # context
++ mov \$154,%ecx # sizeof(CONTEXT)
++ .long 0xa548f3fc # cld; rep movsq
++
++ mov $disp,%rsi
++ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
++ mov 8(%rsi),%rdx # arg2, disp->ImageBase
++ mov 0(%rsi),%r8 # arg3, disp->ControlPc
++ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
++ mov 40(%rsi),%r10 # disp->ContextRecord
++ lea 56(%rsi),%r11 # &disp->HandlerData
++ lea 24(%rsi),%r12 # &disp->EstablisherFrame
++ mov %r10,32(%rsp) # arg5
++ mov %r11,40(%rsp) # arg6
++ mov %r12,48(%rsp) # arg7
++ mov %rcx,56(%rsp) # arg8, (NULL)
++ call *__imp_RtlVirtualUnwind(%rip)
++
++ mov \$1,%eax # ExceptionContinueSearch
++ add \$64,%rsp
++ popfq
++ pop %r15
++ pop %r14
++ pop %r13
++ pop %r12
++ pop %rbp
++ pop %rbx
++ pop %rdi
++ pop %rsi
++ ret
++.size ecb_se_handler,.-ecb_se_handler
++
++.section .pdata
++.align 4
++ .rva .LSEH_begin_${PREFIX}_ecb_encrypt
++ .rva .LSEH_end_${PREFIX}_ecb_encrypt
++ .rva .LSEH_info_ecb
++
++ .rva .LSEH_begin_${PREFIX}_cbc_encrypt
++ .rva .LSEH_end_${PREFIX}_cbc_encrypt
++ .rva .LSEH_info_cbc
++
++ .rva ${PREFIX}_set_decrypt_key
++ .rva .LSEH_end_set_decrypt_key
++ .rva .LSEH_info_key
++
++ .rva ${PREFIX}_set_encrypt_key
++ .rva .LSEH_end_set_encrypt_key
++ .rva .LSEH_info_key
++.section .xdata
++.align 8
++.LSEH_info_ecb:
++ .byte 9,0,0,0
++ .rva ecb_se_handler
++.LSEH_info_cbc:
++ .byte 9,0,0,0
++ .rva cbc_se_handler
++.LSEH_info_key:
++ .byte 0x01,0x04,0x01,0x00
++ .byte 0x04,0x02,0x00,0x00
++___
++}
++
++sub rex {
++ local *opcode=shift; # alias @opcode to the caller's array of opcode bytes
++ my ($dst,$src)=@_; # register numbers; callers put $dst in ModR/M.reg, $src in ModR/M.r/m
++ # Append a REX prefix byte only when either register number is 8 or above.
++ if ($dst>=8 || $src>=8) {
++ my $rex=0x40; # lexical, so no package-global $rex is left behind
++ $rex|=0x04 if($dst>=8); # REX.R extends the reg field
++ $rex|=0x01 if($src>=8); # REX.B extends the r/m field
++ push @opcode,$rex;
++ }
++}
++
++sub aesni {
++ my $line=shift; # text of one "aes... %xmmN" instruction captured from $code
++ my @opcode=(0x66); # every encoding built below starts with the 0x66 prefix
++ # Returns a ".byte ..." directive for a recognized mnemonic, otherwise $line unchanged.
++ if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
++ rex(\@opcode,$4,$3);
++ push @opcode,0x0f,0x3a,0xdf;
++ push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M
++ my $c=$2;
++ push @opcode,$c=~/^0/?oct($c):$c; # immediate byte; oct() also parses a 0x prefix
++ return ".byte\t".join(',',@opcode);
++ }
++ elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
++ my %opcodelet = (
++ "aesimc" => 0xdb,
++ "aesenc" => 0xdc, "aesenclast" => 0xdd,
++ "aesdec" => 0xde, "aesdeclast" => 0xdf
++ );
++ return $line if (!defined($opcodelet{$1})); # unknown mnemonic: pass through rather than erase it
++ rex(\@opcode,$3,$2);
++ push @opcode,0x0f,0x38,$opcodelet{$1};
++ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M
++ return ".byte\t".join(',',@opcode);
++ }
++ return $line;
++}
++
++$code =~ s/\`([^\`]*)\`/eval($1)/gem;
++$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
++
++print $code;
++
++close STDOUT;
+diff -up openssl-1.0.0b/crypto/aes/Makefile.aesni openssl-1.0.0b/crypto/aes/Makefile
+--- openssl-1.0.0b/crypto/aes/Makefile.aesni 2008-12-23 12:33:00.000000000 +0100
++++ openssl-1.0.0b/crypto/aes/Makefile 2010-11-16 17:33:23.000000000 +0100
+@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S
+
+ aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl
+ $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
++aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl
++ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+
+ aes-x86_64.s: asm/aes-x86_64.pl
+ $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@
++aesni-x86_64.s: asm/aesni-x86_64.pl
++ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@
+
+ aes-sparcv9.s: asm/aes-sparcv9.pl
+ $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
+diff -up openssl-1.0.0b/crypto/engine/eng_aesni.c.aesni openssl-1.0.0b/crypto/engine/eng_aesni.c
+--- openssl-1.0.0b/crypto/engine/eng_aesni.c.aesni 2010-11-16 17:33:23.000000000 +0100
++++ openssl-1.0.0b/crypto/engine/eng_aesni.c 2010-11-16 17:33:23.000000000 +0100
+@@ -0,0 +1,413 @@
++/*
++ * Support for Intel AES-NI instruction set
++ * Author: Huang Ying <ying.huang@intel.com>
++ *
++ * Intel AES-NI is a new set of Single Instruction Multiple Data
++ * (SIMD) instructions that are going to be introduced in the next
++ * generation of Intel processors, as of 2009. These instructions
++ * enable fast and secure data encryption and decryption, using the
++ * Advanced Encryption Standard (AES), defined by FIPS Publication
++ * number 197. The architecture introduces six instructions that
++ * offer full hardware support for AES. Four of them support high
++ * performance data encryption and decryption, and the other two
++ * instructions support the AES key expansion procedure.
++ *
++ * The white paper can be downloaded from:
++ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
++ *
++ * This file is based on engines/e_padlock.c
++ */
++
++/* ====================================================================
++ * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in
++ * the documentation and/or other materials provided with the
++ * distribution.
++ *
++ * 3. All advertising materials mentioning features or use of this
++ * software must display the following acknowledgment:
++ * "This product includes software developed by the OpenSSL Project
++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
++ *
++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
++ * endorse or promote products derived from this software without
++ * prior written permission. For written permission, please contact
++ * licensing@OpenSSL.org.
++ *
++ * 5. Products derived from this software may not be called "OpenSSL"
++ * nor may "OpenSSL" appear in their names without prior written
++ * permission of the OpenSSL Project.
++ *
++ * 6. Redistributions of any form whatsoever must retain the following
++ * acknowledgment:
++ * "This product includes software developed by the OpenSSL Project
++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ * ====================================================================
++ *
++ * This product includes cryptographic software written by Eric Young
++ * (eay@cryptsoft.com). This product includes software written by Tim
++ * Hudson (tjh@cryptsoft.com).
++ *
++ */
++
++
++#include <openssl/opensslconf.h>
++
++#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_NI) && !defined(OPENSSL_NO_AES)
++
++#include <stdio.h>
++#include "cryptlib.h"
++#include <openssl/dso.h>
++#include <openssl/engine.h>
++#include <openssl/evp.h>
++#include <openssl/aes.h>
++#include <openssl/err.h>
++#include <openssl/modes.h>
++
++/* AES-NI is available *ONLY* on some x86 CPUs. Not only does it
++ not exist elsewhere, but it cannot even be compiled on other
++ platforms! */
++#undef COMPILE_HW_AESNI
++#if (defined(__x86_64) || defined(__x86_64__) || \
++ defined(_M_AMD64) || defined(_M_X64) || \
++ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM)
++#define COMPILE_HW_AESNI
++static ENGINE *ENGINE_aesni (void);
++#endif
++
++void ENGINE_load_aesni (void)
++{
++/* On non-x86 CPUs it just returns. */
++#ifdef COMPILE_HW_AESNI
++ ENGINE *toadd = ENGINE_aesni();
++ if (!toadd)
++ return;
++ ENGINE_add (toadd);
++ ENGINE_register_complete (toadd);
++ ENGINE_free (toadd);
++ ERR_clear_error ();
++#endif
++}
++
++#ifdef COMPILE_HW_AESNI
++int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
++ AES_KEY *key);
++int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
++ AES_KEY *key);
++
++void aesni_encrypt(const unsigned char *in, unsigned char *out,
++ const AES_KEY *key);
++void aesni_decrypt(const unsigned char *in, unsigned char *out,
++ const AES_KEY *key);
++
++void aesni_ecb_encrypt(const unsigned char *in,
++ unsigned char *out,
++ size_t length,
++ const AES_KEY *key,
++ int enc);
++void aesni_cbc_encrypt(const unsigned char *in,
++ unsigned char *out,
++ size_t length,
++ const AES_KEY *key,
++ unsigned char *ivec, int enc);
++
++/* Function for ENGINE detection and control */
++static int aesni_init(ENGINE *e);
++
++/* Cipher Stuff */
++static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
++ const int **nids, int nid);
++
++#define AESNI_MIN_ALIGN 16 /* byte boundary that AESNI_ALIGN rounds a pointer up to */
++#define AESNI_ALIGN(x) \
++ ((void *)(((size_t)(x)+AESNI_MIN_ALIGN-1)&~(size_t)(AESNI_MIN_ALIGN-1))) /* size_t keeps all pointer bits where unsigned long is 32-bit */
++
++/* Engine names */
++static const char aesni_id[] = "aesni",
++ aesni_name[] = "Intel AES-NI engine",
++ no_aesni_name[] = "Intel AES-NI engine (no-aesni)";
++
++/* ===== Engine "management" functions ===== */
++
++#if defined(_WIN32)
++typedef unsigned __int64 IA32CAP;
++#else
++typedef unsigned long long IA32CAP;
++#endif
++
++/* Prepare the ENGINE structure for registration */
++static int
++aesni_bind_helper(ENGINE *e)
++{
++ int engage;
++ if (sizeof(OPENSSL_ia32cap_P) > 4) {
++ engage = (OPENSSL_ia32cap_P >> 57) & 1;
++ } else {
++ IA32CAP OPENSSL_ia32_cpuid(void);
++ engage = (OPENSSL_ia32_cpuid() >> 57) & 1;
++ }
++
++ /* Register everything or return with an error */
++ if (!ENGINE_set_id(e, aesni_id) ||
++ !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) ||
++
++ !ENGINE_set_init_function(e, aesni_init) ||
++ (engage && !ENGINE_set_ciphers (e, aesni_ciphers))
++ )
++ return 0;
++
++ /* Everything looks good */
++ return 1;
++}
++
++/* Constructor */
++static ENGINE *
++ENGINE_aesni(void)
++{
++ ENGINE *eng = ENGINE_new();
++
++ if (!eng) {
++ return NULL;
++ }
++
++ if (!aesni_bind_helper(eng)) {
++ ENGINE_free(eng);
++ return NULL;
++ }
++
++ return eng;
++}
++
++/* Check availability of the engine */
++static int
++aesni_init(ENGINE *e)
++{
++ return 1;
++}
++
++#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
++#define NID_aes_128_cfb NID_aes_128_cfb128
++#endif
++
++#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
++#define NID_aes_128_ofb NID_aes_128_ofb128
++#endif
++
++#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
++#define NID_aes_192_cfb NID_aes_192_cfb128
++#endif
++
++#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
++#define NID_aes_192_ofb NID_aes_192_ofb128
++#endif
++
++#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
++#define NID_aes_256_cfb NID_aes_256_cfb128
++#endif
++
++#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
++#define NID_aes_256_ofb NID_aes_256_ofb128
++#endif
++
++/* List of supported ciphers. */
++static int aesni_cipher_nids[] = {
++ NID_aes_128_ecb,
++ NID_aes_128_cbc,
++ NID_aes_128_cfb,
++ NID_aes_128_ofb,
++
++ NID_aes_192_ecb,
++ NID_aes_192_cbc,
++ NID_aes_192_cfb,
++ NID_aes_192_ofb,
++
++ NID_aes_256_ecb,
++ NID_aes_256_cbc,
++ NID_aes_256_cfb,
++ NID_aes_256_ofb,
++};
++static int aesni_cipher_nids_num =
++ (sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0]));
++
++typedef struct
++{
++ AES_KEY ks;
++ unsigned int _pad1[3];
++} AESNI_KEY;
++
++static int
++aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key,
++ const unsigned char *iv, int enc)
++{
++ int ret;
++ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
++
++ if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE
++ || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE
++ || enc)
++ ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key);
++ else
++ ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key);
++
++ if(ret < 0) {
++ EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
++ return 0;
++ }
++
++ return 1;
++}
++
++static int aesni_cipher_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out,
++ const unsigned char *in, size_t inl)
++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
++ aesni_ecb_encrypt(in, out, inl, key, ctx->encrypt);
++ return 1;
++}
++static int aesni_cipher_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out,
++ const unsigned char *in, size_t inl)
++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
++ aesni_cbc_encrypt(in, out, inl, key,
++ ctx->iv, ctx->encrypt);
++ return 1;
++}
++static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out,
++ const unsigned char *in, size_t inl)
++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
++ CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv,
++ &ctx->num, ctx->encrypt,
++ (block128_f)aesni_encrypt);
++ return 1;
++}
++static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out,
++ const unsigned char *in, size_t inl)
++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data);
++ CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv,
++ &ctx->num, (block128_f)aesni_encrypt);
++ return 1;
++}
++
++#define AES_BLOCK_SIZE 16
++
++#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
++#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
++#define EVP_CIPHER_block_size_OFB 1
++#define EVP_CIPHER_block_size_CFB 1
++
++/* Declaring so many ciphers by hand would be a pain.
++ Instead introduce a bit of preprocessor magic :-) */
++#define DECLARE_AES_EVP(ksize,lmode,umode) \
++static const EVP_CIPHER aesni_##ksize##_##lmode = { \
++ NID_aes_##ksize##_##lmode, \
++ EVP_CIPHER_block_size_##umode, \
++ ksize / 8, \
++ AES_BLOCK_SIZE, \
++ 0 | EVP_CIPH_##umode##_MODE, \
++ aesni_init_key, \
++ aesni_cipher_##lmode, \
++ NULL, \
++ sizeof(AESNI_KEY), \
++ EVP_CIPHER_set_asn1_iv, \
++ EVP_CIPHER_get_asn1_iv, \
++ NULL, \
++ NULL \
++}
++
++DECLARE_AES_EVP(128,ecb,ECB);
++DECLARE_AES_EVP(128,cbc,CBC);
++DECLARE_AES_EVP(128,cfb,CFB);
++DECLARE_AES_EVP(128,ofb,OFB);
++
++DECLARE_AES_EVP(192,ecb,ECB);
++DECLARE_AES_EVP(192,cbc,CBC);
++DECLARE_AES_EVP(192,cfb,CFB);
++DECLARE_AES_EVP(192,ofb,OFB);
++
++DECLARE_AES_EVP(256,ecb,ECB);
++DECLARE_AES_EVP(256,cbc,CBC);
++DECLARE_AES_EVP(256,cfb,CFB);
++DECLARE_AES_EVP(256,ofb,OFB);
++
++static int
++aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher,
++ const int **nids, int nid)
++{
++ /* No specific cipher => return a list of supported nids ... */
++ if (!cipher) {
++ *nids = aesni_cipher_nids;
++ return aesni_cipher_nids_num;
++ }
++
++ /* ... or the requested "cipher" otherwise */
++ switch (nid) {
++ case NID_aes_128_ecb:
++ *cipher = &aesni_128_ecb;
++ break;
++ case NID_aes_128_cbc:
++ *cipher = &aesni_128_cbc;
++ break;
++ case NID_aes_128_cfb:
++ *cipher = &aesni_128_cfb;
++ break;
++ case NID_aes_128_ofb:
++ *cipher = &aesni_128_ofb;
++ break;
++
++ case NID_aes_192_ecb:
++ *cipher = &aesni_192_ecb;
++ break;
++ case NID_aes_192_cbc:
++ *cipher = &aesni_192_cbc;
++ break;
++ case NID_aes_192_cfb:
++ *cipher = &aesni_192_cfb;
++ break;
++ case NID_aes_192_ofb:
++ *cipher = &aesni_192_ofb;
++ break;
++
++ case NID_aes_256_ecb:
++ *cipher = &aesni_256_ecb;
++ break;
++ case NID_aes_256_cbc:
++ *cipher = &aesni_256_cbc;
++ break;
++ case NID_aes_256_cfb:
++ *cipher = &aesni_256_cfb;
++ break;
++ case NID_aes_256_ofb:
++ *cipher = &aesni_256_ofb;
++ break;
++
++ default:
++ /* Sorry, we don't support this NID */
++ *cipher = NULL;
++ return 0;
++ }
++
++ return 1;
++}
++
++#endif /* COMPILE_HW_AESNI */
++#endif /* !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) && !defined(OPENSSL_NO_AES) */
+diff -up openssl-1.0.0b/crypto/engine/eng_all.c.aesni openssl-1.0.0b/crypto/engine/eng_all.c
+--- openssl-1.0.0b/crypto/engine/eng_all.c.aesni 2010-11-16 17:33:22.000000000 +0100
++++ openssl-1.0.0b/crypto/engine/eng_all.c 2010-11-16 17:33:23.000000000 +0100
+@@ -85,6 +85,9 @@ void ENGINE_load_builtin_engines(void)
+ #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV))
+ ENGINE_load_cryptodev();
+ #endif
++#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI)
++ ENGINE_load_aesni();
++#endif
+ ENGINE_load_dynamic();
+ #ifndef OPENSSL_NO_STATIC_ENGINE
+ #ifndef OPENSSL_NO_HW
+diff -up openssl-1.0.0b/crypto/engine/engine.h.aesni openssl-1.0.0b/crypto/engine/engine.h
+--- openssl-1.0.0b/crypto/engine/engine.h.aesni 2010-11-16 17:33:22.000000000 +0100
++++ openssl-1.0.0b/crypto/engine/engine.h 2010-11-16 17:33:23.000000000 +0100
+@@ -338,6 +338,7 @@ void ENGINE_load_gost(void);
+ #endif
+ #endif
+ void ENGINE_load_cryptodev(void);
++void ENGINE_load_aesni(void);
+ void ENGINE_load_builtin_engines(void);
+
+ /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation
+diff -up openssl-1.0.0b/crypto/engine/Makefile.aesni openssl-1.0.0b/crypto/engine/Makefile
+--- openssl-1.0.0b/crypto/engine/Makefile.aesni 2010-11-15 15:44:49.000000000 +0100
++++ openssl-1.0.0b/crypto/engine/Makefile 2010-11-16 17:33:23.000000000 +0100
+@@ -21,12 +21,14 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c e
+ eng_table.c eng_pkey.c eng_fat.c eng_all.c \
+ tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
+ tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
+- eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c
++ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
++ eng_aesni.c
+ LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \
+ eng_table.o eng_pkey.o eng_fat.o eng_all.o \
+ tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \
+ tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \
+- eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o
++ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \
++ eng_aesni.o
+
+ SRC= $(LIBSRC)
+
+diff -up openssl-1.0.0b/crypto/evp/evp_err.c.aesni openssl-1.0.0b/crypto/evp/evp_err.c
+--- openssl-1.0.0b/crypto/evp/evp_err.c.aesni 2010-11-16 17:33:22.000000000 +0100
++++ openssl-1.0.0b/crypto/evp/evp_err.c 2010-11-16 17:33:23.000000000 +0100
+@@ -1,6 +1,6 @@
+ /* crypto/evp/evp_err.c */
+ /* ====================================================================
+- * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved.
++ * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+@@ -70,6 +70,7 @@
+
+ static ERR_STRING_DATA EVP_str_functs[]=
+ {
++{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
+ {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"},
+ {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"},
+ {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"},
+@@ -86,7 +87,7 @@ static ERR_STRING_DATA EVP_str_functs[]=
+ {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"},
+ {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"},
+ {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"},
+-{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"},
++{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"},
+ {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"},
+ {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"},
+ {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"},
+diff -up openssl-1.0.0b/crypto/evp/evp.h.aesni openssl-1.0.0b/crypto/evp/evp.h
+--- openssl-1.0.0b/crypto/evp/evp.h.aesni 2010-11-16 17:33:22.000000000 +0100
++++ openssl-1.0.0b/crypto/evp/evp.h 2010-11-16 17:33:23.000000000 +0100
+@@ -1167,6 +1167,7 @@ void ERR_load_EVP_strings(void);
+ /* Error codes for the EVP functions. */
+
+ /* Function codes. */
++#define EVP_F_AESNI_INIT_KEY 163
+ #define EVP_F_AES_INIT_KEY 133
+ #define EVP_F_CAMELLIA_INIT_KEY 159
+ #define EVP_F_D2I_PKEY 100
+diff -up openssl-1.0.0b/test/test_aesni.aesni openssl-1.0.0b/test/test_aesni
+--- openssl-1.0.0b/test/test_aesni.aesni 2010-11-16 17:33:23.000000000 +0100
++++ openssl-1.0.0b/test/test_aesni 2010-11-16 17:33:23.000000000 +0100
+@@ -0,0 +1,69 @@
++#!/bin/sh
++
++PROG=$1
++
++if [ -x $PROG ]; then
++ if expr "x`$PROG version`" : "xOpenSSL" > /dev/null; then
++ :
++ else
++ echo "$PROG is not OpenSSL executable"
++ exit 1
++ fi
++else
++ echo "$PROG is not executable"
++ exit 1;
++fi
++
++if $PROG engine aesni | grep -v no-aesni; then
++
++ HASH=`cat $PROG | $PROG dgst -hex`
++
++ AES_ALGS=" aes-128-ecb aes-192-ecb aes-256-ecb \
++ aes-128-cbc aes-192-cbc aes-256-cbc \
++ aes-128-cfb aes-192-cfb aes-256-cfb \
++ aes-128-ofb aes-192-ofb aes-256-ofb"
++ BUFSIZE="16 32 48 64 80 96 128 144 999"
++
++ nerr=0
++
++ for alg in $AES_ALGS; do
++ echo $alg
++ for bufsize in $BUFSIZE; do
++ TEST=`( cat $PROG | \
++ $PROG enc -e -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \
++ $PROG enc -d -k "$HASH" -$alg | \
++ $PROG dgst -hex ) 2>/dev/null`
++ if [ "$TEST" != "$HASH" ]; then
++ echo "-$alg/$bufsize encrypt test failed"
++ nerr=`expr $nerr + 1`
++ fi
++ done
++ for bufsize in $BUFSIZE; do
++ TEST=`( cat $PROG | \
++ $PROG enc -e -k "$HASH" -$alg | \
++ $PROG enc -d -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \
++ $PROG dgst -hex ) 2>/dev/null`
++ if [ "$TEST" != "$HASH" ]; then
++ echo "-$alg/$bufsize decrypt test failed"
++ nerr=`expr $nerr + 1`
++ fi
++ done
++ TEST=`( cat $PROG | \
++ $PROG enc -e -k "$HASH" -$alg -engine aesni | \
++ $PROG enc -d -k "$HASH" -$alg -engine aesni | \
++ $PROG dgst -hex ) 2>/dev/null`
++ if [ "$TEST" != "$HASH" ]; then
++ echo "-$alg en/decrypt test failed"
++ nerr=`expr $nerr + 1`
++ fi
++ done
++
++ if [ $nerr -gt 0 ]; then
++ echo "AESNI engine test failed."
++ exit 1;
++ fi
++else
++ echo "AESNI engine is not available"
++fi
++
++exit 0
diff --git a/openssl-1.0.0b-ipv6-apps.patch b/openssl-1.0.0b-ipv6-apps.patch
new file mode 100644
index 0000000..b85a5d8
--- /dev/null
+++ b/openssl-1.0.0b-ipv6-apps.patch
@@ -0,0 +1,496 @@
+diff -up openssl-1.0.0b/apps/s_apps.h.ipv6-apps openssl-1.0.0b/apps/s_apps.h
+--- openssl-1.0.0b/apps/s_apps.h.ipv6-apps 2010-11-16 17:19:29.000000000 +0100
++++ openssl-1.0.0b/apps/s_apps.h 2010-11-16 17:19:29.000000000 +0100
+@@ -148,7 +148,7 @@ typedef fd_mask fd_set;
+ #define PORT_STR "4433"
+ #define PROTOCOL "tcp"
+
+-int do_server(int port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context);
++int do_server(char *port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context);
+ #ifdef HEADER_X509_H
+ int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx);
+ #endif
+@@ -156,10 +156,9 @@ int MS_CALLBACK verify_callback(int ok,
+ int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file);
+ int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key);
+ #endif
+-int init_client(int *sock, char *server, int port, int type);
++int init_client(int *sock, char *server, char *port, int type);
+ int should_retry(int i);
+-int extract_port(char *str, short *port_ptr);
+-int extract_host_port(char *str,char **host_ptr,unsigned char *ip,short *p);
++int extract_host_port(char *str,char **host_ptr,char **port_ptr);
+
+ long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp,
+ int argi, long argl, long ret);
+diff -up openssl-1.0.0b/apps/s_client.c.ipv6-apps openssl-1.0.0b/apps/s_client.c
+--- openssl-1.0.0b/apps/s_client.c.ipv6-apps 2010-11-16 17:19:29.000000000 +0100
++++ openssl-1.0.0b/apps/s_client.c 2010-11-16 17:19:29.000000000 +0100
+@@ -389,7 +389,7 @@ int MAIN(int argc, char **argv)
+ int cbuf_len,cbuf_off;
+ int sbuf_len,sbuf_off;
+ fd_set readfds,writefds;
+- short port=PORT;
++ char *port_str = PORT_STR;
+ int full_log=1;
+ char *host=SSL_HOST_NAME;
+ char *cert_file=NULL,*key_file=NULL;
+@@ -488,13 +488,12 @@ int MAIN(int argc, char **argv)
+ else if (strcmp(*argv,"-port") == 0)
+ {
+ if (--argc < 1) goto bad;
+- port=atoi(*(++argv));
+- if (port == 0) goto bad;
++ port_str= *(++argv);
+ }
+ else if (strcmp(*argv,"-connect") == 0)
+ {
+ if (--argc < 1) goto bad;
+- if (!extract_host_port(*(++argv),&host,NULL,&port))
++ if (!extract_host_port(*(++argv),&host,&port_str))
+ goto bad;
+ }
+ else if (strcmp(*argv,"-verify") == 0)
+@@ -967,7 +966,7 @@ bad:
+
+ re_start:
+
+- if (init_client(&s,host,port,socket_type) == 0)
++ if (init_client(&s,host,port_str,socket_type) == 0)
+ {
+ BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error());
+ SHUTDOWN(s);
+diff -up openssl-1.0.0b/apps/s_server.c.ipv6-apps openssl-1.0.0b/apps/s_server.c
+--- openssl-1.0.0b/apps/s_server.c.ipv6-apps 2010-11-16 17:19:29.000000000 +0100
++++ openssl-1.0.0b/apps/s_server.c 2010-11-16 17:19:29.000000000 +0100
+@@ -838,7 +838,7 @@ int MAIN(int argc, char *argv[])
+ {
+ X509_VERIFY_PARAM *vpm = NULL;
+ int badarg = 0;
+- short port=PORT;
++ char *port_str = PORT_STR;
+ char *CApath=NULL,*CAfile=NULL;
+ unsigned char *context = NULL;
+ char *dhfile = NULL;
+@@ -909,8 +909,7 @@ int MAIN(int argc, char *argv[])
+ (strcmp(*argv,"-accept") == 0))
+ {
+ if (--argc < 1) goto bad;
+- if (!extract_port(*(++argv),&port))
+- goto bad;
++ port_str= *(++argv);
+ }
+ else if (strcmp(*argv,"-verify") == 0)
+ {
+@@ -1700,9 +1699,9 @@ bad:
+ BIO_printf(bio_s_out,"ACCEPT\n");
+ (void)BIO_flush(bio_s_out);
+ if (www)
+- do_server(port,socket_type,&accept_socket,www_body, context);
++ do_server(port_str,socket_type,&accept_socket,www_body, context);
+ else
+- do_server(port,socket_type,&accept_socket,sv_body, context);
++ do_server(port_str,socket_type,&accept_socket,sv_body, context);
+ print_stats(bio_s_out,ctx);
+ ret=0;
+ end:
+diff -up openssl-1.0.0b/apps/s_socket.c.ipv6-apps openssl-1.0.0b/apps/s_socket.c
+--- openssl-1.0.0b/apps/s_socket.c.ipv6-apps 2010-07-05 13:03:22.000000000 +0200
++++ openssl-1.0.0b/apps/s_socket.c 2010-11-16 17:27:18.000000000 +0100
+@@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha
+ static void ssl_sock_cleanup(void);
+ #endif
+ static int ssl_sock_init(void);
+-static int init_client_ip(int *sock,unsigned char ip[4], int port, int type);
+-static int init_server(int *sock, int port, int type);
+-static int init_server_long(int *sock, int port,char *ip, int type);
++static int init_server(int *sock, char *port, int type);
+ static int do_accept(int acc_sock, int *sock, char **host);
+ static int host_ip(char *str, unsigned char ip[4]);
+
+@@ -234,58 +232,70 @@ static int ssl_sock_init(void)
+ return(1);
+ }
+
+-int init_client(int *sock, char *host, int port, int type)
++int init_client(int *sock, char *host, char *port, int type)
+ {
+- unsigned char ip[4];
+-
+- if (!host_ip(host,&(ip[0])))
+- {
+- return(0);
+- }
+- return(init_client_ip(sock,ip,port,type));
+- }
+-
+-static int init_client_ip(int *sock, unsigned char ip[4], int port, int type)
+- {
+- unsigned long addr;
+- struct sockaddr_in them;
+- int s,i;
++ struct addrinfo *res, *res0, hints;
++ char * failed_call = NULL;
++ int s;
++ int e;
+
+ if (!ssl_sock_init()) return(0);
+
+- memset((char *)&them,0,sizeof(them));
+- them.sin_family=AF_INET;
+- them.sin_port=htons((unsigned short)port);
+- addr=(unsigned long)
+- ((unsigned long)ip[0]<<24L)|
+- ((unsigned long)ip[1]<<16L)|
+- ((unsigned long)ip[2]<< 8L)|
+- ((unsigned long)ip[3]);
+- them.sin_addr.s_addr=htonl(addr);
+-
+- if (type == SOCK_STREAM)
+- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL);
+- else /* ( type == SOCK_DGRAM) */
+- s=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
+-
+- if (s == INVALID_SOCKET) { perror("socket"); return(0); }
++ memset(&hints, '\0', sizeof(hints));
++ hints.ai_socktype = type;
++ hints.ai_flags = AI_ADDRCONFIG;
++
++ e = getaddrinfo(host, port, &hints, &res);
++ if (e)
++ {
++ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e));
++ if (e == EAI_SYSTEM)
++ perror("getaddrinfo");
++ return (0);
++ }
+
++ res0 = res;
++ while (res)
++ {
++ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
++ if (s == INVALID_SOCKET)
++ {
++ failed_call = "socket";
++ goto nextres;
++ }
+ #if defined(SO_KEEPALIVE) && !defined(OPENSSL_SYS_MPE)
+ if (type == SOCK_STREAM)
+ {
+- i=0;
+- i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i));
+- if (i < 0) { perror("keepalive"); return(0); }
++ int i=0;
++ i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,
++ (char *)&i,sizeof(i));
++ if (i < 0) {
++ failed_call = "keepalive";
++ goto nextres;
++ }
+ }
+ #endif
+-
+- if (connect(s,(struct sockaddr *)&them,sizeof(them)) == -1)
+- { closesocket(s); perror("connect"); return(0); }
++ if (connect(s,(struct sockaddr *)res->ai_addr,
++ res->ai_addrlen) == 0)
++ {
++ freeaddrinfo(res0);
+ *sock=s;
+ return(1);
+ }
+
+-int do_server(int port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context)
++ failed_call = "connect"; /* connect() failed for this address; report it as such */
++nextres:
++ if (s != INVALID_SOCKET)
++ closesocket(s); /* portable close, as the replaced code used */
++ res = res->ai_next;
++ }
++ freeaddrinfo(res0);
++
++ perror(failed_call);
++ return(0);
++ }
++
++int do_server(char *port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context)
+ {
+ int sock;
+ char *name = NULL;
+@@ -323,33 +333,38 @@ int do_server(int port, int type, int *r
+ }
+ }
+
+-static int init_server_long(int *sock, int port, char *ip, int type)
++static int init_server(int *sock, char *port, int type)
+ {
+- int ret=0;
+- struct sockaddr_in server;
+- int s= -1;
++ struct addrinfo *res, *res0, hints;
++ char * failed_call = NULL;
++ char port_name[8];
++ int s;
++ int e;
+
+ if (!ssl_sock_init()) return(0);
+
+- memset((char *)&server,0,sizeof(server));
+- server.sin_family=AF_INET;
+- server.sin_port=htons((unsigned short)port);
+- if (ip == NULL)
+- server.sin_addr.s_addr=INADDR_ANY;
+- else
+-/* Added for T3E, address-of fails on bit field (beckman@acl.lanl.gov) */
+-#ifndef BIT_FIELD_LIMITS
+- memcpy(&server.sin_addr.s_addr,ip,4);
+-#else
+- memcpy(&server.sin_addr,ip,4);
+-#endif
++ memset(&hints, '\0', sizeof(hints));
++ hints.ai_socktype = type;
++ hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
+
+- if (type == SOCK_STREAM)
+- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL);
+- else /* type == SOCK_DGRAM */
+- s=socket(AF_INET, SOCK_DGRAM,IPPROTO_UDP);
++ e = getaddrinfo(NULL, port, &hints, &res);
++ if (e)
++ {
++ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e));
++ if (e == EAI_SYSTEM)
++ perror("getaddrinfo");
++ return (0);
++ }
+
+- if (s == INVALID_SOCKET) goto err;
++ res0 = res;
++ while (res)
++ {
++ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
++ if (s == INVALID_SOCKET)
++ {
++ failed_call = "socket";
++ goto nextres;
++ }
+ #if defined SOL_SOCKET && defined SO_REUSEADDR
+ {
+ int j = 1;
+@@ -357,35 +372,39 @@ static int init_server_long(int *sock, i
+ (void *) &j, sizeof j);
+ }
+ #endif
+- if (bind(s,(struct sockaddr *)&server,sizeof(server)) == -1)
++
++ if (bind(s,(struct sockaddr *)res->ai_addr, res->ai_addrlen) == -1)
+ {
+-#ifndef OPENSSL_SYS_WINDOWS
+- perror("bind");
+-#endif
+- goto err;
++ failed_call = "bind";
++ goto nextres;
+ }
+- /* Make it 128 for linux */
+- if (type==SOCK_STREAM && listen(s,128) == -1) goto err;
+- *sock=s;
+- ret=1;
+-err:
+- if ((ret == 0) && (s != -1))
++ if (type==SOCK_STREAM && listen(s,128) == -1)
+ {
+- SHUTDOWN(s);
++ failed_call = "listen";
++ goto nextres;
+ }
+- return(ret);
++ freeaddrinfo(res0); /* success: release the getaddrinfo() result list */
++ *sock=s;
++ return(1);
++
++nextres:
++ if (s != INVALID_SOCKET)
++ closesocket(s); /* portable close, matching the rest of this file */
++ res = res->ai_next;
+ }
++ freeaddrinfo(res0);
+
+-static int init_server(int *sock, int port, int type)
+- {
+- return(init_server_long(sock, port, NULL, type));
++ if (s == INVALID_SOCKET) { perror("socket"); return(0); }
++
++ perror(failed_call);
++ return(0);
+ }
+
+ static int do_accept(int acc_sock, int *sock, char **host)
+ {
++ static struct sockaddr_storage from;
++ char buffer[NI_MAXHOST];
+ int ret;
+- struct hostent *h1,*h2;
+- static struct sockaddr_in from;
+ int len;
+ /* struct linger ling; */
+
+@@ -432,135 +451,58 @@ redoit:
+ */
+
+ if (host == NULL) goto end;
+-#ifndef BIT_FIELD_LIMITS
+- /* I should use WSAAsyncGetHostByName() under windows */
+- h1=gethostbyaddr((char *)&from.sin_addr.s_addr,
+- sizeof(from.sin_addr.s_addr),AF_INET);
+-#else
+- h1=gethostbyaddr((char *)&from.sin_addr,
+- sizeof(struct in_addr),AF_INET);
+-#endif
+- if (h1 == NULL)
++
++ if (getnameinfo((struct sockaddr *)&from, sizeof(from),
++ buffer, sizeof(buffer),
++ NULL, 0, 0))
+ {
+- BIO_printf(bio_err,"bad gethostbyaddr\n");
++ BIO_printf(bio_err,"getnameinfo failed\n");
+ *host=NULL;
+ /* return(0); */
+ }
+ else
+ {
+- if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL)
++ if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL)
+ {
+ perror("OPENSSL_malloc");
+ return(0);
+ }
+- BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1);
+-
+- h2=GetHostByName(*host);
+- if (h2 == NULL)
+- {
+- BIO_printf(bio_err,"gethostbyname failure\n");
+- return(0);
+- }
+- if (h2->h_addrtype != AF_INET)
+- {
+- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
+- return(0);
+- }
++ strcpy(*host, buffer);
+ }
+ end:
+ *sock=ret;
+ return(1);
+ }
+
+-int extract_host_port(char *str, char **host_ptr, unsigned char *ip,
+- short *port_ptr)
++int extract_host_port(char *str, char **host_ptr,
++ char **port_ptr)
+ {
+- char *h,*p;
++ char *h,*p,*x;
+
+- h=str;
+- p=strchr(str,':');
++ x=h=str;
++ if (*h == '[')
++ {
++ h++;
++ p=strchr(h,']');
+ if (p == NULL)
+ {
+- BIO_printf(bio_err,"no port defined\n");
++ BIO_printf(bio_err,"no ending bracket for IPv6 address\n");
+ return(0);
+ }
+ *(p++)='\0';
+-
+- if ((ip != NULL) && !host_ip(str,ip))
+- goto err;
+- if (host_ptr != NULL) *host_ptr=h;
+-
+- if (!extract_port(p,port_ptr))
+- goto err;
+- return(1);
+-err:
+- return(0);
++ x = p;
+ }
+-
+-static int host_ip(char *str, unsigned char ip[4])
+- {
+- unsigned int in[4];
+- int i;
+-
+- if (sscanf(str,"%u.%u.%u.%u",&(in[0]),&(in[1]),&(in[2]),&(in[3])) == 4)
+- {
+- for (i=0; i<4; i++)
+- if (in[i] > 255)
+- {
+- BIO_printf(bio_err,"invalid IP address\n");
+- goto err;
+- }
+- ip[0]=in[0];
+- ip[1]=in[1];
+- ip[2]=in[2];
+- ip[3]=in[3];
+- }
+- else
+- { /* do a gethostbyname */
+- struct hostent *he;
+-
+- if (!ssl_sock_init()) return(0);
+-
+- he=GetHostByName(str);
+- if (he == NULL)
+- {
+- BIO_printf(bio_err,"gethostbyname failure\n");
+- goto err;
+- }
+- /* cast to short because of win16 winsock definition */
+- if ((short)he->h_addrtype != AF_INET)
++ p=strchr(x,':');
++ if (p == NULL)
+ {
+- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n");
+- return(0);
+- }
+- ip[0]=he->h_addr_list[0][0];
+- ip[1]=he->h_addr_list[0][1];
+- ip[2]=he->h_addr_list[0][2];
+- ip[3]=he->h_addr_list[0][3];
+- }
+- return(1);
+-err:
++ BIO_printf(bio_err,"no port defined\n");
+ return(0);
+ }
++ *(p++)='\0';
+
+-int extract_port(char *str, short *port_ptr)
+- {
+- int i;
+- struct servent *s;
++ if (host_ptr != NULL) *host_ptr=h;
++ if (port_ptr != NULL) *port_ptr=p;
+
+- i=atoi(str);
+- if (i != 0)
+- *port_ptr=(unsigned short)i;
+- else
+- {
+- s=getservbyname(str,"tcp");
+- if (s == NULL)
+- {
+- BIO_printf(bio_err,"getservbyname failure for %s\n",str);
+- return(0);
+- }
+- *port_ptr=ntohs((unsigned short)s->s_port);
+- }
+ return(1);
+ }
+
diff --git a/openssl-1.0.0b-version.patch b/openssl-1.0.0b-version.patch
new file mode 100644
index 0000000..bdb6ab6
--- /dev/null
+++ b/openssl-1.0.0b-version.patch
@@ -0,0 +1,22 @@
+diff -up openssl-1.0.0b/crypto/opensslv.h.version openssl-1.0.0b/crypto/opensslv.h
+--- openssl-1.0.0b/crypto/opensslv.h.version 2010-11-16 17:31:23.000000000 +0100
++++ openssl-1.0.0b/crypto/opensslv.h 2010-11-16 17:32:59.000000000 +0100
+@@ -25,7 +25,8 @@
+ * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
+ * major minor fix final patch/beta)
+ */
+-#define OPENSSL_VERSION_NUMBER 0x1000002f
++/* we have to keep the version number to not break the abi */
++#define OPENSSL_VERSION_NUMBER 0x10000003
+ #ifdef OPENSSL_FIPS
+ #define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0b-fips 16 Nov 2010"
+ #else
+@@ -83,7 +84,7 @@
+ * should only keep the versions that are binary compatible with the current.
+ */
+ #define SHLIB_VERSION_HISTORY ""
+-#define SHLIB_VERSION_NUMBER "1.0.0"
++#define SHLIB_VERSION_NUMBER "1.0.0b"
+
+
+ #endif /* HEADER_OPENSSLV_H */
diff --git a/openssl.spec b/openssl.spec
index 17a8d67..8e4a9de 100644
--- a/openssl.spec
+++ b/openssl.spec
@@ -20,8 +20,8 @@
Summary: A general purpose cryptography library with TLS implementation
Name: openssl
-Version: 1.0.0a
-Release: 3%{?dist}
+Version: 1.0.0b
+Release: 1%{?dist}
# We remove certain patented algorithms from the openssl source tarball
# with the hobble-openssl script which is included below.
Source: openssl-%{version}-usa.tar.bz2
@@ -50,7 +50,7 @@ Patch33: openssl-1.0.0-beta4-ca-dir.patch
Patch34: openssl-0.9.6-x509.patch
Patch35: openssl-0.9.8j-version-add-engines.patch
Patch38: openssl-1.0.0-beta5-cipher-change.patch
-Patch39: openssl-1.0.0-beta5-ipv6-apps.patch
+Patch39: openssl-1.0.0b-ipv6-apps.patch
Patch40: openssl-1.0.0a-fips.patch
Patch41: openssl-1.0.0-beta3-fipscheck.patch
Patch43: openssl-1.0.0a-fipsmode.patch
@@ -59,8 +59,8 @@ Patch45: openssl-0.9.8j-env-nozlib.patch
Patch47: openssl-1.0.0-beta5-readme-warning.patch
Patch49: openssl-1.0.0-beta4-algo-doc.patch
Patch50: openssl-1.0.0-beta4-dtls1-abi.patch
-Patch51: openssl-1.0.0a-version.patch
-Patch52: openssl-1.0.0-beta4-aesni.patch
+Patch51: openssl-1.0.0b-version.patch
+Patch52: openssl-1.0.0b-aesni.patch
Patch53: openssl-1.0.0-name-hash.patch
# Backported fixes including security fixes
@@ -393,6 +393,9 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
%postun -p /sbin/ldconfig
%changelog
+* Tue Nov 16 2010 Tomas Mraz <tmraz@redhat.com> 1.0.0b-1
+- new upstream version fixing CVE-2010-3864 (#649304)
+
* Tue Sep 7 2010 Tomas Mraz <tmraz@redhat.com> 1.0.0a-3
- make SHLIB_VERSION reflect the library suffix
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-06-09 12:42 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-09 12:42 [rpms/openssl] rebase_40beta: - new upstream version fixing CVE-2010-3864 (#649304) Tomas Mraz
-- strict thread matches above, loose matches on Subject: below --
2026-06-09 12:42 Tomas Mraz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox