openssl: use asm SHA1, SHA256, AES on MIPS.
author Felix Fietkau <nbd@openwrt.org>
Mon, 24 Sep 2012 15:23:57 +0000 (15:23 +0000)
committer Felix Fietkau <nbd@openwrt.org>
Mon, 24 Sep 2012 15:23:57 +0000 (15:23 +0000)
On a lightly loaded 24Kc, 'openssl speed sha1' shows a speedup of between
27% and 120%, depending on block size.
SHA1 is notably used in Transmission for piece verification.
Patch by Catalin Patulea

SVN-Revision: 33531

package/openssl/Makefile
package/openssl/patches/110-optimize-for-size.patch
package/openssl/patches/210-fix_aes_mips.patch [new file with mode: 0644]

index d95783a..cfc3b3d 100644 (file)
@@ -88,8 +88,13 @@ endif
 ifeq ($(CONFIG_x86_64),y)
   OPENSSL_TARGET:=linux-x86_64
 else
-  OPENSSL_TARGET:=linux-openwrt
-  OPENSSL_OPTIONS+=no-perlasm no-sse2
+  OPENSSL_OPTIONS+=no-sse2
+  ifeq ($(CONFIG_mips)$(CONFIG_mipsel),y)
+    OPENSSL_TARGET:=linux-mips-openwrt
+  else
+    OPENSSL_TARGET:=linux-generic-openwrt
+    OPENSSL_OPTIONS+=no-perlasm
+  endif
 endif
 
 define Build/Configure
index c0160a3..f301017 100644 (file)
@@ -1,11 +1,13 @@
 --- a/Configure
 +++ b/Configure
-@@ -400,6 +400,8 @@ my %table=(
- "linux-alpha+bwx-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+@@ -401,6 +401,10 @@ my %table=(
  "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
  "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
-+# OpenWrt targets
-+"linux-openwrt","gcc:-DTERMIO \$(OPENWRT_OPTIMIZATION_FLAGS) -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
  
++# OpenWrt targets
++"linux-mips-openwrt","gcc:-DTERMIO \$(OPENWRT_OPTIMIZATION_FLAGS) -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-generic-openwrt","gcc:-DTERMIO \$(OPENWRT_OPTIMIZATION_FLAGS) -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++
  # Android: linux-* but without -DTERMIO and pointers to headers and libs.
  "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
diff --git a/package/openssl/patches/210-fix_aes_mips.patch b/package/openssl/patches/210-fix_aes_mips.patch
new file mode 100644 (file)
index 0000000..bbfc63a
--- /dev/null
@@ -0,0 +1,64 @@
+--- a/crypto/aes/asm/aes-mips.pl
++++ b/crypto/aes/asm/aes-mips.pl
+@@ -1036,9 +1036,9 @@ _mips_AES_set_encrypt_key:
+       nop
+ .end  _mips_AES_set_encrypt_key
+-.globl        AES_set_encrypt_key
+-.ent  AES_set_encrypt_key
+-AES_set_encrypt_key:
++.globl        private_AES_set_encrypt_key
++.ent  private_AES_set_encrypt_key
++private_AES_set_encrypt_key:
+       .frame  $sp,$FRAMESIZE,$ra
+       .mask   $SAVED_REGS_MASK,-$SZREG
+       .set    noreorder
+@@ -1060,7 +1060,7 @@ $code.=<<___ if ($flavour =~ /nubi/i);   #
+ ___
+ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
+       .cplocal        $Tbl
+-      .cpsetup        $pf,$zero,AES_set_encrypt_key
++      .cpsetup        $pf,$zero,private_AES_set_encrypt_key
+ ___
+ $code.=<<___;
+       .set    reorder
+@@ -1083,7 +1083,7 @@ ___
+ $code.=<<___;
+       jr      $ra
+       $PTR_ADD $sp,$FRAMESIZE
+-.end  AES_set_encrypt_key
++.end  private_AES_set_encrypt_key
+ ___
\f
+ my ($head,$tail)=($inp,$bits);
+@@ -1091,9 +1091,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$
+ my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
+ $code.=<<___;
+ .align        5
+-.globl        AES_set_decrypt_key
+-.ent  AES_set_decrypt_key
+-AES_set_decrypt_key:
++.globl        private_AES_set_decrypt_key
++.ent  private_AES_set_decrypt_key
++private_AES_set_decrypt_key:
+       .frame  $sp,$FRAMESIZE,$ra
+       .mask   $SAVED_REGS_MASK,-$SZREG
+       .set    noreorder
+@@ -1115,7 +1115,7 @@ $code.=<<___ if ($flavour =~ /nubi/i);   #
+ ___
+ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
+       .cplocal        $Tbl
+-      .cpsetup        $pf,$zero,AES_set_decrypt_key
++      .cpsetup        $pf,$zero,private_AES_set_decrypt_key
+ ___
+ $code.=<<___;
+       .set    reorder
+@@ -1226,7 +1226,7 @@ ___
+ $code.=<<___;
+       jr      $ra
+       $PTR_ADD $sp,$FRAMESIZE
+-.end  AES_set_decrypt_key
++.end  private_AES_set_decrypt_key
+ ___
+ }}}