path: root/networking/tls_symmetric.h
author     Denys Vlasenko <vda.linux@googlemail.com>  2017-01-19 16:45:41 +0100
committer  Denys Vlasenko <vda.linux@googlemail.com>  2017-01-19 16:45:41 +0100
commit     f7806f9d8fc889f1d6cd365b69d9d99a4a5a6e26 (patch)
tree       f7dca43d7506da675080d3a3b26dcde3835ecdeb /networking/tls_symmetric.h
parent     432f1ae2ff184e07fa78bd3797073094069e521d (diff)
tls: fix ROL/ROR x86 optimization
ALWAYS_INLINE:
   function                                             old     new   delta
   psAesInitKey                                         825     824      -1
   ROR                                                    5       -      -5
   setup_mix2                                           148     134     -14
   psAesDecryptBlock                                   1184    1139     -45
   psAesEncryptBlock                                   1193    1102     -91
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-156)            Total: -156 bytes

ALWAYS_INLINE + __builtin_constant_p(shift_cnt):
   function                                             old     new   delta
   ROR                                                    5       -      -5
   psAesInitKey                                         825     818      -7
   setup_mix2                                           148     123     -25
   psAesDecryptBlock                                   1184    1078    -106
   psAesEncryptBlock                                   1193    1017    -176
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-319)            Total: -319 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
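For illustration only (not part of the commit): a minimal stand-alone sketch of the
__builtin_constant_p() dispatch the message describes, using a hypothetical rol32()
helper. It assumes GCC on i386/x86_64 and must be built with optimization (e.g. -O2)
so that the arm with the "i" constraint is discarded whenever the count is not a
compile-time constant.

    #include <stdio.h>

    /* Hypothetical rol32(), same technique as the commit's ROL:
     * when the count is a compile-time constant, the "i" constraint
     * lets GCC emit "roll $imm,%reg" and no register is burned on
     * the count; otherwise the count is passed through %cl. */
    static inline __attribute__((always_inline))
    unsigned rol32(unsigned word, int i)
    {
        if (__builtin_constant_p(i))
            asm ("roll %2,%0"
                : "=r" (word)
                : "0" (word), "i" (i));
        else
            asm ("roll %%cl,%0"
                : "=r" (word)
                : "0" (word), "c" (i));
        return word;
    }

    int main(void)
    {
        volatile int n = 8;                      /* runtime count: %cl path */
        printf("%08x\n", rol32(0x12345678, 4));  /* constant path: 23456781 */
        printf("%08x\n", rol32(0x12345678, n));  /* %cl path:      34567812 */
        return 0;
    }

Because always_inline propagates the caller's argument into the function body,
__builtin_constant_p(i) can evaluate to true per call site, which is what makes
the per-site choice between the two encodings possible.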
Diffstat (limited to 'networking/tls_symmetric.h')
-rw-r--r--  networking/tls_symmetric.h | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/networking/tls_symmetric.h b/networking/tls_symmetric.h
index b6b55c78c..8488b437e 100644
--- a/networking/tls_symmetric.h
+++ b/networking/tls_symmetric.h
@@ -7,9 +7,6 @@
 
 /* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
  * Changes are flagged with //bbox
- * TODO:
- * Take a look at "roll %%cl" part... rotates by constant use fewer registers,
- * and on many Intel CPUs rotates by %cl are slower: they take 2 cycles, not 1.
  */
 
 /******************************************************************************/
@@ -28,16 +25,28 @@
 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
     !defined(INTEL_CC) && !defined(PS_NO_ASM)
 
-static inline unsigned ROL(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROL(unsigned word, int i)
 {
+    if (__builtin_constant_p(i)) //box
+        // Rotates by constant use fewer registers,
+        // and on many Intel CPUs rotates by %cl take 2 cycles, not 1.
+        asm ("roll %2,%0" //box
+            :"=r" (word)
+            :"0" (word),"i" (i));
+    else //box
     asm ("roll %%cl,%0"
         :"=r" (word)
         :"0" (word),"c" (i));
     return word;
 }
 
-static inline unsigned ROR(unsigned word, int i)
+static ALWAYS_INLINE unsigned ROR(unsigned word, int i)
 {
+    if (__builtin_constant_p(i)) //box
+        asm ("rorl %2,%0" //box
+            :"=r" (word)
+            :"0" (word),"i" (i));
+    else //box
     asm ("rorl %%cl,%0"
         :"=r" (word)
         :"0" (word),"c" (i));