diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-20 19:01:43 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-20 19:01:43 +0200 |
commit | 21367b283909cb71ef74cb0e89f77334e9eb6a61 (patch) | |
tree | f7880497fcebaa1f003811dfe8320e7110bee362 | |
parent | f277c9eebb91a46cbd795c34aa64ee8b6a2e448c (diff) | |
download | busybox-w32-21367b283909cb71ef74cb0e89f77334e9eb6a61.tar.gz busybox-w32-21367b283909cb71ef74cb0e89f77334e9eb6a61.tar.bz2 busybox-w32-21367b283909cb71ef74cb0e89f77334e9eb6a61.zip |
tls: reduce register pressure in i386 assembly (helps Android to compile)
function old new delta
pstm_montgomery_reduce 440 435 -5
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls_pstm_montgomery_reduce.c | 24 |
1 file changed, 24 insertions, 0 deletions
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c index 20f9c26d5..e8857f394 100644 --- a/networking/tls_pstm_montgomery_reduce.c +++ b/networking/tls_pstm_montgomery_reduce.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #define LOOP_START \ | 62 | #define LOOP_START \ |
63 | mu = c[x] * mp | 63 | mu = c[x] * mp |
64 | 64 | ||
65 | #if 0 | ||
65 | #define INNERMUL \ | 66 | #define INNERMUL \ |
66 | asm( \ | 67 | asm( \ |
67 | "movl %5,%%eax \n\t" \ | 68 | "movl %5,%%eax \n\t" \ |
@@ -74,6 +75,29 @@ asm( \ | |||
74 | :"=g"(_c[LO]), "=r"(cy) \ | 75 | :"=g"(_c[LO]), "=r"(cy) \ |
75 | :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ | 76 | :"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \ |
76 | : "%eax", "%edx", "cc") | 77 | : "%eax", "%edx", "cc") |
78 | /* | ||
79 | * The above generated "error: 'asm' operand has impossible constraints" on Android. | ||
80 | * Does their ABI reserve a register for something, leaving too few available for this asm? | ||
81 | */ | ||
82 | #else | ||
83 | /* Let's avoid two explicit "movl" by telling compiler to put input value of *tmpm++ | ||
84 | * into EAX, and to expect cy result in EDX: | ||
85 | */ | ||
86 | #define INNERMUL \ | ||
87 | asm( \ | ||
88 | "mull %4 \n\t" \ | ||
89 | "addl %3,%%eax \n\t" \ | ||
90 | "adcl $0,%%edx \n\t" \ | ||
91 | "addl %%eax,%0 \n\t" \ | ||
92 | "adcl $0,%%edx \n\t" \ | ||
93 | :"=g"(_c[LO]), "=&d"(cy) \ | ||
94 | :"0"(_c[LO]), "g"(cy), "g"(mu), "a"(*tmpm++) \ | ||
95 | :"cc") | ||
96 | /* This doesn't tell the compiler that we clobber EAX, but it probably won't need | ||
97 | * the value of *tmpm again anyway, and thus won't try to reuse EAX's contents. | ||
98 | * TODO: fix it with a dummy "=a"(clobbered_eax) output? | ||
99 | */ | ||
100 | #endif | ||
77 | 101 | ||
78 | #define PROPCARRY \ | 102 | #define PROPCARRY \ |
79 | asm( \ | 103 | asm( \ |