ntpd: try to avoid using libm. -1.2k if we succeed

uclibc's sqrt(x) is pathetic, 411 bytes? it can be ~100...

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-03 21:06:27 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-03 21:06:27 +0100
commit: d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78 (patch)
tree: e12f6abfd34d27b392e887b7c68e9c54c30094eb
parent: 510f56aa6fe62465989507b163ab737c2cf882aa (diff)
download: busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.tar.gz
busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.tar.bz2
busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.zip
1 files changed, 39 insertions, 1 deletions
diff --git a/networking/ntpd.c b/networking/ntpd.c
index 08e51ef3f..f147d8c6a 100644
--- a/networking/ntpd.c
+++ b/networking/ntpd.c
@@ -299,7 +299,45 @@ static ALWAYS_INLINE double MIND(double a, double b)
                return a;
        return b;
 }
-#define SQRT(x) (sqrt(x))
+static NOINLINE double my_SQRT(double X)
+{       /* Approximate sqrt(X) without calling libm: estimate 1/sqrt(X) from
+         * the float bit pattern of X, refine the estimate with one Newton
+         * step, and return X * estimate.
+         * NOTE(review): X is narrowed to float with no sign or range check
+         * in this function - presumably callers pass only non-negative
+         * values that fit in a float; confirm against the call sites.
+         */
+        union {
+                float   f;
+                int32_t i;
+        } v; /* overlays a float and an int32_t so the float's bits can be edited as an integer */
+        double invsqrt;
+        double Xhalf = X * 0.5; /* the (X/2) factor of the Newton step derived below */
+        /* Fast and good approximation to 1/sqrt(X), black magic */
+        v.f = X; /* double -> float conversion: the bit trick below operates on the float form */
+        /*v.i = 0x5f3759df - (v.i >> 1);*/
+        v.i = 0x5f375a86 - (v.i >> 1); /* - this constant is slightly better */
+        invsqrt = v.f; /* better than 0.2% accuracy */
+        /* Refining it using Newton's method: x1 = x0 - f(x0)/f'(x0)
+         * f(x) = 1/(x*x) - X  (f==0 when x = 1/sqrt(X))
+         * f'(x) = -2/(x*x*x)
+         * f(x)/f'(x) = (X - 1/(x*x)) / (2/(x*x*x)) = X*x*x*x/2 - x/2
+         * x1 = x0 - (X*x0*x0*x0/2 - x0/2) = 1.5*x0 - X*x0*x0*x0/2 = x0*(1.5 - (X/2)*x0*x0)
+         * From here on the arithmetic is done in double (invsqrt and Xhalf are doubles).
+         */
+        invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); /* ~0.05% accuracy */
+        /* invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); 2nd iter: ~0.0001% accuracy */
+        /* With 4 iterations, more than half results will be exact,
+         * at 6th iterations result stabilizes with about 72% results exact.
+         * We are well satisfied with 0.05% accuracy.
+         */
+        return X * invsqrt; /* X * 1/sqrt(X) ~= sqrt(X) */
+}
+static ALWAYS_INLINE double SQRT(double X)
+{       /* Returns the square root of X: libm's sqrt(X) when float is not
+         * 4 bytes wide, otherwise the my_SQRT(X) approximation.
+         */
+        /* If this arch doesn't use IEEE 754 floats, fall back to using libm.
+         * NOTE(review): only sizeof(float) is tested, as a stand-in for the
+         * float format - confirm that is sufficient for the intended targets.
+         */
+        if (sizeof(float) != 4)
+                return sqrt(X);
+        /* This avoids needing libm, saves about 1.2k on x86-32 */
+        return my_SQRT(X);
+}
 static double
 gettime1900d(void)
author	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-03 21:06:27 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-03 21:06:27 +0100
commit	d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78 (patch)
tree	e12f6abfd34d27b392e887b7c68e9c54c30094eb
parent	510f56aa6fe62465989507b163ab737c2cf882aa (diff)
download	busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.tar.gz busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.tar.bz2 busybox-w32-d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78.zip

diff --git a/networking/ntpd.c b/networking/ntpd.c
index 08e51ef3f..f147d8c6a 100644
--- a/networking/ntpd.c
+++ b/networking/ntpd.c
@@ -299,7 +299,45 @@ static ALWAYS_INLINE double MIND(double a, double b)
299	return a;	299	return a;
300	return b;	300	return b;
301	}	301	}
302	#define SQRT(x) (sqrt(x))	302	static NOINLINE double my_SQRT(double X)
		303	{
		304	union {
		305	float f;
		306	int32_t i;
		307	} v;
		308	double invsqrt;
		309	double Xhalf = X * 0.5;
		310
		311	/* Fast and good approximation to 1/sqrt(X), black magic */
		312	v.f = X;
		313	/*v.i = 0x5f3759df - (v.i >> 1);*/
		314	v.i = 0x5f375a86 - (v.i >> 1); /* - this constant is slightly better */
		315	invsqrt = v.f; /* better than 0.2% accuracy */
		316
		317	/* Refining it using Newton's method: x1 = x0 - f(x0)/f'(x0)
		318	* f(x) = 1/(x*x) - X (f==0 when x = 1/sqrt(X))
		319	* f'(x) = -2/(x*x*x)
		320	* f(x)/f'(x) = (X - 1/(x*x)) / (2/(x*x*x)) = X*x*x*x/2 - x/2
		321	* x1 = x0 - (X*x0*x0*x0/2 - x0/2) = 1.5*x0 - X*x0*x0*x0/2 = x0*(1.5 - (X/2)*x0*x0)
		322	*/
		323	invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); /* ~0.05% accuracy */
		324	/* invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); 2nd iter: ~0.0001% accuracy */
		325	/* With 4 iterations, more than half results will be exact,
		326	* at 6th iterations result stabilizes with about 72% results exact.
		327	* We are well satisfied with 0.05% accuracy.
		328	*/
		329
		330	return X * invsqrt; /* X * 1/sqrt(X) ~= sqrt(X) */
		331	}
		332	static ALWAYS_INLINE double SQRT(double X)
		333	{
		334	/* If this arch doesn't use IEEE 754 floats, fall back to using libm */
		335	if (sizeof(float) != 4)
		336	return sqrt(X);
		337
		338	/* This avoids needing libm, saves about 1.2k on x86-32 */
		339	return my_SQRT(X);
		340	}
303		341
304	static double	342	static double
305	gettime1900d(void)	343	gettime1900d(void)