diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2023-04-23 13:42:19 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2023-04-23 14:02:20 +0200 |
commit | 382e1634972f7aab883259dd22cf721d1c205055 (patch) | |
tree | c1d8a19cfa055df68f30fe3faa7018d48e1a43a8 | |
parent | 7ddd233c807f71fc5a303e89ce166bc1cbccf89e (diff) | |
download | busybox-w32-382e1634972f7aab883259dd22cf721d1c205055.tar.gz busybox-w32-382e1634972f7aab883259dd22cf721d1c205055.tar.bz2 busybox-w32-382e1634972f7aab883259dd22cf721d1c205055.zip |
factor: we can pack 21, not 20, 3-bit elements into packed wheel words
function old new delta
packed_wheel 192 184 -8
factor_main 171 163 -8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-16) Total: -16 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | coreutils/factor.c | 71 |
1 files changed, 35 insertions, 36 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c index de8ea4e11..46dd2a0d7 100644 --- a/coreutils/factor.c +++ b/coreutils/factor.c | |||
@@ -48,38 +48,40 @@ typedef unsigned long half_t; | |||
48 | * Larger wheels improve sieving only slightly, but quickly grow in size | 48 | * Larger wheels improve sieving only slightly, but quickly grow in size |
49 | * (adding just one prime, 13, results in 5766 element sieve). | 49 | * (adding just one prime, 13, results in 5766 element sieve). |
50 | */ | 50 | */ |
51 | #define R(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \ | 51 | #define R(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J,x) \ |
52 | (((uint64_t)(a<<0) | (b<<3) | (c<<6) | (d<<9) | (e<<12) | (f<<15) | (g<<18) | (h<<21) | (i<<24) | (j<<27)) << 1) | \ | 52 | (((uint64_t)(a<<0) | (b<<3) | (c<<6) | (d<<9) | (e<<12) | (f<<15) | (g<<18) | (h<<21) | (i<<24) | (j<<27)) << 1) | \ |
53 | (((uint64_t)(A<<0) | (B<<3) | (C<<6) | (D<<9) | (E<<12) | (F<<15) | (G<<18) | (H<<21) | (I<<24) | (J<<27)) << 31) | 53 | (((uint64_t)(A<<0) | (B<<3) | (C<<6) | (D<<9) | (E<<12) | (F<<15) | (G<<18) | (H<<21) | (I<<24) | (J<<27)) << 31) | \ |
54 | #define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \ | 54 | ((uint64_t)x << 61) |
55 | #define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J,x) \ | ||
55 | R( (a/2),(b/2),(c/2),(d/2),(e/2),(f/2),(g/2),(h/2),(i/2),(j/2), \ | 56 | R( (a/2),(b/2),(c/2),(d/2),(e/2),(f/2),(g/2),(h/2),(i/2),(j/2), \ |
56 | (A/2),(B/2),(C/2),(D/2),(E/2),(F/2),(G/2),(H/2),(I/2),(J/2) ) | 57 | (A/2),(B/2),(C/2),(D/2),(E/2),(F/2),(G/2),(H/2),(I/2),(J/2), \ |
58 | (x/2) \ | ||
59 | ) | ||
57 | static const uint64_t packed_wheel[] = { | 60 | static const uint64_t packed_wheel[] = { |
58 | /*1, 2, 2, 4, 2,*/ | 61 | /* 1, 2, */ |
59 | P( 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4), //01 | 62 | P( 2, 4, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6), |
60 | P( 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2, 4), //02 | 63 | P( 8, 4, 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2), |
61 | P( 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4), //03 | 64 | P( 4, 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4), |
62 | P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2), //04 | 65 | P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6), |
63 | P( 6, 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4), //05 | 66 | P( 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4, 2, 6), |
64 | P( 2, 6, 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2), //06 | 67 | P( 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2, 4, 6, 2), |
65 | P( 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6), //07 | 68 | P( 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6, 6, 2, 6, 4), |
66 | P( 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6), //08 | 69 | P( 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6, 8, 6, 4, 2,10), |
67 | P( 8, 6, 4, 2,10, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6), //09 | 70 | P( 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6, 4, 2, 4, 6, 6, 2), |
68 | P( 4, 2, 4, 6, 6, 2, 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4), //10 | 71 | P( 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 6, 6, 2, 6, 6, 4), |
69 | P( 6, 6, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2), //11 | 72 | P( 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 6, 4, 6, 2, 6, 4, 2, 4), |
70 | P( 6, 4, 6, 2, 6, 4, 2, 4, 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2), //12 | 73 | P( 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2, 4, 8,10, 6, 2, 4, 8, 6, 6), |
71 | P( 4, 8,10, 6, 2, 4, 8, 6, 6, 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2), //13 | 74 | P( 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2), |
72 | P(10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 6, 6, 4, 6, 8), //14 | 75 | P( 4, 6, 6, 2, 6, 6, 6, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6), |
73 | P( 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6, 2, 6, 6, 4, 2, 4, 6, 8, 4), //15 | 76 | P( 2, 6, 6, 4, 2, 4, 6, 8, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2), |
74 | P( 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2,10, 2, 4, 6, 8, 6, 4, 2), //16 | 77 | P(10, 2, 4, 6, 8, 6, 4, 2, 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2), |
75 | P( 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 6), //17 | 78 | P( 4, 6, 2, 6, 6, 4, 6, 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6), |
76 | P( 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2,10, 6), //18 | 79 | P( 2, 6, 4, 2,10, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4), |
77 | P( 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4, 6, 2, 4, 6, 2), //19 | 80 | P( 6, 2, 4, 6, 2,12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4), |
78 | P(12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4, 6, 2, 6, 4), //20 | 81 | P( 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6), |
79 | P( 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6, 4, 6, 2), //21 | 82 | P( 4, 6, 2, 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4), |
80 | P( 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4, 2,10), //22 | 83 | P( 2,10, 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8), |
81 | P( 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8, 6), //23 | 84 | P( 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12), |
82 | P( 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12), //24 | ||
83 | }; | 85 | }; |
84 | #undef P | 86 | #undef P |
85 | #undef R | 87 | #undef R |
@@ -93,8 +95,8 @@ static const uint64_t packed_wheel[] = { | |||
93 | * function old new delta | 95 | * function old new delta |
94 | * wheel_tab - 485 +485 | 96 | * wheel_tab - 485 +485 |
95 | * 3-bit-packed insanity: | 97 | * 3-bit-packed insanity: |
96 | * packed_wheel - 192 +192 | 98 | * packed_wheel - 184 +184 |
97 | * factor_main 108 171 +63 | 99 | * factor_main 108 163 +55 |
98 | */ | 100 | */ |
99 | static void unpack_wheel(void) | 101 | static void unpack_wheel(void) |
100 | { | 102 | { |
@@ -104,10 +106,7 @@ static void unpack_wheel(void) | |||
104 | setup_common_bufsiz(); | 106 | setup_common_bufsiz(); |
105 | wheel_tab[0] = 1; | 107 | wheel_tab[0] = 1; |
106 | wheel_tab[1] = 2; | 108 | wheel_tab[1] = 2; |
107 | wheel_tab[2] = 2; | 109 | p = &wheel_tab[2]; |
108 | wheel_tab[3] = 4; | ||
109 | wheel_tab[4] = 2; | ||
110 | p = &wheel_tab[5]; | ||
111 | for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) { | 110 | for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) { |
112 | uint64_t v = packed_wheel[i]; | 111 | uint64_t v = packed_wheel[i]; |
113 | while ((v & 0xe) != 0) { | 112 | while ((v & 0xe) != 0) { |