aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Novoa III <mjn3@codepoet.org>2001-09-11 01:11:31 +0000
committerManuel Novoa III <mjn3@codepoet.org>2001-09-11 01:11:31 +0000
commit6a9d1f652b20f53dfd64e260d2796d55ad973c98 (patch)
treea01f496c00bceef7c8fec6e2c3e80c473be020c5
parentf6ecaccf929c470b2eb4803af35df4ffaee497b1 (diff)
downloadbusybox-w32-6a9d1f652b20f53dfd64e260d2796d55ad973c98.tar.gz
busybox-w32-6a9d1f652b20f53dfd64e260d2796d55ad973c98.tar.bz2
busybox-w32-6a9d1f652b20f53dfd64e260d2796d55ad973c98.zip
Commit my changes to arith.c which fixed a couple of bugs and decreased
code size. Please read the TODO comments regarding accessing shell variables from the arith() funciton.
-rw-r--r--libbb/arith.c396
1 files changed, 253 insertions, 143 deletions
diff --git a/libbb/arith.c b/libbb/arith.c
index 04c45ec3d..362f7bb2b 100644
--- a/libbb/arith.c
+++ b/libbb/arith.c
@@ -24,19 +24,88 @@
24 * as a replacement for yacc-based parsers. However, it may well be faster 24 * as a replacement for yacc-based parsers. However, it may well be faster
25 * than a comparable parser writen in yacc. The supported operators are 25 * than a comparable parser writen in yacc. The supported operators are
26 * listed in #defines below. Parens, order of operations, and error handling 26 * listed in #defines below. Parens, order of operations, and error handling
27 * are supported. This code is threadsafe. */ 27 * are supported. This code is threadsafe. The exact expression format should
28 * be that which POSIX specifies for shells. */
29
30/* The code uses a simple two-stack algorithm. See
31 * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
32 * for a detailed explaination of the infix-to-postfix algorithm on which
33 * this is based (this code differs in that it applies operators immediately
34 * to the stack instead of adding them to a queue to end up with an
35 * expression). */
36
37/* To use the routine, call it with an expression string and error return
38 * pointer */
28 39
29/* To use the routine, call it with an expression string. It returns an 40/*
30 * integer result. You will also need to define an "error" function 41 * Aug 24, 2001 Manuel Novoa III
31 * that takes printf arguments and _does not return_, or modify the code 42 *
32 * to use another error mechanism. */ 43 * Reduced the generated code size by about 30% (i386) and fixed several bugs.
44 *
45 * 1) In arith_apply():
46 * a) Cached values of *numptr and &(numptr[-1]).
47 * b) Removed redundant test for zero denominator.
48 *
49 * 2) In arith():
50 * a) Eliminated redundant code for processing operator tokens by moving
51 * to a table-based implementation. Also folded handling of parens
52 * into the table.
53 * b) Combined all 3 loops which called arith_apply to reduce generated
54 * code size at the cost of speed.
55 *
56 * 3) The following expressions were treated as valid by the original code:
57 * 1() , 0! , 1 ( *3 ) .
58 * These bugs have been fixed by internally enclosing the expression in
59 * parens and then checking that all binary ops and right parens are
60 * preceded by a valid expression (NUM_TOKEN).
61 *
62 * Note: It may be desireable to replace Aaron's test for whitespace with
63 * ctype's isspace() if it is used by another busybox applet or if additional
64 * whitespace chars should be considered. Look below the "#include"s for a
65 * precompiler test.
66 */
67
68/*
69 * Aug 26, 2001 Manuel Novoa III
70 *
71 * Return 0 for null expressions. Pointed out by vodz.
72 *
73 * Merge in Aaron's comments previously posted to the busybox list,
74 * modified slightly to take account of my changes to the code.
75 *
76 * TODO: May want to allow access to variables in the arith code.
77 * This would:
78 * 1) allow us to evaluate $A as 0 if A isn't set (although this
79 * would require changes to ash.c too).
80 * 2) allow us to write expressions as $(( A + 2 )).
81 * This could be done using a callback function passed to the
82 * arith() function of by requiring such a function with fixed
83 * name as an extern.
84 */
33 85
34#include <stdlib.h> 86#include <stdlib.h>
35#include <string.h> 87#include <string.h>
88#include <ctype.h>
36#include "libbb.h" 89#include "libbb.h"
37 90
91/*
92 * Use "#if 1" below for Aaron's original test for whitespace.
93 * Use "#if 0" for ctype's isspace().
94 * */
95#if 1
96#undef isspace
97#define isspace(arithval) \
98 (arithval == ' ' || arithval == '\n' || arithval == '\t')
99#endif
100
38typedef char operator; 101typedef char operator;
39 102
103/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
104 * precedence, and high 3 are an ID unique accross operators of that
105 * precedence. The ID portion is so that multiple operators can have the
106 * same precedence, ensuring that the leftmost one is evaluated first.
107 * Consider * and /. */
108
40#define tok_decl(prec,id) (((id)<<5)|(prec)) 109#define tok_decl(prec,id) (((id)<<5)|(prec))
41#define PREC(op) ((op)&0x1F) 110#define PREC(op) ((op)&0x1F)
42 111
@@ -70,194 +139,235 @@ typedef char operator;
70#define TOK_DIV tok_decl(10,1) 139#define TOK_DIV tok_decl(10,1)
71#define TOK_REM tok_decl(10,2) 140#define TOK_REM tok_decl(10,2)
72 141
142/* For now all unary operators have the same precedence, and that's used to
143 * identify them as unary operators */
73#define UNARYPREC 14 144#define UNARYPREC 14
74#define TOK_BNOT tok_decl(UNARYPREC,0) 145#define TOK_BNOT tok_decl(UNARYPREC,0)
75#define TOK_NOT tok_decl(UNARYPREC,1) 146#define TOK_NOT tok_decl(UNARYPREC,1)
76#define TOK_UMINUS tok_decl(UNARYPREC,2) 147#define TOK_UMINUS tok_decl(UNARYPREC,2)
148#define TOK_UPLUS tok_decl(UNARYPREC,3)
77 149
78#define TOK_NUM tok_decl(15,0) 150#define TOK_NUM tok_decl(15,0)
151#define TOK_RPAREN tok_decl(15,1)
152#define TOK_ERROR tok_decl(15,2) /* just a place-holder really */
79 153
80#define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr) 154#define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
81#define NUMPTR (*numstackptr) 155#define NUMPTR (*numstackptr)
156
157/* "applying" a token means performing it on the top elements on the integer
158 * stack. For a unary operator it will only change the top element, but a
159 * binary operator will pop two arguments and push a result */
82static short arith_apply(operator op, long *numstack, long **numstackptr) 160static short arith_apply(operator op, long *numstack, long **numstackptr)
83{ 161{
84 if (NUMPTR == numstack) goto err; 162 long numptr_val;
163 long *NUMPTR_M1;
164
165 if (NUMPTR == numstack) goto err; /* There is no operator that can work
166 without arguments */
167 NUMPTR_M1 = NUMPTR - 1;
85 if (op == TOK_UMINUS) 168 if (op == TOK_UMINUS)
86 NUMPTR[-1] *= -1; 169 *NUMPTR_M1 *= -1;
87 else if (op == TOK_NOT) 170 else if (op == TOK_NOT)
88 NUMPTR[-1] = !(NUMPTR[-1]); 171 *NUMPTR_M1 = !(*NUMPTR_M1);
89 else if (op == TOK_BNOT) 172 else if (op == TOK_BNOT)
90 NUMPTR[-1] = ~(NUMPTR[-1]); 173 *NUMPTR_M1 = ~(*NUMPTR_M1);
91 174 else if (op != TOK_UPLUS) {
92 /* Binary operators */ 175 /* Binary operators */
93 else { 176 if (NUMPTR_M1 == numstack) goto err; /* ... and binary operators need two
94 if (NUMPTR-1 == numstack) goto err; 177 arguments */
95 --NUMPTR; 178 numptr_val = *--NUMPTR; /* ... and they pop one */
179 NUMPTR_M1 = NUMPTR - 1;
96 if (op == TOK_BOR) 180 if (op == TOK_BOR)
97 NUMPTR[-1] |= *NUMPTR; 181 *NUMPTR_M1 |= numptr_val;
98 else if (op == TOK_OR) 182 else if (op == TOK_OR)
99 NUMPTR[-1] = *NUMPTR || NUMPTR[-1]; 183 *NUMPTR_M1 = numptr_val || *NUMPTR_M1;
100 else if (op == TOK_BAND) 184 else if (op == TOK_BAND)
101 NUMPTR[-1] &= *NUMPTR; 185 *NUMPTR_M1 &= numptr_val;
102 else if (op == TOK_AND) 186 else if (op == TOK_AND)
103 NUMPTR[-1] = NUMPTR[-1] && *NUMPTR; 187 *NUMPTR_M1 = *NUMPTR_M1 && numptr_val;
104 else if (op == TOK_EQ) 188 else if (op == TOK_EQ)
105 NUMPTR[-1] = (NUMPTR[-1] == *NUMPTR); 189 *NUMPTR_M1 = (*NUMPTR_M1 == numptr_val);
106 else if (op == TOK_NE) 190 else if (op == TOK_NE)
107 NUMPTR[-1] = (NUMPTR[-1] != *NUMPTR); 191 *NUMPTR_M1 = (*NUMPTR_M1 != numptr_val);
108 else if (op == TOK_GE) 192 else if (op == TOK_GE)
109 NUMPTR[-1] = (NUMPTR[-1] >= *NUMPTR); 193 *NUMPTR_M1 = (*NUMPTR_M1 >= numptr_val);
110 else if (op == TOK_RSHIFT) 194 else if (op == TOK_RSHIFT)
111 NUMPTR[-1] >>= *NUMPTR; 195 *NUMPTR_M1 >>= numptr_val;
112 else if (op == TOK_LSHIFT) 196 else if (op == TOK_LSHIFT)
113 NUMPTR[-1] <<= *NUMPTR; 197 *NUMPTR_M1 <<= numptr_val;
114 else if (op == TOK_GT) 198 else if (op == TOK_GT)
115 NUMPTR[-1] = (NUMPTR[-1] > *NUMPTR); 199 *NUMPTR_M1 = (*NUMPTR_M1 > numptr_val);
116 else if (op == TOK_LT) 200 else if (op == TOK_LT)
117 NUMPTR[-1] = (NUMPTR[-1] < *NUMPTR); 201 *NUMPTR_M1 = (*NUMPTR_M1 < numptr_val);
118 else if (op == TOK_LE) 202 else if (op == TOK_LE)
119 NUMPTR[-1] = (NUMPTR[-1] <= *NUMPTR); 203 *NUMPTR_M1 = (*NUMPTR_M1 <= numptr_val);
120 else if (op == TOK_MUL) 204 else if (op == TOK_MUL)
121 NUMPTR[-1] *= *NUMPTR; 205 *NUMPTR_M1 *= numptr_val;
122 else if (op == TOK_DIV) {
123 if(*NUMPTR==0)
124 return -2;
125 NUMPTR[-1] /= *NUMPTR;
126 }
127 else if (op == TOK_REM) {
128 if(*NUMPTR==0)
129 return -2;
130 NUMPTR[-1] %= *NUMPTR;
131 }
132 else if (op == TOK_ADD) 206 else if (op == TOK_ADD)
133 NUMPTR[-1] += *NUMPTR; 207 *NUMPTR_M1 += numptr_val;
134 else if (op == TOK_SUB) 208 else if (op == TOK_SUB)
135 NUMPTR[-1] -= *NUMPTR; 209 *NUMPTR_M1 -= numptr_val;
210 else if(numptr_val==0) /* zero divisor check */
211 return -2;
212 else if (op == TOK_DIV)
213 *NUMPTR_M1 /= numptr_val;
214 else if (op == TOK_REM)
215 *NUMPTR_M1 %= numptr_val;
216 /* WARNING!!! WARNING!!! WARNING!!! */
217 /* Any new operators should be added BEFORE the zero divisor check! */
136 } 218 }
137 return 0; 219 return 0;
138err: return(-1); 220err: return(-1);
139} 221}
140 222
141extern long arith (const char *startbuf, int *errcode) 223static const char endexpression[] = ")";
142{
143 register char arithval;
144 const char *expr = startbuf;
145 224
146 operator lasttok = TOK_MUL, op; 225/* + and - (in that order) must be last */
147 size_t datasizes = strlen(startbuf); 226static const char op_char[] = "!<>=|&*/%~()+-";
227static const char op_token[] = {
228 /* paired with equal */
229 TOK_NE, TOK_LE, TOK_GE,
230 /* paired with self -- note: ! is special-cased below*/
231 TOK_ERROR, TOK_LSHIFT, TOK_RSHIFT, TOK_EQ, TOK_OR, TOK_AND,
232 /* singles */
233 TOK_NOT, TOK_LT, TOK_GT, TOK_ERROR, TOK_BOR, TOK_BAND,
234 TOK_MUL, TOK_DIV, TOK_REM, TOK_BNOT, TOK_LPAREN, TOK_RPAREN,
235 TOK_ADD, TOK_SUB, TOK_UPLUS, TOK_UMINUS
236};
237
238#define NUM_PAIR_EQUAL 3
239#define NUM_PAIR_SAME 6
240
241extern long arith (const char *expr, int *errcode)
242{
243 register char arithval; /* Current character under analysis */
244 operator lasttok, op;
148 unsigned char prec; 245 unsigned char prec;
149 246
150 long *numstack, *numstackptr; 247 const char *p = endexpression;
151 operator *stack = alloca(datasizes * sizeof(operator)), *stackptr = stack;
152 248
153 *errcode = 0; 249 size_t datasizes = strlen(expr);
154 numstack = alloca((datasizes/2+1)*sizeof(long)), numstackptr = numstack;
155 250
156 while ((arithval = *expr)) { 251 /* Stack of integers */
157 if (arithval == ' ' || arithval == '\n' || arithval == '\t') 252 /* The proof that there can be no more than strlen(startbuf)/2+1 integers
158 goto prologue; 253 * in any given correct or incorrect expression is left as an excersize to
254 * the reader. */
255 long *numstack = alloca((datasizes/2)*sizeof(long)),
256 *numstackptr = numstack;
257 /* Stack of operator tokens */
258 operator *stack = alloca((datasizes+1) * sizeof(operator)),
259 *stackptr = stack;
260
261 *stackptr++ = lasttok = TOK_LPAREN; /* start off with a left paren */
262
263 loop:
264 if ((arithval = *expr) == 0) {
265 if (p == endexpression) { /* Null expression. */
266 return (*errcode = 0);
267 }
268
269 /* This is only reached after all tokens have been extracted from the
270 * input stream. If there are still tokens on the operator stack, they
271 * are to be applied in order. At the end, there should be a final
272 * result on the integer stack */
273
274 if (expr != endexpression + 1) { /* If we haven't done so already, */
275 expr = endexpression; /* append a closing right paren */
276 goto loop; /* and let the loop process it. */
277 }
278 /* At this point, we're done with the expression. */
279 if (numstackptr != numstack+1) {/* ... but if there isn't, it's bad */
280 err:
281 return (*errcode = -1);
282 /* NOTREACHED */
283 }
284 return *numstack;
285 } else {
286 /* Continue processing the expression. */
287 if (isspace(arithval)) {
288 goto prologue; /* Skip whitespace */
289 }
159 if ((unsigned)arithval-'0' <= 9) /* isdigit */ { 290 if ((unsigned)arithval-'0' <= 9) /* isdigit */ {
160 *numstackptr++ = strtol(expr, (char **) &expr, 10); 291 *numstackptr++ = strtol(expr, (char **) &expr, 10);
161 lasttok = TOK_NUM; 292 lasttok = TOK_NUM;
162 continue; 293 goto loop;
163 } if (arithval == '(') { 294 }
164 *stackptr++ = TOK_LPAREN; 295#if 1
165 lasttok = TOK_LPAREN; 296 if ((p = strchr(op_char, arithval)) == NULL) {
166 goto prologue; 297 goto err;
167 } if (arithval == ')') { 298 }
168 lasttok = TOK_NUM; 299#else
169 while (stackptr != stack) { 300 for ( p=op_char ; *p != arithval ; p++ ) {
170 op = *--stackptr; 301 if (!*p) {
171 if (op == TOK_LPAREN) 302 goto err;
172 goto prologue;
173 *errcode = ARITH_APPLY(op);
174 if(*errcode) return *errcode;
175 }
176 goto err; /* Mismatched parens */
177 } if (arithval == '|') {
178 if (*++expr == '|')
179 op = TOK_OR;
180 else {
181 --expr;
182 op = TOK_BOR;
183 }
184 } else if (arithval == '&') {
185 if (*++expr == '&')
186 op = TOK_AND;
187 else {
188 --expr;
189 op = TOK_BAND;
190 } 303 }
191 } else if (arithval == '=') { 304 }
192 if (*++expr != '=') goto err; /* Unknown token */ 305#endif
193 op = TOK_EQ; 306 p = op_token + (int)(p - op_char);
194 } else if (arithval == '!') { 307 ++expr;
195 if (*++expr == '=') 308 if ((p >= op_token + NUM_PAIR_EQUAL) || (*expr != '=')) {
196 op = TOK_NE; 309 p += NUM_PAIR_EQUAL;
197 else { 310 if ((p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL)
311 || (*expr != arithval) || (arithval == '!')) {
198 --expr; 312 --expr;
199 op = TOK_NOT; 313 if (arithval == '=') { /* single = */
200 } 314 goto err;
201 } else if (arithval == '>') { 315 }
202 switch (*++expr) { 316 p += NUM_PAIR_SAME;
203 case '=': 317 /* Plus and minus are binary (not unary) _only_ if the last
204 op = TOK_GE; 318 * token was as number, or a right paren (which pretends to be
205 break; 319 * a number, since it evaluates to one). Think about it.
206 case '>': 320 * It makes sense. */
207 op = TOK_RSHIFT; 321 if ((lasttok != TOK_NUM)
208 break; 322 && (p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL
209 default: 323 + sizeof(op_char) - 2)) {
210 --expr; 324 p += 2; /* Unary plus or minus */
211 op = TOK_GT; 325 }
212 } 326 }
213 } else if (arithval == '<') { 327 }
214 switch (*++expr) { 328 op = *p;
215 case '=':
216 op = TOK_LE;
217 break;
218 case '<':
219 op = TOK_LSHIFT;
220 break;
221 default:
222 --expr;
223 op = TOK_LT;
224 }
225 } else if (arithval == '*')
226 op = TOK_MUL;
227 else if (arithval == '/')
228 op = TOK_DIV;
229 else if (arithval == '%')
230 op = TOK_REM;
231 else if (arithval == '+') {
232 if (lasttok != TOK_NUM) goto prologue; /* Unary plus */
233 op = TOK_ADD;
234 } else if (arithval == '-')
235 op = (lasttok == TOK_NUM) ? TOK_SUB : TOK_UMINUS;
236 else if (arithval == '~')
237 op = TOK_BNOT;
238 else goto err; /* Unknown token */
239 329
330 /* We don't want a unary operator to cause recursive descent on the
331 * stack, because there can be many in a row and it could cause an
332 * operator to be evaluated before its argument is pushed onto the
333 * integer stack. */
334 /* But for binary operators, "apply" everything on the operator
335 * stack until we find an operator with a lesser priority than the
336 * one we have just extracted. */
337 /* Left paren is given the lowest priority so it will never be
338 * "applied" in this way */
240 prec = PREC(op); 339 prec = PREC(op);
241 if (prec != UNARYPREC) 340 if ((prec > 0) && (prec != UNARYPREC)) { /* not left paren or unary */
242 while (stackptr != stack && PREC(stackptr[-1]) >= prec) { 341 if (lasttok != TOK_NUM) { /* binary op must be preceded by a num */
342 goto err;
343 }
344 while (stackptr != stack) {
345 if (op == TOK_RPAREN) {
346 /* The algorithm employed here is simple: while we don't
347 * hit an open paren nor the bottom of the stack, pop
348 * tokens and apply them */
349 if (stackptr[-1] == TOK_LPAREN) {
350 --stackptr;
351 lasttok = TOK_NUM; /* Any operator directly after a */
352 /* close paren should consider itself binary */
353 goto prologue;
354 }
355 } else if (PREC(stackptr[-1]) < prec) {
356 break;
357 }
243 *errcode = ARITH_APPLY(*--stackptr); 358 *errcode = ARITH_APPLY(*--stackptr);
244 if(*errcode) return *errcode; 359 if(*errcode) return *errcode;
245 } 360 }
246 *stackptr++ = op; 361 if (op == TOK_RPAREN) {
247 lasttok = op; 362 goto err;
248prologue: ++expr; 363 }
249 } /* yay */ 364 }
250
251 while (stackptr != stack) {
252 *errcode = ARITH_APPLY(*--stackptr);
253 if(*errcode) return *errcode;
254 }
255 if (numstackptr != numstack+1) {
256err:
257 *errcode = -1;
258 return -1;
259 /* NOTREACHED */
260 }
261 365
262 return *numstack; 366 /* Push this operator to the stack and remember it. */
367 *stackptr++ = lasttok = op;
368
369 prologue:
370 ++expr;
371 goto loop;
372 }
263} 373}