Merge branch 'busybox' into merge

author: Ron Yorston <rmy@pobox.com> 2023-07-13 08:06:26 +0100
committer: Ron Yorston <rmy@pobox.com> 2023-07-13 08:06:26 +0100
commit: bd978d0256fd3a67de1a7dd54f1a37f9435be363 (patch)
tree: cb869384a533ac0d95fe787d75be6c050e1e7c1a /shell/math.c
parent: b2901ce8efa050da00e0f3a73f3be9bf9402deea (diff)
parent: d70256a5c719439cc6fab6a4571c1bb46178e4c7 (diff)
download: busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.tar.gz
busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.tar.bz2
busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.zip
1 files changed, 451 insertions, 262 deletions
diff --git a/shell/math.c b/shell/math.c
index 76d22c9bd..e90a38f05 100644
--- a/shell/math.c
+++ b/shell/math.c
@@ -46,7 +46,6 @@
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 /* This is my infix parser/evaluator. It is optimized for size, intended
 * as a replacement for yacc-based parsers. However, it may well be faster
 * than a comparable parser written in yacc. The supported operators are
@@ -61,7 +60,6 @@
 * to the stack instead of adding them to a queue to end up with an
 * expression).
 */
 /*
 * Aug 24, 2001              Manuel Novoa III
 *
@@ -96,7 +94,6 @@
 *
 * Merge in Aaron's comments previously posted to the busybox list,
 * modified slightly to take account of my changes to the code.
- *
 */
 /*
 *  (C) 2003 Vladimir Oleynik <dzo@simtreas.ru>
@@ -116,6 +113,12 @@
 #include "libbb.h"
 #include "math.h"
+#if 1
+# define dbg(...) ((void)0)
+#else
+# define dbg(...) bb_error_msg(__VA_ARGS__)
+#endif
 typedef unsigned char operator;
 /* An operator's token id is a bit of a bitfield. The lower 5 bits are the
@@ -125,9 +128,13 @@ typedef unsigned char operator;
 * Consider * and /
 */
 #define tok_decl(prec,id)       (((id)<<5) | (prec))
-#define PREC(op)                ((op) & 0x1F)
+#define ID_SHIFT                5
+#define PREC(op)                ((op) & 0x1f)
+#define PREC_LPAREN             0
 #define TOK_LPAREN              tok_decl(0,0)
+/* Precedence value of RPAREN is used only to distinguish it from LPAREN */
+#define TOK_RPAREN              tok_decl(1,1)
 #define TOK_COMMA               tok_decl(1,0)
@@ -135,22 +142,37 @@ typedef unsigned char operator;
 * but there are 11 of them, which doesn't fit into 3 bits for unique id.
 * Abusing another precedence level:
 */
+#define PREC_ASSIGN1            2
 #define TOK_ASSIGN              tok_decl(2,0)
 #define TOK_AND_ASSIGN          tok_decl(2,1)
 #define TOK_OR_ASSIGN           tok_decl(2,2)
 #define TOK_XOR_ASSIGN          tok_decl(2,3)
-#define TOK_PLUS_ASSIGN         tok_decl(2,4)
+#define TOK_ADD_ASSIGN          tok_decl(2,4)
-#define TOK_MINUS_ASSIGN        tok_decl(2,5)
+#define TOK_SUB_ASSIGN          tok_decl(2,5)
 #define TOK_LSHIFT_ASSIGN       tok_decl(2,6)
 #define TOK_RSHIFT_ASSIGN       tok_decl(2,7)
+#define PREC_ASSIGN2            3
 #define TOK_MUL_ASSIGN          tok_decl(3,0)
-#define TOK_DIV_ASSIGN          tok_decl(3,1)
+/* "/" and "/=" ops have the same id bits */
+#define DIV_ID1                 1
+#define TOK_DIV_ASSIGN          tok_decl(3,DIV_ID1)
 #define TOK_REM_ASSIGN          tok_decl(3,2)
-#define fix_assignment_prec(prec) do { if (prec == 3) prec = 2; } while (0)
+#define fix_assignment_prec(prec) do { prec -= (prec == 3); } while (0)
 /* Ternary conditional operator is right associative too */
+/*
+ * bash documentation says that precedence order is:
+ *  ...
+ *  expr ? expr1 : expr2
+ *  = *= /= %= += -= <<= >>= &= ^= |=
+ *  exprA , exprB
+ * What it omits is that expr1 is parsed as if parenthesized
+ * (this matches the rules of ?: in C language):
+ * "v ? 1,2 : 3,4" is parsed as "(v ? (1,2) : 3),4"
+ * "v ? a=2 : b=4" is parsed as "(v ? (a=2) : b)=4" (thus, this is a syntax error)
+ */
 #define TOK_CONDITIONAL         tok_decl(4,0)
 #define TOK_CONDITIONAL_SEP     tok_decl(4,1)
@@ -179,7 +201,7 @@ typedef unsigned char operator;
 #define TOK_SUB                 tok_decl(13,1)
 #define TOK_MUL                 tok_decl(14,0)
-#define TOK_DIV                 tok_decl(14,1)
+#define TOK_DIV                 tok_decl(14,DIV_ID1)
 #define TOK_REM                 tok_decl(14,2)
 /* Exponent is right associative */
@@ -194,26 +216,25 @@ typedef unsigned char operator;
 #define TOK_UPLUS               tok_decl(UNARYPREC+1,1)
 #define PREC_PRE                (UNARYPREC+2)
+#define TOK_PRE_INC             tok_decl(PREC_PRE,0)
-#define TOK_PRE_INC             tok_decl(PREC_PRE, 0)
+#define TOK_PRE_DEC             tok_decl(PREC_PRE,1)
-#define TOK_PRE_DEC             tok_decl(PREC_PRE, 1)
 #define PREC_POST               (UNARYPREC+3)
+#define TOK_POST_INC            tok_decl(PREC_POST,0)
+#define TOK_POST_DEC            tok_decl(PREC_POST,1)
-#define TOK_POST_INC            tok_decl(PREC_POST, 0)
+/* TOK_VALUE marks a number, name, name++/name--, or (EXPR):
-#define TOK_POST_DEC            tok_decl(PREC_POST, 1)
+ * IOW: something which can be used as the left side of a binary op.
+ * Since it's never pushed to opstack, its precedence does not matter.
-#define SPEC_PREC               (UNARYPREC+4)
+ */
+#define TOK_VALUE               tok_decl(PREC_POST,2)
-#define TOK_NUM                 tok_decl(SPEC_PREC, 0)
-#define TOK_RPAREN              tok_decl(SPEC_PREC, 1)
 static int
 is_assign_op(operator op)
 {
        operator prec = PREC(op);
-        fix_assignment_prec(prec);
+        return prec == PREC_ASSIGN1
-        return prec == PREC(TOK_ASSIGN)
+        || prec == PREC_ASSIGN2
        || prec == PREC_PRE
        || prec == PREC_POST;
 }
@@ -226,91 +247,107 @@ is_right_associative(operator prec)
        || prec == PREC(TOK_CONDITIONAL);
 }
 typedef struct {
        arith_t val;
-        /* We acquire second_val only when "expr1 : expr2" part
+        const char *var_name;
-         * of ternary ?: op is evaluated.
-         * We treat ?: as two binary ops: (expr ? (expr1 : expr2)).
-         * ':' produces a new value which has two parts, val and second_val;
-         * then '?' selects one of them based on its left side.
-         */
-        arith_t second_val;
-        char second_val_present;
-        /* If NULL then it's just a number, else it's a named variable */
-        char *var;
 } var_or_num_t;
+#define VALID_NAME(name) (name)
+#define NOT_NAME(name)   (!(name))
 typedef struct remembered_name {
        struct remembered_name *next;
-        const char *var;
+        const char *var_name;
 } remembered_name;
+static ALWAYS_INLINE int isalnum_(int c)
+{
+        return (isalnum(c) || c == '_');
+}
 static arith_t
 evaluate_string(arith_state_t *math_state, const char *expr);
-static const char*
+static arith_t
-arith_lookup_val(arith_state_t *math_state, var_or_num_t *t)
+arith_lookup_val(arith_state_t *math_state, const char *name, char *endname)
 {
-        if (t->var) {
+        char c;
-                const char *p = math_state->lookupvar(t->var);
+        const char *p;
-                if (p) {
-                        remembered_name *cur;
+        c = *endname;
-                        remembered_name cur_save;
+        *endname = '\0';
+        p = math_state->lookupvar(name);
-                        /* did we already see this name?
+        *endname = c;
-                         * testcase: a=b; b=a; echo $((a))
+        if (p) {
-                         */
+                arith_t val;
-                        for (cur = math_state->list_of_recursed_names; cur; cur = cur->next) {
+                size_t len = endname - name;
-                                if (strcmp(cur->var, t->var) == 0) {
+                remembered_name *cur;
-                                        /* Yes */
+                remembered_name remember;
-                                        return "expression recursion loop detected";
-                                }
+                /* did we already see this name?
+                 * testcase: a=b; b=a; echo $((a))
+                 */
+                for (cur = math_state->list_of_recursed_names; cur; cur = cur->next) {
+                        if (strncmp(cur->var_name, name, len) == 0
+                         && !isalnum_(cur->var_name[len])
+                        ) {
+                                /* yes */
+                                math_state->errmsg = "expression recursion loop detected";
+                                return -1;
                        }
+                }
-                        /* push current var name */
+                /* push current var name */
-                        cur = math_state->list_of_recursed_names;
+                remember.var_name = name;
-                        cur_save.var = t->var;
+                remember.next = math_state->list_of_recursed_names;
-                        cur_save.next = cur;
+                math_state->list_of_recursed_names = &remember;
-                        math_state->list_of_recursed_names = &cur_save;
-                        /* recursively evaluate p as expression */
+                /* recursively evaluate p as expression */
-                        t->val = evaluate_string(math_state, p);
+                /* this sets math_state->errmsg on error */
+                val = evaluate_string(math_state, p);
-                        /* pop current var name */
+                /* pop current var name */
-                        math_state->list_of_recursed_names = cur;
+                math_state->list_of_recursed_names = remember.next;
-                        return math_state->errmsg;
+                return val;
-                }
-                /* treat undefined var as 0 */
-                t->val = 0;
        }
+        /* treat undefined var as 0 */
        return 0;
 }
 /* "Applying" a token means performing it on the top elements on the integer
- * stack. For an unary operator it will only change the top element, but a
+ * stack. For a unary operator it will only change the top element,
- * binary operator will pop two arguments and push the result */
+ * a binary operator will pop two arguments and push the result,
+ * the ternary ?: op will pop three arguments and push the result.
+ */
 static NOINLINE const char*
 arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_or_num_t **numstackptr)
 {
-#define NUMPTR (*numstackptr)
+#define NUMSTACKPTR (*numstackptr)
        var_or_num_t *top_of_stack;
        arith_t rez;
-        const char *err;
        /* There is no operator that can work without arguments */
-        if (NUMPTR == numstack)
+        if (NUMSTACKPTR == numstack)
-                goto err;
+                goto syntax_err;
-        top_of_stack = NUMPTR - 1;
+        top_of_stack = NUMSTACKPTR - 1;
-        /* Resolve name to value, if needed */
+        if (op == TOK_CONDITIONAL_SEP) {
-        err = arith_lookup_val(math_state, top_of_stack);
+                /* "expr1 ? expr2 : expr3" operation */
-        if (err)
+                var_or_num_t *expr1 = &top_of_stack[-2];
-                return err;
+                NUMSTACKPTR = expr1 + 1;
+                if (expr1 < numstack) /* Example: $((2:3)) */
+                        return "malformed ?: operator";
+                if (expr1->val != 0) /* select expr2 or expr3 */
+                        top_of_stack--;
+                rez = top_of_stack->val;
+                top_of_stack = expr1;
+                goto ret_rez;
+        }
+        if (op == TOK_CONDITIONAL) /* Example: $((a ? b)) */
+                return "malformed ?: operator";
        rez = top_of_stack->val;
        if (op == TOK_UMINUS)
@@ -323,50 +360,30 @@ arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_
                rez++;
        else if (op == TOK_POST_DEC || op == TOK_PRE_DEC)
                rez--;
-        else if (op != TOK_UPLUS) {
+        else /*if (op != TOK_UPLUS) - always true, we drop TOK_UPLUS earlier */ {
                /* Binary operators */
                arith_t right_side_val;
-                char bad_second_val;
-                /* Binary operators need two arguments */
-                if (top_of_stack == numstack)
-                        goto err;
-                /* ...and they pop one */
-                NUMPTR = top_of_stack; /* this decrements NUMPTR */
-                bad_second_val = top_of_stack->second_val_present;
-                if (op == TOK_CONDITIONAL) { /* ? operation */
-                        /* Make next if (...) protect against
-                         * $((expr1 ? expr2)) - that is, missing ": expr" */
-                        bad_second_val = !bad_second_val;
-                }
-                if (bad_second_val) {
-                        /* Protect against $((expr <not_?_op> expr1 : expr2)) */
-                        return "malformed ?: operator";
-                }
-                top_of_stack--; /* now points to left side */
+                if (top_of_stack == numstack) /* have two arguments? */
+                        goto syntax_err; /* no */
+                /* Pop numstack */
+                NUMSTACKPTR = top_of_stack; /* this decrements NUMSTACKPTR */
-                if (op != TOK_ASSIGN) {
+                if (math_state->evaluation_disabled) {
-                        /* Resolve left side value (unless the op is '=') */
+                        dbg("binary op %02x skipped", op);
-                        err = arith_lookup_val(math_state, top_of_stack);
+                        return NULL;
-                        if (err)
+                        /* bash 5.2.12 does not execute "2/0" in disabled
-                                return err;
+                         * branches of ?: (and thus does not complain),
+                         * but complains about negative exp: "2**-1".
+                         * I don't think we need to emulate that.
+                         */
                }
+                top_of_stack--; /* now points to left side */
                right_side_val = rez;
                rez = top_of_stack->val;
-                if (op == TOK_CONDITIONAL) /* ? operation */
+                if (op == TOK_BOR || op == TOK_OR_ASSIGN)
-                        rez = (rez ? right_side_val : top_of_stack[1].second_val);
-                else if (op == TOK_CONDITIONAL_SEP) { /* : operation */
-                        if (top_of_stack == numstack) {
-                                /* Protect against $((expr : expr)) */
-                                return "malformed ?: operator";
-                        }
-                        top_of_stack->second_val_present = op;
-                        top_of_stack->second_val = right_side_val;
-                }
-                else if (op == TOK_BOR || op == TOK_OR_ASSIGN)
                        rez |= right_side_val;
                else if (op == TOK_OR)
                        rez = right_side_val || rez;
@@ -394,9 +411,9 @@ arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_
                        rez = (rez <= right_side_val);
                else if (op == TOK_MUL || op == TOK_MUL_ASSIGN)
                        rez *= right_side_val;
-                else if (op == TOK_ADD || op == TOK_PLUS_ASSIGN)
+                else if (op == TOK_ADD || op == TOK_ADD_ASSIGN)
                        rez += right_side_val;
-                else if (op == TOK_SUB || op == TOK_MINUS_ASSIGN)
+                else if (op == TOK_SUB || op == TOK_SUB_ASSIGN)
                        rez -= right_side_val;
                else if (op == TOK_ASSIGN || op == TOK_COMMA)
                        rez = right_side_val;
@@ -405,14 +422,26 @@ arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_
                        if (right_side_val < 0)
                                return "exponent less than 0";
                        c = 1;
-                        while (--right_side_val >= 0)
+                        while (right_side_val != 0) {
+                                if ((right_side_val & 1) == 0) {
+                                        /* this if() block is not necessary for correctness,
+                                         * but otherwise echo $((3**999999999999999999))
+                                         * takes a VERY LONG time
+                                         * (and it's not interruptible by ^C)
+                                         */
+                                        rez *= rez;
+                                        right_side_val >>= 1;
+                                }
                                c *= rez;
+                                right_side_val--;
+                        }
                        rez = c;
                }
-                else if (right_side_val == 0)
+                else /*if (op == TOK_DIV || op == TOK_DIV_ASSIGN
-                        return "divide by zero";
+                      || op == TOK_REM || op == TOK_REM_ASSIGN) - always true */
-                else if (op == TOK_DIV || op == TOK_DIV_ASSIGN
+                {
-                      || op == TOK_REM || op == TOK_REM_ASSIGN) {
+                        if (right_side_val == 0)
+                                return "divide by zero";
                        /*
                         * bash 4.2.45 x86 64bit: SEGV on 'echo $((2**63 / -1))'
                         *
@@ -424,42 +453,53 @@ arith_apply(arith_state_t *math_state, operator op, var_or_num_t *numstack, var_
                         * Make sure to at least not SEGV here:
                         */
                        if (right_side_val == -1
-                         && rez << 1 == 0 /* MAX_NEGATIVE_INT or 0 */
+                         && (rez << 1) == 0 /* MAX_NEGATIVE_INT or 0 */
                        ) {
                                right_side_val = 1;
                        }
-                        if (op == TOK_DIV || op == TOK_DIV_ASSIGN)
+                        if (op & (DIV_ID1 << ID_SHIFT)) /* DIV or DIV_ASSIGN? */
                                rez /= right_side_val;
-                        else {
+                        else
                                rez %= right_side_val;
-                        }
                }
        }
+        if (math_state->evaluation_disabled) {
+                dbg("unary op %02x skipped", op);
+                return NULL;
+        }
        if (is_assign_op(op)) {
                char buf[sizeof(arith_t)*3 + 2];
-                if (top_of_stack->var == NULL) {
+                if (NOT_NAME(top_of_stack->var_name)) {
                        /* Hmm, 1=2 ? */
-                        goto err;
+                        goto syntax_err;
                }
                /* Save to shell variable */
                sprintf(buf, ARITH_FMT, rez);
-                math_state->setvar(top_of_stack->var, buf);
+                {
-                /* After saving, make previous value for v++ or v-- */
+                        char *e = (char*)endofname(top_of_stack->var_name);
-                if (op == TOK_POST_INC)
+                        char c = *e;
-                        rez--;
+                        *e = '\0';
-                if (op == TOK_POST_DEC)
+                        math_state->setvar(top_of_stack->var_name, buf);
-                        rez++;
+                        *e = c;
+                }
+                /* VAR++ or VAR--? */
+                if (PREC(op) == PREC_POST) {
+                        /* Do not store new value to stack (keep old value) */
+                        goto ret_NULL;
+                }
        }
+ ret_rez:
        top_of_stack->val = rez;
+ ret_NULL:
        /* Erase var name, it is just a number now */
-        top_of_stack->var = NULL;
+        top_of_stack->var_name = NULL;
        return NULL;
- err:
+ syntax_err:
        return "arithmetic syntax error";
-#undef NUMPTR
+#undef NUMSTACKPTR
 }
 /* longest must be first */
@@ -479,8 +519,8 @@ static const char op_tokens[] ALIGN1 = {
        '*','=',    0, TOK_MUL_ASSIGN,
        '/','=',    0, TOK_DIV_ASSIGN,
        '%','=',    0, TOK_REM_ASSIGN,
-        '+','=',    0, TOK_PLUS_ASSIGN,
+        '+','=',    0, TOK_ADD_ASSIGN,
-        '-','=',    0, TOK_MINUS_ASSIGN,
+        '-','=',    0, TOK_SUB_ASSIGN,
        '-','-',    0, TOK_POST_DEC,
        '^','=',    0, TOK_XOR_ASSIGN,
        '+','+',    0, TOK_POST_INC,
@@ -497,7 +537,6 @@ static const char op_tokens[] ALIGN1 = {
        '+',        0, TOK_ADD,
        '-',        0, TOK_SUB,
        '^',        0, TOK_BXOR,
-        /* uniq */
        '~',        0, TOK_BNOT,
        ',',        0, TOK_COMMA,
        '?',        0, TOK_CONDITIONAL,
@@ -506,41 +545,26 @@ static const char op_tokens[] ALIGN1 = {
        '(',        0, TOK_LPAREN,
        0
 };
-#define ptr_to_rparen (&op_tokens[sizeof(op_tokens)-7])
+#define END_POINTER (&op_tokens[sizeof(op_tokens)-1])
 #if ENABLE_FEATURE_SH_MATH_BASE
-static arith_t strto_arith_t(const char *nptr, char **endptr)
+static arith_t parse_with_base(const char *nptr, char **endptr, unsigned base)
 {
-        unsigned base;
+        arith_t n = 0;
-        arith_t n;
+        const char *start = nptr;
-# if ENABLE_FEATURE_SH_MATH_64
-        n = strtoull(nptr, endptr, 0);
-# else
-        n = strtoul(nptr, endptr, 0);
-# endif
-        if (**endptr != '#'
-         || (*nptr < '1' || *nptr > '9')
-         || (n < 2 || n > 64)
-        ) {
-                return n;
-        }
-        /* It's "N#nnnn" or "NN#nnnn" syntax, NN can't start with 0,
-         * NN is in 2..64 range.
-         */
-        base = (unsigned)n;
-        n = 0;
-        nptr = *endptr + 1;
        for (;;) {
                unsigned digit = (unsigned)*nptr - '0';
                if (digit >= 10 /* not 0..9 */
-                 && digit <= 'z' - '0' /* needed to reject e.g. $((64#~)) */
+                 && digit <= 'z' - '0' /* reject e.g. $((64#~)) */
                ) {
-                        /* in bases up to 36, case does not matter for a-z */
+                        /* current char is one of :;<=>?@A..Z[\]^_`a..z */
+                        /* in bases up to 36, case does not matter for a-z,
+                         * map @A..Z and `a..z to 9..35: */
                        digit = (unsigned)(*nptr | 0x20) - ('a' - 10);
                        if (base > 36 && *nptr <= '_') {
-                                /* otherwise, A-Z,@,_ are 36-61,62,63 */
+                                /* base > 36: A-Z,@,_ are 36-61,62,63 */
                                if (*nptr == '_')
                                        digit = 63;
                                else if (*nptr == '@')
@@ -551,8 +575,8 @@ static arith_t strto_arith_t(const char *nptr, char **endptr)
                                        break; /* error: one of [\]^ */
                        }
                        //bb_error_msg("ch:'%c'%d digit:%u", *nptr, *nptr, digit);
-                        //if (digit < 10) - example where we need this?
+                        if (digit < 10) /* reject e.g. $((36#@)) */
-                        //      break;
+                                break;
                }
                if (digit >= base)
                        break;
@@ -560,15 +584,55 @@ static arith_t strto_arith_t(const char *nptr, char **endptr)
                n = n * base + digit;
                nptr++;
        }
-        /* Note: we do not set errno on bad chars, we just set a pointer
-         * to the first invalid char. For example, this allows
-         * "N#" (empty "nnnn" part): 64#+1 is a valid expression,
-         * it means 64# + 1, whereas 64#~... is not, since ~ is not a valid
-         * operator.
-         */
        *endptr = (char*)nptr;
+        /* "64#" and "64#+1" used to be valid expressions, but bash 5.2.15
+         * no longer allows them; detect this:
+         */
+// NB: bash allows $((0x)), this is probably a bug...
+        if (nptr == start)
+                *endptr = NULL; /* there weren't any digits, bad */
        return n;
 }
+static arith_t strto_arith_t(const char *nptr, char **endptr)
+{
+/* NB: we do not use strtoull here to be bash-compatible:
+ * $((99999999999999999999)) is 7766279631452241919
+ * (the 64-bit truncated value).
+ */
+        unsigned base;
+        /* nptr[0] is '0'..'9' here */
+        base = nptr[0] - '0';
+        if (base == 0) { /* nptr[0] is '0' */
+                base = 8;
+                if ((nptr[1] | 0x20) == 'x') {
+                        base = 16;
+                        nptr += 2;
+                }
+// NB: bash allows $((0x)), this is probably a bug...
+                return parse_with_base(nptr, endptr, base);
+        }
+        /* base is 1..9 here */
+        if (nptr[1] == '#') {
+                if (base > 1)
+                        return parse_with_base(nptr + 2, endptr, base);
+                /* else: "1#NN", bash says "invalid arithmetic base" */
+        }
+        if (isdigit(nptr[1]) && nptr[2] == '#') {
+                base = 10 * base + (nptr[1] - '0');
+                /* base is at least 10 here */
+                if (base <= 64)
+                        return parse_with_base(nptr + 3, endptr, base);
+                /* else: bash says "invalid arithmetic base" */
+        }
+        return parse_with_base(nptr, endptr, 10);
+}
 #else /* !ENABLE_FEATURE_SH_MATH_BASE */
 # if ENABLE_FEATURE_SH_MATH_64
 #  define strto_arith_t(nptr, endptr) strtoull(nptr, endptr, 0)
@@ -580,23 +644,52 @@ static arith_t strto_arith_t(const char *nptr, char **endptr)
 static arith_t
 evaluate_string(arith_state_t *math_state, const char *expr)
 {
+        /* Stack of integers/names */
+        var_or_num_t *numstack, *numstackptr;
+        /* Stack of operator tokens */
+        operator *opstack, *opstackptr;
+        /* To detect whether we are after a "value": */
        operator lasttok;
+        /* To insert implicit () in ?: ternary op: */
+        operator insert_op = 0xff;
+        unsigned ternary_level = 0;
        const char *errmsg;
        const char *start_expr = expr = skip_whitespace(expr);
-        unsigned expr_len = strlen(expr) + 2;
-        /* Stack of integers */
+        {
-        /* The proof that there can be no more than strlen(startbuf)/2+1
+                unsigned expr_len = strlen(expr);
-         * integers in any given correct or incorrect expression
+                /* If LOTS of whitespace, do not blow up the estimation */
-         * is left as an exercise to the reader. */
+                const char *p = expr;
-        var_or_num_t *const numstack = alloca((expr_len / 2) * sizeof(numstack[0]));
+                while (*p) {
-        var_or_num_t *numstackptr = numstack;
+                        /* in a run of whitespace, count only 1st char */
-        /* Stack of operator tokens */
+                        if (isspace(*p)) {
-        operator *const stack = alloca(expr_len * sizeof(stack[0]));
+                                while (p++, isspace(*p))
-        operator *stackptr = stack;
+                                        expr_len--;
+                        } else {
+                                p++;
+                        }
+                }
+                dbg("expr:'%s' expr_len:%u", expr, expr_len);
+                /* expr_len deep opstack is needed. Think "------------7".
+                 * Only "?" operator temporarily needs two opstack slots
+                 * (IOW: more than one slot), but its second slot (LPAREN)
+                 * is popped off when ":" is reached.
+                 */
+                expr_len++; /* +1 for 1st LPAREN. See what $((1?)) pushes to opstack */
+                opstackptr = opstack = alloca(expr_len * sizeof(opstack[0]));
+                /* There can be no more than (expr_len/2 + 1)
+                 * integers/names in any given correct or incorrect expression.
+                 * (modulo "09", "0v" cases where 2 chars are 2 ints/names,
+                 * but we have code to detect that early)
+                 */
+                expr_len = (expr_len / 2)
+                        + 1 /* "1+2" has two nums, 2 = len/2+1, NOT len/2 */;
+                numstackptr = numstack = alloca(expr_len * sizeof(numstack[0]));
+        }
        /* Start with a left paren */
-        *stackptr++ = lasttok = TOK_LPAREN;
+        dbg("(%d) op:TOK_LPAREN", (int)(opstackptr - opstack));
-        errmsg = NULL;
+        *opstackptr++ = lasttok = TOK_LPAREN;
        while (1) {
                const char *p;
@@ -607,8 +700,7 @@ evaluate_string(arith_state_t *math_state, const char *expr)
                if (*expr == '\0') {
                        if (expr == start_expr) {
                                /* Null expression */
-                                numstack->val = 0;
+                                return 0;
-                                goto ret;
                        }
                        /* This is only reached after all tokens have been extracted from the
@@ -616,46 +708,71 @@ evaluate_string(arith_state_t *math_state, const char *expr)
                         * are to be applied in order. At the end, there should be a final
                         * result on the integer stack */
-                        if (expr != ptr_to_rparen + 1) {
+                        if (expr != END_POINTER) {
                                /* If we haven't done so already,
                                 * append a closing right paren
                                 * and let the loop process it */
-                                expr = ptr_to_rparen;
+                                expr = END_POINTER;
-//bb_error_msg("expr=')'");
+                                op = TOK_RPAREN;
-                                continue;
+                                goto tok_found1;
                        }
                        /* At this point, we're done with the expression */
                        if (numstackptr != numstack + 1) {
-                                /* ...but if there isn't, it's bad */
+                                /* if there is not exactly one result, it's bad */
-                                goto err;
+                                /* Example: $((1 2)) */
+                                goto syntax_err;
                        }
-                        goto ret;
+                        return numstack->val;
                }
                p = endofname(expr);
                if (p != expr) {
                        /* Name */
-                        size_t var_name_size = (p - expr) + 1;  /* +1 for NUL */
+                        if (!math_state->evaluation_disabled) {
-                        numstackptr->var = alloca(var_name_size);
+                                numstackptr->var_name = expr;
-                        safe_strncpy(numstackptr->var, expr, var_name_size);
+                                dbg("[%d] var:'%.*s'", (int)(numstackptr - numstack), (int)(p - expr), expr);
-//bb_error_msg("var:'%s'", numstackptr->var);
+                                expr = skip_whitespace(p);
-                        expr = p;
+                                /* If it is not followed by "=" operator... */
- num:
+                                if (expr[0] != '=' /* not "=..." */
-                        numstackptr->second_val_present = 0;
+                                 || expr[1] == '=' /* or "==..." */
+                                ) {
+                                        /* Evaluate variable to value */
+                                        arith_t val = arith_lookup_val(math_state, numstackptr->var_name, (char*)p);
+                                        if (math_state->errmsg)
+                                                return val; /* -1 */
+                                        numstackptr->val = val;
+                                }
+                        } else {
+                                dbg("[%d] var:IGNORED", (int)(numstackptr - numstack));
+                                expr = p;
+                                numstackptr->var_name = NULL; /* not needed, paranoia */
+                                numstackptr->val = 0; /* not needed, paranoia */
+                        }
+ push_value:
                        numstackptr++;
-                        lasttok = TOK_NUM;
+                        lasttok = TOK_VALUE;
                        continue;
                }
                if (isdigit(*expr)) {
                        /* Number */
-                        numstackptr->var = NULL;
+                        char *end;
-                        errno = 0;
+                        numstackptr->var_name = NULL;
-                        numstackptr->val = strto_arith_t(expr, (char**) &expr);
+                        /* code is smaller compared to using &expr here: */
-//bb_error_msg("val:%lld", numstackptr->val);
+                        numstackptr->val = strto_arith_t(expr, &end);
-                        if (errno)
+                        expr = end;
-                                numstackptr->val = 0; /* bash compat */
+                        dbg("[%d] val:%lld", (int)(numstackptr - numstack), numstackptr->val);
-                        goto num;
+                        if (!expr) /* example: $((10#)) */
+                                goto syntax_err;
+                        /* A number can't be followed by another number, or a variable name.
+                         * We'd catch this later anyway, but this would require numstack[]
+                         * to be ~twice as deep to handle strings where _every_ char is
+                         * a new number or name.
+                         * Examples: "09" is two numbers, "0v" is number and name.
+                         */
+                        if (isalnum(*expr) || *expr == '_')
+                                goto syntax_err;
+                        goto push_value;
                }
                /* Should be an operator */
@@ -671,10 +788,11 @@ evaluate_string(arith_state_t *math_state, const char *expr)
                if ((expr[0] == '+' || expr[0] == '-')
                 && (expr[1] == expr[0])
                ) {
-                        if (numstackptr == numstack || !numstackptr[-1].var) { /* not a VAR++ */
+                        if (numstackptr == numstack || NOT_NAME(numstackptr[-1].var_name)) {
+                                /* not a VAR++ */
                                char next = skip_whitespace(expr + 2)[0];
-                                if (!(isalpha(next) || next == '_')) { /* not a ++VAR */
+                                if (!(isalpha(next) || next == '_')) {
-                                        //bb_error_msg("special %c%c", expr[0], expr[0]);
+                                        /* not a ++VAR */
                                        op = (expr[0] == '+' ? TOK_ADD : TOK_SUB);
                                        expr++;
                                        goto tok_found1;
@@ -704,27 +822,41 @@ evaluate_string(arith_state_t *math_state, const char *expr)
                        if (*p == '\0') {
                                /* No next element, operator not found */
                                //math_state->syntax_error_at = expr;
-                                goto err;
+                                goto syntax_err;
                        }
                }
+                /* NB: expr now points past the operator */
 tok_found:
                op = p[1]; /* fetch TOK_foo value */
- tok_found1:
-                /* NB: expr now points past the operator */
-                /* post grammar: a++ reduce to num */
+                /* Special rule for "? EXPR :"
-                if (lasttok == TOK_POST_INC || lasttok == TOK_POST_DEC)
+                 * "EXPR in the middle of ? : is parsed as if parenthesized"
-                        lasttok = TOK_NUM;
+                 * (this quirk originates in C grammar, I think).
+                 */
+                if (op == TOK_CONDITIONAL) {
+                        insert_op = TOK_LPAREN;
+                        dbg("insert_op=%02x", insert_op);
+                }
+                if (op == TOK_CONDITIONAL_SEP) {
+                        insert_op = op;
+                        op = TOK_RPAREN;
+                        dbg("insert_op=%02x op=%02x", insert_op, op);
+                }
+ tok_found1:
+                /* NAME++ is a "value" (something suitable for a binop) */
+                if (PREC(lasttok) == PREC_POST)
+                        lasttok = TOK_VALUE;
                /* Plus and minus are binary (not unary) _only_ if the last
-                 * token was a number, or a right paren (which pretends to be
+                 * token was a "value". Think about it. It makes sense.
-                 * a number, since it evaluates to one). Think about it.
+                 */
-                 * It makes sense. */
+                if (lasttok != TOK_VALUE) {
-                if (lasttok != TOK_NUM) {
                        switch (op) {
                        case TOK_ADD:
-                                op = TOK_UPLUS;
+                                //op = TOK_UPLUS;
-                                break;
+                                //break;
+                                /* Unary plus does nothing, do not even push it to opstack */
+                                continue;
                        case TOK_SUB:
                                op = TOK_UMINUS;
                                break;
@@ -744,80 +876,137 @@ evaluate_string(arith_state_t *math_state, const char *expr)
                 * stack until we find an operator with a lesser priority than the
                 * one we have just extracted. If op is right-associative,
                 * then stop "applying" on the equal priority too.
-                 * Left paren is given the lowest priority so it will never be
+                 * Left paren will never be "applied" in this way.
-                 * "applied" in this way.
                 */
                prec = PREC(op);
-//bb_error_msg("prec:%02x", prec);
+                if (prec != PREC_LPAREN && prec < UNARYPREC) {
-                if ((prec > 0 && prec < UNARYPREC) || prec == SPEC_PREC) {
+                        /* Binary, ternary or RPAREN */
-                        /* not left paren or unary */
+                        if (lasttok != TOK_VALUE) {
-                        if (lasttok != TOK_NUM) {
+                                /* Must be preceded by a value.
-                                /* binary op must be preceded by a num */
+                                 * $((2 2 + * 3)) would be accepted without this.
-                                goto err;
+                                 */
+                                goto syntax_err;
                        }
-                        /* The algorithm employed here is simple: while we don't
+                        /* if op is RPAREN:
-                         * hit an open paren nor the bottom of the stack, pop
+                         *     while opstack is not empty:
-                         * tokens and apply them */
+                         *         pop prev_op
-                        while (stackptr != stack) {
+                         *         if prev_op is LPAREN (finished evaluating (EXPR)):
-                                operator prev_op = *--stackptr;
+                         *             goto N
+                         *         evaluate prev_op on top of numstack
+                         *     BUG (unpaired RPAREN)
+                         * else (op is not RPAREN):
+                         *     while opstack is not empty:
+                         *         pop prev_op
+                         *         if can't evaluate prev_op (it is lower precedence than op):
+                         *             push prev_op back
+                         *             goto C
+                         *         evaluate prev_op on top of numstack
+                         *     C:if op is "?": check result, set disable flag if needed
+                         * push op
+                         * N:loop to parse the rest of string
+                         */
+                        while (opstackptr != opstack) {
+                                operator prev_op = *--opstackptr;
                                if (op == TOK_RPAREN) {
-//bb_error_msg("op == TOK_RPAREN");
                                        if (prev_op == TOK_LPAREN) {
-//bb_error_msg("prev_op == TOK_LPAREN");
+                                                /* Erase var name: for example, (VAR) = 1 is not valid */
-//bb_error_msg("  %p %p numstackptr[-1].var:'%s'", numstack, numstackptr-1, numstackptr[-1].var);
+                                                numstackptr[-1].var_name = NULL;
-                                                if (numstackptr[-1].var) {
+                                                /* (EXPR) is a "value": next operator directly after
-                                                        /* Expression is (var), lookup now */
+                                                 * close paren should be considered binary
-                                                        errmsg = arith_lookup_val(math_state, &numstackptr[-1]);
+                                                 */
-                                                        if (errmsg)
+                                                lasttok = TOK_VALUE;
-                                                                goto err_with_custom_msg;
-                                                        /* Erase var name: (var) is just a number, for example, (var) = 1 is not valid */
-                                                        numstackptr[-1].var = NULL;
-                                                }
-                                                /* Any operator directly after a
-                                                 * close paren should consider itself binary */
-                                                lasttok = TOK_NUM;
                                                goto next;
                                        }
-//bb_error_msg("prev_op != TOK_LPAREN");
+                                        /* Not (y), but ...x~y). Fall through to evaluate x~y */
                                } else {
                                        operator prev_prec = PREC(prev_op);
-//bb_error_msg("op != TOK_RPAREN");
                                        fix_assignment_prec(prec);
                                        fix_assignment_prec(prev_prec);
                                        if (prev_prec < prec
                                         || (prev_prec == prec && is_right_associative(prec))
                                        ) {
-                                                stackptr++;
+                                                /* ...x~y@. push @ on opstack */
-                                                break;
+                                                opstackptr++; /* undo removal of ~ op */
+                                                goto check_cond;
                                        }
+                                        /* else: ...x~y@. Evaluate x~y, replace it on stack with result. Then repeat */
                                }
-//bb_error_msg("arith_apply(prev_op:%02x)", prev_op);
+                                dbg("arith_apply(prev_op:%02x, numstack:%d)", prev_op, (int)(numstackptr - numstack));
                                errmsg = arith_apply(math_state, prev_op, numstack, &numstackptr);
                                if (errmsg)
                                        goto err_with_custom_msg;
+dbg("    numstack:%d val:%lld '%s'", (int)(numstackptr - numstack), numstackptr[-1].val, numstackptr[-1].var_name);
+                                if (prev_op == TOK_CONDITIONAL_SEP) {
+                                        /* We just executed ":" */
+                                        /* Remove "?" from opstack too, not just ":" */
+                                        opstackptr--;
+                                        if (*opstackptr != TOK_CONDITIONAL) {
+                                                /* Example: $((1,2:3)) */
+                                                errmsg = "malformed ?: operator";
+                                                goto err_with_custom_msg;
+                                        }
+                                        /* Example: a=1?2:3,a. We just executed ":".
+                                         * Prevent assignment from being still disabled.
+                                         */
+                                        if (ternary_level == math_state->evaluation_disabled) {
+                                                math_state->evaluation_disabled = 0;
+                                                dbg("':' executed: evaluation_disabled=CLEAR");
+                                        }
+                                        ternary_level--;
+                                }
+                        } /* while (opstack not empty) */
+                        if (op == TOK_RPAREN) /* unpaired RPAREN? */
+                                goto syntax_err;
+ check_cond:
+                        if (op == TOK_CONDITIONAL) {
+                                /* We just now evaluated EXPR before "?".
+                                 * Should we disable evaluation now?
+                                 */
+                                ternary_level++;
+                                if (numstackptr[-1].val == 0 && !math_state->evaluation_disabled) {
+                                        math_state->evaluation_disabled = ternary_level;
+                                        dbg("'?' entered: evaluation_disabled=%u", math_state->evaluation_disabled);
+                                }
+                        }
+                } /* if */
+                /* else: LPAREN or UNARY: push it on opstack */
+                /* Push this operator to opstack */
+                dbg("(%d) op:%02x insert_op:%02x", (int)(opstackptr - opstack), op, insert_op);
+                *opstackptr++ = lasttok = op;
+ next:
+                if (insert_op != 0xff) {
+                        op = insert_op;
+                        insert_op = 0xff;
+                        dbg("inserting %02x", op);
+                        if (op == TOK_CONDITIONAL_SEP) {
+                                /* The next token is ":". Toggle "do not evaluate" state */
+                                if (!math_state->evaluation_disabled) {
+                                        math_state->evaluation_disabled = ternary_level;
+                                        dbg("':' entered: evaluation_disabled=%u", math_state->evaluation_disabled);
+                                } else if (ternary_level == math_state->evaluation_disabled) {
+                                        math_state->evaluation_disabled = 0;
+                                        dbg("':' entered: evaluation_disabled=CLEAR");
+                                } /* else: ternary_level > evaluation_disabled && evaluation_disabled != 0 */
+                                        /* We are in nested "?:" while in outer "?:" disabled branch */
+                                        /* do_nothing */
                        }
-                        if (op == TOK_RPAREN)
+                        goto tok_found1;
-                                goto err;
                }
-                /* Push this operator to the stack and remember it */
-//bb_error_msg("push op:%02x", op);
-                *stackptr++ = lasttok = op;
- next: ;
        } /* while (1) */
- err:
+ syntax_err:
        errmsg = "arithmetic syntax error";
 err_with_custom_msg:
-        numstack->val = -1;
- ret:
        math_state->errmsg = errmsg;
-        return numstack->val;
+        return -1;
 }
 arith_t FAST_FUNC
 arith(arith_state_t *math_state, const char *expr)
 {
+        math_state->evaluation_disabled = 0;
        math_state->errmsg = NULL;
        math_state->list_of_recursed_names = NULL;
        return evaluate_string(math_state, expr);
author	Ron Yorston <rmy@pobox.com>	2023-07-13 08:06:26 +0100
committer	Ron Yorston <rmy@pobox.com>	2023-07-13 08:06:26 +0100
commit	bd978d0256fd3a67de1a7dd54f1a37f9435be363 (patch)
tree	cb869384a533ac0d95fe787d75be6c050e1e7c1a /shell/math.c
parent	b2901ce8efa050da00e0f3a73f3be9bf9402deea (diff)
parent	d70256a5c719439cc6fab6a4571c1bb46178e4c7 (diff)
download	busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.tar.gz busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.tar.bz2 busybox-w32-bd978d0256fd3a67de1a7dd54f1a37f9435be363.zip