libbb/arith.c

   1 /* Copyright (c) 2001 Aaron Lehmann <aaronl@vitelus.com>
   2
   3    Permission is hereby granted, free of charge, to any person obtaining
   4    a copy of this software and associated documentation files (the
   5    "Software"), to deal in the Software without restriction, including
   6    without limitation the rights to use, copy, modify, merge, publish,
   7    distribute, sublicense, and/or sell copies of the Software, and to
   8    permit persons to whom the Software is furnished to do so, subject to
   9    the following conditions:
  10
  11    The above copyright notice and this permission notice shall be
  12    included in all copies or substantial portions of the Software.
  13
  14    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23 /* This is my infix parser/evaluator. It is optimized for size, intended
  24  * as a replacement for yacc-based parsers. However, it may well be faster
  25  * than a comparable parser writen in yacc. The supported operators are
  26  * listed in #defines below. Parens, order of operations, and error handling
  27  * are supported. This code is threadsafe. The exact expression format should
  28  * be that which POSIX specifies for shells. */
  29
  30 /* The code uses a simple two-stack algorithm. See
  31  * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
  32  * for a detailed explaination of the infix-to-postfix algorithm on which
  33  * this is based (this code differs in that it applies operators immediately
  34  * to the stack instead of adding them to a queue to end up with an
  35  * expression). */
  36
  37 /* To use the routine, call it with an expression string and error return
  38  * pointer */
  39
  40 /*
  41  * Aug 24, 2001              Manuel Novoa III
  42  *
  43  * Reduced the generated code size by about 30% (i386) and fixed several bugs.
  44  *
  45  * 1) In arith_apply():
  46  *    a) Cached values of *numptr and &(numptr[-1]).
  47  *    b) Removed redundant test for zero denominator.
  48  *
  49  * 2) In arith():
  50  *    a) Eliminated redundant code for processing operator tokens by moving
  51  *       to a table-based implementation.  Also folded handling of parens
  52  *       into the table.
  53  *    b) Combined all 3 loops which called arith_apply to reduce generated
  54  *       code size at the cost of speed.
  55  *
  56  * 3) The following expressions were treated as valid by the original code:
  57  *       1()  ,    0!  ,    1 ( *3 )   .
  58  *    These bugs have been fixed by internally enclosing the expression in
  59  *    parens and then checking that all binary ops and right parens are
  60  *    preceded by a valid expression (NUM_TOKEN).
  61  *
  62  * Note: It may be desireable to replace Aaron's test for whitespace with
  63  * ctype's isspace() if it is used by another busybox applet or if additional
  64  * whitespace chars should be considered.  Look below the "#include"s for a
  65  * precompiler test.
  66  */
  67
  68 /*
  69  * Aug 26, 2001              Manuel Novoa III
  70  *
  71  * Return 0 for null expressions.  Pointed out by vodz.
  72  *
  73  * Merge in Aaron's comments previously posted to the busybox list,
  74  * modified slightly to take account of my changes to the code.
  75  *
  76  * TODO: May want to allow access to variables in the arith code.
  77  *       This would:
  78  *       1) allow us to evaluate $A as 0 if A isn't set (although this
  79  *          would require changes to ash.c too).
  80  *       2) allow us to write expressions as $(( A + 2 )).
  81  *       This could be done using a callback function passed to the
  82  *       arith() function of by requiring such a function with fixed
  83  *       name as an extern.
  84  */
  85
  86 #include <stdlib.h>
  87 #include <string.h>
  88 #include <ctype.h>
  89 #include "libbb.h"
  90
  91 /*
  92  * Use "#if 1" below for Aaron's original test for whitespace.
  93  * Use "#if 0" for ctype's isspace().
  94  * */
  95 #if 1
  96 #undef isspace
  97 #define isspace(arithval) \
  98         (arithval == ' ' || arithval == '\n' || arithval == '\t')
  99 #endif
 100
 101 typedef char operator;
 102
 103 /* An operator's token id is a bit of a bitfield. The lower 5 bits are the
 104  * precedence, and high 3 are an ID unique accross operators of that
 105  * precedence. The ID portion is so that multiple operators can have the
 106  * same precedence, ensuring that the leftmost one is evaluated first.
 107  * Consider * and /. */
 108
 109 #define tok_decl(prec,id) (((id)<<5)|(prec))
 110 #define PREC(op) ((op)&0x1F)
 111
 112 #define TOK_LPAREN tok_decl(0,0)
 113
 114 #define TOK_OR tok_decl(1,0)
 115
 116 #define TOK_AND tok_decl(2,0)
 117
 118 #define TOK_BOR tok_decl(3,0)
 119
 120 #define TOK_BXOR tok_decl(4,0)
 121
 122 #define TOK_BAND tok_decl(5,0)
 123
 124 #define TOK_EQ tok_decl(6,0)
 125 #define TOK_NE tok_decl(6,1)
 126
 127 #define TOK_LT tok_decl(7,0)
 128 #define TOK_GT tok_decl(7,1)
 129 #define TOK_GE tok_decl(7,2)
 130 #define TOK_LE tok_decl(7,3)
 131
 132 #define TOK_LSHIFT tok_decl(8,0)
 133 #define TOK_RSHIFT tok_decl(8,1)
 134
 135 #define TOK_ADD tok_decl(9,0)
 136 #define TOK_SUB tok_decl(9,1)
 137
 138 #define TOK_MUL tok_decl(10,0)
 139 #define TOK_DIV tok_decl(10,1)
 140 #define TOK_REM tok_decl(10,2)
 141
 142 /* For now all unary operators have the same precedence, and that's used to
 143  * identify them as unary operators
 144  */
 145 #define UNARYPREC 14
 146 #define TOK_BNOT tok_decl(UNARYPREC,0)
 147 #define TOK_NOT tok_decl(UNARYPREC,1)
 148 #define TOK_UMINUS tok_decl(UNARYPREC,2)
 149 #define TOK_UPLUS tok_decl(UNARYPREC,3)
 150
 151 #define TOK_NUM tok_decl(15,0)
 152 #define TOK_RPAREN tok_decl(15,1)
 153 #define TOK_ERROR  tok_decl(15,2)       /* just a place-holder really */
 154
 155 #define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
 156 #define NUMPTR (*numstackptr)
 157
 158 /* "applying" a token means performing it on the top elements on the integer
 159  * stack. For a unary operator it will only change the top element, but a
 160  * binary operator will pop two arguments and push a result
 161  */
 162 static short arith_apply(operator    op, long *numstack, long **numstackptr)
 163 {
 164         long numptr_val;
 165         long *NUMPTR_M1;
 166
 167         /* There is no operator that can work without arguments */
 168         if (NUMPTR == numstack) {
 169                 goto err;
 170         }
 171
 172         NUMPTR_M1 = NUMPTR - 1;
 173         if (op == TOK_UMINUS) {
 174                 *NUMPTR_M1 *= -1;
 175         } else if (op == TOK_NOT) {
 176                 *NUMPTR_M1 = !(*NUMPTR_M1);
 177         } else if (op == TOK_BNOT) {
 178                 *NUMPTR_M1 = ~(*NUMPTR_M1);
 179         } else if (op != TOK_UPLUS) {
 180                 /* Binary operators */
 181
 182                 /* ... and binary operators need two arguments */
 183                 if (NUMPTR_M1 == numstack) {
 184                         goto err;
 185                 }
 186                 /* ... and they pop one */
 187                 numptr_val = *--NUMPTR;
 188
 189                 NUMPTR_M1 = NUMPTR - 1;
 190
 191                 if (op == TOK_BOR) {
 192                         *NUMPTR_M1 |= numptr_val;
 193                 } else if (op == TOK_OR) {
 194                         *NUMPTR_M1 = numptr_val || *NUMPTR_M1;
 195                 } else if (op == TOK_BAND) {
 196                         *NUMPTR_M1 &= numptr_val;
 197                 } else if (op == TOK_AND) {
 198                         *NUMPTR_M1 = *NUMPTR_M1 && numptr_val;
 199                 } else if (op == TOK_EQ) {
 200                         *NUMPTR_M1 = (*NUMPTR_M1 == numptr_val);
 201                 } else if (op == TOK_NE) {
 202                         *NUMPTR_M1 = (*NUMPTR_M1 != numptr_val);
 203                 } else if (op == TOK_GE) {
 204                         *NUMPTR_M1 = (*NUMPTR_M1 >= numptr_val);
 205                 } else if (op == TOK_RSHIFT) {
 206                         *NUMPTR_M1 >>= numptr_val;
 207                 } else if (op == TOK_LSHIFT) {
 208                         *NUMPTR_M1 <<= numptr_val;
 209                 } else if (op == TOK_GT) {
 210                         *NUMPTR_M1 = (*NUMPTR_M1 > numptr_val);
 211                 } else if (op == TOK_LT) {
 212                         *NUMPTR_M1 = (*NUMPTR_M1 < numptr_val);
 213                 } else if (op == TOK_LE) {
 214                         *NUMPTR_M1 = (*NUMPTR_M1 <= numptr_val);
 215                 } else if (op == TOK_MUL) {
 216                         *NUMPTR_M1 *= numptr_val;
 217                 } else if (op == TOK_ADD) {
 218                         *NUMPTR_M1 += numptr_val;
 219                 } else if (op == TOK_SUB) {
 220                         *NUMPTR_M1 -= numptr_val;
 221                 }
 222                 /* zero divisor check */
 223                 else if (numptr_val == 0) {
 224                         return -2;
 225                 } else if (op == TOK_DIV) {
 226                         *NUMPTR_M1 /= numptr_val;
 227                 } else if (op == TOK_REM) {
 228                         *NUMPTR_M1 %= numptr_val;
 229                 }
 230                 /* WARNING!!!  WARNING!!!  WARNING!!! */
 231                 /* Any new operators should be added BEFORE the zero divisor check! */
 232         }
 233         return 0;
 234   err:
 235         return (-1);
 236 }
 237
 238 static const char endexpression[] = ")";
 239
 240 /* + and - (in that order) must be last */
 241 static const char op_char[] = "!<>=|&*/%~()+-";
 242 static const char op_token[] = {
 243         /* paired with equal */
 244         TOK_NE, TOK_LE, TOK_GE,
 245         /* paired with self -- note: ! is special-cased below */
 246         TOK_ERROR, TOK_LSHIFT, TOK_RSHIFT, TOK_EQ, TOK_OR, TOK_AND,
 247         /* singles */
 248         TOK_NOT, TOK_LT, TOK_GT, TOK_ERROR, TOK_BOR, TOK_BAND,
 249         TOK_MUL, TOK_DIV, TOK_REM, TOK_BNOT, TOK_LPAREN, TOK_RPAREN,
 250         TOK_ADD, TOK_SUB, TOK_UPLUS, TOK_UMINUS
 251 };
 252
 253 #define NUM_PAIR_EQUAL  3
 254 #define NUM_PAIR_SAME   6
 255
 256 extern long arith(const char *expr, int *errcode)
 257 {
 258         register char arithval; /* Current character under analysis */
 259         operator    lasttok, op;
 260         unsigned char prec;
 261         const char *p = endexpression;
 262         size_t datasizes = strlen(expr);
 263
 264         /* Stack of integers */
 265         /* The proof that there can be no more than strlen(startbuf)/2+1 integers
 266          * in any given correct or incorrect expression is left as an excersize to
 267          * the reader. */
 268         long *numstack = alloca(((datasizes + 1) / 2) * sizeof(long));
 269         long *numstackptr = numstack;
 270
 271         /* Stack of operator tokens */
 272         operator * stack = alloca((datasizes + 1) * sizeof(operator));
 273         operator * stackptr = stack;
 274
 275         /* start off with a left paren */
 276         *stackptr++ = lasttok = TOK_LPAREN;
 277
 278   loop:
 279         if ((arithval = *expr) == 0) {
 280                 /* Null expression. */
 281                 if (p == endexpression) {
 282                         return (*errcode = 0);
 283                 }
 284
 285                 /* This is only reached after all tokens have been extracted from the
 286                  * input stream. If there are still tokens on the operator stack, they
 287                  * are to be applied in order. At the end, there should be a final
 288                  * result on the integer stack */
 289
 290                 if (expr != endexpression + 1) {        /* If we haven't done so already, */
 291                         expr = endexpression;   /* append a closing right paren */
 292                         goto loop;      /* and let the loop process it. */
 293                 }
 294                 /* At this point, we're done with the expression. */
 295                 if (numstackptr != numstack + 1) {      /* ... but if there isn't, it's bad */
 296                   err:
 297                         return (*errcode = -1);
 298                         /* NOTREACHED */
 299                 }
 300                 return *numstack;
 301         } else {
 302                 /* Continue processing the expression.  */
 303                 /* Skip whitespace */
 304                 if (isspace(arithval)) {
 305                         goto prologue;
 306                 }
 307                 /* isdigit ? */
 308                 if ((unsigned) arithval - '0' <= 9) {
 309                         *numstackptr++ = strtol(expr, (char **) &expr, 10);
 310                         lasttok = TOK_NUM;
 311                         goto loop;
 312                 }
 313 #if 1
 314                 if ((p = strchr(op_char, arithval)) == NULL) {
 315                         goto err;
 316                 }
 317 #else
 318                 for (p = op_char; *p != arithval; p++) {
 319                         if (!*p) {
 320                                 goto err;
 321                         }
 322                 }
 323 #endif
 324                 p = op_token + (int) (p - op_char);
 325                 ++expr;
 326                 if ((p >= op_token + NUM_PAIR_EQUAL) || (*expr != '=')) {
 327                         p += NUM_PAIR_EQUAL;
 328                         if ((p >= op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL)
 329                                 || (*expr != arithval) || (arithval == '!')) {
 330                                 --expr;
 331                                 /* single = */
 332                                 if (arithval == '=') {
 333                                         goto err;
 334                                 }
 335                                 p += NUM_PAIR_SAME;
 336                                 /* Plus and minus are binary (not unary) _only_ if the last
 337                                  * token was as number, or a right paren (which pretends to be
 338                                  * a number, since it evaluates to one). Think about it.
 339                                  * It makes sense. */
 340                                 if ((lasttok != TOK_NUM)
 341                                         && (p >=
 342                                                 (op_token + NUM_PAIR_SAME + NUM_PAIR_EQUAL +
 343                                                  sizeof(op_char) - 2))) {
 344                                         /* Unary plus or minus */
 345                                         p += 2;
 346                                 }
 347                         }
 348                 }
 349                 op = *p;
 350
 351                 /* We don't want a unary operator to cause recursive descent on the
 352                  * stack, because there can be many in a row and it could cause an
 353                  * operator to be evaluated before its argument is pushed onto the
 354                  * integer stack. */
 355                 /* But for binary operators, "apply" everything on the operator
 356                  * stack until we find an operator with a lesser priority than the
 357                  * one we have just extracted. */
 358                 /* Left paren is given the lowest priority so it will never be
 359                  * "applied" in this way */
 360                 prec = PREC(op);
 361
 362                 /* not left paren or unary */
 363                 if ((prec > 0) && (prec != UNARYPREC)) {
 364                         /* binary op must be preceded by a num */
 365                         if (lasttok != TOK_NUM) {
 366                                 goto err;
 367                         }
 368                         while (stackptr != stack) {
 369                                 if (op == TOK_RPAREN) {
 370                                         /* The algorithm employed here is simple: while we don't
 371                                          * hit an open paren nor the bottom of the stack, pop
 372                                          * tokens and apply them */
 373                                         if (stackptr[-1] == TOK_LPAREN) {
 374                                                 --stackptr;
 375
 376                                                 /* Any operator directly after a */
 377                                                 lasttok = TOK_NUM;
 378
 379                                                 /* close paren should consider itself binary */
 380                                                 goto prologue;
 381                                         }
 382                                 } else if (PREC(stackptr[-1]) < prec) {
 383                                         break;
 384                                 }
 385                                 *errcode = ARITH_APPLY(*--stackptr);
 386                                 if (*errcode) {
 387                                         return *errcode;
 388                                 }
 389                         }
 390                         if (op == TOK_RPAREN) {
 391                                 goto err;
 392                         }
 393                 }
 394
 395                 /* Push this operator to the stack and remember it. */
 396                 *stackptr++ = lasttok = op;
 397
 398           prologue:
 399                 ++expr;
 400                 goto loop;
 401         }
 402 }