networking/tls_pstm.c

   1 /*
   2  * Copyright (C) 2017 Denys Vlasenko
   3  *
   4  * Licensed under GPLv2, see file LICENSE in this source tree.
   5  */
   6 #include "tls.h"
   7
   8 /**
   9  *      @file    pstm.c
  10  *      @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
  11  *
  12  *      Multiprecision number implementation.
  13  */
  14 /*
  15  *      Copyright (c) 2013-2015 INSIDE Secure Corporation
  16  *      Copyright (c) PeerSec Networks, 2002-2011
  17  *      All Rights Reserved
  18  *
  19  *      The latest version of this code is available at http://www.matrixssl.org
  20  *
  21  *      This software is open source; you can redistribute it and/or modify
  22  *      it under the terms of the GNU General Public License as published by
  23  *      the Free Software Foundation; either version 2 of the License, or
  24  *      (at your option) any later version.
  25  *
  26  *      This General Public License does NOT permit incorporating this software
  27  *      into proprietary programs.  If you are unable to comply with the GPL, a
  28  *      commercial license for this software may be purchased from INSIDE at
  29  *      http://www.insidesecure.com/eng/Company/Locations
  30  *
  31  *      This program is distributed in WITHOUT ANY WARRANTY; without even the
  32  *      implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  33  *      See the GNU General Public License for more details.
  34  *
  35  *      You should have received a copy of the GNU General Public License
  36  *      along with this program; if not, write to the Free Software
  37  *      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  38  *      http://www.gnu.org/copyleft/gpl.html
  39  */
  40 /******************************************************************************/
  41
  42 ///bbox
  43 //#include "../cryptoApi.h"
  44 #ifndef DISABLE_PSTM
  45
  46 static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c);
  47
  48 /******************************************************************************/
  49 /*
  50         init an pstm_int for a given size
  51  */
  52 int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size)
  53 {
  54 //      uint16          x;
  55
  56 /*
  57         alloc mem
  58  */
  59         a->dp = xzalloc(sizeof (pstm_digit) * size);
  60         a->pool = pool;
  61         a->used  = 0;
  62         a->alloc = (int16)size;
  63         a->sign  = PSTM_ZPOS;
  64 /*
  65         zero the digits
  66  */
  67 ///bbox
  68 //      for (x = 0; x < size; x++) {
  69 //              a->dp[x] = 0;
  70 //      }
  71         return PSTM_OKAY;
  72 }
  73
  74 /******************************************************************************/
  75 /*
  76         Init a new pstm_int.
  77 */
  78 int32 pstm_init(psPool_t *pool, pstm_int * a)
  79 {
  80 //      int32           i;
  81 /*
  82         allocate memory required and clear it
  83  */
  84         a->dp = xzalloc(sizeof (pstm_digit) * PSTM_DEFAULT_INIT);
  85 /*
  86         set the digits to zero
  87  */
  88 ///bbox
  89 //      for (i = 0; i < PSTM_DEFAULT_INIT; i++) {
  90 //              a->dp[i] = 0;
  91 //      }
  92 /*
  93         set the used to zero, allocated digits to the default precision and sign
  94         to positive
  95  */
  96         a->pool = pool;
  97         a->used  = 0;
  98         a->alloc = PSTM_DEFAULT_INIT;
  99         a->sign  = PSTM_ZPOS;
 100
 101         return PSTM_OKAY;
 102 }
 103
 104 /******************************************************************************/
 105 /*
 106         Grow as required
 107  */
 108 int32 pstm_grow(pstm_int * a, int16 size)
 109 {
 110         int16                   i;
 111         pstm_digit              *tmp;
 112
 113 /*
 114         If the alloc size is smaller alloc more ram.
 115  */
 116         if (a->alloc < size) {
 117 /*
 118                 Reallocate the array a->dp
 119
 120                 We store the return in a temporary variable in case the operation
 121                 failed we don't want to overwrite the dp member of a.
 122 */
 123                 tmp = xrealloc(a->dp, sizeof (pstm_digit) * size);
 124 /*
 125                 reallocation succeeded so set a->dp
 126  */
 127                 a->dp = tmp;
 128 /*
 129                 zero excess digits
 130  */
 131                 i                       = a->alloc;
 132                 a->alloc        = size;
 133                 for (; i < a->alloc; i++) {
 134                         a->dp[i] = 0;
 135                 }
 136         }
 137         return PSTM_OKAY;
 138 }
 139
 140 /******************************************************************************/
 141 /*
 142         copy, b = a (b must be pre-allocated)
 143  */
 144 int32 pstm_copy(pstm_int * a, pstm_int * b)
 145 {
 146         int32   res, n;
 147
 148 /*
 149         If dst == src do nothing
 150  */
 151         if (a == b) {
 152                 return PSTM_OKAY;
 153         }
 154 /*
 155         Grow dest
 156  */
 157         if (b->alloc < a->used) {
 158                 if ((res = pstm_grow (b, a->used)) != PSTM_OKAY) {
 159                         return res;
 160                 }
 161         }
 162 /*
 163         Zero b and copy the parameters over
 164  */
 165         {
 166                 register pstm_digit *tmpa, *tmpb;
 167
 168                 /* pointer aliases */
 169                 /* source */
 170                 tmpa = a->dp;
 171
 172                 /* destination */
 173                 tmpb = b->dp;
 174
 175                 /* copy all the digits */
 176                 for (n = 0; n < a->used; n++) {
 177                         *tmpb++ = *tmpa++;
 178                 }
 179
 180                 /* clear high digits */
 181                 for (; n < b->used; n++) {
 182                         *tmpb++ = 0;
 183                 }
 184         }
 185 /*
 186         copy used count and sign
 187  */
 188         b->used = a->used;
 189         b->sign = a->sign;
 190         return PSTM_OKAY;
 191 }
 192
 193 /******************************************************************************/
 194 /*
 195         Trim unused digits
 196
 197         This is used to ensure that leading zero digits are trimed and the
 198         leading "used" digit will be non-zero. Typically very fast.  Also fixes
 199         the sign if there are no more leading digits
 200 */
 201 void pstm_clamp(pstm_int * a)
 202 {
 203 /*      decrease used while the most significant digit is zero. */
 204         while (a->used > 0 && a->dp[a->used - 1] == 0) {
 205                 --(a->used);
 206         }
 207 /*      reset the sign flag if used == 0 */
 208         if (a->used == 0) {
 209                 a->sign = PSTM_ZPOS;
 210         }
 211 }
 212
 213 /******************************************************************************/
 214 /*
 215         clear one (frees).
 216  */
 217 void pstm_clear(pstm_int * a)
 218 {
 219         int32           i;
 220 /*
 221         only do anything if a hasn't been freed previously
 222  */
 223         if (a != NULL && a->dp != NULL) {
 224 /*
 225                 first zero the digits
 226  */
 227                 for (i = 0; i < a->used; i++) {
 228                         a->dp[i] = 0;
 229                 }
 230
 231                 psFree (a->dp, a->pool);
 232 /*
 233                 reset members to make debugging easier
 234  */
 235                 a->dp           = NULL;
 236                 a->alloc        = a->used = 0;
 237                 a->sign         = PSTM_ZPOS;
 238         }
 239 }
 240
 241 /******************************************************************************/
 242 /*
 243         clear many (frees).
 244  */
 245 void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2,
 246                                         pstm_int *mp3, pstm_int *mp4, pstm_int *mp5,
 247                                         pstm_int *mp6, pstm_int *mp7)
 248 {
 249         int32           n;              /* Number of ok inits */
 250
 251         pstm_int        *tempArray[9];
 252
 253         tempArray[0] = mp0;
 254         tempArray[1] = mp1;
 255         tempArray[2] = mp2;
 256         tempArray[3] = mp3;
 257         tempArray[4] = mp4;
 258         tempArray[5] = mp5;
 259         tempArray[6] = mp6;
 260         tempArray[7] = mp7;
 261         tempArray[8] = NULL;
 262
 263         for (n = 0; tempArray[n] != NULL; n++) {
 264                 if ((tempArray[n] != NULL) && (tempArray[n]->dp != NULL)) {
 265                         pstm_clear(tempArray[n]);
 266                 }
 267         }
 268 }
 269
 270 /******************************************************************************/
 271 /*
 272         Set to zero.
 273  */
 274 void pstm_zero(pstm_int * a)
 275 {
 276         int32           n;
 277         pstm_digit      *tmp;
 278
 279         a->sign = PSTM_ZPOS;
 280         a->used = 0;
 281
 282         tmp = a->dp;
 283         for (n = 0; n < a->alloc; n++) {
 284                 *tmp++ = 0;
 285         }
 286 }
 287
 288
 289 /******************************************************************************/
 290 /*
 291         Compare maginitude of two ints (unsigned).
 292  */
 293 int32 pstm_cmp_mag(pstm_int * a, pstm_int * b)
 294 {
 295         int16           n;
 296         pstm_digit      *tmpa, *tmpb;
 297
 298 /*
 299         compare based on # of non-zero digits
 300  */
 301         if (a->used > b->used) {
 302                 return PSTM_GT;
 303         }
 304
 305         if (a->used < b->used) {
 306                 return PSTM_LT;
 307         }
 308
 309         /* alias for a */
 310         tmpa = a->dp + (a->used - 1);
 311
 312         /* alias for b */
 313         tmpb = b->dp + (a->used - 1);
 314
 315 /*
 316         compare based on digits
 317  */
 318         for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
 319                 if (*tmpa > *tmpb) {
 320                         return PSTM_GT;
 321                 }
 322                 if (*tmpa < *tmpb) {
 323                         return PSTM_LT;
 324                 }
 325         }
 326         return PSTM_EQ;
 327 }
 328
 329 /******************************************************************************/
 330 /*
 331         Compare two ints (signed)
 332  */
 333 int32 pstm_cmp(pstm_int * a, pstm_int * b)
 334 {
 335 /*
 336         compare based on sign
 337  */
 338         if (a->sign != b->sign) {
 339                 if (a->sign == PSTM_NEG) {
 340                         return PSTM_LT;
 341                 } else {
 342                         return PSTM_GT;
 343                 }
 344         }
 345 /*
 346         compare digits
 347  */
 348         if (a->sign == PSTM_NEG) {
 349                 /* if negative compare opposite direction */
 350                 return pstm_cmp_mag(b, a);
 351         } else {
 352                 return pstm_cmp_mag(a, b);
 353         }
 354 }
 355
 356 /******************************************************************************/
 357 /*
 358         pstm_ints can be initialized more precisely when they will populated
 359         using pstm_read_unsigned_bin since the length of the byte stream is known
 360 */
 361 int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a, uint32 len)
 362 {
 363         int32 size;
 364 /*
 365         Need to set this based on how many words max it will take to store the bin.
 366         The magic + 2:
 367                 1 to round up for the remainder of this integer math
 368                 1 for the initial carry of '1' bits that fall between DIGIT_BIT and 8
 369 */
 370         size = (((len / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
 371                 / DIGIT_BIT) + 2;
 372         return pstm_init_size(pool, a, size);
 373 }
 374
 375
 376 /******************************************************************************/
 377 /*
 378         Reads a unsigned char array into pstm_int format.  User should have
 379         called pstm_init_for_read_unsigned_bin first.  There is some grow logic
 380         here if the default pstm_init was used but we don't really want to hit it.
 381 */
 382 int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c)
 383 {
 384         /* zero the int */
 385         pstm_zero (a);
 386
 387 /*
 388         If we know the endianness of this architecture, and we're using
 389         32-bit pstm_digits, we can optimize this
 390 */
 391 #if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(PSTM_64BIT)
 392   /* But not for both simultaneously */
 393 #if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
 394 #error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
 395 #endif
 396         {
 397                 unsigned char *pd;
 398                 if ((unsigned)c > (PSTM_MAX_SIZE * sizeof(pstm_digit))) {
 399                         uint32 excess = c - (PSTM_MAX_SIZE * sizeof(pstm_digit));
 400                         c -= excess;
 401                         b += excess;
 402                 }
 403                 a->used = (int16)((c + sizeof(pstm_digit) - 1)/sizeof(pstm_digit));
 404                 if (a->alloc < a->used) {
 405                         if (pstm_grow(a, a->used) != PSTM_OKAY) {
 406                                 return PSTM_MEM;
 407                         }
 408                 }
 409                 pd = (unsigned char *)a->dp;
 410                 /* read the bytes in */
 411 #ifdef ENDIAN_BIG
 412                 {
 413                         /* Use Duff's device to unroll the loop. */
 414                         int32 idx = (c - 1) & ~3;
 415                         switch (c % 4) {
 416                                 case 0: do { pd[idx+0] = *b++;
 417                                         case 3:      pd[idx+1] = *b++;
 418                                         case 2:      pd[idx+2] = *b++;
 419                                         case 1:      pd[idx+3] = *b++;
 420                                         idx -= 4;
 421                                 } while ((c -= 4) > 0);
 422                         }
 423                 }
 424 #else
 425                 for (c -= 1; c >= 0; c -= 1) {
 426                         pd[c] = *b++;
 427                 }
 428 #endif
 429         }
 430 #else
 431         /* Big enough based on the len? */
 432         a->used = (((c / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
 433                 / DIGIT_BIT) + 2;
 434
 435         if (a->alloc < a->used) {
 436                 if (pstm_grow(a, a->used) != PSTM_OKAY) {
 437                         return PSTM_MEM;
 438                 }
 439         }
 440         /* read the bytes in */
 441         for (; c > 0; c--) {
 442                 if (pstm_mul_2d (a, 8, a) != PSTM_OKAY) {
 443                         return PS_MEM_FAIL;
 444                 }
 445                 a->dp[0] |= *b++;
 446                 a->used += 1;
 447         }
 448 #endif
 449
 450         pstm_clamp (a);
 451         return PS_SUCCESS;
 452 }
 453
 454 /******************************************************************************/
 455 /*
 456 */
 457 int16 pstm_count_bits (pstm_int * a)
 458 {
 459         int16     r;
 460         pstm_digit q;
 461
 462         if (a->used == 0) {
 463                 return 0;
 464         }
 465
 466         /* get number of digits and add that */
 467         r = (a->used - 1) * DIGIT_BIT;
 468
 469         /* take the last digit and count the bits in it */
 470         q = a->dp[a->used - 1];
 471         while (q > ((pstm_digit) 0)) {
 472                 ++r;
 473                 q >>= ((pstm_digit) 1);
 474         }
 475         return r;
 476 }
 477
 478 /******************************************************************************/
 479 int32 pstm_unsigned_bin_size(pstm_int *a)
 480 {
 481         int32     size = pstm_count_bits (a);
 482         return (size / 8 + ((size & 7) != 0 ? 1 : 0));
 483 }
 484
 485 /******************************************************************************/
 486 void pstm_set(pstm_int *a, pstm_digit b)
 487 {
 488    pstm_zero(a);
 489    a->dp[0] = b;
 490    a->used  = a->dp[0] ? 1 : 0;
 491 }
 492
 493 /******************************************************************************/
 494 /*
 495         Right shift
 496 */
 497 void pstm_rshd(pstm_int *a, int16 x)
 498 {
 499         int16 y;
 500
 501         /* too many digits just zero and return */
 502         if (x >= a->used) {
 503                 pstm_zero(a);
 504                 return;
 505         }
 506
 507         /* shift */
 508         for (y = 0; y < a->used - x; y++) {
 509                 a->dp[y] = a->dp[y+x];
 510         }
 511
 512         /* zero rest */
 513         for (; y < a->used; y++) {
 514                 a->dp[y] = 0;
 515         }
 516
 517         /* decrement count */
 518         a->used -= x;
 519         pstm_clamp(a);
 520 }
 521
 522 /******************************************************************************/
 523 /*
 524         Shift left a certain amount of digits.
 525  */
 526 int32 pstm_lshd(pstm_int * a, int16 b)
 527 {
 528         int16   x;
 529         int32   res;
 530
 531 /*
 532         If its less than zero return.
 533  */
 534         if (b <= 0) {
 535                 return PSTM_OKAY;
 536         }
 537 /*
 538         Grow to fit the new digits.
 539  */
 540         if (a->alloc < a->used + b) {
 541                 if ((res = pstm_grow (a, a->used + b)) != PSTM_OKAY) {
 542                         return res;
 543                 }
 544         }
 545
 546         {
 547                 register pstm_digit *top, *bottom;
 548 /*
 549                 Increment the used by the shift amount then copy upwards.
 550  */
 551                 a->used += b;
 552
 553                 /* top */
 554                 top = a->dp + a->used - 1;
 555
 556                 /* base */
 557                 bottom = a->dp + a->used - 1 - b;
 558 /*
 559                 This is implemented using a sliding window except the window goes the
 560                 other way around.  Copying from the bottom to the top.
 561  */
 562                 for (x = a->used - 1; x >= b; x--) {
 563                         *top-- = *bottom--;
 564                 }
 565
 566                 /* zero the lower digits */
 567                 top = a->dp;
 568                 for (x = 0; x < b; x++) {
 569                         *top++ = 0;
 570                 }
 571         }
 572         return PSTM_OKAY;
 573 }
 574
 575 /******************************************************************************/
 576 /*
 577         computes a = 2**b
 578 */
 579 int32 pstm_2expt(pstm_int *a, int16 b)
 580 {
 581         int16     z;
 582
 583    /* zero a as per default */
 584         pstm_zero (a);
 585
 586         if (b < 0) {
 587                 return PSTM_OKAY;
 588         }
 589
 590         z = b / DIGIT_BIT;
 591         if (z >= PSTM_MAX_SIZE) {
 592                 return PS_LIMIT_FAIL;
 593         }
 594
 595         /* set the used count of where the bit will go */
 596         a->used = z + 1;
 597
 598         if (a->used > a->alloc) {
 599                 if (pstm_grow(a, a->used) != PSTM_OKAY) {
 600                         return PS_MEM_FAIL;
 601                 }
 602         }
 603
 604         /* put the single bit in its place */
 605         a->dp[z] = ((pstm_digit)1) << (b % DIGIT_BIT);
 606         return PSTM_OKAY;
 607 }
 608
 609 /******************************************************************************/
 610 /*
 611
 612 */
 613 int32 pstm_mul_2(pstm_int * a, pstm_int * b)
 614 {
 615         int32   res;
 616         int16   x, oldused;
 617
 618 /*
 619         grow to accomodate result
 620  */
 621         if (b->alloc < a->used + 1) {
 622                 if ((res = pstm_grow (b, a->used + 1)) != PSTM_OKAY) {
 623                         return res;
 624                 }
 625         }
 626         oldused = b->used;
 627         b->used = a->used;
 628
 629         {
 630                 register pstm_digit r, rr, *tmpa, *tmpb;
 631
 632                 /* alias for source */
 633                 tmpa = a->dp;
 634
 635                 /* alias for dest */
 636                 tmpb = b->dp;
 637
 638                 /* carry */
 639                 r = 0;
 640                 for (x = 0; x < a->used; x++) {
 641 /*
 642                         get what will be the *next* carry bit from the
 643                         MSB of the current digit
 644 */
 645                         rr = *tmpa >> ((pstm_digit)(DIGIT_BIT - 1));
 646 /*
 647                         now shift up this digit, add in the carry [from the previous]
 648 */
 649                         *tmpb++ = ((*tmpa++ << ((pstm_digit)1)) | r);
 650 /*
 651                         copy the carry that would be from the source
 652                         digit into the next iteration
 653 */
 654                         r = rr;
 655                 }
 656
 657                 /* new leading digit? */
 658                 if (r != 0 && b->used != (PSTM_MAX_SIZE-1)) {
 659                         /* add a MSB which is always 1 at this point */
 660                         *tmpb = 1;
 661                         ++(b->used);
 662                 }
 663 /*
 664                 now zero any excess digits on the destination that we didn't write to
 665 */
 666                 tmpb = b->dp + b->used;
 667                 for (x = b->used; x < oldused; x++) {
 668                         *tmpb++ = 0;
 669                 }
 670         }
 671         b->sign = a->sign;
 672         return PSTM_OKAY;
 673 }
 674
 675 /******************************************************************************/
 676 /*
 677         unsigned subtraction ||a|| >= ||b|| ALWAYS!
 678 */
 679 int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
 680 {
 681         int16           oldbused, oldused;
 682         int32           x;
 683         pstm_word       t;
 684
 685         if (b->used > a->used) {
 686                 return PS_LIMIT_FAIL;
 687         }
 688         if (c->alloc < a->used) {
 689                 if ((x = pstm_grow (c, a->used)) != PSTM_OKAY) {
 690                         return x;
 691                 }
 692         }
 693         oldused  = c->used;
 694         oldbused = b->used;
 695         c->used  = a->used;
 696         t = 0;
 697
 698         for (x = 0; x < oldbused; x++) {
 699                 t = ((pstm_word)a->dp[x]) - (((pstm_word)b->dp[x]) + t);
 700                 c->dp[x] = (pstm_digit)t;
 701                 t = (t >> DIGIT_BIT)&1;
 702         }
 703         for (; x < a->used; x++) {
 704                 t = ((pstm_word)a->dp[x]) - t;
 705                 c->dp[x] = (pstm_digit)t;
 706                 t = (t >> DIGIT_BIT);
 707         }
 708         for (; x < oldused; x++) {
 709                 c->dp[x] = 0;
 710         }
 711         pstm_clamp(c);
 712         return PSTM_OKAY;
 713 }
 714
 715 /******************************************************************************/
 716 /*
 717         unsigned addition
 718 */
 719 static int32 s_pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
 720 {
 721         int16                           x, y, oldused;
 722         register pstm_word      t, adp, bdp;
 723
 724         y = a->used;
 725         if (b->used > y) {
 726                 y = b->used;
 727         }
 728         oldused = c->used;
 729         c->used = y;
 730
 731         if (c->used > c->alloc) {
 732                 if (pstm_grow(c, c->used) != PSTM_OKAY) {
 733                         return PS_MEM_FAIL;
 734                 }
 735         }
 736
 737         t = 0;
 738         for (x = 0; x < y; x++) {
 739                 if (a->used < x) {
 740                         adp = 0;
 741                 } else {
 742                         adp = (pstm_word)a->dp[x];
 743                 }
 744                 if (b->used < x) {
 745                         bdp = 0;
 746                 } else {
 747                         bdp = (pstm_word)b->dp[x];
 748                 }
 749                 t         += (adp) + (bdp);
 750                 c->dp[x]   = (pstm_digit)t;
 751                 t        >>= DIGIT_BIT;
 752         }
 753         if (t != 0 && x < PSTM_MAX_SIZE) {
 754                 if (c->used == c->alloc) {
 755                         if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
 756                                 return PS_MEM_FAIL;
 757                         }
 758                 }
 759                 c->dp[c->used++] = (pstm_digit)t;
 760                 ++x;
 761         }
 762
 763         c->used = x;
 764         for (; x < oldused; x++) {
 765                 c->dp[x] = 0;
 766         }
 767         pstm_clamp(c);
 768         return PSTM_OKAY;
 769 }
 770
 771
 772 /******************************************************************************/
 773 /*
 774
 775 */
 776 int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
 777 {
 778         int32     res;
 779         int16     sa, sb;
 780
 781         sa = a->sign;
 782         sb = b->sign;
 783
 784         if (sa != sb) {
 785 /*
 786                 subtract a negative from a positive, OR a positive from a negative.
 787                 For both, ADD their magnitudes, and use the sign of the first number.
 788  */
 789                 c->sign = sa;
 790                 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
 791                         return res;
 792                 }
 793         } else {
 794 /*
 795                 subtract a positive from a positive, OR a negative from a negative.
 796                 First, take the difference between their magnitudes, then...
 797  */
 798                 if (pstm_cmp_mag (a, b) != PSTM_LT) {
 799                         /* Copy the sign from the first */
 800                         c->sign = sa;
 801                         /* The first has a larger or equal magnitude */
 802                         if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
 803                                 return res;
 804                         }
 805                 } else {
 806                         /* The result has the _opposite_ sign from the first number. */
 807                         c->sign = (sa == PSTM_ZPOS) ? PSTM_NEG : PSTM_ZPOS;
 808                         /* The second has a larger magnitude */
 809                         if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
 810                                 return res;
 811                         }
 812                 }
 813         }
 814         return PS_SUCCESS;
 815 }
 816
 817 /******************************************************************************/
 818 /*
 819         c = a - b
 820 */
 821 int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c)
 822 {
 823         pstm_int        tmp;
 824         int32           res;
 825
 826         if (pstm_init_size(pool, &tmp, sizeof(pstm_digit)) != PSTM_OKAY) {
 827                 return PS_MEM_FAIL;
 828         }
 829         pstm_set(&tmp, b);
 830         res = pstm_sub(a, &tmp, c);
 831         pstm_clear(&tmp);
 832         return res;
 833 }
 834
 835 /******************************************************************************/
 836 /*
 837         setups the montgomery reduction
 838 */
 839 int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho)
 840 {
 841         pstm_digit x, b;
 842
 843 /*
 844         fast inversion mod 2**k
 845         Based on the fact that
 846         XA = 1 (mod 2**n)       =>  (X(2-XA)) A = 1 (mod 2**2n)
 847                                                 =>  2*X*A - X*X*A*A = 1
 848                                                 =>  2*(1) - (1)     = 1
 849  */
 850         b = a->dp[0];
 851
 852         if ((b & 1) == 0) {
 853                 psTraceCrypto("pstm_montogomery_setup failure\n");
 854                 return PS_ARG_FAIL;
 855         }
 856
 857         x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
 858         x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
 859         x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
 860         x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
 861 #ifdef PSTM_64BIT
 862         x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
 863 #endif
 864         /* rho = -1/m mod b */
 865         *rho = (pstm_digit)(((pstm_word) 1 << ((pstm_word) DIGIT_BIT)) -
 866                 ((pstm_word)x));
 867         return PSTM_OKAY;
 868 }
 869
 870 /******************************************************************************/
 871 /*
 872  *      computes a = B**n mod b without division or multiplication useful for
 873  *      normalizing numbers in a Montgomery system.
 874  */
 875 int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b)
 876 {
 877         int32     x;
 878         int16     bits;
 879
 880         /* how many bits of last digit does b use */
 881         bits = pstm_count_bits (b) % DIGIT_BIT;
 882         if (!bits) bits = DIGIT_BIT;
 883
 884         /* compute A = B^(n-1) * 2^(bits-1) */
 885         if (b->used > 1) {
 886                 if ((x = pstm_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) !=
 887                                 PSTM_OKAY) {
 888                         return x;
 889                 }
 890         } else {
 891                 pstm_set(a, 1);
 892                 bits = 1;
 893         }
 894
 895         /* now compute C = A * B mod b */
 896         for (x = bits - 1; x < (int32)DIGIT_BIT; x++) {
 897                 if (pstm_mul_2 (a, a) != PSTM_OKAY) {
 898                         return PS_MEM_FAIL;
 899                 }
 900                 if (pstm_cmp_mag (a, b) != PSTM_LT) {
 901                         if (s_pstm_sub (a, b, a) != PSTM_OKAY) {
 902                                 return PS_MEM_FAIL;
 903                         }
 904                 }
 905         }
 906         return PSTM_OKAY;
 907 }
 908
 909 /******************************************************************************/
 910 /*
 911         c = a * 2**d
 912 */
 913 static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c)
 914 {
 915         pstm_digit      carry, carrytmp, shift;
 916         int16           x;
 917
 918         /* copy it */
 919         if (pstm_copy(a, c) != PSTM_OKAY) {
 920                 return PS_MEM_FAIL;
 921         }
 922
 923         /* handle whole digits */
 924         if (b >= DIGIT_BIT) {
 925                 if (pstm_lshd(c, b/DIGIT_BIT) != PSTM_OKAY) {
 926                         return PS_MEM_FAIL;
 927                 }
 928         }
 929         b %= DIGIT_BIT;
 930
 931         /* shift the digits */
 932         if (b != 0) {
 933                 carry = 0;
 934                 shift = DIGIT_BIT - b;
 935                 for (x = 0; x < c->used; x++) {
 936                         carrytmp = c->dp[x] >> shift;
 937                         c->dp[x] = (c->dp[x] << b) + carry;
 938                         carry = carrytmp;
 939                 }
 940                 /* store last carry if room */
 941                 if (carry && x < PSTM_MAX_SIZE) {
 942                         if (c->used == c->alloc) {
 943                                 if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
 944                                         return PS_MEM_FAIL;
 945                                 }
 946                         }
 947                         c->dp[c->used++] = carry;
 948                 }
 949         }
 950         pstm_clamp(c);
 951         return PSTM_OKAY;
 952 }
 953
 954 /******************************************************************************/
 955 /*
 956         c = a mod 2**d
 957 */
 958 static int32 pstm_mod_2d(pstm_int *a, int16 b, pstm_int *c)
 959 {
 960         int16   x;
 961
 962         /* zero if count less than or equal to zero */
 963         if (b <= 0) {
 964                 pstm_zero(c);
 965                 return PSTM_OKAY;
 966         }
 967
 968         /* get copy of input */
 969         if (pstm_copy(a, c) != PSTM_OKAY) {
 970                 return PS_MEM_FAIL;
 971         }
 972
 973         /* if 2**d is larger than we just return */
 974         if (b >= (DIGIT_BIT * a->used)) {
 975                 return PSTM_OKAY;
 976         }
 977
 978         /* zero digits above the last digit of the modulus */
 979         for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++)
 980         {
 981                 c->dp[x] = 0;
 982         }
 983         /* clear the digit that is not completely outside/inside the modulus */
 984         c->dp[b / DIGIT_BIT] &= ~((pstm_digit)0) >> (DIGIT_BIT - b);
 985         pstm_clamp (c);
 986         return PSTM_OKAY;
 987 }
 988
 989
 990 /******************************************************************************/
 991 /*
 992         c = a * b
 993 */
 994 int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c)
 995 {
 996         pstm_word       w;
 997         int32           res;
 998         int16           x, oldused;
 999
1000         if (c->alloc < a->used + 1) {
1001                 if ((res = pstm_grow (c, a->used + 1)) != PSTM_OKAY) {
1002                         return res;
1003                 }
1004         }
1005         oldused = c->used;
1006         c->used = a->used;
1007         c->sign = a->sign;
1008         w       = 0;
1009         for (x = 0; x < a->used; x++) {
1010                 w         = ((pstm_word)a->dp[x]) * ((pstm_word)b) + w;
1011                 c->dp[x]  = (pstm_digit)w;
1012                 w         = w >> DIGIT_BIT;
1013         }
1014         if (w != 0 && (a->used != PSTM_MAX_SIZE)) {
1015                 c->dp[c->used++] = (pstm_digit)w;
1016                 ++x;
1017         }
1018         for (; x < oldused; x++) {
1019                 c->dp[x] = 0;
1020         }
1021         pstm_clamp(c);
1022         return PSTM_OKAY;
1023 }
1024
1025 /******************************************************************************/
1026 /*
1027         c = a / 2**b
1028 */
1029 int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c,
1030                                         pstm_int *d)
1031 {
1032         pstm_digit      D, r, rr;
1033         int32           res;
1034         int16           x;
1035         pstm_int        t;
1036
1037         /* if the shift count is <= 0 then we do no work */
1038         if (b <= 0) {
1039                 if (pstm_copy (a, c) != PSTM_OKAY) {
1040                         return PS_MEM_FAIL;
1041                 }
1042                 if (d != NULL) {
1043                         pstm_zero (d);
1044                 }
1045                 return PSTM_OKAY;
1046         }
1047
1048         /* get the remainder */
1049         if (d != NULL) {
1050                 if (pstm_init(pool, &t) != PSTM_OKAY) {
1051                         return PS_MEM_FAIL;
1052                 }
1053                 if (pstm_mod_2d (a, b, &t) != PSTM_OKAY) {
1054                         res = PS_MEM_FAIL;
1055                         goto LBL_DONE;
1056                 }
1057         }
1058
1059         /* copy */
1060         if (pstm_copy(a, c) != PSTM_OKAY) {
1061                 res = PS_MEM_FAIL;
1062                 goto LBL_DONE;
1063         }
1064
1065         /* shift by as many digits in the bit count */
1066         if (b >= (int32)DIGIT_BIT) {
1067                 pstm_rshd (c, b / DIGIT_BIT);
1068         }
1069
1070         /* shift any bit count < DIGIT_BIT */
1071         D = (pstm_digit) (b % DIGIT_BIT);
1072         if (D != 0) {
1073                 register pstm_digit *tmpc, mask, shift;
1074
1075                 /* mask */
1076                 mask = (((pstm_digit)1) << D) - 1;
1077
1078                 /* shift for lsb */
1079                 shift = DIGIT_BIT - D;
1080
1081                 /* alias */
1082                 tmpc = c->dp + (c->used - 1);
1083
1084                 /* carry */
1085                 r = 0;
1086                 for (x = c->used - 1; x >= 0; x--) {
1087                         /* get the lower  bits of this word in a temp */
1088                         rr = *tmpc & mask;
1089
1090                         /* shift the current word and mix in the carry bits from previous */
1091                         *tmpc = (*tmpc >> D) | (r << shift);
1092                         --tmpc;
1093
1094                         /* set the carry to the carry bits of the current word above */
1095                         r = rr;
1096                 }
1097         }
1098         pstm_clamp (c);
1099
1100         res = PSTM_OKAY;
1101 LBL_DONE:
1102         if (d != NULL) {
1103                 if (pstm_copy(&t, d) != PSTM_OKAY) {
1104                         res = PS_MEM_FAIL;
1105                 }
1106                 pstm_clear(&t);
1107         }
1108         return res;
1109 }
1110
1111 /******************************************************************************/
1112 /*
1113         b = a/2
1114 */
1115 int32 pstm_div_2(pstm_int * a, pstm_int * b)
1116 {
1117         int16   x, oldused;
1118
1119         if (b->alloc < a->used) {
1120                 if (pstm_grow(b, a->used) != PSTM_OKAY) {
1121                         return PS_MEM_FAIL;
1122                 }
1123         }
1124         oldused = b->used;
1125         b->used = a->used;
1126         {
1127                 register pstm_digit r, rr, *tmpa, *tmpb;
1128
1129                 /* source alias */
1130                 tmpa = a->dp + b->used - 1;
1131
1132                 /* dest alias */
1133                 tmpb = b->dp + b->used - 1;
1134
1135                 /* carry */
1136                 r = 0;
1137                 for (x = b->used - 1; x >= 0; x--) {
1138                         /* get the carry for the next iteration */
1139                         rr = *tmpa & 1;
1140
1141                         /* shift the current digit, add in carry and store */
1142                         *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1143
1144                         /* forward carry to next iteration */
1145                         r = rr;
1146                 }
1147
1148                 /* zero excess digits */
1149                 tmpb = b->dp + b->used;
1150                 for (x = b->used; x < oldused; x++) {
1151                         *tmpb++ = 0;
1152                 }
1153         }
1154         b->sign = a->sign;
1155         pstm_clamp (b);
1156         return PSTM_OKAY;
1157 }
1158
1159 /******************************************************************************/
1160 /*
1161         Creates "a" then copies b into it
1162  */
1163 int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b, int16 toSqr)
1164 {
1165         int16   x;
1166         int32   res;
1167
1168         if (a == b) {
1169                 return PSTM_OKAY;
1170         }
1171         x = b->alloc;
1172
1173         if (toSqr) {
1174 /*
1175                 Smart-size:  Increasing size of a if b->used is roughly half
1176                 of b->alloc because usage has shown that a lot of these copies
1177                 go on to be squared and need these extra digits
1178 */
1179                 if ((b->used * 2) + 2 >= x) {
1180                         x = (b->used * 2) + 3;
1181                 }
1182         }
1183         if ((res = pstm_init_size(pool, a, x)) != PSTM_OKAY) {
1184                 return res;
1185         }
1186         return pstm_copy(b, a);
1187 }
1188
1189 /******************************************************************************/
1190 /*
1191         With some compilers, we have seen issues linking with the builtin
1192         64 bit division routine. The issues with either manifest in a failure
1193         to find 'udivdi3' at link time, or a runtime invalid instruction fault
1194         during an RSA operation.
1195         The routine below divides a 64 bit unsigned int by a 32 bit unsigned int
1196         explicitly, rather than using the division operation
1197                 The 64 bit result is placed in the 'numerator' parameter
1198                 The 32 bit mod (remainder) of the division is the return parameter
1199         Based on implementations by:
1200                 Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
1201                 Copyright (C) 1999 Hewlett-Packard Co
1202                 Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
1203 */
1204 #if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
1205 static uint32 psDiv64(uint64 *numerator, uint32 denominator)
1206 {
1207         uint64  rem = *numerator;
1208         uint64  b = denominator;
1209         uint64  res = 0;
1210         uint64  d = 1;
1211         uint32  high = rem >> 32;
1212
1213         if (high >= denominator) {
1214                 high /= denominator;
1215                 res = (uint64) high << 32;
1216                 rem -= (uint64) (high * denominator) << 32;
1217         }
1218         while ((int64)b > 0 && b < rem) {
1219                 b = b+b;
1220                 d = d+d;
1221         }
1222         do {
1223                 if (rem >= b) {
1224                         rem -= b;
1225                         res += d;
1226                 }
1227                 b >>= 1;
1228                 d >>= 1;
1229         } while (d);
1230         *numerator = res;
1231         return rem;
1232 }
1233 #endif /* USE_MATRIX_DIV64 */
1234
1235 #if defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
1236 typedef unsigned long   uint128 __attribute__ ((mode(TI)));
1237 static uint64 psDiv128(uint128 *numerator, uint64 denominator)
1238 {
1239         uint128 rem = *numerator;
1240         uint128 b = denominator;
1241         uint128 res = 0;
1242         uint128 d = 1;
1243         uint64  high = rem >> 64;
1244
1245         if (high >= denominator) {
1246                 high /= denominator;
1247                 res = (uint128) high << 64;
1248                 rem -= (uint128) (high * denominator) << 64;
1249         }
1250         while ((uint128)b > 0 && b < rem) {
1251                 b = b+b;
1252                 d = d+d;
1253         }
1254         do {
1255                 if (rem >= b) {
1256                         rem -= b;
1257                         res += d;
1258                 }
1259                 b >>= 1;
1260                 d >>= 1;
1261         } while (d);
1262         *numerator = res;
1263         return rem;
1264 }
1265 #endif /* USE_MATRIX_DIV128 */
1266
1267 /******************************************************************************/
1268 /*
1269         a/b => cb + d == a
1270 */
1271 int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1272                                 pstm_int *d)
1273 {
1274         pstm_int        q, x, y, t1, t2;
1275         int32           res;
1276         int16           n, t, i, norm, neg;
1277
1278         /* is divisor zero ? */
1279         if (pstm_iszero (b) == 1) {
1280                 return PS_LIMIT_FAIL;
1281         }
1282
1283         /* if a < b then q=0, r = a */
1284         if (pstm_cmp_mag (a, b) == PSTM_LT) {
1285                 if (d != NULL) {
1286                         if (pstm_copy(a, d) != PSTM_OKAY) {
1287                                 return PS_MEM_FAIL;
1288                         }
1289                 }
1290                 if (c != NULL) {
1291                         pstm_zero (c);
1292                 }
1293                 return PSTM_OKAY;
1294         }
1295 /*
1296         Smart-size inits
1297 */
1298         if ((res = pstm_init_size(pool, &t1, a->alloc)) != PSTM_OKAY) {
1299                 return res;
1300         }
1301         if ((res = pstm_init_size(pool, &t2, 3)) != PSTM_OKAY) {
1302                 goto LBL_T1;
1303         }
1304         if ((res = pstm_init_copy(pool, &x, a, 0)) != PSTM_OKAY) {
1305                 goto LBL_T2;
1306         }
1307 /*
1308         Used to be an init_copy on b but pstm_grow was always hit with triple size
1309 */
1310         if ((res = pstm_init_size(pool, &y, b->used * 3)) != PSTM_OKAY) {
1311                 goto LBL_X;
1312         }
1313         if ((res = pstm_copy(b, &y)) != PSTM_OKAY) {
1314                 goto LBL_Y;
1315         }
1316
1317         /* fix the sign */
1318         neg = (a->sign == b->sign) ? PSTM_ZPOS : PSTM_NEG;
1319         x.sign = y.sign = PSTM_ZPOS;
1320
1321         /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1322         norm = pstm_count_bits(&y) % DIGIT_BIT;
1323         if (norm < (int32)(DIGIT_BIT-1)) {
1324                 norm = (DIGIT_BIT-1) - norm;
1325                 if ((res = pstm_mul_2d(&x, norm, &x)) != PSTM_OKAY) {
1326                         goto LBL_Y;
1327                 }
1328                 if ((res = pstm_mul_2d(&y, norm, &y)) != PSTM_OKAY) {
1329                         goto LBL_Y;
1330                 }
1331         } else {
1332                 norm = 0;
1333         }
1334
1335         /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1336         n = x.used - 1;
1337         t = y.used - 1;
1338
1339         if ((res = pstm_init_size(pool, &q, n - t + 1)) != PSTM_OKAY) {
1340                 goto LBL_Y;
1341         }
1342         q.used = n - t + 1;
1343
1344         /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1345         if ((res = pstm_lshd(&y, n - t)) != PSTM_OKAY) { /* y = y*b**{n-t} */
1346                 goto LBL_Q;
1347         }
1348
1349         while (pstm_cmp (&x, &y) != PSTM_LT) {
1350                 ++(q.dp[n - t]);
1351                 if ((res = pstm_sub(&x, &y, &x)) != PSTM_OKAY) {
1352                         goto LBL_Q;
1353                 }
1354         }
1355
1356         /* reset y by shifting it back down */
1357         pstm_rshd (&y, n - t);
1358
1359         /* step 3. for i from n down to (t + 1) */
1360         for (i = n; i >= (t + 1); i--) {
1361                 if (i > x.used) {
1362                         continue;
1363                 }
1364
1365                 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1366                  * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1367                 if (x.dp[i] == y.dp[t]) {
1368                         q.dp[i - t - 1] = (pstm_digit)((((pstm_word)1) << DIGIT_BIT) - 1);
1369                 } else {
1370                         pstm_word tmp;
1371                         tmp = ((pstm_word) x.dp[i]) << ((pstm_word) DIGIT_BIT);
1372                         tmp |= ((pstm_word) x.dp[i - 1]);
1373 #if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
1374                         psDiv64(&tmp, y.dp[t]);
1375 #elif defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
1376                         psDiv128(&tmp, y.dp[t]);
1377 #else
1378                         tmp /= ((pstm_word) y.dp[t]);
1379 #endif /* USE_MATRIX_DIV64 */
1380                         q.dp[i - t - 1] = (pstm_digit) (tmp);
1381                 }
1382
1383                 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1384                          xi * b**2 + xi-1 * b + xi-2
1385
1386                         do q{i-t-1} -= 1;
1387                 */
1388                 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
1389                 do {
1390                         q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
1391
1392                         /* find left hand */
1393                         pstm_zero (&t1);
1394                         t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1395                         t1.dp[1] = y.dp[t];
1396                         t1.used = 2;
1397                         if ((res = pstm_mul_d (&t1, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1398                                 goto LBL_Q;
1399                         }
1400
1401                         /* find right hand */
1402                         t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1403                         t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1404                         t2.dp[2] = x.dp[i];
1405                         t2.used = 3;
1406                 } while (pstm_cmp_mag(&t1, &t2) == PSTM_GT);
1407
1408                 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1409                 if ((res = pstm_mul_d(&y, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1410                         goto LBL_Q;
1411                 }
1412
1413                 if ((res = pstm_lshd(&t1, i - t - 1)) != PSTM_OKAY) {
1414                         goto LBL_Q;
1415                 }
1416
1417                 if ((res = pstm_sub(&x, &t1, &x)) != PSTM_OKAY) {
1418                         goto LBL_Q;
1419                 }
1420
1421                 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1422                 if (x.sign == PSTM_NEG) {
1423                         if ((res = pstm_copy(&y, &t1)) != PSTM_OKAY) {
1424                                 goto LBL_Q;
1425                         }
1426                         if ((res = pstm_lshd (&t1, i - t - 1)) != PSTM_OKAY) {
1427                                 goto LBL_Q;
1428                         }
1429                         if ((res = pstm_add (&x, &t1, &x)) != PSTM_OKAY) {
1430                                 goto LBL_Q;
1431                         }
1432                         q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
1433                 }
1434         }
1435 /*
1436         now q is the quotient and x is the remainder (which we have to normalize)
1437 */
1438         /* get sign before writing to c */
1439         x.sign = x.used == 0 ? PSTM_ZPOS : a->sign;
1440
1441         if (c != NULL) {
1442                 pstm_clamp (&q);
1443                 if (pstm_copy (&q, c) != PSTM_OKAY) {
1444                         res = PS_MEM_FAIL;
1445                         goto LBL_Q;
1446                 }
1447                 c->sign = neg;
1448         }
1449
1450         if (d != NULL) {
1451                 if ((res = pstm_div_2d (pool, &x, norm, &x, NULL)) != PSTM_OKAY) {
1452                         goto LBL_Q;
1453                 }
1454 /*
1455                 the following is a kludge, essentially we were seeing the right
1456                 remainder but with excess digits that should have been zero
1457  */
1458                 for (i = b->used; i < x.used; i++) {
1459                         x.dp[i] = 0;
1460                 }
1461                 pstm_clamp(&x);
1462                 if (pstm_copy (&x, d) != PSTM_OKAY) {
1463                         res = PS_MEM_FAIL;
1464                         goto LBL_Q;
1465                 }
1466         }
1467
1468         res = PSTM_OKAY;
1469
1470 LBL_Q:pstm_clear (&q);
1471 LBL_Y:pstm_clear (&y);
1472 LBL_X:pstm_clear (&x);
1473 LBL_T2:pstm_clear (&t2);
1474 LBL_T1:pstm_clear (&t1);
1475
1476         return res;
1477 }
1478
1479 /******************************************************************************/
1480 /*
1481         Swap the elements of two integers, for cases where you can't simply swap
1482         the pstm_int pointers around
1483 */
1484 void pstm_exch(pstm_int * a, pstm_int * b)
1485 {
1486         pstm_int                t;
1487
1488         t       = *a;
1489         *a      = *b;
1490         *b      = t;
1491 }
1492
1493 /******************************************************************************/
1494 /*
1495         c = a mod b, 0 <= c < b
1496 */
1497 int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
1498 {
1499         pstm_int        t;
1500         int32           err;
1501 /*
1502         Smart-size
1503 */
1504         if ((err = pstm_init_size(pool, &t, b->alloc)) != PSTM_OKAY) {
1505                 return err;
1506         }
1507         if ((err = pstm_div(pool, a, b, NULL, &t)) != PSTM_OKAY) {
1508                 pstm_clear (&t);
1509                 return err;
1510         }
1511         if (t.sign != b->sign) {
1512                 err = pstm_add(&t, b, c);
1513         } else {
1514                 pstm_exch (&t, c);
1515         }
1516         pstm_clear (&t);
1517         return err;
1518 }
1519
1520 /******************************************************************************/
1521 /*
1522         d = a * b (mod c)
1523 */
1524 int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1525                         pstm_int *d)
1526 {
1527         int32           res;
1528         int16           size;
1529         pstm_int        tmp;
1530
1531 /*
1532         Smart-size pstm_inits.  d is an output that is influenced by this local 't'
1533         so don't shrink 'd' if it wants to becuase this will lead to an pstm_grow
1534         in RSA operations
1535 */
1536         size = a->used + b->used + 1;
1537         if ((a == d) && (size < a->alloc)) {
1538                 size = a->alloc;
1539         }
1540         if ((res = pstm_init_size(pool, &tmp, size)) != PSTM_OKAY) {
1541                 return res;
1542         }
1543         if ((res = pstm_mul_comba(pool, a, b, &tmp, NULL, 0)) != PSTM_OKAY) {
1544                 pstm_clear(&tmp);
1545                 return res;
1546         }
1547         res = pstm_mod(pool, &tmp, c, d);
1548         pstm_clear(&tmp);
1549         return res;
1550 }
1551
1552 /******************************************************************************/
1553 /*
1554  *      y = g**x (mod b)
1555  *      Some restrictions... x must be positive and < b
1556  */
1557 int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P,
1558                         pstm_int *Y)
1559 {
1560         pstm_int        M[32], res; /* Keep this winsize based: (1 << max_winsize) */
1561         pstm_digit      buf, mp;
1562         pstm_digit      *paD;
1563         int32           err, bitbuf;
1564         int16           bitcpy, bitcnt, mode, digidx, x, y, winsize;
1565         uint32          paDlen;
1566
1567         /* set window size from what user set as optimization */
1568         x = pstm_count_bits(X);
1569         if (x < 50) {
1570                 winsize = 2;
1571         } else {
1572                 winsize = PS_EXPTMOD_WINSIZE;
1573         }
1574
1575         /* now setup montgomery  */
1576         if ((err = pstm_montgomery_setup (P, &mp)) != PSTM_OKAY) {
1577                 return err;
1578         }
1579
1580         /* setup result */
1581         if ((err = pstm_init_size(pool, &res, (P->used * 2) + 1)) != PSTM_OKAY) {
1582                 return err;
1583         }
1584 /*
1585         create M table
1586         The M table contains powers of the input base, e.g. M[x] = G^x mod P
1587         The first half of the table is not computed though except for M[0] and M[1]
1588  */
1589         /* now we need R mod m */
1590         if ((err = pstm_montgomery_calc_normalization (&res, P)) != PSTM_OKAY) {
1591                 goto LBL_RES;
1592         }
1593 /*
1594         init M array
1595         init first cell
1596  */
1597         if ((err = pstm_init_size(pool, &M[1], res.used)) != PSTM_OKAY) {
1598                 goto LBL_RES;
1599         }
1600
1601         /* now set M[1] to G * R mod m */
1602         if (pstm_cmp_mag(P, G) != PSTM_GT) {
1603                 /* G > P so we reduce it first */
1604                 if ((err = pstm_mod(pool, G, P, &M[1])) != PSTM_OKAY) {
1605                         goto LBL_M;
1606                 }
1607         } else {
1608                 if ((err = pstm_copy(G, &M[1])) != PSTM_OKAY) {
1609                         goto LBL_M;
1610                 }
1611         }
1612         if ((err = pstm_mulmod (pool, &M[1], &res, P, &M[1])) != PSTM_OKAY) {
1613                 goto LBL_M;
1614         }
1615 /*
1616         Pre-allocated digit.  Used for mul, sqr, AND reduce
1617 */
1618         paDlen = ((M[1].used + 3) * 2) * sizeof(pstm_digit);
1619         paD = xzalloc(paDlen);
1620 /*
1621         compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times
1622  */
1623         if (pstm_init_copy(pool, &M[1 << (winsize - 1)], &M[1], 1) != PSTM_OKAY) {
1624                 err = PS_MEM_FAIL;
1625                 goto LBL_PAD;
1626         }
1627         for (x = 0; x < (winsize - 1); x++) {
1628                 if ((err = pstm_sqr_comba (pool, &M[1 << (winsize - 1)],
1629                                 &M[1 << (winsize - 1)], paD, paDlen)) != PSTM_OKAY) {
1630                         goto LBL_PAD;
1631                 }
1632                 if ((err = pstm_montgomery_reduce(pool, &M[1 << (winsize - 1)], P, mp,
1633                                 paD, paDlen)) != PSTM_OKAY) {
1634                         goto LBL_PAD;
1635                 }
1636         }
1637 /*
1638         now init the second half of the array
1639 */
1640         for (x = (1<<(winsize-1)) + 1; x < (1 << winsize); x++) {
1641                 if ((err = pstm_init_size(pool, &M[x], M[1<<(winsize-1)].alloc + 1))
1642                                 != PSTM_OKAY) {
1643                         for (y = 1<<(winsize-1); y < x; y++) {
1644                                 pstm_clear(&M[y]);
1645                         }
1646                         goto LBL_PAD;
1647                 }
1648         }
1649
1650         /* create upper table */
1651         for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1652                 if ((err = pstm_mul_comba(pool, &M[x - 1], &M[1], &M[x], paD, paDlen))
1653                                 != PSTM_OKAY) {
1654                         goto LBL_MARRAY;
1655                 }
1656                 if ((err = pstm_montgomery_reduce(pool, &M[x], P, mp, paD, paDlen)) !=
1657                                 PSTM_OKAY) {
1658                         goto LBL_MARRAY;
1659                 }
1660         }
1661
1662         /* set initial mode and bit cnt */
1663         mode   = 0;
1664         bitcnt = 1;
1665         buf    = 0;
1666         digidx = X->used - 1;
1667         bitcpy = 0;
1668         bitbuf = 0;
1669
1670         for (;;) {
1671                 /* grab next digit as required */
1672                 if (--bitcnt == 0) {
1673                         /* if digidx == -1 we are out of digits so break */
1674                         if (digidx == -1) {
1675                                 break;
1676                         }
1677                         /* read next digit and reset bitcnt */
1678                         buf    = X->dp[digidx--];
1679                         bitcnt = (int32)DIGIT_BIT;
1680                 }
1681
1682                 /* grab the next msb from the exponent */
1683                 y     = (pstm_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1684                 buf <<= (pstm_digit)1;
1685 /*
1686                  If the bit is zero and mode == 0 then we ignore it.
1687                  These represent the leading zero bits before the first 1 bit
1688                  in the exponent.  Technically this opt is not required but it
1689                  does lower the # of trivial squaring/reductions used
1690 */
1691                 if (mode == 0 && y == 0) {
1692                         continue;
1693                 }
1694
1695                 /* if the bit is zero and mode == 1 then we square */
1696                 if (mode == 1 && y == 0) {
1697                         if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1698                                         PSTM_OKAY) {
1699                                 goto LBL_MARRAY;
1700                         }
1701                         if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1702                                         != PSTM_OKAY) {
1703                                 goto LBL_MARRAY;
1704                         }
1705                         continue;
1706                 }
1707
1708                 /* else we add it to the window */
1709                 bitbuf |= (y << (winsize - ++bitcpy));
1710                 mode    = 2;
1711
1712                 if (bitcpy == winsize) {
1713                         /* ok window is filled so square as required and mul square first */
1714                         for (x = 0; x < winsize; x++) {
1715                                 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1716                                                 PSTM_OKAY) {
1717                                         goto LBL_MARRAY;
1718                                 }
1719                                 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1720                                                 paDlen)) != PSTM_OKAY) {
1721                                         goto LBL_MARRAY;
1722                                 }
1723                         }
1724
1725                         /* then multiply */
1726                         if ((err = pstm_mul_comba(pool, &res, &M[bitbuf], &res, paD,
1727                                         paDlen)) != PSTM_OKAY) {
1728                                 goto LBL_MARRAY;
1729                         }
1730                         if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1731                                         != PSTM_OKAY) {
1732                                 goto LBL_MARRAY;
1733                         }
1734
1735                         /* empty window and reset */
1736                         bitcpy = 0;
1737                         bitbuf = 0;
1738                         mode   = 1;
1739                 }
1740         }
1741
1742         /* if bits remain then square/multiply */
1743         if (mode == 2 && bitcpy > 0) {
1744                 /* square then multiply if the bit is set */
1745                 for (x = 0; x < bitcpy; x++) {
1746                         if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1747                                         PSTM_OKAY) {
1748                                 goto LBL_MARRAY;
1749                         }
1750                         if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1751                                         != PSTM_OKAY) {
1752                                 goto LBL_MARRAY;
1753                         }
1754
1755                         /* get next bit of the window */
1756                         bitbuf <<= 1;
1757                         if ((bitbuf & (1 << winsize)) != 0) {
1758                         /* then multiply */
1759                                 if ((err = pstm_mul_comba(pool, &res, &M[1], &res, paD, paDlen))
1760                                                 != PSTM_OKAY) {
1761                                         goto LBL_MARRAY;
1762                                 }
1763                                 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1764                                                 paDlen)) != PSTM_OKAY) {
1765                                         goto LBL_MARRAY;
1766                                 }
1767                         }
1768                 }
1769         }
1770 /*
1771         Fix up result if Montgomery reduction is used recall that any value in a
1772         Montgomery system is actually multiplied by R mod n.  So we have to reduce
1773         one more time to cancel out the factor of R.
1774 */
1775         if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) !=
1776                         PSTM_OKAY) {
1777                 goto LBL_MARRAY;
1778         }
1779         /* swap res with Y */
1780         if ((err = pstm_copy (&res, Y)) != PSTM_OKAY) {
1781                 goto LBL_MARRAY;
1782         }
1783         err = PSTM_OKAY;
1784 LBL_MARRAY:
1785         for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1786                 pstm_clear(&M[x]);
1787         }
1788 LBL_PAD:psFree(paD, pool);
1789 LBL_M: pstm_clear(&M[1]);
1790 LBL_RES:pstm_clear(&res);
1791         return err;
1792 }
1793
1794 /******************************************************************************/
1795 /*
1796
1797 */
1798 int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
1799 {
1800         int32   res;
1801         int16   sa, sb;
1802
1803         /* get sign of both inputs */
1804         sa = a->sign;
1805         sb = b->sign;
1806
1807         /* handle two cases, not four */
1808         if (sa == sb) {
1809                 /* both positive or both negative, add their mags, copy the sign */
1810                 c->sign = sa;
1811                 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
1812                         return res;
1813                 }
1814   } else {
1815 /*
1816                 one positive, the other negative
1817                 subtract the one with the greater magnitude from the one of the lesser
1818                 magnitude. The result gets the sign of the one with the greater mag.
1819  */
1820                 if (pstm_cmp_mag (a, b) == PSTM_LT) {
1821                         c->sign = sb;
1822                         if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
1823                                 return res;
1824                         }
1825                 } else {
1826                         c->sign = sa;
1827                         if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
1828                                 return res;
1829                         }
1830                 }
1831         }
1832         return PS_SUCCESS;
1833 }
1834
1835 /******************************************************************************/
1836 /*
1837         reverse an array, used for radix code
1838 */
1839 static void pstm_reverse (unsigned char *s, int16 len)
1840 {
1841         int32     ix, iy;
1842         unsigned char t;
1843
1844         ix = 0;
1845         iy = len - 1;
1846         while (ix < iy) {
1847                 t     = s[ix];
1848                 s[ix] = s[iy];
1849                 s[iy] = t;
1850                 ++ix;
1851                 --iy;
1852         }
1853 }
1854 /******************************************************************************/
1855 /*
1856         No reverse.  Useful in some of the EIP-154 PKA stuff where special byte
1857         order seems to come into play more often
1858 */
1859 int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a, unsigned char *b)
1860 {
1861         int32     res;
1862         int16     x;
1863         pstm_int  t = { 0 };
1864
1865         if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1866                 return res;
1867         }
1868
1869         x = 0;
1870         while (pstm_iszero (&t) == 0) {
1871                 b[x++] = (unsigned char) (t.dp[0] & 255);
1872                 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1873                         pstm_clear(&t);
1874                         return res;
1875                 }
1876         }
1877         pstm_clear(&t);
1878         return PS_SUCCESS;
1879 }
1880 /******************************************************************************/
1881 /*
1882
1883 */
1884 int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a, unsigned char *b)
1885 {
1886         int32     res;
1887         int16     x;
1888         pstm_int  t = { 0 };
1889
1890         if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1891                 return res;
1892         }
1893
1894         x = 0;
1895         while (pstm_iszero (&t) == 0) {
1896                 b[x++] = (unsigned char) (t.dp[0] & 255);
1897                 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1898                         pstm_clear(&t);
1899                         return res;
1900                 }
1901         }
1902         pstm_reverse (b, x);
1903         pstm_clear(&t);
1904         return PS_SUCCESS;
1905 }
1906
1907 /******************************************************************************/
1908 /*
1909         compare against a single digit
1910 */
1911 int32 pstm_cmp_d(pstm_int *a, pstm_digit b)
1912 {
1913         /* compare based on sign */
1914         if ((b && a->used == 0) || a->sign == PSTM_NEG) {
1915                 return PSTM_LT;
1916         }
1917
1918         /* compare based on magnitude */
1919         if (a->used > 1) {
1920                 return PSTM_GT;
1921         }
1922
1923         /* compare the only digit of a to b */
1924         if (a->dp[0] > b) {
1925                 return PSTM_GT;
1926         } else if (a->dp[0] < b) {
1927                 return PSTM_LT;
1928         } else {
1929                 return PSTM_EQ;
1930         }
1931 }
1932
1933 /*
1934         Need invmod for ECC and also private key loading for hardware crypto
1935         in cases where dQ > dP.  The values must be switched and a new qP must be
1936         calculated using this function
1937 */
1938 static int32 pstm_invmod_slow(psPool_t *pool, pstm_int * a, pstm_int * b,
1939                                 pstm_int * c)
1940 {
1941         pstm_int  x, y, u, v, A, B, C, D;
1942         int32     res;
1943
1944         /* b cannot be negative */
1945         if (b->sign == PSTM_NEG || pstm_iszero(b) == 1) {
1946                 return PS_LIMIT_FAIL;
1947         }
1948
1949         /* init temps */
1950         if (pstm_init_size(pool, &x, b->used) != PSTM_OKAY) {
1951                 return PS_MEM_FAIL;
1952         }
1953
1954         /* x = a, y = b */
1955         if ((res = pstm_mod(pool, a, b, &x)) != PSTM_OKAY) {
1956                 goto LBL_X;
1957         }
1958
1959         if (pstm_init_copy(pool, &y, b, 0) != PSTM_OKAY) {
1960                 goto LBL_X;
1961         }
1962
1963         /* 2. [modified] if x,y are both even then return an error! */
1964         if (pstm_iseven (&x) == 1 && pstm_iseven (&y) == 1) {
1965                 res = PS_FAILURE;
1966                 goto LBL_Y;
1967         }
1968
1969         /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1970         if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
1971                 goto LBL_Y;
1972         }
1973         if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
1974                 goto LBL_U;
1975         }
1976
1977         if ((res = pstm_init_size(pool, &A, sizeof(pstm_digit))) != PSTM_OKAY) {
1978                 goto LBL_V;
1979         }
1980
1981         if ((res = pstm_init_size(pool, &D, sizeof(pstm_digit))) != PSTM_OKAY) {
1982                 goto LBL_A;
1983         }
1984         pstm_set (&A, 1);
1985         pstm_set (&D, 1);
1986
1987         if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
1988                 goto LBL_D;
1989         }
1990         if ((res = pstm_init(pool, &C)) != PSTM_OKAY) {
1991                 goto LBL_B;
1992         }
1993
1994 top:
1995         /* 4.  while u is even do */
1996         while (pstm_iseven (&u) == 1) {
1997                 /* 4.1 u = u/2 */
1998                 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
1999                         goto LBL_C;
2000                 }
2001
2002                 /* 4.2 if A or B is odd then */
2003                 if (pstm_isodd (&A) == 1 || pstm_isodd (&B) == 1) {
2004                         /* A = (A+y)/2, B = (B-x)/2 */
2005                         if ((res = pstm_add (&A, &y, &A)) != PSTM_OKAY) {
2006                                 goto LBL_C;
2007                         }
2008                         if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2009                                 goto LBL_C;
2010                         }
2011                 }
2012                 /* A = A/2, B = B/2 */
2013                 if ((res = pstm_div_2 (&A, &A)) != PSTM_OKAY) {
2014                         goto LBL_C;
2015                 }
2016                 if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) {
2017                         goto LBL_C;
2018                 }
2019         }
2020
2021         /* 5.  while v is even do */
2022         while (pstm_iseven (&v) == 1) {
2023                 /* 5.1 v = v/2 */
2024                 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2025                         goto LBL_C;
2026                 }
2027
2028                 /* 5.2 if C or D is odd then */
2029                 if (pstm_isodd (&C) == 1 || pstm_isodd (&D) == 1) {
2030                         /* C = (C+y)/2, D = (D-x)/2 */
2031                         if ((res = pstm_add (&C, &y, &C)) != PSTM_OKAY) {
2032                                 goto LBL_C;
2033                         }
2034                         if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2035                                 goto LBL_C;
2036                         }
2037                 }
2038                 /* C = C/2, D = D/2 */
2039                 if ((res = pstm_div_2 (&C, &C)) != PSTM_OKAY) {
2040                         goto LBL_C;
2041                 }
2042                 if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) {
2043                         goto LBL_C;
2044                 }
2045         }
2046
2047         /* 6.  if u >= v then */
2048         if (pstm_cmp (&u, &v) != PSTM_LT) {
2049                 /* u = u - v, A = A - C, B = B - D */
2050                 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2051                                 goto LBL_C;
2052                 }
2053                 if ((res = pstm_sub (&A, &C, &A)) != PSTM_OKAY) {
2054                                 goto LBL_C;
2055                 }
2056                 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2057                                 goto LBL_C;
2058                 }
2059         } else {
2060                 /* v - v - u, C = C - A, D = D - B */
2061                 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2062                                 goto LBL_C;
2063                 }
2064                 if ((res = pstm_sub (&C, &A, &C)) != PSTM_OKAY) {
2065                                 goto LBL_C;
2066                 }
2067                 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2068                                 goto LBL_C;
2069                 }
2070         }
2071
2072         /* if not zero goto step 4 */
2073         if (pstm_iszero (&u) == 0)
2074                 goto top;
2075
2076         /* now a = C, b = D, gcd == g*v */
2077
2078         /* if v != 1 then there is no inverse */
2079         if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2080                 res = PS_FAILURE;
2081                 goto LBL_C;
2082         }
2083
2084         /* if its too low */
2085         while (pstm_cmp_d(&C, 0) == PSTM_LT) {
2086                 if ((res = pstm_add(&C, b, &C)) != PSTM_OKAY) {
2087                         goto LBL_C;
2088                 }
2089         }
2090
2091         /* too big */
2092         while (pstm_cmp_mag(&C, b) != PSTM_LT) {
2093                 if ((res = pstm_sub(&C, b, &C)) != PSTM_OKAY) {
2094                         goto LBL_C;
2095                 }
2096         }
2097
2098         /* C is now the inverse */
2099         if ((res = pstm_copy(&C, c)) != PSTM_OKAY) {
2100                 goto LBL_C;
2101         }
2102         res = PSTM_OKAY;
2103
2104 LBL_C: pstm_clear(&C);
2105 LBL_D: pstm_clear(&D);
2106 LBL_B: pstm_clear(&B);
2107 LBL_A: pstm_clear(&A);
2108 LBL_V: pstm_clear(&v);
2109 LBL_U: pstm_clear(&u);
2110 LBL_Y: pstm_clear(&y);
2111 LBL_X: pstm_clear(&x);
2112
2113         return res;
2114 }
2115
2116 /* c = 1/a (mod b) for odd b only */
2117 int32 pstm_invmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
2118 {
2119         pstm_int        x, y, u, v, B, D;
2120         int32           res;
2121         uint16          neg, sanity;
2122
2123         /* 2. [modified] b must be odd   */
2124         if (pstm_iseven (b) == 1) {
2125                 return pstm_invmod_slow(pool, a,b,c);
2126         }
2127
2128         /* x == modulus, y == value to invert */
2129         if ((res = pstm_init_copy(pool, &x, b, 0)) != PSTM_OKAY) {
2130                 return res;
2131         }
2132
2133         if ((res = pstm_init_size(pool, &y, a->alloc)) != PSTM_OKAY) {
2134                 goto LBL_X;
2135         }
2136
2137         /* we need y = |a| */
2138         pstm_abs(a, &y);
2139
2140         /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2141         if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
2142                 goto LBL_Y;
2143         }
2144         if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
2145                 goto LBL_U;
2146         }
2147         if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
2148                 goto LBL_V;
2149         }
2150         if ((res = pstm_init(pool, &D)) != PSTM_OKAY) {
2151                 goto LBL_B;
2152         }
2153
2154         pstm_set (&D, 1);
2155
2156         sanity = 0;
2157 top:
2158         /* 4.  while u is even do */
2159         while (pstm_iseven (&u) == 1) {
2160                 /* 4.1 u = u/2 */
2161                 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
2162                         goto LBL_D;
2163                 }
2164
2165                 /* 4.2 if B is odd then */
2166                 if (pstm_isodd (&B) == 1) {
2167                         if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2168                                 goto LBL_D;
2169                         }
2170                 }
2171                 /* B = B/2 */
2172                 if ((res = pstm_div_2 (&B, &B)) !=  PSTM_OKAY) {
2173                         goto LBL_D;
2174                 }
2175         }
2176
2177         /* 5.  while v is even do */
2178         while (pstm_iseven (&v) == 1) {
2179                 /* 5.1 v = v/2 */
2180                 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2181                         goto LBL_D;
2182                 }
2183                 /* 5.2 if D is odd then */
2184                 if (pstm_isodd (&D) == 1) {
2185                         /* D = (D-x)/2 */
2186                         if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2187                                 goto LBL_D;
2188                         }
2189                 }
2190                 /* D = D/2 */
2191                 if ((res = pstm_div_2 (&D, &D)) !=  PSTM_OKAY) {
2192                         goto LBL_D;
2193                 }
2194         }
2195
2196         /* 6.  if u >= v then */
2197         if (pstm_cmp (&u, &v) != PSTM_LT) {
2198                 /* u = u - v, B = B - D */
2199                 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2200                                 goto LBL_D;
2201                 }
2202                 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2203                                 goto LBL_D;
2204                 }
2205         } else {
2206                 /* v - v - u, D = D - B */
2207                 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2208                                 goto LBL_D;
2209                 }
2210                 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2211                                 goto LBL_D;
2212                 }
2213         }
2214
2215         /* if not zero goto step 4 */
2216         if (sanity++ > 1000) {
2217                 res = PS_LIMIT_FAIL;
2218                 goto LBL_D;
2219         }
2220         if (pstm_iszero (&u) == 0) {
2221                 goto top;
2222         }
2223
2224         /* now a = C, b = D, gcd == g*v */
2225
2226         /* if v != 1 then there is no inverse */
2227         if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2228                 res = PS_FAILURE;
2229                 goto LBL_D;
2230         }
2231
2232         /* b is now the inverse */
2233         neg = a->sign;
2234         while (D.sign == PSTM_NEG) {
2235                 if ((res = pstm_add (&D, b, &D)) != PSTM_OKAY) {
2236                         goto LBL_D;
2237                 }
2238         }
2239         if ((res = pstm_copy (&D, c)) != PSTM_OKAY) {
2240                 goto LBL_D;
2241         }
2242         c->sign = neg;
2243         res = PSTM_OKAY;
2244
2245 LBL_D: pstm_clear(&D);
2246 LBL_B: pstm_clear(&B);
2247 LBL_V: pstm_clear(&v);
2248 LBL_U: pstm_clear(&u);
2249 LBL_Y: pstm_clear(&y);
2250 LBL_X: pstm_clear(&x);
2251         return res;
2252 }
2253 #endif /* !DISABLE_PSTM */
2254 /******************************************************************************/