2 * Copyright (C) 2017 Denys Vlasenko
4 * Licensed under GPLv2, see file LICENSE in this source tree.
10 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
12 * Multiprecision number implementation.
15 * Copyright (c) 2013-2015 INSIDE Secure Corporation
16 * Copyright (c) PeerSec Networks, 2002-2011
19 * The latest version of this code is available at http://www.matrixssl.org
21 * This software is open source; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
26 * This General Public License does NOT permit incorporating this software
27 * into proprietary programs. If you are unable to comply with the GPL, a
28 * commercial license for this software may be purchased from INSIDE at
29 * http://www.insidesecure.com/eng/Company/Locations
31 * This program is distributed in WITHOUT ANY WARRANTY; without even the
32 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33 * See the GNU General Public License for more details.
35 * You should have received a copy of the GNU General Public License
36 * along with this program; if not, write to the Free Software
37 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 * http://www.gnu.org/copyleft/gpl.html
40 /******************************************************************************/
43 //#include "../cryptoApi.h"
46 static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c);
48 /******************************************************************************/
50 init an pstm_int for a given size
52 int32 pstm_init_size(psPool_t *pool, pstm_int * a, uint32 size)
59 a->dp = xzalloc(sizeof (pstm_digit) * size);
62 a->alloc = (int16)size;
68 // for (x = 0; x < size; x++) {
74 /******************************************************************************/
78 int32 pstm_init(psPool_t *pool, pstm_int * a)
82 allocate memory required and clear it
84 a->dp = xzalloc(sizeof (pstm_digit) * PSTM_DEFAULT_INIT);
86 set the digits to zero
89 // for (i = 0; i < PSTM_DEFAULT_INIT; i++) {
93 set the used to zero, allocated digits to the default precision and sign
98 a->alloc = PSTM_DEFAULT_INIT;
104 /******************************************************************************/
108 int32 pstm_grow(pstm_int * a, int16 size)
114 If the alloc size is smaller alloc more ram.
116 if (a->alloc < size) {
118 Reallocate the array a->dp
120 We store the return in a temporary variable in case the operation
121 failed we don't want to overwrite the dp member of a.
123 tmp = xrealloc(a->dp, sizeof (pstm_digit) * size);
125 reallocation succeeded so set a->dp
133 for (; i < a->alloc; i++) {
140 /******************************************************************************/
142 copy, b = a (b must be pre-allocated)
144 int32 pstm_copy(pstm_int * a, pstm_int * b)
149 If dst == src do nothing
157 if (b->alloc < a->used) {
158 if ((res = pstm_grow (b, a->used)) != PSTM_OKAY) {
163 Zero b and copy the parameters over
166 register pstm_digit *tmpa, *tmpb;
168 /* pointer aliases */
175 /* copy all the digits */
176 for (n = 0; n < a->used; n++) {
180 /* clear high digits */
181 for (; n < b->used; n++) {
186 copy used count and sign
193 /******************************************************************************/
197 This is used to ensure that leading zero digits are trimed and the
198 leading "used" digit will be non-zero. Typically very fast. Also fixes
199 the sign if there are no more leading digits
201 void pstm_clamp(pstm_int * a)
203 /* decrease used while the most significant digit is zero. */
204 while (a->used > 0 && a->dp[a->used - 1] == 0) {
207 /* reset the sign flag if used == 0 */
213 /******************************************************************************/
217 void pstm_clear(pstm_int * a)
221 only do anything if a hasn't been freed previously
223 if (a != NULL && a->dp != NULL) {
225 first zero the digits
227 for (i = 0; i < a->used; i++) {
231 psFree (a->dp, a->pool);
233 reset members to make debugging easier
236 a->alloc = a->used = 0;
241 /******************************************************************************/
245 void pstm_clear_multi(pstm_int *mp0, pstm_int *mp1, pstm_int *mp2,
246 pstm_int *mp3, pstm_int *mp4, pstm_int *mp5,
247 pstm_int *mp6, pstm_int *mp7)
249 int32 n; /* Number of ok inits */
251 pstm_int *tempArray[9];
263 for (n = 0; tempArray[n] != NULL; n++) {
264 if ((tempArray[n] != NULL) && (tempArray[n]->dp != NULL)) {
265 pstm_clear(tempArray[n]);
270 /******************************************************************************/
274 void pstm_zero(pstm_int * a)
283 for (n = 0; n < a->alloc; n++) {
289 /******************************************************************************/
291 Compare maginitude of two ints (unsigned).
293 int32 pstm_cmp_mag(pstm_int * a, pstm_int * b)
296 pstm_digit *tmpa, *tmpb;
299 compare based on # of non-zero digits
301 if (a->used > b->used) {
305 if (a->used < b->used) {
310 tmpa = a->dp + (a->used - 1);
313 tmpb = b->dp + (a->used - 1);
316 compare based on digits
318 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
329 /******************************************************************************/
331 Compare two ints (signed)
333 int32 pstm_cmp(pstm_int * a, pstm_int * b)
336 compare based on sign
338 if (a->sign != b->sign) {
339 if (a->sign == PSTM_NEG) {
348 if (a->sign == PSTM_NEG) {
349 /* if negative compare opposite direction */
350 return pstm_cmp_mag(b, a);
352 return pstm_cmp_mag(a, b);
356 /******************************************************************************/
358 pstm_ints can be initialized more precisely when they will populated
359 using pstm_read_unsigned_bin since the length of the byte stream is known
361 int32 pstm_init_for_read_unsigned_bin(psPool_t *pool, pstm_int *a, uint32 len)
365 Need to set this based on how many words max it will take to store the bin.
367 1 to round up for the remainder of this integer math
368 1 for the initial carry of '1' bits that fall between DIGIT_BIT and 8
370 size = (((len / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
372 return pstm_init_size(pool, a, size);
376 /******************************************************************************/
378 Reads a unsigned char array into pstm_int format. User should have
379 called pstm_init_for_read_unsigned_bin first. There is some grow logic
380 here if the default pstm_init was used but we don't really want to hit it.
382 int32 pstm_read_unsigned_bin(pstm_int *a, unsigned char *b, int32 c)
388 If we know the endianness of this architecture, and we're using
389 32-bit pstm_digits, we can optimize this
391 #if (defined(ENDIAN_LITTLE) || defined(ENDIAN_BIG)) && !defined(PSTM_64BIT)
392 /* But not for both simultaneously */
393 #if defined(ENDIAN_LITTLE) && defined(ENDIAN_BIG)
394 #error Both ENDIAN_LITTLE and ENDIAN_BIG defined.
398 if ((unsigned)c > (PSTM_MAX_SIZE * sizeof(pstm_digit))) {
399 uint32 excess = c - (PSTM_MAX_SIZE * sizeof(pstm_digit));
403 a->used = (int16)((c + sizeof(pstm_digit) - 1)/sizeof(pstm_digit));
404 if (a->alloc < a->used) {
405 if (pstm_grow(a, a->used) != PSTM_OKAY) {
409 pd = (unsigned char *)a->dp;
410 /* read the bytes in */
413 /* Use Duff's device to unroll the loop. */
414 int32 idx = (c - 1) & ~3;
416 case 0: do { pd[idx+0] = *b++;
417 case 3: pd[idx+1] = *b++;
418 case 2: pd[idx+2] = *b++;
419 case 1: pd[idx+3] = *b++;
421 } while ((c -= 4) > 0);
425 for (c -= 1; c >= 0; c -= 1) {
431 /* Big enough based on the len? */
432 a->used = (((c / sizeof(pstm_digit)) * (sizeof(pstm_digit) * CHAR_BIT))
435 if (a->alloc < a->used) {
436 if (pstm_grow(a, a->used) != PSTM_OKAY) {
440 /* read the bytes in */
442 if (pstm_mul_2d (a, 8, a) != PSTM_OKAY) {
454 /******************************************************************************/
457 int16 pstm_count_bits (pstm_int * a)
466 /* get number of digits and add that */
467 r = (a->used - 1) * DIGIT_BIT;
469 /* take the last digit and count the bits in it */
470 q = a->dp[a->used - 1];
471 while (q > ((pstm_digit) 0)) {
473 q >>= ((pstm_digit) 1);
478 /******************************************************************************/
479 int32 pstm_unsigned_bin_size(pstm_int *a)
481 int32 size = pstm_count_bits (a);
482 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
485 /******************************************************************************/
486 void pstm_set(pstm_int *a, pstm_digit b)
490 a->used = a->dp[0] ? 1 : 0;
493 /******************************************************************************/
497 void pstm_rshd(pstm_int *a, int16 x)
501 /* too many digits just zero and return */
508 for (y = 0; y < a->used - x; y++) {
509 a->dp[y] = a->dp[y+x];
513 for (; y < a->used; y++) {
517 /* decrement count */
522 /******************************************************************************/
524 Shift left a certain amount of digits.
526 int32 pstm_lshd(pstm_int * a, int16 b)
532 If its less than zero return.
538 Grow to fit the new digits.
540 if (a->alloc < a->used + b) {
541 if ((res = pstm_grow (a, a->used + b)) != PSTM_OKAY) {
547 register pstm_digit *top, *bottom;
549 Increment the used by the shift amount then copy upwards.
554 top = a->dp + a->used - 1;
557 bottom = a->dp + a->used - 1 - b;
559 This is implemented using a sliding window except the window goes the
560 other way around. Copying from the bottom to the top.
562 for (x = a->used - 1; x >= b; x--) {
566 /* zero the lower digits */
568 for (x = 0; x < b; x++) {
575 /******************************************************************************/
579 int32 pstm_2expt(pstm_int *a, int16 b)
583 /* zero a as per default */
591 if (z >= PSTM_MAX_SIZE) {
592 return PS_LIMIT_FAIL;
595 /* set the used count of where the bit will go */
598 if (a->used > a->alloc) {
599 if (pstm_grow(a, a->used) != PSTM_OKAY) {
604 /* put the single bit in its place */
605 a->dp[z] = ((pstm_digit)1) << (b % DIGIT_BIT);
609 /******************************************************************************/
613 int32 pstm_mul_2(pstm_int * a, pstm_int * b)
619 grow to accomodate result
621 if (b->alloc < a->used + 1) {
622 if ((res = pstm_grow (b, a->used + 1)) != PSTM_OKAY) {
630 register pstm_digit r, rr, *tmpa, *tmpb;
632 /* alias for source */
640 for (x = 0; x < a->used; x++) {
642 get what will be the *next* carry bit from the
643 MSB of the current digit
645 rr = *tmpa >> ((pstm_digit)(DIGIT_BIT - 1));
647 now shift up this digit, add in the carry [from the previous]
649 *tmpb++ = ((*tmpa++ << ((pstm_digit)1)) | r);
651 copy the carry that would be from the source
652 digit into the next iteration
657 /* new leading digit? */
658 if (r != 0 && b->used != (PSTM_MAX_SIZE-1)) {
659 /* add a MSB which is always 1 at this point */
664 now zero any excess digits on the destination that we didn't write to
666 tmpb = b->dp + b->used;
667 for (x = b->used; x < oldused; x++) {
675 /******************************************************************************/
677 unsigned subtraction ||a|| >= ||b|| ALWAYS!
679 int32 s_pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
681 int16 oldbused, oldused;
685 if (b->used > a->used) {
686 return PS_LIMIT_FAIL;
688 if (c->alloc < a->used) {
689 if ((x = pstm_grow (c, a->used)) != PSTM_OKAY) {
698 for (x = 0; x < oldbused; x++) {
699 t = ((pstm_word)a->dp[x]) - (((pstm_word)b->dp[x]) + t);
700 c->dp[x] = (pstm_digit)t;
701 t = (t >> DIGIT_BIT)&1;
703 for (; x < a->used; x++) {
704 t = ((pstm_word)a->dp[x]) - t;
705 c->dp[x] = (pstm_digit)t;
706 t = (t >> DIGIT_BIT);
708 for (; x < oldused; x++) {
715 /******************************************************************************/
719 static int32 s_pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
722 register pstm_word t, adp, bdp;
731 if (c->used > c->alloc) {
732 if (pstm_grow(c, c->used) != PSTM_OKAY) {
738 for (x = 0; x < y; x++) {
742 adp = (pstm_word)a->dp[x];
747 bdp = (pstm_word)b->dp[x];
750 c->dp[x] = (pstm_digit)t;
753 if (t != 0 && x < PSTM_MAX_SIZE) {
754 if (c->used == c->alloc) {
755 if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
759 c->dp[c->used++] = (pstm_digit)t;
764 for (; x < oldused; x++) {
772 /******************************************************************************/
776 int32 pstm_sub(pstm_int *a, pstm_int *b, pstm_int *c)
786 subtract a negative from a positive, OR a positive from a negative.
787 For both, ADD their magnitudes, and use the sign of the first number.
790 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
795 subtract a positive from a positive, OR a negative from a negative.
796 First, take the difference between their magnitudes, then...
798 if (pstm_cmp_mag (a, b) != PSTM_LT) {
799 /* Copy the sign from the first */
801 /* The first has a larger or equal magnitude */
802 if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
806 /* The result has the _opposite_ sign from the first number. */
807 c->sign = (sa == PSTM_ZPOS) ? PSTM_NEG : PSTM_ZPOS;
808 /* The second has a larger magnitude */
809 if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
817 /******************************************************************************/
821 int32 pstm_sub_d(psPool_t *pool, pstm_int *a, pstm_digit b, pstm_int *c)
826 if (pstm_init_size(pool, &tmp, sizeof(pstm_digit)) != PSTM_OKAY) {
830 res = pstm_sub(a, &tmp, c);
835 /******************************************************************************/
837 setups the montgomery reduction
839 int32 pstm_montgomery_setup(pstm_int *a, pstm_digit *rho)
844 fast inversion mod 2**k
845 Based on the fact that
846 XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
847 => 2*X*A - X*X*A*A = 1
853 psTraceCrypto("pstm_montogomery_setup failure\n");
857 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
858 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
859 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
860 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
862 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
864 /* rho = -1/m mod b */
865 *rho = (pstm_digit)(((pstm_word) 1 << ((pstm_word) DIGIT_BIT)) -
870 /******************************************************************************/
872 * computes a = B**n mod b without division or multiplication useful for
873 * normalizing numbers in a Montgomery system.
875 int32 pstm_montgomery_calc_normalization(pstm_int *a, pstm_int *b)
880 /* how many bits of last digit does b use */
881 bits = pstm_count_bits (b) % DIGIT_BIT;
882 if (!bits) bits = DIGIT_BIT;
884 /* compute A = B^(n-1) * 2^(bits-1) */
886 if ((x = pstm_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) !=
895 /* now compute C = A * B mod b */
896 for (x = bits - 1; x < (int32)DIGIT_BIT; x++) {
897 if (pstm_mul_2 (a, a) != PSTM_OKAY) {
900 if (pstm_cmp_mag (a, b) != PSTM_LT) {
901 if (s_pstm_sub (a, b, a) != PSTM_OKAY) {
909 /******************************************************************************/
913 static int32 pstm_mul_2d(pstm_int *a, int16 b, pstm_int *c)
915 pstm_digit carry, carrytmp, shift;
919 if (pstm_copy(a, c) != PSTM_OKAY) {
923 /* handle whole digits */
924 if (b >= DIGIT_BIT) {
925 if (pstm_lshd(c, b/DIGIT_BIT) != PSTM_OKAY) {
931 /* shift the digits */
934 shift = DIGIT_BIT - b;
935 for (x = 0; x < c->used; x++) {
936 carrytmp = c->dp[x] >> shift;
937 c->dp[x] = (c->dp[x] << b) + carry;
940 /* store last carry if room */
941 if (carry && x < PSTM_MAX_SIZE) {
942 if (c->used == c->alloc) {
943 if (pstm_grow(c, c->alloc + 1) != PSTM_OKAY) {
947 c->dp[c->used++] = carry;
954 /******************************************************************************/
958 static int32 pstm_mod_2d(pstm_int *a, int16 b, pstm_int *c)
962 /* zero if count less than or equal to zero */
968 /* get copy of input */
969 if (pstm_copy(a, c) != PSTM_OKAY) {
973 /* if 2**d is larger than we just return */
974 if (b >= (DIGIT_BIT * a->used)) {
978 /* zero digits above the last digit of the modulus */
979 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++)
983 /* clear the digit that is not completely outside/inside the modulus */
984 c->dp[b / DIGIT_BIT] &= ~((pstm_digit)0) >> (DIGIT_BIT - b);
990 /******************************************************************************/
994 int32 pstm_mul_d(pstm_int *a, pstm_digit b, pstm_int *c)
1000 if (c->alloc < a->used + 1) {
1001 if ((res = pstm_grow (c, a->used + 1)) != PSTM_OKAY) {
1009 for (x = 0; x < a->used; x++) {
1010 w = ((pstm_word)a->dp[x]) * ((pstm_word)b) + w;
1011 c->dp[x] = (pstm_digit)w;
1014 if (w != 0 && (a->used != PSTM_MAX_SIZE)) {
1015 c->dp[c->used++] = (pstm_digit)w;
1018 for (; x < oldused; x++) {
1025 /******************************************************************************/
1029 int32 pstm_div_2d(psPool_t *pool, pstm_int *a, int16 b, pstm_int *c,
1032 pstm_digit D, r, rr;
1037 /* if the shift count is <= 0 then we do no work */
1039 if (pstm_copy (a, c) != PSTM_OKAY) {
1048 /* get the remainder */
1050 if (pstm_init(pool, &t) != PSTM_OKAY) {
1053 if (pstm_mod_2d (a, b, &t) != PSTM_OKAY) {
1060 if (pstm_copy(a, c) != PSTM_OKAY) {
1065 /* shift by as many digits in the bit count */
1066 if (b >= (int32)DIGIT_BIT) {
1067 pstm_rshd (c, b / DIGIT_BIT);
1070 /* shift any bit count < DIGIT_BIT */
1071 D = (pstm_digit) (b % DIGIT_BIT);
1073 register pstm_digit *tmpc, mask, shift;
1076 mask = (((pstm_digit)1) << D) - 1;
1079 shift = DIGIT_BIT - D;
1082 tmpc = c->dp + (c->used - 1);
1086 for (x = c->used - 1; x >= 0; x--) {
1087 /* get the lower bits of this word in a temp */
1090 /* shift the current word and mix in the carry bits from previous */
1091 *tmpc = (*tmpc >> D) | (r << shift);
1094 /* set the carry to the carry bits of the current word above */
1103 if (pstm_copy(&t, d) != PSTM_OKAY) {
1111 /******************************************************************************/
1115 int32 pstm_div_2(pstm_int * a, pstm_int * b)
1119 if (b->alloc < a->used) {
1120 if (pstm_grow(b, a->used) != PSTM_OKAY) {
1127 register pstm_digit r, rr, *tmpa, *tmpb;
1130 tmpa = a->dp + b->used - 1;
1133 tmpb = b->dp + b->used - 1;
1137 for (x = b->used - 1; x >= 0; x--) {
1138 /* get the carry for the next iteration */
1141 /* shift the current digit, add in carry and store */
1142 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1144 /* forward carry to next iteration */
1148 /* zero excess digits */
1149 tmpb = b->dp + b->used;
1150 for (x = b->used; x < oldused; x++) {
1159 /******************************************************************************/
1161 Creates "a" then copies b into it
1163 int32 pstm_init_copy(psPool_t *pool, pstm_int * a, pstm_int * b, int16 toSqr)
1175 Smart-size: Increasing size of a if b->used is roughly half
1176 of b->alloc because usage has shown that a lot of these copies
1177 go on to be squared and need these extra digits
1179 if ((b->used * 2) + 2 >= x) {
1180 x = (b->used * 2) + 3;
1183 if ((res = pstm_init_size(pool, a, x)) != PSTM_OKAY) {
1186 return pstm_copy(b, a);
1189 /******************************************************************************/
1191 With some compilers, we have seen issues linking with the builtin
1192 64 bit division routine. The issues with either manifest in a failure
1193 to find 'udivdi3' at link time, or a runtime invalid instruction fault
1194 during an RSA operation.
1195 The routine below divides a 64 bit unsigned int by a 32 bit unsigned int
1196 explicitly, rather than using the division operation
1197 The 64 bit result is placed in the 'numerator' parameter
1198 The 32 bit mod (remainder) of the division is the return parameter
1199 Based on implementations by:
1200 Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
1201 Copyright (C) 1999 Hewlett-Packard Co
1202 Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
1204 #if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
1205 static uint32 psDiv64(uint64 *numerator, uint32 denominator)
1207 uint64 rem = *numerator;
1208 uint64 b = denominator;
1211 uint32 high = rem >> 32;
1213 if (high >= denominator) {
1214 high /= denominator;
1215 res = (uint64) high << 32;
1216 rem -= (uint64) (high * denominator) << 32;
1218 while ((int64)b > 0 && b < rem) {
1233 #endif /* USE_MATRIX_DIV64 */
1235 #if defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
1236 typedef unsigned long uint128 __attribute__ ((mode(TI)));
1237 static uint64 psDiv128(uint128 *numerator, uint64 denominator)
1239 uint128 rem = *numerator;
1240 uint128 b = denominator;
1243 uint64 high = rem >> 64;
1245 if (high >= denominator) {
1246 high /= denominator;
1247 res = (uint128) high << 64;
1248 rem -= (uint128) (high * denominator) << 64;
1250 while ((uint128)b > 0 && b < rem) {
1265 #endif /* USE_MATRIX_DIV128 */
1267 /******************************************************************************/
1271 int32 pstm_div(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1274 pstm_int q, x, y, t1, t2;
1276 int16 n, t, i, norm, neg;
1278 /* is divisor zero ? */
1279 if (pstm_iszero (b) == 1) {
1280 return PS_LIMIT_FAIL;
1283 /* if a < b then q=0, r = a */
1284 if (pstm_cmp_mag (a, b) == PSTM_LT) {
1286 if (pstm_copy(a, d) != PSTM_OKAY) {
1298 if ((res = pstm_init_size(pool, &t1, a->alloc)) != PSTM_OKAY) {
1301 if ((res = pstm_init_size(pool, &t2, 3)) != PSTM_OKAY) {
1304 if ((res = pstm_init_copy(pool, &x, a, 0)) != PSTM_OKAY) {
1308 Used to be an init_copy on b but pstm_grow was always hit with triple size
1310 if ((res = pstm_init_size(pool, &y, b->used * 3)) != PSTM_OKAY) {
1313 if ((res = pstm_copy(b, &y)) != PSTM_OKAY) {
1318 neg = (a->sign == b->sign) ? PSTM_ZPOS : PSTM_NEG;
1319 x.sign = y.sign = PSTM_ZPOS;
1321 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1322 norm = pstm_count_bits(&y) % DIGIT_BIT;
1323 if (norm < (int32)(DIGIT_BIT-1)) {
1324 norm = (DIGIT_BIT-1) - norm;
1325 if ((res = pstm_mul_2d(&x, norm, &x)) != PSTM_OKAY) {
1328 if ((res = pstm_mul_2d(&y, norm, &y)) != PSTM_OKAY) {
1335 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1339 if ((res = pstm_init_size(pool, &q, n - t + 1)) != PSTM_OKAY) {
1344 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1345 if ((res = pstm_lshd(&y, n - t)) != PSTM_OKAY) { /* y = y*b**{n-t} */
1349 while (pstm_cmp (&x, &y) != PSTM_LT) {
1351 if ((res = pstm_sub(&x, &y, &x)) != PSTM_OKAY) {
1356 /* reset y by shifting it back down */
1357 pstm_rshd (&y, n - t);
1359 /* step 3. for i from n down to (t + 1) */
1360 for (i = n; i >= (t + 1); i--) {
1365 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1366 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1367 if (x.dp[i] == y.dp[t]) {
1368 q.dp[i - t - 1] = (pstm_digit)((((pstm_word)1) << DIGIT_BIT) - 1);
1371 tmp = ((pstm_word) x.dp[i]) << ((pstm_word) DIGIT_BIT);
1372 tmp |= ((pstm_word) x.dp[i - 1]);
1373 #if defined(USE_MATRIX_DIV64) && defined(PSTM_32BIT)
1374 psDiv64(&tmp, y.dp[t]);
1375 #elif defined(USE_MATRIX_DIV128) && defined(PSTM_64BIT)
1376 psDiv128(&tmp, y.dp[t]);
1378 tmp /= ((pstm_word) y.dp[t]);
1379 #endif /* USE_MATRIX_DIV64 */
1380 q.dp[i - t - 1] = (pstm_digit) (tmp);
1383 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1384 xi * b**2 + xi-1 * b + xi-2
1388 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1);
1390 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1);
1392 /* find left hand */
1394 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1397 if ((res = pstm_mul_d (&t1, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1401 /* find right hand */
1402 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1403 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1406 } while (pstm_cmp_mag(&t1, &t2) == PSTM_GT);
1408 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1409 if ((res = pstm_mul_d(&y, q.dp[i - t - 1], &t1)) != PSTM_OKAY) {
1413 if ((res = pstm_lshd(&t1, i - t - 1)) != PSTM_OKAY) {
1417 if ((res = pstm_sub(&x, &t1, &x)) != PSTM_OKAY) {
1421 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1422 if (x.sign == PSTM_NEG) {
1423 if ((res = pstm_copy(&y, &t1)) != PSTM_OKAY) {
1426 if ((res = pstm_lshd (&t1, i - t - 1)) != PSTM_OKAY) {
1429 if ((res = pstm_add (&x, &t1, &x)) != PSTM_OKAY) {
1432 q.dp[i - t - 1] = q.dp[i - t - 1] - 1;
1436 now q is the quotient and x is the remainder (which we have to normalize)
1438 /* get sign before writing to c */
1439 x.sign = x.used == 0 ? PSTM_ZPOS : a->sign;
1443 if (pstm_copy (&q, c) != PSTM_OKAY) {
1451 if ((res = pstm_div_2d (pool, &x, norm, &x, NULL)) != PSTM_OKAY) {
1455 the following is a kludge, essentially we were seeing the right
1456 remainder but with excess digits that should have been zero
1458 for (i = b->used; i < x.used; i++) {
1462 if (pstm_copy (&x, d) != PSTM_OKAY) {
1470 LBL_Q:pstm_clear (&q);
1471 LBL_Y:pstm_clear (&y);
1472 LBL_X:pstm_clear (&x);
1473 LBL_T2:pstm_clear (&t2);
1474 LBL_T1:pstm_clear (&t1);
1479 /******************************************************************************/
1481 Swap the elements of two integers, for cases where you can't simply swap
1482 the pstm_int pointers around
1484 void pstm_exch(pstm_int * a, pstm_int * b)
1493 /******************************************************************************/
1495 c = a mod b, 0 <= c < b
1497 int32 pstm_mod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
1504 if ((err = pstm_init_size(pool, &t, b->alloc)) != PSTM_OKAY) {
1507 if ((err = pstm_div(pool, a, b, NULL, &t)) != PSTM_OKAY) {
1511 if (t.sign != b->sign) {
1512 err = pstm_add(&t, b, c);
1520 /******************************************************************************/
1524 int32 pstm_mulmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c,
1532 Smart-size pstm_inits. d is an output that is influenced by this local 't'
1533 so don't shrink 'd' if it wants to becuase this will lead to an pstm_grow
1536 size = a->used + b->used + 1;
1537 if ((a == d) && (size < a->alloc)) {
1540 if ((res = pstm_init_size(pool, &tmp, size)) != PSTM_OKAY) {
1543 if ((res = pstm_mul_comba(pool, a, b, &tmp, NULL, 0)) != PSTM_OKAY) {
1547 res = pstm_mod(pool, &tmp, c, d);
1552 /******************************************************************************/
1555 * Some restrictions... x must be positive and < b
1557 int32 pstm_exptmod(psPool_t *pool, pstm_int *G, pstm_int *X, pstm_int *P,
1560 pstm_int M[32], res; /* Keep this winsize based: (1 << max_winsize) */
1564 int16 bitcpy, bitcnt, mode, digidx, x, y, winsize;
1567 /* set window size from what user set as optimization */
1568 x = pstm_count_bits(X);
1572 winsize = PS_EXPTMOD_WINSIZE;
1575 /* now setup montgomery */
1576 if ((err = pstm_montgomery_setup (P, &mp)) != PSTM_OKAY) {
1581 if ((err = pstm_init_size(pool, &res, (P->used * 2) + 1)) != PSTM_OKAY) {
1586 The M table contains powers of the input base, e.g. M[x] = G^x mod P
1587 The first half of the table is not computed though except for M[0] and M[1]
1589 /* now we need R mod m */
1590 if ((err = pstm_montgomery_calc_normalization (&res, P)) != PSTM_OKAY) {
1597 if ((err = pstm_init_size(pool, &M[1], res.used)) != PSTM_OKAY) {
1601 /* now set M[1] to G * R mod m */
1602 if (pstm_cmp_mag(P, G) != PSTM_GT) {
1603 /* G > P so we reduce it first */
1604 if ((err = pstm_mod(pool, G, P, &M[1])) != PSTM_OKAY) {
1608 if ((err = pstm_copy(G, &M[1])) != PSTM_OKAY) {
1612 if ((err = pstm_mulmod (pool, &M[1], &res, P, &M[1])) != PSTM_OKAY) {
1616 Pre-allocated digit. Used for mul, sqr, AND reduce
1618 paDlen = ((M[1].used + 3) * 2) * sizeof(pstm_digit);
1619 paD = xzalloc(paDlen);
1621 compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times
1623 if (pstm_init_copy(pool, &M[1 << (winsize - 1)], &M[1], 1) != PSTM_OKAY) {
1627 for (x = 0; x < (winsize - 1); x++) {
1628 if ((err = pstm_sqr_comba (pool, &M[1 << (winsize - 1)],
1629 &M[1 << (winsize - 1)], paD, paDlen)) != PSTM_OKAY) {
1632 if ((err = pstm_montgomery_reduce(pool, &M[1 << (winsize - 1)], P, mp,
1633 paD, paDlen)) != PSTM_OKAY) {
1638 now init the second half of the array
1640 for (x = (1<<(winsize-1)) + 1; x < (1 << winsize); x++) {
1641 if ((err = pstm_init_size(pool, &M[x], M[1<<(winsize-1)].alloc + 1))
1643 for (y = 1<<(winsize-1); y < x; y++) {
1650 /* create upper table */
1651 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1652 if ((err = pstm_mul_comba(pool, &M[x - 1], &M[1], &M[x], paD, paDlen))
1656 if ((err = pstm_montgomery_reduce(pool, &M[x], P, mp, paD, paDlen)) !=
1662 /* set initial mode and bit cnt */
1666 digidx = X->used - 1;
1671 /* grab next digit as required */
1672 if (--bitcnt == 0) {
1673 /* if digidx == -1 we are out of digits so break */
1677 /* read next digit and reset bitcnt */
1678 buf = X->dp[digidx--];
1679 bitcnt = (int32)DIGIT_BIT;
1682 /* grab the next msb from the exponent */
1683 y = (pstm_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1684 buf <<= (pstm_digit)1;
1686 If the bit is zero and mode == 0 then we ignore it.
1687 These represent the leading zero bits before the first 1 bit
1688 in the exponent. Technically this opt is not required but it
1689 does lower the # of trivial squaring/reductions used
1691 if (mode == 0 && y == 0) {
1695 /* if the bit is zero and mode == 1 then we square */
1696 if (mode == 1 && y == 0) {
1697 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1701 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1708 /* else we add it to the window */
1709 bitbuf |= (y << (winsize - ++bitcpy));
1712 if (bitcpy == winsize) {
1713 /* ok window is filled so square as required and mul square first */
1714 for (x = 0; x < winsize; x++) {
1715 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1719 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1720 paDlen)) != PSTM_OKAY) {
1726 if ((err = pstm_mul_comba(pool, &res, &M[bitbuf], &res, paD,
1727 paDlen)) != PSTM_OKAY) {
1730 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1735 /* empty window and reset */
1742 /* if bits remain then square/multiply */
1743 if (mode == 2 && bitcpy > 0) {
1744 /* square then multiply if the bit is set */
1745 for (x = 0; x < bitcpy; x++) {
1746 if ((err = pstm_sqr_comba(pool, &res, &res, paD, paDlen)) !=
1750 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen))
1755 /* get next bit of the window */
1757 if ((bitbuf & (1 << winsize)) != 0) {
1759 if ((err = pstm_mul_comba(pool, &res, &M[1], &res, paD, paDlen))
1763 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD,
1764 paDlen)) != PSTM_OKAY) {
1771 Fix up result if Montgomery reduction is used recall that any value in a
1772 Montgomery system is actually multiplied by R mod n. So we have to reduce
1773 one more time to cancel out the factor of R.
1775 if ((err = pstm_montgomery_reduce(pool, &res, P, mp, paD, paDlen)) !=
1779 /* swap res with Y */
1780 if ((err = pstm_copy (&res, Y)) != PSTM_OKAY) {
1785 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1788 LBL_PAD:psFree(paD, pool);
1789 LBL_M: pstm_clear(&M[1]);
1790 LBL_RES:pstm_clear(&res);
1794 /******************************************************************************/
1798 int32 pstm_add(pstm_int *a, pstm_int *b, pstm_int *c)
1803 /* get sign of both inputs */
1807 /* handle two cases, not four */
1809 /* both positive or both negative, add their mags, copy the sign */
1811 if ((res = s_pstm_add (a, b, c)) != PSTM_OKAY) {
1816 one positive, the other negative
1817 subtract the one with the greater magnitude from the one of the lesser
1818 magnitude. The result gets the sign of the one with the greater mag.
1820 if (pstm_cmp_mag (a, b) == PSTM_LT) {
1822 if ((res = s_pstm_sub (b, a, c)) != PSTM_OKAY) {
1827 if ((res = s_pstm_sub (a, b, c)) != PSTM_OKAY) {
1835 /******************************************************************************/
1837 reverse an array, used for radix code
1839 static void pstm_reverse (unsigned char *s, int16 len)
1854 /******************************************************************************/
1856 No reverse. Useful in some of the EIP-154 PKA stuff where special byte
1857 order seems to come into play more often
1859 int32 pstm_to_unsigned_bin_nr(psPool_t *pool, pstm_int *a, unsigned char *b)
1865 if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1870 while (pstm_iszero (&t) == 0) {
1871 b[x++] = (unsigned char) (t.dp[0] & 255);
1872 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1880 /******************************************************************************/
1884 int32 pstm_to_unsigned_bin(psPool_t *pool, pstm_int *a, unsigned char *b)
1890 if ((res = pstm_init_copy(pool, &t, a, 0)) != PSTM_OKAY) {
1895 while (pstm_iszero (&t) == 0) {
1896 b[x++] = (unsigned char) (t.dp[0] & 255);
1897 if ((res = pstm_div_2d (pool, &t, 8, &t, NULL)) != PSTM_OKAY) {
1902 pstm_reverse (b, x);
1907 /******************************************************************************/
1909 compare against a single digit
1911 int32 pstm_cmp_d(pstm_int *a, pstm_digit b)
1913 /* compare based on sign */
1914 if ((b && a->used == 0) || a->sign == PSTM_NEG) {
1918 /* compare based on magnitude */
1923 /* compare the only digit of a to b */
1926 } else if (a->dp[0] < b) {
1934 Need invmod for ECC and also private key loading for hardware crypto
1935 in cases where dQ > dP. The values must be switched and a new qP must be
1936 calculated using this function
1938 static int32 pstm_invmod_slow(psPool_t *pool, pstm_int * a, pstm_int * b,
1941 pstm_int x, y, u, v, A, B, C, D;
1944 /* b cannot be negative */
1945 if (b->sign == PSTM_NEG || pstm_iszero(b) == 1) {
1946 return PS_LIMIT_FAIL;
1950 if (pstm_init_size(pool, &x, b->used) != PSTM_OKAY) {
1955 if ((res = pstm_mod(pool, a, b, &x)) != PSTM_OKAY) {
1959 if (pstm_init_copy(pool, &y, b, 0) != PSTM_OKAY) {
1963 /* 2. [modified] if x,y are both even then return an error! */
1964 if (pstm_iseven (&x) == 1 && pstm_iseven (&y) == 1) {
1969 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1970 if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
1973 if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
1977 if ((res = pstm_init_size(pool, &A, sizeof(pstm_digit))) != PSTM_OKAY) {
1981 if ((res = pstm_init_size(pool, &D, sizeof(pstm_digit))) != PSTM_OKAY) {
1987 if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
1990 if ((res = pstm_init(pool, &C)) != PSTM_OKAY) {
1995 /* 4. while u is even do */
1996 while (pstm_iseven (&u) == 1) {
1998 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
2002 /* 4.2 if A or B is odd then */
2003 if (pstm_isodd (&A) == 1 || pstm_isodd (&B) == 1) {
2004 /* A = (A+y)/2, B = (B-x)/2 */
2005 if ((res = pstm_add (&A, &y, &A)) != PSTM_OKAY) {
2008 if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2012 /* A = A/2, B = B/2 */
2013 if ((res = pstm_div_2 (&A, &A)) != PSTM_OKAY) {
2016 if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) {
2021 /* 5. while v is even do */
2022 while (pstm_iseven (&v) == 1) {
2024 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2028 /* 5.2 if C or D is odd then */
2029 if (pstm_isodd (&C) == 1 || pstm_isodd (&D) == 1) {
2030 /* C = (C+y)/2, D = (D-x)/2 */
2031 if ((res = pstm_add (&C, &y, &C)) != PSTM_OKAY) {
2034 if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2038 /* C = C/2, D = D/2 */
2039 if ((res = pstm_div_2 (&C, &C)) != PSTM_OKAY) {
2042 if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) {
2047 /* 6. if u >= v then */
2048 if (pstm_cmp (&u, &v) != PSTM_LT) {
2049 /* u = u - v, A = A - C, B = B - D */
2050 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2053 if ((res = pstm_sub (&A, &C, &A)) != PSTM_OKAY) {
2056 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2060 /* v - v - u, C = C - A, D = D - B */
2061 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2064 if ((res = pstm_sub (&C, &A, &C)) != PSTM_OKAY) {
2067 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2072 /* if not zero goto step 4 */
2073 if (pstm_iszero (&u) == 0)
2076 /* now a = C, b = D, gcd == g*v */
2078 /* if v != 1 then there is no inverse */
2079 if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2084 /* if its too low */
2085 while (pstm_cmp_d(&C, 0) == PSTM_LT) {
2086 if ((res = pstm_add(&C, b, &C)) != PSTM_OKAY) {
2092 while (pstm_cmp_mag(&C, b) != PSTM_LT) {
2093 if ((res = pstm_sub(&C, b, &C)) != PSTM_OKAY) {
2098 /* C is now the inverse */
2099 if ((res = pstm_copy(&C, c)) != PSTM_OKAY) {
2104 LBL_C: pstm_clear(&C);
2105 LBL_D: pstm_clear(&D);
2106 LBL_B: pstm_clear(&B);
2107 LBL_A: pstm_clear(&A);
2108 LBL_V: pstm_clear(&v);
2109 LBL_U: pstm_clear(&u);
2110 LBL_Y: pstm_clear(&y);
2111 LBL_X: pstm_clear(&x);
2116 /* c = 1/a (mod b) for odd b only */
2117 int32 pstm_invmod(psPool_t *pool, pstm_int *a, pstm_int *b, pstm_int *c)
2119 pstm_int x, y, u, v, B, D;
2123 /* 2. [modified] b must be odd */
2124 if (pstm_iseven (b) == 1) {
2125 return pstm_invmod_slow(pool, a,b,c);
2128 /* x == modulus, y == value to invert */
2129 if ((res = pstm_init_copy(pool, &x, b, 0)) != PSTM_OKAY) {
2133 if ((res = pstm_init_size(pool, &y, a->alloc)) != PSTM_OKAY) {
2137 /* we need y = |a| */
2140 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2141 if ((res = pstm_init_copy(pool, &u, &x, 0)) != PSTM_OKAY) {
2144 if ((res = pstm_init_copy(pool, &v, &y, 0)) != PSTM_OKAY) {
2147 if ((res = pstm_init(pool, &B)) != PSTM_OKAY) {
2150 if ((res = pstm_init(pool, &D)) != PSTM_OKAY) {
2158 /* 4. while u is even do */
2159 while (pstm_iseven (&u) == 1) {
2161 if ((res = pstm_div_2 (&u, &u)) != PSTM_OKAY) {
2165 /* 4.2 if B is odd then */
2166 if (pstm_isodd (&B) == 1) {
2167 if ((res = pstm_sub (&B, &x, &B)) != PSTM_OKAY) {
2172 if ((res = pstm_div_2 (&B, &B)) != PSTM_OKAY) {
2177 /* 5. while v is even do */
2178 while (pstm_iseven (&v) == 1) {
2180 if ((res = pstm_div_2 (&v, &v)) != PSTM_OKAY) {
2183 /* 5.2 if D is odd then */
2184 if (pstm_isodd (&D) == 1) {
2186 if ((res = pstm_sub (&D, &x, &D)) != PSTM_OKAY) {
2191 if ((res = pstm_div_2 (&D, &D)) != PSTM_OKAY) {
2196 /* 6. if u >= v then */
2197 if (pstm_cmp (&u, &v) != PSTM_LT) {
2198 /* u = u - v, B = B - D */
2199 if ((res = pstm_sub (&u, &v, &u)) != PSTM_OKAY) {
2202 if ((res = pstm_sub (&B, &D, &B)) != PSTM_OKAY) {
2206 /* v - v - u, D = D - B */
2207 if ((res = pstm_sub (&v, &u, &v)) != PSTM_OKAY) {
2210 if ((res = pstm_sub (&D, &B, &D)) != PSTM_OKAY) {
2215 /* if not zero goto step 4 */
2216 if (sanity++ > 1000) {
2217 res = PS_LIMIT_FAIL;
2220 if (pstm_iszero (&u) == 0) {
2224 /* now a = C, b = D, gcd == g*v */
2226 /* if v != 1 then there is no inverse */
2227 if (pstm_cmp_d (&v, 1) != PSTM_EQ) {
2232 /* b is now the inverse */
2234 while (D.sign == PSTM_NEG) {
2235 if ((res = pstm_add (&D, b, &D)) != PSTM_OKAY) {
2239 if ((res = pstm_copy (&D, c)) != PSTM_OKAY) {
2245 LBL_D: pstm_clear(&D);
2246 LBL_B: pstm_clear(&B);
2247 LBL_V: pstm_clear(&v);
2248 LBL_U: pstm_clear(&u);
2249 LBL_Y: pstm_clear(&y);
2250 LBL_X: pstm_clear(&x);
2253 #endif /* !DISABLE_PSTM */
2254 /******************************************************************************/