2 * Copyright (C) 2017 Denys Vlasenko
4 * Licensed under GPLv2, see file LICENSE in this source tree.
8 /* The file is taken almost verbatim from matrixssl-3-7-2b-open/crypto/math/.
9 * Changes are flagged with //bbox
13 * @file pstm_sqr_comba.c
14 * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
16 * Multiprecision Squaring with Comba technique.
19 * Copyright (c) 2013-2015 INSIDE Secure Corporation
20 * Copyright (c) PeerSec Networks, 2002-2011
23 * The latest version of this code is available at http://www.matrixssl.org
25 * This software is open source; you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License as published by
27 * the Free Software Foundation; either version 2 of the License, or
28 * (at your option) any later version.
30 * This General Public License does NOT permit incorporating this software
31 * into proprietary programs. If you are unable to comply with the GPL, a
32 * commercial license for this software may be purchased from INSIDE at
33 * http://www.insidesecure.com/eng/Company/Locations
35 * This program is distributed WITHOUT ANY WARRANTY; without even the
36 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
37 * See the GNU General Public License for more details.
39 * You should have received a copy of the GNU General Public License
40 * along with this program; if not, write to the Free Software
41 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42 * http://www.gnu.org/copyleft/gpl.html
44 /******************************************************************************/
47 //#include "../cryptoApi.h"
50 /******************************************************************************/
52 /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
53 #if !defined(__GNUC__) || !defined(__i386__)
54 #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
56 //#pragma message ("Using 32 bit x86 Assembly Optimizations")
63 #define COMBA_STORE(x) \
66 #define COMBA_STORE2(x) \
/*
 * Shift the three-word accumulator down by one word: c0 takes the old c1,
 * c1 takes the old c2, and c2 is cleared.
 * NOTE: the expansion already ends with ';', so writing "CARRY_FORWARD;"
 * produces an extra empty statement. Do not use it as the unbraced body
 * of an if that has an else.
 */
69 #define CARRY_FORWARD \
70 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
74 #define SQRADD(i, j) \
76 "movl %6,%%eax \n\t" \
78 "addl %%eax,%0 \n\t" \
79 "adcl %%edx,%1 \n\t" \
81 :"=rm"(c0), "=rm"(c1), "=rm"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc");
82 //bbox: ^^^ replaced "=r" with "=rm": %ebx is not available on shared build
84 #define SQRADD2(i, j) \
86 "movl %6,%%eax \n\t" \
88 "addl %%eax,%0 \n\t" \
89 "adcl %%edx,%1 \n\t" \
91 "addl %%eax,%0 \n\t" \
92 "adcl %%edx,%1 \n\t" \
94 :"=rm"(c0), "=rm"(c1), "=rm"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc");
95 //bbox: ^^^ replaced "=r" with "=rm": %ebx is not available on shared build
97 #define SQRADDSC(i, j) \
99 "movl %6,%%eax \n\t" \
101 "movl %%eax,%0 \n\t" \
102 "movl %%edx,%1 \n\t" \
104 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
106 #define SQRADDAC(i, j) \
108 "movl %6,%%eax \n\t" \
110 "addl %%eax,%0 \n\t" \
111 "adcl %%edx,%1 \n\t" \
113 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc");
123 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc");
125 /******************************************************************************/
126 #elif defined(PSTM_X86_64)
127 /* x86-64 optimized */
128 #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
129 #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
131 //#pragma message ("Using 64 bit x86_64 Assembly Optimizations")
135 #define CLEAR_CARRY \
138 #define COMBA_STORE(x) \
141 #define COMBA_STORE2(x) \
/*
 * Shift the three-word accumulator down by one word: c0 takes the old c1,
 * c1 takes the old c2, and c2 is cleared.
 * NOTE: the expansion already ends with ';', so writing "CARRY_FORWARD;"
 * produces an extra empty statement. Do not use it as the unbraced body
 * of an if that has an else.
 */
144 #define CARRY_FORWARD \
145 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
149 #define SQRADD(i, j) \
151 "movq %6,%%rax \n\t" \
153 "addq %%rax,%0 \n\t" \
154 "adcq %%rdx,%1 \n\t" \
156 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");
158 #define SQRADD2(i, j) \
160 "movq %6,%%rax \n\t" \
162 "addq %%rax,%0 \n\t" \
163 "adcq %%rdx,%1 \n\t" \
165 "addq %%rax,%0 \n\t" \
166 "adcq %%rdx,%1 \n\t" \
168 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
170 #define SQRADDSC(i, j) \
172 "movq %6,%%rax \n\t" \
174 "movq %%rax,%0 \n\t" \
175 "movq %%rdx,%1 \n\t" \
177 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
179 #define SQRADDAC(i, j) \
181 "movq %6,%%rax \n\t" \
183 "addq %%rax,%0 \n\t" \
184 "adcq %%rdx,%1 \n\t" \
186 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc");
196 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
198 /******************************************************************************/
199 #elif defined(PSTM_ARM)
201 //#pragma message ("Using 32 bit ARM Assembly Optimizations")
205 #define CLEAR_CARRY \
208 #define COMBA_STORE(x) \
211 #define COMBA_STORE2(x) \
/*
 * Shift the three-word accumulator down by one word: c0 takes the old c1,
 * c1 takes the old c2, and c2 is cleared.
 * NOTE: the expansion already ends with ';', so writing "CARRY_FORWARD;"
 * produces an extra empty statement. Do not use it as the unbraced body
 * of an if that has an else.
 */
214 #define CARRY_FORWARD \
215 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
219 /* squares i (j is not referenced) and adds the product into c0, c1, c2 */
220 #define SQRADD(i, j) \
222 " UMULL r0,r1,%6,%6 \n\t" \
223 " ADDS %0,%0,r0 \n\t" \
224 " ADCS %1,%1,r1 \n\t" \
225 " ADC %2,%2,#0 \n\t" \
226 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc");
228 /* for squaring some of the terms are doubled... */
229 #define SQRADD2(i, j) \
231 " UMULL r0,r1,%6,%7 \n\t" \
232 " ADDS %0,%0,r0 \n\t" \
233 " ADCS %1,%1,r1 \n\t" \
234 " ADC %2,%2,#0 \n\t" \
235 " ADDS %0,%0,r0 \n\t" \
236 " ADCS %1,%1,r1 \n\t" \
237 " ADC %2,%2,#0 \n\t" \
238 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc");
240 #define SQRADDSC(i, j) \
242 " UMULL %0,%1,%6,%7 \n\t" \
243 " SUB %2,%2,%2 \n\t" \
244 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc");
246 #define SQRADDAC(i, j) \
248 " UMULL r0,r1,%6,%7 \n\t" \
249 " ADDS %0,%0,r0 \n\t" \
250 " ADCS %1,%1,r1 \n\t" \
251 " ADC %2,%2,#0 \n\t" \
252 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc");
256 " ADDS %0,%0,%3 \n\t" \
257 " ADCS %1,%1,%4 \n\t" \
258 " ADC %2,%2,%5 \n\t" \
259 " ADDS %0,%0,%3 \n\t" \
260 " ADCS %1,%1,%4 \n\t" \
261 " ADC %2,%2,%5 \n\t" \
262 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc");
264 /******************************************************************************/
265 #elif defined(PSTM_MIPS)
267 //#pragma message ("Using 32 bit MIPS Assembly Optimizations")
271 #define CLEAR_CARRY \
274 #define COMBA_STORE(x) \
277 #define COMBA_STORE2(x) \
/*
 * Shift the three-word accumulator down by one word: c0 takes the old c1,
 * c1 takes the old c2, and c2 is cleared.
 * NOTE: the expansion already ends with ';', so writing "CARRY_FORWARD;"
 * produces an extra empty statement. Do not use it as the unbraced body
 * of an if that has an else.
 */
280 #define CARRY_FORWARD \
281 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
285 /* squares i (j is not referenced) and adds the result into c0, c1, c2 */
286 #define SQRADD(i, j) \
288 " multu %6,%6 \n\t" \
291 " addu %0,%0,$12 \n\t" \
292 " sltu $12,%0,$12 \n\t" \
293 " addu %1,%1,$13 \n\t" \
294 " sltu $13,%1,$13 \n\t" \
295 " addu %1,%1,$12 \n\t" \
296 " sltu $12,%1,$12 \n\t" \
297 " addu %2,%2,$13 \n\t" \
298 " addu %2,%2,$12 \n\t" \
299 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13");
301 /* for squaring some of the terms are doubled... */
302 #define SQRADD2(i, j) \
304 " multu %6,%7 \n\t" \
308 " addu %0,%0,$12 \n\t" \
309 " sltu $14,%0,$12 \n\t" \
310 " addu %1,%1,$13 \n\t" \
311 " sltu $15,%1,$13 \n\t" \
312 " addu %1,%1,$14 \n\t" \
313 " sltu $14,%1,$14 \n\t" \
314 " addu %2,%2,$15 \n\t" \
315 " addu %2,%2,$14 \n\t" \
317 " addu %0,%0,$12 \n\t" \
318 " sltu $14,%0,$12 \n\t" \
319 " addu %1,%1,$13 \n\t" \
320 " sltu $15,%1,$13 \n\t" \
321 " addu %1,%1,$14 \n\t" \
322 " sltu $14,%1,$14 \n\t" \
323 " addu %2,%2,$15 \n\t" \
324 " addu %2,%2,$14 \n\t" \
325 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15");
327 #define SQRADDSC(i, j) \
329 " multu %6,%7 \n\t" \
332 " xor %2,%2,%2 \n\t" \
333 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc");
335 #define SQRADDAC(i, j) \
337 " multu %6,%7 \n\t" \
340 " addu %0,%0,$12 \n\t" \
341 " sltu $12,%0,$12 \n\t" \
342 " addu %1,%1,$13 \n\t" \
343 " sltu $13,%1,$13 \n\t" \
344 " addu %1,%1,$12 \n\t" \
345 " sltu $12,%1,$12 \n\t" \
346 " addu %2,%2,$13 \n\t" \
347 " addu %2,%2,$12 \n\t" \
348 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14");
352 " addu %0,%0,%3 \n\t" \
353 " sltu $10,%0,%3 \n\t" \
354 " addu %1,%1,$10 \n\t" \
355 " sltu $10,%1,$10 \n\t" \
356 " addu %1,%1,%4 \n\t" \
357 " sltu $11,%1,%4 \n\t" \
358 " addu %2,%2,$10 \n\t" \
359 " addu %2,%2,$11 \n\t" \
360 " addu %2,%2,%5 \n\t" \
362 " addu %0,%0,%3 \n\t" \
363 " sltu $10,%0,%3 \n\t" \
364 " addu %1,%1,$10 \n\t" \
365 " sltu $10,%1,$10 \n\t" \
366 " addu %1,%1,%4 \n\t" \
367 " sltu $11,%1,%4 \n\t" \
368 " addu %2,%2,$10 \n\t" \
369 " addu %2,%2,$11 \n\t" \
370 " addu %2,%2,%5 \n\t" \
371 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
374 /******************************************************************************/
376 /* ISO C portable code */
380 #define CLEAR_CARRY \
383 #define COMBA_STORE(x) \
386 #define COMBA_STORE2(x) \
/*
 * Shift the three-word accumulator down by one word: c0 takes the old c1,
 * c1 takes the old c2, and c2 is cleared.
 * NOTE: the expansion already ends with ';', so writing "CARRY_FORWARD;"
 * produces an extra empty statement. Do not use it as the unbraced body
 * of an if that has an else.
 */
389 #define CARRY_FORWARD \
390 do { c0 = c1; c1 = c2; c2 = 0; } while (0);
394 /* adds the product i*j into the accumulator c0, c1, c2, propagating carries */
395 #define SQRADD(i, j) \
397 t = c0 + ((pstm_word)i) * ((pstm_word)j); c0 = (pstm_digit)t; \
398 t = c1 + (t >> DIGIT_BIT); \
399 c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \
403 /* for squaring some of the terms are doubled... */
404 #define SQRADD2(i, j) \
406 t = ((pstm_word)i) * ((pstm_word)j); \
407 tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
408 tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
409 c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
410 tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
411 tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
412 c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
415 #define SQRADDSC(i, j) \
417 t = ((pstm_word)i) * ((pstm_word)j); \
418 sc0 = (pstm_digit)t; sc1 = (pstm_digit)(t >> DIGIT_BIT); sc2 = 0; \
421 #define SQRADDAC(i, j) \
423 t = ((pstm_word)sc0) + ((pstm_word)i) * ((pstm_word)j); \
424 sc0 = (pstm_digit)t; \
425 t = ((pstm_word)sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit)t; \
426 sc2 += (pstm_digit)(t >> DIGIT_BIT); \
431 t = ((pstm_word)sc0) + ((pstm_word)sc0) + ((pstm_word)c0); \
432 c0 = (pstm_digit)t; \
433 t = ((pstm_word)sc1) + ((pstm_word)sc1) + c1 + (t >> DIGIT_BIT); \
434 c1 = (pstm_digit)t; \
435 c2 = c2 + sc2 + sc2 + (pstm_digit)(t >> DIGIT_BIT); \
440 /******************************************************************************/
442 Non-unrolled comba squarer
/*
 * Function-like macro that shadows the function of the same name: any
 * five-argument use of pstm_sqr_comba_gen(pool, A, B, paD, paDlen) is
 * rewritten to the four-argument form with the first (pool) argument
 * dropped. Because the macro is defined before the function itself, the
 * definition's parameter list is rewritten the same way. The name inside
 * the expansion is not expanded again, so there is no recursion.
 */
445 #define pstm_sqr_comba_gen(pool, A, B, paD, paDlen) \
446 pstm_sqr_comba_gen( A, B, paD, paDlen)
447 static int32 pstm_sqr_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B,
448 pstm_digit *paD, uint32 paDlen)
450 int paDfail, pa; //bbox: was int16
452 pstm_digit c0, c1, c2, *dst;
458 /* get size of output and trim */
459 pa = A->used + A->used;
461 /* number of output digits to produce */
465 If B is not large enough grow it and continue
468 if (pstm_grow(B, pa) != PSTM_OKAY) {
473 if (paDlen < (sizeof(pstm_digit) * pa)) {
474 paDfail = 1; /* have a paD, but it's not big enough */
475 dst = xzalloc(sizeof(pstm_digit) * pa);//bbox
478 memset(dst, 0x0, paDlen);
481 dst = xzalloc(sizeof(pstm_digit) * pa);//bbox
484 for (ix = 0; ix < pa; ix++) {
486 pstm_digit *tmpy, *tmpx;
488 /* get offsets into the two bignums */
489 ty = min(A->used-1, ix);
492 /* setup temp aliases */
497 This is the number of times the loop will iterate,
498 while (tx++ < A->used && ty-- >= 0) { ... }
500 iy = min(A->used-tx, ty+1);
503 now for squaring tx can never equal ty. We halve the distance since
504 they approach at a rate of 2x and we have to round because odd cases
507 iy = min(iy, (ty-tx+1)>>1);
509 /* forward carries */
513 for (iz = 0; iz < iy; iz++) {
514 SQRADD2(*tmpx++, *tmpy--);
517 /* even columns have the square term in them */
519 SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
523 COMBA_STORE(dst[ix]);
535 for (ix = 0; ix < pa; ix++) {
538 /* clear unused digits (that existed in the old copy of c) */
539 for (; ix < iz; ix++) {
545 if ((paD == NULL) || paDfail == 1) {
551 /******************************************************************************/
553 Unrolled Comba loop for 1024 bit keys
555 #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
556 static int32 pstm_sqr_comba16(pstm_int *A, pstm_int *B)
558 pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
564 if (pstm_grow(B, 32) != PSTM_OKAY) {
587 SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
592 SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
597 SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
602 SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
607 SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
612 SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
617 SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
622 SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
627 SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
632 SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
637 SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
642 SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
647 SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
652 SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
657 SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
662 SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
667 SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
672 SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
677 SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
682 SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
687 SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
692 SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
697 SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
702 SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
707 SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]);
712 SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]);
717 SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]);
722 SQRADD2(a[14], a[15]);
727 SQRADD(a[15], a[15]);
734 memcpy(B->dp, b, 32 * sizeof(pstm_digit));
738 #endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */
741 #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
742 static int32 pstm_sqr_comba32(pstm_int *A, pstm_int *B)
744 pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
750 if (pstm_grow(B, 64) != PSTM_OKAY) {
772 SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
777 SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
782 SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
787 SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB;
792 SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]);
797 SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB;
802 SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]);
807 SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB;
812 SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]);
817 SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB;
822 SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]);
827 SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB;
832 SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]);
837 SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB;
842 SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]);
847 SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB;
852 SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]);
857 SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB;
862 SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]);
867 SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB;
872 SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]);
877 SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB;
882 SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]);
887 SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB;
892 SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]);
897 SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB;
902 SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]);
907 SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB;
912 SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]);
917 SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB;
922 SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]);
927 SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB;
932 SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]);
937 SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB;
942 SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]);
947 SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB;
952 SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]);
957 SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB;
962 SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]);
967 SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB;
972 SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]);
977 SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB;
982 SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]);
987 SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB;
992 SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]);
997 SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB;
1002 SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]);
1007 SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB;
1012 SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]);
1017 SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB;
1022 SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]);
1027 SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB;
1032 SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]);
1037 SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB;
1042 SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]);
1047 SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB;
1052 SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
1057 SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
1062 SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
1067 SQRADD2(a[30], a[31]);
1072 SQRADD(a[31], a[31]);
1074 COMBA_STORE2(b[63]);
1078 B->sign = PSTM_ZPOS;
1079 memcpy(B->dp, b, 64 * sizeof(pstm_digit));
1083 #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1085 /******************************************************************************/
1088 int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_digit *paD,
1091 #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS
1092 if (A->used == 16) {
1093 return pstm_sqr_comba16(A, B);
1095 #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
1096 if (A->used == 32) {
1097 return pstm_sqr_comba32(A, B);
1099 #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1100 return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
1103 #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS
1104 if (A->used == 32) {
1105 return pstm_sqr_comba32(A, B);
1107 #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */
1108 return pstm_sqr_comba_gen(pool, A, B, paD, paDlen);
1112 #endif /* DISABLE_PSTM */
1113 /******************************************************************************/