c231c4ddf8f06e4d17031550d20b8df11c61a199
[oweals/busybox.git] / networking / tls_pstm_montgomery_reduce.c
1 /*
2  * Copyright (C) 2017 Denys Vlasenko
3  *
4  * Licensed under GPLv2, see file LICENSE in this source tree.
5  */
6 #include "tls.h"
7
8 /**
9  *      @file    pstm_montgomery_reduce.c
10  *      @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master)
11  *
12  *      Multiprecision Montgomery Reduction.
13  */
14 /*
15  *      Copyright (c) 2013-2015 INSIDE Secure Corporation
16  *      Copyright (c) PeerSec Networks, 2002-2011
17  *      All Rights Reserved
18  *
19  *      The latest version of this code is available at http://www.matrixssl.org
20  *
21  *      This software is open source; you can redistribute it and/or modify
22  *      it under the terms of the GNU General Public License as published by
23  *      the Free Software Foundation; either version 2 of the License, or
24  *      (at your option) any later version.
25  *
26  *      This General Public License does NOT permit incorporating this software
27  *      into proprietary programs.  If you are unable to comply with the GPL, a
28  *      commercial license for this software may be purchased from INSIDE at
29  *      http://www.insidesecure.com/eng/Company/Locations
30  *
31  *      This program is distributed in WITHOUT ANY WARRANTY; without even the
32  *      implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
33  *      See the GNU General Public License for more details.
34  *
35  *      You should have received a copy of the GNU General Public License
36  *      along with this program; if not, write to the Free Software
37  *      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
38  *      http://www.gnu.org/copyleft/gpl.html
39  */
40 /******************************************************************************/
41
42 ///bbox
43 //#include "../cryptoApi.h"
44 #ifndef DISABLE_PSTM
45
46 /******************************************************************************/
47
#if defined(PSTM_X86)
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */
#if !defined(__GNUC__) || !defined(__i386__) || !defined(PSTM_32BIT)
#error "PSTM_X86 option requires GCC and 32 bit mode x86 processor"
#endif
//#pragma message ("Using 32 bit x86 Assembly Optimizations")

/* No per-reduction setup/teardown is needed on this target */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier mu for the current output word c[x] */
#define LOOP_START \
   mu = c[x] * mp

/* One inner-loop step: _c[LO] += mu * (*tmpm++) + cy, with the product's
 * high 32 bits (plus any carry-outs from the two additions) left in cy.
 * "mull" leaves the 32x32->64 product in edx:eax.
 * NOTE(review): "%cc" is a nonstandard spelling of the "cc" clobber
 * (the x86-64 block below uses plain "cc"); GCC tolerates it but
 * clang rejects it -- worth normalizing.
 */
#define INNERMUL                                          \
asm(                                                      \
   "movl %5,%%eax \n\t"                                   \
   "mull %4       \n\t"                                   \
   "addl %1,%%eax \n\t"                                   \
   "adcl $0,%%edx \n\t"                                   \
   "addl %%eax,%0 \n\t"                                   \
   "adcl $0,%%edx \n\t"                                   \
   "movl %%edx,%1 \n\t"                                   \
:"=g"(_c[LO]), "=r"(cy)                                   \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++)              \
: "%eax", "%edx", "%cc")

/* Propagate the pending carry into the current word:
 * _c[LO] += cy; cy = carry-out (0 or 1, via setb/movzbl). */
#define PROPCARRY                           \
asm(                                        \
   "addl   %1,%0    \n\t"                   \
   "setb   %%al     \n\t"                   \
   "movzbl %%al,%1 \n\t"                    \
:"=g"(_c[LO]), "=r"(cy)                     \
:"0"(_c[LO]), "1"(cy)                       \
: "%eax", "%cc")
83 /******************************************************************************/
#elif defined(PSTM_X86_64)
/* x86-64 optimized */
#if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT)
#error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor"
#endif
//#pragma message ("Using 64 bit x86_64 Assembly Optimizations")

/* No per-reduction setup/teardown is needed on this target */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier mu for the current output word c[x] */
#define LOOP_START \
mu = c[x] * mp

/* One inner-loop step: _c[LO] += mu * (*tmpm++) + cy, with the product's
 * high 64 bits (plus any carry-outs from the two additions) left in cy.
 * "mulq" leaves the 64x64->128 product in rdx:rax.
 */
#define INNERMUL                                           \
asm(                                                       \
	"movq %5,%%rax \n\t"                                   \
	"mulq %4       \n\t"                                   \
	"addq %1,%%rax \n\t"                                   \
	"adcq $0,%%rdx \n\t"                                   \
	"addq %%rax,%0 \n\t"                                   \
	"adcq $0,%%rdx \n\t"                                   \
	"movq %%rdx,%1 \n\t"                                   \
	:"=g"(_c[LO]), "=r"(cy)                                \
	:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++)           \
	: "%rax", "%rdx", "cc")

/* Eight unrolled INNERMUL steps in one asm block: consumes tmpm[0..7]
 * against _c[0..7], software-pipelining the loads through r10 (next c
 * word) and r11 (next modulus word).  The running carry is kept in
 * cy (%1, re-read each step via the tied input %3).  The caller bumps
 * _c and tmpm by 8 afterwards.
 */
#define INNERMUL8                               \
asm(                                                    \
	"movq 0(%5),%%rax    \n\t"  \
	"movq 0(%2),%%r10    \n\t"  \
	"movq 0x8(%5),%%r11  \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x8(%2),%%r10  \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0(%0)    \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x10(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x10(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x8(%0)  \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x18(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x18(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x10(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x20(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x20(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x18(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x28(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x28(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x20(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x30(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x30(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x28(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"movq 0x38(%5),%%r11 \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq 0x38(%2),%%r10 \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x30(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	"movq %%r11,%%rax    \n\t"  \
	"mulq %4             \n\t"  \
	"addq %%r10,%%rax    \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"addq %3,%%rax       \n\t"  \
	"adcq $0,%%rdx       \n\t"  \
	"movq %%rax,0x38(%0) \n\t"  \
	"movq %%rdx,%1       \n\t"  \
	\
	:"=r"(_c), "=r"(cy)                    \
	: "0"(_c),  "1"(cy), "g"(mu), "r"(tmpm)\
	: "%rax", "%rdx", "%r10", "%r11", "cc")

/* Propagate the pending carry into the current word:
 * _c[LO] += cy; cy = carry-out (0 or 1, via setb/movzbq). */
#define PROPCARRY                          \
asm(                                       \
	"addq   %1,%0    \n\t"                 \
	"setb   %%al     \n\t"                 \
	"movzbq %%al,%1 \n\t"                  \
	:"=g"(_c[LO]), "=r"(cy)                \
	:"0"(_c[LO]), "1"(cy)                  \
	: "%rax", "cc")
212 /******************************************************************************/
#elif defined(PSTM_ARM)

/* No per-reduction setup/teardown is needed on this target */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier mu for the current output word c[x] */
#define LOOP_START \
mu = c[x] * mp

#ifdef __thumb2__
//#pragma message ("Using 32 bit ARM Thumb2 Assembly Optimizations")
/* One inner-loop step: _c[0] += mu * (*tmpm++) + cy, new carry in cy.
 * ADDS folds the incoming carry into the loaded word; the conditional
 * MOVCS/MOVCC pair turns the carry flag into 0/1 in %0 (Thumb-2 needs
 * the preceding IT block for them); UMLAL then accumulates the
 * 32x32->64 product into r0 (low) : %0 (high).
 * NOTE(review): "%cc" is a nonstandard spelling of the "cc" clobber;
 * GCC tolerates it but clang rejects it.
 */
#define INNERMUL                    \
asm(                                \
	" LDR    r0,%1            \n\t" \
	" ADDS   r0,r0,%0         \n\t" \
	" ITE CS                  \n\t" \
	" MOVCS  %0,#1            \n\t" \
	" MOVCC  %0,#0            \n\t" \
	" UMLAL  r0,%0,%3,%4      \n\t" \
	" STR    r0,%1            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","%cc");
/* Propagate the carry: _c[0] += cy; cy = carry-out (0 or 1) */
#define PROPCARRY                  \
asm(                               \
	" LDR   r0,%1            \n\t" \
	" ADDS  r0,r0,%0         \n\t" \
	" STR   r0,%1            \n\t" \
	" ITE CS                 \n\t" \
	" MOVCS %0,#1            \n\t" \
	" MOVCC %0,#0            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","%cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
/* Same as the Thumb-2 variant above, minus the IT blocks (classic ARM
 * encodes the condition in MOVCS/MOVCC directly). */
#define INNERMUL                    \
asm(                                \
	" LDR    r0,%1            \n\t" \
	" ADDS   r0,r0,%0         \n\t" \
	" MOVCS  %0,#1            \n\t" \
	" MOVCC  %0,#0            \n\t" \
	" UMLAL  r0,%0,%3,%4      \n\t" \
	" STR    r0,%1            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
	:"r0","%cc");
/* Propagate the carry: _c[0] += cy; cy = carry-out (0 or 1) */
#define PROPCARRY                  \
asm(                               \
	" LDR   r0,%1            \n\t" \
	" ADDS  r0,r0,%0         \n\t" \
	" STR   r0,%1            \n\t" \
	" MOVCS %0,#1            \n\t" \
	" MOVCC %0,#0            \n\t" \
	:"=r"(cy),"=m"(_c[0])\
	:"0"(cy),"m"(_c[0])\
	:"r0","%cc");
#endif /* __thumb2__ */
270
271
272 /******************************************************************************/
#elif defined(PSTM_MIPS)
/* MIPS32 */
//#pragma message ("Using 32 bit MIPS Assembly Optimizations")
/* No per-reduction setup/teardown is needed on this target */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier mu for the current output word c[x] */
#define LOOP_START \
mu = c[x] * mp

/* One inner-loop step: _c[0] += mu * tmpm[0] + cy, new carry in cy.
 * multu leaves the 32x32->64 product in HI/LO ($13/$12); each addu is
 * followed by a sltu to recover its carry-out.  tmpm is advanced with
 * plain C ("++tmpm" after the asm) because tmpm[0] is passed by value.
 */
#define INNERMUL                      \
asm(                                                              \
	" multu    %3,%4          \n\t"   \
	" mflo     $12            \n\t"   \
	" mfhi     $13            \n\t"   \
	" addu     $12,$12,%0     \n\t"   \
	" sltu     $10,$12,%0     \n\t"   \
	" addu     $13,$13,$10    \n\t"   \
	" lw       $10,%1         \n\t"   \
	" addu     $12,$12,$10    \n\t"   \
	" sltu     $10,$12,$10    \n\t"   \
	" addu     %0,$13,$10     \n\t"   \
	" sw       $12,%1         \n\t"   \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(mu),"r"(tmpm[0]),"r"(_c[0])\
	:"$10","$12","$13")\
; ++tmpm;

/* Propagate the carry: _c[0] += cy; cy = carry-out (0 or 1, via sltu) */
#define PROPCARRY                     \
asm(                                  \
	" lw       $10,%1        \n\t"    \
	" addu     $10,$10,%0    \n\t"    \
	" sw       $10,%1        \n\t"    \
	" sltu     %0,$10,%0     \n\t"    \
	:"=r"(cy),"=m"(_c[0])\
	:"r"(cy),"r"(_c[0])\
	:"$10");
310
311 /******************************************************************************/
#else

/* ISO C code: portable fallback using the double-width pstm_word type */
#define MONT_START
#define MONT_FINI
#define LOOP_END
/* Compute the Montgomery multiplier mu for the current output word c[x] */
#define LOOP_START \
   mu = c[x] * mp

/* One inner-loop step: _c[0] += mu * (*tmpm++) + cy, computed in a
 * double-width pstm_word so the carry-out is just the high half. */
#define INNERMUL                                                                                \
	do { pstm_word t;                                                                       \
		t = ((pstm_word)_c[0] + (pstm_word)cy) +                \
			(((pstm_word)mu) * ((pstm_word)*tmpm++));       \
		_c[0] = (pstm_digit)t;                                                  \
		cy = (pstm_digit)(t >> DIGIT_BIT);                              \
	} while (0)

/* Propagate the carry: _c[0] += cy; cy = 1 on wraparound, else 0 */
#define PROPCARRY \
   do { pstm_digit t = _c[0] += cy; cy = (t < cy); } while (0)

#endif

/******************************************************************************/

/* Index of the word the asm INNERMUL/PROPCARRY variants operate on (_c[LO]) */
#define LO 0
337
338 /* computes x/R == x (mod N) via Montgomery Reduction */
339 int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
340                 pstm_digit mp, pstm_digit *paD, uint32 paDlen)
341 {
342         pstm_digit      *c, *_c, *tmpm, mu;
343         int32           oldused, x, y;
344         int16           pa;
345
346         pa = m->used;
347         if (pa > a->alloc) {
348                 /* Sanity test for bad numbers.  This will confirm no buffer overruns */
349                 return PS_LIMIT_FAIL;
350         }
351
352         if (paD && paDlen >= (uint32)2*pa+1) {
353                 c = paD;
354                 memset(c, 0x0, paDlen);
355         } else {
356                 c = xzalloc(2*pa+1);
357         }
358         /* copy the input */
359         oldused = a->used;
360         for (x = 0; x < oldused; x++) {
361                 c[x] = a->dp[x];
362         }
363
364         MONT_START;
365
366         for (x = 0; x < pa; x++) {
367                 pstm_digit cy = 0;
368                 /* get Mu for this round */
369                 LOOP_START;
370                 _c   = c + x;
371                 tmpm = m->dp;
372                 y = 0;
373 #ifdef PSTM_X86_64
374                 for (; y < (pa & ~7); y += 8) {
375                         INNERMUL8;
376                         _c   += 8;
377                         tmpm += 8;
378                 }
379 #endif /* PSTM_X86_64 */
380                 for (; y < pa; y++) {
381                         INNERMUL;
382                         ++_c;
383                 }
384                 LOOP_END;
385                 while (cy) {
386                         PROPCARRY;
387                         ++_c;
388                 }
389         }
390
391         /* now copy out */
392         _c   = c + pa;
393         tmpm = a->dp;
394         for (x = 0; x < pa+1; x++) {
395                 *tmpm++ = *_c++;
396         }
397
398         for (; x < oldused; x++)   {
399                 *tmpm++ = 0;
400         }
401
402         MONT_FINI;
403
404         a->used = pa+1;
405         pstm_clamp(a);
406
407         /* reuse x as return code */
408         x = PSTM_OKAY;
409
410         /* if A >= m then A = A - m */
411         if (pstm_cmp_mag (a, m) != PSTM_LT) {
412                 if (s_pstm_sub (a, m, a) != PSTM_OKAY) {
413                         x = PS_MEM_FAIL;
414                 }
415         }
416         if (paDlen < (uint32)2*pa+1) {
417                 psFree(c, pool);
418         }
419         return x;
420 }
421
422 #endif /* !DISABLE_PSTM */
423 /******************************************************************************/