"movl %%edx,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy), "g"(mu), "g"(*tmpm++) \
-: "%eax", "%edx", "%cc")
+: "%eax", "%edx", "cc")
#define PROPCARRY \
asm( \
"movzbl %%al,%1 \n\t" \
:"=g"(_c[LO]), "=r"(cy) \
:"0"(_c[LO]), "1"(cy) \
-: "%eax", "%cc")
+: "%eax", "cc")
/******************************************************************************/
#elif defined(PSTM_X86_64)
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
- :"r0","%cc");
+ :"r0","cc");
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"m"(_c[0])\
- :"r0","%cc");
+ :"r0","cc");
#else /* Non-Thumb2 code */
//#pragma message ("Using 32 bit ARM Assembly Optimizations")
#define INNERMUL \
" STR r0,%1 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"r"(mu),"r"(*tmpm++),"m"(_c[0])\
- :"r0","%cc");
+ :"r0","cc");
#define PROPCARRY \
asm( \
" LDR r0,%1 \n\t" \
" MOVCC %0,#0 \n\t" \
:"=r"(cy),"=m"(_c[0])\
:"0"(cy),"m"(_c[0])\
- :"r0","%cc");
+ :"r0","cc");
#endif /* __thumb2__ */
#define LO 0
/* computes x/R == x (mod N) via Montgomery Reduction */
-int32 pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
+int32 FAST_FUNC pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
pstm_digit mp, pstm_digit *paD, uint32 paDlen)
{
pstm_digit *c, *_c, *tmpm, mu;