From a1c769a5f668a0839c0ba98252fa4a22bee88853 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Ulf=20M=C3=B6ller?=
Date: Tue, 13 Mar 2001 06:31:36 +0000
Subject: [PATCH] Alpha workaround. This is a lot slower!

---
 CHANGES               |   4 +
 crypto/bn/asm/alpha.s | 303 +++++++++++++++++++++++++++++-------
 2 files changed, 218 insertions(+), 89 deletions(-)

diff --git a/CHANGES b/CHANGES
index b2075c769e..850963a2b8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,10 @@
  Changes between 0.9.6 and 0.9.6a  [xx XXX 2001]
 
+ *) The Alpha version of bn_mul_add_words could produce incorrect results.
+    Replace it with a CC-compiled version for the 0.9.6a release.
+    [Ulf Moeller]
+
  *) Fix a memory leak in err.c: free err_data string if necessary.
     [Bodo Moeller]
 
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s
index 555ff0b92d..cc3024bb71 100644
--- a/crypto/bn/asm/alpha.s
+++ b/crypto/bn/asm/alpha.s
@@ -8,103 +8,227 @@
 # after 4 cycles have elapsed.  I've done modification to help
 # improve this.  Also, normally, a ld instruction will not be available
 # for about 3 cycles.
+
+ # bn_mul_add_words was broken. For now replace it with a CC compiled version
 	.file 1 "bn_asm.c"
 	.set noat
 gcc2_compiled.:
 __gnu_compiled_c:
 .text
-	.align 3
+	.align 4
 .globl bn_mul_add_words
 .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
-	.frame $30,0,$26,0
+	.loc 1 142
+bn_mul_add_words:		# 000142
+	.frame $sp, 0, $26
 	.prologue 0
-	.align 5
-	subq	$18,4,$18
-	bis	$31,$31,$0
-	blt	$18,$43		# if we are -1, -2, -3 or -4 goto tail code
-	ldq	$20,0($17)	# 1 1
-	ldq	$1,0($16)	# 1 1
-	.align 3
-$42:
-	mulq	$20,$19,$5	# 1 2 1	######
-	ldq	$21,8($17)	# 2 1
-	ldq	$2,8($16)	# 2 1
-	umulh	$20,$19,$20	# 1 2	######
-	ldq	$27,16($17)	# 3 1
-	ldq	$3,16($16)	# 3 1
-	mulq	$21,$19,$6	# 2 2 1	######
-	ldq	$28,24($17)	# 4 1
-	addq	$1,$5,$1	# 1 2 2
-	ldq	$4,24($16)	# 4 1
-	umulh	$21,$19,$21	# 2 2	######
-	cmpult	$1,$5,$22	# 1 2 3 1
-	addq	$20,$22,$20	# 1 3 1
-	addq	$1,$0,$1	# 1 2 3 1
-	mulq	$27,$19,$7	# 3 2 1	######
-	cmpult	$1,$0,$0	# 1 2 3 2
-	addq	$2,$6,$2	# 2 2 2
-	addq	$20,$0,$0	# 1 3 2
-	cmpult	$2,$6,$23	# 2 2 3 1
-	addq	$21,$23,$21	# 2 3 1
-	umulh	$27,$19,$27	# 3 2	######
-	addq	$2,$0,$2	# 2 2 3 1
-	cmpult	$2,$0,$0	# 2 2 3 2
-	subq	$18,4,$18
-	mulq	$28,$19,$8	# 4 2 1	######
-	addq	$21,$0,$0	# 2 3 2
-	addq	$3,$7,$3	# 3 2 2
-	addq	$16,32,$16
-	cmpult	$3,$7,$24	# 3 2 3 1
-	stq	$1,-32($16)	# 1 2 4
-	umulh	$28,$19,$28	# 4 2	######
-	addq	$27,$24,$27	# 3 3 1
-	addq	$3,$0,$3	# 3 2 3 1
-	stq	$2,-24($16)	# 2 2 4
-	cmpult	$3,$0,$0	# 3 2 3 2
-	stq	$3,-16($16)	# 3 2 4
-	addq	$4,$8,$4	# 4 2 2
-	addq	$27,$0,$0	# 3 3 2
-	cmpult	$4,$8,$25	# 4 2 3 1
-	addq	$17,32,$17
-	addq	$28,$25,$28	# 4 3 1
-	addq	$4,$0,$4	# 4 2 3 1
-	cmpult	$4,$0,$0	# 4 2 3 2
-	stq	$4,-8($16)	# 4 2 4
-	addq	$28,$0,$0	# 4 3 2
-	blt	$18,$43
-
-	ldq	$20,0($17)	# 1 1
-	ldq	$1,0($16)	# 1 1
-
-	br	$42
-
-	.align 4
-$45:
-	ldq	$20,0($17)	# 4 1
-	ldq	$1,0($16)	# 4 1
-	mulq	$20,$19,$5	# 4 2 1
-	subq	$18,1,$18
-	addq	$16,8,$16
-	addq	$17,8,$17
-	umulh	$20,$19,$20	# 4 2
-	addq	$1,$5,$1	# 4 2 2
-	cmpult	$1,$5,$22	# 4 2 3 1
-	addq	$20,$22,$20	# 4 3 1
-	addq	$1,$0,$1	# 4 2 3 1
-	cmpult	$1,$0,$0	# 4 2 3 2
-	addq	$20,$0,$0	# 4 3 2
-	stq	$1,-8($16)	# 4 2 4
-	bgt	$18,$45
-	ret	$31,($26),1	# else exit
-
-	.align 4
-$43:
-	addq	$18,4,$18
-	bgt	$18,$45		# goto tail code
-	ret	$31,($26),1	# else exit
-
+	.loc 1 148
+ # 143	{
+ # 144	BN_ULONG c=0;
+ # 145	BN_ULONG bl,bh;
+ # 146
+ # 147	assert(num >= 0);
+ # 148	if (num <= 0) return((BN_ULONG)0);
+	bgt	$18, L$180	# 000148
+	clr	$0
+	.loc 1 167
+ # 149
+ # 150	bl=LBITS(w);
+ # 151	bh=HBITS(w);
+ # 152
+ # 153	for (;;)
+ # 154		{
+ # 155		mul_add(rp[0],ap[0],bl,bh,c);
+ # 156		if (--num == 0) break;
+ # 157		mul_add(rp[1],ap[1],bl,bh,c);
+ # 158		if (--num == 0) break;
+ # 159		mul_add(rp[2],ap[2],bl,bh,c);
+ # 160		if (--num == 0) break;
+ # 161		mul_add(rp[3],ap[3],bl,bh,c);
+ # 162		if (--num == 0) break;
+ # 163		ap+=4;
+ # 164		rp+=4;
+ # 165		}
+ # 166	return(c);
+ # 167	}
+	ret	($26)		# 000167
+	unop
+	.loc 1 148
+L$180:				# 000148
+	.loc 1 155
+	ldq	$2, ($17)	# 000155
+	.loc 1 151
+	srl	$19, 32, $1	# 000151
+	.loc 1 150
+	zapnot	$19, 15, $19	# 000150
+	.loc 1 155
+	ldq	$22, ($16)	# 000155
+	zapnot	$2, 15, $4
+	mov	1, $7
+	mulq	$1, $4, $5
+	.loc 1 156
+	subl	$18, 1, $18	# 000156
+	.loc 1 155
+	srl	$2, 32, $3	# 000155
+	sll	$7, 32, $7
+	mulq	$19, $3, $6
+	mulq	$1, $3, $3
+	addq	$5, $6, $5
+	nop
+	srl	$5, 32, $20
+	cmpule	$6, $5, $6
+	insll	$5, 4, $5
+	mulq	$19, $4, $4
+	addq	$3, $7, $8
+	cmoveq	$6, $8, $3
+	addq	$3, $20, $3
+	addq	$4, $5, $4
+	cmpult	$4, $5, $5
+	cmpult	$4, 0, $23
+	addq	$4, $22, $4
+	addq	$3, $5, $3
+	cmpult	$4, $22, $24
+	addq	$3, $23, $3
+	stq	$4, ($16)
+	addq	$3, $24, $0
+	.loc 1 156
+	beq	$18, L$183	# 000156
+	unop
+	.loc 1 157
+L$184:				# 000157
+	ldq	$27, 8($17)
+	ldq	$21, 8($16)
+	.loc 1 158
+	subl	$18, 1, $18	# 000158
+	.loc 1 163
+	lda	$17, 32($17)	# 000163
+	.loc 1 157
+	zapnot	$27, 15, $6	# 000157
+	.loc 1 155
+	lda	$16, 32($16)	# 000155
+	.loc 1 157
+	mulq	$1, $6, $8	# 000157
+	extll	$27, 4, $2
+	mulq	$19, $2, $20
+	mulq	$1, $2, $2
+	addq	$8, $20, $8
+	unop
+	srl	$8, 32, $5
+	cmpule	$20, $8, $20
+	insll	$8, 4, $8
+	mulq	$19, $6, $6
+	addq	$2, $7, $22
+	cmoveq	$20, $22, $2
+	addq	$2, $5, $2
+	addq	$6, $8, $6
+	cmpult	$6, $8, $8
+	addq	$6, $0, $6
+	cmpult	$6, $0, $0
+	addq	$2, $8, $2
+	addq	$6, $21, $6
+	addq	$2, $0, $0
+	cmpult	$6, $21, $23
+	stq	$6, -24($16)
+	addq	$0, $23, $0
+	.loc 1 158
+	beq	$18, L$183	# 000158
+	.loc 1 160
+	subl	$18, 1, $18	# 000160
+	unop
+	.loc 1 159
+	ldq	$3, -16($17)	# 000159
+	ldq	$21, -16($16)
+	zapnot	$3, 15, $27
+	mulq	$1, $27, $20
+	extll	$3, 4, $24
+	mulq	$19, $24, $22
+	mulq	$1, $24, $24
+	addq	$20, $22, $20
+	srl	$20, 32, $8
+	cmpule	$22, $20, $22
+	insll	$20, 4, $20
+	mulq	$19, $27, $27
+	addq	$24, $7, $5
+	cmoveq	$22, $5, $24
+	addq	$24, $8, $8
+	addq	$27, $20, $27
+	cmpult	$27, $20, $20
+	addq	$27, $0, $27
+	cmpult	$27, $0, $0
+	addq	$8, $20, $8
+	addq	$27, $21, $27
+	addq	$8, $0, $0
+	cmpult	$27, $21, $6
+	stq	$27, -16($16)
+	addq	$0, $6, $0
+	.loc 1 160
+	beq	$18, L$183	# 000160
+	.loc 1 162
+	subl	$18, 1, $18	# 000162
+	unop
+	.loc 1 161
+	ldq	$2, -8($17)	# 000161
+	ldq	$21, -8($16)
+	zapnot	$2, 15, $3
+	mulq	$1, $3, $5
+	extll	$2, 4, $23
+	mulq	$19, $23, $22
+	mulq	$1, $23, $23
+	addq	$5, $22, $5
+	srl	$5, 32, $20
+	cmpule	$22, $5, $22
+	insll	$5, 4, $5
+	mulq	$19, $3, $3
+	addq	$23, $7, $24
+	cmoveq	$22, $24, $23
+	addq	$23, $20, $20
+	addq	$3, $5, $3
+	cmpult	$3, $5, $5
+	addq	$3, $0, $3
+	cmpult	$3, $0, $0
+	addq	$20, $5, $5
+	addq	$3, $21, $3
+	addq	$5, $0, $0
+	cmpult	$3, $21, $27
+	stq	$3, -8($16)
+	addq	$0, $27, $0
+	.loc 1 162
+	beq	$18, L$183	# 000162
+	.loc 1 156
+	subl	$18, 1, $18	# 000156
+	unop
+	.loc 1 155
+	ldq	$8, ($17)	# 000155
+	ldq	$3, ($16)
+	zapnot	$8, 15, $2
+	mulq	$1, $2, $22
+	extll	$8, 4, $6
+	mulq	$19, $6, $24
+	mulq	$1, $6, $6
+	addq	$22, $24, $22
+	srl	$22, 32, $20
+	cmpule	$24, $22, $24
+	insll	$22, 4, $22
+	mulq	$19, $2, $2
+	addq	$6, $7, $23
+	cmoveq	$24, $23, $6
+	addq	$6, $20, $6
+	addq	$2, $22, $2
+	cmpult	$2, $22, $22
+	addq	$2, $0, $2
+	cmpult	$2, $0, $0
+	addq	$6, $22, $6
+	addq	$2, $3, $2
+	addq	$6, $0, $0
+	cmpult	$2, $3, $5
+	stq	$2, ($16)
+	addq	$0, $5, $0
+	.loc 1 156
+	bne	$18, L$184	# 000156
+	.loc 1 165
+L$183:				# 000165
+	.loc 1 167
+	ret	($26)		# 000167
 .end bn_mul_add_words
 	.align 3
 	.globl bn_mul_words
@@ -3197,3 +3321,4 @@ bn_sqr_comba8..ng:
 	stq	$8, 120($16)
 	ret	$31,($26),1
 .end bn_sqr_comba8
+
-- 
2.25.1
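
Reviewer note (not part of the patch): the CC-generated assembly above implements the
portable C fallback for bn_mul_add_words shown in the interleaved "# 1xx" source
comments (bn_asm.c lines 142-167): rp[i] += ap[i] * w, word by word, with carry
propagation, where the 64x64-bit product is built from 32-bit halves (the
LBITS/HBITS/mul_add idea, visible in the srl/zapnot/mulq sequences). The sketch below
is only an illustration of that technique; the helper names and layout are hypothetical,
not OpenSSL's actual macros.

#include <assert.h>
#include <stdint.h>

typedef uint64_t BN_ULONG;

/* Illustrative helper: multiply two 64-bit words into a 128-bit (hi,lo)
 * pair using only 32-bit half products, mirroring what the compiled code
 * does with srl/zapnot (HBITS/LBITS) and mulq. */
static void mul64_sketch(BN_ULONG a, BN_ULONG w, BN_ULONG *hi, BN_ULONG *lo)
{
	BN_ULONG al = a & 0xffffffff, ah = a >> 32;
	BN_ULONG wl = w & 0xffffffff, wh = w >> 32;
	BN_ULONG l = al * wl, h = ah * wh;
	BN_ULONG m = ah * wl, m1 = al * wh;

	m += m1;
	if (m < m1)                     /* carry out of the middle terms */
		h += (BN_ULONG)1 << 32;
	h += m >> 32;
	m <<= 32;
	l += m;
	if (l < m)
		h++;
	*hi = h;
	*lo = l;
}

/* Portable equivalent of bn_mul_add_words: rp[i] += ap[i] * w with carry
 * propagation; returns the final carry word. */
BN_ULONG bn_mul_add_words_sketch(BN_ULONG *rp, const BN_ULONG *ap,
                                 int num, BN_ULONG w)
{
	BN_ULONG c = 0;

	assert(num >= 0);
	while (num-- > 0) {
		BN_ULONG hi, lo;

		mul64_sketch(*ap, w, &hi, &lo);
		lo += c;   if (lo < c)   hi++;  /* add incoming carry     */
		lo += *rp; if (lo < *rp) hi++;  /* add the existing word  */
		*rp = lo;
		c = hi;                         /* carry into the next word */
		ap++; rp++;
	}
	return c;
}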