+++ /dev/null
- # DEC Alpha assembler
- # The bn_div_words is actually gcc output but the other parts are hand done.
- # Thanks to tzeruch@ceddec.com for sending me the gcc output for
- # bn_div_words.
- # I've gone back and re-done most of the routines.
- # The key thing to remember for the 164 CPU is that while a
- # multiply operation takes 8 cycles, another one can only be issued
- # after 4 cycles have elapsed. I've made modifications to help
- # improve this. Also, normally, the result of a ld instruction will not
- # be available for about 3 cycles.
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text # ---- bn_mul_add_words: r[i] = r[i] + a[i]*w + carry, one 64-bit word at a time ----
- .align 3 # registers as used below: $16 = r (read and written), $17 = a, $18 = word count, $19 = w
- .globl bn_mul_add_words # the running carry lives in $0 and is what $0 holds at each ret
- .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0 # frame size 0: $30 is never adjusted in this routine
- .prologue 0
- .align 5
- subq $18,4,$18 # pre-decrement the count by one unrolled group of 4
- bis $31,$31,$0 # carry = 0
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1 (a[0], preloaded for the first pass)
- ldq $1,0($16) # 1 1 (r[0], preloaded for the first pass)
- .align 3
-$42: # main loop, 4 words per pass; in the notes below the first number appears to be the word (1-4) within the group
- mulq $20,$19,$5 # 1 2 1 ###### low 64 bits of a[0]*w
- ldq $21,8($17) # 2 1
- ldq $2,8($16) # 2 1
- umulh $20,$19,$20 # 1 2 ###### high 64 bits of a[0]*w
- ldq $27,16($17) # 3 1
- ldq $3,16($16) # 3 1
- mulq $21,$19,$6 # 2 2 1 ######
- ldq $28,24($17) # 4 1
- addq $1,$5,$1 # 1 2 2 r[0] += low product
- ldq $4,24($16) # 4 1
- umulh $21,$19,$21 # 2 2 ######
- cmpult $1,$5,$22 # 1 2 3 1 did that add overflow?
- addq $20,$22,$20 # 1 3 1 fold the overflow into the high product
- addq $1,$0,$1 # 1 2 3 1 add the incoming carry
- mulq $27,$19,$7 # 3 2 1 ######
- cmpult $1,$0,$0 # 1 2 3 2 did adding the carry overflow?
- addq $2,$6,$2 # 2 2 2
- addq $20,$0,$0 # 1 3 2 carry for the next word = high product + overflows
- cmpult $2,$6,$23 # 2 2 3 1
- addq $21,$23,$21 # 2 3 1
- umulh $27,$19,$27 # 3 2 ######
- addq $2,$0,$2 # 2 2 3 1
- cmpult $2,$0,$0 # 2 2 3 2
- subq $18,4,$18 # count -= 4 for the next pass
- mulq $28,$19,$8 # 4 2 1 ######
- addq $21,$0,$0 # 2 3 2
- addq $3,$7,$3 # 3 2 2
- addq $16,32,$16 # r += 4 words; the stores below use negative offsets
- cmpult $3,$7,$24 # 3 2 3 1
- stq $1,-32($16) # 1 2 4
- umulh $28,$19,$28 # 4 2 ######
- addq $27,$24,$27 # 3 3 1
- addq $3,$0,$3 # 3 2 3 1
- stq $2,-24($16) # 2 2 4
- cmpult $3,$0,$0 # 3 2 3 2
- stq $3,-16($16) # 3 2 4
- addq $4,$8,$4 # 4 2 2
- addq $27,$0,$0 # 3 3 2
- cmpult $4,$8,$25 # 4 2 3 1
- addq $17,32,$17 # a += 4 words
- addq $28,$25,$28 # 4 3 1
- addq $4,$0,$4 # 4 2 3 1
- cmpult $4,$0,$0 # 4 2 3 2
- stq $4,-8($16) # 4 2 4
- addq $28,$0,$0 # 4 3 2
- blt $18,$43 # fewer than 4 words left: leave the main loop
-
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
-
- br $42 # next group of 4, with a[0] and r[0] already loaded
-
- .align 4
-$45: # tail loop: one word per pass, same carry chain as the main loop
- ldq $20,0($17) # 4 1
- ldq $1,0($16) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18 # count -= 1
- addq $16,8,$16 # r += 1 word; the store below uses offset -8
- addq $17,8,$17 # a += 1 word
- umulh $20,$19,$20 # 4 2
- addq $1,$5,$1 # 4 2 2
- cmpult $1,$5,$22 # 4 2 3 1
- addq $20,$22,$20 # 4 3 1
- addq $1,$0,$1 # 4 2 3 1
- cmpult $1,$0,$0 # 4 2 3 2
- addq $20,$0,$0 # 4 3 2
- stq $1,-8($16) # 4 2 4
- bgt $18,$45 # more words left
- ret $31,($26),1 # else exit
-
- .align 4
-$43: # main-loop exit (also the entry path when fewer than 4 words were requested)
- addq $18,4,$18 # undo the pre-decrement: $18 = words still to do
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3 # ---- bn_mul_words: r[i] = low 64 bits of (a[i]*w + carry), carry = high part ----
- .globl bn_mul_words # registers as used below: $16 = r (written only), $17 = a, $18 = word count, $19 = w
- .ent bn_mul_words # the running carry lives in $0 and is what $0 holds at each ret
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0 # frame size 0: $30 is never adjusted in this routine
- .prologue 0
- .align 5
- subq $18,4,$18 # pre-decrement the count by one unrolled group of 4
- bis $31,$31,$0 # carry = 0
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1 (a[0], preloaded for the first pass)
- .align 3
-$142: # main loop, 4 words per pass
-
- mulq $20,$19,$5 # 1 2 1 ##### low 64 bits of a[0]*w
- ldq $21,8($17) # 2 1
- ldq $27,16($17) # 3 1
- umulh $20,$19,$20 # 1 2 ##### high 64 bits of a[0]*w
- ldq $28,24($17) # 4 1
- mulq $21,$19,$6 # 2 2 1 #####
- addq $5,$0,$5 # 1 2 3 1 low product += incoming carry
- subq $18,4,$18 # count -= 4 for the next pass
- cmpult $5,$0,$0 # 1 2 3 2 did that add overflow?
- umulh $21,$19,$21 # 2 2 #####
- addq $20,$0,$0 # 1 3 2 carry for the next word = high product + overflow
- addq $17,32,$17 # a += 4 words
- addq $6,$0,$6 # 2 2 3 1
- mulq $27,$19,$7 # 3 2 1 #####
- cmpult $6,$0,$0 # 2 2 3 2
- addq $21,$0,$0 # 2 3 2
- addq $16,32,$16 # r += 4 words; the stores below use negative offsets
- umulh $27,$19,$27 # 3 2 #####
- stq $5,-32($16) # 1 2 4
- mulq $28,$19,$8 # 4 2 1 #####
- addq $7,$0,$7 # 3 2 3 1
- stq $6,-24($16) # 2 2 4
- cmpult $7,$0,$0 # 3 2 3 2
- umulh $28,$19,$28 # 4 2 #####
- addq $27,$0,$0 # 3 3 2
- stq $7,-16($16) # 3 2 4
- addq $8,$0,$8 # 4 2 3 1
- cmpult $8,$0,$0 # 4 2 3 2
-
- addq $28,$0,$0 # 4 3 2
-
- stq $8,-8($16) # 4 2 4
-
- blt $18,$143 # fewer than 4 words left: leave the main loop
-
- ldq $20,0($17) # 1 1
-
- br $142 # next group of 4, with a[0] already loaded
-
- .align 4
-$145: # tail loop: one word per pass, same carry chain as the main loop
- ldq $20,0($17) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18 # count -= 1
- umulh $20,$19,$20 # 4 2
- addq $5,$0,$5 # 4 2 3 1
- addq $16,8,$16 # r += 1 word; the store below uses offset -8
- cmpult $5,$0,$0 # 4 2 3 2
- addq $17,8,$17 # a += 1 word
- addq $20,$0,$0 # 4 3 2
- stq $5,-8($16) # 4 2 4
-
- bgt $18,$145 # more words left
- ret $31,($26),1 # else exit
-
- .align 4
-$143: # main-loop exit (also the entry path when fewer than 4 words were requested)
- addq $18,4,$18 # undo the pre-decrement: $18 = words still to do
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- #
- # bn_sqr_words
- #
- # For each of the $18 words a[i] read through $17, writes the full
- # 128-bit square to two consecutive words through $16:
- #   r[2*i]   = low 64 bits of a[i]*a[i]  (mulq)
- #   r[2*i+1] = high 64 bits of a[i]*a[i] (umulh)
- # Words are independent, so no carry is kept and $0 is never written.
- # $542 is the 4-way unrolled loop, $442 the one-word tail, and $543 the
- # loop exit that restores the count and decides whether the tail runs.
- #
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $18,4,$18 # pre-decrement the count by one unrolled group of 4
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # a[0], preloaded for the first pass
- .align 3
-$542:
- mulq $20,$20,$5 # low(a[0]^2) ######
- ldq $21,8($17) # a[1]
- subq $18,4,$18 # count -= 4; destination written out explicitly like every other loop in this file
- umulh $20,$20,$1 # high(a[0]^2) ######
- ldq $27,16($17) # a[2]
- mulq $21,$21,$6 # low(a[1]^2) ######
- ldq $28,24($17) # a[3]
- stq $5,0($16) # r[0]
- umulh $21,$21,$2 # high(a[1]^2) ######
- stq $1,8($16) # r[1]
- mulq $27,$27,$7 # low(a[2]^2) ######
- stq $6,16($16) # r[2]
- umulh $27,$27,$3 # high(a[2]^2) ######
- stq $2,24($16) # r[3]
- mulq $28,$28,$8 # low(a[3]^2) ######
- stq $7,32($16) # r[4]
- umulh $28,$28,$4 # high(a[3]^2) ######
- stq $3,40($16) # r[5]
-
- addq $16,64,$16 # r += 8 words; the last two stores use negative offsets
- addq $17,32,$17 # a += 4 words
- stq $8,-16($16) # r[6]
- stq $4,-8($16) # r[7]
-
- blt $18,$543 # fewer than 4 words left: leave the main loop
- ldq $20,0($17) # a[0] for the next pass
- br $542
-
-$442: # tail loop: one input word, two output words per pass
- ldq $20,0($17) # a[0]
- mulq $20,$20,$5 # low(a[0]^2)
- addq $16,16,$16 # r += 2 words; the stores below use negative offsets
- addq $17,8,$17 # a += 1 word
- subq $18,1,$18 # count -= 1
- umulh $20,$20,$1 # high(a[0]^2)
- stq $5,-16($16) # r[0]
- stq $1,-8($16) # r[1]
-
- bgt $18,$442 # more words left
- ret $31,($26),1 # else exit
-
- .align 4
-$543: # main-loop exit (also the entry path when fewer than 4 words were requested)
- addq $18,4,$18 # undo the pre-decrement: $18 = words still to do
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- #
- # bn_add_words
- #
- # r[i] = a[i] + b[i] + carry for $19 64-bit words, with $16 = r,
- # $17 = a, $18 = b. The carry is kept in $0 (0 or 1) and is what $0
- # holds on return.
- #
- # Each word needs two overflow tests: one right after a+b (result < a)
- # and one right after adding the incoming carry (result < carry). At
- # most one of them can fire, so their sum is the outgoing carry. The
- # first test MUST run before the carry is added; otherwise a wrap
- # caused by the carry is also seen by the first test (carry counted
- # twice) or hides a genuine a+b overflow (carry lost).
- #
- # $901 is the 4-way unrolled loop, $945 the one-word tail, and $900 the
- # loop exit that restores the count and decides whether the tail runs.
- #
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words:
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19 # pre-decrement the count by one unrolled group of 4
- bis $31,$31,$0 # carry = 0
- blt $19,$900 # fewer than 4 words: go straight to the exit/tail logic
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- .align 3
-$901:
- addq $1,$5,$1 # r=a+b;
- ldq $6,8($17) # a[1]
- cmpult $1,$5,$22 # did we overflow?
- ldq $2,8($18) # b[1]
- addq $1,$0,$1 # c+= overflow
- ldq $7,16($17) # a[2]
- cmpult $1,$0,$0 # overflow?
- ldq $3,16($18) # b[2]
- addq $0,$22,$0 # carry out of word 0
- ldq $8,24($17) # a[3]
- addq $2,$6,$2 # r=a+b;
- ldq $4,24($18) # b[3]
- cmpult $2,$6,$23 # did we overflow?
- addq $3,$7,$3 # r=a+b;
- addq $2,$0,$2 # c+= overflow
- cmpult $3,$7,$24 # did we overflow?
- cmpult $2,$0,$0 # overflow?
- addq $4,$8,$4 # r=a+b;
- addq $0,$23,$0 # carry out of word 1
- cmpult $4,$8,$25 # did we overflow?
- addq $3,$0,$3 # c+= overflow
- stq $1,0($16) # r[0]=c
- cmpult $3,$0,$0 # overflow?
- stq $2,8($16) # r[1]=c
- addq $0,$24,$0 # carry out of word 2
- stq $3,16($16) # r[2]=c
- addq $4,$0,$4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $4,$0,$0 # overflow?
- addq $17,32,$17 # a++
- addq $0,$25,$0 # carry out of word 3
- stq $4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$900 # fewer than 4 words left: leave the main loop
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- br $901
- .align 4
-$945: # tail loop: one word per pass
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- addq $1,$5,$1 # r=a+b;
- subq $19,1,$19 # loop--
- cmpult $1,$5,$22 # did a+b overflow? (tested before the carry is added)
- addq $17,8,$17 # a++
- addq $1,$0,$1 # r+= incoming carry
- addq $18,8,$18 # b++
- cmpult $1,$0,$0 # did adding the carry overflow?
- stq $1,0($16) # r[0]=r
- addq $0,$22,$0 # carry out = sum of the two overflow flags
- addq $16,8,$16 # r++
-
- bgt $19,$945 # more words left
- ret $31,($26),1 # else exit
-
-$900: # main-loop exit (also the entry path when fewer than 4 words were requested)
- addq $19,4,$19 # undo the pre-decrement: $19 = words still to do
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- # bn_div_words: what follows was taken directly from the C compiler with a
- # few hacks to redo the labels. In these notes $16 = h, $17 = l, $18 = d;
- # it appears to return in $0 the 64-bit quotient of the 128-bit value h:l by d,
- # built 32 bits per pass over two passes ($0 = -1 when d is zero).
-.text
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words:
- ldgp $29,0($27) # set up the global pointer; this routine calls out (BN_num_bits_word, abort)
-bn_div_words..ng:
- lda $30,-48($30) # reserve 48 bytes of stack
- .frame $30,48,$26,0
- stq $26,0($30) # save the return address
- stq $9,8($30) # save $9..$13, which are overwritten below
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9 # $9 = h
- bis $17,$17,$10 # $10 = l
- bis $18,$18,$11 # $11 = d
- bis $31,$31,$13 # $13 = 0, will hold the first 32-bit quotient digit shifted up
- bis $31,2,$12 # $12 = 2, passes remaining
- bne $11,$119 # d != 0: carry on
- lda $0,-1 # d == 0: return -1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16 # argument for the call: d
- jsr $26,BN_num_bits_word # $0 = BN_num_bits_word(d)
- ldgp $29,0($26) # re-establish the global pointer after the call
- subq $0,64,$1
- beq $1,$120 # result is 64: skip the range check
- bis $31,1,$1
- sll $1,$0,$1 # $1 = 1 << bits
- cmpule $9,$1,$1 # h <= (1 << bits) ?
- bne $1,$120 # yes: in range
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort # out of range: abort() (the diagnostic print above is commented out)
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3 # $3 = 64
- cmpult $9,$11,$2 # $2 = (h < d)
- subq $3,$0,$1 # $1 = 64 - bits
- addl $1,$31,$0 # $0 = shift count, as a sign-extended 32-bit value
- subq $9,$11,$1 # $1 = h - d
- cmoveq $2,$1,$9 # if h >= d then h = h - d
- beq $0,$122 # shift count 0: no normalisation needed
- zapnot $0,15,$2 # $2 = low 32 bits of the shift count
- subq $3,$0,$1 # $1 = 64 - shift
- sll $11,$2,$11 # d <<= shift
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10 # l <<= shift
- bis $3,$1,$9 # h = (h << shift) | (l >> (64 - shift))
-$122:
- srl $11,32,$5 # $5 = high 32 bits of d
- zapnot $11,15,$6 # $6 = low 32 bits of d
- lda $7,-1 # all ones, source for the 0xffffffff estimate below
- .align 5
-$123: # one pass: estimate a 32-bit quotient digit q in $27
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126 # high half of h differs from high half of d: divide
- zapnot $7,15,$27 # equal: q = 0xffffffff
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27 # q = h / (d >> 32), unsigned
-$127:
- srl $10,32,$4 # $4 = high 32 bits of l
- .align 5
-$128: # correction loop: lower q until the estimate is acceptable
- mulq $27,$5,$1
- subq $9,$1,$3 # $3 = h - q * (high half of d)
- zapnot $3,240,$1 # keep only the upper 4 bytes of that remainder
- bne $1,$129 # remainder does not fit in 32 bits: accept q
- mulq $6,$27,$2 # (low half of d) * q
- sll $3,32,$1
- addq $1,$4,$1 # (remainder << 32) + (l >> 32)
- cmpule $2,$1,$2
- bne $2,$129 # product <= that value: accept q
- subq $27,1,$27 # otherwise q--
- br $31,$128
- .align 4
-$129: # subtract q*d from the running value h:l
- mulq $27,$6,$1 # tl = q * (low half of d)
- mulq $27,$5,$4 # th = q * (high half of d)
- srl $1,32,$3
- sll $1,32,$1 # tl <<= 32
- addq $4,$3,$4 # th += upper 32 bits of the original tl
- cmpult $10,$1,$2 # borrow = (l < tl)
- subq $10,$1,$10 # l -= tl
- addq $2,$4,$2 # th += borrow
- cmpult $9,$2,$1 # h < th ?
- bis $2,$2,$4
- beq $1,$134 # no: q stands
- addq $9,$11,$9 # yes: h += d
- subq $27,1,$27 # and q--
-$134:
- subl $12,1,$12 # one pass done
- subq $9,$4,$9 # h -= th
- beq $12,$124 # both passes done
- sll $27,32,$13 # first digit becomes the upper 32 bits of the result
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10 # l <<= 32
- bis $2,$1,$9 # h = (h << 32) | (l >> 32)
- br $31,$123 # second pass
- .align 4
-$124:
- bis $13,$27,$0 # return value = (first digit << 32) | second digit
-$136: # common exit: restore saved registers and pop the frame
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-
- .set noat # ---- bn_sub_words: r[i] = a[i] - b[i] - borrow, one 64-bit word at a time ----
- .text # registers as used below: $16 = r, $17 = a, $18 = b, $19 = word count
- .align 3 # the running borrow lives in $0 and is what $0 holds at each ret
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words:
-bn_sub_words..ng:
- .frame $30,0,$26,0 # frame size 0: $30 is never adjusted in this routine
- .prologue 0
-
- subq $19, 4, $19 # pre-decrement the count by one unrolled group of 4
- bis $31, $31, $0 # borrow = 0
- blt $19, $100 # fewer than 4 words: go straight to the exit/tail logic
- ldq $1, 0($17) # a[0]
- ldq $2, 0($18) # b[0]
-$101: # main loop, 4 words per pass; a[0] and b[0] are already loaded
- ldq $3, 8($17) # a[1]
- cmpult $1, $2, $4 # word 0: borrow from a-b (a < b)
- ldq $5, 8($18) # b[1]
- subq $1, $2, $1 # word 0: a - b
- ldq $6, 16($17) # a[2]
- cmpult $1, $0, $2 # word 0: borrow from subtracting the incoming borrow
- ldq $7, 16($18) # b[2]
- subq $1, $0, $23 # word 0 result
- ldq $8, 24($17) # a[3]
- addq $2, $4, $0 # borrow out of word 0
- cmpult $3, $5, $24 # word 1: same five-step pattern
- subq $3, $5, $3
- ldq $22, 24($18) # b[3]
- cmpult $3, $0, $5
- subq $3, $0, $25 # word 1 result
- addq $5, $24, $0 # borrow out of word 1
- cmpult $6, $7, $27 # word 2
- subq $6, $7, $6
- stq $23, 0($16) # r[0]
- cmpult $6, $0, $7
- subq $6, $0, $28 # word 2 result
- addq $7, $27, $0 # borrow out of word 2
- cmpult $8, $22, $21 # word 3
- subq $8, $22, $8
- stq $25, 8($16) # r[1]
- cmpult $8, $0, $22
- subq $8, $0, $20 # word 3 result
- addq $22, $21, $0 # borrow out of word 3
- stq $28, 16($16) # r[2]
- subq $19, 4, $19 # count -= 4 for the next pass
- stq $20, 24($16) # r[3]
- addq $17, 32, $17 # a += 4 words
- addq $18, 32, $18 # b += 4 words
- addq $16, 32, $16 # r += 4 words
- blt $19, $100 # fewer than 4 words left: leave the main loop
- ldq $1, 0($17) # a[0] for the next pass
- ldq $2, 0($18) # b[0] for the next pass
- br $101
-$102: # tail loop: one word per pass
- ldq $1, 0($17) # a[0]
- ldq $2, 0($18) # b[0]
- cmpult $1, $2, $27 # borrow from a-b (a < b)
- subq $1, $2, $1 # a - b
- cmpult $1, $0, $2 # borrow from subtracting the incoming borrow
- subq $1, $0, $1 # result
- stq $1, 0($16) # r[0]
- addq $2, $27, $0 # borrow out
- addq $17, 8, $17 # a += 1 word
- addq $18, 8, $18 # b += 1 word
- addq $16, 8, $16 # r += 1 word
- subq $19, 1, $19 # count -= 1
- bgt $19, $102 # more words left
- ret $31,($26),1
-$100: # main-loop exit (also the entry path when fewer than 4 words were requested)
- addq $19, 4, $19 # undo the pre-decrement: $19 = words still to do
- bgt $19, $102 # run the tail if any words remain
-$103: # no branch in this routine targets this label
- ret $31,($26),1
- .end bn_sub_words
- .text # ---- bn_mul_comba4: 4x4-word product, r[0..7] = a[0..3] * b[0..3], column by column ----
- .align 3 # registers as used below: $16 = r, $17 = a, $18 = b
- .globl bn_mul_comba4 # $8, $22 and $23 rotate as the three-word column accumulator
- .ent bn_mul_comba4
-bn_mul_comba4:
-bn_mul_comba4..ng:
- .frame $30,0,$26,0 # frame size 0: $30 is never adjusted in this routine
- .prologue 0
-
- ldq $0, 0($17) # a[0]
- ldq $1, 0($18) # b[0]
- ldq $2, 8($17) # a[1]
- ldq $3, 8($18) # b[1]
- ldq $4, 16($17) # a[2]
- ldq $5, 16($18) # b[2]
- ldq $6, 24($17) # a[3]
- ldq $7, 24($18) # b[3]
- bis $31, $31, $23 # accumulator $23 = 0
- mulq $0, $1, $8 # column 0: a[0]*b[0], low word
- umulh $0, $1, $22 # high word starts column 1
- stq $8, 0($16) # r[0]
- bis $31, $31, $8 # accumulator $8 = 0
- mulq $0, $3, $24 # column 1: a[0]*b[1], low word
- umulh $0, $3, $25 # high word
- addq $22, $24, $22 # column sum += low word
- cmpult $22, $24, $27 # carry out of that add
- addq $27, $25, $25 # high word += carry
- addq $23, $25, $23 # next column += high word
- cmpult $23, $25, $28 # carry out of that add
- addq $8, $28, $8 # column after next += carry
- mulq $2, $1, $21 # column 1: a[1]*b[0]; same six-step accumulate follows every product
- umulh $2, $1, $20
- addq $22, $21, $22
- cmpult $22, $21, $19 # $19 is used as scratch here
- addq $19, $20, $20
- addq $23, $20, $23
- cmpult $23, $20, $17 # $17 is reused as scratch; all of a[] is already in registers
- addq $8, $17, $8
- stq $22, 8($16) # r[1]
- bis $31, $31, $22 # accumulator $22 = 0
- mulq $2, $3, $18 # column 2: a[1]*b[1]; $18 is reused as scratch, all of b[] is already in registers
- umulh $2, $3, $24
- addq $23, $18, $23
- cmpult $23, $18, $27
- addq $27, $24, $24
- addq $8, $24, $8
- cmpult $8, $24, $25
- addq $22, $25, $22
- mulq $0, $5, $28 # column 2: a[0]*b[2]
- umulh $0, $5, $21
- addq $23, $28, $23
- cmpult $23, $28, $19
- addq $19, $21, $21
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $22, $20, $22
- mulq $4, $1, $17 # column 2: a[2]*b[0]
- umulh $4, $1, $18
- addq $23, $17, $23
- cmpult $23, $17, $27
- addq $27, $18, $18
- addq $8, $18, $8
- cmpult $8, $18, $24
- addq $22, $24, $22
- stq $23, 16($16) # r[2]
- bis $31, $31, $23 # accumulator $23 = 0
- mulq $0, $7, $25 # column 3: a[0]*b[3]
- umulh $0, $7, $28
- addq $8, $25, $8
- cmpult $8, $25, $19
- addq $19, $28, $28
- addq $22, $28, $22
- cmpult $22, $28, $21
- addq $23, $21, $23
- mulq $2, $5, $20 # column 3: a[1]*b[2]
- umulh $2, $5, $17
- addq $8, $20, $8
- cmpult $8, $20, $27
- addq $27, $17, $17
- addq $22, $17, $22
- cmpult $22, $17, $18
- addq $23, $18, $23
- mulq $4, $3, $24 # column 3: a[2]*b[1]
- umulh $4, $3, $25
- addq $8, $24, $8
- cmpult $8, $24, $19
- addq $19, $25, $25
- addq $22, $25, $22
- cmpult $22, $25, $28
- addq $23, $28, $23
- mulq $6, $1, $21 # column 3: a[3]*b[0]
- umulh $6, $1, $0 # $0 (a[0]) is overwritten; a[0] is not read again below
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $20, $0, $0
- addq $22, $0, $22
- cmpult $22, $0, $27
- addq $23, $27, $23
- stq $8, 24($16) # r[3]
- bis $31, $31, $8 # accumulator $8 = 0
- mulq $2, $7, $17 # column 4: a[1]*b[3]
- umulh $2, $7, $18
- addq $22, $17, $22
- cmpult $22, $17, $24
- addq $24, $18, $18
- addq $23, $18, $23
- cmpult $23, $18, $19
- addq $8, $19, $8
- mulq $4, $5, $25 # column 4: a[2]*b[2]
- umulh $4, $5, $28
- addq $22, $25, $22
- cmpult $22, $25, $21
- addq $21, $28, $28
- addq $23, $28, $23
- cmpult $23, $28, $20
- addq $8, $20, $8
- mulq $6, $3, $0 # column 4: a[3]*b[1]
- umulh $6, $3, $27
- addq $22, $0, $22
- cmpult $22, $0, $1 # $1 (b[0]) is overwritten; b[0] is not read again below
- addq $1, $27, $27
- addq $23, $27, $23
- cmpult $23, $27, $17
- addq $8, $17, $8
- stq $22, 32($16) # r[4]
- bis $31, $31, $22 # accumulator $22 = 0
- mulq $4, $7, $24 # column 5: a[2]*b[3]
- umulh $4, $7, $18
- addq $23, $24, $23
- cmpult $23, $24, $19
- addq $19, $18, $18
- addq $8, $18, $8
- cmpult $8, $18, $2 # $2 (a[1]) is overwritten; a[1] is not read again below
- addq $22, $2, $22
- mulq $6, $5, $25 # column 5: a[3]*b[2]
- umulh $6, $5, $21
- addq $23, $25, $23
- cmpult $23, $25, $28
- addq $28, $21, $21
- addq $8, $21, $8
- cmpult $8, $21, $20
- addq $22, $20, $22
- stq $23, 40($16) # r[5]
- bis $31, $31, $23 # accumulator $23 = 0
- mulq $6, $7, $0 # column 6: a[3]*b[3]
- umulh $6, $7, $1
- addq $8, $0, $8
- cmpult $8, $0, $27
- addq $27, $1, $1
- addq $22, $1, $22
- cmpult $22, $1, $17
- addq $23, $17, $23 # $23 is not stored by any later instruction in this routine
- stq $8, 48($16) # r[6]
- stq $22, 56($16) # r[7]
- ret $31,($26),1
- .end bn_mul_comba4
- .text
- .align 3
- .globl bn_mul_comba8
- .ent bn_mul_comba8
-bn_mul_comba8:
-bn_mul_comba8..ng:
- .frame $30,0,$26,0
- .prologue 0
- ldq $1, 0($17)
- ldq $2, 0($18)
- zapnot $1, 15, $7
- srl $2, 32, $8
- mulq $8, $7, $22
- srl $1, 32, $6
- zapnot $2, 15, $5
- mulq $5, $6, $4
- mulq $7, $5, $24
- addq $22, $4, $22
- cmpult $22, $4, $1
- mulq $6, $8, $3
- beq $1, $173
- bis $31, 1, $1
- sll $1, 32, $1
- addq $3, $1, $3
-$173:
- sll $22, 32, $4
- addq $24, $4, $24
- stq $24, 0($16)
- ldq $2, 0($17)
- ldq $1, 8($18)
- zapnot $2, 15, $7
- srl $1, 32, $8
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $2, 32, $6
- mulq $5, $6, $23
- mulq $6, $8, $6
- srl $22, 32, $1
- cmpult $24, $4, $2
- addq $3, $1, $3
- addq $2, $3, $22
- addq $25, $23, $25
- cmpult $25, $23, $1
- bis $31, 1, $2
- beq $1, $177
- sll $2, 32, $1
- addq $6, $1, $6
-$177:
- sll $25, 32, $23
- ldq $1, 0($18)
- addq $0, $23, $0
- bis $0, $0, $7
- ldq $3, 8($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $4
- zapnot $3, 15, $7
- mulq $8, $7, $28
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $25, 32, $1
- cmpult $0, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $4, $6, $24
- srl $3, 32, $6
- mulq $5, $6, $2
- mulq $6, $8, $6
- addq $28, $2, $28
- cmpult $28, $2, $1
- bis $31, 1, $2
- beq $1, $181
- sll $2, 32, $1
- addq $6, $1, $6
-$181:
- sll $28, 32, $2
- addq $21, $2, $21
- bis $21, $21, $7
- addq $22, $7, $22
- stq $22, 8($16)
- ldq $3, 16($17)
- ldq $1, 0($18)
- cmpult $22, $7, $4
- zapnot $3, 15, $7
- srl $1, 32, $8
- mulq $8, $7, $22
- zapnot $1, 15, $5
- mulq $7, $5, $20
- srl $28, 32, $1
- cmpult $21, $2, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $4, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $3, 32, $6
- mulq $5, $6, $2
- mulq $6, $8, $6
- addq $22, $2, $22
- cmpult $22, $2, $1
- bis $31, 1, $2
- beq $1, $185
- sll $2, 32, $1
- addq $6, $1, $6
-$185:
- sll $22, 32, $2
- ldq $1, 8($18)
- addq $20, $2, $20
- bis $20, $20, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $22, 32, $1
- cmpult $20, $2, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $189
- sll $21, 32, $1
- addq $6, $1, $6
-$189:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 0($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $193
- sll $21, 32, $1
- addq $6, $1, $6
-$193:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 16($16)
- ldq $4, 0($17)
- ldq $5, 24($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $197
- sll $21, 32, $1
- addq $6, $1, $6
-$197:
- sll $0, 32, $24
- ldq $1, 16($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $201
- sll $20, 32, $1
- addq $6, $1, $6
-$201:
- sll $25, 32, $5
- ldq $2, 8($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $205
- sll $20, 32, $1
- addq $6, $1, $6
-$205:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $209
- sll $20, 32, $1
- addq $6, $1, $6
-$209:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 24($16)
- ldq $4, 32($17)
- ldq $5, 0($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $213
- sll $20, 32, $1
- addq $6, $1, $6
-$213:
- sll $28, 32, $23
- ldq $1, 8($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $217
- sll $21, 32, $1
- addq $6, $1, $6
-$217:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 16($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $221
- sll $21, 32, $1
- addq $6, $1, $6
-$221:
- sll $28, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $225
- sll $21, 32, $1
- addq $6, $1, $6
-$225:
- sll $0, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 0($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $229
- sll $21, 32, $1
- addq $6, $1, $6
-$229:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 32($16)
- ldq $4, 0($17)
- ldq $5, 40($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $22, $0
- cmpult $0, $22, $1
- mulq $6, $8, $6
- beq $1, $233
- sll $21, 32, $1
- addq $6, $1, $6
-$233:
- sll $0, 32, $22
- ldq $1, 32($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $237
- sll $20, 32, $1
- addq $6, $1, $6
-$237:
- sll $25, 32, $5
- ldq $2, 24($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $241
- sll $20, 32, $1
- addq $6, $1, $6
-$241:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $245
- sll $20, 32, $1
- addq $6, $1, $6
-$245:
- sll $0, 32, $25
- ldq $2, 8($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $249
- sll $20, 32, $1
- addq $6, $1, $6
-$249:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $253
- sll $20, 32, $1
- addq $6, $1, $6
-$253:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 40($16)
- ldq $4, 48($17)
- ldq $5, 0($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $24, $28
- cmpult $28, $24, $1
- mulq $6, $8, $6
- beq $1, $257
- sll $20, 32, $1
- addq $6, $1, $6
-$257:
- sll $28, 32, $24
- ldq $1, 8($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $261
- sll $21, 32, $1
- addq $6, $1, $6
-$261:
- sll $25, 32, $5
- ldq $2, 16($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 32($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $265
- sll $21, 32, $1
- addq $6, $1, $6
-$265:
- sll $28, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $269
- sll $21, 32, $1
- addq $6, $1, $6
-$269:
- sll $0, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 16($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $273
- sll $21, 32, $1
- addq $6, $1, $6
-$273:
- sll $28, 32, $25
- ldq $2, 40($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $277
- sll $21, 32, $1
- addq $6, $1, $6
-$277:
- sll $0, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 0($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $281
- sll $21, 32, $1
- addq $6, $1, $6
-$281:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 48($16)
- ldq $4, 0($17)
- ldq $5, 56($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $23, $0
- cmpult $0, $23, $1
- mulq $6, $8, $6
- beq $1, $285
- sll $21, 32, $1
- addq $6, $1, $6
-$285:
- sll $0, 32, $23
- ldq $1, 48($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 8($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $289
- sll $20, 32, $1
- addq $6, $1, $6
-$289:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 16($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $293
- sll $20, 32, $1
- addq $6, $1, $6
-$293:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $297
- sll $20, 32, $1
- addq $6, $1, $6
-$297:
- sll $0, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $301
- sll $20, 32, $1
- addq $6, $1, $6
-$301:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $305
- sll $20, 32, $1
- addq $6, $1, $6
-$305:
- sll $0, 32, $25
- ldq $2, 8($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $309
- sll $20, 32, $1
- addq $6, $1, $6
-$309:
- sll $28, 32, $25
- ldq $2, 0($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $313
- sll $20, 32, $1
- addq $6, $1, $6
-$313:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 56($16)
- ldq $4, 56($17)
- ldq $5, 8($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $22, $28
- cmpult $28, $22, $1
- mulq $6, $8, $6
- beq $1, $317
- sll $20, 32, $1
- addq $6, $1, $6
-$317:
- sll $28, 32, $22
- ldq $1, 16($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 48($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $321
- sll $21, 32, $1
- addq $6, $1, $6
-$321:
- sll $25, 32, $5
- ldq $2, 24($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $325
- sll $21, 32, $1
- addq $6, $1, $6
-$325:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $329
- sll $21, 32, $1
- addq $6, $1, $6
-$329:
- sll $0, 32, $25
- ldq $2, 40($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $333
- sll $21, 32, $1
- addq $6, $1, $6
-$333:
- sll $28, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 16($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $337
- sll $21, 32, $1
- addq $6, $1, $6
-$337:
- sll $0, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 8($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $341
- sll $21, 32, $1
- addq $6, $1, $6
-$341:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 64($16)
- ldq $4, 16($17)
- ldq $5, 56($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $345
- sll $21, 32, $1
- addq $6, $1, $6
-$345:
- sll $0, 32, $24
- ldq $1, 48($18)
- addq $2, $24, $2
- bis $2, $2, $7
- ldq $4, 24($17)
- addq $23, $7, $23
- srl $1, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $24, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $349
- sll $20, 32, $1
- addq $6, $1, $6
-$349:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 32($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $353
- sll $20, 32, $1
- addq $6, $1, $6
-$353:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $357
- sll $20, 32, $1
- addq $6, $1, $6
-$357:
- sll $0, 32, $25
- ldq $2, 24($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $361
- sll $20, 32, $1
- addq $6, $1, $6
-$361:
- sll $28, 32, $25
- ldq $2, 16($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $365
- sll $20, 32, $1
- addq $6, $1, $6
-$365:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 72($16)
- ldq $4, 56($17)
- ldq $5, 24($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $369
- sll $20, 32, $1
- addq $6, $1, $6
-$369:
- sll $28, 32, $23
- ldq $1, 32($18)
- addq $2, $23, $2
- bis $2, $2, $7
- ldq $4, 48($17)
- addq $22, $7, $22
- srl $1, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $0
- srl $28, 32, $1
- cmpult $2, $23, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $21
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $373
- sll $21, 32, $1
- addq $6, $1, $6
-$373:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $0, $5, $0
- bis $0, $0, $7
- ldq $4, 40($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $0, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $377
- sll $21, 32, $1
- addq $6, $1, $6
-$377:
- sll $28, 32, $25
- ldq $2, 48($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 32($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $23, $23
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $381
- sll $21, 32, $1
- addq $6, $1, $6
-$381:
- sll $0, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 24($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $385
- sll $21, 32, $1
- addq $6, $1, $6
-$385:
- sll $28, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 80($16)
- ldq $4, 32($17)
- ldq $5, 56($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $28, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $0, $22, $0
- cmpult $0, $22, $1
- mulq $6, $8, $6
- beq $1, $389
- sll $21, 32, $1
- addq $6, $1, $6
-$389:
- sll $0, 32, $22
- ldq $1, 48($18)
- addq $2, $22, $2
- bis $2, $2, $7
- ldq $4, 40($17)
- addq $24, $7, $24
- srl $1, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $25
- zapnot $1, 15, $5
- mulq $7, $5, $21
- srl $0, 32, $1
- cmpult $2, $22, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $22
- srl $4, 32, $6
- mulq $5, $6, $5
- bis $31, 1, $20
- addq $25, $5, $25
- cmpult $25, $5, $1
- mulq $6, $8, $6
- beq $1, $393
- sll $20, 32, $1
- addq $6, $1, $6
-$393:
- sll $25, 32, $5
- ldq $2, 40($18)
- addq $21, $5, $21
- bis $21, $21, $7
- ldq $4, 48($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $25, 32, $1
- addq $6, $1, $6
- cmpult $21, $5, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $397
- sll $20, 32, $1
- addq $6, $1, $6
-$397:
- sll $28, 32, $25
- ldq $2, 32($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $24, $7, $24
- srl $2, 32, $8
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $21
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $22, $22
- addq $21, $25, $21
- cmpult $21, $25, $1
- mulq $6, $8, $6
- beq $1, $401
- sll $20, 32, $1
- addq $6, $1, $6
-$401:
- sll $21, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $24, $7, $24
- stq $24, 88($16)
- ldq $4, 56($17)
- ldq $5, 40($18)
- cmpult $24, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $0
- srl $21, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $23, $6, $23
- cmpult $23, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $24
- mulq $7, $5, $5
- addq $1, $22, $22
- addq $0, $24, $0
- cmpult $0, $24, $1
- mulq $6, $8, $6
- beq $1, $405
- sll $20, 32, $1
- addq $6, $1, $6
-$405:
- sll $0, 32, $24
- ldq $2, 48($18)
- addq $5, $24, $5
- bis $5, $5, $7
- ldq $4, 48($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $28
- srl $0, 32, $1
- addq $6, $1, $6
- cmpult $5, $24, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $24
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $5
- addq $28, $25, $28
- cmpult $28, $25, $1
- mulq $6, $8, $6
- beq $1, $409
- sll $20, 32, $1
- addq $6, $1, $6
-$409:
- sll $28, 32, $25
- ldq $2, 56($18)
- addq $5, $25, $5
- bis $5, $5, $7
- ldq $4, 40($17)
- addq $23, $7, $23
- srl $2, 32, $8
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $25, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $1, $24, $24
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $413
- sll $20, 32, $1
- addq $6, $1, $6
-$413:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $23, $7, $23
- stq $23, 96($16)
- ldq $4, 48($17)
- ldq $5, 56($18)
- cmpult $23, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $22, $6, $22
- cmpult $22, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $23
- mulq $7, $5, $5
- addq $1, $24, $24
- addq $28, $23, $28
- cmpult $28, $23, $1
- mulq $6, $8, $6
- beq $1, $417
- sll $20, 32, $1
- addq $6, $1, $6
-$417:
- sll $28, 32, $23
- ldq $2, 48($18)
- addq $5, $23, $5
- bis $5, $5, $7
- ldq $4, 56($17)
- addq $22, $7, $22
- srl $2, 32, $8
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- mulq $8, $7, $0
- srl $28, 32, $1
- addq $6, $1, $6
- cmpult $5, $23, $1
- zapnot $2, 15, $5
- addq $1, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $23
- srl $4, 32, $6
- mulq $5, $6, $25
- mulq $7, $5, $2
- addq $0, $25, $0
- cmpult $0, $25, $1
- mulq $6, $8, $6
- beq $1, $421
- sll $20, 32, $1
- addq $6, $1, $6
-$421:
- sll $0, 32, $25
- addq $2, $25, $2
- bis $2, $2, $7
- addq $22, $7, $22
- stq $22, 104($16)
- ldq $4, 56($17)
- ldq $5, 56($18)
- cmpult $22, $7, $3
- zapnot $4, 15, $7
- srl $5, 32, $8
- mulq $8, $7, $28
- srl $0, 32, $1
- cmpult $2, $25, $2
- addq $6, $1, $6
- addq $2, $6, $6
- addq $3, $6, $6
- addq $24, $6, $24
- cmpult $24, $6, $1
- srl $4, 32, $6
- zapnot $5, 15, $5
- mulq $5, $6, $22
- mulq $7, $5, $2
- addq $1, $23, $23
- addq $28, $22, $28
- cmpult $28, $22, $1
- mulq $6, $8, $3
- beq $1, $425
- sll $20, 32, $1
- addq $3, $1, $3
-$425:
- sll $28, 32, $22
- srl $28, 32, $1
- addq $2, $22, $2
- addq $3, $1, $3
- bis $2, $2, $7
- addq $24, $7, $24
- cmpult $7, $22, $1
- cmpult $24, $7, $2
- addq $1, $3, $6
- addq $2, $6, $6
- stq $24, 112($16)
- addq $23, $6, $23
- stq $23, 120($16)
- ret $31, ($26), 1
- .end bn_mul_comba8
- .text
- .align 3
- .globl bn_sqr_comba4
- .ent bn_sqr_comba4
- # bn_sqr_comba4: square a 4-word (4 x 64-bit) number into an 8-word result.
- #   $16 = pointer to the result; quadwords at offsets 0..56 are written.
- #   $17 = pointer to the input; quadwords at offsets 0..24 are read.
- # The input words are held in $0..$3.  The result is built one column at
- # a time: every product a[i]*a[j] with i+j equal to the column number is
- # added into a three-word accumulator held in $4/$5/$6, whose roles
- # rotate after each column is stored.  Cross products (i != j) are
- # doubled first: "cmplt x,$31" yields 1 when the top bit of x is set, so
- # it captures the bit that "addq x,x" is about to shift out.
- # $7, $8 and $17..$28 are used as scratch; $17 is reused as a temporary
- # once the input words have been loaded.
- # NOTE(review): in each accumulate step the carry out of the low word is
- # added to the middle word after the cmpult that samples the middle
- # word's own carry-out.  If the middle word is all ones at that point the
- # wrap would not be recorded -- confirm whether this can occur.
-bn_sqr_comba4:
-bn_sqr_comba4..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- # Load a[0..3] into $0..$3.
- ldq $0, 0($17)
- ldq $1, 8($17)
- ldq $2, 16($17)
- ldq $3, 24($17)
- # Column 0: a0*a0.  The low word is r[0]; the high word seeds the accumulator.
- bis $31, $31, $6
- mulq $0, $0, $4
- umulh $0, $0, $5
- stq $4, 0($16)
- bis $31, $31, $4
- # Column 1: 2*a0*a1.
- mulq $0, $1, $7
- umulh $0, $1, $8
- cmplt $7, $31, $22
- cmplt $8, $31, $23
- addq $7, $7, $7
- addq $8, $8, $8
- addq $8, $22, $8
- addq $4, $23, $4
- addq $5, $7, $5
- addq $6, $8, $6
- cmpult $5, $7, $24
- cmpult $6, $8, $25
- addq $6, $24, $6
- addq $4, $25, $4
- stq $5, 8($16)
- bis $31, $31, $5
- # Column 2: a1*a1 + 2*a2*a0.
- mulq $1, $1, $27
- umulh $1, $1, $28
- addq $6, $27, $6
- addq $4, $28, $4
- cmpult $6, $27, $21
- cmpult $4, $28, $20
- addq $4, $21, $4
- addq $5, $20, $5
- mulq $2, $0, $19
- umulh $2, $0, $18
- cmplt $19, $31, $17
- cmplt $18, $31, $22
- addq $19, $19, $19
- addq $18, $18, $18
- addq $18, $17, $18
- addq $5, $22, $5
- addq $6, $19, $6
- addq $4, $18, $4
- cmpult $6, $19, $23
- cmpult $4, $18, $7
- addq $4, $23, $4
- addq $5, $7, $5
- stq $6, 16($16)
- bis $31, $31, $6
- # Column 3: 2*a3*a0 + 2*a2*a1.
- mulq $3, $0, $8
- umulh $3, $0, $24
- cmplt $8, $31, $25
- cmplt $24, $31, $27
- addq $8, $8, $8
- addq $24, $24, $24
- addq $24, $25, $24
- addq $6, $27, $6
- addq $4, $8, $4
- addq $5, $24, $5
- cmpult $4, $8, $28
- cmpult $5, $24, $21
- addq $5, $28, $5
- addq $6, $21, $6
- mulq $2, $1, $20
- umulh $2, $1, $17
- cmplt $20, $31, $22
- cmplt $17, $31, $19
- addq $20, $20, $20
- addq $17, $17, $17
- addq $17, $22, $17
- addq $6, $19, $6
- addq $4, $20, $4
- addq $5, $17, $5
- cmpult $4, $20, $18
- cmpult $5, $17, $23
- addq $5, $18, $5
- addq $6, $23, $6
- stq $4, 24($16)
- bis $31, $31, $4
- # Column 4: a2*a2 + 2*a3*a1.
- mulq $2, $2, $7
- umulh $2, $2, $25
- addq $5, $7, $5
- addq $6, $25, $6
- cmpult $5, $7, $27
- cmpult $6, $25, $8
- addq $6, $27, $6
- addq $4, $8, $4
- mulq $3, $1, $24
- umulh $3, $1, $28
- cmplt $24, $31, $21
- cmplt $28, $31, $22
- addq $24, $24, $24
- addq $28, $28, $28
- addq $28, $21, $28
- addq $4, $22, $4
- addq $5, $24, $5
- addq $6, $28, $6
- cmpult $5, $24, $19
- cmpult $6, $28, $20
- addq $6, $19, $6
- addq $4, $20, $4
- stq $5, 32($16)
- bis $31, $31, $5
- # Column 5: 2*a3*a2.
- mulq $3, $2, $17
- umulh $3, $2, $18
- cmplt $17, $31, $23
- cmplt $18, $31, $7
- addq $17, $17, $17
- addq $18, $18, $18
- addq $18, $23, $18
- addq $5, $7, $5
- addq $6, $17, $6
- addq $4, $18, $4
- cmpult $6, $17, $25
- cmpult $4, $18, $27
- addq $4, $25, $4
- addq $5, $27, $5
- stq $6, 40($16)
- bis $31, $31, $6
- # Column 6: a3*a3.  The two remaining accumulator words become r[6] and
- # r[7]; the carry collected in $6 is not stored.
- mulq $3, $3, $8
- umulh $3, $3, $21
- addq $4, $8, $4
- addq $5, $21, $5
- cmpult $4, $8, $22
- cmpult $5, $21, $24
- addq $5, $22, $5
- addq $6, $24, $6
- stq $4, 48($16)
- stq $5, 56($16)
- ret $31,($26),1
- .end bn_sqr_comba4
- .text
- .align 3
- .globl bn_sqr_comba8
- .ent bn_sqr_comba8
- # bn_sqr_comba8: square an 8-word (8 x 64-bit) number into a 16-word result.
- #   $16 = pointer to the result; quadwords at offsets 0..120 are written.
- #   $17 = pointer to the input; quadwords at offsets 0..56 are read.
- # The input words are held in $0..$7.  The result is built one column at
- # a time: every product a[i]*a[j] with i+j equal to the column number is
- # added into a three-word accumulator held in $22/$23/$8, whose roles
- # rotate after each column is stored.  Cross products (i != j) are
- # doubled first: "cmplt x,$31" yields 1 when the top bit of x is set, so
- # it captures the bit that "addq x,x" is about to shift out.
- # $17..$21, $24, $25, $27 and $28 are used as scratch; $17 is reused as a
- # temporary once the input words have been loaded.
- # NOTE(review): in each accumulate step the carry out of the low word is
- # added to the middle word after the cmpult that samples the middle
- # word's own carry-out.  If the middle word is all ones at that point the
- # wrap would not be recorded -- confirm whether this can occur.
-bn_sqr_comba8:
-bn_sqr_comba8..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- # Load a[0..7] into $0..$7.
- ldq $0, 0($17)
- ldq $1, 8($17)
- ldq $2, 16($17)
- ldq $3, 24($17)
- ldq $4, 32($17)
- ldq $5, 40($17)
- ldq $6, 48($17)
- ldq $7, 56($17)
- # Column 0: a0*a0.  The low word is r[0]; the high word seeds the accumulator.
- bis $31, $31, $23
- mulq $0, $0, $8
- umulh $0, $0, $22
- stq $8, 0($16)
- bis $31, $31, $8
- # Column 1: 2*a1*a0.
- mulq $1, $0, $24
- umulh $1, $0, $25
- cmplt $24, $31, $27
- cmplt $25, $31, $28
- addq $24, $24, $24
- addq $25, $25, $25
- addq $25, $27, $25
- addq $8, $28, $8
- addq $22, $24, $22
- addq $23, $25, $23
- cmpult $22, $24, $21
- cmpult $23, $25, $20
- addq $23, $21, $23
- addq $8, $20, $8
- stq $22, 8($16)
- bis $31, $31, $22
- # Column 2: a1*a1 + 2*a2*a0.
- mulq $1, $1, $19
- umulh $1, $1, $18
- addq $23, $19, $23
- addq $8, $18, $8
- cmpult $23, $19, $17
- cmpult $8, $18, $27
- addq $8, $17, $8
- addq $22, $27, $22
- mulq $2, $0, $28
- umulh $2, $0, $24
- cmplt $28, $31, $25
- cmplt $24, $31, $21
- addq $28, $28, $28
- addq $24, $24, $24
- addq $24, $25, $24
- addq $22, $21, $22
- addq $23, $28, $23
- addq $8, $24, $8
- cmpult $23, $28, $20
- cmpult $8, $24, $19
- addq $8, $20, $8
- addq $22, $19, $22
- stq $23, 16($16)
- bis $31, $31, $23
- # Column 3: 2*a2*a1 + 2*a3*a0.
- mulq $2, $1, $18
- umulh $2, $1, $17
- cmplt $18, $31, $27
- cmplt $17, $31, $25
- addq $18, $18, $18
- addq $17, $17, $17
- addq $17, $27, $17
- addq $23, $25, $23
- addq $8, $18, $8
- addq $22, $17, $22
- cmpult $8, $18, $21
- cmpult $22, $17, $28
- addq $22, $21, $22
- addq $23, $28, $23
- mulq $3, $0, $24
- umulh $3, $0, $20
- cmplt $24, $31, $19
- cmplt $20, $31, $27
- addq $24, $24, $24
- addq $20, $20, $20
- addq $20, $19, $20
- addq $23, $27, $23
- addq $8, $24, $8
- addq $22, $20, $22
- cmpult $8, $24, $25
- cmpult $22, $20, $18
- addq $22, $25, $22
- addq $23, $18, $23
- stq $8, 24($16)
- bis $31, $31, $8
- # Column 4: a2*a2 + 2*a3*a1 + 2*a4*a0.
- mulq $2, $2, $17
- umulh $2, $2, $21
- addq $22, $17, $22
- addq $23, $21, $23
- cmpult $22, $17, $28
- cmpult $23, $21, $19
- addq $23, $28, $23
- addq $8, $19, $8
- mulq $3, $1, $27
- umulh $3, $1, $24
- cmplt $27, $31, $20
- cmplt $24, $31, $25
- addq $27, $27, $27
- addq $24, $24, $24
- addq $24, $20, $24
- addq $8, $25, $8
- addq $22, $27, $22
- addq $23, $24, $23
- cmpult $22, $27, $18
- cmpult $23, $24, $17
- addq $23, $18, $23
- addq $8, $17, $8
- mulq $4, $0, $21
- umulh $4, $0, $28
- cmplt $21, $31, $19
- cmplt $28, $31, $20
- addq $21, $21, $21
- addq $28, $28, $28
- addq $28, $19, $28
- addq $8, $20, $8
- addq $22, $21, $22
- addq $23, $28, $23
- cmpult $22, $21, $25
- cmpult $23, $28, $27
- addq $23, $25, $23
- addq $8, $27, $8
- stq $22, 32($16)
- bis $31, $31, $22
- # Column 5: 2*a3*a2 + 2*a4*a1 + 2*a5*a0.
- mulq $3, $2, $24
- umulh $3, $2, $18
- cmplt $24, $31, $17
- cmplt $18, $31, $19
- addq $24, $24, $24
- addq $18, $18, $18
- addq $18, $17, $18
- addq $22, $19, $22
- addq $23, $24, $23
- addq $8, $18, $8
- cmpult $23, $24, $20
- cmpult $8, $18, $21
- addq $8, $20, $8
- addq $22, $21, $22
- mulq $4, $1, $28
- umulh $4, $1, $25
- cmplt $28, $31, $27
- cmplt $25, $31, $17
- addq $28, $28, $28
- addq $25, $25, $25
- addq $25, $27, $25
- addq $22, $17, $22
- addq $23, $28, $23
- addq $8, $25, $8
- cmpult $23, $28, $19
- cmpult $8, $25, $24
- addq $8, $19, $8
- addq $22, $24, $22
- mulq $5, $0, $18
- umulh $5, $0, $20
- cmplt $18, $31, $21
- cmplt $20, $31, $27
- addq $18, $18, $18
- addq $20, $20, $20
- addq $20, $21, $20
- addq $22, $27, $22
- addq $23, $18, $23
- addq $8, $20, $8
- cmpult $23, $18, $17
- cmpult $8, $20, $28
- addq $8, $17, $8
- addq $22, $28, $22
- stq $23, 40($16)
- bis $31, $31, $23
- # Column 6: a3*a3 + 2*a4*a2 + 2*a5*a1 + 2*a6*a0.
- mulq $3, $3, $25
- umulh $3, $3, $19
- addq $8, $25, $8
- addq $22, $19, $22
- cmpult $8, $25, $24
- cmpult $22, $19, $21
- addq $22, $24, $22
- addq $23, $21, $23
- mulq $4, $2, $27
- umulh $4, $2, $18
- cmplt $27, $31, $20
- cmplt $18, $31, $17
- addq $27, $27, $27
- addq $18, $18, $18
- addq $18, $20, $18
- addq $23, $17, $23
- addq $8, $27, $8
- addq $22, $18, $22
- cmpult $8, $27, $28
- cmpult $22, $18, $25
- addq $22, $28, $22
- addq $23, $25, $23
- mulq $5, $1, $19
- umulh $5, $1, $24
- cmplt $19, $31, $21
- cmplt $24, $31, $20
- addq $19, $19, $19
- addq $24, $24, $24
- addq $24, $21, $24
- addq $23, $20, $23
- addq $8, $19, $8
- addq $22, $24, $22
- cmpult $8, $19, $17
- cmpult $22, $24, $27
- addq $22, $17, $22
- addq $23, $27, $23
- mulq $6, $0, $18
- umulh $6, $0, $28
- cmplt $18, $31, $25
- cmplt $28, $31, $21
- addq $18, $18, $18
- addq $28, $28, $28
- addq $28, $25, $28
- addq $23, $21, $23
- addq $8, $18, $8
- addq $22, $28, $22
- cmpult $8, $18, $20
- cmpult $22, $28, $19
- addq $22, $20, $22
- addq $23, $19, $23
- stq $8, 48($16)
- bis $31, $31, $8
- # Column 7: 2*a4*a3 + 2*a5*a2 + 2*a6*a1 + 2*a7*a0.
- mulq $4, $3, $24
- umulh $4, $3, $17
- cmplt $24, $31, $27
- cmplt $17, $31, $25
- addq $24, $24, $24
- addq $17, $17, $17
- addq $17, $27, $17
- addq $8, $25, $8
- addq $22, $24, $22
- addq $23, $17, $23
- cmpult $22, $24, $21
- cmpult $23, $17, $18
- addq $23, $21, $23
- addq $8, $18, $8
- mulq $5, $2, $28
- umulh $5, $2, $20
- cmplt $28, $31, $19
- cmplt $20, $31, $27
- addq $28, $28, $28
- addq $20, $20, $20
- addq $20, $19, $20
- addq $8, $27, $8
- addq $22, $28, $22
- addq $23, $20, $23
- cmpult $22, $28, $25
- cmpult $23, $20, $24
- addq $23, $25, $23
- addq $8, $24, $8
- mulq $6, $1, $17
- umulh $6, $1, $21
- cmplt $17, $31, $18
- cmplt $21, $31, $19
- addq $17, $17, $17
- addq $21, $21, $21
- addq $21, $18, $21
- addq $8, $19, $8
- addq $22, $17, $22
- addq $23, $21, $23
- cmpult $22, $17, $27
- cmpult $23, $21, $28
- addq $23, $27, $23
- addq $8, $28, $8
- mulq $7, $0, $20
- umulh $7, $0, $25
- cmplt $20, $31, $24
- cmplt $25, $31, $18
- addq $20, $20, $20
- addq $25, $25, $25
- addq $25, $24, $25
- addq $8, $18, $8
- addq $22, $20, $22
- addq $23, $25, $23
- cmpult $22, $20, $19
- cmpult $23, $25, $17
- addq $23, $19, $23
- addq $8, $17, $8
- stq $22, 56($16)
- bis $31, $31, $22
- # Column 8: a4*a4 + 2*a5*a3 + 2*a6*a2 + 2*a7*a1.
- mulq $4, $4, $21
- umulh $4, $4, $27
- addq $23, $21, $23
- addq $8, $27, $8
- cmpult $23, $21, $28
- cmpult $8, $27, $24
- addq $8, $28, $8
- addq $22, $24, $22
- mulq $5, $3, $18
- umulh $5, $3, $20
- cmplt $18, $31, $25
- cmplt $20, $31, $19
- addq $18, $18, $18
- addq $20, $20, $20
- addq $20, $25, $20
- addq $22, $19, $22
- addq $23, $18, $23
- addq $8, $20, $8
- cmpult $23, $18, $17
- cmpult $8, $20, $21
- addq $8, $17, $8
- addq $22, $21, $22
- mulq $6, $2, $27
- umulh $6, $2, $28
- cmplt $27, $31, $24
- cmplt $28, $31, $25
- addq $27, $27, $27
- addq $28, $28, $28
- addq $28, $24, $28
- addq $22, $25, $22
- addq $23, $27, $23
- addq $8, $28, $8
- cmpult $23, $27, $19
- cmpult $8, $28, $18
- addq $8, $19, $8
- addq $22, $18, $22
- mulq $7, $1, $20
- umulh $7, $1, $17
- cmplt $20, $31, $21
- cmplt $17, $31, $24
- addq $20, $20, $20
- addq $17, $17, $17
- addq $17, $21, $17
- addq $22, $24, $22
- addq $23, $20, $23
- addq $8, $17, $8
- cmpult $23, $20, $25
- cmpult $8, $17, $27
- addq $8, $25, $8
- addq $22, $27, $22
- stq $23, 64($16)
- bis $31, $31, $23
- # Column 9: 2*a5*a4 + 2*a6*a3 + 2*a7*a2.
- mulq $5, $4, $28
- umulh $5, $4, $19
- cmplt $28, $31, $18
- cmplt $19, $31, $21
- addq $28, $28, $28
- addq $19, $19, $19
- addq $19, $18, $19
- addq $23, $21, $23
- addq $8, $28, $8
- addq $22, $19, $22
- cmpult $8, $28, $24
- cmpult $22, $19, $20
- addq $22, $24, $22
- addq $23, $20, $23
- mulq $6, $3, $17
- umulh $6, $3, $25
- cmplt $17, $31, $27
- cmplt $25, $31, $18
- addq $17, $17, $17
- addq $25, $25, $25
- addq $25, $27, $25
- addq $23, $18, $23
- addq $8, $17, $8
- addq $22, $25, $22
- cmpult $8, $17, $21
- cmpult $22, $25, $28
- addq $22, $21, $22
- addq $23, $28, $23
- mulq $7, $2, $19
- umulh $7, $2, $24
- cmplt $19, $31, $20
- cmplt $24, $31, $27
- addq $19, $19, $19
- addq $24, $24, $24
- addq $24, $20, $24
- addq $23, $27, $23
- addq $8, $19, $8
- addq $22, $24, $22
- cmpult $8, $19, $18
- cmpult $22, $24, $17
- addq $22, $18, $22
- addq $23, $17, $23
- stq $8, 72($16)
- bis $31, $31, $8
- # Column 10: a5*a5 + 2*a6*a4 + 2*a7*a3.
- mulq $5, $5, $25
- umulh $5, $5, $21
- addq $22, $25, $22
- addq $23, $21, $23
- cmpult $22, $25, $28
- cmpult $23, $21, $20
- addq $23, $28, $23
- addq $8, $20, $8
- mulq $6, $4, $27
- umulh $6, $4, $19
- cmplt $27, $31, $24
- cmplt $19, $31, $18
- addq $27, $27, $27
- addq $19, $19, $19
- addq $19, $24, $19
- addq $8, $18, $8
- addq $22, $27, $22
- addq $23, $19, $23
- cmpult $22, $27, $17
- cmpult $23, $19, $25
- addq $23, $17, $23
- addq $8, $25, $8
- mulq $7, $3, $21
- umulh $7, $3, $28
- cmplt $21, $31, $20
- cmplt $28, $31, $24
- addq $21, $21, $21
- addq $28, $28, $28
- addq $28, $20, $28
- addq $8, $24, $8
- addq $22, $21, $22
- addq $23, $28, $23
- cmpult $22, $21, $18
- cmpult $23, $28, $27
- addq $23, $18, $23
- addq $8, $27, $8
- stq $22, 80($16)
- bis $31, $31, $22
- # Column 11: 2*a6*a5 + 2*a7*a4.
- mulq $6, $5, $19
- umulh $6, $5, $17
- cmplt $19, $31, $25
- cmplt $17, $31, $20
- addq $19, $19, $19
- addq $17, $17, $17
- addq $17, $25, $17
- addq $22, $20, $22
- addq $23, $19, $23
- addq $8, $17, $8
- cmpult $23, $19, $24
- cmpult $8, $17, $21
- addq $8, $24, $8
- addq $22, $21, $22
- mulq $7, $4, $28
- umulh $7, $4, $18
- cmplt $28, $31, $27
- cmplt $18, $31, $25
- addq $28, $28, $28
- addq $18, $18, $18
- addq $18, $27, $18
- addq $22, $25, $22
- addq $23, $28, $23
- addq $8, $18, $8
- cmpult $23, $28, $20
- cmpult $8, $18, $19
- addq $8, $20, $8
- addq $22, $19, $22
- stq $23, 88($16)
- bis $31, $31, $23
- # Column 12: a6*a6 + 2*a7*a5.
- mulq $6, $6, $17
- umulh $6, $6, $24
- addq $8, $17, $8
- addq $22, $24, $22
- cmpult $8, $17, $21
- cmpult $22, $24, $27
- addq $22, $21, $22
- addq $23, $27, $23
- mulq $7, $5, $25
- umulh $7, $5, $28
- cmplt $25, $31, $18
- cmplt $28, $31, $20
- addq $25, $25, $25
- addq $28, $28, $28
- addq $28, $18, $28
- addq $23, $20, $23
- addq $8, $25, $8
- addq $22, $28, $22
- cmpult $8, $25, $19
- cmpult $22, $28, $17
- addq $22, $19, $22
- addq $23, $17, $23
- stq $8, 96($16)
- bis $31, $31, $8
- # Column 13: 2*a7*a6.
- mulq $7, $6, $24
- umulh $7, $6, $21
- cmplt $24, $31, $27
- cmplt $21, $31, $18
- addq $24, $24, $24
- addq $21, $21, $21
- addq $21, $27, $21
- addq $8, $18, $8
- addq $22, $24, $22
- addq $23, $21, $23
- cmpult $22, $24, $20
- cmpult $23, $21, $25
- addq $23, $20, $23
- addq $8, $25, $8
- stq $22, 104($16)
- bis $31, $31, $22
- # Column 14: a7*a7.  The two remaining accumulator words become r[14] and
- # r[15]; the carry collected in $22 is not stored.
- mulq $7, $7, $28
- umulh $7, $7, $19
- addq $23, $28, $23
- addq $8, $19, $8
- cmpult $23, $28, $17
- cmpult $8, $19, $27
- addq $8, $17, $8
- addq $22, $27, $22
- stq $23, 112($16)
- stq $8, 120($16)
- ret $31,($26),1
- .end bn_sqr_comba8
+++ /dev/null
-
- # DEC Alpha assembler
- # The bn_div64 is actually gcc output but the other parts are hand done.
- # Thanks to tzeruch@ceddec.com for sending me the gcc output for
- # bn_div64.
- # I've gone back and redone most of the routines.
- # The key thing to remember for the 164 CPU is that while a
- # multiply operation takes 8 cycles, another one can only be issued
- # after 4 cycles have elapsed. I've made modifications to help
- # improve this. Also, normally, the result of a ld instruction will not
- # be available for about 3 cycles.
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text
- .align 3
- .globl bn_mul_add_words
- .ent bn_mul_add_words
- # bn_mul_add_words: for each word, r[i] = r[i] + a[i]*w + carry, with the
- # carry word propagated from one word to the next.
- #   $16 = r, pointer to the words that are read, updated and stored back
- #   $17 = a, pointer to the words that are multiplied
- #   $18 = number of words to process
- #   $19 = w, the 64-bit multiplier
- #   $0  = running carry word; starts at zero and holds the final carry
- #         when the routine returns
- # The main loop at $42 handles four words per iteration with the
- # multiplies interleaved among the adds and loads; the loop at $45
- # handles the remaining words one at a time.
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- # Bias the count by -4 so its sign tells whether a full group of four remains.
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
- .align 3
- # Four-word unrolled loop.  In the trailing comments the first digit is
- # the word within the group (1..4); lines ending in ###### are multiplies.
-$42:
- mulq $20,$19,$5 # 1 2 1 ######
- ldq $21,8($17) # 2 1
- ldq $2,8($16) # 2 1
- umulh $20,$19,$20 # 1 2 ######
- ldq $27,16($17) # 3 1
- ldq $3,16($16) # 3 1
- mulq $21,$19,$6 # 2 2 1 ######
- ldq $28,24($17) # 4 1
- addq $1,$5,$1 # 1 2 2
- ldq $4,24($16) # 4 1
- umulh $21,$19,$21 # 2 2 ######
- cmpult $1,$5,$22 # 1 2 3 1
- addq $20,$22,$20 # 1 3 1
- addq $1,$0,$1 # 1 2 3 1
- mulq $27,$19,$7 # 3 2 1 ######
- cmpult $1,$0,$0 # 1 2 3 2
- addq $2,$6,$2 # 2 2 2
- addq $20,$0,$0 # 1 3 2
- cmpult $2,$6,$23 # 2 2 3 1
- addq $21,$23,$21 # 2 3 1
- umulh $27,$19,$27 # 3 2 ######
- addq $2,$0,$2 # 2 2 3 1
- cmpult $2,$0,$0 # 2 2 3 2
- subq $18,4,$18
- mulq $28,$19,$8 # 4 2 1 ######
- addq $21,$0,$0 # 2 3 2
- addq $3,$7,$3 # 3 2 2
- addq $16,32,$16
- cmpult $3,$7,$24 # 3 2 3 1
- stq $1,-32($16) # 1 2 4
- umulh $28,$19,$28 # 4 2 ######
- addq $27,$24,$27 # 3 3 1
- addq $3,$0,$3 # 3 2 3 1
- stq $2,-24($16) # 2 2 4
- cmpult $3,$0,$0 # 3 2 3 2
- stq $3,-16($16) # 3 2 4
- addq $4,$8,$4 # 4 2 2
- addq $27,$0,$0 # 3 3 2
- cmpult $4,$8,$25 # 4 2 3 1
- addq $17,32,$17
- addq $28,$25,$28 # 4 3 1
- addq $4,$0,$4 # 4 2 3 1
- cmpult $4,$0,$0 # 4 2 3 2
- stq $4,-8($16) # 4 2 4
- addq $28,$0,$0 # 4 3 2
- blt $18,$43
-
- # Another full group remains: preload its first words and loop again.
- ldq $20,0($17) # 1 1
- ldq $1,0($16) # 1 1
-
- br $42
-
- .align 4
- # Tail loop: one word per iteration until the count reaches zero.
-$45:
- ldq $20,0($17) # 4 1
- ldq $1,0($16) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- addq $16,8,$16
- addq $17,8,$17
- umulh $20,$19,$20 # 4 2
- addq $1,$5,$1 # 4 2 2
- cmpult $1,$5,$22 # 4 2 3 1
- addq $20,$22,$20 # 4 3 1
- addq $1,$0,$1 # 4 2 3 1
- cmpult $1,$0,$0 # 4 2 3 2
- addq $20,$0,$0 # 4 3 2
- stq $1,-8($16) # 4 2 4
- bgt $18,$45
- ret $31,($26),1 # else exit
-
- .align 4
- # Undo the -4 bias; a positive result is the number of words left for the tail.
-$43:
- addq $18,4,$18
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3
- .globl bn_mul_words
- .ent bn_mul_words
- # bn_mul_words: for each word, r[i] = a[i]*w + carry, with the carry word
- # propagated from one word to the next.
- #   $16 = r, pointer to the words that are stored (never loaded here)
- #   $17 = a, pointer to the words that are multiplied
- #   $18 = number of words to process
- #   $19 = w, the 64-bit multiplier
- #   $0  = running carry word; starts at zero and holds the final carry
- #         when the routine returns
- # The main loop at $142 handles four words per iteration; the loop at
- # $145 handles the remaining words one at a time.
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- # Bias the count by -4 so its sign tells whether a full group of four remains.
- subq $18,4,$18
- bis $31,$31,$0
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
- # Four-word unrolled loop.  In the trailing comments the first digit is
- # the word within the group (1..4); lines ending in ##### are multiplies.
-$142:
-
- mulq $20,$19,$5 # 1 2 1 #####
- ldq $21,8($17) # 2 1
- ldq $27,16($17) # 3 1
- umulh $20,$19,$20 # 1 2 #####
- ldq $28,24($17) # 4 1
- mulq $21,$19,$6 # 2 2 1 #####
- addq $5,$0,$5 # 1 2 3 1
- subq $18,4,$18
- cmpult $5,$0,$0 # 1 2 3 2
- umulh $21,$19,$21 # 2 2 #####
- addq $20,$0,$0 # 1 3 2
- addq $17,32,$17
- addq $6,$0,$6 # 2 2 3 1
- mulq $27,$19,$7 # 3 2 1 #####
- cmpult $6,$0,$0 # 2 2 3 2
- addq $21,$0,$0 # 2 3 2
- addq $16,32,$16
- umulh $27,$19,$27 # 3 2 #####
- stq $5,-32($16) # 1 2 4
- mulq $28,$19,$8 # 4 2 1 #####
- addq $7,$0,$7 # 3 2 3 1
- stq $6,-24($16) # 2 2 4
- cmpult $7,$0,$0 # 3 2 3 2
- umulh $28,$19,$28 # 4 2 #####
- addq $27,$0,$0 # 3 3 2
- stq $7,-16($16) # 3 2 4
- addq $8,$0,$8 # 4 2 3 1
- cmpult $8,$0,$0 # 4 2 3 2
-
- addq $28,$0,$0 # 4 3 2
-
- stq $8,-8($16) # 4 2 4
-
- blt $18,$143
-
- # Another full group remains: preload its first word and loop again.
- ldq $20,0($17) # 1 1
-
- br $142
-
- .align 4
- # Tail loop: one word per iteration until the count reaches zero.
-$145:
- ldq $20,0($17) # 4 1
- mulq $20,$19,$5 # 4 2 1
- subq $18,1,$18
- umulh $20,$19,$20 # 4 2
- addq $5,$0,$5 # 4 2 3 1
- addq $16,8,$16
- cmpult $5,$0,$0 # 4 2 3 2
- addq $17,8,$17
- addq $20,$0,$0 # 4 3 2
- stq $5,-8($16) # 4 2 4
-
- bgt $18,$145
- ret $31,($26),1 # else exit
-
- .align 4
- # Undo the -4 bias; a positive result is the number of words left for the tail.
-$143:
- addq $18,4,$18
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words: # for each of $18 words read through $17, store the low then the high half of its square through $16 (two output words per input word)
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $18,4,$18 # count -= 4; negative means fewer than four words remain
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $20,0($17) # 1 1
- .align 3
-$542: # unrolled body: four squares per pass
- mulq $20,$20,$5 ###### low half of a[0]^2
- ldq $21,8($17) # 1 1
- subq $18,4 # NOTE(review): two-operand form; presumably assembled as subq $18,4,$18 -- confirm
- umulh $20,$20,$1 ###### high half of a[0]^2
- ldq $27,16($17) # 1 1
- mulq $21,$21,$6 ######
- ldq $28,24($17) # 1 1
- stq $5,0($16) # r[0]
- umulh $21,$21,$2 ######
- stq $1,8($16) # r[1]
- mulq $27,$27,$7 ######
- stq $6,16($16) # r[2]
- umulh $27,$27,$3 ######
- stq $2,24($16) # r[3]
- mulq $28,$28,$8 ######
- stq $7,32($16) # r[4]
- umulh $28,$28,$4 ######
- stq $3,40($16) # r[5]
-
- addq $16,64,$16 # destination pointer += 8 words
- addq $17,32,$17 # source pointer += 4 words
- stq $8,-16($16) # r[6]
- stq $4,-8($16) # r[7]
-
- blt $18,$543
- ldq $20,0($17) # 1 1
- br $542
-
-$442: # tail loop: one square per pass
- ldq $20,0($17) # a[0]
- mulq $20,$20,$5 # a[0]*a[0] low part
- addq $16,16,$16 # destination pointer += 2 words
- addq $17,8,$17 # source pointer += 1 word
- subq $18,1,$18 # count -= 1
- umulh $20,$20,$1 # a[0]*a[0] high part
- stq $5,-16($16) # r[0]
- stq $1,-8($16) # r[1]
-
- bgt $18,$442
- ret $31,($26),1 # else exit
-
- .align 4
-$543: # fewer than four words left
- addq $18,4,$18 # undo the -4 bias
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words: # r[i] = a[i] + b[i] + carry for $19 words; $16=r, $17=a, $18=b; carry is kept in $0 and is still there at ret
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19,4,$19 # count -= 4; negative means fewer than four words remain
- bis $31,$31,$0 # carry = 0
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- .align 3
-$901: # unrolled body: four words per pass
- addq $1,$5,$1 # r=a+b;
- ldq $6,8($17) # a[1]
- cmpult $1,$5,$22 # did a+b overflow? (tested before the carry is added)
- ldq $2,8($18) # b[1]
- addq $1,$0,$1 # r += carry
- ldq $7,16($17) # a[2]
- cmpult $1,$0,$0 # did adding the carry overflow?
- ldq $3,16($18) # b[2]
- addq $0,$22,$0 # carry = sum of the two overflow bits
- ldq $8,24($17) # a[3]
- addq $2,$6,$2 # r=a+b;
- ldq $4,24($18) # b[3]
- cmpult $2,$6,$23 # did we overflow?
- addq $3,$7,$3 # r=a+b;
- addq $2,$0,$2 # r += carry
- cmpult $3,$7,$24 # did we overflow?
- cmpult $2,$0,$0 # overflow?
- addq $4,$8,$4 # r=a+b;
- addq $0,$23,$0
- cmpult $4,$8,$25 # did we overflow?
- addq $3,$0,$3 # r += carry
- stq $1,0($16) # r[0]
- cmpult $3,$0,$0 # overflow?
- stq $2,8($16) # r[1]
- addq $0,$24,$0
- stq $3,16($16) # r[2]
- addq $4,$0,$4 # r += carry
- subq $19,4,$19 # loop--
- cmpult $4,$0,$0 # overflow?
- addq $17,32,$17 # a += 4 words
- addq $0,$25,$0
- stq $4,24($16) # r[3]
- addq $18,32,$18 # b += 4 words
- addq $16,32,$16 # r += 4 words
-
- blt $19,$900
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- br $901
- .align 4
-$945: # tail loop: one word per pass
- ldq $5,0($17) # a[0]
- ldq $1,0($18) # b[0]
- addq $1,$5,$1 # r=a+b;
- subq $19,1,$19 # loop--
- cmpult $1,$5,$22 # did a+b overflow? must be tested BEFORE the carry is added, otherwise a=1, b=2^64-1, carry=1 loses its carry-out
- addq $17,8,$17 # a++
- addq $1,$0,$1 # r += carry
- cmpult $1,$0,$0 # did adding the carry overflow?
- addq $18,8,$18 # b++
- stq $1,0($16) # r[0]
- addq $0,$22,$0 # carry = sum of the two overflow bits
- addq $16,8,$16 # r++
-
- bgt $19,$945
- ret $31,($26),1 # else exit
-
-$900: # fewer than four words left
- addq $19,4,$19 # undo the -4 bias
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- #
- # What follows was taken directly from the C compiler with a few
- # hacks to redo the labels.
- #
-.text
- .align 3
- .globl bn_div64
- .ent bn_div64
-bn_div64: # compiler-generated: divides the two-word value ($16 high, $17 low) by $18 and leaves a 64-bit quotient in $0, built as two 32-bit digits
- ldgp $29,0($27)
-bn_div64..ng:
- lda $30,-48($30) # open a 48-byte frame
- .frame $30,48,$26,0
- stq $26,0($30) # save return address
- stq $9,8($30) # save $9..$13, which are used as locals below
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9 # $9 = h (high word)
- bis $17,$17,$10 # $10 = l (low word)
- bis $18,$18,$11 # $11 = d (divisor)
- bis $31,$31,$13 # $13 = 0, accumulates the quotient
- bis $31,2,$12 # $12 = 2, number of quotient digits still to produce
- bne $11,$119
- lda $0,-1 # d == 0: return -1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word # $0 = BN_num_bits_word(d)
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$120 # bits == 64: skip the range check
- bis $31,1,$1
- sll $1,$0,$1 # $1 = 1 << bits
- cmpule $9,$1,$1
- bne $1,$120 # h <= (1 << bits): input accepted
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort # h too large for this divisor
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3
- cmpult $9,$11,$2 # $2 = (h < d)
- subq $3,$0,$1 # 64 - bits
- addl $1,$31,$0 # $0 = shift count i, as a 32-bit value
- subq $9,$11,$1
- cmoveq $2,$1,$9 # if h >= d then h -= d
- beq $0,$122 # i == 0: no normalisation needed
- zapnot $0,15,$2 # keep the low four bytes of i
- subq $3,$0,$1 # 64 - i
- sll $11,$2,$11 # d <<= i
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10 # l <<= i
- bis $3,$1,$9 # h = (h << i) | (l >> (64 - i))
-$122:
- srl $11,32,$5 # $5 = dh, upper 32 bits of d
- zapnot $11,15,$6 # $6 = dl, lower 32 bits of d
- lda $7,-1
- .align 5
-$123: # one pass per quotient digit
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126
- zapnot $7,15,$27 # (h >> 32) == dh: start with q = 0xffffffff
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27 # otherwise q = h / dh (unsigned)
-$127:
- srl $10,32,$4 # $4 = upper 32 bits of l
- .align 5
-$128: # lower q until the estimate fits
- mulq $27,$5,$1 # q * dh
- subq $9,$1,$3 # t = h - q*dh
- zapnot $3,240,$1 # upper 32 bits of t
- bne $1,$129 # t does not fit in 32 bits: q is acceptable
- mulq $6,$27,$2 # dl * q
- sll $3,32,$1
- addq $1,$4,$1 # (t << 32) + (l >> 32)
- cmpule $2,$1,$2
- bne $2,$129 # dl*q <= that value: q is acceptable
- subq $27,1,$27 # q--
- br $31,$128
- .align 4
-$129: # subtract q*d from the running remainder
- mulq $27,$6,$1 # tl = q * dl
- mulq $27,$5,$4 # th = q * dh
- srl $1,32,$3
- sll $1,32,$1 # tl <<= 32
- addq $4,$3,$4 # th += upper 32 bits of the original tl
- cmpult $10,$1,$2 # borrow out of l - tl
- subq $10,$1,$10 # l -= tl
- addq $2,$4,$2 # th += borrow
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$134
- addq $9,$11,$9 # h < th: add d back and
- subq $27,1,$27 # lower q by one
-$134:
- subl $12,1,$12 # one digit done
- subq $9,$4,$9 # h -= th
- beq $12,$124 # both digits produced
- sll $27,32,$13 # first digit becomes the upper half of the result
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10 # l <<= 32
- bis $2,$1,$9 # h = (h << 32) | (l >> 32)
- br $31,$123
- .align 4
-$124:
- bis $13,$27,$0 # result = (first digit << 32) | second digit
-$136: # common exit: restore saved registers and return
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div64
-
- .set noat
- .text
- .align 3
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words: # r[i] = a[i] - b[i] - borrow for $19 words; $16=r, $17=a, $18=b; borrow is kept in $0 and is still there at ret
-bn_sub_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- subq $19, 4, $19 # count -= 4; negative means fewer than four words remain
- bis $31, $31, $0 # borrow = 0
- blt $19, $100
- ldq $1, 0($17) # a[0]
- ldq $2, 0($18) # b[0]
-$101: # unrolled body: four words per pass
- ldq $3, 8($17) # a[1]
- cmpult $1, $2, $4 # borrow out of a[0]-b[0]
- ldq $5, 8($18) # b[1]
- subq $1, $2, $1 # a[0]-b[0]
- ldq $6, 16($17) # a[2]
- cmpult $1, $0, $2 # borrow out of subtracting the incoming borrow
- ldq $7, 16($18) # b[2]
- subq $1, $0, $23 # word 0 result
- ldq $8, 24($17) # a[3]
- addq $2, $4, $0 # borrow = sum of the two borrow bits
- cmpult $3, $5, $24
- subq $3, $5, $3
- ldq $22, 24($18) # b[3]
- cmpult $3, $0, $5
- subq $3, $0, $25 # word 1 result
- addq $5, $24, $0
- cmpult $6, $7, $27
- subq $6, $7, $6
- stq $23, 0($16) # r[0]
- cmpult $6, $0, $7
- subq $6, $0, $28 # word 2 result
- addq $7, $27, $0
- cmpult $8, $22, $21
- subq $8, $22, $8
- stq $25, 8($16) # r[1]
- cmpult $8, $0, $22
- subq $8, $0, $20 # word 3 result
- addq $22, $21, $0
- stq $28, 16($16) # r[2]
- subq $19, 4, $19 # count -= 4
- stq $20, 24($16) # r[3]
- addq $17, 32, $17 # a += 4 words
- addq $18, 32, $18 # b += 4 words
- addq $16, 32, $16 # r += 4 words
- blt $19, $100
- ldq $1, 0($17) # preload a[0] for the next pass
- ldq $2, 0($18) # preload b[0] for the next pass
- br $101
-$102: # tail loop: one word per pass
- ldq $1, 0($17)
- ldq $2, 0($18)
- cmpult $1, $2, $27 # borrow out of a-b
- subq $1, $2, $1
- cmpult $1, $0, $2 # borrow out of subtracting the incoming borrow
- subq $1, $0, $1
- stq $1, 0($16)
- addq $2, $27, $0 # borrow = sum of the two borrow bits
- addq $17, 8, $17
- addq $18, 8, $18
- addq $16, 8, $16
- subq $19, 1, $19
- bgt $19, $102
- ret $31,($26),1
-$100: # fewer than four words left
- addq $19, 4, $19 # undo the -4 bias
- bgt $19, $102
-$103:
- ret $31,($26),1
- .end bn_sub_words
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_add_words
- {
- # Emit bn_add_words: rp[i] = ap[i] + bp[i] + carry for `count` words.
- # The running carry is kept in the register obtained from GR("r0").
- # All code is produced through the emitter helpers (&add, &ld, &st,
- # &NR, &FR, ...), which are defined outside this file.
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- # NOTE(review): this unconditional branch sends every call to the
- # one-word-at-a-time path, so the unrolled loop below never runs.
- # It is kept as found; confirm before removing it.
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0); # o = a + b
- &cmpult($o0,$b0,$t0); # t = carry out of a + b
- &add($o0,$cc,$o0); # o += carry in
- &cmpult($o0,$cc,$cc); # carry out of that add
- &add($cc,$t0,$cc); &FR($t0); # combined carry for the next word
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- # Each result goes to its own slot; the original stored all four
- # results to word 0 of rp.
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(1,$rp)); &FR($o1);
- &st($o2,&QWPw(2,$rp)); &FR($o2);
- &st($o3,&QWPw(3,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # ap += 4 words
- &add($bp,4*$QWS,$bp); # bp += 4 words
- &add($rp,4*$QWS,$rp); # rp += 4 words
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- ($t0,$o0)=&NR(2);
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
-
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0); # carry out of a + b
- &add($o0,$cc,$o0); # add the carry in
- &cmpult($o0,$cc,$cc); # carry out of that add
- &add($cc,$t0,$cc); # combine the two carries
- &st($o0,&QWPw(0,$rp)); # save
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias applied on entry
- &bgt($count,&label("last_loop"));
-
- &FR($o0,$t0,$a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-
-sub bn_div64 # passes a fixed block of Alpha assembly text for bn_div64 to asm_add; takes no arguments
- {
- local($data)=<<'EOF';
- #
- # What follows was taken directly from the C compiler with a few
- # hacks to redo the lables.
- #
-.text
- .set noreorder
- .set volatile
- .align 3
- .globl bn_div64
- .ent bn_div64
-bn_div64:
- ldgp $29,0($27)
-bn_div64..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$9119
- lda $0,-1
- br $31,$9136
- .align 4
-$9119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$9120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$9120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$9120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$9122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$9122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$9123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$9126
- zapnot $7,15,$27
- br $31,$9127
- .align 4
-$9126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$9127:
- srl $10,32,$4
- .align 5
-$9128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$9129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$9129
- subq $27,1,$27
- br $31,$9128
- .align 4
-$9129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$9134
- addq $9,$11,$9
- subq $27,1,$27
-$9134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$9124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$9123
- .align 4
-$9124:
- bis $13,$27,$0
-$9136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div64
-EOF
- &asm_add($data); # asm_add is defined elsewhere; presumably it appends the text to the generated output -- confirm
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_words # emits bn_mul_words through the emitter helpers: per word, store low(a*word + carry) and carry the high half forward
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny); # NOTE(review): "$couny" looks like a typo for $count, which is assigned below without being localised
-
- &init_pool(4);
- ($cc)=GR("r0"); # running carry
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc); # carry = 0
- &br(&label("finish")); # always taken: the unrolled loop below is parked in a string, so every word goes through last_loop
- &blt($count,&label("finish")); # never reached in the emitted code because of the branch above
-
- ($a0,$r0)=&NR(2); # NOTE(review): these two registers are not released by any &FR visible in this sub
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$word,($l0)=&NR(1)); # low half of a*word
- &add($ap,$QWS,$ap); # ap += 1 word
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # high half of a*word
- &add($l0,$cc,$l0); # low += carry
- &add($rp,$QWS,$rp); # rp += 1 word, so the store below uses index -1
- &sub($count,1,$count);
- &cmpult($l0,$cc,$cc); # carry out of the add
- &st($l0,&QWPw(-1,$rp)); &FR($l0);
- &add($h0,$cc,$cc); &FR($h0); # next carry = high half + carry out
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias applied on entry
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_add_words # emits bn_mul_add_words through the emitter helpers: per word, r = low(r + a*word + carry) with the overflow carried forward
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny); # NOTE(review): "$couny" looks like a typo for $count, which is assigned below without being localised
-
- &init_pool(4);
- ($cc)=GR("r0"); # running carry
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc); # carry = 0
- &br(&label("finish")); # always taken: the unrolled loop below is parked in a string, so every word goes through last_loop
- &blt($count,&label("finish")); # never reached in the emitted code because of the branch above
-
- ($a0,$r0)=&NR(2); # NOTE(review): these two registers are not released before last_loop allocates fresh ones
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r (the current destination word)
- &mul($a0,$word,($l0)=&NR(1)); # low half of a*word
- &sub($count,1,$count);
- &add($ap,$QWS,$ap); # ap += 1 word
- &muh($a0,$word,($h0)=&NR(1)); &FR($a0); # high half of a*word
- &add($r0,$l0,$r0); # r += low half
- &add($rp,$QWS,$rp); # rp += 1 word, so the store below uses index -1
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # carry out of r + low
- &add($r0,$cc,$r0); # r += carry in
- &add($h0,$t0,$h0); &FR($t0); # fold the first carry into the high half
- &cmpult($r0,$cc,$cc); # carry out of r + carry in
- &st($r0,&QWPw(-1,$rp)); &FR($r0);
- &add($h0,$cc,$cc); &FR($h0); # next carry = high half + second carry
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias applied on entry
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub mul_add_c
- {
- # Emit code that folds the product a*b into the three-word
- # accumulator (c2,c1,c0).  &mul/&muh are external emitters,
- # presumably producing the low/high product halves.
- local($a,$b,$c0,$c1,$c2)=@_;
- local($lo,$hi,$carry_lo,$carry_hi);
-
- # product halves, each in a freshly allocated register
- ($lo)=&NR(1);
- &mul($a,$b,$lo);
- ($hi)=&NR(1);
- &muh($a,$b,$hi);
-
- # c0 += lo, remembering the carry out
- &add($c0,$lo,$c0);
- ($carry_lo)=&NR(1);
- &cmpult($c0,$lo,$carry_lo);
- &FR($lo);
-
- # fold that carry into the high half before it reaches c1
- &add($carry_lo,$hi,$hi);
- &FR($carry_lo);
-
- # c1 += hi, remembering the carry out
- &add($c1,$hi,$c1);
- ($carry_hi)=&NR(1);
- &cmpult($c1,$hi,$carry_hi);
- &FR($hi);
-
- # c2 += carry
- &add($c2,$carry_hi,$c2);
- &FR($carry_hi);
- }
-
-sub bn_mul_comba4
- {
- # Emit a hand-scheduled 4x4 word multiply: rp[0..7] = ap[0..3] * bp[0..3].
- # Partial products are summed one result column at a time in $R; the
- # carries out of each column are collected in $H1/$H2 and seed the
- # next column.  Emitter helpers (&mul, &muh, &add, &cmpult, &ld, &st,
- # &NR, &FR, ...) are defined outside this file.
- # NOTE(review): the registers held in @a and @b are never passed to
- # &FR in this sub; confirm whether &fin_pool expects them released.
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &mul($a[0],$b[0],($r00)=&NR(1));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &muh($a[0],$b[0],($r01)=&NR(1));
- &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &mul($a[0],$b[1],($r02)=&NR(1));
-
- ($R,$H1,$H2)=&NR(3);
-
- &st($r00,&QWPw(0,$rp)); &FR($r00);
-
- # column 1
- &mov("zero",$R);
- &mul($a[1],$b[0],($r03)=&NR(1));
-
- &mov("zero",$H1);
- &mov("zero",$H2); # was $H0, a variable that is never defined
- &add($R,$r01,$R);
- &muh($a[0],$b[1],($r04)=&NR(1));
- &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
- &add($R,$r02,$R);
- &add($H1,$t01,$H1); &FR($t01);
- &muh($a[1],$b[0],($r05)=&NR(1));
- &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
- &add($R,$r03,$R);
- &add($H2,$t02,$H2); &FR($t02);
- &mul($a[0],$b[2],($r06)=&NR(1));
- &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
- &add($H1,$t03,$H1); &FR($t03);
- &st($R,&QWPw(1,$rp));
- &add($H1,$H2,$R);
-
- # column 2
- &mov("zero",$H1);
- &add($R,$r04,$R);
- &mov("zero",$H2);
- &mul($a[1],$b[1],($r07)=&NR(1));
- &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
- &add($R,$r05,$R);
- &add($H1,$t04,$H1); &FR($t04);
- &mul($a[2],$b[0],($r08)=&NR(1));
- &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
- &add($R,$r06,$R); # was $r01, which was released in column 1
- &add($H2,$t05,$H2); &FR($t05);
- &muh($a[0],$b[2],($r09)=&NR(1));
- &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
- &add($R,$r07,$R);
- &add($H1,$t06,$H1); &FR($t06);
- &muh($a[1],$b[1],($r10)=&NR(1));
- &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
- &add($R,$r08,$R);
- &add($H2,$t07,$H2); &FR($t07);
- &muh($a[2],$b[0],($r11)=&NR(1));
- &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
- &add($H1,$t08,$H1); &FR($t08);
- &st($R,&QWPw(2,$rp));
- &add($H1,$H2,$R);
-
- # column 3
- &mov("zero",$H1);
- &add($R,$r09,$R);
- &mov("zero",$H2);
- &mul($a[0],$b[3],($r12)=&NR(1));
- &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
- &add($R,$r10,$R);
- &add($H1,$t09,$H1); &FR($t09);
- &mul($a[1],$b[2],($r13)=&NR(1));
- &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
- &add($R,$r11,$R);
- &add($H1,$t10,$H1); &FR($t10);
- &mul($a[2],$b[1],($r14)=&NR(1));
- &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
- &add($R,$r12,$R);
- &add($H1,$t11,$H1); &FR($t11);
- &mul($a[3],$b[0],($r15)=&NR(1));
- &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
- &add($R,$r13,$R);
- &add($H1,$t12,$H1); &FR($t12);
- &muh($a[0],$b[3],($r16)=&NR(1));
- &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
- &add($R,$r14,$R);
- &add($H1,$t13,$H1); &FR($t13);
- &muh($a[1],$b[2],($r17)=&NR(1));
- &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
- &add($R,$r15,$R);
- &add($H1,$t14,$H1); &FR($t14);
- &muh($a[2],$b[1],($r18)=&NR(1));
- &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
- &add($H1,$t15,$H1); &FR($t15);
- &st($R,&QWPw(3,$rp));
- &add($H1,$H2,$R);
-
- # column 4
- &mov("zero",$H1);
- &add($R,$r16,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[0],($r19)=&NR(1));
- &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
- &add($R,$r17,$R);
- &add($H1,$t16,$H1); &FR($t16);
- &mul($a[1],$b[3],($r20)=&NR(1));
- &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
- &add($R,$r18,$R);
- &add($H1,$t17,$H1); &FR($t17);
- &mul($a[2],$b[2],($r21)=&NR(1));
- &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
- &add($R,$r19,$R);
- &add($H1,$t18,$H1); &FR($t18);
- &mul($a[3],$b[1],($r22)=&NR(1));
- &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
- &add($R,$r20,$R);
- &add($H1,$t19,$H1); &FR($t19);
- &muh($a[1],$b[3],($r23)=&NR(1));
- &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
- &add($R,$r21,$R);
- &add($H1,$t20,$H1); &FR($t20);
- &muh($a[2],$b[2],($r24)=&NR(1));
- &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
- &add($R,$r22,$R);
- &add($H1,$t21,$H1); &FR($t21);
- &muh($a[3],$b[1],($r25)=&NR(1));
- &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
- &add($H1,$t22,$H1); &FR($t22);
- &st($R,&QWPw(4,$rp));
- &add($H1,$H2,$R);
-
- # column 5
- &mov("zero",$H1);
- &add($R,$r23,$R);
- &mov("zero",$H2);
- &mul($a[2],$b[3],($r26)=&NR(1));
- &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
- &add($R,$r24,$R);
- &add($H1,$t23,$H1); &FR($t23);
- &mul($a[3],$b[2],($r27)=&NR(1));
- &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
- &add($R,$r25,$R);
- &add($H1,$t24,$H1); &FR($t24);
- &muh($a[2],$b[3],($r28)=&NR(1));
- &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
- &add($R,$r26,$R);
- &add($H1,$t25,$H1); &FR($t25);
- &muh($a[3],$b[2],($r29)=&NR(1));
- &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
- &add($R,$r27,$R);
- &add($H1,$t26,$H1); &FR($t26);
- &mul($a[3],$b[3],($r30)=&NR(1));
- &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
- &add($H1,$t27,$H1); &FR($t27);
- &st($R,&QWPw(5,$rp));
- &add($H1,$H2,$R);
-
- # column 6
- &mov("zero",$H1);
- &add($R,$r28,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[3],($r31)=&NR(1));
- &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
- &add($R,$r29,$R);
- &add($H1,$t28,$H1); &FR($t28);
- ############
- &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
- &add($R,$r30,$R);
- &add($H1,$t29,$H1); &FR($t29);
- ############
- &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
- &add($H1,$t30,$H1); &FR($t30);
- &st($R,&QWPw(6,$rp));
- &add($H1,$H2,$R);
-
- # column 7: carries plus the high half of a[3]*b[3]
- &add($R,$r31,$R); &FR($r31);
- &st($R,&QWPw(7,$rp));
-
- &FR($R,$H1,$H2);
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub mul_add_c
- {
- # Emit code that folds the product a*b into the three-word
- # accumulator (c2,c1,c0), logging a running call count to STDERR.
- local($a,$b,$c0,$c1,$c2)=@_;
- local($lo,$hi,$carry_lo,$carry_hi);
-
- # progress trace: one line per emitted multiply-accumulate
- print STDERR "count=$cnt\n";
- $cnt++;
-
- # product halves, each in a freshly allocated register
- ($lo)=&NR(1);
- &mul($a,$b,$lo);
- ($hi)=&NR(1);
- &muh($a,$b,$hi);
-
- # c0 += lo, remembering the carry out
- &add($c0,$lo,$c0);
- ($carry_lo)=&NR(1);
- &cmpult($c0,$lo,$carry_lo);
- &FR($lo);
-
- # fold that carry into the high half before it reaches c1
- &add($carry_lo,$hi,$hi);
- &FR($carry_lo);
-
- # c1 += hi, remembering the carry out
- &add($c1,$hi,$c1);
- ($carry_hi)=&NR(1);
- &cmpult($c1,$hi,$carry_hi);
- &FR($hi);
-
- # c2 += carry
- &add($c2,$carry_hi,$c2);
- &FR($carry_hi);
- }
-
-sub bn_mul_comba4
- {
- # Emit a 4x4 word multiply: rp[0..7] = ap[0..3] * bp[0..3].
- # Each result column is accumulated with &mul_add_c into the rotating
- # three-register accumulator ($c0,$c1,$c2); after a column is stored
- # the roles rotate and the new top word is cleared.
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
- &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
-
- # column 0
- # &NR takes a register count everywhere else in these scripts; the
- # original passed the register name ($c0) instead of 1 here and after
- # every column store below.
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 1
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 2
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 3
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 4
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 5
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # columns 6 and 7
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &FR($c0,$c1,$c2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_mul_comba8
- {
- # Emit an 8x8 word multiply: rp[0..15] = ap[0..7] * bp[0..7].
- # Each result column is accumulated with &mul_add_c (defined in a
- # sibling script) into the rotating three-register accumulator
- # ($c0,$c1,$c2); after a column is stored the roles rotate and the
- # new top word is cleared.
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- # $reg_s0/$reg_s1 are spilled to two stack slots and handed to the
- # register pool; they are reloaded just before the function ends.
- &stack_push(2);
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &st($reg_s0,&swtmp(0)); &FR($reg_s0);
- &st($reg_s1,&swtmp(1)); &FR($reg_s1);
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- # Words 4..7 come from their own offsets; the original loaded all of
- # them from word 1.
- &ld(($a[4])=&NR(1),&QWPw(4,$ap));
- &ld(($b[4])=&NR(1),&QWPw(4,$bp));
- &ld(($a[5])=&NR(1),&QWPw(5,$ap));
- &ld(($b[5])=&NR(1),&QWPw(5,$bp));
- &ld(($a[6])=&NR(1),&QWPw(6,$ap));
- &ld(($b[6])=&NR(1),&QWPw(6,$bp));
- &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
- &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp);
-
- # column 0
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 1
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 2
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 3
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 4
- &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 5
- &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 6
- &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 7
- &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 8
- &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 9
- &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 10
- &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
- &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
- &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 11
- &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
- &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
- &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 12
- &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
- &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
- &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # column 13
- &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
- &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
- &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # columns 14 and 15
- &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &FR($c0,$c1,$c2);
-
- &ld($reg_s0,&swtmp(0));
- &ld($reg_s1,&swtmp(1));
- &stack_pop(2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_words # emits bn_sqr_words through the emitter helpers: per input word, store the low then the high half of its square
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny); # NOTE(review): "$couny" looks like a typo for $count, which is assigned below without being localised
-
- &init_pool(3);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc); # $cc is not read again by the code this sub emits
- &br(&label("finish")); # always taken: the unrolled loop below is parked in a string, so every word goes through last_loop
- &blt($count,&label("finish")); # never reached in the emitted code because of the branch above
-
- ($a0,$r0)=&NR(2); # NOTE(review): these two registers are not released by any &FR visible in this sub
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$a0,($l0)=&NR(1)); # low half of a*a
- &add($ap,$QWS,$ap); # ap += 1 word
- &add($rp,2*$QWS,$rp); # rp += 2 words, so the stores below use indices -2 and -1
- &sub($count,1,$count);
- &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); # high half of a*a
- &st($l0,&QWPw(-2,$rp)); &FR($l0);
- &st($h0,&QWPw(-1,$rp)); &FR($h0);
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias applied on entry
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# sqr_add_c($a,$c0,$c1,$c2): emit code that adds the double-word square
-# $a*$a into the three-word accumulator c0 (low), c1, c2 (high).
-#
-# The carry out of c0 is folded into the high product word BEFORE that word
-# is added to c1.  Adding the carry to c1 after c1's own carry has been
-# sampled would silently drop a carry whenever c1 wraps on that last add.
-# The high word of a square is at most 2^w-2 for a w-bit word, so adding a
-# 0/1 carry to it cannot itself overflow.
-sub sqr_add_c
- {
- local($a,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$a,($l1)=&NR(1)); # l1 = low word of a*a
- &muh($a,$a,($h1)=&NR(1)); # h1 = high word of a*a
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0
- &add($t1,$h1,$h1); &FR($t1); # fold that carry into h1
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1
- &add($c2,$t2,$c2); &FR($t2);
- }
-
-# sqr_add_c2($a,$b,$c0,$c1,$c2): emit code that adds 2*$a*$b into the
-# three-word accumulator c0 (low), c1, c2 (high).
-#
-# The product is doubled in place: the top bit of each half is sampled with
-# cmplt-against-zero before the half is added to itself, the low half's top
-# bit moves into the high half and the high half's top bit goes to c2.
-#
-# The carry out of c0 is added to c1 with its own carry check.  It cannot
-# simply be folded into the doubled high word (that word may already be all
-# ones), and adding it to c1 without a check would lose a carry when c1 wraps.
-sub sqr_add_c2
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$lc1,$hc1);
-
- &mul($a,$b,($l1)=&NR(1)); # l1 = low word of a*b
- &muh($a,$b,($h1)=&NR(1)); # h1 = high word of a*b
- &cmplt($l1,"zero",($lc1)=&NR(1)); # lc1 = top bit of l1
- &cmplt($h1,"zero",($hc1)=&NR(1)); # hc1 = top bit of h1
- &add($l1,$l1,$l1); # l1 *= 2
- &add($h1,$h1,$h1); # h1 *= 2
- &add($h1,$lc1,$h1); &FR($lc1); # bit shifted out of l1 enters h1
- &add($c2,$hc1,$c2); &FR($hc1); # bit shifted out of h1 goes to c2
-
- &add($c0,$l1,$c0);
- &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0
- &add($c1,$lc1,$c1);
- &cmpult($c1,$lc1,$lc1); # lc1 = carry out of c1 += lc1
- &add($c2,$lc1,$c2); &FR($lc1);
-
- &add($c1,$h1,$c1);
- &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1 += h1
- &add($c2,$hc1,$c2); &FR($hc1);
- }
-
-
-sub bn_sqr_comba4 # emit $name: square the 4-word input at $ap into 8 result words at $rp, one result column at a time
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0); # result pointer
- $ap=&wparam(1); # input pointer
-
- &function_begin($name,"");
-
- &comment("");
-# load all four input words, then release the pointer register
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3); # three-word column accumulator
-# result word 0: a0*a0
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate names: the stored word's register becomes the new top word
- &mov("zero",$c2);
-# result word 1: 2*a0*a1
- &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 2: a1*a1 + 2*a2*a0
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 3: 2*a3*a0 + 2*a2*a1
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 4: a2*a2 + 2*a3*a1
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 5: 2*a3*a2
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result words 6 and 7: a3*a3 plus the carried-in accumulator
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_comba8 # emit $name: square the 8-word input at $ap into 16 result words at $rp, one result column at a time
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0); # result pointer
- $ap=&wparam(1); # input pointer
-
- &function_begin($name,"");
-
- &comment("");
-# load all eight input words, then release the pointer register
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($a[4])=&NR(1),&QWPw(4,$ap));
- &ld(($a[5])=&NR(1),&QWPw(5,$ap));
- &ld(($a[6])=&NR(1),&QWPw(6,$ap));
- &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3); # three-word column accumulator
-# result word 0: a0*a0
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
- &st($c0,&QWPw(0,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0); # rotate names: the stored word's register becomes the new top word
- &mov("zero",$c2);
-# result word 1: cross product with index sum 1
- &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 2: a1*a1 plus cross product with index sum 2
- &sqr_add_c($a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 3: cross products with index sum 3
- &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 4: a2*a2 plus cross products with index sum 4
- &sqr_add_c($a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 5: cross products with index sum 5
- &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 6: a3*a3 plus cross products with index sum 6
- &sqr_add_c($a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 7: cross products with index sum 7
- &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
- &st($c0,&QWPw(7,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 8: a4*a4 plus cross products with index sum 8
- &sqr_add_c($a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
- &st($c0,&QWPw(8,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 9: cross products with index sum 9
- &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
- &st($c0,&QWPw(9,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 10: a5*a5 plus cross products with index sum 10
- &sqr_add_c($a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
- &st($c0,&QWPw(10,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 11: cross products with index sum 11
- &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
- &st($c0,&QWPw(11,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 12: a6*a6 plus cross product with index sum 12
- &sqr_add_c($a[6],$c0,$c1,$c2);
- &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
- &st($c0,&QWPw(12,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result word 13: cross product with index sum 13
- &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
- &st($c0,&QWPw(13,$rp));
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-# result words 14 and 15: a7*a7 plus the carried-in accumulator
- &sqr_add_c($a[7],$c0,$c1,$c2);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sub_words # emit $name: r[i] = a[i] - b[i] - borrow for $count words, with the running borrow kept in $cc (r0)
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0); # result pointer
- $ap=&wparam(1); # first operand pointer
- $bp=&wparam(2); # second operand pointer
- $count=&wparam(3); # number of words
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # bias count by -4: negative means fewer than 4 words remain
- &mov("zero",$cc); # no incoming borrow
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop"); # four words per iteration; a0/b0 are already loaded on entry
-
- ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # tmp = borrow from a0-b0
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # a0 -= b0
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # b0 = borrow from subtracting the incoming borrow
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$o0); # o0 = result word 0
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # cc = borrow out of word 0
-
- &cmpult($a1,$b1,$t1); # t1 = borrow from a1-b1
- &sub($a1,$b1,$a1); # a1 -= b1
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # b1 = borrow from subtracting the incoming borrow
- &sub($a1,$cc,$o1); # o1 = result word 1
- &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); # cc = borrow out of word 1
-
- &cmpult($a2,$b2,$tmp); # tmp = borrow from a2-b2
- &sub($a2,$b2,$a2); # a2 -= b2
- &st($o0,&QWPw(0,$rp)); &FR($o0); # store result word 0
- &cmpult($a2,$cc,$b2); # b2 = borrow from subtracting the incoming borrow
- &sub($a2,$cc,$o2); # o2 = result word 2
- &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); # cc = borrow out of word 2
-
- &cmpult($a3,$b3,$t3); # t3 = borrow from a3-b3
- &sub($a3,$b3,$a3); # a3 -= b3
- &st($o1,&QWPw(1,$rp)); &FR($o1);
- &cmpult($a3,$cc,$b3); # b3 = borrow from subtracting the incoming borrow
- &sub($a3,$cc,$o3); # o3 = result word 3
- &add($b3,$t3,$cc); &FR($t3,$a3,$b3); # cc = borrow out of word 3
-
- &st($o2,&QWPw(2,$rp)); &FR($o2);
- &sub($count,4,$count); # count-=4
- &st($o3,&QWPw(3,$rp)); &FR($o3);
- &add($ap,4*$QWS,$ap); # advance ap by four words
- &add($bp,4*$QWS,$bp); # advance bp by four words
- &add($rp,4*$QWS,$rp); # advance rp by four words
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next group
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # tmp = borrow from a-b
- &sub($a0,$b0,$a0); # do the subtract
- &cmpult($a0,$cc,$b0); # b0 = borrow from subtracting the incoming borrow
- &sub($a0,$cc,$a0); # subtract the incoming borrow
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name); # presumably emits the function exit for this path — confirm in the helper library
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias: count is now the number of leftover words
- &bgt($count,&label("last_loop"));
-
- &FR($a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# bn_add_words($name): emit an Alpha routine computing r[i] = a[i] + b[i] + carry
-# for $count words.  Parameters of the emitted routine: result pointer,
-# first operand pointer, second operand pointer, word count.  The running
-# carry lives in $cc (r0).
-#
-# The main loop handles four words per iteration; "last_loop" handles the
-# remaining 0..3 words one at a time.
-#
-# Fix: the unrolled loop used to store all four sums at word offset 0 of
-# $rp, so only the fourth sum survived and words 1..3 were never written.
-# The sums are now stored at offsets 0..3, matching the four-word pointer
-# advance at the bottom of the loop.
-sub bn_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # bias count by -4: negative means fewer than 4 words remain
- &mov("zero",$cc); # no incoming carry
- &blt($count,&label("finish"));
-
- ($a0,$b0)=&NR(2);
-
-##########################################################
- &set_label("loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
- &ld(($b0)=&NR(1),&QWPw(0,$bp));
- &ld(($a1)=&NR(1),&QWPw(1,$ap));
- &ld(($b1)=&NR(1),&QWPw(1,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &ld(($a2)=&NR(1),&QWPw(2,$ap));
- &cmpult($o0,$b0,$t0); # t0 = carry from a0+b0
- &add($o0,$cc,$o0); # add the incoming carry
- &cmpult($o0,$cc,$cc); # cc = carry from adding the incoming carry
- &ld(($b2)=&NR(1),&QWPw(2,$bp));
- &add($cc,$t0,$cc); &FR($t0); # cc = carry out of word 0
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &ld(($a3)=&NR(1),&QWPw(3,$ap));
- &add($cc,$t1,$cc); &FR($t1); # cc = carry out of word 1
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &ld(($b3)=&NR(1),&QWPw(3,$bp));
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &add($cc,$t2,$cc); &FR($t2); # cc = carry out of word 2
-
- ($t3,$o3)=&NR(2);
-
- &st($o1,&QWPw(1,$rp)); &FR($o1);
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &st($o2,&QWPw(2,$rp)); &FR($o2);
- &cmpult($o3,$cc,$cc);
- &st($o3,&QWPw(3,$rp)); &FR($o3);
- &add($cc,$t3,$cc); &FR($t3); # cc = carry out of word 3
-
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # advance ap by four words
- &add($bp,4*$QWS,$bp); # advance bp by four words
- &add($rp,4*$QWS,$rp); # advance rp by four words
-
- ###
- &bge($count,&label("loop"));
- ###
- &br(&label("finish"));
-##################################################
- # Do the last 0..3 words
-
- ($t0,$o0)=&NR(2);
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($a0,$b0,$o0);
- &sub($count,1,$count);
- &cmpult($o0,$b0,$t0); # t0 = carry from a+b
- &add($o0,$cc,$o0); # add the incoming carry
- &cmpult($o0,$cc,$cc); # cc = carry from adding the incoming carry
- &add($rp,$QWS,$rp);
- &st($o0,&QWPw(-1,$rp)); # save (rp was already advanced)
- &add($cc,$t0,$cc); # combine the carries
-
- ###
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias: count is now the number of leftover words
- &bgt($count,&label("last_loop"));
-
- &FR($o0,$t0,$a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-
-sub bn_div_words # hands a fixed block of Alpha assembler text (the heredoc below) to &asm_add; takes no arguments
- { # NOTE(review): inside the heredoc the "bn_div_words" label line has no trailing colon, unlike "bn_div_words.ng:" — confirm whether &asm_add or the assembler tolerates this
- local($data)=<<'EOF';
- #
- # What follows was taken directly from the C compiler with a few
- # hacks to redo the lables.
- #
-.text
- .set noreorder
- .set volatile
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words
- ldgp $29,0($27)
-bn_div_words.ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$9119
- lda $0,-1
- br $31,$9136
- .align 4
-$9119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$9120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$9120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$9120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$9122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$9122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$9123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$9126
- zapnot $7,15,$27
- br $31,$9127
- .align 4
-$9126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$9127:
- srl $10,32,$4
- .align 5
-$9128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$9129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$9129
- subq $27,1,$27
- br $31,$9128
- .align 4
-$9129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$9134
- addq $9,$11,$9
- subq $27,1,$27
-$9134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$9124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$9123
- .align 4
-$9124:
- bis $13,$27,$0
-$9136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-EOF
- &asm_add($data); # pass the text on unchanged; the heredoc is single-quoted, so its $N register names are not interpolated
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# bn_mul_words($name): emit an Alpha routine computing, for $count words,
-#   r[i] = low(a[i]*word + carry),  carry = high(a[i]*word) + carry-out
-# Parameters of the emitted routine: result pointer, input pointer, word
-# count, multiplier word.  The running carry lives in $cc (r0).
-#
-# Fixes relative to the previous version:
-#  * $a0..$a3 are released only after BOTH multiplies (muh and mul) that
-#    read them.  They used to be released after the first one, so an
-#    intervening &NR could hand the same register out again while it was
-#    still needed, and each register was then released twice.
-#  * The loop no longer advances $bp: this routine has no such parameter,
-#    so that add acted on whatever $bp was left over from another generator.
-#  * A missing ';' after &FR($a0) in "last_loop" made that call and the
-#    following &add one bitwise-AND expression.
-sub bn_mul_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # bias count by -4: negative means fewer than 4 words remain
- &mov("zero",$cc); # no incoming carry
- ###
- &blt($count,&label("finish"));
-
- ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
-
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
-
- &muh($a0,$word,($h0)=&NR(1));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ### wait 8
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- ### wait 8
- &muh($a1,$word,($h1)=&NR(1));
- &add($l0,$cc,$l0); ### wait 8
- &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
- &cmpult($l0,$cc,$cc); ### wait 8
- &muh($a2,$word,($h2)=&NR(1));
- &add($h0,$cc,$cc); &FR($h0); ### wait 8
- &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
- &add($l1,$cc,$l1); ### wait 8
- &st($l0,&QWPw(0,$rp)); &FR($l0);
- &cmpult($l1,$cc,$cc); ### wait 8
- &muh($a3,$word,($h3)=&NR(1));
- &add($h1,$cc,$cc); &FR($h1);
- &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
- &add($l2,$cc,$l2);
- &st($l1,&QWPw(1,$rp)); &FR($l1);
- &cmpult($l2,$cc,$cc);
- &add($h2,$cc,$cc); &FR($h2);
- &sub($count,4,$count); # count-=4
- &st($l2,&QWPw(2,$rp)); &FR($l2);
- &add($l3,$cc,$l3);
- &cmpult($l3,$cc,$cc);
- &add($h3,$cc,$cc); &FR($h3);
- &add($ap,4*$QWS,$ap); # advance ap by four words
- &st($l3,&QWPw(3,$rp)); &FR($l3);
- &add($rp,4*$QWS,$rp); # advance rp by four words
- ###
- &blt($count,&label("finish"));
- ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
- # NOTE(review): this branches to "finish", not back to "loop", so any
- # words after the first group of four go through "last_loop" and the
- # load just above is unused.  Kept as-is: the $a0 allocated here need not
- # be the register the code at "loop" was generated with.
- &br(&label("finish"));
-##################################################
-
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- ###
- ###
- ###
- &muh($a0,$word,($h0)=&NR(1));
- ### Wait 8 for next mul issue
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- &add($ap,$QWS,$ap);
- ### Loose 12 until result is available
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &add($l0,$cc,$l0); # add the incoming carry
- ###
- &st($l0,&QWPw(-1,$rp)); &FR($l0);
- &cmpult($l0,$cc,$cc); # cc = carry from that add
- &add($h0,$cc,$cc); &FR($h0); # cc = carry into the next word
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias: count is now the number of leftover words
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# bn_mul_add_words($name): emit an Alpha routine computing, for $count words,
-#   r[i] = low(r[i] + a[i]*word + carry),  carry = the overflow of that sum
-# Parameters of the emitted routine: result pointer, input pointer, word
-# count, multiplier word.  The running carry lives in $cc (r0).
-#
-# The four-way unrolled loop is only captured in the string $a and is not
-# evaluated here, so every word is handled by "last_loop".
-#
-# Fix: in "last_loop" $a0 was released after the first multiply (muh) and
-# again after the second (mul).  It is now released once, after its last use.
-sub bn_mul_add_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r,$couny);
-
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
- $word=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # bias count by -4 (undone again at "finish")
- &mov("zero",$cc); # no incoming carry
- ###
- &blt($count,&label("finish"));
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- &ld(($r0)=&NR(1),&QWPw(0,$rp));
- &ld(($a1)=&NR(1),&QWPw(1,$ap));
- &muh($a0,$word,($h0)=&NR(1));
- &ld(($r1)=&NR(1),&QWPw(1,$rp));
- &ld(($a2)=&NR(1),&QWPw(2,$ap));
- ###
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
- &ld(($r2)=&NR(1),&QWPw(2,$rp));
- &muh($a1,$word,($h1)=&NR(1));
- &ld(($a3)=&NR(1),&QWPw(3,$ap));
- &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
- &ld(($r3)=&NR(1),&QWPw(3,$rp));
- &add($r0,$l0,$r0);
- &add($r1,$l1,$r1);
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
- &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1);
- &muh($a2,$word,($h2)=&NR(1));
- &add($r0,$cc,$r0);
- &add($h0,$t0,$h0); &FR($t0);
- &cmpult($r0,$cc,$cc);
- &add($h1,$t1,$h1); &FR($t1);
- &add($h0,$cc,$cc); &FR($h0);
- &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
- &add($r1,$cc,$r1);
- &cmpult($r1,$cc,$cc);
- &add($r2,$l2,$r2);
- &add($h1,$cc,$cc); &FR($h1);
- &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2);
- &muh($a3,$word,($h3)=&NR(1));
- &add($r2,$cc,$r2);
- &st($r0,&QWPw(0,$rp)); &FR($r0);
- &add($h2,$t2,$h2); &FR($t2);
- &st($r1,&QWPw(1,$rp)); &FR($r1);
- &cmpult($r2,$cc,$cc);
- &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
- &add($h2,$cc,$cc); &FR($h2);
- &st($r2,&QWPw(2,$rp)); &FR($r2);
- &sub($count,4,$count); # count-=4
- &add($rp,4*$QWS,$rp); # count+=4
- &add($r3,$l3,$r3);
- &add($ap,4*$QWS,$ap); # count+=4
- &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3);
- &add($r3,$cc,$r3);
- &add($h3,$t3,$h3); &FR($t3);
- &cmpult($r3,$cc,$cc);
- &st($r3,&QWPw(-1,$rp)); &FR($r3);
- &add($h3,$cc,$cc); &FR($h3);
-
- ###
- &blt($count,&label("finish"));
- &ld(($a0)=&NR(1),&QWPw(0,$ap));
- &br(&label("loop"));
-EOF
-##################################################
- # Do the remaining words one at a time
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get r
- ###
- ###
- &muh($a0,$word,($h0)=&NR(1)); # h0 = high word of a*word
- ### wait 8
- &mul($a0,$word,($l0)=&NR(1)); &FR($a0); # l0 = low word; a0 is dead from here on
- &add($rp,$QWS,$rp);
- &add($ap,$QWS,$ap);
- &sub($count,1,$count);
- ### wait 3 until l0 is available
- &add($r0,$l0,$r0);
- ###
- &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); # t0 = carry from r += l0
- &add($r0,$cc,$r0); # add the incoming carry
- &add($h0,$t0,$h0); &FR($t0);
- &cmpult($r0,$cc,$cc); # cc = carry from that add
- &add($h0,$cc,$cc); &FR($h0); # cc = carry into the next word
-
- &st($r0,&QWPw(-1,$rp)); &FR($r0); # rp was already advanced
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# upto
-
-# mul_add_c($a,$b,$c0,$c1,$c2): emit code adding the double-word product
-# $a*$b into the three-word accumulator c0 (low), c1, c2 (high).  Scratch
-# registers are taken from the pool just before the instruction that
-# writes them and handed back right after their last use.
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- # low and high halves of the product
- ($l1)=&NR(1);
- &mul($a,$b,$l1);
- ($h1)=&NR(1);
- &muh($a,$b,$h1);
-
- # c0 += low half; the carry out is folded into the high half
- &add($c0,$l1,$c0);
- ($t1)=&NR(1);
- &cmpult($c0,$l1,$t1);
- &FR($l1);
- &add($t1,$h1,$h1);
- &FR($t1);
-
- # c1 += high half; the carry out goes to c2
- &add($c1,$h1,$c1);
- ($t2)=&NR(1);
- &cmpult($c1,$h1,$t2);
- &FR($h1);
- &add($c2,$t2,$c2);
- &FR($t2);
- }
-
-# bn_mul_comba4($name): emit an Alpha routine multiplying the 4-word inputs
-# at $ap and $bp into 8 result words at $rp.  This is the hand-scheduled
-# variant: multiplies are issued ahead of the adds that consume them.
-#
-# For each result column, $R collects the word sum and $H1/$H2 count the
-# carries.  After the column is stored, $R is restarted as $H1+$H2, the
-# carry into the next column.
-#
-# Fixes relative to the previous version:
-#  * the second carry counter is cleared via $H2 (it used $H0, which is
-#    never assigned, leaving $H2 uninitialised for the first column);
-#  * column 2 adds $r06, low(a0*b2); it used to add $r01, already released,
-#    while the carry test on the next line compared against $r06;
-#  * every "&add(...) &FR(...)" pair now has its ';' — without it the two
-#    calls formed a single bitwise-AND expression.
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &mul($a[0],$b[0],($r00)=&NR(1));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &muh($a[0],$b[0],($r01)=&NR(1));
- &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &mul($a[0],$b[1],($r02)=&NR(1));
-
- ($R,$H1,$H2)=&NR(3);
-
- # result word 0
- &st($r00,&QWPw(0,$rp)); &FR($r00);
-
- # result word 1: high(a0*b0) + low(a0*b1) + low(a1*b0)
- &mov("zero",$R);
- &mul($a[1],$b[0],($r03)=&NR(1));
-
- &mov("zero",$H1);
- &mov("zero",$H2);
- &add($R,$r01,$R);
- &muh($a[0],$b[1],($r04)=&NR(1));
- &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
- &add($R,$r02,$R);
- &add($H1,$t01,$H1); &FR($t01);
- &muh($a[1],$b[0],($r05)=&NR(1));
- &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
- &add($R,$r03,$R);
- &add($H2,$t02,$H2); &FR($t02);
- &mul($a[0],$b[2],($r06)=&NR(1));
- &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
- &add($H1,$t03,$H1); &FR($t03);
- &st($R,&QWPw(1,$rp));
- &add($H1,$H2,$R);
-
- # result word 2: r04..r08
- &mov("zero",$H1);
- &add($R,$r04,$R);
- &mov("zero",$H2);
- &mul($a[1],$b[1],($r07)=&NR(1));
- &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
- &add($R,$r05,$R);
- &add($H1,$t04,$H1); &FR($t04);
- &mul($a[2],$b[0],($r08)=&NR(1));
- &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
- &add($R,$r06,$R);
- &add($H2,$t05,$H2); &FR($t05);
- &muh($a[0],$b[2],($r09)=&NR(1));
- &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
- &add($R,$r07,$R);
- &add($H1,$t06,$H1); &FR($t06);
- &muh($a[1],$b[1],($r10)=&NR(1));
- &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
- &add($R,$r08,$R);
- &add($H2,$t07,$H2); &FR($t07);
- &muh($a[2],$b[0],($r11)=&NR(1));
- &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
- &add($H1,$t08,$H1); &FR($t08);
- &st($R,&QWPw(2,$rp));
- &add($H1,$H2,$R);
-
- # result word 3: r09..r15
- &mov("zero",$H1);
- &add($R,$r09,$R);
- &mov("zero",$H2);
- &mul($a[0],$b[3],($r12)=&NR(1));
- &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
- &add($R,$r10,$R);
- &add($H1,$t09,$H1); &FR($t09);
- &mul($a[1],$b[2],($r13)=&NR(1));
- &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
- &add($R,$r11,$R);
- &add($H1,$t10,$H1); &FR($t10);
- &mul($a[2],$b[1],($r14)=&NR(1));
- &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
- &add($R,$r12,$R);
- &add($H1,$t11,$H1); &FR($t11);
- &mul($a[3],$b[0],($r15)=&NR(1));
- &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
- &add($R,$r13,$R);
- &add($H1,$t12,$H1); &FR($t12);
- &muh($a[0],$b[3],($r16)=&NR(1));
- &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
- &add($R,$r14,$R);
- &add($H1,$t13,$H1); &FR($t13);
- &muh($a[1],$b[2],($r17)=&NR(1));
- &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
- &add($R,$r15,$R);
- &add($H1,$t14,$H1); &FR($t14);
- &muh($a[2],$b[1],($r18)=&NR(1));
- &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
- &add($H1,$t15,$H1); &FR($t15);
- &st($R,&QWPw(3,$rp));
- &add($H1,$H2,$R);
-
- # result word 4: r16..r22
- &mov("zero",$H1);
- &add($R,$r16,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[0],($r19)=&NR(1));
- &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
- &add($R,$r17,$R);
- &add($H1,$t16,$H1); &FR($t16);
- &mul($a[1],$b[3],($r20)=&NR(1));
- &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
- &add($R,$r18,$R);
- &add($H1,$t17,$H1); &FR($t17);
- &mul($a[2],$b[2],($r21)=&NR(1));
- &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
- &add($R,$r19,$R);
- &add($H1,$t18,$H1); &FR($t18);
- &mul($a[3],$b[1],($r22)=&NR(1));
- &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
- &add($R,$r20,$R);
- &add($H1,$t19,$H1); &FR($t19);
- &muh($a[1],$b[3],($r23)=&NR(1));
- &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
- &add($R,$r21,$R);
- &add($H1,$t20,$H1); &FR($t20);
- &muh($a[2],$b[2],($r24)=&NR(1));
- &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
- &add($R,$r22,$R);
- &add($H1,$t21,$H1); &FR($t21);
- &muh($a[3],$b[1],($r25)=&NR(1));
- &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
- &add($H1,$t22,$H1); &FR($t22);
- &st($R,&QWPw(4,$rp));
- &add($H1,$H2,$R);
-
- # result word 5: r23..r27
- &mov("zero",$H1);
- &add($R,$r23,$R);
- &mov("zero",$H2);
- &mul($a[2],$b[3],($r26)=&NR(1));
- &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
- &add($R,$r24,$R);
- &add($H1,$t23,$H1); &FR($t23);
- &mul($a[3],$b[2],($r27)=&NR(1));
- &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
- &add($R,$r25,$R);
- &add($H1,$t24,$H1); &FR($t24);
- &muh($a[2],$b[3],($r28)=&NR(1));
- &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
- &add($R,$r26,$R);
- &add($H1,$t25,$H1); &FR($t25);
- &muh($a[3],$b[2],($r29)=&NR(1));
- &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
- &add($R,$r27,$R);
- &add($H1,$t26,$H1); &FR($t26);
- &mul($a[3],$b[3],($r30)=&NR(1));
- &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
- &add($H1,$t27,$H1); &FR($t27);
- &st($R,&QWPw(5,$rp));
- &add($H1,$H2,$R);
-
- # result word 6: r28..r30
- &mov("zero",$H1);
- &add($R,$r28,$R);
- &mov("zero",$H2);
- &muh($a[3],$b[3],($r31)=&NR(1));
- &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
- &add($R,$r29,$R);
- &add($H1,$t28,$H1); &FR($t28);
- ############
- &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
- &add($R,$r30,$R);
- &add($H1,$t29,$H1); &FR($t29);
- ############
- &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
- &add($H1,$t30,$H1); &FR($t30);
- &st($R,&QWPw(6,$rp));
- &add($H1,$H2,$R);
-
- # result word 7: carries plus high(a3*b3)
- &add($R,$r31,$R); &FR($r31);
- &st($R,&QWPw(7,$rp));
-
- &FR($R,$H1,$H2);
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# mul_add_c($a,$b,$c0,$c1,$c2): emit code adding the double-word product
-# $a*$b into the three-word accumulator c0 (low), c1, c2 (high).  Each call
-# also writes a running "count=N" trace line to STDERR and bumps the global
-# $cnt.
-sub mul_add_c
- {
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- print STDERR "count=$cnt\n";
- $cnt++;
-
- # low and high halves of the product
- ($l1)=&NR(1);
- &mul($a,$b,$l1);
- ($h1)=&NR(1);
- &muh($a,$b,$h1);
-
- # c0 += low half; the carry out is folded into the high half
- &add($c0,$l1,$c0);
- ($t1)=&NR(1);
- &cmpult($c0,$l1,$t1);
- &FR($l1);
- &add($t1,$h1,$h1);
- &FR($t1);
-
- # c1 += high half; the carry out goes to c2
- &add($c1,$h1,$c1);
- ($t2)=&NR(1);
- &cmpult($c1,$h1,$t2);
- &FR($h1);
- &add($c2,$t2,$c2);
- &FR($t2);
- }
-
-# bn_mul_comba4($name): emit an Alpha routine multiplying the 4-word inputs
-# at $ap and $bp into 8 result words at $rp, one result column at a time.
-# ($c0,$c1,$c2) is a three-word accumulator; after each column is stored
-# the names are rotated so the stored word's slot becomes the new top word,
-# which is then cleared.
-#
-# Fix: after storing a column the accumulator register is released and a
-# fresh one requested with &NR(1).  The request used to be &NR($c0), which
-# passes a register name where every other call passes a register count.
-sub bn_mul_comba4
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- # load both operands, then release the pointer registers
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
- &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
-
- # result word 0
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 1
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 2
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 3; operand words are released after their last use
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 4
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 5
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result words 6 and 7
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
- &st($c0,&QWPw(6,$rp));
- &st($c1,&QWPw(7,$rp));
-
- &FR($c0,$c1,$c2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-# bn_mul_comba8($name): emit an Alpha routine multiplying the 8-word inputs
-# at $ap and $bp into 16 result words at $rp, one result column at a time.
-# ($c0,$c1,$c2) is a three-word accumulator; after each column is stored
-# the names are rotated so the stored word's slot becomes the new top word,
-# which is then cleared.  $reg_s0/$reg_s1 are saved to two stack slots on
-# entry and reloaded before returning.
-#
-# Fix: operand words 4..7 are now loaded from word offsets 4..7.  They were
-# all loaded from offset 1, so a[4..7] and b[4..7] each held a copy of
-# word 1.
-sub bn_mul_comba8
- {
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
-
- $cnt=1;
- &init_pool(3);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
-
- &stack_push(2);
- &ld(($a[0])=&NR(1),&QWPw(0,$ap));
- &ld(($b[0])=&NR(1),&QWPw(0,$bp));
- &st($reg_s0,&swtmp(0)); &FR($reg_s0);
- &st($reg_s1,&swtmp(1)); &FR($reg_s1);
- &ld(($a[1])=&NR(1),&QWPw(1,$ap));
- &ld(($b[1])=&NR(1),&QWPw(1,$bp));
- &ld(($a[2])=&NR(1),&QWPw(2,$ap));
- &ld(($b[2])=&NR(1),&QWPw(2,$bp));
- &ld(($a[3])=&NR(1),&QWPw(3,$ap));
- &ld(($b[3])=&NR(1),&QWPw(3,$bp));
- &ld(($a[4])=&NR(1),&QWPw(4,$ap));
- &ld(($b[4])=&NR(1),&QWPw(4,$bp));
- &ld(($a[5])=&NR(1),&QWPw(5,$ap));
- &ld(($b[5])=&NR(1),&QWPw(5,$bp));
- &ld(($a[6])=&NR(1),&QWPw(6,$ap));
- &ld(($b[6])=&NR(1),&QWPw(6,$bp));
- &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
- &ld(($b[7])=&NR(1),&QWPw(7,$bp)); &FR($bp);
-
- # result word 0
- ($c0,$c1,$c2)=&NR(3);
- &mov("zero",$c2);
- &mul($a[0],$b[0],$c0);
- &muh($a[0],$b[0],$c1);
- &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 1
- &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 2
- &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 3
- &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 4
- &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 5
- &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 6
- &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
- &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 7; from here on operand words are released after their last use
- &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
- &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
- &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 8
- &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
- &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
- &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 9
- &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
- &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
- &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 10
- &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
- &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
- &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 11
- &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
- &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
- &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 12
- &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
- &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
- &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
- &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result word 13
- &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
- &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
- &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
- ($c0,$c1,$c2)=($c1,$c2,$c0);
- &mov("zero",$c2);
-
- # result words 14 and 15
- &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
- &st($c0,&QWPw(14,$rp));
- &st($c1,&QWPw(15,$rp));
-
- &FR($c0,$c1,$c2);
-
- # reload the two registers saved on entry and drop the stack slots
- &ld($reg_s0,&swtmp(0));
- &ld($reg_s1,&swtmp(1));
- &stack_pop(2);
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_words
- {
- # Emit the Alpha routine $name(rp,ap,count).  For every input word a[i]
- # the live code stores the result of &mul(a,a) at r[2i] and the result
- # of &muh(a,a) at r[2i+1] (presumably the low and high product halves;
- # the helpers live in alpha.pl).
- #
- # NOTE: only the one-word-at-a-time "last_loop" is ever executed.  The
- # four-way unrolled loop has not been written: the text parked in $a
- # below is an unevaluated copy of the add loop (it still refers to $bp
- # and stores every result at offset 0).
- local($name)=@_;
- # $count used to be misspelt "$couny" here, which left the real $count
- # as an unlocalized global.
- local($cc,$a,$b,$r,$count);
-
- &init_pool(3);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $count=&wparam(2);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count);
- &mov("zero",$cc);
- # Unconditionally skip to "finish" (which undoes the -4 and enters the
- # tail loop).  The blt and the two loads that follow are still emitted
- # but can never run; they are kept so the generated file is unchanged.
- &br(&label("finish"));
- &blt($count,&label("finish"));
-
- ($a0,$r0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($r0,&QWPw(0,$rp));
-
-$a=<<'EOF';
-##########################################################
- &set_label("loop");
-
- ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
- ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
- ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
- ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
- ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
- ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
-
- ($o0,$t0)=&NR(2);
- &add($a0,$b0,$o0);
- &cmpult($o0,$b0,$t0);
- &add($o0,$cc,$o0);
- &cmpult($o0,$cc,$cc);
- &add($cc,$t0,$cc); &FR($t0);
-
- ($t1,$o1)=&NR(2);
-
- &add($a1,$b1,$o1); &FR($a1);
- &cmpult($o1,$b1,$t1); &FR($b1);
- &add($o1,$cc,$o1);
- &cmpult($o1,$cc,$cc);
- &add($cc,$t1,$cc); &FR($t1);
-
- ($t2,$o2)=&NR(2);
-
- &add($a2,$b2,$o2); &FR($a2);
- &cmpult($o2,$b2,$t2); &FR($b2);
- &add($o2,$cc,$o2);
- &cmpult($o2,$cc,$cc);
- &add($cc,$t2,$cc); &FR($t2);
-
- ($t3,$o3)=&NR(2);
-
- &add($a3,$b3,$o3); &FR($a3);
- &cmpult($o3,$b3,$t3); &FR($b3);
- &add($o3,$cc,$o3);
- &cmpult($o3,$cc,$cc);
- &add($cc,$t3,$cc); &FR($t3);
-
- &st($o0,&QWPw(0,$rp)); &FR($o0);
- &st($o1,&QWPw(0,$rp)); &FR($o1);
- &st($o2,&QWPw(0,$rp)); &FR($o2);
- &st($o3,&QWPw(0,$rp)); &FR($o3);
-
- &sub($count,4,$count); # count-=4
- &add($ap,4*$QWS,$ap); # count+=4
- &add($bp,4*$QWS,$bp); # count+=4
- &add($rp,4*$QWS,$rp); # count+=4
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-EOF
-##################################################
- # One input word per iteration.  Because the unrolled loop is skipped
- # this currently handles every word, not just the last 0..3.
-
- &set_label("last_loop");
-
- &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
- &mul($a0,$a0,($l0)=&NR(1));
- &add($ap,$QWS,$ap); # advance a by one word
- &add($rp,2*$QWS,$rp); # advance r by two words
- &sub($count,1,$count);
- &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
- &st($l0,&QWPw(-2,$rp)); &FR($l0); # rp was already advanced, hence -2
- &st($h0,&QWPw(-1,$rp)); &FR($h0); # ... and -1
-
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- # Entered with count-4 in $count: restore it and run the tail loop if
- # any words remain.
- &set_label("finish");
- &add($count,4,$count);
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub sqr_add_c
- {
- # Emit code for (c2,c1,c0) += a*a, where c0 is the least significant
- # word of a three-word accumulator.
- #
- #   $a           register holding the word to square
- #   $c0,$c1,$c2  accumulator registers (updated in place)
- #
- # The carry out of the low word is folded into the high product word
- # before that word is added to c1.  The high word of a 64x64 square is
- # at most 2^64-2, so this cannot overflow, and the single compare that
- # follows sees every carry into c2.  The previous sequence added the
- # low carry to c1 after the c1 carry had been computed, so a wrap of
- # c1 at that point was silently lost.
- #
- # NR/FR are assumed to take registers from / return them to the pool
- # set up by init_pool (defined in alpha.pl).
- local($a,$c0,$c1,$c2)=@_;
- local($l1,$h1,$t1,$t2);
-
- &mul($a,$a,($l1)=&NR(1)); # l1 = low(a*a)
- &muh($a,$a,($h1)=&NR(1)); # h1 = high(a*a)
- &add($c0,$l1,$c0); # c0 += l1
- &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); # t1 = carry out of c0
- &add($h1,$t1,$h1); &FR($t1); # fold it into h1 (cannot overflow)
- &add($c1,$h1,$c1); # c1 += h1
- &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); # t2 = carry out of c1
- &add($c2,$t2,$c2); &FR($t2); # c2 += t2
- }
-
-sub sqr_add_c2
- {
- # Emit code for (c2,c1,c0) += 2*a*b, where c0 is the least significant
- # word of a three-word accumulator.
- #
- #   $a,$b        registers holding the two words to multiply
- #   $c0,$c1,$c2  accumulator registers (updated in place)
- #
- # The product is doubled in place: the top bit of each half is captured
- # with a signed "< 0" compare before the half is added to itself.
- #
- # Compared with the previous version the carry produced when the low
- # word's carry is added to c1 is now propagated into c2 (it used to be
- # dropped), and the scratch names $lc1/$hc1 are localized instead of
- # leaking as globals.
- local($a,$b,$c0,$c1,$c2)=@_;
- local($l1,$h1,$lc1,$hc1);
-
- &mul($a,$b,($l1)=&NR(1)); # l1 = low(a*b)
- &muh($a,$b,($h1)=&NR(1)); # h1 = high(a*b)
- &cmplt($l1,"zero",($lc1)=&NR(1)); # lc1 = top bit of l1
- &cmplt($h1,"zero",($hc1)=&NR(1)); # hc1 = top bit of h1
- &add($l1,$l1,$l1); # l1 *= 2
- &add($h1,$h1,$h1); # h1 *= 2 (now even)
- &add($h1,$lc1,$h1); &FR($lc1); # shift l1's top bit in; cannot overflow
- &add($c2,$hc1,$c2); &FR($hc1); # h1's top bit belongs to c2
-
- &add($c0,$l1,$c0);
- &add($c1,$h1,$c1);
- &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); # lc1 = carry out of c0
- &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); # hc1 = carry out of c1
-
- &add($c1,$lc1,$c1);
- &add($c2,$hc1,$c2); &FR($hc1);
- # c1 += lc1 can itself wrap (c1 was all ones and lc1 is 1); the doubled
- # high word may be all ones, so this carry cannot be folded away earlier.
- &cmpult($c1,$lc1,($hc1)=&NR(1)); &FR($lc1);
- &add($c2,$hc1,$c2); &FR($hc1);
- }
-
-
-sub bn_sqr_comba4
- {
- # Emit $name(rp,ap): the eight result words r[0..7] of the square of
- # the four words a[0..3], produced one result word (column) at a time
- # in the rotating three-register accumulator ($c0,$c1,$c2).
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
- my(@plan,$word,$term,$x,$y);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- # One freshly allocated register per input word.
- foreach $word (0 .. 3)
-  {
-  ($a[$word])=&NR(1);
-  &ld($a[$word],&QWPw($word,$ap));
-  }
- &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- # Column 0 is just a[0]*a[0], written straight into the accumulator.
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
-
- # Terms still to be added for each result word, in emission order.
- # A one digit term "i" is a[i]*a[i]; a two digit term "ij" is
- # 2*a[i]*a[j] with the operands passed in exactly that order.
- @plan=("","01","1 20","30 21","2 31","32","3");
- foreach $word (0 .. 6)
-  {
-  foreach $term (split(/ /,$plan[$word]))
-   {
-   ($x,$y)=split(//,$term);
-   if (defined($y))
-    { &sqr_add_c2($a[$x],$a[$y],$c0,$c1,$c2); }
-   else
-    { &sqr_add_c($a[$x],$c0,$c1,$c2); }
-   }
-  &st($c0,&QWPw($word,$rp));
-  last if ($word == 6);
-  # Rotate the accumulator down one word and clear the new top word.
-  ($c0,$c1,$c2)=($c1,$c2,$c0);
-  &mov("zero",$c2);
-  }
- # What is left in $c1 after the last column is the top result word.
- &st($c1,&QWPw(7,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sqr_comba8
- {
- # Emit $name(rp,ap): the sixteen result words r[0..15] of the square
- # of the eight words a[0..7], produced one result word (column) at a
- # time in the rotating three-register accumulator ($c0,$c1,$c2).
- local($name)=@_;
- local(@a,@b,$r,$c0,$c1,$c2);
- my($idx,$col,$hi,$top);
-
- $cnt=1;
- &init_pool(2);
-
- $rp=&wparam(0);
- $ap=&wparam(1);
-
- &function_begin($name,"");
-
- &comment("");
-
- # One freshly allocated register per input word.
- foreach $idx (0 .. 7)
-  {
-  ($a[$idx])=&NR(1);
-  &ld($a[$idx],&QWPw($idx,$ap));
-  }
- &FR($ap);
-
- ($c0,$c1,$c2)=&NR(3);
-
- # Column 0 is just a[0]*a[0], written straight into the accumulator.
- &mov("zero",$c2);
- &mul($a[0],$a[0],$c0);
- &muh($a[0],$a[0],$c1);
-
- foreach $col (0 .. 14)
-  {
-  if ($col > 0)
-   {
-   # Even columns start with the square of the middle word ...
-   &sqr_add_c($a[$col/2],$c0,$c1,$c2) if ($col % 2 == 0);
-   # ... then every column adds the doubled cross products
-   # a[hi]*a[col-hi], working outwards from the middle.
-   $top=($col < 7)?$col:7;
-   foreach $hi (int($col/2)+1 .. $top)
-    { &sqr_add_c2($a[$hi],$a[$col-$hi],$c0,$c1,$c2); }
-   }
-  &st($c0,&QWPw($col,$rp));
-  last if ($col == 14);
-  # Rotate the accumulator down one word and clear the new top word.
-  ($c0,$c1,$c2)=($c1,$c2,$c0);
-  &mov("zero",$c2);
-  }
- # What is left in $c1 after the last column is the top result word.
- &st($c1,&QWPw(15,$rp));
-
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# alpha assembler
-
-sub bn_sub_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
- # Emit $name(rp,ap,bp,count): r[i] = a[i] - b[i] - borrow, with the running borrow kept in $cc.
- &init_pool(4);
- ($cc)=GR("r0");
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # count-=4 so the sign says whether 4 words remain
- &mov("zero",$cc); # borrow = 0
- &blt($count,&label("finish")); # fewer than 4 words: tail code only
-
- ($a0,$b0)=&NR(2);
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
- # Four words per pass; loads for later words are interleaved with the arithmetic of earlier ones.
- ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # tmp = (a0 < b0): a0-b0 borrows
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # a0 -= b0
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # b0 = (a0 < borrow): subtracting the old borrow borrows
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$o0); # o0 = a0 - old borrow (result word 0)
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); # new borrow = sum of the two flags
-
- &cmpult($a1,$b1,$t1); # t1 = (a1 < b1)
- &sub($a1,$b1,$a1); # a1 -= b1
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # b1 = (a1 < borrow)
- &sub($a1,$cc,$o1); # o1 = a1 - old borrow (result word 1)
- &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); # new borrow
-
- &cmpult($a2,$b2,$tmp); # tmp = (a2 < b2)
- &sub($a2,$b2,$a2); # a2 -= b2
- &st($o0,&QWPw(0,$rp)); &FR($o0); # store result word 0
- &cmpult($a2,$cc,$b2); # b2 = (a2 < borrow)
- &sub($a2,$cc,$o2); # o2 = a2 - old borrow (result word 2)
- &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); # new borrow
-
- &cmpult($a3,$b3,$t3); # t3 = (a3 < b3)
- &sub($a3,$b3,$a3); # a3 -= b3
- &st($o1,&QWPw(1,$rp)); &FR($o1); # store result word 1
- &cmpult($a3,$cc,$b3); # b3 = (a3 < borrow)
- &sub($a3,$cc,$o3); # o3 = a3 - old borrow (result word 3)
- &add($b3,$t3,$cc); &FR($t3,$a3,$b3); # new borrow
-
- &st($o2,&QWPw(2,$rp)); &FR($o2); # store result word 2
- &sub($count,4,$count); # count-=4
- &st($o3,&QWPw(3,$rp)); &FR($o3); # store result word 3
- &add($ap,4*$QWS,$ap); # ap+=4 words
- &add($bp,4*$QWS,$bp); # bp+=4 words
- &add($rp,4*$QWS,$rp); # rp+=4 words
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next pass
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
- # NOTE(review): $tmp is used below after it was handed back with FR; nothing is allocated afterwards here, so it presumably still names a usable register.
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # tmp = (a < b): a-b borrows
- &sub($a0,$b0,$a0); # a -= b
- &cmpult($a0,$cc,$b0); # b0 = (a < borrow)
- &sub($a0,$cc,$a0); # a -= old borrow
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias: count = words still to do
- &bgt($count,&label("last_loop"));
-
- &FR($a0,$b0);
- &set_label("end");
- &function_end($name);
-
- &fin_pool;
- }
-
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-# Expand the symbolic register names used in the template returned by data()
-# into Alpha register numbers and write the result to stdout.  The pairs are
-# applied in the order listed, to the whole text (comments included).
-$d=&data();
-my @regmap=(
- "CC", 0,
- "R1", 1, "R2", 2, "R3", 3, "R4", 4,
- "L1", 5, "L2", 6, "L3", 7, "L4", 8,
- "O1",22, "O2",23, "O3",24, "O4",25,
- "A1",20, "A2",21, "A3",27, "A4",28,
- );
-while (@regmap)
- {
- my($from,$to)=splice(@regmap,0,2);
- $d =~ s/$from/$to/g;
- }
-
-print $d;
-
-sub data
- {
- local($data)=<<'EOF';
-
- # DEC Alpha assember
- # The bn_div_words is actually gcc output but the other parts are hand done.
- # Thanks to tzeruch@ceddec.com for sending me the gcc output for
- # bn_div_words.
- # I've gone back and re-done most of routines.
- # The key thing to remeber for the 164 CPU is that while a
- # multiply operation takes 8 cycles, another one can only be issued
- # after 4 cycles have elapsed. I've done modification to help
- # improve this. Also, normally, a ld instruction will not be available
- # for about 3 cycles.
- .file 1 "bn_asm.c"
- .set noat
-gcc2_compiled.:
-__gnu_compiled_c:
- .text
- .align 3
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
-bn_mul_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$CC
- blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # 1 1
- ldq $R1,0($16) # 1 1
- .align 3
-$42:
- mulq $A1,$19,$L1 # 1 2 1 ######
- ldq $A2,8($17) # 2 1
- ldq $R2,8($16) # 2 1
- umulh $A1,$19,$A1 # 1 2 ######
- ldq $A3,16($17) # 3 1
- ldq $R3,16($16) # 3 1
- mulq $A2,$19,$L2 # 2 2 1 ######
- ldq $A4,24($17) # 4 1
- addq $R1,$L1,$R1 # 1 2 2
- ldq $R4,24($16) # 4 1
- umulh $A2,$19,$A2 # 2 2 ######
- cmpult $R1,$L1,$O1 # 1 2 3 1
- addq $A1,$O1,$A1 # 1 3 1
- addq $R1,$CC,$R1 # 1 2 3 1
- mulq $A3,$19,$L3 # 3 2 1 ######
- cmpult $R1,$CC,$CC # 1 2 3 2
- addq $R2,$L2,$R2 # 2 2 2
- addq $A1,$CC,$CC # 1 3 2
- cmpult $R2,$L2,$O2 # 2 2 3 1
- addq $A2,$O2,$A2 # 2 3 1
- umulh $A3,$19,$A3 # 3 2 ######
- addq $R2,$CC,$R2 # 2 2 3 1
- cmpult $R2,$CC,$CC # 2 2 3 2
- subq $18,4,$18
- mulq $A4,$19,$L4 # 4 2 1 ######
- addq $A2,$CC,$CC # 2 3 2
- addq $R3,$L3,$R3 # 3 2 2
- addq $16,32,$16
- cmpult $R3,$L3,$O3 # 3 2 3 1
- stq $R1,-32($16) # 1 2 4
- umulh $A4,$19,$A4 # 4 2 ######
- addq $A3,$O3,$A3 # 3 3 1
- addq $R3,$CC,$R3 # 3 2 3 1
- stq $R2,-24($16) # 2 2 4
- cmpult $R3,$CC,$CC # 3 2 3 2
- stq $R3,-16($16) # 3 2 4
- addq $R4,$L4,$R4 # 4 2 2
- addq $A3,$CC,$CC # 3 3 2
- cmpult $R4,$L4,$O4 # 4 2 3 1
- addq $17,32,$17
- addq $A4,$O4,$A4 # 4 3 1
- addq $R4,$CC,$R4 # 4 2 3 1
- cmpult $R4,$CC,$CC # 4 2 3 2
- stq $R4,-8($16) # 4 2 4
- addq $A4,$CC,$CC # 4 3 2
- blt $18,$43
-
- ldq $A1,0($17) # 1 1
- ldq $R1,0($16) # 1 1
-
- br $42
-
- .align 4
-$45:
- ldq $A1,0($17) # 4 1
- ldq $R1,0($16) # 4 1
- mulq $A1,$19,$L1 # 4 2 1
- subq $18,1,$18
- addq $16,8,$16
- addq $17,8,$17
- umulh $A1,$19,$A1 # 4 2
- addq $R1,$L1,$R1 # 4 2 2
- cmpult $R1,$L1,$O1 # 4 2 3 1
- addq $A1,$O1,$A1 # 4 3 1
- addq $R1,$CC,$R1 # 4 2 3 1
- cmpult $R1,$CC,$CC # 4 2 3 2
- addq $A1,$CC,$CC # 4 3 2
- stq $R1,-8($16) # 4 2 4
- bgt $18,$45
- ret $31,($26),1 # else exit
-
- .align 4
-$43:
- addq $18,4,$18
- bgt $18,$45 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_add_words
- .align 3
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
-bn_mul_words..ng:
- .frame $30,0,$26,0
- .prologue 0
- .align 5
- subq $18,4,$18
- bis $31,$31,$CC
- blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # 1 1
- .align 3
-$142:
-
- mulq $A1,$19,$L1 # 1 2 1 #####
- ldq $A2,8($17) # 2 1
- ldq $A3,16($17) # 3 1
- umulh $A1,$19,$A1 # 1 2 #####
- ldq $A4,24($17) # 4 1
- mulq $A2,$19,$L2 # 2 2 1 #####
- addq $L1,$CC,$L1 # 1 2 3 1
- subq $18,4,$18
- cmpult $L1,$CC,$CC # 1 2 3 2
- umulh $A2,$19,$A2 # 2 2 #####
- addq $A1,$CC,$CC # 1 3 2
- addq $17,32,$17
- addq $L2,$CC,$L2 # 2 2 3 1
- mulq $A3,$19,$L3 # 3 2 1 #####
- cmpult $L2,$CC,$CC # 2 2 3 2
- addq $A2,$CC,$CC # 2 3 2
- addq $16,32,$16
- umulh $A3,$19,$A3 # 3 2 #####
- stq $L1,-32($16) # 1 2 4
- mulq $A4,$19,$L4 # 4 2 1 #####
- addq $L3,$CC,$L3 # 3 2 3 1
- stq $L2,-24($16) # 2 2 4
- cmpult $L3,$CC,$CC # 3 2 3 2
- umulh $A4,$19,$A4 # 4 2 #####
- addq $A3,$CC,$CC # 3 3 2
- stq $L3,-16($16) # 3 2 4
- addq $L4,$CC,$L4 # 4 2 3 1
- cmpult $L4,$CC,$CC # 4 2 3 2
-
- addq $A4,$CC,$CC # 4 3 2
-
- stq $L4,-8($16) # 4 2 4
-
- blt $18,$143
-
- ldq $A1,0($17) # 1 1
-
- br $142
-
- .align 4
-$145:
- ldq $A1,0($17) # 4 1
- mulq $A1,$19,$L1 # 4 2 1
- subq $18,1,$18
- umulh $A1,$19,$A1 # 4 2
- addq $L1,$CC,$L1 # 4 2 3 1
- addq $16,8,$16
- cmpult $L1,$CC,$CC # 4 2 3 2
- addq $17,8,$17
- addq $A1,$CC,$CC # 4 3 2
- stq $L1,-8($16) # 4 2 4
-
- bgt $18,$145
- ret $31,($26),1 # else exit
-
- .align 4
-$143:
- addq $18,4,$18
- bgt $18,$145 # goto tail code
- ret $31,($26),1 # else exit
-
- .end bn_mul_words
- .align 3
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
-bn_sqr_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- # r[2i],r[2i+1] = low,high halves of a[i]*a[i] for i = 0 .. n-1.
- # $16 = r, $17 = a, $18 = n.  Four input words per pass of the main
- # loop, then the remaining 0..3 words one at a time.
- subq $18,4,$18
- blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
- ldq $A1,0($17) # a[0]
- .align 3
-$542:
- mulq $A1,$A1,$L1 ######
- ldq $A2,8($17) # a[1]
- subq $18,4,$18 # n -= 4 (destination spelled out; it used to be left to the assembler default)
- umulh $A1,$A1,$R1 ######
- ldq $A3,16($17) # a[2]
- mulq $A2,$A2,$L2 ######
- ldq $A4,24($17) # a[3]
- stq $L1,0($16) # r[0]
- umulh $A2,$A2,$R2 ######
- stq $R1,8($16) # r[1]
- mulq $A3,$A3,$L3 ######
- stq $L2,16($16) # r[2]
- umulh $A3,$A3,$R3 ######
- stq $R2,24($16) # r[3]
- mulq $A4,$A4,$L4 ######
- stq $L3,32($16) # r[4]
- umulh $A4,$A4,$R4 ######
- stq $R3,40($16) # r[5]
-
- addq $16,64,$16 # r += 8 words
- addq $17,32,$17 # a += 4 words
- stq $L4,-16($16) # r[6]
- stq $R4,-8($16) # r[7]
-
- blt $18,$543
- ldq $A1,0($17) # a[0] of the next pass
- br $542
-
-$442:
- ldq $A1,0($17) # a[0]
- mulq $A1,$A1,$L1 # low half of a[0]*a[0]
- addq $16,16,$16
- addq $17,8,$17
- subq $18,1,$18
- umulh $A1,$A1,$R1 # high half of a[0]*a[0]
- stq $L1,-16($16) # r[0]
- stq $R1,-8($16) # r[1]
-
- bgt $18,$442
- ret $31,($26),1 # else exit
-
- .align 4
-$543:
- addq $18,4,$18
- bgt $18,$442 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_sqr_words
-
- .align 3
- .globl bn_add_words
- .ent bn_add_words
-bn_add_words:
-bn_add_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- # r[i] = a[i] + b[i] + carry for i = 0 .. n-1; the running carry is
- # kept in the first register named on the "bis" line below.
- # $16 = r, $17 = a, $18 = b, $19 = n.  Four words per pass of the main
- # loop, then the remaining 0..3 words one at a time.
- subq $19,4,$19
- bis $31,$31,$CC # carry = 0
- blt $19,$900
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[0]
- .align 3
-$901:
- addq $R1,$L1,$R1 # r=a+b;
- ldq $L2,8($17) # a[1]
- cmpult $R1,$L1,$O1 # did we overflow?
- ldq $R2,8($18) # b[1]
- addq $R1,$CC,$R1 # c+= overflow
- ldq $L3,16($17) # a[2]
- cmpult $R1,$CC,$CC # overflow?
- ldq $R3,16($18) # b[2]
- addq $CC,$O1,$CC
- ldq $L4,24($17) # a[3]
- addq $R2,$L2,$R2 # r=a+b;
- ldq $R4,24($18) # b[3]
- cmpult $R2,$L2,$O2 # did we overflow?
- addq $R3,$L3,$R3 # r=a+b;
- addq $R2,$CC,$R2 # c+= overflow
- cmpult $R3,$L3,$O3 # did we overflow?
- cmpult $R2,$CC,$CC # overflow?
- addq $R4,$L4,$R4 # r=a+b;
- addq $CC,$O2,$CC
- cmpult $R4,$L4,$O4 # did we overflow?
- addq $R3,$CC,$R3 # c+= overflow
- stq $R1,0($16) # r[0]=c
- cmpult $R3,$CC,$CC # overflow?
- stq $R2,8($16) # r[1]=c
- addq $CC,$O3,$CC
- stq $R3,16($16) # r[2]=c
- addq $R4,$CC,$R4 # c+= overflow
- subq $19,4,$19 # loop--
- cmpult $R4,$CC,$CC # overflow?
- addq $17,32,$17 # a++
- addq $CC,$O4,$CC
- stq $R4,24($16) # r[3]=c
- addq $18,32,$18 # b++
- addq $16,32,$16 # r++
-
- blt $19,$900
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[0]
- br $901
- .align 4
-$945:
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[0]
- addq $R1,$L1,$R1 # r=a+b;
- subq $19,1,$19 # loop--
- # The a+b overflow test has to see the sum before the carry goes in.
- # Testing afterwards (as this loop used to) misses the carry out when
- # b is all ones and the incoming carry is 1.
- cmpult $R1,$L1,$O1 # did a+b overflow?
- addq $R1,$CC,$R1 # r+=carry
- addq $17,8,$17 # a++
- cmpult $R1,$CC,$CC # did adding the carry overflow?
- addq $18,8,$18 # b++
- stq $R1,0($16) # r[0]=c
- addq $CC,$O1,$CC
- addq $16,8,$16 # r++
-
- bgt $19,$945
- ret $31,($26),1 # else exit
-
-$900:
- addq $19,4,$19
- bgt $19,$945 # goto tail code
- ret $31,($26),1 # else exit
- .end bn_add_words
-
- .align 3
- .globl bn_sub_words
- .ent bn_sub_words
-bn_sub_words:
-bn_sub_words..ng:
- .frame $30,0,$26,0
- .prologue 0
-
- # r[i] = a[i] - b[i] - borrow for i = 0 .. n-1; the running borrow is
- # kept in the first register named on the "bis" line below.
- # $16 = r, $17 = a, $18 = b, $19 = n.
- #
- # There is no unrolled loop: the one that used to sit here could never
- # be reached (an unconditional branch jumped over it) and was a copy of
- # the bn_add_words loop, so it has been removed.  Every word goes
- # through the single-word loop, exactly as before.
- bis $31,$31,$CC # borrow = 0
- ble $19,$800 # n <= 0: nothing to do
- .align 4
-$845:
- ldq $L1,0($17) # a[0]
- ldq $R1,0($18) # b[0]
- cmpult $L1,$R1,$O1 # will a-b borrow?
- subq $L1,$R1,$R1 # r=a-b;
- subq $19,1,$19 # loop--
- cmpult $R1,$CC,$O2 # will subtracting the old borrow borrow?
- subq $R1,$CC,$R1 # r-=borrow
- addq $17,8,$17 # a++
- addq $18,8,$18 # b++
- stq $R1,0($16) # r[0]=r
- addq $O2,$O1,$CC # new borrow
- addq $16,8,$16 # r++
-
- bgt $19,$845
-$800:
- ret $31,($26),1
- .end bn_sub_words
-
- #
- # What follows was taken directly from the C compiler with a few
- # hacks to redo the lables.
- #
-.text
- .align 3
- .globl bn_div_words
- .ent bn_div_words
-bn_div_words:
- ldgp $29,0($27)
-bn_div_words..ng:
- lda $30,-48($30)
- .frame $30,48,$26,0
- stq $26,0($30)
- stq $9,8($30)
- stq $10,16($30)
- stq $11,24($30)
- stq $12,32($30)
- stq $13,40($30)
- .mask 0x4003e00,-48
- .prologue 1
- bis $16,$16,$9
- bis $17,$17,$10
- bis $18,$18,$11
- bis $31,$31,$13
- bis $31,2,$12
- bne $11,$119
- lda $0,-1
- br $31,$136
- .align 4
-$119:
- bis $11,$11,$16
- jsr $26,BN_num_bits_word
- ldgp $29,0($26)
- subq $0,64,$1
- beq $1,$120
- bis $31,1,$1
- sll $1,$0,$1
- cmpule $9,$1,$1
- bne $1,$120
- # lda $16,_IO_stderr_
- # lda $17,$C32
- # bis $0,$0,$18
- # jsr $26,fprintf
- # ldgp $29,0($26)
- jsr $26,abort
- ldgp $29,0($26)
- .align 4
-$120:
- bis $31,64,$3
- cmpult $9,$11,$2
- subq $3,$0,$1
- addl $1,$31,$0
- subq $9,$11,$1
- cmoveq $2,$1,$9
- beq $0,$122
- zapnot $0,15,$2
- subq $3,$0,$1
- sll $11,$2,$11
- sll $9,$2,$3
- srl $10,$1,$1
- sll $10,$2,$10
- bis $3,$1,$9
-$122:
- srl $11,32,$5
- zapnot $11,15,$6
- lda $7,-1
- .align 5
-$123:
- srl $9,32,$1
- subq $1,$5,$1
- bne $1,$126
- zapnot $7,15,$27
- br $31,$127
- .align 4
-$126:
- bis $9,$9,$24
- bis $5,$5,$25
- divqu $24,$25,$27
-$127:
- srl $10,32,$4
- .align 5
-$128:
- mulq $27,$5,$1
- subq $9,$1,$3
- zapnot $3,240,$1
- bne $1,$129
- mulq $6,$27,$2
- sll $3,32,$1
- addq $1,$4,$1
- cmpule $2,$1,$2
- bne $2,$129
- subq $27,1,$27
- br $31,$128
- .align 4
-$129:
- mulq $27,$6,$1
- mulq $27,$5,$4
- srl $1,32,$3
- sll $1,32,$1
- addq $4,$3,$4
- cmpult $10,$1,$2
- subq $10,$1,$10
- addq $2,$4,$2
- cmpult $9,$2,$1
- bis $2,$2,$4
- beq $1,$134
- addq $9,$11,$9
- subq $27,1,$27
-$134:
- subl $12,1,$12
- subq $9,$4,$9
- beq $12,$124
- sll $27,32,$13
- sll $9,32,$2
- srl $10,32,$1
- sll $10,32,$10
- bis $2,$1,$9
- br $31,$123
- .align 4
-$124:
- bis $13,$27,$0
-$136:
- ldq $26,0($30)
- ldq $9,8($30)
- ldq $10,16($30)
- ldq $11,24($30)
- ldq $12,32($30)
- ldq $13,40($30)
- addq $30,48,$30
- ret $31,($26),1
- .end bn_div_words
-EOF
- return($data);
- }
-
+++ /dev/null
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-# Build the complete Alpha bignum assembler file: load the perlasm back end
-# and every routine generator, then emit the routines in a fixed order.
-push(@INC,"perlasm","../../perlasm");
-require "alpha.pl";
-foreach my $part ("mul_add","mul","sqr","add","sub",
-                  "mul_c8","mul_c4","sqr_c4","sqr_c8","div")
- {
- require "alpha/$part.pl";
- }
-
-&asm_init($ARGV[0],$0);
-
-# Each generator takes the name of the routine it should emit, which is
-# also the name of the generator itself.
-foreach my $routine ("bn_mul_words","bn_sqr_words","bn_mul_add_words",
-                     "bn_add_words","bn_sub_words","bn_div_words",
-                     "bn_mul_comba8","bn_mul_comba4",
-                     "bn_sqr_comba4","bn_sqr_comba8")
- {
- &$routine($routine);
- }
-
-&asm_finish();
-
+++ /dev/null
-#!/usr/local/bin/perl
-# I have this in perl so I can use more useful register names and then convert
-# them into alpha registers.
-#
-
-push(@INC,"perlasm","../../perlasm"); # directories searched for alpha.pl
-require "alpha.pl";
-# Stand-alone driver: emits only bn_sub_words, using the generator defined further down in this file.
-&asm_init($ARGV[0],$0);
-# The sub's return value (that of its final function_end call) is what gets printed here.
-print &bn_sub_words("bn_sub_words");
-
-&asm_finish();
-
-sub bn_sub_words
- {
- local($name)=@_;
- local($cc,$a,$b,$r);
- # Emit $name(rp,ap,bp,count): r[i] = a[i] - b[i] - borrow, using the fixed register names assigned below.
- $cc="r0"; # running borrow
- $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13"; # $r0..$r3 are assigned but never used below
- $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14";
- $a2="r3"; $b2="r7"; $r2="r11";
- $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15";
-
- $rp=&wparam(0);
- $ap=&wparam(1);
- $bp=&wparam(2);
- $count=&wparam(3);
-
- &function_begin($name,"");
-
- &comment("");
- &sub($count,4,$count); # count-=4 so the sign says whether 4 words remain
- &mov("zero",$cc); # borrow = 0
- &blt($count,&label("finish")); # fewer than 4 words: tail code only
-
- &ld($a0,&QWPw(0,$ap));
- &ld($b0,&QWPw(0,$bp));
-
-##########################################################
- &set_label("loop");
- # Four words per pass; each result is left in its own a-register and stored a little later.
- &ld($a1,&QWPw(1,$ap));
- &cmpult($a0,$b0,$tmp); # tmp = (a0 < b0): a0-b0 borrows
- &ld($b1,&QWPw(1,$bp));
- &sub($a0,$b0,$a0); # a0 -= b0
- &ld($a2,&QWPw(2,$ap));
- &cmpult($a0,$cc,$b0); # b0 = (a0 < borrow): subtracting the old borrow borrows
- &ld($b2,&QWPw(2,$bp));
- &sub($a0,$cc,$a0); # a0 -= old borrow (result word 0)
- &ld($a3,&QWPw(3,$ap));
- &add($b0,$tmp,$cc); # new borrow = sum of the two flags
-
- &cmpult($a1,$b1,$t1); # t1 = (a1 < b1)
- &sub($a1,$b1,$a1); # a1 -= b1
- &ld($b3,&QWPw(3,$bp));
- &cmpult($a1,$cc,$b1); # b1 = (a1 < borrow)
- &sub($a1,$cc,$a1); # a1 -= old borrow (result word 1)
- &add($b1,$t1,$cc); # new borrow
-
- &cmpult($a2,$b2,$tmp); # tmp = (a2 < b2)
- &sub($a2,$b2,$a2); # a2 -= b2
- &st($a0,&QWPw(0,$rp)); # store result word 0
- &cmpult($a2,$cc,$b2); # b2 = (a2 < borrow)
- &sub($a2,$cc,$a2); # a2 -= old borrow (result word 2)
- &add($b2,$tmp,$cc); # new borrow
-
- &cmpult($a3,$b3,$t3); # t3 = (a3 < b3)
- &sub($a3,$b3,$a3); # a3 -= b3
- &st($a1,&QWPw(1,$rp)); # store result word 1
- &cmpult($a3,$cc,$b3); # b3 = (a3 < borrow)
- &sub($a3,$cc,$a3); # a3 -= old borrow (result word 3)
- &add($b3,$t3,$cc); # new borrow
-
- &st($a2,&QWPw(2,$rp)); # store result word 2
- &sub($count,4,$count); # count-=4
- &st($a3,&QWPw(3,$rp)); # store result word 3
- &add($ap,4*$QWS,$ap); # ap+=4 words
- &add($bp,4*$QWS,$bp); # bp+=4 words
- &add($rp,4*$QWS,$rp); # rp+=4 words
-
- &blt($count,&label("finish"));
- &ld($a0,&QWPw(0,$ap)); # preload word 0 of the next pass
- &ld($b0,&QWPw(0,$bp));
- &br(&label("loop"));
-##################################################
- # Do the last 0..3 words
-
- &set_label("last_loop");
-
- &ld($a0,&QWPw(0,$ap)); # get a
- &ld($b0,&QWPw(0,$bp)); # get b
- &cmpult($a0,$b0,$tmp); # tmp = (a < b): a-b borrows
- &sub($a0,$b0,$a0); # a -= b
- &cmpult($a0,$cc,$b0); # b0 = (a < borrow)
- &sub($a0,$cc,$a0); # a -= old borrow
- &st($a0,&QWPw(0,$rp)); # save
- &add($b0,$tmp,$cc); # add the borrows
-
- &add($ap,$QWS,$ap);
- &add($bp,$QWS,$bp);
- &add($rp,$QWS,$rp);
- &sub($count,1,$count);
- &bgt($count,&label("last_loop"));
- &function_end_A($name);
-
-######################################################
- &set_label("finish");
- &add($count,4,$count); # undo the -4 bias: count = words still to do
- &bgt($count,&label("last_loop"));
-
- &set_label("end");
- &function_end($name);
- }
-
+++ /dev/null
-/* This assembler is for R2000/R3000 machines, or higher ones that do
- * not want to do any 64 bit arithmetic.
- * Make sure that the SSLeay bignum library is compiled with
- * THIRTY_TWO_BIT set.
- * This must either be compiled with the system CC, or, if you use GNU gas,
- * cc -E mips1.s|gas -o mips1.o
- */
- .set reorder
- .set noat
-
-#define R1 $1
-#define CC $2
-#define R2 $3
-#define R3 $8
-#define R4 $9
-#define L1 $10
-#define L2 $11
-#define L3 $12
-#define L4 $13
-#define H1 $14
-#define H2 $15
-#define H3 $24
-#define H4 $25
-
-#define P1 $4
-#define P2 $5
-#define P3 $6
-#define P4 $7
-
- .align 2
- .ent bn_mul_add_words
- .globl bn_mul_add_words
-.text
-bn_mul_add_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- #blt P3,4,$lab34
-
- subu R1,P3,4
- move CC,$0
- bltz R1,$lab34
-$lab2:
- lw R1,0(P1)
- lw L1,0(P2)
- lw R2,4(P1)
- lw L2,4(P2)
- lw R3,8(P1)
- lw L3,8(P2)
- lw R4,12(P1)
- lw L4,12(P2)
- multu L1,P4
- addu R1,R1,CC
- mflo L1
- sltu CC,R1,CC
- addu R1,R1,L1
- mfhi H1
- sltu L1,R1,L1
- sw R1,0(P1)
- addu CC,CC,L1
- multu L2,P4
- addu CC,H1,CC
- mflo L2
- addu R2,R2,CC
- sltu CC,R2,CC
- mfhi H2
- addu R2,R2,L2
- addu P2,P2,16
- sltu L2,R2,L2
- sw R2,4(P1)
- addu CC,CC,L2
- multu L3,P4
- addu CC,H2,CC
- mflo L3
- addu R3,R3,CC
- sltu CC,R3,CC
- mfhi H3
- addu R3,R3,L3
- addu P1,P1,16
- sltu L3,R3,L3
- sw R3,-8(P1)
- addu CC,CC,L3
- multu L4,P4
- addu CC,H3,CC
- mflo L4
- addu R4,R4,CC
- sltu CC,R4,CC
- mfhi H4
- addu R4,R4,L4
- subu P3,P3,4
- sltu L4,R4,L4
- addu CC,CC,L4
- addu CC,H4,CC
-
- subu R1,P3,4
- sw R4,-4(P1) # delay slot
- bgez R1,$lab2
-
- bleu P3,0,$lab3
- .align 2
-$lab33:
- lw L1,0(P2)
- lw R1,0(P1)
- multu L1,P4
- addu R1,R1,CC
- sltu CC,R1,CC
- addu P1,P1,4
- mflo L1
- mfhi H1
- addu R1,R1,L1
- addu P2,P2,4
- sltu L1,R1,L1
- subu P3,P3,1
- addu CC,CC,L1
- sw R1,-4(P1)
- addu CC,H1,CC
- bgtz P3,$lab33
- j $31
- .align 2
-$lab3:
- j $31
- .align 2
-$lab34:
- bgt P3,0,$lab33
- j $31
- .end bn_mul_add_words
-
- .align 2
- # Program Unit: bn_mul_words
- .ent bn_mul_words
- .globl bn_mul_words
-.text
-bn_mul_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- subu P3,P3,4
- move CC,$0
- bltz P3,$lab45
-$lab44:
- lw L1,0(P2)
- lw L2,4(P2)
- lw L3,8(P2)
- lw L4,12(P2)
- multu L1,P4
- subu P3,P3,4
- mflo L1
- mfhi H1
- addu L1,L1,CC
- multu L2,P4
- sltu CC,L1,CC
- sw L1,0(P1)
- addu CC,H1,CC
- mflo L2
- mfhi H2
- addu L2,L2,CC
- multu L3,P4
- sltu CC,L2,CC
- sw L2,4(P1)
- addu CC,H2,CC
- mflo L3
- mfhi H3
- addu L3,L3,CC
- multu L4,P4
- sltu CC,L3,CC
- sw L3,8(P1)
- addu CC,H3,CC
- mflo L4
- mfhi H4
- addu L4,L4,CC
- addu P1,P1,16
- sltu CC,L4,CC
- addu P2,P2,16
- addu CC,H4,CC
- sw L4,-4(P1)
-
- bgez P3,$lab44
- b $lab45
-$lab46:
- lw L1,0(P2)
- addu P1,P1,4
- multu L1,P4
- addu P2,P2,4
- mflo L1
- mfhi H1
- addu L1,L1,CC
- subu P3,P3,1
- sltu CC,L1,CC
- sw L1,-4(P1)
- addu CC,H1,CC
- bgtz P3,$lab46
- j $31
-$lab45:
- addu P3,P3,4
- bgtz P3,$lab46
- j $31
- .align 2
- .end bn_mul_words
-
- # Program Unit: bn_sqr_words
- .ent bn_sqr_words
- .globl bn_sqr_words
-.text
-bn_sqr_words:
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
-
- subu P3,P3,4
- bltz P3,$lab55
-$lab54:
- lw L1,0(P2)
- lw L2,4(P2)
- lw L3,8(P2)
- lw L4,12(P2)
-
- multu L1,L1
- subu P3,P3,4
- mflo L1
- mfhi H1
- sw L1,0(P1)
- sw H1,4(P1)
-
- multu L2,L2
- addu P1,P1,32
- mflo L2
- mfhi H2
- sw L2,-24(P1)
- sw H2,-20(P1)
-
- multu L3,L3
- addu P2,P2,16
- mflo L3
- mfhi H3
- sw L3,-16(P1)
- sw H3,-12(P1)
-
- multu L4,L4
-
- mflo L4
- mfhi H4
- sw L4,-8(P1)
- sw H4,-4(P1)
-
- bgtz P3,$lab54
- b $lab55
-$lab56:
- lw L1,0(P2)
- addu P1,P1,8
- multu L1,L1
- addu P2,P2,4
- subu P3,P3,1
- mflo L1
- mfhi H1
- sw L1,-8(P1)
- sw H1,-4(P1)
-
- bgtz P3,$lab56
- j $31
-$lab55:
- addu P3,P3,4
- bgtz P3,$lab56
- j $31
- .align 2
- .end bn_sqr_words
-
- # Program Unit: bn_add_words -- add the word arrays read through P2 and P3 into the array written through P1, propagating the carry in CC; P4 is the word count.
- .ent bn_add_words
- .globl bn_add_words
-.text
-bn_add_words: # 0x590
- .frame $sp,0,$31
- .mask 0x00000000,0
- .fmask 0x00000000,0
- # P1-P4, L1-L4, R1-R4 and CC are register aliases defined outside this section (CC is presumably also the return register -- confirm against the definitions).
- subu P4,P4,4 # bias the count by -4: a negative value means fewer than 4 words remain
- move CC,$0 # carry in = 0
- bltz P4,$lab65 # not enough for an unrolled pass, go straight to the tail
-$lab64: # main loop: four words per pass, loads interleaved with the carry arithmetic
- lw L1,0(P2)
- lw R1,0(P3)
- lw L2,4(P2)
- lw R2,4(P3)
-
- addu L1,L1,CC # first operand word + incoming carry
- lw L3,8(P2)
- sltu CC,L1,CC # 1 if that addition wrapped
- addu L1,L1,R1 # + second operand word
- sltu R1,L1,R1 # 1 if that addition wrapped
- lw R3,8(P3)
- addu CC,CC,R1 # carry out of word 0
- lw L4,12(P2)
-
- addu L2,L2,CC # word 1: same add / compare sequence
- lw R4,12(P3)
- sltu CC,L2,CC
- addu L2,L2,R2
- sltu R2,L2,R2
- sw L1,0(P1)
- addu CC,CC,R2
- addu P1,P1,16 # advance the output; later stores use negative offsets
- addu L3,L3,CC # word 2
- sw L2,-12(P1)
-
- sltu CC,L3,CC
- addu L3,L3,R3
- sltu R3,L3,R3
- addu P2,P2,16 # advance the first input (its four loads are done)
- addu CC,CC,R3
-
- addu L4,L4,CC # word 3
- addu P3,P3,16 # advance the second input (its four loads are done)
- sltu CC,L4,CC
- addu L4,L4,R4
- subu P4,P4,4 # count -= 4
- sltu R4,L4,R4
- sw L3,-8(P1)
- addu CC,CC,R4 # carry out of the group
- sw L4,-4(P1)
-
- bgtz P4,$lab64 # biased count > 0 (more than 4 words left): another unrolled pass
- b $lab65 # otherwise (0..4 words left) finish in the tail
-$lab66: # tail loop: one word per iteration
- lw L1,0(P2)
- lw R1,0(P3)
- addu L1,L1,CC
- addu P1,P1,4
- sltu CC,L1,CC
- addu P2,P2,4
- addu P3,P3,4
- addu L1,L1,R1
- subu P4,P4,1
- sltu R1,L1,R1
- sw L1,-4(P1)
- addu CC,CC,R1
-
- bgtz P4,$lab66
- j $31
-$lab65: # common entry to the tail
- addu P4,P4,4 # undo the -4 bias: P4 = words still to process
- bgtz P4,$lab66
- j $31
- .end bn_add_words
-
- # Program Unit: bn_div64 -- compiled form of the C routine quoted in the numbered comments below: divide the two-word value h:l ($4:$5) by d ($6), 16 quotient bits per pass, result in $2. Registers: $9=h $12=l $16=d $13=count $3=i $8=dh $10=dl $5=q $7=th $11=tl $31=ret.
- .set at
- .set reorder
- .text
- .align 2
- .globl bn_div64
- # 321 {
- .ent bn_div64 2
-bn_div64:
- subu $sp, 64
- sw $31, 56($sp) # return address; $31 is reused below as the data register for ret
- sw $16, 48($sp)
- .mask 0x80010000, -56
- .frame $sp, 64, $31
- move $9, $4 # h
- move $12, $5 # l
- move $16, $6 # d
- # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
- move $31, $0 # ret = 0
- # 323 int i,count=2;
- li $13, 2
- # 324
- # 325 if (d == 0) return(BN_MASK2);
- bne $16, 0, $80
- li $2, -1
- b $93
-$80:
- # 326
- # 327 i=BN_num_bits_word(d);
- move $4, $16
- sw $31, 16($sp) # spill the caller-saved working registers around the call
- sw $9, 24($sp)
- sw $12, 32($sp)
- sw $13, 40($sp)
- .livereg 0x800ff0e,0xfff
- jal BN_num_bits_word
- li $4, 32 # BN_BITS2, kept in $4 for the comparisons below
- lw $31, 16($sp)
- lw $9, 24($sp)
- lw $12, 32($sp)
- lw $13, 40($sp)
- move $3, $2
- # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
- beq $2, $4, $81
- li $14, 1
- sll $15, $14, $2
- bleu $9, $15, $81
- # 329 {
- # 330 #if !defined(NO_STDIO) && !defined(WIN16)
- # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
- # 332 #endif
- # 333 abort();
- sw $3, 8($sp)
- sw $9, 24($sp)
- sw $12, 32($sp)
- sw $13, 40($sp)
- sw $31, 16($sp) # fixed: was 26($sp), which is not word aligned and overlaps the $9 slot at 24($sp); use the same slot as the spill above
- .livereg 0xff0e,0xfff
- jal abort
- lw $3, 8($sp)
- li $4, 32
- lw $9, 24($sp)
- lw $12, 32($sp)
- lw $13, 40($sp)
- lw $31, 16($sp) # fixed: reload from the matching aligned slot (was 26($sp))
- # 334 }
-$81:
- # 335 i=BN_BITS2-i;
- subu $3, $4, $3
- # 336 if (h >= d) h-=d;
- bltu $9, $16, $82
- subu $9, $9, $16
-$82:
- # 337
- # 338 if (i)
- beq $3, 0, $83
- # 339 {
- # 340 d<<=i;
- sll $16, $16, $3
- # 341 h=(h<<i)|(l>>(BN_BITS2-i));
- sll $24, $9, $3
- subu $25, $4, $3
- srl $14, $12, $25
- or $9, $24, $14
- # 342 l<<=i;
- sll $12, $12, $3
- # 343 }
-$83:
- # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
- # 345 dl=(d&BN_MASK2l);
- and $8, $16, -65536
- srl $8, $8, 16
- and $10, $16, 65535
- li $6, -65536 # $6 = 0xffff0000, the high-half mask used in the loop
-$84:
- # 346 for (;;)
- # 347 {
- # 348 if ((h>>BN_BITS4) == dh)
- srl $15, $9, 16
- bne $8, $15, $85
- # 349 q=BN_MASK2l;
- li $5, 65535
- b $86
-$85:
- # 350 else
- # 351 q=h/dh;
- divu $5, $9, $8
-$86:
- # 352
- # 353 for (;;)
- # 354 {
- # 355 t=(h-q*dh);
- mul $4, $5, $8 # $4 = q*dh, reused as th after the loop
- subu $2, $9, $4
- move $3, $2
- # 356 if ((t&BN_MASK2h) ||
- # 357 ((dl*q) <= (
- # 358 (t<<BN_BITS4)+
- # 359 ((l&BN_MASK2h)>>BN_BITS4))))
- and $25, $2, $6
- bne $25, $0, $87
- mul $24, $10, $5
- sll $14, $3, 16
- and $15, $12, $6
- srl $25, $15, 16
- addu $15, $14, $25
- bgtu $24, $15, $88
-$87:
- # 360 break;
- mul $3, $10, $5 # $3 = q*dl (tl before shifting)
- b $89
-$88:
- # 361 q--;
- addu $5, $5, -1
- # 362 }
- b $86
-$89:
- # 363 th=q*dh;
- # 364 tl=q*dl;
- # 365 t=(tl>>BN_BITS4);
- # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
- sll $14, $3, 16
- and $2, $14, $6
- move $11, $2
- # 367 th+=t;
- srl $25, $3, 16
- addu $7, $4, $25
- # 368
- # 369 if (l < tl) th++;
- bgeu $12, $2, $90
- addu $7, $7, 1
-$90:
- # 370 l-=tl;
- subu $12, $12, $11
- # 371 if (h < th)
- bgeu $9, $7, $91
- # 372 {
- # 373 h+=d;
- addu $9, $9, $16
- # 374 q--;
- addu $5, $5, -1
- # 375 }
-$91:
- # 376 h-=th;
- subu $9, $9, $7
- # 377
- # 378 if (--count == 0) break;
- addu $13, $13, -1
- beq $13, 0, $92
- # 379
- # 380 ret=q<<BN_BITS4;
- sll $31, $5, 16
- # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
- sll $24, $9, 16
- srl $15, $12, 16
- or $9, $24, $15
- # 382 l=(l&BN_MASK2l)<<BN_BITS4;
- and $12, $12, 65535
- sll $12, $12, 16
- # 383 }
- b $84
-$92:
- # 384 ret|=q;
- or $31, $31, $5
- # 385 return(ret);
- move $2, $31
-$93:
- lw $16, 48($sp)
- lw $31, 56($sp) # restore the real return address
- addu $sp, 64
- j $31
- .end bn_div64
-
+++ /dev/null
- .SPACE $PRIVATE$
- .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
- .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
- .SPACE $TEXT$
- .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
- .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
- .IMPORT $global$,DATA
- .IMPORT $$dyncall,MILLICODE
-; gcc_compiled.:
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_add_words
- .PROC ; multiply each word read through %r25 by the word in %r23, add it to the word at %r26 plus the running carry, store it back; %r24 = word count, carry word ends up in %r28
- .CALLINFO FRAME=0,CALLS,SAVE_RP
- .ENTRY
- stw %r2,-20(0,%r30) ; save the return pointer, %r2 is used as scratch below
- ldi 0,%r28 ; running carry = 0
- extru %r23,31,16,%r2 ; low 16 bits of the multiplier
- stw %r2,-16(0,%r30) ; the word at -16(%r30) is the scratch slot for moving values between general and FP registers
- extru %r23,15,16,%r23 ; high 16 bits of the multiplier
- ldil L'65536,%r31 ; %r31 = 0x10000, added to the high word when the cross-product sum wraps
- fldws -16(0,%r30),%fr11R ; fr11R = low half of the multiplier
- stw %r23,-16(0,%r30)
- ldo 12(%r25),%r29 ; %r29 = input pointer + 12, used for words 1..3 of each group
- ldo 12(%r26),%r23 ; %r23 = result pointer + 12, used for words 1..3 of each group
- fldws -16(0,%r30),%fr11L ; fr11L = high half of the multiplier
-L$0002
- ldw 0(0,%r25),%r19 ; word 0 of the group
- extru %r19,31,16,%r20 ; its low 16 bits
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19 ; its high 16 bits
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8 ; low * low
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10 ; multiplier low * word high
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9 ; high * high
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22 ; %r22 = low * low
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8 ; multiplier high * word low
- copy %r2,%r19 ; %r19 = first cross product
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20 ; %r20 = high * high
- ldw -16(0,%r30),%r2 ; %r2 = second cross product
- addl %r2,%r19,%r21 ; sum of the two cross products
- comclr,<<= %r19,%r21,0 ; skip the next instruction unless the sum wrapped
- addl %r20,%r31,%r20 ; wrapped: add 0x10000 to the high word
-L$0005
- extru %r21,15,16,%r19 ; upper half of the cross sum goes to the high word
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19 ; lower half of the cross sum, shifted left 16, goes to the low word
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0 ; skip the increment unless the low word wrapped
- addi,tr 1,%r20,%r19 ; carry: %r19 = high + 1, and the following copy is nullified
- copy %r20,%r19 ; no carry: %r19 = high
- addl %r22,%r28,%r20 ; low word + running carry
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw 0(0,%r26),%r28 ; current result word
- addl %r20,%r28,%r20 ; + current result word
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28 ; new running carry = high + 1
- copy %r19,%r28 ; new running carry = high
- addib,= -1,%r24,L$0003 ; --count, leave when it reaches zero
- stw %r20,0(0,%r26) ; (delay slot) store the result word
- ldw -8(0,%r29),%r19 ; word 1 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0010
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw -8(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,-8(0,%r23)
- ldw -4(0,%r29),%r19 ; word 2 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0015
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw -4(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,-4(0,%r23)
- ldw 0(0,%r29),%r19 ; word 3 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0020
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi 1,%r19,%r19
- ldw 0(0,%r23),%r28
- addl %r20,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0003
- stw %r20,0(0,%r23)
- ldo 16(%r29),%r29 ; advance all four pointers by one group of 4 words
- ldo 16(%r25),%r25
- ldo 16(%r23),%r23
- bl L$0002,0
- ldo 16(%r26),%r26 ; (delay slot)
-L$0003
- ldw -20(0,%r30),%r2 ; reload the saved return pointer
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_words
- .PROC ; multiply each word read through %r25 by the word in %r23, add the running carry and store the low word through %r26; %r24 = word count, carry word ends up in %r28
- .CALLINFO FRAME=0,CALLS,SAVE_RP
- .ENTRY
- stw %r2,-20(0,%r30) ; save the return pointer, %r2 is used as scratch below
- ldi 0,%r28 ; running carry = 0
- extru %r23,31,16,%r2 ; low 16 bits of the multiplier
- stw %r2,-16(0,%r30) ; the word at -16(%r30) is the scratch slot for moving values between general and FP registers
- extru %r23,15,16,%r23 ; high 16 bits of the multiplier
- ldil L'65536,%r31 ; %r31 = 0x10000, added to the high word when the cross-product sum wraps
- fldws -16(0,%r30),%fr11R ; fr11R = low half of the multiplier
- stw %r23,-16(0,%r30)
- ldo 12(%r26),%r29 ; %r29 = result pointer + 12, used for words 1..3 of each group
- ldo 12(%r25),%r23 ; %r23 = input pointer + 12, used for words 1..3 of each group
- fldws -16(0,%r30),%fr11L ; fr11L = high half of the multiplier
-L$0026
- ldw 0(0,%r25),%r19 ; word 0 of the group
- extru %r19,31,16,%r20 ; its low 16 bits
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19 ; its high 16 bits
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8 ; low * low
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10 ; multiplier low * word high
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9 ; high * high
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22 ; %r22 = low * low
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8 ; multiplier high * word low
- copy %r2,%r19 ; %r19 = first cross product
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20 ; %r20 = high * high
- ldw -16(0,%r30),%r2 ; %r2 = second cross product
- addl %r2,%r19,%r21 ; sum of the two cross products
- comclr,<<= %r19,%r21,0 ; skip the next instruction unless the sum wrapped
- addl %r20,%r31,%r20 ; wrapped: add 0x10000 to the high word
-L$0029
- extru %r21,15,16,%r19 ; upper half of the cross sum goes to the high word
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19 ; lower half of the cross sum, shifted left 16, goes to the low word
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0 ; skip the increment unless the low word wrapped
- addi,tr 1,%r20,%r19 ; carry: %r19 = high + 1, and the following copy is nullified
- copy %r20,%r19 ; no carry: %r19 = high
- addl %r22,%r28,%r20 ; low word + running carry
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28 ; new running carry = high + 1
- copy %r19,%r28 ; new running carry = high
- addib,= -1,%r24,L$0027 ; --count, leave when it reaches zero
- stw %r20,0(0,%r26) ; (delay slot) store the result word
- ldw -8(0,%r23),%r19 ; word 1 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0033
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,-8(0,%r29)
- ldw -4(0,%r23),%r19 ; word 2 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0037
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,-4(0,%r29)
- ldw 0(0,%r23),%r19 ; word 3 of the group: same sequence as above
- extru %r19,31,16,%r20
- stw %r20,-16(0,%r30)
- extru %r19,15,16,%r19
- fldws -16(0,%r30),%fr22L
- stw %r19,-16(0,%r30)
- xmpyu %fr22L,%fr11R,%fr8
- fldws -16(0,%r30),%fr22L
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr11R,%fr22L,%fr10
- ldw -16(0,%r30),%r2
- stw %r20,-16(0,%r30)
- xmpyu %fr22L,%fr11L,%fr9
- fldws -16(0,%r30),%fr22L
- fstws %fr10R,-16(0,%r30)
- copy %r2,%r22
- ldw -16(0,%r30),%r2
- fstws %fr9R,-16(0,%r30)
- xmpyu %fr11L,%fr22L,%fr8
- copy %r2,%r19
- ldw -16(0,%r30),%r2
- fstws %fr8R,-16(0,%r30)
- copy %r2,%r20
- ldw -16(0,%r30),%r2
- addl %r2,%r19,%r21
- comclr,<<= %r19,%r21,0
- addl %r20,%r31,%r20
-L$0041
- extru %r21,15,16,%r19
- addl %r20,%r19,%r20
- zdep %r21,15,16,%r19
- addl %r22,%r19,%r22
- comclr,<<= %r19,%r22,0
- addi,tr 1,%r20,%r19
- copy %r20,%r19
- addl %r22,%r28,%r20
- comclr,<<= %r28,%r20,0
- addi,tr 1,%r19,%r28
- copy %r19,%r28
- addib,= -1,%r24,L$0027
- stw %r20,0(0,%r29)
- ldo 16(%r23),%r23 ; advance all four pointers by one group of 4 words
- ldo 16(%r25),%r25
- ldo 16(%r29),%r29
- bl L$0026,0
- ldo 16(%r26),%r26 ; (delay slot)
-L$0027
- ldw -20(0,%r30),%r2 ; reload the saved return pointer
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
-bn_sqr_words
- .PROC ; square each word read through %r25 and store the 64-bit result as two words through %r26 (first word at the lower address); %r24 = word count
- .CALLINFO FRAME=0,NO_CALLS
- .ENTRY
- ldo 28(%r26),%r23 ; %r23 = result pointer + 28, used for all stores except the first of each group
- ldo 12(%r25),%r28 ; %r28 = input pointer + 12, used for words 1..3 of each group
-L$0046
- ldw 0(0,%r25),%r21 ; word 0 of the group
- extru %r21,31,16,%r22 ; its low 16 bits
- stw %r22,-16(0,%r30) ; the word at -16(%r30) is the scratch slot for moving values between general and FP registers
- extru %r21,15,16,%r21 ; its high 16 bits
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8 ; low * high
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19 ; %r19 = low * high
- xmpyu %fr10L,%fr10R,%fr8 ; low * low
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20 ; top 17 bits of the cross product: the part of 2*low*high*65536 that lands in the high word
- zdep %r19,14,15,%r19 ; low 15 bits of the cross product moved to the top: the part that lands in the low word
- ldw -16(0,%r30),%r29 ; %r29 = low * low
- xmpyu %fr10L,%fr10R,%fr9 ; high * high
- addl %r29,%r19,%r22 ; low result word
- stw %r22,0(0,%r26)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29 ; %r29 = high * high
- addl %r29,%r20,%r21 ; high result word
- comclr,<<= %r19,%r22,0 ; skip the increment unless the low-word addition wrapped
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057 ; --count, leave when it reaches zero
- stw %r21,-24(0,%r23) ; (delay slot) store the high result word
- ldw -8(0,%r28),%r21 ; word 1 of the group: same sequence as above
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-20(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,-16(0,%r23)
- ldw -4(0,%r28),%r21 ; word 2 of the group: same sequence as above
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-12(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,-8(0,%r23)
- ldw 0(0,%r28),%r21 ; word 3 of the group: same sequence as above
- extru %r21,31,16,%r22
- stw %r22,-16(0,%r30)
- extru %r21,15,16,%r21
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- stw %r22,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r21,-16(0,%r30)
- copy %r29,%r19
- xmpyu %fr10L,%fr10R,%fr8
- fldws -16(0,%r30),%fr10L
- stw %r21,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- extru %r19,16,17,%r20
- zdep %r19,14,15,%r19
- ldw -16(0,%r30),%r29
- xmpyu %fr10L,%fr10R,%fr9
- addl %r29,%r19,%r22
- stw %r22,-4(0,%r23)
- fstws %fr9R,-16(0,%r30)
- ldw -16(0,%r30),%r29
- addl %r29,%r20,%r21
- comclr,<<= %r19,%r22,0
- addi 1,%r21,%r21
- addib,= -1,%r24,L$0057
- stw %r21,0(0,%r23)
- ldo 16(%r28),%r28 ; input pointers advance 4 words, result pointers 8 words
- ldo 16(%r25),%r25
- ldo 32(%r23),%r23
- bl L$0046,0
- ldo 32(%r26),%r26 ; (delay slot)
-L$0057
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .IMPORT BN_num_bits_word,CODE
- .IMPORT fprintf,CODE
- .IMPORT __iob,DATA
- .SPACE $TEXT$
- .SUBSPA $LIT$
-
- .align 4
-L$C0000
- .STRING "Division would overflow\x0a\x00"
- .IMPORT abort,CODE
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
-bn_div64
- .PROC ; divide the two-word value %r26:%r25 by %r24, producing the quotient 16 bits per pass; result in %r28. Working registers: %r3 = high word, %r4 = low word, %r5 = divisor, %r6 = pass counter, %r7 = partial result, %r29 = current 16-bit quotient digit
- .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
- .ENTRY
- stw %r2,-20(0,%r30)
- stwm %r8,128(0,%r30) ; save %r8 and allocate the 128-byte frame
- stw %r7,-124(0,%r30)
- stw %r4,-112(0,%r30)
- stw %r3,-108(0,%r30)
- copy %r26,%r3 ; high word of the dividend
- copy %r25,%r4 ; low word of the dividend
- stw %r6,-120(0,%r30)
- ldi 0,%r7 ; partial result = 0
- stw %r5,-116(0,%r30)
- movb,<> %r24,%r5,L$0059 ; %r5 = divisor; continue if it is non-zero
- ldi 2,%r6 ; (delay slot) two passes of 16 quotient bits
- bl L$0076,0 ; divisor is zero: return all ones
- ldi -1,%r28
-L$0059
- .CALL ARGW0=GR
- bl BN_num_bits_word,%r2
- copy %r5,%r26 ; (delay slot) argument = divisor
- ldi 32,%r19
- comb,= %r19,%r28,L$0060 ; bit count is 32: skip the overflow check
- subi 31,%r28,%r19
- mtsar %r19
- zvdepi 1,32,%r19 ; %r19 = 1 shifted left by the bit count
- comb,>>= %r19,%r3,L$0060 ; no overflow when that value is >= the high word
- addil LR'__iob-$global$+32,%r27
- ldo RR'__iob-$global$+32(%r1),%r26 ; first argument: address __iob+32 (presumably stderr -- confirm)
- ldil LR'L$C0000,%r25
- .CALL ARGW0=GR,ARGW1=GR
- bl fprintf,%r2
- ldo RR'L$C0000(%r25),%r25 ; (delay slot) second argument: the message string
- .CALL
- bl abort,%r2
- nop
-L$0060
- comb,>> %r5,%r3,L$0061 ; skip the subtraction when divisor > high word
- subi 32,%r28,%r28 ; (delay slot) %r28 = 32 - bit count = normalisation shift
- sub %r3,%r5,%r3
-L$0061
- comib,= 0,%r28,L$0062 ; no shift needed
- subi 31,%r28,%r19
- mtsar %r19
- zvdep %r5,32,%r5 ; divisor <<= shift
- zvdep %r3,32,%r21 ; high word << shift
- subi 32,%r28,%r20
- mtsar %r20
- vshd 0,%r4,%r20 ; low word >> (32 - shift)
- or %r21,%r20,%r3
- mtsar %r19
- zvdep %r4,32,%r4 ; low word <<= shift
-L$0062
- extru %r5,15,16,%r23 ; %r23 = upper 16 bits of the divisor
- extru %r5,31,16,%r28 ; %r28 = lower 16 bits of the divisor
-L$0063
- extru %r3,15,16,%r19
- comb,<> %r23,%r19,L$0066 ; upper half of the high word differs from the divisor upper half: do a real divide
- copy %r3,%r26 ; (delay slot) dividend for the millicode divide
- bl L$0067,0
- zdepi -1,31,16,%r29 ; (delay slot) digit estimate = 0xffff
-L$0066
- .IMPORT $$divU,MILLICODE
- bl $$divU,%r31
- copy %r23,%r25 ; (delay slot) divisor for the millicode divide
-L$0067
- stw %r29,-16(0,%r30) ; the word at -16(%r30) is the scratch slot for moving values between general and FP registers
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r23,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr8 ; digit * divisor lower half
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr9 ; digit * divisor upper half
- ldw -16(0,%r30),%r8
- fstws %fr9R,-16(0,%r30)
- copy %r8,%r22 ; %r22 = digit * lower half
- ldw -16(0,%r30),%r8
- extru %r4,15,16,%r24 ; %r24 = upper 16 bits of the low word
- copy %r8,%r21 ; %r21 = digit * upper half
-L$0068
- sub %r3,%r21,%r20 ; correction loop: remainder estimate
- copy %r20,%r19
- depi 0,31,16,%r19 ; keep only its upper 16 bits
- comib,<> 0,%r19,L$0069 ; upper bits set: the digit is accepted
- zdep %r20,15,16,%r19
- addl %r19,%r24,%r19
- comb,>>= %r19,%r22,L$0069 ; accepted when this value is >= digit * lower half
- sub %r22,%r28,%r22 ; (delay slot) products are updated by subtraction instead of re-multiplying
- sub %r21,%r23,%r21
- bl L$0068,0
- ldo -1(%r29),%r29 ; (delay slot) digit--
-L$0069
- stw %r29,-16(0,%r30)
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8 ; digit * lower half
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r8
- stw %r23,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- copy %r8,%r19
- xmpyu %fr10L,%fr10R,%fr8 ; digit * upper half
- fstws %fr8R,-16(0,%r30)
- extru %r19,15,16,%r20
- ldw -16(0,%r30),%r8
- zdep %r19,15,16,%r19 ; %r19 = amount to subtract from the low word
- addl %r8,%r20,%r20 ; %r20 = amount to subtract from the high word
- comclr,<<= %r19,%r4,0 ; borrow when the low word is smaller
- addi 1,%r20,%r20
- comb,<<= %r20,%r3,L$0074
- sub %r4,%r19,%r4 ; (delay slot) low word -= %r19
- addl %r3,%r5,%r3 ; high word too small: add the divisor back and lower the digit
- ldo -1(%r29),%r29
-L$0074
- addib,= -1,%r6,L$0064 ; last pass done?
- sub %r3,%r20,%r3 ; (delay slot) high word -= %r20
- zdep %r29,15,16,%r7 ; partial result = digit << 16
- shd %r3,%r4,16,%r3 ; shift the remainder pair left by 16 bits
- bl L$0063,0
- zdep %r4,15,16,%r4
-L$0064
- or %r7,%r29,%r28 ; result = first digit << 16 | second digit
-L$0076
- ldw -148(0,%r30),%r2 ; return pointer saved at entry (-20 before the 128-byte frame)
- ldw -124(0,%r30),%r7
- ldw -120(0,%r30),%r6
- ldw -116(0,%r30),%r5
- ldw -112(0,%r30),%r4
- ldw -108(0,%r30),%r3
- bv 0(%r2)
- ldwm -128(0,%r30),%r8 ; (delay slot) restore %r8 and release the frame
- .EXIT
- .PROCEND
+++ /dev/null
- .SPACE $PRIVATE$
- .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
- .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
- .SPACE $TEXT$
- .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
- .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
- .IMPORT $global$,DATA
- .IMPORT $$dyncall,MILLICODE
-; gcc_compiled.:
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_add_words
- .PROC ; multiply each word read through %r25 by the word in %r23 with a single 32x32 xmpyu, add the word at %r26 and the running carry, store it back; %r24 = word count, carry word returned in %r28
- .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4
- .ENTRY
- stw %r2,-20(0,%r30)
- stwm %r4,64(0,%r30) ; save %r4 and allocate the 64-byte frame
- copy %r24,%r31 ; %r31 = word count
- stw %r3,-60(0,%r30)
- ldi 0,%r20 ; running carry = 0
- ldo 12(%r26),%r2 ; %r2 = result pointer + 12, used for words 1..3 of each group
- stw %r23,-16(0,%r30)
- copy %r25,%r3 ; %r3 = input pointer
- ldo 12(%r3),%r1 ; %r1 = input pointer + 12, used for words 1..3 of each group
- fldws -16(0,%r30),%fr8L ; fr8L = multiplier
-L$0010
- copy %r20,%r25 ; carry as the low half of a 64-bit addend (%r24:%r25)
- ldi 0,%r24
- fldws 0(0,%r3),%fr9L ; word 0 of the group
- ldw 0(0,%r26),%r19 ; current result word
- xmpyu %fr8L,%fr9L,%fr9 ; 64-bit product
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28 ; product, word stored at the lower address
- ldw -12(0,%r30),%r29 ; product, word stored at the higher address
- ldi 0,%r22
- add %r23,%r29,%r29 ; + current result word ...
- addc %r22,%r28,%r28 ; ... with the carry propagated into the other product word
- add %r25,%r29,%r29 ; + running carry ...
- addc %r24,%r28,%r28 ; ... with the carry propagated again
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20 ; new running carry
- addib,= -1,%r31,L$0011 ; --count, leave when it reaches zero
- stw %r29,0(0,%r26) ; (delay slot) store the result word
- copy %r20,%r25 ; word 1 of the group: same sequence as above
- ldi 0,%r24
- fldws -8(0,%r1),%fr9L
- ldw -8(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,-8(0,%r2)
- copy %r20,%r25 ; word 2 of the group: same sequence as above
- ldi 0,%r24
- fldws -4(0,%r1),%fr9L
- ldw -4(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,-4(0,%r2)
- copy %r20,%r25 ; word 3 of the group: same sequence as above
- ldi 0,%r24
- fldws 0(0,%r1),%fr9L
- ldw 0(0,%r2),%r19
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r19,%r23
- ldw -16(0,%r30),%r28
- ldw -12(0,%r30),%r29
- ldi 0,%r22
- add %r23,%r29,%r29
- addc %r22,%r28,%r28
- add %r25,%r29,%r29
- addc %r24,%r28,%r28
- copy %r28,%r21
- ldi 0,%r20
- copy %r21,%r20
- addib,= -1,%r31,L$0011
- stw %r29,0(0,%r2)
- ldo 16(%r1),%r1 ; advance all four pointers by one group of 4 words
- ldo 16(%r3),%r3
- ldo 16(%r2),%r2
- bl L$0010,0
- ldo 16(%r26),%r26 ; (delay slot)
-L$0011
- copy %r20,%r28 ; return the final carry
- ldw -84(0,%r30),%r2 ; return pointer saved at entry (-20 before the 64-byte frame)
- ldw -60(0,%r30),%r3
- bv 0(%r2)
- ldwm -64(0,%r30),%r4 ; (delay slot) restore %r4 and release the frame
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
-bn_mul_words
- .PROC ; multiply each word read through %r25 by the word in %r23 with a single 32x32 xmpyu, add the running carry and store the result word through %r26; %r24 = word count, carry word left in %r28
- .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3
- .ENTRY
- stw %r2,-20(0,%r30)
- copy %r25,%r2 ; %r2 = input pointer
- stwm %r4,64(0,%r30) ; save %r4 and allocate the 64-byte frame
- copy %r24,%r19 ; %r19 = word count
- ldi 0,%r28 ; running carry = 0
- stw %r23,-16(0,%r30)
- ldo 12(%r26),%r31 ; %r31 = result pointer + 12, used for words 1..3 of each group
- ldo 12(%r2),%r29 ; %r29 = input pointer + 12, used for words 1..3 of each group
- fldws -16(0,%r30),%fr8L ; fr8L = multiplier
-L$0026
- fldws 0(0,%r2),%fr9L ; word 0 of the group
- xmpyu %fr8L,%fr9L,%fr9 ; 64-bit product
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21 ; carry as the low half of a 64-bit addend (%r20:%r21)
- ldi 0,%r20
- ldw -16(0,%r30),%r24 ; product, word stored at the lower address
- ldw -12(0,%r30),%r25 ; product, word stored at the higher address
- add %r21,%r25,%r25 ; + running carry ...
- addc %r20,%r24,%r24 ; ... with the carry propagated into the other product word
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28 ; new running carry
- addib,= -1,%r19,L$0027 ; --count, leave when it reaches zero
- stw %r25,0(0,%r26) ; (delay slot) store the result word
- fldws -8(0,%r29),%fr9L ; word 1 of the group: same sequence as above
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,-8(0,%r31)
- fldws -4(0,%r29),%fr9L ; word 2 of the group: same sequence as above
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,-4(0,%r31)
- fldws 0(0,%r29),%fr9L ; word 3 of the group: same sequence as above
- xmpyu %fr8L,%fr9L,%fr9
- fstds %fr9,-16(0,%r30)
- copy %r28,%r21
- ldi 0,%r20
- ldw -16(0,%r30),%r24
- ldw -12(0,%r30),%r25
- add %r21,%r25,%r25
- addc %r20,%r24,%r24
- copy %r24,%r23
- ldi 0,%r22
- copy %r23,%r28
- addib,= -1,%r19,L$0027
- stw %r25,0(0,%r31)
- ldo 16(%r29),%r29 ; advance all four pointers by one group of 4 words
- ldo 16(%r2),%r2
- ldo 16(%r31),%r31
- bl L$0026,0
- ldo 16(%r26),%r26 ; (delay slot)
-L$0027
- ldw -84(0,%r30),%r2 ; return pointer saved at entry (-20 before the 64-byte frame)
- bv 0(%r2)
- ldwm -64(0,%r30),%r4 ; (delay slot) restore %r4 and release the frame
- .EXIT
- .PROCEND
- .align 4
- .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
-bn_sqr_words
- .PROC ; square each word read through %r25 with a single 32x32 xmpyu and store the two product words through %r26; %r24 = word count
- .CALLINFO FRAME=0,NO_CALLS
- .ENTRY
- ldo 28(%r26),%r19 ; %r19 = result pointer + 28, used for all stores except the first of each group
- ldo 12(%r25),%r28 ; %r28 = input pointer + 12, used for words 1..3 of each group
-L$0042
- fldws 0(0,%r25),%fr8L ; word 0 of the group, loaded into both halves of fr8
- fldws 0(0,%r25),%fr8R
- xmpyu %fr8L,%fr8R,%fr8 ; 64-bit square
- fstds %fr8,-16(0,%r30)
- ldw -16(0,%r30),%r22 ; product word stored at the lower scratch address
- ldw -12(0,%r30),%r23 ; product word stored at the higher scratch address
- stw %r23,0(0,%r26) ; %r23 goes to the first result slot of the pair
- copy %r22,%r21
- ldi 0,%r20
- addib,= -1,%r24,L$0049 ; --count, leave when it reaches zero
- stw %r21,-24(0,%r19) ; (delay slot) %r22 goes to the second result slot of the pair
- fldws -8(0,%r28),%fr8L ; word 1 of the group: same sequence as above
- fldws -8(0,%r28),%fr8R
- xmpyu %fr8L,%fr8R,%fr8
- fstds %fr8,-16(0,%r30)
- ldw -16(0,%r30),%r22
- ldw -12(0,%r30),%r23
- stw %r23,-20(0,%r19)
- copy %r22,%r21
- ldi 0,%r20
- addib,= -1,%r24,L$0049
- stw %r21,-16(0,%r19)
- fldws -4(0,%r28),%fr8L ; word 2 of the group: same sequence as above
- fldws -4(0,%r28),%fr8R
- xmpyu %fr8L,%fr8R,%fr8
- fstds %fr8,-16(0,%r30)
- ldw -16(0,%r30),%r22
- ldw -12(0,%r30),%r23
- stw %r23,-12(0,%r19)
- copy %r22,%r21
- ldi 0,%r20
- addib,= -1,%r24,L$0049
- stw %r21,-8(0,%r19)
- fldws 0(0,%r28),%fr8L ; word 3 of the group: same sequence as above
- fldws 0(0,%r28),%fr8R
- xmpyu %fr8L,%fr8R,%fr8
- fstds %fr8,-16(0,%r30)
- ldw -16(0,%r30),%r22
- ldw -12(0,%r30),%r23
- stw %r23,-4(0,%r19)
- copy %r22,%r21
- ldi 0,%r20
- addib,= -1,%r24,L$0049
- stw %r21,0(0,%r19)
- ldo 16(%r28),%r28 ; input pointers advance 4 words, result pointers 8 words
- ldo 16(%r25),%r25
- ldo 32(%r19),%r19
- bl L$0042,0
- ldo 32(%r26),%r26 ; (delay slot)
-L$0049
- bv,n 0(%r2)
- .EXIT
- .PROCEND
- .IMPORT BN_num_bits_word,CODE
- .IMPORT fprintf,CODE
- .IMPORT __iob,DATA
- .SPACE $TEXT$
- .SUBSPA $LIT$
-
- .align 4
-L$C0000
- .STRING "Division would overflow (%d)\x0a\x00"
- .IMPORT abort,CODE
- .SPACE $TEXT$
- .SUBSPA $CODE$
-
- .align 4
- .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
-bn_div64
- .PROC ; divide the two-word value %r26:%r25 by %r24, producing the quotient 16 bits per pass; result in %r28. Working registers: %r3 = high word, %r4 = low word, %r5 = divisor, %r6 = pass counter, %r7 = partial result, %r29 = current 16-bit quotient digit
- .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
- .ENTRY
- stw %r2,-20(0,%r30)
- stwm %r8,128(0,%r30) ; save %r8 and allocate the 128-byte frame
- stw %r7,-124(0,%r30)
- stw %r4,-112(0,%r30)
- stw %r3,-108(0,%r30)
- copy %r26,%r3 ; high word of the dividend
- copy %r25,%r4 ; low word of the dividend
- stw %r6,-120(0,%r30)
- ldi 0,%r7 ; partial result = 0
- stw %r5,-116(0,%r30)
- movb,<> %r24,%r5,L$0051 ; %r5 = divisor; continue if it is non-zero
- ldi 2,%r6 ; (delay slot) two passes of 16 quotient bits
- bl L$0068,0 ; divisor is zero: return all ones
- ldi -1,%r28
-L$0051
- .CALL ARGW0=GR
- bl BN_num_bits_word,%r2
- copy %r5,%r26 ; (delay slot) argument = divisor
- copy %r28,%r24 ; bit count kept in %r24, which is still live as the third fprintf argument below
- ldi 32,%r19
- comb,= %r19,%r24,L$0052 ; bit count is 32: skip the overflow check
- subi 31,%r24,%r19
- mtsar %r19
- zvdepi 1,32,%r19 ; %r19 = 1 shifted left by the bit count
- comb,>>= %r19,%r3,L$0052 ; no overflow when that value is >= the high word
- addil LR'__iob-$global$+32,%r27
- ldo RR'__iob-$global$+32(%r1),%r26 ; first argument: address __iob+32 (presumably stderr -- confirm)
- ldil LR'L$C0000,%r25
- .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR
- bl fprintf,%r2
- ldo RR'L$C0000(%r25),%r25 ; (delay slot) second argument: the message string
- .CALL
- bl abort,%r2
- nop
-L$0052
- comb,>> %r5,%r3,L$0053 ; skip the subtraction when divisor > high word
- subi 32,%r24,%r24 ; (delay slot) %r24 = 32 - bit count = normalisation shift
- sub %r3,%r5,%r3
-L$0053
- comib,= 0,%r24,L$0054 ; no shift needed
- subi 31,%r24,%r19
- mtsar %r19
- zvdep %r5,32,%r5 ; divisor <<= shift
- zvdep %r3,32,%r21 ; high word << shift
- subi 32,%r24,%r20
- mtsar %r20
- vshd 0,%r4,%r20 ; low word >> (32 - shift)
- or %r21,%r20,%r3
- mtsar %r19
- zvdep %r4,32,%r4 ; low word <<= shift
-L$0054
- extru %r5,15,16,%r23 ; %r23 = upper 16 bits of the divisor
- extru %r5,31,16,%r28 ; %r28 = lower 16 bits of the divisor
-L$0055
- extru %r3,15,16,%r19
- comb,<> %r23,%r19,L$0058 ; upper half of the high word differs from the divisor upper half: do a real divide
- copy %r3,%r26 ; (delay slot) dividend for the millicode divide
- bl L$0059,0
- zdepi -1,31,16,%r29 ; (delay slot) digit estimate = 0xffff
-L$0058
- .IMPORT $$divU,MILLICODE
- bl $$divU,%r31
- copy %r23,%r25 ; (delay slot) divisor for the millicode divide
-L$0059
- stw %r29,-16(0,%r30) ; the word at -16(%r30) is the scratch slot for moving values between general and FP registers
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- stw %r23,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr8 ; digit * divisor lower half
- fldws -16(0,%r30),%fr10R
- fstws %fr8R,-16(0,%r30)
- xmpyu %fr10L,%fr10R,%fr9 ; digit * divisor upper half
- ldw -16(0,%r30),%r8
- fstws %fr9R,-16(0,%r30)
- copy %r8,%r22 ; %r22 = digit * lower half
- ldw -16(0,%r30),%r8
- extru %r4,15,16,%r24 ; %r24 = upper 16 bits of the low word
- copy %r8,%r21 ; %r21 = digit * upper half
-L$0060
- sub %r3,%r21,%r20 ; correction loop: remainder estimate
- copy %r20,%r19
- depi 0,31,16,%r19 ; keep only its upper 16 bits
- comib,<> 0,%r19,L$0061 ; upper bits set: the digit is accepted
- zdep %r20,15,16,%r19
- addl %r19,%r24,%r19
- comb,>>= %r19,%r22,L$0061 ; accepted when this value is >= digit * lower half
- sub %r22,%r28,%r22 ; (delay slot) products are updated by subtraction instead of re-multiplying
- sub %r21,%r23,%r21
- bl L$0060,0
- ldo -1(%r29),%r29 ; (delay slot) digit--
-L$0061
- stw %r29,-16(0,%r30)
- fldws -16(0,%r30),%fr10L
- stw %r28,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- xmpyu %fr10L,%fr10R,%fr8 ; digit * lower half
- fstws %fr8R,-16(0,%r30)
- ldw -16(0,%r30),%r8
- stw %r23,-16(0,%r30)
- fldws -16(0,%r30),%fr10R
- copy %r8,%r19
- xmpyu %fr10L,%fr10R,%fr8 ; digit * upper half
- fstws %fr8R,-16(0,%r30)
- extru %r19,15,16,%r20
- ldw -16(0,%r30),%r8
- zdep %r19,15,16,%r19 ; %r19 = amount to subtract from the low word
- addl %r8,%r20,%r20 ; %r20 = amount to subtract from the high word
- comclr,<<= %r19,%r4,0 ; borrow when the low word is smaller
- addi 1,%r20,%r20
- comb,<<= %r20,%r3,L$0066
- sub %r4,%r19,%r4 ; (delay slot) low word -= %r19
- addl %r3,%r5,%r3 ; high word too small: add the divisor back and lower the digit
- ldo -1(%r29),%r29
-L$0066
- addib,= -1,%r6,L$0056 ; last pass done?
- sub %r3,%r20,%r3 ; (delay slot) high word -= %r20
- zdep %r29,15,16,%r7 ; partial result = digit << 16
- shd %r3,%r4,16,%r3 ; shift the remainder pair left by 16 bits
- bl L$0055,0
- zdep %r4,15,16,%r4
-L$0056
- or %r7,%r29,%r28 ; result = first digit << 16 | second digit
-L$0068
- ldw -148(0,%r30),%r2 ; return pointer saved at entry (-20 before the 128-byte frame)
- ldw -124(0,%r30),%r7
- ldw -120(0,%r30),%r6
- ldw -116(0,%r30),%r5
- ldw -112(0,%r30),%r4
- ldw -108(0,%r30),%r3
- bv 0(%r2)
- ldwm -128(0,%r30),%r8 ; (delay slot) restore %r8 and release the frame
- .EXIT
- .PROCEND
+++ /dev/null
- .file 1 "../bn_mulw.c"
- .set nobopt
- .option pic2
-
- # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C
-
- # Cc1 defaults:
- # -mabicalls
-
- # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1):
- # -quiet -dumpbase -O2 -o
-
-gcc2_compiled.:
-__gnu_compiled_c:
- .rdata
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20
- .byte 0x24,0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24
- .byte 0x0
-
- .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
- .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
- .byte 0x0
- .text
- .align 2
- .globl bn_mul_add_words
- .ent bn_mul_add_words
-bn_mul_add_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $12,$4 # $12 = first argument: array that is read, added to and written back
- move $14,$5 # $14 = second argument: array of words to multiply
- move $9,$6 # $9 = third argument: word count
- move $13,$7 # $13 = fourth argument: multiplier word
- move $8,$0 # $8 = running carry, starts at 0
- addu $10,$12,12 # $10 = first array + 12, used for words 1..3 of each group
- addu $11,$14,12 # $11 = second array + 12, used for words 1..3 of each group
-$L2:
- lw $6,0($14) # word 0 of the group
- #nop
- multu $13,$6 # HI:LO = multiplier * word
- mfhi $6
- mflo $7
- #nop
- move $5,$8 # carry as the low half of a two-word addend ($4:$5)
- move $4,$0
- lw $3,0($12) # current result word, low half of a two-word addend ($2:$3)
- addu $9,$9,-1 # --count
- move $2,$0
- addu $7,$7,$3 # low += result word
- sltu $8,$7,$3 # 1 if that addition wrapped
- addu $6,$6,$2
- addu $6,$6,$8 # propagate into the high word
- addu $7,$7,$5 # low += running carry
- sltu $2,$7,$5 # 1 if that addition wrapped
- addu $6,$6,$4
- addu $6,$6,$2 # propagate into the high word
- srl $3,$6,0 # shift by 0: plain copy of the high word
- move $2,$0
- move $8,$3 # new running carry
- .set noreorder
- .set nomacro
- beq $9,$0,$L3 # count exhausted: leave
- sw $7,0($12) # (delay slot) store the result word
- .set macro
- .set reorder
-
- lw $6,-8($11) # word 1 of the group: same sequence as above
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,-8($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,-8($10)
- .set macro
- .set reorder
-
- lw $6,-4($11) # word 2 of the group: same sequence as above
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,-4($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,-4($10)
- .set macro
- .set reorder
-
- lw $6,0($11) # word 3 of the group: same sequence as above
- #nop
- multu $13,$6
- mfhi $6
- mflo $7
- #nop
- move $5,$8
- move $4,$0
- lw $3,0($10)
- addu $9,$9,-1
- move $2,$0
- addu $7,$7,$3
- sltu $8,$7,$3
- addu $6,$6,$2
- addu $6,$6,$8
- addu $7,$7,$5
- sltu $2,$7,$5
- addu $6,$6,$4
- addu $6,$6,$2
- srl $3,$6,0
- move $2,$0
- move $8,$3
- .set noreorder
- .set nomacro
- beq $9,$0,$L3
- sw $7,0($10)
- .set macro
- .set reorder
-
- addu $11,$11,16 # advance all four pointers by one group of 4 words
- addu $14,$14,16
- addu $10,$10,16
- .set noreorder
- .set nomacro
- j $L2
- addu $12,$12,16 # (delay slot)
- .set macro
- .set reorder
-
-$L3:
- .set noreorder
- .set nomacro
- j $31
- move $2,$8 # (delay slot) return the final carry
- .set macro
- .set reorder
-
- .end bn_mul_add_words
- # bn_mul_words (compiler output, loop unrolled four times).
- # Registers as used below: $4 output pointer, $5 input pointer, $6 word
- # count, $7 multiplier.  Once the count has been copied to $8, $6 holds
- # the running carry; it is returned in $2.
- .align 2
- .globl bn_mul_words
- .ent bn_mul_words
-bn_mul_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $11,$4
- move $12,$5
- move $8,$6
- move $6,$0
- # $10 and $9 sit 12 bytes past the output/input bases, so words 1..3 of a
- # group are addressed as -8, -4 and 0 from them.
- addu $10,$11,12
- addu $9,$12,12
-$L10:
- # word 0: multiply, add the incoming carry to the low half, and keep the
- # high half plus carry-out as the next carry
- lw $4,0($12)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- # leave when the count reaches zero; the store below is in the delay slot
- beq $8,$0,$L11
- sw $5,0($11)
- .set macro
- .set reorder
-
- # word 1: same sequence
- lw $4,-8($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,-8($10)
- .set macro
- .set reorder
-
- # word 2: same sequence
- lw $4,-4($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,-4($10)
- .set macro
- .set reorder
-
- # word 3: same sequence
- lw $4,0($9)
- #nop
- multu $7,$4
- mfhi $4
- mflo $5
- #nop
- move $3,$6
- move $2,$0
- addu $8,$8,-1
- addu $5,$5,$3
- sltu $6,$5,$3
- addu $4,$4,$2
- addu $4,$4,$6
- srl $3,$4,0
- move $2,$0
- move $6,$3
- .set noreorder
- .set nomacro
- beq $8,$0,$L11
- sw $5,0($10)
- .set macro
- .set reorder
-
- # advance all four pointers by one group (16 bytes) and loop
- addu $9,$9,16
- addu $12,$12,16
- addu $10,$10,16
- .set noreorder
- .set nomacro
- j $L10
- addu $11,$11,16
- .set macro
- .set reorder
-
-$L11:
- # return the final carry
- .set noreorder
- .set nomacro
- j $31
- move $2,$6
- .set macro
- .set reorder
-
- .end bn_mul_words
- # bn_sqr_words (compiler output, loop unrolled four times).
- # Registers as used below: $4 output pointer, $5 input pointer, $6 word
- # count.  Each input word is squared; the low half of the product is
- # stored first and the high half in the following output word.
- .align 2
- .globl bn_sqr_words
- .ent bn_sqr_words
-bn_sqr_words:
- .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- move $9,$4
- # $7 = output base + 28 and $8 = input base + 12, so the remaining slots of
- # a group are reached with small negative offsets
- addu $7,$9,28
- addu $8,$5,12
-$L18:
- # input word 0 -> output words 0 and 1
- lw $2,0($5)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,0($9)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- # leave when the count reaches zero; the store below is in the delay slot
- beq $6,$0,$L19
- sw $3,-24($7)
- .set macro
- .set reorder
-
- # input word 1 -> output words 2 and 3
- lw $2,-8($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-20($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,-16($7)
- .set macro
- .set reorder
-
- # input word 2 -> output words 4 and 5
- lw $2,-4($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-12($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,-8($7)
- .set macro
- .set reorder
-
- # input word 3 -> output words 6 and 7
- lw $2,0($8)
- #nop
- multu $2,$2
- mfhi $2
- mflo $3
- #nop
- addu $6,$6,-1
- sw $3,-4($7)
- srl $3,$2,0
- move $2,$0
- .set noreorder
- .set nomacro
- beq $6,$0,$L19
- sw $3,0($7)
- .set macro
- .set reorder
-
- # input pointers advance 16 bytes, output pointers 32 bytes
- addu $8,$8,16
- addu $5,$5,16
- addu $7,$7,32
- .set noreorder
- .set nomacro
- j $L18
- addu $9,$9,32
- .set macro
- .set reorder
-
-$L19:
- j $31
- .end bn_sqr_words
- # $LC0 is the NUL-terminated text "Division would overflow\n".
- .rdata
- .align 2
-$LC0:
-
- .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e
- .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f
- .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa
- .byte 0x0
- .text
- .align 2
- .globl bn_div64
- .ent bn_div64
- # bn_div64 (compiler output).  The three arguments arrive in $4/$5/$6 and
- # are kept in $16/$17/$18: $16 and $17 are shifted as the high and low
- # words of the value being divided, $18 is the divisor.  The quotient is
- # built in two passes ($19 counts them), one 16-bit digit per pass, and
- # returned in $2.  A zero divisor returns -1.
-bn_div64:
- .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8
- .mask 0x901f0000,-8
- .fmask 0x00000000,0
- .set noreorder
- .cpload $25
- .set reorder
- subu $sp,$sp,56
- .cprestore 16
- sw $16,24($sp)
- move $16,$4
- sw $17,28($sp)
- move $17,$5
- sw $18,32($sp)
- move $18,$6
- sw $20,40($sp)
- move $20,$0
- sw $19,36($sp)
- li $19,0x00000002 # 2
- sw $31,48($sp)
- .set noreorder
- .set nomacro
- bne $18,$0,$L26
- sw $28,44($sp)
- .set macro
- .set reorder
-
- # divisor is zero: return -1
- .set noreorder
- .set nomacro
- j $L43
- li $2,-1 # 0xffffffff
- .set macro
- .set reorder
-
-$L26:
- # $4 = BN_num_bits_word(divisor); unless that is 32, print $LC0 and abort
- # when (1 << bits) is below the high word
- move $4,$18
- jal BN_num_bits_word
- move $4,$2
- li $2,0x00000020 # 32
- .set noreorder
- .set nomacro
- beq $4,$2,$L27
- li $2,0x00000001 # 1
- .set macro
- .set reorder
-
- sll $2,$2,$4
- sltu $2,$2,$16
- .set noreorder
- .set nomacro
- beq $2,$0,$L44
- li $5,0x00000020 # 32
- .set macro
- .set reorder
-
- la $4,__iob+32
- la $5,$LC0
- jal fprintf
- jal abort
-$L27:
- li $5,0x00000020 # 32
-$L44:
- # $4 becomes the shift count 32 - bits (delay slot); the divisor is
- # subtracted from the high word when the high word is not below it
- sltu $2,$16,$18
- .set noreorder
- .set nomacro
- bne $2,$0,$L28
- subu $4,$5,$4
- .set macro
- .set reorder
-
- subu $16,$16,$18
-$L28:
- # $10 = 0xffff0000 mask; with a non-zero shift count, shift the divisor
- # and the high:low pair left by that count
- .set noreorder
- .set nomacro
- beq $4,$0,$L29
- li $10,-65536 # 0xffff0000
- .set macro
- .set reorder
-
- sll $18,$18,$4
- sll $3,$16,$4
- subu $2,$5,$4
- srl $2,$17,$2
- or $16,$3,$2
- sll $17,$17,$4
-$L29:
- # $7 = upper 16 bits of the divisor, $9 = lower 16 bits
- srl $7,$18,16
- andi $9,$18,0xffff
-$L30:
- # digit estimate in $6: 0xffff when the top half of $16 equals $7,
- # otherwise $16 / $7
- srl $2,$16,16
- .set noreorder
- .set nomacro
- beq $2,$7,$L34
- li $6,0x0000ffff # 65535
- .set macro
- .set reorder
-
- divu $6,$16,$7
-$L34:
- mult $6,$9
- mflo $5
- #nop
- #nop
- mult $6,$7
- and $2,$17,$10
- srl $8,$2,16
- mflo $4
-$L35:
- # correction loop: decrement the digit while the estimate is too large
- subu $3,$16,$4
- and $2,$3,$10
- .set noreorder
- .set nomacro
- bne $2,$0,$L36
- sll $2,$3,16
- .set macro
- .set reorder
-
- addu $2,$2,$8
- sltu $2,$2,$5
- .set noreorder
- .set nomacro
- beq $2,$0,$L36
- subu $5,$5,$9
- .set macro
- .set reorder
-
- subu $4,$4,$7
- .set noreorder
- .set nomacro
- j $L35
- addu $6,$6,-1
- .set macro
- .set reorder
-
-$L36:
- # subtract digit * divisor from the high:low pair, with borrow handling
- mult $6,$7
- mflo $5
- #nop
- #nop
- mult $6,$9
- mflo $4
- #nop
- #nop
- srl $3,$4,16
- sll $2,$4,16
- and $4,$2,$10
- sltu $2,$17,$4
- .set noreorder
- .set nomacro
- beq $2,$0,$L40
- addu $5,$5,$3
- .set macro
- .set reorder
-
- addu $5,$5,1
-$L40:
- sltu $2,$16,$5
- .set noreorder
- .set nomacro
- beq $2,$0,$L41
- subu $17,$17,$4
- .set macro
- .set reorder
-
- addu $16,$16,$18
- addu $6,$6,-1
-$L41:
- # after the second pass go to $L31; otherwise save the digit in the upper
- # half of $20 and shift the remainder left 16 bits for the next pass
- addu $19,$19,-1
- .set noreorder
- .set nomacro
- beq $19,$0,$L31
- subu $16,$16,$5
- .set macro
- .set reorder
-
- sll $20,$6,16
- sll $3,$16,16
- srl $2,$17,16
- or $16,$3,$2
- .set noreorder
- .set nomacro
- j $L30
- sll $17,$17,16
- .set macro
- .set reorder
-
-$L31:
- # combine the two digits into the return value
- or $2,$20,$6
-$L43:
- # restore the saved registers and return
- lw $31,48($sp)
- lw $20,40($sp)
- lw $19,36($sp)
- lw $18,32($sp)
- lw $17,28($sp)
- lw $16,24($sp)
- addu $sp,$sp,56
- j $31
- .end bn_div64
-
- .globl abort .text
- .globl fprintf .text
- .globl BN_num_bits_word .text
+++ /dev/null
-#!/usr/local/bin/perl
-
-package alpha;
-use Carp qw(croak cluck);
-
-$label="100";
-
-$n_debug=0;
-$smear_regs=1;
-$reg_alloc=1;
-
-$align="3";
-$com_start="#";
-
-# Start with an empty output buffer.
-sub main'asm_init_output {
-    @out = ();
-}
-
-# Everything emitted so far, as a list of text fragments.
-sub main'asm_get_output {
-    return @out;
-}
-
-# Names recorded through external_label.
-sub main'get_labels {
-    return @labels;
-}
-
-# Record the given names as external labels.
-sub main'external_label {
-    push @labels, @_;
-}
-
-# General registers
-#
-# %regs maps the symbolic names used by the generator scripts to Alpha
-# register operands.  conv() looks names up here and passes anything else
-# through unchanged.
-
-%regs=( 'r0', '$0',
- 'r1', '$1',
- 'r2', '$2',
- 'r3', '$3',
- 'r4', '$4',
- 'r5', '$5',
- 'r6', '$6',
- 'r7', '$7',
- 'r8', '$8',
- 'r9', '$22',
- 'r10', '$23',
- 'r11', '$24',
- 'r12', '$25',
- 'r13', '$27',
- 'r14', '$28',
- # r15..r20 name the same registers as a5..a0 below
- 'r15', '$21', # argc == 5
- 'r16', '$20', # argc == 4
- 'r17', '$19', # argc == 3
- 'r18', '$18', # argc == 2
- 'r19', '$17', # argc == 1
- 'r20', '$16', # argc == 0
- # r21..r26 name the same registers as s0..s5 below
- 'r21', '$9', # save 0
- 'r22', '$10', # save 1
- 'r23', '$11', # save 2
- 'r24', '$12', # save 3
- 'r25', '$13', # save 4
- 'r26', '$14', # save 5
-
- # argument registers
- 'a0', '$16',
- 'a1', '$17',
- 'a2', '$18',
- 'a3', '$19',
- 'a4', '$20',
- 'a5', '$21',
-
- # "save" registers
- 's0', '$9',
- 's1', '$10',
- 's2', '$11',
- 's3', '$12',
- 's4', '$13',
- 's5', '$14',
- 'zero', '$31',
- 'sp', '$30',
- );
-
-# Symbolic names of the six "save" registers, exported to package main.
-$main'reg_s0="r21";
-$main'reg_s1="r22";
-$main'reg_s2="r23";
-$main'reg_s3="r24";
-$main'reg_s4="r25";
-$main'reg_s5="r26";
-
-# Plain list of register operands; not referenced elsewhere in the visible
-# part of this file.
-@reg=( '$0', '$1' ,'$2' ,'$3' ,'$4' ,'$5' ,'$6' ,'$7' ,'$8',
- '$22','$23','$24','$25','$20','$21','$27','$28');
-
-
-# One-line wrappers: each forwards its operands to out1/out2/out3 together
-# with the Alpha mnemonic to emit.
-sub main'sub    { return out3("subq", @_); }
-sub main'add    { return out3("addq", @_); }
-# A register move is emitted as "bis src,src,dst".
-sub main'mov    { return out3("bis", $_[0], $_[0], $_[1]); }
-sub main'or     { return out3("bis", @_); }
-sub main'bis    { return out3("bis", @_); }
-sub main'br     { return out1("br", @_); }
-sub main'ld     { return out2("ldq", @_); }
-sub main'st     { return out2("stq", @_); }
-sub main'cmpult { return out3("cmpult", @_); }
-sub main'cmplt  { return out3("cmplt", @_); }
-sub main'bgt    { return out2("bgt", @_); }
-sub main'ble    { return out2("ble", @_); }
-sub main'blt    { return out2("blt", @_); }
-sub main'mul    { return out3("mulq", @_); }
-sub main'muh    { return out3("umulh", @_); }
-
-$main'QWS=8;
-
-# Append raw text fragments to the output buffer.
-sub main'asm_add {
-    push @out, @_;
-}
-
-# Run the end-of-file hook, then print the whole buffer.
-sub main'asm_finish {
-    main::file_end();
-    print(main::asm_get_output());
-}
-
-# Reset the output buffer, emit the banner comments and the file header.
-# Arguments: ($type, $fn); $fn is the generating script's name and has a
-# trailing ".pl" removed before it is handed to main'file.
-sub main'asm_init {
-    ($type,$fn)=@_;
-    $filename=$fn;
-
-    main::asm_init_output();
-    main::comment(
-        "Don't even think of reading this code",
-        "It was automatically generated by $filename",
-        "Which is a perl program used to generate the alpha assember.",
-        "eric <eay\@cryptsoft.com>",
-        "",
-    );
-
-    $filename =~ s/\.pl$//;
-    return main::file($filename);
-}
-
-# Translate a symbolic register name through %regs; any other operand is
-# returned unchanged.
-sub conv {
-    local($operand)=@_;
-
-    return defined($regs{$operand}) ? $regs{$operand} : $operand;
-}
-
-# Memory operand addressed by a quadword index: the index is scaled by 8.
-sub main'QWPw {
-    local($index,$base)=@_;
-
-    return main::QWP($index*8,$base);
-}
-
-# Build an "offset(register)" memory operand.
-sub main'QWP {
-    local($disp,$base)=@_;
-
-    $ret = $disp . "(" . conv($base) . ")";
-    return $ret;
-}
-
-# Emit a three-operand instruction.  Each operand goes through conv(); the
-# first two are followed by a comma and tab padding computed from their
-# length.
-sub out3 {
-    local($name,$p1,$p2,$p3)=@_;
-
-    push(@out,"\t$name\t");
-    foreach $operand (&conv($p1),&conv($p2)) {
-        push(@out,$operand.",");
-        $l=length($operand)+1;
-        $ll=3-($l+9)/8;
-        $tmp1="\t" x $ll;
-        push(@out,$tmp1);
-    }
-    push(@out,&conv($p3)."\n");
-}
-
-# Emit a two-operand instruction in the same layout as out3.
-sub out2 {
-    local($name,$p1,$p2)=@_;
-
-    $p1=&conv($p1);
-    $l=length($p1)+1;
-    $ll=3-($l+9)/8;
-    $tmp1="\t" x $ll;
-    push(@out,"\t$name\t",$p1.",",$tmp1,&conv($p2)."\n");
-}
-
-# Emit a one-operand instruction.
-sub out1 {
-    local($name,$p1)=@_;
-
-    push(@out,join("","\t",$name,"\t",&conv($p1),"\n"));
-}
-
-# Emit a bare instruction or directive on its own line.
-sub out0 {
-    local($text)=@_;
-
-    push(@out,"\t".$text."\n");
-}
-
-# Emit the file header: two comment lines plus the .file and .set lines.
-sub main'file {
-    local($file)=@_;
-
-    push(@out,<<"EOF");
- # DEC Alpha assember
- # Generated from perl scripts contains in SSLeay
- .file 1 "$file.s"
- .set noat
-EOF
-}
-
-# Emit the prologue directives and entry labels for $func, log the name
-# to STDERR and reset the tracked stack offset.
-sub main'function_begin {
-    local($func)=@_;
-
-    print STDERR "$func\n";
-    push(@out,<<"EOF");
- .text
- .align $align
- .globl $func
- .ent $func
-${func}:
-${func}..ng:
- .frame \$30,0,\$26,0
- .prologue 0
-EOF
-    $stack=0;
-}
-
-# Emit the return instruction and the .end line for $func, then reset the
-# stack offset and the label table.
-sub main'function_end {
-    local($func)=@_;
-
-    push(@out,<<"EOF");
- ret \$31,(\$26),1
- .end $func
-EOF
-    $stack=0;
-    %label=();
-}
-
-# Emit only the return instruction.
-sub main'function_end_A {
-    local($func)=@_;
-
-    push(@out,<<"EOF");
- ret \$31,(\$26),1
-EOF
-}
-
-# Emit only the .end line ($under is prepended to the name), then reset
-# the stack offset and the label table.
-sub main'function_end_B {
-    local($func)=@_;
-
-    push(@out,"\t.end ".$under.$func."\n");
-    $stack=0;
-    %label=();
-}
-
-# Operand for argument number $num (counted from 0).
-sub main'wparam {
-    local($num)=@_;
-
-    # From argument 6 on, an sp-relative memory operand is returned.
-    # NOTE(review): the offset is $stack+$num*8, not $stack+($num-6)*8;
-    # confirm against the Alpha calling convention before relying on it.
-    return(&main'QWP($stack+$num*8,"sp")) unless ($num < 6);
-
-    # Arguments 0..5 are the symbolic registers "r20" down to "r15".
-    return("r".(20-$num));
-}
-
-# Reserve $num quadwords on the stack and track the new offset.
-sub main'stack_push {
-    local($bytes)=$_[0]*8;
-
-    $stack+=$bytes;
-    &main'sub("sp",$bytes,"sp");
-}
-
-# Release $num quadwords from the stack and track the new offset.
-sub main'stack_pop {
-    local($bytes)=$_[0]*8;
-
-    $stack-=$bytes;
-    &main'add("sp",$bytes,"sp");
-}
-
-# sp-relative operand for temporary slot number $_[0].
-sub main'swtmp {
-    local($slot)=@_;
-
-    return main::QWP($slot*8,"sp");
-}
-
-# Should use swtmp, which is above sp. Linux can trash the stack above esp
-#sub main'wtmp
-# {
-# local($num)=@_;
-#
-# return(&main'QWP(-($num+1)*4,"esp","",0));
-# }
-
-# Emit each argument as a comment line; an empty or all-blank argument
-# becomes an empty line instead.
-sub main'comment {
-    foreach $text (@_) {
-        push(@out, ($text =~ /^\s*$/) ? "\n" : "\t$com_start $text $com_end\n");
-    }
-}
-
-# Return the assembler label for a symbolic name, allocating the next
-# number from $label on first use.
-sub main'label {
-    local($key)=@_;
-
-    unless (defined($label{$key})) {
-        $label{$key}=$label;
-        $label++;
-    }
-    return('$'.$label{$key});
-}
-
-# Emit the definition of the label for a symbolic name, allocating it on
-# first use.  (An optional ".align" before the label is disabled.)
-sub main'set_label {
-    local($key)=@_;
-
-    unless (defined($label{$key})) {
-        $label{$key}=$label;
-        $label++;
-    }
-    push(@out,'$'.$label{$key}.":\n");
-}
-
-# Nothing is emitted at end of file.
-sub main'file_end {
-    return;
-}
-
-# Emit a .long data directive.
-sub main'data_word {
-    local($value)=@_;
-
-    push(@out,"\t.long ".$value."\n");
-}
-
-# Register allocator state: free names, an unused "taken" list, the number
-# currently handed out and the high-water mark.
-@pool_free=();
-@pool_taken=();
-$curr_num=0;
-$max=0;
-
-# Fill the free pool with "r<N>" names from 14+(6-$args) down to 0, log
-# the pool to STDERR and reset the usage counters.
-sub main'init_pool {
-    local($args)=@_;
-    local($i);
-
-    @pool_free=();
-    $i=14+(6-$args);
-    while ($i >= 0) {
-        push(@pool_free,"r$i");
-        $i--;
-    }
-    print STDERR "START :register pool:@pool_free\n";
-    $curr_num=$max=0;
-}
-
-# Log the high-water mark and the remaining free pool to STDERR.
-sub main'fin_pool {
-    printf(STDERR "END %2d:register pool:@pool_free\n", $max);
-}
-
-# Claim one specific register: it is removed from the free pool if it is
-# there, and the usage counters are updated.  Returns its arguments.
-sub main'GR {
-    local($r)=@_;
-    local(@kept);
-
-    foreach $candidate (@pool_free) {
-        if ($candidate eq $r) {
-            $curr_num++;
-            $max=$curr_num if ($curr_num > $max);
-            next;
-        }
-        push(@kept,$candidate);
-    }
-    @pool_free=@kept;
-    print STDERR "GR:@pool_free\n" if $reg_alloc;
-    return(@_);
-}
-
-# Allocate $num registers (a count of 0 is treated as 1) from the end of
-# the free pool.  Croaks when the pool holds too few.
-sub main'NR {
-    local($num)=@_;
-    local(@ret);
-
-    $num=1 if $num == 0;
-    croak "out of registers: want $num, have @pool_free"
-        if ($#pool_free < ($num-1));
-    for (; $num > 0; $num--) {
-        push(@ret,pop @pool_free);
-        $curr_num++;
-        $max=$curr_num if ($curr_num > $max);
-    }
-    print STDERR "nr @ret\n" if $n_debug;
-    print STDERR "NR:@pool_free\n" if $reg_alloc;
-    return(@ret);
-}
-
-# Return registers to the free pool.  Croaks on a register that is already
-# free or on a name that is not of the form "r<digits>".
-sub main'FR {
-    local(@r)=@_;
-    local($v,$w);
-
-    print STDERR "fr @r\n" if $n_debug;
-    foreach $w (@pool_free) {
-        foreach $v (@r) {
-            croak "double register free of $v (@pool_free)" if ($v eq $w);
-        }
-    }
-    foreach $v (@r) {
-        unless ($v =~ /^r\d+$/) {
-            croak "bad argument to FR";
-        }
-        # With $smear_regs set, freed registers go to the front of the pool.
-        $smear_regs ? unshift(@pool_free,$v) : push(@pool_free,$v);
-        $curr_num--;
-    }
-    print STDERR "FR:@pool_free\n" if $reg_alloc;
-}
-1;
+++ /dev/null
-#!/usr/local/bin/perl
-
-# x86 CodeWarrior assembler for NetWare
-
-# This file is a slightly modified version of x86nasm.pl. The Metrowerks
-# compiler for NetWare doesn't prefix symbols with an underscore.
-#
-
-# Prefix and counter for generated labels; incremented with ++ each time
-# label/set_label allocates a new one.
-$label="L000";
-
-# Low-byte register for each 32- and 16-bit register that has one
-# (looked up by main'LB).
-%lb=( 'eax', 'al',
- 'ebx', 'bl',
- 'ecx', 'cl',
- 'edx', 'dl',
- 'ax', 'al',
- 'bx', 'bl',
- 'cx', 'cl',
- 'dx', 'dl',
- );
-
-# High-byte register for the same registers (looked up by main'HB).
-%hb=( 'eax', 'ah',
- 'ebx', 'bh',
- 'ecx', 'ch',
- 'edx', 'dh',
- 'ax', 'ah',
- 'bx', 'bh',
- 'cx', 'ch',
- 'dx', 'dh',
- );
-
-# Clear the output buffer and start it with a banner comment.
-sub main'asm_init_output {
-    @out = ();
-    comment("NetWare: assembly for CodeWarrior assembler (mwasmnlm)");
-}
-
-# Everything emitted so far.
-sub main'asm_get_output {
-    return @out;
-}
-
-# Names recorded by function_begin and external_label.
-sub main'get_labels {
-    return @labels;
-}
-
-# Record the names and emit an ".extern" line for each of them.
-sub main'external_label {
-    push(@labels, @_);
-    for my $symbol (@_) {
-        push(@out, ".extern\t$symbol\n");
-    }
-}
-
-# Low-byte register of $_[0]; dies when the register has none.
-sub main'LB {
-    my ($reg) = @_;
-
-    die "$reg does not have a 'low byte'\n" unless defined($lb{$reg});
-    return $lb{$reg};
-}
-
-# High-byte register of $_[0]; dies when the register has none.
-sub main'HB {
-    my ($reg) = @_;
-
-    die "$reg does not have a 'high byte'\n" unless defined($hb{$reg});
-    return $hb{$reg};
-}
-
-# Byte-sized memory operand; arguments are passed on to get_mem.
-sub main'BP {
-    return get_mem("BYTE", @_);
-}
-
-# Dword-sized memory operand; arguments are passed on to get_mem.
-sub main'DWP {
-    return get_mem("DWORD", @_);
-}
-
-# Byte constant: the arguments joined with the list separator.
-sub main'BC {
-    return join($", @_);
-}
-
-# Dword constant: the arguments joined with the list separator.
-sub main'DWC {
-    return join($", @_);
-}
-
-# Reserve $num dwords on the stack and track the new offset.
-sub main'stack_push {
-    my $bytes = $_[0] * 4;
-
-    $stack += $bytes;
-    &main'sub("esp", $bytes);
-}
-
-# Release $num dwords from the stack and track the new offset.
-sub main'stack_pop {
-    my $bytes = $_[0] * 4;
-
-    $stack -= $bytes;
-    &main'add("esp", $bytes);
-}
-
-# Build a memory operand of the form "<size> PTR [...]".
-# Arguments: size keyword, displacement/symbol ($addr), base register
-# ($reg1), index register ($reg2) and index scale ($idx).
-sub get_mem
- {
- my($size,$addr,$reg1,$reg2,$idx)=@_;
- my($t,$post);
- my($ret)="$size PTR [";
- $addr =~ s/^\s+//;
- # "X+Y" as the address: X (after conv) replaces the index register and Y
- # becomes the address.
- if ($addr =~ /^(.+)\+(.+)$/)
- {
- $reg2=&conv($1);
- $addr="$2";
- }
- elsif ($addr =~ /^[_a-zA-Z]/)
- {
- $addr="$addr";
- }
-
- # An address with a minus sign after its first character is parenthesised.
- if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; }
-
- # NOTE(review): %regs is not defined in the visible part of this file, so
- # these two lookups look like no-ops here -- confirm.
- $reg1="$regs{$reg1}" if defined($regs{$reg1});
- $reg2="$regs{$reg2}" if defined($regs{$reg2});
- # A non-empty, non-"0" address goes in front as "addr+", unless it starts
- # with "-", in which case it is appended after the registers.
- if (($addr ne "") && ($addr ne 0))
- {
- if ($addr !~ /^-/)
- { $ret.="${addr}+"; }
- else { $post=$addr; }
- }
- if ($reg2 ne "")
- {
- # index register, optional "*scale", then "+base" and the trailing part
- $t="";
- $t="*$idx" if ($idx != 0);
- $reg1="+".$reg1 if ("$reg1$post" ne "");
- $ret.="$reg2$t$reg1$post]";
- }
- else
- {
- $ret.="$reg1$post]"
- }
- $ret =~ s/\+\]/]/; # in case $addr was the only argument
- return($ret);
- }
-
-# Two-operand instructions.
-sub main'mov     { return out2("mov", @_); }
-sub main'movb    { return out2("mov", @_); }
-sub main'and     { return out2("and", @_); }
-sub main'or      { return out2("or", @_); }
-sub main'shl     { return out2("shl", @_); }
-sub main'shr     { return out2("shr", @_); }
-sub main'xor     { return out2("xor", @_); }
-sub main'xorb    { return out2("xor", @_); }
-sub main'add     { return out2("add", @_); }
-sub main'adc     { return out2("adc", @_); }
-sub main'sub     { return out2("sub", @_); }
-sub main'rotl    { return out2("rol", @_); }
-sub main'rotr    { return out2("ror", @_); }
-sub main'exch    { return out2("xchg", @_); }
-sub main'cmp     { return out2("cmp", @_); }
-sub main'lea     { return out2("lea", @_); }
-# One-operand instructions.
-sub main'mul     { return out1("mul", @_); }
-sub main'div     { return out1("div", @_); }
-sub main'dec     { return out1("dec", @_); }
-sub main'inc     { return out1("inc", @_); }
-sub main'jmp     { return out1("jmp", @_); }
-sub main'jmp_ptr { return out1p("jmp", @_); }
-
-# Conditional jumps.
-sub main'je      { return out1("je ", @_); }
-sub main'jle     { return out1("jle ", @_); }
-sub main'jz      { return out1("jz ", @_); }
-sub main'jge     { return out1("jge ", @_); }
-sub main'jl      { return out1("jl ", @_); }
-sub main'ja      { return out1("ja ", @_); }
-sub main'jae     { return out1("jae ", @_); }
-sub main'jb      { return out1("jb ", @_); }
-sub main'jbe     { return out1("jbe ", @_); }
-sub main'jc      { return out1("jc ", @_); }
-sub main'jnc     { return out1("jnc ", @_); }
-sub main'jnz     { return out1("jnz ", @_); }
-sub main'jne     { return out1("jne ", @_); }
-sub main'jno     { return out1("jno ", @_); }
-
-# push/pop also keep the tracked stack offset in step.
-sub main'push    { out1("push", @_); $stack += 4; }
-sub main'pop     { out1("pop", @_); $stack -= 4; }
-# bswap also marks the output as needing the 486 instruction set.
-sub main'bswap   { out1("bswap", @_); return using486(); }
-sub main'not     { return out1("not", @_); }
-sub main'call    { return out1("call", $_[0]); }
-sub main'ret     { return out0("ret"); }
-sub main'nop     { return out0("nop"); }
-
-# Emit a two-operand instruction: mnemonic, first operand with a comma,
-# tab padding computed from its length, second operand, newline.
-sub out2 {
-    my ($mnemonic, $dst, $src) = @_;
-
-    my $first = conv($dst) . ",";
-    my $pad = "\t" x (4 - (length($first) + 9) / 8);
-    push(@out, "\t$mnemonic\t", $first, $pad, conv($src), "\n");
-}
-
-# Emit an instruction without operands.
-sub out0 {
-    my ($mnemonic) = @_;
-
-    push(@out, "\t" . $mnemonic . "\n");
-}
-
-# Emit a one-operand instruction.
-sub out1 {
-    my ($mnemonic, $operand) = @_;
-
-    push(@out, join("", "\t", $mnemonic, "\t", conv($operand), "\n"));
-}
-
-# Rewrite the first C-style hex constant (0x1F) in an operand into the
-# "01Fh" form.
-sub conv {
-    my ($operand) = @_;
-
-    $operand =~ s/0x([0-9A-Fa-f]+)/0$1h/;
-    return $operand;
-}
-
-# On the first call only, rewrite ".386" to ".486" in everything already
-# emitted.
-sub using486 {
-    return if $using486;
-
-    $using486 = $using486 + 1;
-    return grep(s/\.386/\.486/, @out);
-}
-
-# Emit the section directive that opens the file.
-sub main'file {
-    push(@out, ".section .text\n");
-}
-
-# Record $func as a label, emit its global declaration and entry label,
-# and save ebp/ebx/esi/edi.  The tracked stack offset starts at 20.
-sub main'function_begin {
-    my ($func) = @_;
-
-    push(@labels, $func);
-    push(@out, <<"EOF");
-.global $func
-$func:
- push ebp
- push ebx
- push esi
- push edi
-EOF
-    $stack = 20;
-}
-
-# Emit only the global declaration and entry label; the tracked stack
-# offset starts at 4.
-sub main'function_begin_B {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
-.global $func
-$func:
-EOF
-    $stack = 4;
-}
-
-# Restore edi/esi/ebx/ebp and return, then reset the stack offset and the
-# label table.
-sub main'function_end {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-EOF
-    $stack = 0;
-    %label = ();
-}
-
-# Reset the stack offset and the label table; nothing is emitted.
-sub main'function_end_B {
-    $stack = 0;
-    %label = ();
-}
-
-# Emit the register restores and the return; no state is reset.
-sub main'function_end_A {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-EOF
-}
-
-# Nothing is emitted at end of file.
-sub main'file_end {
-    return;
-}
-
-# Dword operand for argument number $num: esp-relative, offset by the
-# tracked stack depth.
-sub main'wparam {
-    my $offset = $stack + $_[0] * 4;
-
-    return main::DWP($offset, "esp", "", 0);
-}
-
-# Dword operand for temporary slot number $_[0], esp-relative.
-sub main'swtmp {
-    my $offset = $_[0] * 4;
-
-    return main::DWP($offset, "esp", "", 0);
-}
-
-# Should use swtmp, which is above esp. Linux can trash the stack above esp
-#sub main'wtmp
-# {
-# my($num)=@_;
-#
-# return(&main'DWP(-(($num+1)*4),"esp","",0));
-# }
-
-# Emit each argument as a ";" comment line.
-sub main'comment {
-    for my $text (@_) {
-        push(@out, "\t; $text\n");
-    }
-}
-
-# Return the generated label for a symbolic name: the current $label
-# prefix followed by the name.  $label is incremented on allocation.
-sub main'label {
-    my ($key) = @_;
-
-    unless (defined($label{$key})) {
-        $label{$key} = $label . $key;
-        $label++;
-    }
-    return $label{$key};
-}
-
-# Emit the definition of the generated label, allocating it on first use.
-sub main'set_label {
-    my ($key) = @_;
-
-    unless (defined($label{$key})) {
-        $label{$key} = $label . $key;
-        $label++;
-    }
-    push(@out, $label{$key} . ":\n");
-}
-
-# Emit a .long data directive.
-sub main'data_word {
-    my ($value) = @_;
-
-    push(@out, "\t.long\t" . $value . "\n");
-}
-
-# Like out1, with an extra space in front of the operand.
-sub out1p {
-    my ($mnemonic, $operand) = @_;
-
-    push(@out, join("", "\t", $mnemonic, "\t ", conv($operand), "\n"));
-}
-
-# Load the address of $sym into $dst with lea.
-sub main'picmeup {
-    local($dst,$sym)=@_;
-
-    my $operand = &main'DWP($sym);
-    return &main'lea($dst, $operand);
-}
-
-# Emit a pop without changing the tracked stack offset.
-sub main'blindpop {
-    return out1("pop", @_);
-}
+++ /dev/null
-#!/usr/local/bin/perl
-
-# x86 nasm assembler for NetWare
-
-# This file is a slightly modified version of x86nasm.pl. The Metrowerks
-# compiler for NetWare doesn't prefix symbols with an underscore.
-#
-
-# Prefix and counter for generated labels; incremented with ++ each time
-# label/set_label allocates a new one.
-$label="L000";
-
-# Low-byte register for each 32- and 16-bit register that has one
-# (looked up by main'LB).
-%lb=( 'eax', 'al',
- 'ebx', 'bl',
- 'ecx', 'cl',
- 'edx', 'dl',
- 'ax', 'al',
- 'bx', 'bl',
- 'cx', 'cl',
- 'dx', 'dl',
- );
-
-# High-byte register for the same registers (looked up by main'HB).
-%hb=( 'eax', 'ah',
- 'ebx', 'bh',
- 'ecx', 'ch',
- 'edx', 'dh',
- 'ax', 'ah',
- 'bx', 'bh',
- 'cx', 'ch',
- 'dx', 'dh',
- );
-
-# Clear the output buffer and start it with a banner comment.
-sub main'asm_init_output {
-    @out = ();
-    comment("NetWare: assembly for NASM assembler (nasmw)");
-}
-
-# Everything emitted so far.
-sub main'asm_get_output {
-    return @out;
-}
-
-# Names recorded by function_begin and external_label.
-sub main'get_labels {
-    return @labels;
-}
-
-# Record the names and emit an "extern" line for each of them.
-sub main'external_label {
-    push(@labels, @_);
-    for my $symbol (@_) {
-        push(@out, "extern\t$symbol\n");
-    }
-}
-
-# Low-byte register of $_[0]; dies when the register has none.
-sub main'LB {
-    my ($reg) = @_;
-
-    die "$reg does not have a 'low byte'\n" unless defined($lb{$reg});
-    return $lb{$reg};
-}
-
-# High-byte register of $_[0]; dies when the register has none.
-sub main'HB {
-    my ($reg) = @_;
-
-    die "$reg does not have a 'high byte'\n" unless defined($hb{$reg});
-    return $hb{$reg};
-}
-
-# Byte memory operand; arguments are passed on to get_mem.
-sub main'BP {
-    return get_mem("BYTE", @_);
-}
-
-# Dword memory operand; arguments are passed on to get_mem.
-sub main'DWP {
-    return get_mem("DWORD", @_);
-}
-
-# Byte constant: "BYTE" followed by the arguments.
-sub main'BC {
-    return "BYTE " . join($", @_);
-}
-
-# Dword constant: "DWORD" followed by the arguments.
-sub main'DWC {
-    return "DWORD " . join($", @_);
-}
-
-# Reserve $num dwords on the stack and track the new offset.
-sub main'stack_push {
-    my $bytes = $_[0] * 4;
-
-    $stack += $bytes;
-    &main'sub("esp", $bytes);
-}
-
-# Release $num dwords from the stack and track the new offset.
-sub main'stack_pop {
-    my $bytes = $_[0] * 4;
-
-    $stack -= $bytes;
-    &main'add("esp", $bytes);
-}
-
-# Build a bracketed memory operand "[...]".
-# Arguments: size keyword (accepted but not used in the result),
-# displacement/symbol ($addr), base register ($reg1), index register
-# ($reg2) and index scale ($idx).
-sub get_mem
- {
- my($size,$addr,$reg1,$reg2,$idx)=@_;
- my($t,$post);
- my($ret)="[";
- $addr =~ s/^\s+//;
- # "X+Y" as the address: X (after conv) replaces the index register and Y
- # becomes the address.
- if ($addr =~ /^(.+)\+(.+)$/)
- {
- $reg2=&conv($1);
- $addr="$2";
- }
- elsif ($addr =~ /^[_a-zA-Z]/)
- {
- $addr="$addr";
- }
-
- # An address with a minus sign after its first character is parenthesised.
- if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; }
-
- # NOTE(review): %regs is not defined in the visible part of this file, so
- # these two lookups look like no-ops here -- confirm.
- $reg1="$regs{$reg1}" if defined($regs{$reg1});
- $reg2="$regs{$reg2}" if defined($regs{$reg2});
- # A non-empty, non-"0" address goes in front as "addr+", unless it starts
- # with "-", in which case it is appended after the registers.
- if (($addr ne "") && ($addr ne 0))
- {
- if ($addr !~ /^-/)
- { $ret.="${addr}+"; }
- else { $post=$addr; }
- }
- if ($reg2 ne "")
- {
- # index register, optional "*scale", then "+base" and the trailing part
- $t="";
- $t="*$idx" if ($idx != 0);
- $reg1="+".$reg1 if ("$reg1$post" ne "");
- $ret.="$reg2$t$reg1$post]";
- }
- else
- {
- $ret.="$reg1$post]"
- }
- $ret =~ s/\+\]/]/; # in case $addr was the only argument
- return($ret);
- }
-
-# Two-operand instructions.
-sub main'mov     { return out2("mov", @_); }
-sub main'movb    { return out2("mov", @_); }
-sub main'and     { return out2("and", @_); }
-sub main'or      { return out2("or", @_); }
-sub main'shl     { return out2("shl", @_); }
-sub main'shr     { return out2("shr", @_); }
-sub main'xor     { return out2("xor", @_); }
-sub main'xorb    { return out2("xor", @_); }
-sub main'add     { return out2("add", @_); }
-sub main'adc     { return out2("adc", @_); }
-sub main'sub     { return out2("sub", @_); }
-sub main'rotl    { return out2("rol", @_); }
-sub main'rotr    { return out2("ror", @_); }
-sub main'exch    { return out2("xchg", @_); }
-sub main'cmp     { return out2("cmp", @_); }
-sub main'lea     { return out2("lea", @_); }
-# One-operand instructions.
-sub main'mul     { return out1("mul", @_); }
-sub main'div     { return out1("div", @_); }
-sub main'dec     { return out1("dec", @_); }
-sub main'inc     { return out1("inc", @_); }
-sub main'jmp     { return out1("jmp", @_); }
-sub main'jmp_ptr { return out1p("jmp", @_); }
-
-# This is a bit of a kludge: every conditional branch is declared NEAR.
-sub main'je      { return out1("je NEAR", @_); }
-sub main'jle     { return out1("jle NEAR", @_); }
-sub main'jz      { return out1("jz NEAR", @_); }
-sub main'jge     { return out1("jge NEAR", @_); }
-sub main'jl      { return out1("jl NEAR", @_); }
-sub main'ja      { return out1("ja NEAR", @_); }
-sub main'jae     { return out1("jae NEAR", @_); }
-sub main'jb      { return out1("jb NEAR", @_); }
-sub main'jbe     { return out1("jbe NEAR", @_); }
-sub main'jc      { return out1("jc NEAR", @_); }
-sub main'jnc     { return out1("jnc NEAR", @_); }
-sub main'jnz     { return out1("jnz NEAR", @_); }
-sub main'jne     { return out1("jne NEAR", @_); }
-sub main'jno     { return out1("jno NEAR", @_); }
-
-# push/pop also keep the tracked stack offset in step.
-sub main'push    { out1("push", @_); $stack += 4; }
-sub main'pop     { out1("pop", @_); $stack -= 4; }
-# bswap also marks the output as needing the 486 instruction set.
-sub main'bswap   { out1("bswap", @_); return using486(); }
-sub main'not     { return out1("not", @_); }
-sub main'call    { return out1("call", $_[0]); }
-sub main'ret     { return out0("ret"); }
-sub main'nop     { return out0("nop"); }
-
-# Emit a two-operand instruction: mnemonic, first operand with a comma,
-# tab padding computed from its length, second operand, newline.
-sub out2 {
-    my ($mnemonic, $dst, $src) = @_;
-
-    my $first = conv($dst) . ",";
-    my $pad = "\t" x (4 - (length($first) + 9) / 8);
-    push(@out, "\t$mnemonic\t", $first, $pad, conv($src), "\n");
-}
-
-# Emit an instruction without operands.
-sub out0 {
-    my ($mnemonic) = @_;
-
-    push(@out, "\t" . $mnemonic . "\n");
-}
-
-# Emit a one-operand instruction.
-sub out1 {
-    my ($mnemonic, $operand) = @_;
-
-    push(@out, join("", "\t", $mnemonic, "\t", conv($operand), "\n"));
-}
-
-# Rewrite the first C-style hex constant (0x1F) in an operand into the
-# "01Fh" form.
-sub conv {
-    my ($operand) = @_;
-
-    $operand =~ s/0x([0-9A-Fa-f]+)/0$1h/;
-    return $operand;
-}
-
-# On the first call only, rewrite ".386" to ".486" in everything already
-# emitted.
-sub using486 {
-    return if $using486;
-
-    $using486 = $using486 + 1;
-    return grep(s/\.386/\.486/, @out);
-}
-
-# Emit the segment directive that opens the file.
-sub main'file {
-    push(@out, "segment .text\n");
-}
-
-# Record $func as a label, emit its global declaration and entry label,
-# and save ebp/ebx/esi/edi.  The tracked stack offset starts at 20.
-sub main'function_begin {
-    my ($func) = @_;
-
-    push(@labels, $func);
-    push(@out, <<"EOF");
-global $func
-$func:
- push ebp
- push ebx
- push esi
- push edi
-EOF
-    $stack = 20;
-}
-
-# Emit only the global declaration and entry label; the tracked stack
-# offset starts at 4.
-sub main'function_begin_B {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
-global $func
-$func:
-EOF
-    $stack = 4;
-}
-
-# Restore edi/esi/ebx/ebp and return, then reset the stack offset and the
-# label table.
-sub main'function_end {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-EOF
-    $stack = 0;
-    %label = ();
-}
-
-# Reset the stack offset and the label table; nothing is emitted.
-sub main'function_end_B {
-    $stack = 0;
-    %label = ();
-}
-
-# Emit the register restores and the return; no state is reset.
-sub main'function_end_A {
-    my ($func) = @_;
-
-    push(@out, <<"EOF");
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-EOF
-}
-
-# Nothing is emitted at end of file.
-sub main'file_end {
-    return;
-}
-
-# Dword operand for argument number $num: esp-relative, offset by the
-# tracked stack depth.
-sub main'wparam {
-    my $offset = $stack + $_[0] * 4;
-
-    return main::DWP($offset, "esp", "", 0);
-}
-
-# Dword operand for temporary slot number $_[0], esp-relative.
-sub main'swtmp {
-    my $offset = $_[0] * 4;
-
-    return main::DWP($offset, "esp", "", 0);
-}
-
-# Should use swtmp, which is above esp. Linux can trash the stack above esp
-#sub main'wtmp
-# {
-# my($num)=@_;
-#
-# return(&main'DWP(-(($num+1)*4),"esp","",0));
-# }
-
-# Emit each argument as a ";" comment line.
-sub main'comment {
-    for my $text (@_) {
-        push(@out, "\t; $text\n");
-    }
-}
-
-# Return the generated label for a symbolic name: "$", the current $label
-# prefix, then the name.  $label is incremented on allocation.
-sub main'label {
-    my ($key) = @_;
-
-    unless (defined($label{$key})) {
-        $label{$key} = '$' . $label . $key;
-        $label++;
-    }
-    return $label{$key};
-}
-
-# Emit the definition of the generated label, allocating it on first use.
-# The name is built exactly as in main'label (with the leading "$"), so the
-# stored spelling no longer depends on which of the two subs sees a name
-# first.
-sub main'set_label {
-    my ($key) = @_;
-
-    unless (defined($label{$key})) {
-        $label{$key} = '$' . $label . $key;
-        $label++;
-    }
-    push(@out, $label{$key} . ":\n");
-}
-
-# Emit a DD data directive.
-sub main'data_word {
-    my ($value) = @_;
-
-    push(@out, "\tDD\t" . $value . "\n");
-}
-
-# Like out1, with an extra space in front of the operand.
-sub out1p {
-    my ($mnemonic, $operand) = @_;
-
-    push(@out, join("", "\t", $mnemonic, "\t ", conv($operand), "\n"));
-}
-
-# Load the address of $sym into $dst with lea.
-sub main'picmeup {
-    local($dst,$sym)=@_;
-
-    my $operand = &main'DWP($sym);
-    return &main'lea($dst, $operand);
-}
-
-# Emit a pop without changing the tracked stack offset.
-sub main'blindpop {
-    return out1("pop", @_);
-}