1 .ident "sparcv8.s, Version 1.1"
2 .ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
12 * ====================================================================
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 * See bn_asm.sparc.v8plus.S for more details.
27 * 1.1 - new loop unrolling model(*)
28 * - 10% performance boost(*)
30 * (*) see bn_asm.sparc.v8plus.S for details
33 .section ".text",#alloc,#execinstr
38 .global bn_mul_add_words
40 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
47 bg,a .L_bn_mul_add_words_proceed
52 .L_bn_mul_add_words_proceed:
54 bz .L_bn_mul_add_words_tail
63 ba .L_bn_mul_add_words_warm_loop
66 .L_bn_mul_add_words_loop:
78 .L_bn_mul_add_words_warm_loop:
111 bnz,a .L_bn_mul_add_words_loop
115 bnz,a .L_bn_mul_add_words_tail
117 .L_bn_mul_add_words_return:
122 .L_bn_mul_add_words_tail:
131 bz .L_bn_mul_add_words_return
144 bz .L_bn_mul_add_words_return
158 .type bn_mul_add_words,#function
159 .size bn_mul_add_words,(.-bn_mul_add_words)
165 * BN_ULONG bn_mul_words(rp,ap,num,w)
172 bg,a .L_bn_mul_words_proceeed
177 .L_bn_mul_words_proceeed:
179 bz .L_bn_mul_words_tail
182 .L_bn_mul_words_loop:
214 bnz,a .L_bn_mul_words_loop
218 bnz,a .L_bn_mul_words_tail
220 .L_bn_mul_words_return:
225 .L_bn_mul_words_tail:
231 bz .L_bn_mul_words_return
241 bz .L_bn_mul_words_return
252 .type bn_mul_words,#function
253 .size bn_mul_words,(.-bn_mul_words)
258 * void bn_sqr_words(r,a,n)
264 bg,a .L_bn_sqr_words_proceeed
269 .L_bn_sqr_words_proceeed:
271 bz .L_bn_sqr_words_tail
274 .L_bn_sqr_words_loop:
302 bnz,a .L_bn_sqr_words_loop
307 bnz,a .L_bn_sqr_words_tail
309 .L_bn_sqr_words_return:
313 .L_bn_sqr_words_tail:
318 bz .L_bn_sqr_words_return
327 bz .L_bn_sqr_words_return
338 .type bn_sqr_words,#function
339 .size bn_sqr_words,(.-bn_sqr_words)
345 * BN_ULONG bn_div_words(h,l,d)
354 .type bn_div_words,#function
355 .size bn_div_words,(.-bn_div_words)
361 * BN_ULONG bn_add_words(rp,ap,bp,n)
362 * BN_ULONG *rp,*ap,*bp;
367 bg,a .L_bn_add_words_proceed
372 .L_bn_add_words_proceed:
374 bz .L_bn_add_words_tail
381 ba .L_bn_add_words_warm_loop
385 .L_bn_add_words_loop:
393 .L_bn_add_words_warm_loop:
412 bnz,a .L_bn_add_words_loop
417 bnz,a .L_bn_add_words_tail
419 .L_bn_add_words_return:
423 .L_bn_add_words_tail:
429 bz .L_bn_add_words_return
439 bz .L_bn_add_words_return
450 .type bn_add_words,#function
451 .size bn_add_words,(.-bn_add_words)
457 * BN_ULONG bn_sub_words(rp,ap,bp,n)
458 * BN_ULONG *rp,*ap,*bp;
463 bg,a .L_bn_sub_words_proceed
468 .L_bn_sub_words_proceed:
470 bz .L_bn_sub_words_tail
477 ba .L_bn_sub_words_warm_loop
481 .L_bn_sub_words_loop:
489 .L_bn_sub_words_warm_loop:
508 bnz,a .L_bn_sub_words_loop
513 bnz,a .L_bn_sub_words_tail
515 .L_bn_sub_words_return:
519 .L_bn_sub_words_tail:
525 bz .L_bn_sub_words_return
535 bz .L_bn_sub_words_return
546 .type bn_sub_words,#function
547 .size bn_sub_words,(.-bn_sub_words)
549 #define FRAME_SIZE -96
552 * Here is register usage map for *all* routines below.
561 #define a_3_ [%i1+12]
563 #define a_4_ [%i1+16]
565 #define a_5_ [%i1+20]
567 #define a_6_ [%i1+24]
569 #define a_7_ [%i1+28]
577 #define b_3_ [%i2+12]
579 #define b_4_ [%i2+16]
581 #define b_5_ [%i2+20]
583 #define b_6_ [%i2+24]
585 #define b_7_ [%i2+28]
593 .global bn_mul_comba8
595 * void bn_mul_comba8(r,a,b)
599 save %sp,FRAME_SIZE,%sp
602 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
605 st c_1,[%i0] !r[0]=c1;
607 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
611 addxcc %g0,t_2,c_3 !=
614 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
618 st c_2,[%i0+4] !r[1]=c2;
621 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
624 addxcc c_1,t_2,c_1 !=
627 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
633 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
636 addxcc c_1,t_2,c_1 !=
638 st c_3,[%i0+8] !r[2]=c3;
640 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
645 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
651 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
657 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
662 st c_1,[%i0+12] !r[3]=c1;
664 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
669 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
674 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
680 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
686 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
691 st c_2,[%i0+16] !r[4]=c2;
693 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
698 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
703 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
708 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
711 addxcc c_1,t_2,c_1 !=
714 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
720 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
723 addxcc c_1,t_2,c_1 !=
725 st c_3,[%i0+20] !r[5]=c3;
727 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
732 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
737 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
740 addxcc c_2,t_2,c_2 !=
742 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
747 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
753 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
756 addxcc c_2,t_2,c_2 !=
759 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
763 st c_1,[%i0+24] !r[6]=c1;
766 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
769 addxcc c_3,t_2,c_3 !=
771 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
776 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
781 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
786 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
789 addxcc c_3,t_2,c_3 !=
791 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
797 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
802 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
805 addxcc c_3,t_2,c_3 !=
807 st c_2,[%i0+28] !r[7]=c2;
809 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
814 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
819 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
822 addxcc c_1,t_2,c_1 !=
824 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
829 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
834 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
839 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
844 st c_3,[%i0+32] !r[8]=c3;
846 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
851 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
856 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
859 addxcc c_2,t_2,c_2 !=
861 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
866 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
871 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
876 st c_1,[%i0+36] !r[9]=c1;
878 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
883 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
888 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
893 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
896 addxcc c_3,t_2,c_3 !=
898 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
903 st c_2,[%i0+40] !r[10]=c2;
905 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
910 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
913 addxcc c_1,t_2,c_1 !=
915 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
920 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
924 st c_3,[%i0+44] !r[11]=c3;
927 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
930 addxcc c_2,t_2,c_2 !=
932 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
937 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
941 st c_1,[%i0+48] !r[12]=c1;
944 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
947 addxcc c_3,t_2,c_3 !=
949 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
954 st c_2,[%i0+52] !r[13]=c2;
956 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
961 st c_3,[%i0+56] !r[14]=c3;
962 st c_1,[%i0+60] !r[15]=c1;
967 .type bn_mul_comba8,#function
968 .size bn_mul_comba8,(.-bn_mul_comba8)
972 .global bn_mul_comba4
974 * void bn_mul_comba4(r,a,b)
978 save %sp,FRAME_SIZE,%sp
981 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
984 st c_1,[%i0] !r[0]=c1;
986 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
993 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
998 st c_2,[%i0+4] !r[1]=c2;
1000 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
1006 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
1012 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1017 st c_3,[%i0+8] !r[2]=c3;
1019 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
1024 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1027 addxcc c_2,t_2,c_2 !=
1030 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1031 addcc c_1,t_1,c_1 !=
1035 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1040 st c_1,[%i0+12] !r[3]=c1;
1042 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1047 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1048 addcc c_2,t_1,c_2 !=
1052 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1057 st c_2,[%i0+16] !r[4]=c2;
1059 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1064 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1065 addcc c_3,t_1,c_3 !=
1068 st c_3,[%i0+20] !r[5]=c3;
1071 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1074 addxcc c_2,t_2,c_2 !=
1075 st c_1,[%i0+24] !r[6]=c1;
1076 st c_2,[%i0+28] !r[7]=c2;
1081 .type bn_mul_comba4,#function
1082 .size bn_mul_comba4,(.-bn_mul_comba4)
1086 .global bn_sqr_comba8
1088 save %sp,FRAME_SIZE,%sp
1091 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1093 st c_1,[%i0] !r[0]=c1;
1096 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1103 st c_2,[%i0+4] !r[1]=c2;
1106 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1109 addxcc c_1,t_2,c_1 !=
1115 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1120 st c_3,[%i0+8] !r[2]=c3;
1122 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1131 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1134 addxcc c_2,t_2,c_2 !=
1139 st c_1,[%i0+12] !r[3]=c1;
1141 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1147 addxcc c_3,t_2,c_3 !=
1149 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1155 addxcc c_3,t_2,c_3 !=
1158 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1159 addcc c_2,t_1,c_2 !=
1162 st c_2,[%i0+16] !r[4]=c2;
1165 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1168 addxcc c_1,t_2,c_1 !=
1173 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1176 addxcc c_1,t_2,c_1 !=
1182 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1188 addxcc c_1,t_2,c_1 !=
1190 st c_3,[%i0+20] !r[5]=c3;
1192 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1193 addcc c_1,t_1,c_1 !=
1197 addcc c_1,t_1,c_1 !=
1200 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1201 addcc c_1,t_1,c_1 !=
1205 addcc c_1,t_1,c_1 !=
1208 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1209 addcc c_1,t_1,c_1 !=
1213 addcc c_1,t_1,c_1 !=
1217 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1222 st c_1,[%i0+24] !r[6]=c1;
1224 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1230 addxcc c_3,t_2,c_3 !=
1232 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1238 addxcc c_3,t_2,c_3 !=
1240 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1246 addxcc c_3,t_2,c_3 !=
1248 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1254 addxcc c_3,t_2,c_3 !=
1256 st c_2,[%i0+28] !r[7]=c2;
1258 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1259 addcc c_3,t_1,c_3 !=
1263 addcc c_3,t_1,c_3 !=
1266 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1267 addcc c_3,t_1,c_3 !=
1271 addcc c_3,t_1,c_3 !=
1274 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1275 addcc c_3,t_1,c_3 !=
1279 addcc c_3,t_1,c_3 !=
1282 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1283 addcc c_3,t_1,c_3 !=
1286 st c_3,[%i0+32] !r[8]=c3;
1289 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1292 addxcc c_2,t_2,c_2 !=
1297 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1300 addxcc c_2,t_2,c_2 !=
1305 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1308 addxcc c_2,t_2,c_2 !=
1313 st c_1,[%i0+36] !r[9]=c1;
1315 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1321 addxcc c_3,t_2,c_3 !=
1323 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1329 addxcc c_3,t_2,c_3 !=
1331 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1336 st c_2,[%i0+40] !r[10]=c2;
1338 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1346 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1353 st c_3,[%i0+44] !r[11]=c3;
1356 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1359 addxcc c_2,t_2,c_2 !=
1364 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1367 addxcc c_2,t_2,c_2 !=
1369 st c_1,[%i0+48] !r[12]=c1;
1371 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1372 addcc c_2,t_1,c_2 !=
1376 addcc c_2,t_1,c_2 !=
1379 st c_2,[%i0+52] !r[13]=c2;
1382 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1385 addxcc c_1,t_2,c_1 !=
1386 st c_3,[%i0+56] !r[14]=c3;
1387 st c_1,[%i0+60] !r[15]=c1;
1392 .type bn_sqr_comba8,#function
1393 .size bn_sqr_comba8,(.-bn_sqr_comba8)
1397 .global bn_sqr_comba4
1399 * void bn_sqr_comba4(r,a)
1403 save %sp,FRAME_SIZE,%sp
1405 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1408 st c_1,[%i0] !r[0]=c1;
1411 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1420 st c_2,[%i0+4] !r[1]=c2;
1422 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1428 addxcc c_1,t_2,c_1 !=
1431 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1432 addcc c_3,t_1,c_3 !=
1435 st c_3,[%i0+8] !r[2]=c3;
1438 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1441 addxcc c_2,t_2,c_2 !=
1446 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1449 addxcc c_2,t_2,c_2 !=
1454 st c_1,[%i0+12] !r[3]=c1;
1456 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1462 addxcc c_3,t_2,c_3 !=
1464 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1469 st c_2,[%i0+16] !r[4]=c2;
1471 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1478 st c_3,[%i0+20] !r[5]=c3;
1481 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1484 addxcc c_2,t_2,c_2 !=
1485 st c_1,[%i0+24] !r[6]=c1;
1486 st c_2,[%i0+28] !r[7]=c2;
1491 .type bn_sqr_comba4,#function
1492 .size bn_sqr_comba4,(.-bn_sqr_comba4)