1 .ident "sparcv8.s, Version 1.3"
2 .ident "SPARC v8 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
5 * ====================================================================
6 * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9 * Rights for redistribution and usage in source and binary forms are
10 * granted according to the OpenSSL license. Warranty of any kind is
12 * ====================================================================
16 * This is my modest contribution to the OpenSSL project (see
17 * http://www.openssl.org/ for more information about it) and is
18 * a drop-in SuperSPARC ISA replacement for crypto/bn/bn_asm.c
19 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
21 * See bn_asm.sparc.v8plus.S for more details.
27 * 1.1 - new loop unrolling model(*);
28 * 1.2 - made gas friendly;
29 * 1.3 - fixed problem with /usr/ccs/lib/cpp;
31 * (*) see bn_asm.sparc.v8plus.S for details
34 .section ".text",#alloc,#execinstr
35 .file "bn_asm.sparc.v8.S"
! bn_mul_add_words -- exported routine; its C prototype is on the
! comment line just below the .global directive.
! Branch skeleton of the routine, in file order:
!   entry guard   bg,a  -> _proceed
!   _proceed      bz    -> _tail, otherwise ba -> _warm_loop
!   _loop         target of the bnz,a back-edge; _warm_loop is a second
!                 entry point further down inside the same loop body
!   after loop    bnz,a -> _tail, otherwise fall through to _return
!   _tail         two bz exits back up to _return
! The ,a suffix sets the annul bit: the delay-slot instruction of that
! conditional branch executes only when the branch is taken.
! NOTE(review): presumably _loop is the unrolled bulk path and _tail
! handles the leftover words -- confirm against the complete routine.
39 .global bn_mul_add_words
41 * BN_ULONG bn_mul_add_words(rp,ap,num,w)
! Entry guard on the condition codes set by the preceding compare.
48 bg,a .L_bn_mul_add_words_proceed
53 .L_bn_mul_add_words_proceed:
! Zero result of the preceding cc-setting instruction: skip the loop.
55 bz .L_bn_mul_add_words_tail
! First iteration enters the loop at its warm entry point.
64 ba .L_bn_mul_add_words_warm_loop
67 .L_bn_mul_add_words_loop:
79 .L_bn_mul_add_words_warm_loop:
! Loop back-edge.
112 bnz,a .L_bn_mul_add_words_loop
! Loop finished; non-zero here means work is left for the tail.
116 bnz,a .L_bn_mul_add_words_tail
118 .L_bn_mul_add_words_return:
123 .L_bn_mul_add_words_tail:
! Tail exits: each bz leaves as soon as its tested value reaches zero.
132 bz .L_bn_mul_add_words_return
145 bz .L_bn_mul_add_words_return
! ELF symbol metadata: mark as function, size = bytes from label to here.
159 .type bn_mul_add_words,#function
160 .size bn_mul_add_words,(.-bn_mul_add_words)
166 * BN_ULONG bn_mul_words(rp,ap,num,w)
! bn_mul_words -- branch skeleton, in file order:
!   entry guard   bg,a  -> _proceeed
!   _proceeed     bz    -> _tail, otherwise fall into _loop
!   _loop         target of the bnz,a back-edge
!   after loop    bnz,a -> _tail, otherwise fall through to _return
!   _tail         two bz exits back up to _return
! The ,a suffix sets the annul bit: the delay-slot instruction of that
! conditional branch executes only when the branch is taken.
! NOTE(review): the label is spelled with three e letters (proceeed),
! unlike the _proceed labels of bn_mul_add_words, bn_add_words and
! bn_sub_words. Branch and definition agree, so it resolves; cosmetic only.
! NOTE(review): presumably _loop is the unrolled bulk path and _tail
! handles the leftover words -- confirm against the complete routine.
173 bg,a .L_bn_mul_words_proceeed
178 .L_bn_mul_words_proceeed:
! Zero result of the preceding cc-setting instruction: skip the loop.
180 bz .L_bn_mul_words_tail
183 .L_bn_mul_words_loop:
! Loop back-edge.
215 bnz,a .L_bn_mul_words_loop
! Loop finished; non-zero here means work is left for the tail.
219 bnz,a .L_bn_mul_words_tail
221 .L_bn_mul_words_return:
226 .L_bn_mul_words_tail:
! Tail exits: each bz leaves as soon as its tested value reaches zero.
232 bz .L_bn_mul_words_return
242 bz .L_bn_mul_words_return
! ELF symbol metadata: mark as function, size = bytes from label to here.
253 .type bn_mul_words,#function
254 .size bn_mul_words,(.-bn_mul_words)
259 * void bn_sqr_words(r,a,n)
! bn_sqr_words -- branch skeleton, in file order:
!   entry guard   bg,a  -> _proceeed
!   _proceeed     bz    -> _tail, otherwise fall into _loop
!   _loop         target of the bnz,a back-edge
!   after loop    bnz,a -> _tail, otherwise fall through to _return
!   _tail         two bz exits back up to _return
! The ,a suffix sets the annul bit: the delay-slot instruction of that
! conditional branch executes only when the branch is taken.
! NOTE(review): the label is spelled with three e letters (proceeed);
! branch and definition agree, so it resolves. Cosmetic only.
! NOTE(review): presumably _loop is the unrolled bulk path and _tail
! handles the leftover words -- confirm against the complete routine.
265 bg,a .L_bn_sqr_words_proceeed
270 .L_bn_sqr_words_proceeed:
! Zero result of the preceding cc-setting instruction: skip the loop.
272 bz .L_bn_sqr_words_tail
275 .L_bn_sqr_words_loop:
! Loop back-edge.
303 bnz,a .L_bn_sqr_words_loop
! Loop finished; non-zero here means work is left for the tail.
308 bnz,a .L_bn_sqr_words_tail
310 .L_bn_sqr_words_return:
314 .L_bn_sqr_words_tail:
! Tail exits: each bz leaves as soon as its tested value reaches zero.
319 bz .L_bn_sqr_words_return
328 bz .L_bn_sqr_words_return
! ELF symbol metadata: mark as function, size = bytes from label to here.
339 .type bn_sqr_words,#function
340 .size bn_sqr_words,(.-bn_sqr_words)
346 * BN_ULONG bn_div_words(h,l,d)
! bn_div_words -- the prototype above names the three arguments h, l, d.
! NOTE(review): presumably returns the quotient of the two-word value
! h:l divided by d, as the name suggests -- confirm against the body.
! ELF symbol metadata: mark as function, size = bytes from label to here.
355 .type bn_div_words,#function
356 .size bn_div_words,(.-bn_div_words)
362 * BN_ULONG bn_add_words(rp,ap,bp,n)
363 * BN_ULONG *rp,*ap,*bp;
! bn_add_words -- branch skeleton, in file order:
!   entry guard   bg,a  -> _proceed
!   _proceed      bz    -> _tail, otherwise ba -> _warm_loop
!   _loop         target of the bnz,a back-edge; _warm_loop is a second
!                 entry point further down inside the same loop body
!   after loop    bnz,a -> _tail, otherwise fall through to _return
!   _tail         two bz exits back up to _return
! The ,a suffix sets the annul bit: the delay-slot instruction of that
! conditional branch executes only when the branch is taken.
! NOTE(review): presumably _loop is the unrolled bulk path and _tail
! handles the leftover words, with the BN_ULONG result being the final
! carry -- confirm against the complete routine.
368 bg,a .L_bn_add_words_proceed
373 .L_bn_add_words_proceed:
! Zero result of the preceding cc-setting instruction: skip the loop.
375 bz .L_bn_add_words_tail
! First iteration enters the loop at its warm entry point.
382 ba .L_bn_add_words_warm_loop
386 .L_bn_add_words_loop:
394 .L_bn_add_words_warm_loop:
! Loop back-edge.
413 bnz,a .L_bn_add_words_loop
! Loop finished; non-zero here means work is left for the tail.
418 bnz,a .L_bn_add_words_tail
420 .L_bn_add_words_return:
424 .L_bn_add_words_tail:
! Tail exits: each bz leaves as soon as its tested value reaches zero.
430 bz .L_bn_add_words_return
440 bz .L_bn_add_words_return
! ELF symbol metadata: mark as function, size = bytes from label to here.
451 .type bn_add_words,#function
452 .size bn_add_words,(.-bn_add_words)
458 * BN_ULONG bn_sub_words(rp,ap,bp,n)
459 * BN_ULONG *rp,*ap,*bp;
! bn_sub_words -- branch skeleton, in file order:
!   entry guard   bg,a  -> _proceed
!   _proceed      bz    -> _tail, otherwise ba -> _warm_loop
!   _loop         target of the bnz,a back-edge; _warm_loop is a second
!                 entry point further down inside the same loop body
!   after loop    bnz,a -> _tail, otherwise fall through to _return
!   _tail         two bz exits back up to _return
! The ,a suffix sets the annul bit: the delay-slot instruction of that
! conditional branch executes only when the branch is taken.
! NOTE(review): presumably _loop is the unrolled bulk path and _tail
! handles the leftover words, with the BN_ULONG result being the final
! borrow -- confirm against the complete routine.
464 bg,a .L_bn_sub_words_proceed
469 .L_bn_sub_words_proceed:
! Zero result of the preceding cc-setting instruction: skip the loop.
471 bz .L_bn_sub_words_tail
! First iteration enters the loop at its warm entry point.
478 ba .L_bn_sub_words_warm_loop
482 .L_bn_sub_words_loop:
490 .L_bn_sub_words_warm_loop:
! Loop back-edge.
509 bnz,a .L_bn_sub_words_loop
! Loop finished; non-zero here means work is left for the tail.
514 bnz,a .L_bn_sub_words_tail
516 .L_bn_sub_words_return:
520 .L_bn_sub_words_tail:
! Tail exits: each bz leaves as soon as its tested value reaches zero.
526 bz .L_bn_sub_words_return
536 bz .L_bn_sub_words_return
! ELF symbol metadata: mark as function, size = bytes from label to here.
547 .type bn_sub_words,#function
548 .size bn_sub_words,(.-bn_sub_words)
! Immediate used by the save %sp,...,%sp prologue of every comba routine
! further down; negative because the stack grows toward lower addresses.
! NOTE(review): -96 presumably is the minimum SPARC V8 frame (register
! window save area plus ABI-mandated slots) -- confirm against the ABI.
550 #define FRAME_SIZE -96
553 * Here is register usage map for *all* routines below.
! Memory-operand shorthands: each expands to the 32-bit word with index I
! (byte offset 4*I) from a base held in an in-register:
!   a-pointer -> %i1, b-pointer -> %i2, result pointer -> %i0.
! The comba routines use the result form as the destination of st.
561 #define ap(I) [%i1+4*I]
562 #define bp(I) [%i2+4*I]
563 #define rp(I) [%i0+4*I]
! bn_mul_comba8 -- exported routine; C prototype on the comment line
! just below. Fully unrolled column-wise (comba) 8x8-word multiply.
!
! How to read the listing:
!  - save opens a new register window and stack frame (FRAME_SIZE bytes).
!  - Every umul is tagged with the mul_add_c(a[i],b[j],...) step it
!    implements. umul is the SPARC V8 unsigned 32x32 multiply: the low
!    word of the product lands in the destination register, the high
!    word in the Y register.
!  - All products with i+j == k are accumulated before the st that
!    writes r[k]; 16 result words r[0]..r[15] are stored in total.
!  - The three accumulator names rotate from column to column:
!    (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2) -> ...
!  - addxcc is add-with-carry that also updates the condition codes; it
!    propagates the carry into the next accumulator word.
! NOTE(review): the a_N, b_N, c_N, t_1 and t_2 names are register aliases
! from the register usage map earlier in the file; t_2 presumably holds
! the high product word read back from the Y register -- confirm.
584 .global bn_mul_comba8
586 * void bn_mul_comba8(r,a,b)
590 save %sp,FRAME_SIZE,%sp
593 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
596 st c_1,rp(0) !r[0]=c1;
598 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
602 addxcc %g0,t_2,c_3 !=
605 umul a_1,b_0,t_1 !mul_add_c(a[1],b[0],c2,c3,c1);
609 st c_2,rp(1) !r[1]=c2;
612 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
615 addxcc c_1,t_2,c_1 !=
618 umul a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2);
624 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
627 addxcc c_1,t_2,c_1 !=
629 st c_3,rp(2) !r[2]=c3;
631 umul a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3);
636 umul a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3);
642 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
648 umul a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!=
653 st c_1,rp(3) !r[3]=c1;
655 umul a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1);
660 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
665 umul a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1);
671 umul a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1);
677 umul a_0,b_4,t_1 !=!mul_add_c(a[0],b[4],c2,c3,c1);
682 st c_2,rp(4) !r[4]=c2;
684 umul a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2);
689 umul a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2);
694 umul a_2,b_3,t_1 !=!mul_add_c(a[2],b[3],c3,c1,c2);
699 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
702 addxcc c_1,t_2,c_1 !=
705 umul a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2);
711 umul a_5,b_0,t_1 !mul_add_c(a[5],b[0],c3,c1,c2);
714 addxcc c_1,t_2,c_1 !=
716 st c_3,rp(5) !r[5]=c3;
718 umul a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3);
723 umul a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3);
728 umul a_4,b_2,t_1 !mul_add_c(a[4],b[2],c1,c2,c3);
731 addxcc c_2,t_2,c_2 !=
733 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
738 umul a_2,b_4,t_1 !mul_add_c(a[2],b[4],c1,c2,c3);
744 umul a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3);
747 addxcc c_2,t_2,c_2 !=
750 umul a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3);
754 st c_1,rp(6) !r[6]=c1;
757 umul a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1);
760 addxcc c_3,t_2,c_3 !=
762 umul a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1);
767 umul a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1);
772 umul a_3,b_4,t_1 !=!mul_add_c(a[3],b[4],c2,c3,c1);
777 umul a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1);
780 addxcc c_3,t_2,c_3 !=
782 umul a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1);
788 umul a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1);
793 umul a_7,b_0,t_1 !mul_add_c(a[7],b[0],c2,c3,c1);
796 addxcc c_3,t_2,c_3 !=
798 st c_2,rp(7) !r[7]=c2;
800 umul a_7,b_1,t_1 !mul_add_c(a[7],b[1],c3,c1,c2);
805 umul a_6,b_2,t_1 !=!mul_add_c(a[6],b[2],c3,c1,c2);
810 umul a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2);
813 addxcc c_1,t_2,c_1 !=
815 umul a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2);
820 umul a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2);
825 umul a_2,b_6,t_1 !=!mul_add_c(a[2],b[6],c3,c1,c2);
830 umul a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2);
835 st c_3,rp(8) !r[8]=c3;
837 umul a_2,b_7,t_1 !mul_add_c(a[2],b[7],c1,c2,c3);
842 umul a_3,b_6,t_1 !=!mul_add_c(a[3],b[6],c1,c2,c3);
847 umul a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3);
850 addxcc c_2,t_2,c_2 !=
852 umul a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3);
857 umul a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3);
862 umul a_7,b_2,t_1 !=!mul_add_c(a[7],b[2],c1,c2,c3);
867 st c_1,rp(9) !r[9]=c1;
869 umul a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1);
874 umul a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1);
879 umul a_5,b_5,t_1 !=!mul_add_c(a[5],b[5],c2,c3,c1);
884 umul a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1);
887 addxcc c_3,t_2,c_3 !=
889 umul a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1);
894 st c_2,rp(10) !r[10]=c2;
896 umul a_4,b_7,t_1 !=!mul_add_c(a[4],b[7],c3,c1,c2);
901 umul a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2);
904 addxcc c_1,t_2,c_1 !=
906 umul a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2);
911 umul a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2);
915 st c_3,rp(11) !r[11]=c3;
918 umul a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3);
921 addxcc c_2,t_2,c_2 !=
923 umul a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3);
928 umul a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3);
932 st c_1,rp(12) !r[12]=c1;
935 umul a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1);
938 addxcc c_3,t_2,c_3 !=
940 umul a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1);
945 st c_2,rp(13) !r[13]=c2;
947 umul a_7,b_7,t_1 !=!mul_add_c(a[7],b[7],c3,c1,c2);
952 st c_3,rp(14) !r[14]=c3;
953 st c_1,rp(15) !r[15]=c1;
! ELF symbol metadata: mark as function, size = bytes from label to here.
958 .type bn_mul_comba8,#function
959 .size bn_mul_comba8,(.-bn_mul_comba8)
! bn_mul_comba4 -- exported routine; C prototype on the comment line
! just below. Fully unrolled column-wise (comba) 4x4-word multiply.
!
! How to read the listing:
!  - save opens a new register window and stack frame (FRAME_SIZE bytes).
!  - Every umul is tagged with the mul_add_c(a[i],b[j],...) step it
!    implements. umul is the SPARC V8 unsigned 32x32 multiply: low word
!    of the product to the destination register, high word to Y.
!  - All products with i+j == k are accumulated before the st that
!    writes r[k]; 8 result words r[0]..r[7] are stored in total.
!  - The three accumulator names rotate from column to column:
!    (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2) -> ...
!  - addcc adds the low product word and sets the carry; addxcc then
!    adds with that carry into the next accumulator word.
! NOTE(review): the a_N, b_N, c_N, t_1 and t_2 names are register aliases
! from the register usage map earlier in the file; t_2 presumably holds
! the high product word read back from the Y register -- confirm.
963 .global bn_mul_comba4
965 * void bn_mul_comba4(r,a,b)
969 save %sp,FRAME_SIZE,%sp
972 umul a_0,b_0,c_1 !=!mul_add_c(a[0],b[0],c1,c2,c3);
975 st c_1,rp(0) !r[0]=c1;
977 umul a_0,b_1,t_1 !=!mul_add_c(a[0],b[1],c2,c3,c1);
984 umul a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1);
989 st c_2,rp(1) !r[1]=c2;
991 umul a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2);
997 umul a_1,b_1,t_1 !=!mul_add_c(a[1],b[1],c3,c1,c2);
1003 umul a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2);
1008 st c_3,rp(2) !r[2]=c3;
1010 umul a_0,b_3,t_1 !=!mul_add_c(a[0],b[3],c1,c2,c3);
1015 umul a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3);
1018 addxcc c_2,t_2,c_2 !=
1021 umul a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3);
1022 addcc c_1,t_1,c_1 !=
1026 umul a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);
1031 st c_1,rp(3) !r[3]=c1;
1033 umul a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1);
1038 umul a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1);
1039 addcc c_2,t_1,c_2 !=
1043 umul a_1,b_3,t_1 !=!mul_add_c(a[1],b[3],c2,c3,c1);
1048 st c_2,rp(4) !r[4]=c2;
1050 umul a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2);
1055 umul a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2);
1056 addcc c_3,t_1,c_3 !=
1059 st c_3,rp(5) !r[5]=c3;
1062 umul a_3,b_3,t_1 !mul_add_c(a[3],b[3],c1,c2,c3);
1065 addxcc c_2,t_2,c_2 !=
1066 st c_1,rp(6) !r[6]=c1;
1067 st c_2,rp(7) !r[7]=c2;
! ELF symbol metadata: mark as function, size = bytes from label to here.
1072 .type bn_mul_comba4,#function
1073 .size bn_mul_comba4,(.-bn_mul_comba4)
! bn_sqr_comba8 -- exported routine. Fully unrolled column-wise (comba)
! squaring of an 8-word operand.
!
! How to read the listing:
!  - save opens a new register window and stack frame (FRAME_SIZE bytes).
!  - Every umul is tagged with the step it implements:
!      sqr_add_c(a,i,...)    diagonal term  a[i]*a[i]
!      sqr_add_c2(a,i,j,...) cross term     a[i]*a[j]
!    umul is the SPARC V8 unsigned 32x32 multiply: low word of the
!    product to the destination register, high word to Y.
!  - For cross terms the low product word is added twice: see the pairs
!    of identical addcc lines in the columns for r[6], r[8] and r[13].
!  - All terms with i+j == k are accumulated before the st that writes
!    r[k]; 16 result words r[0]..r[15] are stored in total.
!  - The three accumulator names rotate from column to column:
!    (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2) -> ...
!  - addcc sets the carry; addxcc adds with carry into the next word.
! NOTE(review): presumably the prototype is void bn_sqr_comba8(r,a), by
! analogy with bn_sqr_comba4 -- confirm against the full file.
! NOTE(review): the a_N, c_N, t_1 and t_2 names are register aliases from
! the register usage map earlier in the file; t_2 presumably holds the
! high product word read back from the Y register -- confirm.
1077 .global bn_sqr_comba8
1079 save %sp,FRAME_SIZE,%sp
1082 umul a_0,a_0,c_1 !=!sqr_add_c(a,0,c1,c2,c3);
1084 st c_1,rp(0) !r[0]=c1;
1087 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1094 st c_2,rp(1) !r[1]=c2;
1097 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1100 addxcc c_1,t_2,c_1 !=
1106 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1111 st c_3,rp(2) !r[2]=c3;
1113 umul a_0,a_3,t_1 !=!sqr_add_c2(a,3,0,c1,c2,c3);
1122 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1125 addxcc c_2,t_2,c_2 !=
1130 st c_1,rp(3) !r[3]=c1;
1132 umul a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1);
1138 addxcc c_3,t_2,c_3 !=
1140 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1146 addxcc c_3,t_2,c_3 !=
1149 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1150 addcc c_2,t_1,c_2 !=
1153 st c_2,rp(4) !r[4]=c2;
1156 umul a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2);
1159 addxcc c_1,t_2,c_1 !=
1164 umul a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2);
1167 addxcc c_1,t_2,c_1 !=
1173 umul a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2);
1179 addxcc c_1,t_2,c_1 !=
1181 st c_3,rp(5) !r[5]=c3;
1183 umul a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3);
1184 addcc c_1,t_1,c_1 !=
1188 addcc c_1,t_1,c_1 !=
1191 umul a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3);
1192 addcc c_1,t_1,c_1 !=
1196 addcc c_1,t_1,c_1 !=
1199 umul a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3);
1200 addcc c_1,t_1,c_1 !=
1204 addcc c_1,t_1,c_1 !=
1208 umul a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3);
1213 st c_1,rp(6) !r[6]=c1;
1215 umul a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1);
1221 addxcc c_3,t_2,c_3 !=
1223 umul a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1);
1229 addxcc c_3,t_2,c_3 !=
1231 umul a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1);
1237 addxcc c_3,t_2,c_3 !=
1239 umul a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1);
1245 addxcc c_3,t_2,c_3 !=
1247 st c_2,rp(7) !r[7]=c2;
1249 umul a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2);
1250 addcc c_3,t_1,c_3 !=
1254 addcc c_3,t_1,c_3 !=
1257 umul a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2);
1258 addcc c_3,t_1,c_3 !=
1262 addcc c_3,t_1,c_3 !=
1265 umul a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2);
1266 addcc c_3,t_1,c_3 !=
1270 addcc c_3,t_1,c_3 !=
1273 umul a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2);
1274 addcc c_3,t_1,c_3 !=
1277 st c_3,rp(8) !r[8]=c3;
1280 umul a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3);
1283 addxcc c_2,t_2,c_2 !=
1288 umul a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3);
1291 addxcc c_2,t_2,c_2 !=
1296 umul a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3);
1299 addxcc c_2,t_2,c_2 !=
1304 st c_1,rp(9) !r[9]=c1;
1306 umul a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1);
1312 addxcc c_3,t_2,c_3 !=
1314 umul a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1);
1320 addxcc c_3,t_2,c_3 !=
1322 umul a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1);
1327 st c_2,rp(10) !r[10]=c2;
1329 umul a_4,a_7,t_1 !=!sqr_add_c2(a,7,4,c3,c1,c2);
1337 umul a_5,a_6,t_1 !=!sqr_add_c2(a,6,5,c3,c1,c2);
1344 st c_3,rp(11) !r[11]=c3;
1347 umul a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3);
1350 addxcc c_2,t_2,c_2 !=
1355 umul a_6,a_6,t_1 !sqr_add_c(a,6,c1,c2,c3);
1358 addxcc c_2,t_2,c_2 !=
1360 st c_1,rp(12) !r[12]=c1;
1362 umul a_6,a_7,t_1 !sqr_add_c2(a,7,6,c2,c3,c1);
1363 addcc c_2,t_1,c_2 !=
1367 addcc c_2,t_1,c_2 !=
1370 st c_2,rp(13) !r[13]=c2;
1373 umul a_7,a_7,t_1 !sqr_add_c(a,7,c3,c1,c2);
1376 addxcc c_1,t_2,c_1 !=
1377 st c_3,rp(14) !r[14]=c3;
1378 st c_1,rp(15) !r[15]=c1;
! ELF symbol metadata: mark as function, size = bytes from label to here.
1383 .type bn_sqr_comba8,#function
1384 .size bn_sqr_comba8,(.-bn_sqr_comba8)
! bn_sqr_comba4 -- exported routine; C prototype on the comment line
! just below. Fully unrolled column-wise (comba) squaring of a 4-word
! operand.
!
! How to read the listing:
!  - save opens a new register window and stack frame (FRAME_SIZE bytes).
!  - Every umul is tagged with the step it implements:
!      sqr_add_c(a,i,...)    diagonal term  a[i]*a[i]
!      sqr_add_c2(a,i,j,...) cross term     a[i]*a[j]
!    umul is the SPARC V8 unsigned 32x32 multiply: low word of the
!    product to the destination register, high word to Y.
!  - All terms with i+j == k are accumulated before the st that writes
!    r[k]; 8 result words r[0]..r[7] are stored in total.
!  - The three accumulator names rotate from column to column:
!    (c1,c2,c3) -> (c2,c3,c1) -> (c3,c1,c2) -> ...
!  - addcc sets the carry; addxcc adds with carry into the next word.
! NOTE(review): presumably each cross term is accumulated twice, as in
! bn_sqr_comba8 -- confirm against the complete routine.
! NOTE(review): the a_N, c_N, t_1 and t_2 names are register aliases from
! the register usage map earlier in the file; t_2 presumably holds the
! high product word read back from the Y register -- confirm.
1388 .global bn_sqr_comba4
1390 * void bn_sqr_comba4(r,a)
1394 save %sp,FRAME_SIZE,%sp
1396 umul a_0,a_0,c_1 !sqr_add_c(a,0,c1,c2,c3);
1399 st c_1,rp(0) !r[0]=c1;
1402 umul a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1);
1411 st c_2,rp(1) !r[1]=c2;
1413 umul a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2);
1419 addxcc c_1,t_2,c_1 !=
1422 umul a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2);
1423 addcc c_3,t_1,c_3 !=
1426 st c_3,rp(2) !r[2]=c3;
1429 umul a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3);
1432 addxcc c_2,t_2,c_2 !=
1437 umul a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3);
1440 addxcc c_2,t_2,c_2 !=
1445 st c_1,rp(3) !r[3]=c1;
1447 umul a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1);
1453 addxcc c_3,t_2,c_3 !=
1455 umul a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1);
1460 st c_2,rp(4) !r[4]=c2;
1462 umul a_2,a_3,t_1 !=!sqr_add_c2(a,3,2,c3,c1,c2);
1469 st c_3,rp(5) !r[5]=c3;
1472 umul a_3,a_3,t_1 !sqr_add_c(a,3,c1,c2,c3);
1475 addxcc c_2,t_2,c_2 !=
1476 st c_1,rp(6) !r[6]=c1;
1477 st c_2,rp(7) !r[7]=c2;
! ELF symbol metadata: mark as function, size = bytes from label to here.
1482 .type bn_sqr_comba4,#function
1483 .size bn_sqr_comba4,(.-bn_sqr_comba4)