2 # Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. Rights for redistribution and usage in source and binary
13 # forms are granted according to the OpenSSL license.
14 # ====================================================================
18 # The major reason for undertaking this effort was to mitigate the
19 # hazard of cache-timing attacks. This is [currently and initially!]
20 # addressed in two ways. 1. S-boxes are compressed from 5KB to 2KB+256B
21 # each. 2. References to them are scheduled for L2 cache latency,
22 # meaning that the tables don't have to reside in L1 cache. Once again,
23 # this is an initial draft and one should expect more countermeasures to
26 # Version 1.1 prefetches T[ed]4 in order to mitigate an attack on the last
29 # Even though performance was not the primary goal [on the contrary,
30 # extra shifts "induced" by the compressed S-box and the longer loop
31 # epilogue "induced" by scheduling for L2 have a negative effect on
32 # performance], the code turned out to run at ~23 cycles per processed
33 # byte en-/decrypted with a 128-bit key. This is a pretty good result
34 # for code with the mentioned qualities on an UltraSPARC core. Compared
35 # to Sun C generated code, my encrypt procedure runs just a few percent
36 # faster, while the decrypt one is a whole 50% faster [yes, Sun C failed
37 # to generate an optimal decrypt procedure]. Compared to GNU C generated
38 # code, both procedures are more than 60% faster:-)
41 open(STDOUT,">",$output) or die "can't open $output: $!" if $output; # without an output name keep the inherited STDOUT; a failed open is now fatal instead of silent
78 $rounds="%i7"; # aliases with the return address in %i7, which the routines off-load to the stack frame on entry and reload once $rounds is no longer needed
82 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
86 #include "sparc_arch.h"
89 .register %g2,#scratch
90 .register %g3,#scratch
92 .section ".text",#alloc,#execinstr
98 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
99 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
100 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
101 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
102 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
103 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
104 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
105 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
106 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
107 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
108 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
109 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
110 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
111 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
112 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
113 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
114 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
115 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
116 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
117 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
118 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
119 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
120 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
121 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
122 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
123 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
124 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
125 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
126 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
127 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
128 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
129 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
130 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
131 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
132 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
133 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
134 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
135 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
136 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
137 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
138 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
139 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
140 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
141 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
142 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
143 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
144 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
145 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
146 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
147 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
148 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
149 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
150 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
151 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
152 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
153 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
154 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
155 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
156 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
157 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
158 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
159 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
160 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
161 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
163 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
164 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
165 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
166 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
167 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
168 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
169 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
170 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
171 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
172 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
173 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
174 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
175 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
176 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
177 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
178 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
179 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
180 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
181 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
182 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
183 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
184 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
185 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
186 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
187 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
188 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
189 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
190 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
191 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
192 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
193 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
194 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
196 .size AES_Te,(.-AES_Te)
200 _sparcv9_AES_encrypt:
201 save %sp,-$frame-$locals,%sp
202 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
203 ld [$key+240],$rounds
207 srl $rounds,1,$rounds
224 ldx [$tbl+$acc0],$acc0
227 ldx [$tbl+$acc1],$acc1
230 ldx [$tbl+$acc2],$acc2 !
233 ldx [$tbl+$acc3],$acc3
236 ldx [$tbl+$acc4],$acc4
240 ldx [$tbl+$acc5],$acc5
243 ldx [$tbl+$acc6],$acc6
246 ldx [$tbl+$acc7],$acc7 !
249 ldx [$tbl+$acc8],$acc8
251 and $acc10,2040,$acc10
252 ldx [$tbl+$acc9],$acc9
255 and $acc11,2040,$acc11
256 ldx [$tbl+$acc10],$acc10
258 and $acc12,2040,$acc12
259 ldx [$tbl+$acc11],$acc11
261 and $acc13,2040,$acc13
262 ldx [$tbl+$acc12],$acc12 !
264 and $acc14,2040,$acc14
265 ldx [$tbl+$acc13],$acc13
266 and $acc15,2040,$acc15
268 ldx [$tbl+$acc14],$acc14
270 subcc $rounds,1,$rounds !
271 ldx [$tbl+$acc15],$acc15
272 bz,a,pn %icc,.Lenc_last
273 add $tbl,2048,$rounds
279 srlx $acc2,16,$acc2 !
295 srlx $acc10,16,$acc10 !
297 srlx $acc11,24,$acc11
301 srlx $acc14,16,$acc14
303 srlx $acc15,24,$acc15 !
305 xor $acc12,$acc14,$acc14
312 and $acc0,2040,$acc0 !
315 ldx [$tbl+$acc0],$acc0
318 ldx [$tbl+$acc1],$acc1
322 ldx [$tbl+$acc2],$acc2
325 ldx [$tbl+$acc3],$acc3
328 ldx [$tbl+$acc4],$acc4 !
331 ldx [$tbl+$acc5],$acc5
334 ldx [$tbl+$acc6],$acc6
338 ldx [$tbl+$acc7],$acc7
341 ldx [$tbl+$acc8],$acc8
343 and $acc10,2040,$acc10
344 ldx [$tbl+$acc9],$acc9 !
346 and $acc11,2040,$acc11
347 ldx [$tbl+$acc10],$acc10
349 and $acc12,2040,$acc12
350 ldx [$tbl+$acc11],$acc11
353 and $acc13,2040,$acc13
354 ldx [$tbl+$acc12],$acc12
356 and $acc14,2040,$acc14
357 ldx [$tbl+$acc13],$acc13
359 and $acc15,2040,$acc15
360 ldx [$tbl+$acc14],$acc14 !
364 ldx [$tbl+$acc15],$acc15
380 ldx [$tbl+2048+0],%g0 ! prefetch te4
381 srlx $acc10,16,$acc10
383 ldx [$tbl+2048+32],%g0 ! prefetch te4
384 srlx $acc11,24,$acc11
386 ldx [$tbl+2048+64],%g0 ! prefetch te4
389 ldx [$tbl+2048+96],%g0 ! prefetch te4
390 srlx $acc14,16,$acc14 !
392 ldx [$tbl+2048+128],%g0 ! prefetch te4
393 srlx $acc15,24,$acc15
395 ldx [$tbl+2048+160],%g0 ! prefetch te4
398 ldx [$tbl+2048+192],%g0 ! prefetch te4
399 xor $acc12,$acc14,$acc14
401 ldx [$tbl+2048+224],%g0 ! prefetch te4
428 srlx $acc10,16,$acc10
430 srlx $acc11,24,$acc11
434 srlx $acc14,16,$acc14 !
436 srlx $acc15,24,$acc15
438 xor $acc12,$acc14,$acc14
447 ldub [$rounds+$acc0],$acc0
450 ldub [$rounds+$acc1],$acc1
453 ldub [$rounds+$acc2],$acc2
454 ldub [$rounds+$acc3],$acc3
457 ldub [$rounds+$acc4],$acc4
461 ldub [$rounds+$acc5],$acc5
464 ldub [$rounds+$acc6],$acc6
465 ldub [$rounds+$acc7],$acc7
469 ldub [$rounds+$acc8],$acc8
471 and $acc10,255,$acc10
472 ldub [$rounds+$acc9],$acc9
475 ldub [$rounds+$acc10],$acc10 !
477 and $acc13,255,$acc13
478 ldub [$rounds+$acc11],$acc11
479 ldub [$rounds+$acc12],$acc12
480 and $acc14,255,$acc14
481 ldub [$rounds+$acc13],$acc13
483 ldub [$rounds+$acc14],$acc14 !
487 ldub [$rounds+$acc15],$acc15
490 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
508 sll $acc13,16,$acc13 !
512 xor $acc12,$acc14,$acc14
519 .type _sparcv9_AES_encrypt,#function
520 .size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
527 bnz,pn %xcc,.Lunaligned_enc
536 add %o7,AES_Te-1b,%o4
537 call _sparcv9_AES_encrypt
596 add %o7,AES_Te-1b,%o4
597 call _sparcv9_AES_encrypt
634 .type AES_encrypt,#function
635 .size AES_encrypt,(.-AES_encrypt)
644 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
645 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
646 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
647 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
648 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
649 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
650 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
651 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
652 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
653 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
654 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
655 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
656 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
657 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
658 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
659 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
660 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
661 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
662 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
663 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
664 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
665 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
666 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
667 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
668 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
669 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
670 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
671 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
672 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
673 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
674 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
675 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
676 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
677 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
678 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
679 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
680 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
681 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
682 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
683 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
684 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
685 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
686 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
687 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
688 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
689 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
690 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
691 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
692 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
693 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
694 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
695 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
696 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
697 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
698 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
699 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
700 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
701 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
702 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
703 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
704 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
705 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
706 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
707 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
709 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
710 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
711 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
712 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
713 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
714 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
715 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
716 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
717 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
718 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
719 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
720 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
721 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
722 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
723 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
724 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
725 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
726 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
727 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
728 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
729 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
730 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
731 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
732 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
733 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
734 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
735 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
736 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
737 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
738 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
739 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
740 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
742 .size AES_Td,(.-AES_Td)
746 _sparcv9_AES_decrypt:
747 save %sp,-$frame-$locals,%sp
748 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
749 ld [$key+240],$rounds
754 srl $rounds,1,$rounds
770 ldx [$tbl+$acc0],$acc0
773 ldx [$tbl+$acc1],$acc1
776 ldx [$tbl+$acc2],$acc2 !
779 ldx [$tbl+$acc3],$acc3
782 ldx [$tbl+$acc4],$acc4
786 ldx [$tbl+$acc5],$acc5
789 ldx [$tbl+$acc6],$acc6
792 ldx [$tbl+$acc7],$acc7 !
795 ldx [$tbl+$acc8],$acc8
797 and $acc10,2040,$acc10
798 ldx [$tbl+$acc9],$acc9
801 and $acc11,2040,$acc11
802 ldx [$tbl+$acc10],$acc10
804 and $acc12,2040,$acc12
805 ldx [$tbl+$acc11],$acc11
807 and $acc13,2040,$acc13
808 ldx [$tbl+$acc12],$acc12 !
810 and $acc14,2040,$acc14
811 ldx [$tbl+$acc13],$acc13
812 and $acc15,2040,$acc15
814 ldx [$tbl+$acc14],$acc14
816 subcc $rounds,1,$rounds !
817 ldx [$tbl+$acc15],$acc15
818 bz,a,pn %icc,.Ldec_last
819 add $tbl,2048,$rounds
825 srlx $acc2,16,$acc2 !
841 srlx $acc10,16,$acc10 !
843 srlx $acc11,24,$acc11
847 srlx $acc14,16,$acc14
849 srlx $acc15,24,$acc15 !
851 xor $acc12,$acc14,$acc14
858 and $acc0,2040,$acc0 !
861 ldx [$tbl+$acc0],$acc0
864 ldx [$tbl+$acc1],$acc1
868 ldx [$tbl+$acc2],$acc2
871 ldx [$tbl+$acc3],$acc3
874 ldx [$tbl+$acc4],$acc4 !
877 ldx [$tbl+$acc5],$acc5
880 ldx [$tbl+$acc6],$acc6
884 ldx [$tbl+$acc7],$acc7
887 ldx [$tbl+$acc8],$acc8
889 and $acc10,2040,$acc10
890 ldx [$tbl+$acc9],$acc9 !
892 and $acc11,2040,$acc11
893 ldx [$tbl+$acc10],$acc10
895 and $acc12,2040,$acc12
896 ldx [$tbl+$acc11],$acc11
899 and $acc13,2040,$acc13
900 ldx [$tbl+$acc12],$acc12
902 and $acc14,2040,$acc14
903 ldx [$tbl+$acc13],$acc13
905 and $acc15,2040,$acc15
906 ldx [$tbl+$acc14],$acc14 !
910 ldx [$tbl+$acc15],$acc15
926 ldx [$tbl+2048+0],%g0 ! prefetch td4
927 srlx $acc10,16,$acc10
929 ldx [$tbl+2048+32],%g0 ! prefetch td4
930 srlx $acc11,24,$acc11
932 ldx [$tbl+2048+64],%g0 ! prefetch td4
935 ldx [$tbl+2048+96],%g0 ! prefetch td4
936 srlx $acc14,16,$acc14 !
938 ldx [$tbl+2048+128],%g0 ! prefetch td4
939 srlx $acc15,24,$acc15
941 ldx [$tbl+2048+160],%g0 ! prefetch td4
944 ldx [$tbl+2048+192],%g0 ! prefetch td4
945 xor $acc12,$acc14,$acc14
947 ldx [$tbl+2048+224],%g0 ! prefetch td4
948 and $acc0,2040,$acc0 !
974 srlx $acc10,16,$acc10
976 srlx $acc11,24,$acc11
980 srlx $acc14,16,$acc14 !
982 srlx $acc15,24,$acc15
984 xor $acc12,$acc14,$acc14
993 ldub [$rounds+$acc0],$acc0
996 ldub [$rounds+$acc1],$acc1
999 ldub [$rounds+$acc2],$acc2
1000 ldub [$rounds+$acc3],$acc3
1003 ldub [$rounds+$acc4],$acc4
1007 ldub [$rounds+$acc5],$acc5
1010 ldub [$rounds+$acc6],$acc6
1011 ldub [$rounds+$acc7],$acc7
1015 ldub [$rounds+$acc8],$acc8
1017 and $acc10,255,$acc10
1018 ldub [$rounds+$acc9],$acc9
1021 ldub [$rounds+$acc10],$acc10 !
1023 and $acc13,255,$acc13
1024 ldub [$rounds+$acc11],$acc11
1025 ldub [$rounds+$acc12],$acc12
1026 and $acc14,255,$acc14
1027 ldub [$rounds+$acc13],$acc13
1029 ldub [$rounds+$acc14],$acc14 !
1033 ldub [$rounds+$acc15],$acc15
1036 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1046 sll $acc8,24,$acc8 !
1052 sll $acc12,24,$acc12
1054 sll $acc13,16,$acc13 !
1058 xor $acc12,$acc14,$acc14
1065 .type _sparcv9_AES_decrypt,#function
1066 .size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1073 bnz,pn %xcc,.Lunaligned_dec
1074 save %sp,-$frame,%sp
1082 add %o7,AES_Td-1b,%o4
1083 call _sparcv9_AES_decrypt
1142 add %o7,AES_Td-1b,%o4
1143 call _sparcv9_AES_decrypt
1180 .type AES_decrypt,#function
1181 .size AES_decrypt,(.-AES_decrypt)
1184 # fmovs instructions substituting for FP nops were originally added
1185 # to meet specific instruction alignment requirements to maximize ILP.
1186 # As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1187 # an undesired effect, so just omit them and sacrifice a fraction of a
1188 # percent in performance...
1189 $code =~ s/fmovs.*$//gm; # /m anchors "$" at every line end, /g hits all occurrences: each "fmovs" is deleted through the end of its line
1192 close STDOUT or die "error closing STDOUT: $!"; # ensure flush; buffered write errors only surface at close, so treat them as fatal