3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # Needs more work: key setup, page boundaries, CBC routine...
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
21 # Rescheduling instructions to favour Power6 pipeline gives 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process a byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result, non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
34 if ($flavour =~ /64/) {
39 } elsif ($flavour =~ /32/) {
44 } else { die "nonsense $flavour"; }
# Locate the ppc-xlate.pl translator.  The directory part of this script's
# own path ($0) is tried first, then ../../perlasm/ relative to it; the
# script aborts if neither candidate is a regular file.
46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into a pipe to the translator, run with the current perl
# ($^X), the flavour, and the next command-line argument (taken by shift).
# Low-precedence "or" is required here: with "||" the die was attached to
# the command string, which is always true, so a failed open went unnoticed.
51 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
57 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
101 # stay away from TLS pointer
# The register names held in $t1/$Tbl3/$t0 are assigned elsewhere in the
# file (not visible here).  This step checks the expected assignment and
# then remaps it so the register treated as the TLS pointer is not used:
#   $SIZE_T==8: $t1 must currently be "r13"; it is switched to "r0".
#   otherwise:  $Tbl3 must currently be "r2"; it takes over $t0's register
#               and $t0 is switched to "r0".
# The bare die fires if the expected assignment has changed, so the
# remapping cannot silently go stale.
102 if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
103 else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
115 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
116 addi $Tbl0,$Tbl0,`128-8`
123 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
124 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
130 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
131 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
132 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
133 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
134 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
135 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
136 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
137 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
138 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
139 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
140 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
141 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
142 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
143 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
144 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
145 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
146 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
147 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
148 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
149 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
150 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
151 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
152 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
153 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
154 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
155 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
156 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
157 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
158 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
159 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
160 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
161 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
162 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
163 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
164 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
165 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
166 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
167 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
168 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
169 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
170 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
171 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
172 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
173 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
174 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
175 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
176 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
177 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
178 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
179 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
180 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
181 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
182 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
183 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
184 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
185 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
186 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
187 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
188 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
189 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
190 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
191 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
192 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
193 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
195 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
196 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
197 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
198 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
199 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
200 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
201 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
202 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
203 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
204 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
205 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
206 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
207 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
208 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
209 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
210 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
211 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
212 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
213 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
214 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
215 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
216 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
217 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
218 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
219 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
220 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
221 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
222 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
223 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
224 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
225 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
226 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
229 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
230 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
231 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
232 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
233 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
234 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
235 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
236 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
237 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
238 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
239 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
240 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
241 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
242 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
243 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
244 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
245 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
246 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
247 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
248 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
249 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
250 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
251 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
252 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
253 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
254 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
255 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
256 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
257 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
258 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
259 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
260 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
261 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
262 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
263 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
264 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
265 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
266 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
267 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
268 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
269 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
270 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
271 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
272 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
273 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
274 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
275 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
276 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
277 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
278 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
279 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
280 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
281 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
282 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
283 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
284 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
285 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
286 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
287 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
288 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
289 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
290 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
291 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
292 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
294 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
295 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
296 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
297 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
298 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
299 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
300 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
301 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
302 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
303 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
304 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
305 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
306 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
307 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
308 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
309 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
310 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
311 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
312 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
313 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
314 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
315 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
316 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
317 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
318 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
319 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
320 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
321 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
322 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
323 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
324 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
325 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332 $STU $sp,-$FRAME($sp)
334 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
335 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
336 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
337 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
338 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
339 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
340 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
341 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
342 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
343 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
344 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
345 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
346 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
347 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
348 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
349 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
350 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
351 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
352 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
353 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
354 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
361 bl Lppc_AES_encrypt_compact
367 $POP r0,`$FRAME-$SIZE_T*21`($sp)
368 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
369 $POP r13,`$FRAME-$SIZE_T*19`($sp)
370 $POP r14,`$FRAME-$SIZE_T*18`($sp)
371 $POP r15,`$FRAME-$SIZE_T*17`($sp)
372 $POP r16,`$FRAME-$SIZE_T*16`($sp)
373 $POP r17,`$FRAME-$SIZE_T*15`($sp)
374 $POP r18,`$FRAME-$SIZE_T*14`($sp)
375 $POP r19,`$FRAME-$SIZE_T*13`($sp)
376 $POP r20,`$FRAME-$SIZE_T*12`($sp)
377 $POP r21,`$FRAME-$SIZE_T*11`($sp)
378 $POP r22,`$FRAME-$SIZE_T*10`($sp)
379 $POP r23,`$FRAME-$SIZE_T*9`($sp)
380 $POP r24,`$FRAME-$SIZE_T*8`($sp)
381 $POP r25,`$FRAME-$SIZE_T*7`($sp)
382 $POP r26,`$FRAME-$SIZE_T*6`($sp)
383 $POP r27,`$FRAME-$SIZE_T*5`($sp)
384 $POP r28,`$FRAME-$SIZE_T*4`($sp)
385 $POP r29,`$FRAME-$SIZE_T*3`($sp)
386 $POP r30,`$FRAME-$SIZE_T*2`($sp)
387 $POP r31,`$FRAME-$SIZE_T*1`($sp)
402 addi $acc00,$acc00,-1
411 rlwinm $acc00,$s0,`32-24+3`,21,28
412 rlwinm $acc01,$s1,`32-24+3`,21,28
413 rlwinm $acc02,$s2,`32-24+3`,21,28
414 rlwinm $acc03,$s3,`32-24+3`,21,28
417 rlwinm $acc04,$s1,`32-16+3`,21,28
418 rlwinm $acc05,$s2,`32-16+3`,21,28
421 rlwinm $acc06,$s3,`32-16+3`,21,28
422 rlwinm $acc07,$s0,`32-16+3`,21,28
423 lwzx $acc00,$Tbl0,$acc00
424 lwzx $acc01,$Tbl0,$acc01
425 rlwinm $acc08,$s2,`32-8+3`,21,28
426 rlwinm $acc09,$s3,`32-8+3`,21,28
427 lwzx $acc02,$Tbl0,$acc02
428 lwzx $acc03,$Tbl0,$acc03
429 rlwinm $acc10,$s0,`32-8+3`,21,28
430 rlwinm $acc11,$s1,`32-8+3`,21,28
431 lwzx $acc04,$Tbl1,$acc04
432 lwzx $acc05,$Tbl1,$acc05
433 rlwinm $acc12,$s3,`0+3`,21,28
434 rlwinm $acc13,$s0,`0+3`,21,28
435 lwzx $acc06,$Tbl1,$acc06
436 lwzx $acc07,$Tbl1,$acc07
437 rlwinm $acc14,$s1,`0+3`,21,28
438 rlwinm $acc15,$s2,`0+3`,21,28
439 lwzx $acc08,$Tbl2,$acc08
440 lwzx $acc09,$Tbl2,$acc09
443 lwzx $acc10,$Tbl2,$acc10
444 lwzx $acc11,$Tbl2,$acc11
447 lwzx $acc12,$Tbl3,$acc12
448 lwzx $acc13,$Tbl3,$acc13
451 lwzx $acc14,$Tbl3,$acc14
452 lwzx $acc15,$Tbl3,$acc15
466 addi $Tbl2,$Tbl0,2048
470 rlwinm $acc00,$s0,`32-24`,24,31
471 rlwinm $acc01,$s1,`32-24`,24,31
474 rlwinm $acc02,$s2,`32-24`,24,31
475 rlwinm $acc03,$s3,`32-24`,24,31
476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
477 lwz $acc09,`2048+32`($Tbl0)
478 rlwinm $acc04,$s1,`32-16`,24,31
479 rlwinm $acc05,$s2,`32-16`,24,31
480 lwz $acc10,`2048+64`($Tbl0)
481 lwz $acc11,`2048+96`($Tbl0)
482 rlwinm $acc06,$s3,`32-16`,24,31
483 rlwinm $acc07,$s0,`32-16`,24,31
484 lwz $acc12,`2048+128`($Tbl0)
485 lwz $acc13,`2048+160`($Tbl0)
486 rlwinm $acc08,$s2,`32-8`,24,31
487 rlwinm $acc09,$s3,`32-8`,24,31
488 lwz $acc14,`2048+192`($Tbl0)
489 lwz $acc15,`2048+224`($Tbl0)
490 rlwinm $acc10,$s0,`32-8`,24,31
491 rlwinm $acc11,$s1,`32-8`,24,31
492 lbzx $acc00,$Tbl2,$acc00
493 lbzx $acc01,$Tbl2,$acc01
494 rlwinm $acc12,$s3,`0`,24,31
495 rlwinm $acc13,$s0,`0`,24,31
496 lbzx $acc02,$Tbl2,$acc02
497 lbzx $acc03,$Tbl2,$acc03
498 rlwinm $acc14,$s1,`0`,24,31
499 rlwinm $acc15,$s2,`0`,24,31
500 lbzx $acc04,$Tbl2,$acc04
501 lbzx $acc05,$Tbl2,$acc05
502 rlwinm $s0,$acc00,24,0,7
503 rlwinm $s1,$acc01,24,0,7
504 lbzx $acc06,$Tbl2,$acc06
505 lbzx $acc07,$Tbl2,$acc07
506 rlwinm $s2,$acc02,24,0,7
507 rlwinm $s3,$acc03,24,0,7
508 lbzx $acc08,$Tbl2,$acc08
509 lbzx $acc09,$Tbl2,$acc09
510 rlwimi $s0,$acc04,16,8,15
511 rlwimi $s1,$acc05,16,8,15
512 lbzx $acc10,$Tbl2,$acc10
513 lbzx $acc11,$Tbl2,$acc11
514 rlwimi $s2,$acc06,16,8,15
515 rlwimi $s3,$acc07,16,8,15
516 lbzx $acc12,$Tbl2,$acc12
517 lbzx $acc13,$Tbl2,$acc13
518 rlwimi $s0,$acc08,8,16,23
519 rlwimi $s1,$acc09,8,16,23
520 lbzx $acc14,$Tbl2,$acc14
521 lbzx $acc15,$Tbl2,$acc15
522 rlwimi $s2,$acc10,8,16,23
523 rlwimi $s3,$acc11,8,16,23
535 Lppc_AES_encrypt_compact:
541 addi $Tbl1,$Tbl0,2048
545 ori $mask80,$mask80,0x8080
546 ori $mask1b,$mask1b,0x1b1b
554 rlwinm $acc00,$s0,`32-24`,24,31
555 rlwinm $acc01,$s1,`32-24`,24,31
556 rlwinm $acc02,$s2,`32-24`,24,31
557 rlwinm $acc03,$s3,`32-24`,24,31
558 rlwinm $acc04,$s1,`32-16`,24,31
559 rlwinm $acc05,$s2,`32-16`,24,31
560 rlwinm $acc06,$s3,`32-16`,24,31
561 rlwinm $acc07,$s0,`32-16`,24,31
562 lbzx $acc00,$Tbl1,$acc00
563 lbzx $acc01,$Tbl1,$acc01
564 rlwinm $acc08,$s2,`32-8`,24,31
565 rlwinm $acc09,$s3,`32-8`,24,31
566 lbzx $acc02,$Tbl1,$acc02
567 lbzx $acc03,$Tbl1,$acc03
568 rlwinm $acc10,$s0,`32-8`,24,31
569 rlwinm $acc11,$s1,`32-8`,24,31
570 lbzx $acc04,$Tbl1,$acc04
571 lbzx $acc05,$Tbl1,$acc05
572 rlwinm $acc12,$s3,`0`,24,31
573 rlwinm $acc13,$s0,`0`,24,31
574 lbzx $acc06,$Tbl1,$acc06
575 lbzx $acc07,$Tbl1,$acc07
576 rlwinm $acc14,$s1,`0`,24,31
577 rlwinm $acc15,$s2,`0`,24,31
578 lbzx $acc08,$Tbl1,$acc08
579 lbzx $acc09,$Tbl1,$acc09
580 rlwinm $s0,$acc00,24,0,7
581 rlwinm $s1,$acc01,24,0,7
582 lbzx $acc10,$Tbl1,$acc10
583 lbzx $acc11,$Tbl1,$acc11
584 rlwinm $s2,$acc02,24,0,7
585 rlwinm $s3,$acc03,24,0,7
586 lbzx $acc12,$Tbl1,$acc12
587 lbzx $acc13,$Tbl1,$acc13
588 rlwimi $s0,$acc04,16,8,15
589 rlwimi $s1,$acc05,16,8,15
590 lbzx $acc14,$Tbl1,$acc14
591 lbzx $acc15,$Tbl1,$acc15
592 rlwimi $s2,$acc06,16,8,15
593 rlwimi $s3,$acc07,16,8,15
594 rlwimi $s0,$acc08,8,16,23
595 rlwimi $s1,$acc09,8,16,23
596 rlwimi $s2,$acc10,8,16,23
597 rlwimi $s3,$acc11,8,16,23
608 bdz Lenc_compact_done
610 and $acc00,$s0,$mask80 # r1=r0&0x80808080
611 and $acc01,$s1,$mask80
612 and $acc02,$s2,$mask80
613 and $acc03,$s3,$mask80
614 srwi $acc04,$acc00,7 # r1>>7
618 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
619 andc $acc09,$s1,$mask80
620 andc $acc10,$s2,$mask80
621 andc $acc11,$s3,$mask80
622 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
623 sub $acc01,$acc01,$acc05
624 sub $acc02,$acc02,$acc06
625 sub $acc03,$acc03,$acc07
626 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
627 add $acc09,$acc09,$acc09
628 add $acc10,$acc10,$acc10
629 add $acc11,$acc11,$acc11
630 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
631 and $acc01,$acc01,$mask1b
632 and $acc02,$acc02,$mask1b
633 and $acc03,$acc03,$mask1b
634 xor $acc00,$acc00,$acc08 # r2
635 xor $acc01,$acc01,$acc09
636 xor $acc02,$acc02,$acc10
637 xor $acc03,$acc03,$acc11
639 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
643 xor $s0,$s0,$acc00 # r0^r2
647 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
651 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
655 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
656 rotlwi $acc09,$acc13,8
657 rotlwi $acc10,$acc14,8
658 rotlwi $acc11,$acc15,8
681 $STU $sp,-$FRAME($sp)
683 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
684 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
685 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
686 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
687 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
688 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
689 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
690 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
691 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
692 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
693 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
694 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
695 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
696 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
697 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
698 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
699 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
700 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
701 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
702 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
703 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
710 bl Lppc_AES_decrypt_compact
716 $POP r0,`$FRAME-$SIZE_T*21`($sp)
717 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
718 $POP r13,`$FRAME-$SIZE_T*19`($sp)
719 $POP r14,`$FRAME-$SIZE_T*18`($sp)
720 $POP r15,`$FRAME-$SIZE_T*17`($sp)
721 $POP r16,`$FRAME-$SIZE_T*16`($sp)
722 $POP r17,`$FRAME-$SIZE_T*15`($sp)
723 $POP r18,`$FRAME-$SIZE_T*14`($sp)
724 $POP r19,`$FRAME-$SIZE_T*13`($sp)
725 $POP r20,`$FRAME-$SIZE_T*12`($sp)
726 $POP r21,`$FRAME-$SIZE_T*11`($sp)
727 $POP r22,`$FRAME-$SIZE_T*10`($sp)
728 $POP r23,`$FRAME-$SIZE_T*9`($sp)
729 $POP r24,`$FRAME-$SIZE_T*8`($sp)
730 $POP r25,`$FRAME-$SIZE_T*7`($sp)
731 $POP r26,`$FRAME-$SIZE_T*6`($sp)
732 $POP r27,`$FRAME-$SIZE_T*5`($sp)
733 $POP r28,`$FRAME-$SIZE_T*4`($sp)
734 $POP r29,`$FRAME-$SIZE_T*3`($sp)
735 $POP r30,`$FRAME-$SIZE_T*2`($sp)
736 $POP r31,`$FRAME-$SIZE_T*1`($sp)
751 addi $acc00,$acc00,-1
760 rlwinm $acc00,$s0,`32-24+3`,21,28
761 rlwinm $acc01,$s1,`32-24+3`,21,28
762 rlwinm $acc02,$s2,`32-24+3`,21,28
763 rlwinm $acc03,$s3,`32-24+3`,21,28
766 rlwinm $acc04,$s3,`32-16+3`,21,28
767 rlwinm $acc05,$s0,`32-16+3`,21,28
770 rlwinm $acc06,$s1,`32-16+3`,21,28
771 rlwinm $acc07,$s2,`32-16+3`,21,28
772 lwzx $acc00,$Tbl0,$acc00
773 lwzx $acc01,$Tbl0,$acc01
774 rlwinm $acc08,$s2,`32-8+3`,21,28
775 rlwinm $acc09,$s3,`32-8+3`,21,28
776 lwzx $acc02,$Tbl0,$acc02
777 lwzx $acc03,$Tbl0,$acc03
778 rlwinm $acc10,$s0,`32-8+3`,21,28
779 rlwinm $acc11,$s1,`32-8+3`,21,28
780 lwzx $acc04,$Tbl1,$acc04
781 lwzx $acc05,$Tbl1,$acc05
782 rlwinm $acc12,$s1,`0+3`,21,28
783 rlwinm $acc13,$s2,`0+3`,21,28
784 lwzx $acc06,$Tbl1,$acc06
785 lwzx $acc07,$Tbl1,$acc07
786 rlwinm $acc14,$s3,`0+3`,21,28
787 rlwinm $acc15,$s0,`0+3`,21,28
788 lwzx $acc08,$Tbl2,$acc08
789 lwzx $acc09,$Tbl2,$acc09
792 lwzx $acc10,$Tbl2,$acc10
793 lwzx $acc11,$Tbl2,$acc11
796 lwzx $acc12,$Tbl3,$acc12
797 lwzx $acc13,$Tbl3,$acc13
800 lwzx $acc14,$Tbl3,$acc14
801 lwzx $acc15,$Tbl3,$acc15
815 addi $Tbl2,$Tbl0,2048
819 rlwinm $acc00,$s0,`32-24`,24,31
820 rlwinm $acc01,$s1,`32-24`,24,31
823 rlwinm $acc02,$s2,`32-24`,24,31
824 rlwinm $acc03,$s3,`32-24`,24,31
825 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
826 lwz $acc09,`2048+32`($Tbl0)
827 rlwinm $acc04,$s3,`32-16`,24,31
828 rlwinm $acc05,$s0,`32-16`,24,31
829 lwz $acc10,`2048+64`($Tbl0)
830 lwz $acc11,`2048+96`($Tbl0)
831 lbzx $acc00,$Tbl2,$acc00
832 lbzx $acc01,$Tbl2,$acc01
833 lwz $acc12,`2048+128`($Tbl0)
834 lwz $acc13,`2048+160`($Tbl0)
835 rlwinm $acc06,$s1,`32-16`,24,31
836 rlwinm $acc07,$s2,`32-16`,24,31
837 lwz $acc14,`2048+192`($Tbl0)
838 lwz $acc15,`2048+224`($Tbl0)
839 rlwinm $acc08,$s2,`32-8`,24,31
840 rlwinm $acc09,$s3,`32-8`,24,31
841 lbzx $acc02,$Tbl2,$acc02
842 lbzx $acc03,$Tbl2,$acc03
843 rlwinm $acc10,$s0,`32-8`,24,31
844 rlwinm $acc11,$s1,`32-8`,24,31
845 lbzx $acc04,$Tbl2,$acc04
846 lbzx $acc05,$Tbl2,$acc05
847 rlwinm $acc12,$s1,`0`,24,31
848 rlwinm $acc13,$s2,`0`,24,31
849 lbzx $acc06,$Tbl2,$acc06
850 lbzx $acc07,$Tbl2,$acc07
851 rlwinm $acc14,$s3,`0`,24,31
852 rlwinm $acc15,$s0,`0`,24,31
853 lbzx $acc08,$Tbl2,$acc08
854 lbzx $acc09,$Tbl2,$acc09
855 rlwinm $s0,$acc00,24,0,7
856 rlwinm $s1,$acc01,24,0,7
857 lbzx $acc10,$Tbl2,$acc10
858 lbzx $acc11,$Tbl2,$acc11
859 rlwinm $s2,$acc02,24,0,7
860 rlwinm $s3,$acc03,24,0,7
861 lbzx $acc12,$Tbl2,$acc12
862 lbzx $acc13,$Tbl2,$acc13
863 rlwimi $s0,$acc04,16,8,15
864 rlwimi $s1,$acc05,16,8,15
865 lbzx $acc14,$Tbl2,$acc14
866 lbzx $acc15,$Tbl2,$acc15
867 rlwimi $s2,$acc06,16,8,15
868 rlwimi $s3,$acc07,16,8,15
869 rlwimi $s0,$acc08,8,16,23
870 rlwimi $s1,$acc09,8,16,23
871 rlwimi $s2,$acc10,8,16,23
872 rlwimi $s3,$acc11,8,16,23
884 Lppc_AES_decrypt_compact:
890 addi $Tbl1,$Tbl0,2048
894 ori $mask80,$mask80,0x8080
895 ori $mask1b,$mask1b,0x1b1b
897 $code.=<<___ if ($SIZE_T==8);
898 insrdi $mask80,$mask80,32,0
899 insrdi $mask1b,$mask1b,32,0
909 rlwinm $acc00,$s0,`32-24`,24,31
910 rlwinm $acc01,$s1,`32-24`,24,31
911 rlwinm $acc02,$s2,`32-24`,24,31
912 rlwinm $acc03,$s3,`32-24`,24,31
913 rlwinm $acc04,$s3,`32-16`,24,31
914 rlwinm $acc05,$s0,`32-16`,24,31
915 rlwinm $acc06,$s1,`32-16`,24,31
916 rlwinm $acc07,$s2,`32-16`,24,31
917 lbzx $acc00,$Tbl1,$acc00
918 lbzx $acc01,$Tbl1,$acc01
919 rlwinm $acc08,$s2,`32-8`,24,31
920 rlwinm $acc09,$s3,`32-8`,24,31
921 lbzx $acc02,$Tbl1,$acc02
922 lbzx $acc03,$Tbl1,$acc03
923 rlwinm $acc10,$s0,`32-8`,24,31
924 rlwinm $acc11,$s1,`32-8`,24,31
925 lbzx $acc04,$Tbl1,$acc04
926 lbzx $acc05,$Tbl1,$acc05
927 rlwinm $acc12,$s1,`0`,24,31
928 rlwinm $acc13,$s2,`0`,24,31
929 lbzx $acc06,$Tbl1,$acc06
930 lbzx $acc07,$Tbl1,$acc07
931 rlwinm $acc14,$s3,`0`,24,31
932 rlwinm $acc15,$s0,`0`,24,31
933 lbzx $acc08,$Tbl1,$acc08
934 lbzx $acc09,$Tbl1,$acc09
935 rlwinm $s0,$acc00,24,0,7
936 rlwinm $s1,$acc01,24,0,7
937 lbzx $acc10,$Tbl1,$acc10
938 lbzx $acc11,$Tbl1,$acc11
939 rlwinm $s2,$acc02,24,0,7
940 rlwinm $s3,$acc03,24,0,7
941 lbzx $acc12,$Tbl1,$acc12
942 lbzx $acc13,$Tbl1,$acc13
943 rlwimi $s0,$acc04,16,8,15
944 rlwimi $s1,$acc05,16,8,15
945 lbzx $acc14,$Tbl1,$acc14
946 lbzx $acc15,$Tbl1,$acc15
947 rlwimi $s2,$acc06,16,8,15
948 rlwimi $s3,$acc07,16,8,15
949 rlwimi $s0,$acc08,8,16,23
950 rlwimi $s1,$acc09,8,16,23
951 rlwimi $s2,$acc10,8,16,23
952 rlwimi $s3,$acc11,8,16,23
963 bdz Ldec_compact_done
965 $code.=<<___ if ($SIZE_T==8);
966 # vectorized permutation improves decrypt performance by 10%
970 and $acc00,$s0,$mask80 # r1=r0&0x80808080
971 and $acc02,$s2,$mask80
972 srdi $acc04,$acc00,7 # r1>>7
974 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
975 andc $acc10,$s2,$mask80
976 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
977 sub $acc02,$acc02,$acc06
978 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
979 add $acc10,$acc10,$acc10
980 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
981 and $acc02,$acc02,$mask1b
982 xor $acc00,$acc00,$acc08 # r2
983 xor $acc02,$acc02,$acc10
985 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
986 and $acc06,$acc02,$mask80
987 srdi $acc08,$acc04,7 # r1>>7
989 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
990 andc $acc14,$acc02,$mask80
991 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
992 sub $acc06,$acc06,$acc10
993 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
994 add $acc14,$acc14,$acc14
995 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
996 and $acc06,$acc06,$mask1b
997 xor $acc04,$acc04,$acc12 # r4
998 xor $acc06,$acc06,$acc14
1000 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1001 and $acc10,$acc06,$mask80
1002 srdi $acc12,$acc08,7 # r1>>7
1003 srdi $acc14,$acc10,7
1004 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1005 sub $acc10,$acc10,$acc14
1006 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1007 andc $acc14,$acc06,$mask80
1008 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1009 add $acc14,$acc14,$acc14
1010 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1011 and $acc10,$acc10,$mask1b
1012 xor $acc08,$acc08,$acc12 # r8
1013 xor $acc10,$acc10,$acc14
1015 xor $acc00,$acc00,$s0 # r2^r0
1016 xor $acc02,$acc02,$s2
1017 xor $acc04,$acc04,$s0 # r4^r0
1018 xor $acc06,$acc06,$s2
1020 extrdi $acc01,$acc00,32,0
1021 extrdi $acc03,$acc02,32,0
1022 extrdi $acc05,$acc04,32,0
1023 extrdi $acc07,$acc06,32,0
1024 extrdi $acc09,$acc08,32,0
1025 extrdi $acc11,$acc10,32,0
1027 $code.=<<___ if ($SIZE_T==4);
1028 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1029 and $acc01,$s1,$mask80
1030 and $acc02,$s2,$mask80
1031 and $acc03,$s3,$mask80
1032 srwi $acc04,$acc00,7 # r1>>7
1033 srwi $acc05,$acc01,7
1034 srwi $acc06,$acc02,7
1035 srwi $acc07,$acc03,7
1036 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1037 andc $acc09,$s1,$mask80
1038 andc $acc10,$s2,$mask80
1039 andc $acc11,$s3,$mask80
1040 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1041 sub $acc01,$acc01,$acc05
1042 sub $acc02,$acc02,$acc06
1043 sub $acc03,$acc03,$acc07
1044 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1045 add $acc09,$acc09,$acc09
1046 add $acc10,$acc10,$acc10
1047 add $acc11,$acc11,$acc11
1048 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1049 and $acc01,$acc01,$mask1b
1050 and $acc02,$acc02,$mask1b
1051 and $acc03,$acc03,$mask1b
1052 xor $acc00,$acc00,$acc08 # r2
1053 xor $acc01,$acc01,$acc09
1054 xor $acc02,$acc02,$acc10
1055 xor $acc03,$acc03,$acc11
1057 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1058 and $acc05,$acc01,$mask80
1059 and $acc06,$acc02,$mask80
1060 and $acc07,$acc03,$mask80
1061 srwi $acc08,$acc04,7 # r1>>7
1062 srwi $acc09,$acc05,7
1063 srwi $acc10,$acc06,7
1064 srwi $acc11,$acc07,7
1065 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1066 andc $acc13,$acc01,$mask80
1067 andc $acc14,$acc02,$mask80
1068 andc $acc15,$acc03,$mask80
1069 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1070 sub $acc05,$acc05,$acc09
1071 sub $acc06,$acc06,$acc10
1072 sub $acc07,$acc07,$acc11
1073 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1074 add $acc13,$acc13,$acc13
1075 add $acc14,$acc14,$acc14
1076 add $acc15,$acc15,$acc15
1077 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1078 and $acc05,$acc05,$mask1b
1079 and $acc06,$acc06,$mask1b
1080 and $acc07,$acc07,$mask1b
1081 xor $acc04,$acc04,$acc12 # r4
1082 xor $acc05,$acc05,$acc13
1083 xor $acc06,$acc06,$acc14
1084 xor $acc07,$acc07,$acc15
1086 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1087 and $acc09,$acc05,$mask80
1088 and $acc10,$acc06,$mask80
1089 and $acc11,$acc07,$mask80
1090 srwi $acc12,$acc08,7 # r1>>7
1091 srwi $acc13,$acc09,7
1092 srwi $acc14,$acc10,7
1093 srwi $acc15,$acc11,7
1094 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1095 sub $acc09,$acc09,$acc13
1096 sub $acc10,$acc10,$acc14
1097 sub $acc11,$acc11,$acc15
1098 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1099 andc $acc13,$acc05,$mask80
1100 andc $acc14,$acc06,$mask80
1101 andc $acc15,$acc07,$mask80
1102 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1103 add $acc13,$acc13,$acc13
1104 add $acc14,$acc14,$acc14
1105 add $acc15,$acc15,$acc15
1106 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1107 and $acc09,$acc09,$mask1b
1108 and $acc10,$acc10,$mask1b
1109 and $acc11,$acc11,$mask1b
1110 xor $acc08,$acc08,$acc12 # r8
1111 xor $acc09,$acc09,$acc13
1112 xor $acc10,$acc10,$acc14
1113 xor $acc11,$acc11,$acc15
1115 xor $acc00,$acc00,$s0 # r2^r0
1116 xor $acc01,$acc01,$s1
1117 xor $acc02,$acc02,$s2
1118 xor $acc03,$acc03,$s3
1119 xor $acc04,$acc04,$s0 # r4^r0
1120 xor $acc05,$acc05,$s1
1121 xor $acc06,$acc06,$s2
1122 xor $acc07,$acc07,$s3
1125 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1129 xor $s0,$s0,$acc00 # ^= r2^r0
1133 xor $acc00,$acc00,$acc08
1134 xor $acc01,$acc01,$acc09
1135 xor $acc02,$acc02,$acc10
1136 xor $acc03,$acc03,$acc11
1137 xor $s0,$s0,$acc04 # ^= r4^r0
1141 rotrwi $acc00,$acc00,24
1142 rotrwi $acc01,$acc01,24
1143 rotrwi $acc02,$acc02,24
1144 rotrwi $acc03,$acc03,24
1145 xor $acc04,$acc04,$acc08
1146 xor $acc05,$acc05,$acc09
1147 xor $acc06,$acc06,$acc10
1148 xor $acc07,$acc07,$acc11
1149 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1153 rotrwi $acc04,$acc04,16
1154 rotrwi $acc05,$acc05,16
1155 rotrwi $acc06,$acc06,16
1156 rotrwi $acc07,$acc07,16
1157 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1161 rotrwi $acc08,$acc08,8
1162 rotrwi $acc09,$acc09,8
1163 rotrwi $acc10,$acc10,8
1164 rotrwi $acc11,$acc11,8
1165 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1169 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1183 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
# Post-process the accumulated assembly text in $code: every span enclosed
# in backticks (for example `128-8`) is replaced, throughout the text (/g),
# by the result of evaluating its contents as a Perl expression (/e + eval).
1187 $code =~ s/\`([^\`]*)\`/eval $1/gem;