3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
21 # As for hardware acceleration support: it's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Then software key schedule can be postponed till hardware support
25 # detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27 # support is implemented.
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
36 # Add support for hardware AES192/256 and reschedule instructions to
37 # minimize/avoid Address Generation Interlock hazard and to favour
38 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39 # almost 50% on z9. The gain is smaller on z10, because being dual-
40 # issue z10 makes it impossible to eliminate the interlock condition:
41 # critical path is not long enough. Yet it spends ~24 cycles per byte
42 # processed with 128-bit key.
44 # Unlike previous version hardware support detection takes place only
45 # at the moment of key schedule setup, which is denoted in key->rounds.
46 # This is done, because deferred key setup can't be made MT-safe, not
47 # for key lengths longer than 128 bits.
49 # Add AES_cbc_encrypt, which gives incredible performance improvement,
50 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
51 # because software implementation was optimized.
# Scan the command line for the first argument that looks like an output
# file name ("name.ext"); arguments preceding it are consumed and discarded.
# The loop also stops when the argument list runs out, leaving $output false.
53 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT to the selected file. When no file name was found STDOUT is
# left untouched, so the generated text still goes to the caller's standard
# output. The three-argument open treats $output strictly as a file name, and
# a failed open is reported instead of being silently ignored.
54 $output and (open(STDOUT,'>',$output) or die "can't open $output: $!");
# Build-time switch: the heredocs guarded by "if (!$softonly)" are appended to
# $code only while this stays 0.
56 $softonly=0; # allow hardware support
# Symbolic names for s390x general registers, interpolated into the assembly
# text. Several names deliberately alias the same register: $mask shares %r0
# with scratch $t0, $inp shares %r2 with $t2, and $out/$bits share %r3 with
# $t3, so a scratch name must not be used while its alias is still live.
# NOTE(review): %r2/%r3 presumably carry the first two call arguments per the
# s390x ABI -- confirm against the calling convention.
58 $t0="%r0"; $mask="%r0";
60 $t2="%r2"; $inp="%r2";
61 $t3="%r3"; $out="%r3"; $bits="%r3";
77 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
154 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
155 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
156 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
157 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
158 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
159 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
160 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
161 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
162 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
163 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
164 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
165 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
166 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
167 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
168 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
169 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
170 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
171 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
172 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
173 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
174 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
175 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
176 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
177 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
178 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
179 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
180 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
181 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
182 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
183 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
184 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
185 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
187 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
188 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
189 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
191 .size AES_Te,.-AES_Te
193 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
194 # const AES_KEY *key) {
196 .type AES_encrypt,\@function
199 $code.=<<___ if (!$softonly);
208 lghi %r3,16 # single block length
209 .long 0xb92e0042 # km %r4,%r2
210 brc 1,.-4 # can this happen?
224 bras $ra,_s390x_AES_encrypt
234 .size AES_encrypt,.-AES_encrypt
236 .type _s390x_AES_encrypt,\@function
245 llill $mask,`0xff<<3`
259 srlg $i1,$s1,`16-3` # i0
268 l $s0,0($s0,$tbl) # Te0[s0>>24]
269 l $t1,1($t1,$tbl) # Te3[s0>>0]
270 l $t2,2($t2,$tbl) # Te2[s0>>8]
271 l $t3,3($t3,$tbl) # Te1[s0>>16]
273 x $s0,3($i1,$tbl) # Te1[s1>>16]
274 l $s1,0($s1,$tbl) # Te0[s1>>24]
275 x $t2,1($i2,$tbl) # Te3[s1>>0]
276 x $t3,2($i3,$tbl) # Te2[s1>>8]
278 srlg $i1,$s2,`8-3` # i0
279 srlg $i2,$s2,`16-3` # i1
288 srlg $ra,$s3,`8-3` # i1
289 sllg $t1,$s3,`0+3` # i0
294 x $s0,2($i1,$tbl) # Te2[s2>>8]
295 x $s1,3($i2,$tbl) # Te1[s2>>16]
296 l $s2,0($s2,$tbl) # Te0[s2>>24]
297 x $t3,1($i3,$tbl) # Te3[s2>>0]
299 srlg $i3,$s3,`16-3` # i2
310 x $s0,1($t1,$tbl) # Te3[s3>>0]
311 x $s1,2($ra,$tbl) # Te2[s3>>8]
312 x $s2,3($i3,$tbl) # Te1[s3>>16]
313 l $s3,0($s3,$tbl) # Te0[s3>>24]
316 brct $rounds,.Lenc_loop
328 srlg $i1,$s1,`16-3` # i0
337 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
338 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
340 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
341 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
345 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
346 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
347 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
348 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
357 srlg $i1,$s2,`8-3` # i0
358 srlg $i2,$s2,`16-3` # i1
366 sllg $t1,$s3,`0+3` # i0
367 srlg $ra,$s3,`8-3` # i1
370 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
371 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
373 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
374 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
383 srlg $i3,$s3,`16-3` # i2
391 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
392 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
393 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
394 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
410 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
414 .type AES_Td,\@object
419 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
420 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
421 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
422 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
423 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
424 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
425 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
426 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
427 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
428 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
429 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
430 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
431 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
432 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
433 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
434 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
435 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
436 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
437 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
438 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
439 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
440 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
441 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
442 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
443 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
444 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
445 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
446 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
447 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
448 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
449 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
450 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
451 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
452 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
453 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
454 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
455 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
456 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
457 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
458 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
459 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
460 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
461 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
462 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
463 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
464 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
465 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
466 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
467 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
468 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
469 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
470 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
471 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
472 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
473 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
474 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
475 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
476 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
477 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
478 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
479 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
480 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
481 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
482 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
485 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
486 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
487 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
488 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
489 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
490 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
491 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
492 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
493 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
494 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
495 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
496 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
497 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
498 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
499 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
500 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
501 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
502 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
503 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
504 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
505 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
506 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
507 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
508 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
509 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
510 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
511 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
512 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
513 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
514 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
515 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
516 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
517 .size AES_Td,.-AES_Td
519 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
520 # const AES_KEY *key) {
522 .type AES_decrypt,\@function
525 $code.=<<___ if (!$softonly);
534 lghi %r3,16 # single block length
535 .long 0xb92e0042 # km %r4,%r2
536 brc 1,.-4 # can this happen?
550 bras $ra,_s390x_AES_decrypt
560 .size AES_decrypt,.-AES_decrypt
562 .type _s390x_AES_decrypt,\@function
571 llill $mask,`0xff<<3`
585 sllg $i1,$s1,`0+3` # i0
594 l $s0,0($s0,$tbl) # Td0[s0>>24]
595 l $t1,3($t1,$tbl) # Td1[s0>>16]
596 l $t2,2($t2,$tbl) # Td2[s0>>8]
597 l $t3,1($t3,$tbl) # Td3[s0>>0]
599 x $s0,1($i1,$tbl) # Td3[s1>>0]
600 l $s1,0($s1,$tbl) # Td0[s1>>24]
601 x $t2,3($i2,$tbl) # Td1[s1>>16]
602 x $t3,2($i3,$tbl) # Td2[s1>>8]
604 srlg $i1,$s2,`8-3` # i0
605 sllg $i2,$s2,`0+3` # i1
614 srlg $ra,$s3,`8-3` # i1
615 srlg $t1,$s3,`16-3` # i0
620 x $s0,2($i1,$tbl) # Td2[s2>>8]
621 x $s1,1($i2,$tbl) # Td3[s2>>0]
622 l $s2,0($s2,$tbl) # Td0[s2>>24]
623 x $t3,3($i3,$tbl) # Td1[s2>>16]
625 sllg $i3,$s3,`0+3` # i2
636 x $s0,3($t1,$tbl) # Td1[s3>>16]
637 x $s1,2($ra,$tbl) # Td2[s3>>8]
638 x $s2,1($i3,$tbl) # Td3[s3>>0]
639 l $s3,0($s3,$tbl) # Td0[s3>>24]
642 brct $rounds,.Ldec_loop
645 l $t1,`2048+0`($tbl) # prefetch Td4
646 l $t2,`2048+64`($tbl)
647 l $t3,`2048+128`($tbl)
648 l $i1,`2048+192`($tbl)
665 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
666 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
667 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
669 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
673 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
674 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
675 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
677 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
691 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
692 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
693 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
694 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
714 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
715 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
717 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
718 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
732 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
736 # void AES_set_encrypt_key(const unsigned char *in, int bits,
738 .globl AES_set_encrypt_key
739 .type AES_set_encrypt_key,\@function
763 $code.=<<___ if (!$softonly);
764 # convert bits to km code, [128,192,256]->[18,19,20]
771 larl %r1,OPENSSL_s390xcap_P
773 tmhl %r0,0x4000 # check for message-security assist
776 lghi %r0,0 # query capability vector
778 .long 0xb92f0042 # kmc %r4,%r2
785 lmg %r0,%r1,0($inp) # just copy 128 bits...
795 1: st $bits,236($key) # save bits
796 st %r5,240($key) # save km code
803 stmg %r6,%r13,48($sp) # all non-volatile regs
805 larl $tbl,AES_Te+2048
824 llgfr $t2,$s3 # temp=rk[3]
838 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
839 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
840 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
841 icm $t2,1,0($i3) # Te4[rk[3]>>24]
842 x $t2,256($t3,$tbl) # rcon[i]
843 xr $s0,$t2 # rk[4]=rk[0]^...
844 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
845 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
846 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
848 llgfr $t2,$s3 # temp=rk[3]
860 la $key,16($key) # key+=4
862 brct $rounds,.L128_loop
896 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
897 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
898 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
899 icm $t1,1,0($i3) # Te4[rk[5]>>24]
900 x $t1,256($t3,$tbl) # rcon[i]
901 xr $s0,$t1 # rk[6]=rk[0]^...
902 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
903 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
904 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
910 brct $rounds,.L192_continue
918 x $t1,16($key) # rk[10]=rk[4]^rk[9]
920 x $t1,20($key) # rk[11]=rk[5]^rk[10]
930 la $key,24($key) # key+=6
959 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
960 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
961 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
962 icm $t1,1,0($i3) # Te4[rk[7]>>24]
963 x $t1,256($t3,$tbl) # rcon[i]
964 xr $s0,$t1 # rk[8]=rk[0]^...
965 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
966 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
967 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
972 brct $rounds,.L256_continue
979 lgr $t1,$s3 # temp=rk[11]
990 llgc $t1,0($t1) # Te4[rk[11]>>0]
991 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
992 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
993 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
994 x $t1,16($key) # rk[12]=rk[4]^...
996 x $t1,20($key) # rk[13]=rk[5]^rk[12]
998 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1000 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1010 la $key,32($key) # key+=8
1017 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1019 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1021 .globl AES_set_decrypt_key
1022 .type AES_set_decrypt_key,\@function
1024 AES_set_decrypt_key:
1025 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1026 stg $ra,112($sp) # save non-volatile registers!
1027 bras $ra,AES_set_encrypt_key
1033 $code.=<<___ if (!$softonly);
1038 oill $t0,0x80 # set "decrypt" bit
1046 bras $ra,.Lekey_internal
1052 .Lgo: llgf $rounds,240($key)
1060 .Linv: lmg $s0,$s1,0($i1)
1072 llgf $rounds,240($key)
1074 sll $rounds,2 # (rounds-1)*4
1075 llilh $mask80,0x8080
1076 llilh $mask1b,0x1b1b
1077 llilh $maskfe,0xfefe
1083 .Lmix: l $s0,16($key) # tp1
1111 xr $s1,$s0 # tp2^tp1
1112 xr $s2,$s0 # tp4^tp1
1113 rll $s0,$s0,24 # = ROTATE(tp1,8)
1115 xr $s0,$s1 # ^=tp2^tp1
1116 xr $s1,$s3 # tp2^tp1^tp8
1117 xr $s0,$s2 # ^=tp4^tp1^tp8
1120 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1122 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1123 xr $s0,$s3 # ^= ROTATE(tp8,8)
1129 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1132 .size AES_set_decrypt_key,.-AES_set_decrypt_key
1135 #void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1136 # size_t length, const AES_KEY *key,
1137 # unsigned char *ivec, const int enc)
1140 my $out="%r4"; # length and out are swapped
1146 .globl AES_cbc_encrypt
1147 .type AES_cbc_encrypt,\@function
1150 xgr %r3,%r4 # flip %r3 and %r4, out and len
1154 $code.=<<___ if (!$softonly);
1159 lg %r0,0($ivp) # copy ivec
1161 stmg %r0,%r1,16($sp)
1162 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1163 stmg %r0,%r1,32($sp)
1164 lmg %r0,%r1,16($key)
1165 stmg %r0,%r1,48($sp)
1166 l %r0,240($key) # load kmc code
1167 lghi $key,15 # res=len%16, len-=res;
1170 la %r1,16($sp) # parameter block - ivec || key
1172 .long 0xb92f0042 # kmc %r4,%r2
1173 brc 1,.-4 # pay attention to "partial completion"
1177 lmg %r0,%r1,16($sp) # copy ivec to caller
1183 ahi $key,-1 # it's the way it's encoded in mvc
1185 jnz .Lkmc_truncated_dec
1190 mvc 128(1,$sp),0($inp)
1192 la %r1,16($sp) # restore parameter block
1195 .long 0xb92f0042 # kmc %r4,%r2
1198 .Lkmc_truncated_dec:
1202 .long 0xb92f0042 # kmc %r4,%r2
1205 mvc 0(1,$out),128($sp)
1212 stmg $key,$ra,40($sp)
1226 brc 4,.Lcbc_enc_tail # if borrow
1228 stmg $inp,$out,16($sp)
1235 bras $ra,_s390x_AES_encrypt
1237 lmg $inp,$key,16($sp)
1249 brc 4,.Lcbc_enc_tail # if borrow
1269 mvc 128(1,$sp),0($inp)
1281 stmg $t0,$t1,128($sp)
1284 stmg $inp,$out,16($sp)
1291 bras $ra,_s390x_AES_decrypt
1293 lmg $inp,$key,16($sp)
1305 brc 4,.Lcbc_dec_tail # if borrow
1306 brc 2,.Lcbc_dec_done # if zero
1309 stmg $t0,$t1,128($sp)
1319 lmg $ivp,$ra,48($sp)
1320 stmg $t0,$t1,0($ivp)
1330 mvc 0(1,$out),128($sp)
1333 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1334 .comm OPENSSL_s390xcap_P,8,8
1338 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
# Evaluate every `...` span in the accumulated assembly text as a Perl
# expression and splice the result in (e.g. `0xff<<3` becomes 2040).
1341 $code =~ s/\`([^\`]*)\`/eval $1/gem;
# Closing STDOUT flushes any buffered output. A failure here (for example a
# full disk) would otherwise leave a truncated file behind unnoticed, so it is
# treated as fatal.
1343 close STDOUT or die "error closing STDOUT: $!"; # force flush