3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with a 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order: an issued instruction [in this case the load
18 # from memory is the critical one] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
21 # As for hardware acceleration support: it's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably, support
23 # for CBC is not utilized, nor are multiple blocks ever processed.
24 # Also, the software key schedule can be postponed until hardware
25 # support is detected... Performance improvement over assembler is
26 # reportedly ~2.5x, but can reach >8x [naturally on larger chunks] if
27 # proper support is implemented.
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
34 $softonly=0; # allow hardware support
# Register aliases: one machine register may carry several names
# (r2 is both t3 and inp; r3 is out, mask and bits).
38 $t3="%r2"; $inp="%r2";
39 $out="%r3"; $mask="%r3"; $bits="%r3";
# Append each value obtained from shift to the generated code twice,
# as a pair of identical 32-bit .long words (8 bytes per value).
55 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
66 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
67 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
68 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
69 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
70 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
71 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
72 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
73 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
74 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
75 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
76 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
77 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
78 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
79 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
80 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
81 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
82 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
83 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
84 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
85 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
86 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
87 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
88 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
89 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
90 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
91 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
92 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
93 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
94 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
95 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
96 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
97 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
98 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
99 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
100 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
101 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
102 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
103 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
104 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
105 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
106 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
107 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
108 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
109 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
110 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
111 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
112 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
113 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
114 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
115 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
116 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
117 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
118 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
119 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
120 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
121 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
122 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
123 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
124 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
125 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
126 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
127 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
128 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
129 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
132 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
133 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
134 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
135 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
136 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
137 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
138 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
139 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
140 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
141 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
142 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
143 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
144 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
145 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
146 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
147 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
148 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
149 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
150 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
151 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
152 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
153 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
154 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
155 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
156 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
157 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
158 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
159 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
160 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
161 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
162 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
163 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
165 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
166 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
167 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
168 .size AES_Te,.-AES_Te
170 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
171 # const AES_KEY *key) {
173 .type AES_encrypt,\@function
# Hardware path: this chunk is appended to the generated code only when
# softonly is false.
177 $code.=<<___ if (!$softonly);
181 lghi %r0,0 # query capability vector
# km is emitted as a raw instruction word; r0 is loaded with the
# requested function immediately before each use.
183 .long 0xb92e0042 # km %r4,%r2
187 lghi %r0,`0x00|0x12` # encrypt AES-128
# r3 is the same register as the out pointer, so the pointer is
# re-derived from r4 after the km attempt.
191 lghi %r3,16 # single block length
192 .long 0xb92e0042 # km %r4,%r2
193 bcr 8,%r14 # return if done
194 la $out,0(%r4) # restore arguments
# Preserve the registers from inp through key across the call that
# performs the postponed key schedule setup, then reload them.
200 stmg $inp,$key,16($sp)
203 bras $ra,.Lekey_internal # postponed key schedule setup
204 lmg $inp,$key,16($sp)
# Software path: store r3 through r13 at offset 24 from the stack pointer.
208 stmg %r3,%r13,24($sp)
# mask = 0xff<<3: keeps one state byte that is already multiplied by 8,
# matching the n-3 shift amounts used for table indexing in the inner
# routine.
217 llill $mask,`0xff<<3`
218 bras $ra,_s390x_AES_encrypt
228 .size AES_encrypt,.-AES_encrypt
# Inner software encryption routine: a loop of table-driven rounds
# (.Lenc_loop) followed by a final round made of single-byte loads.
#
# Table addressing: shift amounts are written as n-3 so that the
# extracted state byte arrives multiplied by 8. The displacement 0..3
# then selects a 4-byte window inside the indexed entry; as annotated on
# the loads below, displacement 0 yields Te0, 3 yields Te1, 2 yields Te2
# and 1 yields Te3.
# NOTE(review): this relies on every table word being stored twice in a
# row (8 bytes per entry) - confirm against the table emitter.
230 .type _s390x_AES_encrypt,\@function
249 l $s0,0($s0,$tbl) # Te0[s0>>24]
250 l $t1,1($i1,$tbl) # Te3[s0>>0]
251 l $t2,2($i2,$tbl) # Te2[s0>>8]
252 l $t3,3($i3,$tbl) # Te1[s0>>16]
254 srlg $i1,$s1,`16-3` # i0
262 x $s0,3($i1,$tbl) # Te1[s1>>16]
263 l $s1,0($s1,$tbl) # Te0[s1>>24]
264 x $t2,1($i2,$tbl) # Te3[s1>>0]
265 x $t3,2($i3,$tbl) # Te2[s1>>8]
268 srlg $i1,$s2,`8-3` # i0
269 srlg $i2,$s2,`16-3` # i1
276 x $s0,2($i1,$tbl) # Te2[s2>>8]
277 x $s1,3($i2,$tbl) # Te1[s2>>16]
278 l $s2,0($s2,$tbl) # Te0[s2>>24]
279 x $t3,1($i3,$tbl) # Te3[s2>>0]
282 sllg $i1,$s3,`0+3` # i0
283 srlg $i2,$s3,`8-3` # i1
284 srlg $i3,$s3,`16-3` # i2
290 x $s0,1($i1,$tbl) # Te3[s3>>0]
291 x $s1,2($i2,$tbl) # Te2[s3>>8]
292 x $s2,3($i3,$tbl) # Te1[s3>>16]
293 l $s3,0($s3,$tbl) # Te0[s3>>24]
# Decrement the round counter and repeat while it is non-zero.
302 brct $rounds,.Lenc_loop
# Final round: llgc fetches the single byte at displacement 2 of the
# indexed entry, annotated as the Te4 lookup. For example the first
# table word 0xc66363a5 has 0x63 at that position, which equals the
# first byte of the separate 256-byte table.
312 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
313 llgc $t1,2($i1,$tbl) # Te4[s0>>0]
314 llgc $t2,2($i2,$tbl) # Te4[s0>>8]
315 llgc $t3,2($i3,$tbl) # Te4[s0>>16]
320 srlg $i1,$s1,`16-3` # i0
328 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
329 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
330 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
331 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
340 srlg $i1,$s2,`8-3` # i0
341 srlg $i2,$s2,`16-3` # i1
348 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
349 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
350 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
351 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
360 sllg $i1,$s3,`0+3` # i0
361 srlg $i2,$s3,`8-3` # i1
362 srlg $i3,$s3,`16-3` # i2
368 llgc $i1,2($i1,$tbl) # Te4[s3>>0]
369 llgc $i2,2($i2,$tbl) # Te4[s3>>8]
370 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
371 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
386 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
390 .type AES_Td,\@object
395 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
396 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
397 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
398 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
399 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
400 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
401 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
402 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
403 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
404 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
405 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
406 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
407 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
408 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
409 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
410 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
411 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
412 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
413 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
414 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
415 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
416 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
417 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
418 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
419 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
420 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
421 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
422 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
423 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
424 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
425 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
426 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
427 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
428 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
429 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
430 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
431 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
432 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
433 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
434 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
435 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
436 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
437 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
438 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
439 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
440 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
441 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
442 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
443 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
444 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
445 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
446 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
447 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
448 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
449 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
450 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
451 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
452 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
453 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
454 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
455 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
456 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
457 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
458 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
461 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
462 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
463 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
464 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
465 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
466 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
467 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
468 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
469 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
470 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
471 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
472 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
473 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
474 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
475 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
476 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
477 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
478 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
479 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
480 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
481 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
482 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
483 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
484 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
485 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
486 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
487 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
488 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
489 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
490 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
491 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
492 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
493 .size AES_Td,.-AES_Td
495 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
496 # const AES_KEY *key) {
498 .type AES_decrypt,\@function
# Hardware path: this chunk is appended to the generated code only when
# softonly is false.
502 $code.=<<___ if (!$softonly);
506 lghi %r0,0 # query capability vector
# km is emitted as a raw instruction word; r0 is loaded with the
# requested function immediately before each use.
508 .long 0xb92e0042 # km %r4,%r2
# Same function number as in AES_encrypt (0x12) with 0x80 or-ed in to
# request decryption.
512 lghi %r0,`0x80|0x12` # decrypt AES-128
# r3 is the same register as the out pointer, so the pointer is
# re-derived from r4 after the km attempt.
516 lghi %r3,16 # single block length
517 .long 0xb92e0042 # km %r4,%r2
518 bcr 8,%r14 # return if done
519 la $out,0(%r4) # restore arguments
# Preserve the registers from inp through key across the call that
# performs the postponed key schedule setup, then reload them.
526 stmg $inp,$key,16($sp)
529 bras $ra,.Ldkey_internal # postponed key schedule setup
530 lmg $inp,$key,16($sp)
# Software path: store r3 through r13 at offset 24 from the stack pointer.
534 stmg %r3,%r13,24($sp)
# mask = 0xff<<3: keeps one state byte that is already multiplied by 8,
# matching the n-3 shift amounts used for table indexing in the inner
# routine.
543 llill $mask,`0xff<<3`
544 bras $ra,_s390x_AES_decrypt
554 .size AES_decrypt,.-AES_decrypt
# Inner software decryption routine: a loop of table-driven rounds
# (.Ldec_loop) followed by a final round made of single-byte loads from
# the byte table located 2048 bytes past the table base.
#
# Table addressing: shift amounts are written as n-3 so that the
# extracted state byte arrives multiplied by 8. The displacement 0..3
# then selects a 4-byte window inside the indexed entry; as annotated on
# the loads below, displacement 0 yields Td0, 3 yields Td1, 2 yields Td2
# and 1 yields Td3.
# NOTE(review): this relies on every table word being stored twice in a
# row (8 bytes per entry) - confirm against the table emitter.
556 .type _s390x_AES_decrypt,\@function
575 l $s0,0($s0,$tbl) # Td0[s0>>24]
576 l $t1,3($i1,$tbl) # Td1[s0>>16]
577 l $t2,2($i2,$tbl) # Td2[s0>>8]
578 l $t3,1($i3,$tbl) # Td3[s0>>0]
580 sllg $i1,$s1,`0+3` # i0
588 x $s0,1($i1,$tbl) # Td3[s1>>0]
589 l $s1,0($s1,$tbl) # Td0[s1>>24]
590 x $t2,3($i2,$tbl) # Td1[s1>>16]
591 x $t3,2($i3,$tbl) # Td2[s1>>8]
594 srlg $i1,$s2,`8-3` # i0
595 sllg $i2,$s2,`0+3` # i1
602 x $s0,2($i1,$tbl) # Td2[s2>>8]
603 x $s1,1($i2,$tbl) # Td3[s2>>0]
604 l $s2,0($s2,$tbl) # Td0[s2>>24]
605 x $t3,3($i3,$tbl) # Td1[s2>>16]
608 srlg $i1,$s3,`16-3` # i0
609 srlg $i2,$s3,`8-3` # i1
610 sllg $i3,$s3,`0+3` # i2
616 x $s0,3($i1,$tbl) # Td1[s3>>16]
617 x $s1,2($i2,$tbl) # Td2[s3>>8]
618 x $s2,1($i3,$tbl) # Td3[s3>>0]
619 l $s3,0($s3,$tbl) # Td0[s3>>24]
# Decrement the round counter and repeat while it is non-zero.
628 brct $rounds,.Ldec_loop
# Eight loads spaced 32 bytes apart cover the 256-byte Td4 table that
# starts at offset 2048. The loaded values are not kept (t1 and t2 are
# overwritten within this very sequence); the loads serve only to touch
# the table ahead of the final round.
630 l $t1,`2048+0`($tbl) # prefetch Td4
631 l $t2,`2048+32`($tbl)
632 l $t3,`2048+64`($tbl)
633 l $i1,`2048+96`($tbl)
634 l $i2,`2048+128`($tbl)
635 l $i3,`2048+160`($tbl)
636 l $t1,`2048+192`($tbl)
637 l $t2,`2048+224`($tbl)
# Final round: one byte per lookup, read from the Td4 table at
# displacement 2048.
646 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
647 llgc $t1,2048($i1,$tbl) # Td4[s0>>16]
648 llgc $t2,2048($i2,$tbl) # Td4[s0>>8]
649 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
660 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
661 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
662 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
663 llgc $i3,2048($i3,$tbl) # Td4[s1>>8]
678 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
679 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
680 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
681 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
696 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
697 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
698 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
699 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
714 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
716 # void AES_set_encrypt_key(const unsigned char *in, int bits,
718 .globl AES_set_encrypt_key
719 .type AES_set_encrypt_key,\@function
# Hardware probe: this chunk is appended to the generated code only when
# softonly is false.
743 $code.=<<___ if (!$softonly);
744 lghi %r0,0 # query capability vector
746 .long 0xb92e0042 # km %r4,%r2
751 lmg $t1,$t2,0($inp) # just copy 128 bits...
# The word at offset 236 of the key structure serves as the marker that
# tells whether the software key schedule has been built; the store
# annotated "mark as set up" further down writes the same word.
754 st $t1,236($key) # ... postpone key setup
762 stmg %r6,%r13,48($sp) # all non-volatile regs
# tbl = AES_Te+2048: the 256-byte table used for the Te4 lookups below;
# the rcon words are addressed at displacement 256 from the same base.
764 larl $tbl,AES_Te+2048
781 st $t3,236($key) # mark as set up
786 llgfr $t2,$s3 # temp=rk[3]
# 128-bit schedule step. Each icm inserts one substituted byte at the
# position chosen by its mask (1 = lowest byte, 8 = highest byte of the
# low word). As annotated, every byte moves one position up and the top
# byte wraps to the bottom, so substitution and word rotation are done
# together.
797 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
798 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
799 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
800 icm $t2,1,0($i3) # Te4[rk[3]>>24]
801 x $t2,256($t3,$tbl) # rcon[i]
802 xr $s0,$t2 # rk[4]=rk[0]^...
803 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
804 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
805 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
810 la $key,16($key) # key+=4
812 brct $rounds,.L128_loop
# 192-bit schedule step: same substitute-and-rotate construction,
# applied to rk[5].
845 icm $t2,2,0($t2) # Te4[rk[5]>>0]<<8
846 icm $t2,4,0($i1) # Te4[rk[5]>>8]<<16
847 icm $t2,8,0($i2) # Te4[rk[5]>>16]<<24
848 icm $t2,1,0($i3) # Te4[rk[5]>>24]
849 x $t2,256($t3,$tbl) # rcon[i]
850 xr $s0,$t2 # rk[6]=rk[0]^...
851 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
852 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
853 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
858 brct $rounds,.L192_continue
865 x $t2,16($key) # rk[10]=rk[4]^rk[9]
867 x $t2,20($key) # rk[11]=rk[5]^rk[10]
869 la $key,24($key) # key+=6
# 256-bit schedule step, first half: substitute-and-rotate applied to
# rk[7], then rcon.
897 icm $t2,2,0($t2) # Te4[rk[7]>>0]<<8
898 icm $t2,4,0($i1) # Te4[rk[7]>>8]<<16
899 icm $t2,8,0($i2) # Te4[rk[7]>>16]<<24
900 icm $t2,1,0($i3) # Te4[rk[7]>>24]
901 x $t2,256($t3,$tbl) # rcon[i]
902 xr $s0,$t2 # rk[8]=rk[0]^...
903 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
904 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
905 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
910 brct $rounds,.L256_continue
916 lgr $t2,$s3 # temp=rk[11]
# 256-bit schedule step, second half: substitution only. Every byte is
# put back at its own position (no rotation) and no rcon word is applied
# before the xor with rk[4].
927 llgc $t2,0($t2) # Te4[rk[11]>>0]
928 icm $t2,2,0($i1) # Te4[rk[11]>>8]<<8
929 icm $t2,4,0($i2) # Te4[rk[11]>>16]<<16
930 icm $t2,8,0($i3) # Te4[rk[11]>>24]<<24
931 x $t2,16($key) # rk[12]=rk[4]^...
933 x $t2,20($key) # rk[13]=rk[5]^rk[12]
935 x $t2,24($key) # rk[14]=rk[6]^rk[13]
937 x $t2,28($key) # rk[15]=rk[7]^rk[14]
940 la $key,32($key) # key+=8
947 .size AES_set_encrypt_key,.-AES_set_encrypt_key
949 # void AES_set_decrypt_key(const unsigned char *in, int bits,
# Builds the decryption schedule by calling AES_set_encrypt_key first
# and then post-processing the resulting round keys.
951 .globl AES_set_decrypt_key
952 .type AES_set_decrypt_key,\@function
955 stg $key,32($sp) # I rely on AES_set_encrypt_key to
956 stg $ra,112($sp) # save non-volatile registers!
957 bras $ra,AES_set_encrypt_key
# This chunk is appended to the generated code only when softonly is
# false.
963 $code.=<<___ if (!$softonly);
971 lmg $t1,$t2,0($key) # just copy 128 bits otherwise
# Store the same registers back at offset 160 of the key structure.
972 stmg $t1,$t2,160($key)
980 bras $ra,.Lekey_internal
# The round count is read from offset 240 of the key structure.
986 .Lgo: llgf $rounds,240($key)
993 .Linv: lmg $s0,$s1,0($i1)
1005 llgf $rounds,240($key)
1007 sll $rounds,2 # (rounds-1)*4
# llilh places its 16-bit immediate in the upper half of the low 32 bits
# of the register and clears the remaining bits.
1008 llilh $mask80,0x8080
1010 llilh $mask1b,0x1b1b
1012 llilh $maskfe,0xfefe
# .Lmix: per-word transform of the round keys, combining tp1, tp2, tp4
# and tp8 as annotated on the instructions below.
1016 .Lmix: l $s0,16($key) # tp1
1044 xr $s1,$s0 # tp2^tp1
1045 xr $s2,$s0 # tp4^tp1
# rll by 24 is a left rotate by 24 bits, i.e. a right rotate by 8.
1046 rll $s0,$s0,24 # = ROTATE(tp1,8)
1047 xr $s0,$s1 # ^=tp2^tp1
1048 xr $s0,$s2 # ^=tp4^tp1
1049 xr $s0,$s3 # ^= tp8[^(tp4^tp1)^(tp2^tp1)=tp4^tp2]
1050 xr $s1,$s3 # tp2^tp1^tp8
1052 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1053 xr $s2,$s3 # tp4^tp1^tp8
1055 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1057 xr $s0,$s3 # ^= ROTATE(tp8,8)
1063 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1066 .size AES_set_decrypt_key,.-AES_set_decrypt_key
# Identification string embedded in the generated output.
1067 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
# Post-process the generated text: every backquoted expression (such as
# the shift amounts and masks written that way above) is replaced by the
# result of evaluating it as Perl.
1070 $code =~ s/\`([^\`]*)\`/eval $1/gem;