3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
21 # As for hardware acceleration support. It's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably support
23 # for CBC is not utilized, nor multiple blocks are ever processed.
24 # Then software key schedule can be postponed till hardware support
25 # detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27 # support is implemented.
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
36 # Add support for hardware AES192/256 and reschedule instructions to
37 # minimize/avoid Address Generation Interlock hazard and to favour
38 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39 # almost 50% on z9. The gain is smaller on z10, because being dual-
40 # issue z10 makes it impossible to eliminate the interlock condition:
41 # critical path is not long enough. Yet it spends ~24 cycles per byte
42 # processed with 128-bit key.
44 # Unlike previous version hardware support detection takes place only
45 # at the moment of key schedule setup, which is denoted in key->rounds.
46 # This is done, because deferred key setup can't be made MT-safe, not
47 # for keys longer than 128 bits.
49 # Add AES_cbc_encrypt, which gives incredible performance improvement,
50 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
51 # because software implementation was optimized.
55 # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
56 # performance improvement over "generic" counter mode routine relying
57 # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
58 # to the fact that exact throughput value depends on current stack
59 # frame alignment within 4KB page. In worst case you get ~75% of the
60 # maximum, but *on average* it would be as much as ~98%. Meaning that
61 # worst case is unlikely, it's like hitting a ravine on a plateau.
# Consume command-line arguments until one looks like an output file name
# ("name.ext": word characters/dashes, a dot, an extension). $output is left
# false/undefined when no argument qualifies.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
# Send everything printed to STDOUT into that file. Three-argument open keeps
# the name from being parsed as part of the mode, and a failed open is fatal
# rather than silently ignored. With no file name STDOUT is left untouched.
if (defined($output) && $output ne "") {
	open STDOUT,">",$output or die "can't open $output: $!";
}
66 $softonly=0; # allow hardware support
68 $t0="%r0"; $mask="%r0";
70 $t2="%r2"; $inp="%r2";
71 $t3="%r3"; $out="%r3"; $bits="%r3";
87 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
98 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
99 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
100 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
101 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
102 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
103 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
104 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
105 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
106 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
107 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
108 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
109 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
110 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
111 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
112 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
113 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
114 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
115 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
116 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
117 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
118 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
119 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
120 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
121 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
122 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
123 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
124 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
125 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
126 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
127 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
128 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
129 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
130 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
131 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
132 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
133 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
134 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
135 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
136 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
137 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
138 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
139 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
140 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
141 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
142 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
143 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
144 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
145 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
146 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
147 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
148 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
149 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
150 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
151 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
152 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
153 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
154 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
155 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
156 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
157 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
158 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
159 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
160 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
161 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
164 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
165 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
166 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
167 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
168 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
169 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
170 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
171 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
172 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
173 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
174 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
175 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
176 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
177 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
178 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
179 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
180 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
181 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
182 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
183 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
184 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
185 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
186 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
187 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
188 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
189 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
190 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
191 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
192 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
193 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
194 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
195 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
197 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
198 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
199 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
201 .size AES_Te,.-AES_Te
203 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
204 # const AES_KEY *key) {
206 .type AES_encrypt,\@function
209 $code.=<<___ if (!$softonly);
218 lghi %r3,16 # single block length
219 .long 0xb92e0042 # km %r4,%r2
220 brc 1,.-4 # can this happen?
234 bras $ra,_s390x_AES_encrypt
244 .size AES_encrypt,.-AES_encrypt
246 .type _s390x_AES_encrypt,\@function
255 llill $mask,`0xff<<3`
269 srlg $i1,$s1,`16-3` # i0
278 l $s0,0($s0,$tbl) # Te0[s0>>24]
279 l $t1,1($t1,$tbl) # Te3[s0>>0]
280 l $t2,2($t2,$tbl) # Te2[s0>>8]
281 l $t3,3($t3,$tbl) # Te1[s0>>16]
283 x $s0,3($i1,$tbl) # Te1[s1>>16]
284 l $s1,0($s1,$tbl) # Te0[s1>>24]
285 x $t2,1($i2,$tbl) # Te3[s1>>0]
286 x $t3,2($i3,$tbl) # Te2[s1>>8]
288 srlg $i1,$s2,`8-3` # i0
289 srlg $i2,$s2,`16-3` # i1
298 srlg $ra,$s3,`8-3` # i1
299 sllg $t1,$s3,`0+3` # i0
304 x $s0,2($i1,$tbl) # Te2[s2>>8]
305 x $s1,3($i2,$tbl) # Te1[s2>>16]
306 l $s2,0($s2,$tbl) # Te0[s2>>24]
307 x $t3,1($i3,$tbl) # Te3[s2>>0]
309 srlg $i3,$s3,`16-3` # i2
320 x $s0,1($t1,$tbl) # Te3[s3>>0]
321 x $s1,2($ra,$tbl) # Te2[s3>>8]
322 x $s2,3($i3,$tbl) # Te1[s3>>16]
323 l $s3,0($s3,$tbl) # Te0[s3>>24]
326 brct $rounds,.Lenc_loop
338 srlg $i1,$s1,`16-3` # i0
347 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
348 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
350 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
351 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
355 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
356 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
357 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
358 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
367 srlg $i1,$s2,`8-3` # i0
368 srlg $i2,$s2,`16-3` # i1
376 sllg $t1,$s3,`0+3` # i0
377 srlg $ra,$s3,`8-3` # i1
380 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
381 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
383 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
384 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
393 srlg $i3,$s3,`16-3` # i2
401 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
402 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
403 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
404 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
420 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
424 .type AES_Td,\@object
429 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
430 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
431 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
432 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
433 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
434 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
435 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
436 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
437 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
438 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
439 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
440 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
441 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
442 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
443 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
444 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
445 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
446 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
447 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
448 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
449 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
450 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
451 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
452 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
453 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
454 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
455 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
456 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
457 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
458 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
459 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
460 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
461 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
462 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
463 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
464 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
465 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
466 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
467 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
468 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
469 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
470 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
471 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
472 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
473 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
474 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
475 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
476 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
477 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
478 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
479 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
480 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
481 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
482 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
483 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
484 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
485 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
486 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
487 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
488 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
489 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
490 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
491 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
492 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
495 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
496 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
497 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
498 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
499 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
500 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
501 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
502 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
503 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
504 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
505 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
506 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
507 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
508 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
509 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
510 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
511 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
512 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
513 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
514 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
515 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
516 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
517 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
518 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
519 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
520 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
521 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
522 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
523 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
524 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
525 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
526 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
527 .size AES_Td,.-AES_Td
529 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
530 # const AES_KEY *key) {
532 .type AES_decrypt,\@function
535 $code.=<<___ if (!$softonly);
544 lghi %r3,16 # single block length
545 .long 0xb92e0042 # km %r4,%r2
546 brc 1,.-4 # can this happen?
560 bras $ra,_s390x_AES_decrypt
570 .size AES_decrypt,.-AES_decrypt
572 .type _s390x_AES_decrypt,\@function
581 llill $mask,`0xff<<3`
595 sllg $i1,$s1,`0+3` # i0
604 l $s0,0($s0,$tbl) # Td0[s0>>24]
605 l $t1,3($t1,$tbl) # Td1[s0>>16]
606 l $t2,2($t2,$tbl) # Td2[s0>>8]
607 l $t3,1($t3,$tbl) # Td3[s0>>0]
609 x $s0,1($i1,$tbl) # Td3[s1>>0]
610 l $s1,0($s1,$tbl) # Td0[s1>>24]
611 x $t2,3($i2,$tbl) # Td1[s1>>16]
612 x $t3,2($i3,$tbl) # Td2[s1>>8]
614 srlg $i1,$s2,`8-3` # i0
615 sllg $i2,$s2,`0+3` # i1
624 srlg $ra,$s3,`8-3` # i1
625 srlg $t1,$s3,`16-3` # i0
630 x $s0,2($i1,$tbl) # Td2[s2>>8]
631 x $s1,1($i2,$tbl) # Td3[s2>>0]
632 l $s2,0($s2,$tbl) # Td0[s2>>24]
633 x $t3,3($i3,$tbl) # Td1[s2>>16]
635 sllg $i3,$s3,`0+3` # i2
646 x $s0,3($t1,$tbl) # Td1[s3>>16]
647 x $s1,2($ra,$tbl) # Td2[s3>>8]
648 x $s2,1($i3,$tbl) # Td3[s3>>0]
649 l $s3,0($s3,$tbl) # Td0[s3>>24]
652 brct $rounds,.Ldec_loop
655 l $t1,`2048+0`($tbl) # prefetch Td4
656 l $t2,`2048+64`($tbl)
657 l $t3,`2048+128`($tbl)
658 l $i1,`2048+192`($tbl)
675 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
676 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
677 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
679 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
683 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
684 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
685 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
687 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
701 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
702 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
703 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
704 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
724 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
725 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
727 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
728 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
742 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
746 # void AES_set_encrypt_key(const unsigned char *in, int bits,
748 .globl AES_set_encrypt_key
749 .type AES_set_encrypt_key,\@function
773 $code.=<<___ if (!$softonly);
774 # convert bits to km code, [128,192,256]->[18,19,20]
781 larl %r1,OPENSSL_s390xcap_P
783 tmhl %r0,0x4000 # check for message-security assist
786 lghi %r0,0 # query capability vector
788 .long 0xb92f0042 # kmc %r4,%r2
795 lmg %r0,%r1,0($inp) # just copy 128 bits...
805 1: st $bits,236($key) # save bits
806 st %r5,240($key) # save km code
813 stmg %r6,%r13,48($sp) # all non-volatile regs
815 larl $tbl,AES_Te+2048
834 llgfr $t2,$s3 # temp=rk[3]
848 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
849 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
850 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
851 icm $t2,1,0($i3) # Te4[rk[3]>>24]
852 x $t2,256($t3,$tbl) # rcon[i]
853 xr $s0,$t2 # rk[4]=rk[0]^...
854 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
855 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
856 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
858 llgfr $t2,$s3 # temp=rk[3]
870 la $key,16($key) # key+=4
872 brct $rounds,.L128_loop
906 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
907 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
908 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
909 icm $t1,1,0($i3) # Te4[rk[5]>>24]
910 x $t1,256($t3,$tbl) # rcon[i]
911 xr $s0,$t1 # rk[6]=rk[0]^...
912 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
913 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
914 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
920 brct $rounds,.L192_continue
928 x $t1,16($key) # rk[10]=rk[4]^rk[9]
930 x $t1,20($key) # rk[11]=rk[5]^rk[10]
940 la $key,24($key) # key+=6
969 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
970 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
971 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
972 icm $t1,1,0($i3) # Te4[rk[7]>>24]
973 x $t1,256($t3,$tbl) # rcon[i]
974 xr $s0,$t1 # rk[8]=rk[0]^...
975 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
976 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
977 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
982 brct $rounds,.L256_continue
989 lgr $t1,$s3 # temp=rk[11]
1000 llgc $t1,0($t1) # Te4[rk[11]>>0]
1001 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1002 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1003 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1004 x $t1,16($key) # rk[12]=rk[4]^...
1006 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1008 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1010 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1020 la $key,32($key) # key+=8
1027 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1029 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1031 .globl AES_set_decrypt_key
1032 .type AES_set_decrypt_key,\@function
1034 AES_set_decrypt_key:
1035 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1036 stg $ra,112($sp) # save non-volatile registers!
1037 bras $ra,AES_set_encrypt_key
1043 $code.=<<___ if (!$softonly);
1048 oill $t0,0x80 # set "decrypt" bit
1056 bras $ra,.Lekey_internal
1062 .Lgo: llgf $rounds,240($key)
1070 .Linv: lmg $s0,$s1,0($i1)
1082 llgf $rounds,240($key)
1084 sll $rounds,2 # (rounds-1)*4
1085 llilh $mask80,0x8080
1086 llilh $mask1b,0x1b1b
1087 llilh $maskfe,0xfefe
1093 .Lmix: l $s0,16($key) # tp1
1121 xr $s1,$s0 # tp2^tp1
1122 xr $s2,$s0 # tp4^tp1
1123 rll $s0,$s0,24 # = ROTATE(tp1,8)
1125 xr $s0,$s1 # ^=tp2^tp1
1126 xr $s1,$s3 # tp2^tp1^tp8
1127 xr $s0,$s2 # ^=tp4^tp1^tp8
1130 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1132 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1133 xr $s0,$s3 # ^= ROTATE(tp8,8)
1139 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1142 .size AES_set_decrypt_key,.-AES_set_decrypt_key
1145 #void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1146 # size_t length, const AES_KEY *key,
1147 # unsigned char *ivec, const int enc)
1150 my $out="%r4"; # length and out are swapped
1156 .globl AES_cbc_encrypt
1157 .type AES_cbc_encrypt,\@function
1160 xgr %r3,%r4 # flip %r3 and %r4, out and len
1164 $code.=<<___ if (!$softonly);
1169 lg %r0,0($ivp) # copy ivec
1171 stmg %r0,%r1,16($sp)
1172 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1173 stmg %r0,%r1,32($sp)
1174 lmg %r0,%r1,16($key)
1175 stmg %r0,%r1,48($sp)
1176 l %r0,240($key) # load kmc code
1177 lghi $key,15 # res=len%16, len-=res;
1180 la %r1,16($sp) # parameter block - ivec || key
1182 .long 0xb92f0042 # kmc %r4,%r2
1183 brc 1,.-4 # pay attention to "partial completion"
1187 lmg %r0,%r1,16($sp) # copy ivec to caller
1193 ahi $key,-1 # it's the way it's encoded in mvc
1195 jnz .Lkmc_truncated_dec
1200 mvc 128(1,$sp),0($inp)
1202 la %r1,16($sp) # restore parameter block
1205 .long 0xb92f0042 # kmc %r4,%r2
1208 .Lkmc_truncated_dec:
1212 .long 0xb92f0042 # kmc %r4,%r2
1215 mvc 0(1,$out),128($sp)
1222 stmg $key,$ra,40($sp)
1236 brc 4,.Lcbc_enc_tail # if borrow
1238 stmg $inp,$out,16($sp)
1245 bras $ra,_s390x_AES_encrypt
1247 lmg $inp,$key,16($sp)
1259 brc 4,.Lcbc_enc_tail # if borrow
1279 mvc 128(1,$sp),0($inp)
1291 stmg $t0,$t1,128($sp)
1294 stmg $inp,$out,16($sp)
1301 bras $ra,_s390x_AES_decrypt
1303 lmg $inp,$key,16($sp)
1315 brc 4,.Lcbc_dec_tail # if borrow
1316 brc 2,.Lcbc_dec_done # if zero
1319 stmg $t0,$t1,128($sp)
1329 lmg $ivp,$ra,48($sp)
1330 stmg $t0,$t1,0($ivp)
1340 mvc 0(1,$out),128($sp)
1343 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1346 #void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1347 # size_t blocks, const AES_KEY *key,
1348 # const unsigned char *ivec)
1353 my $key="%r5"; my $iv0="%r5";
1358 .globl AES_ctr32_encrypt
1359 .type AES_ctr32_encrypt,\@function
1363 $code.=<<___ if (!$softonly);
1369 stmg %r6,$s3,48($sp)
1372 la %r1,0($key) # %r1 is permanent copy of $key
1373 lg $iv0,0($ivp) # load ivec
1376 # prepare and allocate stack frame at the top of 4K page
1377 # with 1K reserved for eventual signal handling
1378 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1382 ngr $s0,$s1 # align at page boundary
1383 slgr $fp,$s0 # total buffer size
1385 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1386 slgr $fp,$s1 # deduct reservation to get usable buffer size
1387 # buffer size is at least 256 and at most 3072+256-16
1389 la $sp,1024($s0) # alloca
1390 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1391 stg $s2,0($sp) # back-chain
1395 brc 1,.Lctr32_hw_loop # not zero, no borrow
1396 algr $fp,$len # input is shorter than allocated buffer
1407 ahi $ivp,1 # 32-bit increment, preserves upper half
1408 brct $s3,.Lctr32_hw_prepare
1410 la $s0,16($sp) # inp
1411 sllg $s1,$fp,4 # len
1412 la $s2,16($sp) # out
1413 .long 0xb92e00a8 # km %r10,%r8
1414 brc 1,.-4 # pay attention to "partial completion"
1424 stg $s0,0($out,$inp)
1425 stg $s1,8($out,$inp)
1427 brct $s3,.Lctr32_hw_xor
1430 brc 1,.Lctr32_hw_loop # not zero, no borrow
1433 brc 4+1,.Lctr32_hw_loop # not zero
1442 brct $s1,.Lctr32_hw_zap
1451 stmg $key,$ra,40($sp)
1457 stmg $inp,$len,16($sp)
1465 bras $ra,_s390x_AES_encrypt
1467 lmg $inp,$ivp,16($sp)
1476 st $s3,12($out,$inp)
1479 ahi $t1,1 # 32-bit increment
1480 brct $len,.Lctr32_loop
1484 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1488 .comm OPENSSL_s390xcap_P,8,8
1489 .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1492 $code =~ s/\`([^\`]*)\`/eval $1/gem;
# Flush and close the generated output. Buffered write errors only surface
# when the handle is closed, so a failed close is treated as fatal instead of
# leaving a possibly truncated file behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";	# force flush