2 # Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
9 # ====================================================================
10 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11 # project. The module is, however, dual licensed under OpenSSL and
12 # CRYPTOGAMS licenses depending on where you obtain it. For further
13 # details see http://www.openssl.org/~appro/cryptogams/.
14 # ====================================================================
16 # Keccak-1600 for PPC64.
20 # This is a straightforward KECCAK_1X_ALT implementation that works on
21 # *any* PPC64. PowerISA 2.07 adds a 2x64-bit vector rotate, which makes
22 # it possible to achieve better performance than listed below, but that
23 # is naturally an option only for POWER8 and its successors...
25 ######################################################################
26 # Numbers are cycles per processed byte.
30 # PPC970/G5 14.6/+120%
34 # (*) Corresponds to SHA3-256. The percentage after the slash is the
35 # improvement over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers
36 # do much better (but watch out for them generating code specific
37 # to the processor they execute on).
41 if ($flavour =~ /64/) {
48 } else { die "nonsense $flavour"; }
# Locate the ppc-xlate.pl translator: first in the directory of this script
# (taken from $0), then in ../../perlasm relative to it; give up if neither
# candidate is an existing file.
50 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
51 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
52 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
53 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into the translator, passing it $flavour and the next
# command-line argument.  The low-precedence "or" makes die guard open()
# itself; with "||" the die was attached to the (always true) command
# string and a failed open went unreported.
55 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
57 $FRAME=24*$SIZE_T+6*$SIZE_T+32;
59 $TEMP=$LOCALS+6*$SIZE_T;
63 my @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ],
65 $A[1][1] = "r6"; # r13 is reserved
67 my @C = map("r$_", (0,3,4,5));
# Per-lane rotation amounts for the Rho step: the rotldi instructions in the
# round body rotate lane $A[i][j] left by $rhotates[i][j] bits.
69 my @rhotates = ([ 0, 1, 62, 28, 27 ],
70 [ 36, 44, 6, 55, 20 ],
71 [ 3, 10, 43, 25, 39 ],
72 [ 41, 45, 15, 21, 8 ],
73 [ 18, 2, 61, 56, 14 ]);
78 .type KeccakF1600_int,\@function
86 xor $C[0],$A[0][0],$A[1][0] ; Theta
87 std $A[0][4],`$TEMP+0`($sp)
88 xor $C[1],$A[0][1],$A[1][1]
89 std $A[1][4],`$TEMP+8`($sp)
90 xor $C[2],$A[0][2],$A[1][2]
91 std $A[2][4],`$TEMP+16`($sp)
92 xor $C[3],$A[0][3],$A[1][3]
93 std $A[3][4],`$TEMP+24`($sp)
100 xor $C[4],$A[0][4],$A[1][4]
101 xor $C[0],$C[0],$A[2][0]
102 xor $C[1],$C[1],$A[2][1]
103 xor $C[2],$C[2],$A[2][2]
104 xor $C[3],$C[3],$A[2][3]
105 xor $C[4],$C[4],$A[2][4]
106 xor $C[0],$C[0],$A[3][0]
107 xor $C[1],$C[1],$A[3][1]
108 xor $C[2],$C[2],$A[3][2]
109 xor $C[3],$C[3],$A[3][3]
110 xor $C[4],$C[4],$A[3][4]
111 xor $C[0],$C[0],$A[4][0]
112 xor $C[2],$C[2],$A[4][2]
113 xor $C[1],$C[1],$A[4][1]
114 xor $C[3],$C[3],$A[4][3]
116 xor $C[4],$C[4],$A[4][4]
118 xor $C[5],$C[5],$C[0]
121 xor $A[0][1],$A[0][1],$C[5]
122 xor $A[1][1],$A[1][1],$C[5]
123 xor $A[2][1],$A[2][1],$C[5]
124 xor $A[3][1],$A[3][1],$C[5]
125 xor $A[4][1],$A[4][1],$C[5]
128 xor $C[6],$C[6],$C[1]
129 xor $C[2],$C[2],$C[7]
131 xor $C[3],$C[3],$C[5]
132 xor $C[4],$C[4],$C[7]
134 xor $C[1], $A[0][2],$C[6] ;mr $C[1],$A[0][2]
135 xor $A[1][2],$A[1][2],$C[6]
136 xor $A[2][2],$A[2][2],$C[6]
137 xor $A[3][2],$A[3][2],$C[6]
138 xor $A[4][2],$A[4][2],$C[6]
140 xor $A[0][0],$A[0][0],$C[4]
141 xor $A[1][0],$A[1][0],$C[4]
142 xor $A[2][0],$A[2][0],$C[4]
143 xor $A[3][0],$A[3][0],$C[4]
144 xor $A[4][0],$A[4][0],$C[4]
151 ld $A[0][4],`$TEMP+0`($sp)
152 xor $C[0], $A[0][3],$C[2] ;mr $C[0],$A[0][3]
153 ld $A[1][4],`$TEMP+8`($sp)
154 xor $A[1][3],$A[1][3],$C[2]
155 ld $A[2][4],`$TEMP+16`($sp)
156 xor $A[2][3],$A[2][3],$C[2]
157 ld $A[3][4],`$TEMP+24`($sp)
158 xor $A[3][3],$A[3][3],$C[2]
159 xor $A[4][3],$A[4][3],$C[2]
161 xor $C[2], $A[0][4],$C[3] ;mr $C[2],$A[0][4]
162 xor $A[1][4],$A[1][4],$C[3]
163 xor $A[2][4],$A[2][4],$C[3]
164 xor $A[3][4],$A[3][4],$C[3]
165 xor $A[4][4],$A[4][4],$C[3]
167 mr $C[3],$A[0][1] ; Rho+Pi
168 rotldi $A[0][1],$A[1][1],$rhotates[1][1]
170 rotldi $A[0][2],$A[2][2],$rhotates[2][2]
172 rotldi $A[0][3],$A[3][3],$rhotates[3][3]
174 rotldi $A[0][4],$A[4][4],$rhotates[4][4]
176 rotldi $A[1][1],$A[1][4],$rhotates[1][4]
177 rotldi $A[2][2],$A[2][3],$rhotates[2][3]
178 rotldi $A[3][3],$A[3][2],$rhotates[3][2]
179 rotldi $A[4][4],$A[4][1],$rhotates[4][1]
181 rotldi $A[1][4],$A[4][2],$rhotates[4][2]
182 rotldi $A[2][3],$A[3][4],$rhotates[3][4]
183 rotldi $A[3][2],$A[2][1],$rhotates[2][1]
184 rotldi $A[4][1],$A[1][3],$rhotates[1][3]
186 rotldi $A[4][2],$A[2][4],$rhotates[2][4]
187 rotldi $A[3][4],$A[4][3],$rhotates[4][3]
188 rotldi $A[2][1],$A[1][2],$rhotates[1][2]
189 rotldi $A[1][3],$A[3][1],$rhotates[3][1]
191 rotldi $A[2][4],$A[4][0],$rhotates[4][0]
192 rotldi $A[4][3],$A[3][0],$rhotates[3][0]
193 rotldi $A[1][2],$A[2][0],$rhotates[2][0]
194 rotldi $A[3][1],$A[1][0],$rhotates[1][0]
196 rotldi $A[1][0],$C[0],$rhotates[0][3]
197 rotldi $A[2][0],$C[3],$rhotates[0][1]
198 rotldi $A[3][0],$C[2],$rhotates[0][4]
199 rotldi $A[4][0],$C[1],$rhotates[0][2]
201 andc $C[0],$A[0][2],$A[0][1] ; Chi+Iota
202 andc $C[1],$A[0][3],$A[0][2]
203 andc $C[2],$A[0][0],$A[0][4]
204 andc $C[3],$A[0][1],$A[0][0]
205 xor $A[0][0],$A[0][0],$C[0]
206 andc $C[0],$A[0][4],$A[0][3]
207 xor $A[0][1],$A[0][1],$C[1]
208 ld $C[1],`$LOCALS+4*$SIZE_T`($sp)
209 xor $A[0][3],$A[0][3],$C[2]
210 xor $A[0][4],$A[0][4],$C[3]
211 xor $A[0][2],$A[0][2],$C[0]
212 ldu $C[3],8($C[1]) ; Iota[i++]
214 andc $C[0],$A[1][2],$A[1][1]
215 std $C[1],`$LOCALS+4*$SIZE_T`($sp)
216 andc $C[1],$A[1][3],$A[1][2]
217 andc $C[2],$A[1][0],$A[1][4]
218 xor $A[0][0],$A[0][0],$C[3] ; A[0][0] ^= Iota
219 andc $C[3],$A[1][1],$A[1][0]
220 xor $A[1][0],$A[1][0],$C[0]
221 andc $C[0],$A[1][4],$A[1][3]
222 xor $A[1][1],$A[1][1],$C[1]
223 xor $A[1][3],$A[1][3],$C[2]
224 xor $A[1][4],$A[1][4],$C[3]
225 xor $A[1][2],$A[1][2],$C[0]
227 andc $C[0],$A[2][2],$A[2][1]
228 andc $C[1],$A[2][3],$A[2][2]
229 andc $C[2],$A[2][0],$A[2][4]
230 andc $C[3],$A[2][1],$A[2][0]
231 xor $A[2][0],$A[2][0],$C[0]
232 andc $C[0],$A[2][4],$A[2][3]
233 xor $A[2][1],$A[2][1],$C[1]
234 xor $A[2][3],$A[2][3],$C[2]
235 xor $A[2][4],$A[2][4],$C[3]
236 xor $A[2][2],$A[2][2],$C[0]
238 andc $C[0],$A[3][2],$A[3][1]
239 andc $C[1],$A[3][3],$A[3][2]
240 andc $C[2],$A[3][0],$A[3][4]
241 andc $C[3],$A[3][1],$A[3][0]
242 xor $A[3][0],$A[3][0],$C[0]
243 andc $C[0],$A[3][4],$A[3][3]
244 xor $A[3][1],$A[3][1],$C[1]
245 xor $A[3][3],$A[3][3],$C[2]
246 xor $A[3][4],$A[3][4],$C[3]
247 xor $A[3][2],$A[3][2],$C[0]
249 andc $C[0],$A[4][2],$A[4][1]
250 andc $C[1],$A[4][3],$A[4][2]
251 andc $C[2],$A[4][0],$A[4][4]
252 andc $C[3],$A[4][1],$A[4][0]
253 xor $A[4][0],$A[4][0],$C[0]
254 andc $C[0],$A[4][4],$A[4][3]
255 xor $A[4][1],$A[4][1],$C[1]
256 xor $A[4][3],$A[4][3],$C[2]
257 xor $A[4][4],$A[4][4],$C[3]
258 xor $A[4][2],$A[4][2],$C[0]
264 .byte 0,12,0x14,0,0,0,0,0
265 .size KeccakF1600_int,.-KeccakF1600_int
267 .type KeccakF1600,\@function
270 $STU $sp,-$FRAME($sp)
272 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
273 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
274 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
275 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
276 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
277 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
278 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
279 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
280 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
281 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
282 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
283 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
284 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
285 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
286 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
287 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
288 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
289 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
290 $PUSH r0,`$FRAME+$LRSAVE`($sp)
293 subi r12,r12,8 ; prepare for ldu
295 $PUSH r3,`$LOCALS+0*$SIZE_T`($sp)
296 ;$PUSH r4,`$LOCALS+1*$SIZE_T`($sp)
297 ;$PUSH r5,`$LOCALS+2*$SIZE_T`($sp)
298 ;$PUSH r6,`$LOCALS+3*$SIZE_T`($sp)
299 $PUSH r12,`$LOCALS+4*$SIZE_T`($sp)
301 ld $A[0][0],`8*0`(r3) ; load A[5][5]
302 ld $A[0][1],`8*1`(r3)
303 ld $A[0][2],`8*2`(r3)
304 ld $A[0][3],`8*3`(r3)
305 ld $A[0][4],`8*4`(r3)
306 ld $A[1][0],`8*5`(r3)
307 ld $A[1][1],`8*6`(r3)
308 ld $A[1][2],`8*7`(r3)
309 ld $A[1][3],`8*8`(r3)
310 ld $A[1][4],`8*9`(r3)
311 ld $A[2][0],`8*10`(r3)
312 ld $A[2][1],`8*11`(r3)
313 ld $A[2][2],`8*12`(r3)
314 ld $A[2][3],`8*13`(r3)
315 ld $A[2][4],`8*14`(r3)
316 ld $A[3][0],`8*15`(r3)
317 ld $A[3][1],`8*16`(r3)
318 ld $A[3][2],`8*17`(r3)
319 ld $A[3][3],`8*18`(r3)
320 ld $A[3][4],`8*19`(r3)
321 ld $A[4][0],`8*20`(r3)
322 ld $A[4][1],`8*21`(r3)
323 ld $A[4][2],`8*22`(r3)
324 ld $A[4][3],`8*23`(r3)
325 ld $A[4][4],`8*24`(r3)
329 $POP r3,`$LOCALS+0*$SIZE_T`($sp)
330 std $A[0][0],`8*0`(r3) ; return A[5][5]
331 std $A[0][1],`8*1`(r3)
332 std $A[0][2],`8*2`(r3)
333 std $A[0][3],`8*3`(r3)
334 std $A[0][4],`8*4`(r3)
335 std $A[1][0],`8*5`(r3)
336 std $A[1][1],`8*6`(r3)
337 std $A[1][2],`8*7`(r3)
338 std $A[1][3],`8*8`(r3)
339 std $A[1][4],`8*9`(r3)
340 std $A[2][0],`8*10`(r3)
341 std $A[2][1],`8*11`(r3)
342 std $A[2][2],`8*12`(r3)
343 std $A[2][3],`8*13`(r3)
344 std $A[2][4],`8*14`(r3)
345 std $A[3][0],`8*15`(r3)
346 std $A[3][1],`8*16`(r3)
347 std $A[3][2],`8*17`(r3)
348 std $A[3][3],`8*18`(r3)
349 std $A[3][4],`8*19`(r3)
350 std $A[4][0],`8*20`(r3)
351 std $A[4][1],`8*21`(r3)
352 std $A[4][2],`8*22`(r3)
353 std $A[4][3],`8*23`(r3)
354 std $A[4][4],`8*24`(r3)
356 $POP r0,`$FRAME+$LRSAVE`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
379 .byte 0,12,4,1,0x80,18,1,0
381 .size KeccakF1600,.-KeccakF1600
383 .type dword_le_load,\@function
403 .byte 0,12,0x14,0,0,0,1,0
405 .size dword_le_load,.-dword_le_load
408 .type SHA3_absorb,\@function
411 $STU $sp,-$FRAME($sp)
413 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
414 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
415 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
416 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
417 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
418 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
419 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
420 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
421 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
422 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
423 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
424 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
425 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
426 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
427 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
428 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
429 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
430 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
431 $PUSH r0,`$FRAME+$LRSAVE`($sp)
434 subi r4,r4,1 ; prepare for lbzu
435 subi r12,r12,8 ; prepare for ldu
437 $PUSH r3,`$LOCALS+0*$SIZE_T`($sp) ; save A[][]
438 $PUSH r4,`$LOCALS+1*$SIZE_T`($sp) ; save inp
439 $PUSH r5,`$LOCALS+2*$SIZE_T`($sp) ; save len
440 $PUSH r6,`$LOCALS+3*$SIZE_T`($sp) ; save bsz
442 $PUSH r12,`$LOCALS+4*$SIZE_T`($sp)
444 ld $A[0][0],`8*0`(r3) ; load A[5][5]
445 ld $A[0][1],`8*1`(r3)
446 ld $A[0][2],`8*2`(r3)
447 ld $A[0][3],`8*3`(r3)
448 ld $A[0][4],`8*4`(r3)
449 ld $A[1][0],`8*5`(r3)
450 ld $A[1][1],`8*6`(r3)
451 ld $A[1][2],`8*7`(r3)
452 ld $A[1][3],`8*8`(r3)
453 ld $A[1][4],`8*9`(r3)
454 ld $A[2][0],`8*10`(r3)
455 ld $A[2][1],`8*11`(r3)
456 ld $A[2][2],`8*12`(r3)
457 ld $A[2][3],`8*13`(r3)
458 ld $A[2][4],`8*14`(r3)
459 ld $A[3][0],`8*15`(r3)
460 ld $A[3][1],`8*16`(r3)
461 ld $A[3][2],`8*17`(r3)
462 ld $A[3][3],`8*18`(r3)
463 ld $A[3][4],`8*19`(r3)
464 ld $A[4][0],`8*20`(r3)
465 ld $A[4][1],`8*21`(r3)
466 ld $A[4][2],`8*22`(r3)
467 ld $A[4][3],`8*23`(r3)
468 ld $A[4][4],`8*24`(r3)
478 $UCMP r4,r5 ; len < bsz?
481 sub r4,r4,r5 ; len -= bsz
483 $PUSH r4,`$LOCALS+2*$SIZE_T`($sp) ; save len
485 bl dword_le_load ; *inp++
486 xor $A[0][0],$A[0][0],r0
488 bl dword_le_load ; *inp++
489 xor $A[0][1],$A[0][1],r0
491 bl dword_le_load ; *inp++
492 xor $A[0][2],$A[0][2],r0
494 bl dword_le_load ; *inp++
495 xor $A[0][3],$A[0][3],r0
497 bl dword_le_load ; *inp++
498 xor $A[0][4],$A[0][4],r0
500 bl dword_le_load ; *inp++
501 xor $A[1][0],$A[1][0],r0
503 bl dword_le_load ; *inp++
504 xor $A[1][1],$A[1][1],r0
506 bl dword_le_load ; *inp++
507 xor $A[1][2],$A[1][2],r0
509 bl dword_le_load ; *inp++
510 xor $A[1][3],$A[1][3],r0
512 bl dword_le_load ; *inp++
513 xor $A[1][4],$A[1][4],r0
515 bl dword_le_load ; *inp++
516 xor $A[2][0],$A[2][0],r0
518 bl dword_le_load ; *inp++
519 xor $A[2][1],$A[2][1],r0
521 bl dword_le_load ; *inp++
522 xor $A[2][2],$A[2][2],r0
524 bl dword_le_load ; *inp++
525 xor $A[2][3],$A[2][3],r0
527 bl dword_le_load ; *inp++
528 xor $A[2][4],$A[2][4],r0
530 bl dword_le_load ; *inp++
531 xor $A[3][0],$A[3][0],r0
533 bl dword_le_load ; *inp++
534 xor $A[3][1],$A[3][1],r0
536 bl dword_le_load ; *inp++
537 xor $A[3][2],$A[3][2],r0
539 bl dword_le_load ; *inp++
540 xor $A[3][3],$A[3][3],r0
542 bl dword_le_load ; *inp++
543 xor $A[3][4],$A[3][4],r0
545 bl dword_le_load ; *inp++
546 xor $A[4][0],$A[4][0],r0
548 bl dword_le_load ; *inp++
549 xor $A[4][1],$A[4][1],r0
551 bl dword_le_load ; *inp++
552 xor $A[4][2],$A[4][2],r0
554 bl dword_le_load ; *inp++
555 xor $A[4][3],$A[4][3],r0
557 bl dword_le_load ; *inp++
558 xor $A[4][4],$A[4][4],r0
561 $PUSH r3,`$LOCALS+1*$SIZE_T`($sp) ; save inp
565 $POP r0,`$LOCALS+4*$SIZE_T`($sp) ; pull iotas[24]
566 $POP r5,`$LOCALS+3*$SIZE_T`($sp) ; restore bsz
567 $POP r4,`$LOCALS+2*$SIZE_T`($sp) ; restore len
568 $POP r3,`$LOCALS+1*$SIZE_T`($sp) ; restore inp
569 addic r0,r0,`-8*24` ; rewind iotas
570 $PUSH r0,`$LOCALS+4*$SIZE_T`($sp)
576 $POP r3,`$LOCALS+0*$SIZE_T`($sp)
577 std $A[0][0],`8*0`(r3) ; return A[5][5]
578 std $A[0][1],`8*1`(r3)
579 std $A[0][2],`8*2`(r3)
580 std $A[0][3],`8*3`(r3)
581 std $A[0][4],`8*4`(r3)
582 std $A[1][0],`8*5`(r3)
583 std $A[1][1],`8*6`(r3)
584 std $A[1][2],`8*7`(r3)
585 std $A[1][3],`8*8`(r3)
586 std $A[1][4],`8*9`(r3)
587 std $A[2][0],`8*10`(r3)
588 std $A[2][1],`8*11`(r3)
589 std $A[2][2],`8*12`(r3)
590 std $A[2][3],`8*13`(r3)
591 std $A[2][4],`8*14`(r3)
592 std $A[3][0],`8*15`(r3)
593 std $A[3][1],`8*16`(r3)
594 std $A[3][2],`8*17`(r3)
595 std $A[3][3],`8*18`(r3)
596 std $A[3][4],`8*19`(r3)
597 std $A[4][0],`8*20`(r3)
598 std $A[4][1],`8*21`(r3)
599 std $A[4][2],`8*22`(r3)
600 std $A[4][3],`8*23`(r3)
601 std $A[4][4],`8*24`(r3)
603 mr r3,r4 ; return value
604 $POP r0,`$FRAME+$LRSAVE`($sp)
605 $POP r14,`$FRAME-$SIZE_T*18`($sp)
606 $POP r15,`$FRAME-$SIZE_T*17`($sp)
607 $POP r16,`$FRAME-$SIZE_T*16`($sp)
608 $POP r17,`$FRAME-$SIZE_T*15`($sp)
609 $POP r18,`$FRAME-$SIZE_T*14`($sp)
610 $POP r19,`$FRAME-$SIZE_T*13`($sp)
611 $POP r20,`$FRAME-$SIZE_T*12`($sp)
612 $POP r21,`$FRAME-$SIZE_T*11`($sp)
613 $POP r22,`$FRAME-$SIZE_T*10`($sp)
614 $POP r23,`$FRAME-$SIZE_T*9`($sp)
615 $POP r24,`$FRAME-$SIZE_T*8`($sp)
616 $POP r25,`$FRAME-$SIZE_T*7`($sp)
617 $POP r26,`$FRAME-$SIZE_T*6`($sp)
618 $POP r27,`$FRAME-$SIZE_T*5`($sp)
619 $POP r28,`$FRAME-$SIZE_T*4`($sp)
620 $POP r29,`$FRAME-$SIZE_T*3`($sp)
621 $POP r30,`$FRAME-$SIZE_T*2`($sp)
622 $POP r31,`$FRAME-$SIZE_T*1`($sp)
627 .byte 0,12,4,1,0x80,18,4,0
629 .size SHA3_absorb,.-SHA3_absorb
632 my ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
635 .type SHA3_squeeze,\@function
638 $STU $sp,`-10*$SIZE_T`($sp)
640 $PUSH r28,`6*$SIZE_T`($sp)
641 $PUSH r29,`7*$SIZE_T`($sp)
642 $PUSH r30,`8*$SIZE_T`($sp)
643 $PUSH r31,`9*$SIZE_T`($sp)
644 $PUSH r0,`10*$SIZE_T+$LRSAVE`($sp)
647 subi r3,r3,8 ; prepare for ldu
648 subi $out,r4,1 ; prepare for stbu
683 subi r3,$A_flat,8 ; prepare for ldu
696 $POP r0,`10*$SIZE_T+$LRSAVE`($sp)
697 $POP r28,`6*$SIZE_T`($sp)
698 $POP r29,`7*$SIZE_T`($sp)
699 $POP r30,`8*$SIZE_T`($sp)
700 $POP r31,`9*$SIZE_T`($sp)
702 addi $sp,$sp,`10*$SIZE_T`
705 .byte 0,12,4,1,0x80,4,4,0
707 .size SHA3_squeeze,.-SHA3_squeeze
711 # Ugly hack here, because PPC assembler syntax seems to vary too
712 # much from platform to platform...
718 mflr r12 ; vvvvvv "distance" between . and 1st data entry
723 .byte 0,12,0x14,0,0,0,0,0
727 .quad 0x0000000000000001
728 .quad 0x0000000000008082
729 .quad 0x800000000000808a
730 .quad 0x8000000080008000
731 .quad 0x000000000000808b
732 .quad 0x0000000080000001
733 .quad 0x8000000080008081
734 .quad 0x8000000000008009
735 .quad 0x000000000000008a
736 .quad 0x0000000000000088
737 .quad 0x0000000080008009
738 .quad 0x000000008000000a
739 .quad 0x000000008000808b
740 .quad 0x800000000000008b
741 .quad 0x8000000000008089
742 .quad 0x8000000000008003
743 .quad 0x8000000000008002
744 .quad 0x8000000000000080
745 .quad 0x000000000000800a
746 .quad 0x800000008000000a
747 .quad 0x8000000080008081
748 .quad 0x8000000000008080
749 .quad 0x0000000080000001
750 .quad 0x8000000080008008
752 .asciz "Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
# Replace every `...` span in $code with the result of evaluating its
# contents as a Perl expression (e.g. the stack-offset arithmetic).
755 $code =~ s/\`([^\`]*)\`/eval $1/gem;