2 # Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
19 # The module implements "4-bit" GCM GHASH function and underlying
20 # single multiplication operation in GF(2^128). "4-bit" means that it
21 # uses 256 bytes per-key table [+128 bytes shared table]. Even though
22 # loops are aggressively modulo-scheduled with respect to references to
23 # Htbl and Z.hi updates for 8 cycles per byte, measured performance is
24 # ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
25 # scheduling "glitch," because uprofile(1) indicates uniform sample
26 # distribution, as if all instruction bundles execute in 1.5 cycles.
27 # Meaning that it could have been even faster, yet 12 cycles is ~60%
28 # better than gcc-generated code and ~80% better than code generated by vendor
41 $Xi="a0"; # $16, input argument block
76 s8addq $remp,$rem_4bit,$remp
104 s8addq $remp,$rem_4bit,$remp
122 s8addq $remp,$rem_4bit,$remp
146 s8addq $remp,$rem_4bit,$remp
163 s8addq $remp,$rem_4bit,$remp
188 s8addq $remp,$rem_4bit,$remp
206 s8addq $remp,$rem_4bit,$remp
229 s8addq $remp,$rem_4bit,$remp
242 s8addq $remp,$rem_4bit,$remp
256 #include <asm/regdef.h>
266 .globl gcm_gmult_4bit
283 srl $Zlo,24,$t0 # byte swap
291 zapnot $Zlo,0x88,$Zlo
310 zapnot $Zhi,0x88,$Zhi
332 .globl gcm_ghash_4bit
355 extql $inhi,$inp,$inhi
356 extqh $Thi0,$inp,$Thi0
360 extql $inlo,$inp,$inlo
361 extqh $Tlo0,$inp,$Tlo0
372 srl $Zlo,24,$t0 # byte swap
380 zapnot $Zlo,0x88,$Zlo
401 zapnot $Zhi,0x88,$Zhi
422 zapnot $Zhi,0x88,$Zhi
451 .Lpic: lda $rem_4bit,12($rem_4bit)
456 .long 0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
457 .long 0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
458 .long 0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
459 .long 0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
460 .ascii "GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
464 $output=pop and open STDOUT,">$output";