Throw in AES CBC assembler, up to +40% on aes-128-cbc benchmark.
[oweals/openssl.git] / crypto / perlasm / cbc.pl
1 #!/usr/local/bin/perl
2
3 # void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
4 # des_cblock (*input);
5 # des_cblock (*output);
6 # long length;
7 # des_key_schedule schedule;
8 # des_cblock (*ivec);
9 # int enc;
10 #
11 # calls 
12 # des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
13 #
14
15 #&cbc("des_ncbc_encrypt","des_encrypt",0);
16 #&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",
17 #       1,4,5,3,5,-1);
18 #&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",
19 #       0,4,5,3,5,-1);
20 #&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",
21 #       0,6,7,3,4,5);
22 #
23 # When doing a cipher that needs bigendian order,
24 # for encrypt, the iv is kept in bigendian form,
25 # while for decrypt, it is kept in little endian.
# cbc(name, enc_func, dec_func, swap, iv_off, enc_off, p1, p2, p3)
#
# Emits, through the perlasm helper routines (&mov, &push, &call, ...), the
# x86 code for a CBC-mode wrapper called $name around an 8-byte block cipher.
# The emitted routine reads "in" from parameter 0, "out" from parameter 1 and
# the byte length from parameter 2; the iv pointer and the encrypt/decrypt
# flag come from the parameter numbers given in $iv_off and $enc_off.
#
#   name               name of the function to emit
#   enc_func, dec_func functions the emitted code calls to encrypt/decrypt
#                      one block in place
#   swap               true if each 32-bit word has to be byte-swapped around
#                      the calls to enc_func/dec_func
#   iv_off             parameter number of the iv pointer
#   enc_off            parameter number of the encrypt/decrypt flag
#   p1, p2, p3         parameter numbers forwarded to enc_func/dec_func after
#                      the data pointer; a value that is not > 0 means "not
#                      passed"
#
# Work area built on the stack by the emitted code (offsets from esp once
# everything has been pushed; $data_off is 4 plus 4 per forwarded parameter):
#
#   0              pointer to the data block (first argument of the callee)
#   4 ...          forwarded parameters p1, p2, p3 (only those that are > 0)
#   $data_off      data[0], data[1]  - the block handed to the callee
#   $data_off+8    iv[0], iv[1]      - running iv kept for decryption
sub cbc
        {
        local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_;
        # name is the function name
        # enc_func and dec_func are the functions to call for encrypt/decrypt
        # swap is true if byte order needs to be reversed
        # iv_off is parameter number for the iv
        # enc_off is parameter number for the encrypt/decrypt flag
        # p1,p2,p3 are the offsets for parameters to be passed to the
        # underlying calls.

        &function_begin_B($name,"");
        &comment("");

        $in="esi";
        $out="edi";
        $count="ebp";

        &push("ebp");
        &push("ebx");
        &push("esi");
        &push("edi");

        # Offset from esp (after all pushes below) to the data block:
        # one slot for the data pointer plus one per forwarded parameter.
        $data_off=4;
        $data_off+=4 if ($p1 > 0);
        $data_off+=4 if ($p2 > 0);
        $data_off+=4 if ($p3 > 0);

        &mov($count,    &wparam(2));    # length

        &comment("getting iv ptr from parameter $iv_off");
        &mov("ebx",     &wparam($iv_off));      # Get iv ptr

        &mov($in,       &DWP(0,"ebx","",0));#   iv[0]
        &mov($out,      &DWP(4,"ebx","",0));#   iv[1]

        # Two copies of the iv: the upper pair is the running iv used by the
        # decrypt path, the lower pair becomes the data block for the callee.
        &push($out);
        &push($in);
        &push($out);    # used in decrypt for iv[1]
        &push($in);     # used in decrypt for iv[0]

        &mov("ebx",     "esp");         # This is the address of tin[2]

        &mov($in,       &wparam(0));    # in
        &mov($out,      &wparam(1));    # out

        # We have loaded them all, now push the arguments for the callee
        &comment("getting encrypt flag from parameter $enc_off");
        &mov("ecx",     &wparam($enc_off));     # Get enc flag
        # Arguments are pushed last-to-first; when a forwarded parameter is
        # the enc flag itself, the copy already held in ecx is reused.
        if ($p3 > 0)
                {
                &comment("get and push parameter $p3");
                if ($enc_off != $p3)
                        { &mov("eax",   &wparam($p3)); &push("eax"); }
                else    { &push("ecx"); }
                }
        if ($p2 > 0)
                {
                &comment("get and push parameter $p2");
                if ($enc_off != $p2)
                        { &mov("eax",   &wparam($p2)); &push("eax"); }
                else    { &push("ecx"); }
                }
        if ($p1 > 0)
                {
                &comment("get and push parameter $p1");
                if ($enc_off != $p1)
                        { &mov("eax",   &wparam($p1)); &push("eax"); }
                else    { &push("ecx"); }
                }
        &push("ebx");           # push data/iv

        &cmp("ecx",0);
        &jz(&label("decrypt"));

        &and($count,0xfffffff8);
        &mov("eax",     &DWP($data_off,"esp","",0));    # load iv[0]
        &mov("ebx",     &DWP($data_off+4,"esp","",0));  # load iv[1]

        # Only mov instructions sit between the "and" above and this jz, so
        # the branch is taken when the length holds no complete 8-byte block.
        &jz(&label("encrypt_finish"));

        #############################################################

        &set_label("encrypt_loop");
        # encrypt start
        # "eax" and "ebx" hold iv (or the last cipher text)

        &mov("ecx",     &DWP(0,$in,"",0));      # load first 4 bytes
        &mov("edx",     &DWP(4,$in,"",0));      # second 4 bytes

        &xor("eax",     "ecx");
        &xor("ebx",     "edx");

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov(&DWP($data_off,"esp","",0),        "eax"); # put in array for call
        &mov(&DWP($data_off+4,"esp","",0),      "ebx"); #

        &call($enc_func);

        &mov("eax",     &DWP($data_off,"esp","",0));
        &mov("ebx",     &DWP($data_off+4,"esp","",0));

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov(&DWP(0,$out,"",0),"eax");
        &mov(&DWP(4,$out,"",0),"ebx");

        # eax and ebx are the next iv.

        &add($in,       8);
        &add($out,      8);

        &sub($count,    8);
        &jnz(&label("encrypt_loop"));

        #############################################################
        &set_label("encrypt_finish");
        &mov($count,    &wparam(2));    # length
        &and($count,    7);
        &jz(&label("finish"));
        # Position independent dispatch: the call/pop pair leaves the address
        # of PIC_point in edx, and the jump table holds offsets relative to
        # PIC_point, so table entry + edx is the address of the ejN label.
        &call(&label("PIC_point"));
&set_label("PIC_point");
        &blindpop("edx");
        &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
        &mov($count,&DWP(0,"ecx",$count,4));
        &add($count,"edx");
        # The partial block is assembled in ecx:edx, starting from zero.
        &xor("ecx","ecx");
        &xor("edx","edx");
        #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
        &jmp_ptr($count);

        # ej7..ej1: entry point N gathers the N trailing input bytes, each
        # case falling through into the next smaller one.
&set_label("ej7");
        &xor("edx",             "edx") if $ppro; # ppro friendly
        &movb(&HB("edx"),       &BP(6,$in,"",0));
        &shl("edx",8);
&set_label("ej6");
        &movb(&HB("edx"),       &BP(5,$in,"",0));
&set_label("ej5");
        &movb(&LB("edx"),       &BP(4,$in,"",0));
&set_label("ej4");
        &mov("ecx",             &DWP(0,$in,"",0));
        &jmp(&label("ejend"));
&set_label("ej3");
        # The clear has to come before the byte load (as in ej7); emitted
        # after it, it would wipe the byte that was just fetched.
        &xor("ecx",             "ecx") if $ppro; # ppro friendly
        &movb(&HB("ecx"),       &BP(2,$in,"",0));
        &shl("ecx",8);
&set_label("ej2");
        &movb(&HB("ecx"),       &BP(1,$in,"",0));
&set_label("ej1");
        &movb(&LB("ecx"),       &BP(0,$in,"",0));
&set_label("ejend");

        &xor("eax",     "ecx");
        &xor("ebx",     "edx");

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov(&DWP($data_off,"esp","",0),        "eax"); # put in array for call
        &mov(&DWP($data_off+4,"esp","",0),      "ebx"); #

        &call($enc_func);

        &mov("eax",     &DWP($data_off,"esp","",0));
        &mov("ebx",     &DWP($data_off+4,"esp","",0));

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        # The final block is written out as a full 8 bytes.
        &mov(&DWP(0,$out,"",0),"eax");
        &mov(&DWP(4,$out,"",0),"ebx");

        &jmp(&label("finish"));

        #############################################################
        #############################################################
        &set_label("decrypt",1);
        # decrypt start
        &and($count,0xfffffff8);
        # The next 2 instructions are only for if the jz is taken
        &mov("eax",     &DWP($data_off+8,"esp","",0));  # get iv[0]
        &mov("ebx",     &DWP($data_off+12,"esp","",0)); # get iv[1]
        &jz(&label("decrypt_finish"));

        &set_label("decrypt_loop");
        &mov("eax",     &DWP(0,$in,"",0));      # load first 4 bytes
        &mov("ebx",     &DWP(4,$in,"",0));      # second 4 bytes

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov(&DWP($data_off,"esp","",0),        "eax"); # put back
        &mov(&DWP($data_off+4,"esp","",0),      "ebx"); #

        &call($dec_func);

        &mov("eax",     &DWP($data_off,"esp","",0));    # get return
        &mov("ebx",     &DWP($data_off+4,"esp","",0));  #

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov("ecx",     &DWP($data_off+8,"esp","",0));  # get iv[0]
        &mov("edx",     &DWP($data_off+12,"esp","",0)); # get iv[1]

        &xor("ecx",     "eax");
        &xor("edx",     "ebx");

        # The cipher text is re-read before the output is stored.
        &mov("eax",     &DWP(0,$in,"",0));      # get old cipher text,
        &mov("ebx",     &DWP(4,$in,"",0));      # next iv actually

        &mov(&DWP(0,$out,"",0),"ecx");
        &mov(&DWP(4,$out,"",0),"edx");

        &mov(&DWP($data_off+8,"esp","",0),      "eax"); # save iv
        &mov(&DWP($data_off+12,"esp","",0),     "ebx"); #

        &add($in,       8);
        &add($out,      8);

        &sub($count,    8);
        &jnz(&label("decrypt_loop"));
        ############################ ENDIT ##########################
        &set_label("decrypt_finish");
        &mov($count,    &wparam(2));    # length
        &and($count,    7);
        &jz(&label("finish"));

        # A full 8-byte block is read and decrypted for the tail.
        &mov("eax",     &DWP(0,$in,"",0));      # load first 4 bytes
        &mov("ebx",     &DWP(4,$in,"",0));      # second 4 bytes

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov(&DWP($data_off,"esp","",0),        "eax"); # put back
        &mov(&DWP($data_off+4,"esp","",0),      "ebx"); #

        &call($dec_func);

        &mov("eax",     &DWP($data_off,"esp","",0));    # get return
        &mov("ebx",     &DWP($data_off+4,"esp","",0));  #

        &bswap("eax")   if $swap;
        &bswap("ebx")   if $swap;

        &mov("ecx",     &DWP($data_off+8,"esp","",0));  # get iv[0]
        &mov("edx",     &DWP($data_off+12,"esp","",0)); # get iv[1]

        &xor("ecx",     "eax");
        &xor("edx",     "ebx");

        # this is for when we exit
        &mov("eax",     &DWP(0,$in,"",0));      # get old cipher text,
        &mov("ebx",     &DWP(4,$in,"",0));      # next iv actually

        # Nothing in this routine jumps to the dj* labels (the decrypt jump
        # table at the end is commented out), so execution always enters at
        # dj7 and leaves through dj4, storing bytes 6, 5, 4 and then the
        # dword at 0; dj3..dj1 are not reached as the code stands.
        # NOTE(review): dj2/dj1 store through $in rather than $out, and dj3
        # shifts left where dj7 shifts right - confirm and correct these
        # before enabling a decrypt jump table.
&set_label("dj7");
        &rotr("edx",    16);
        &movb(&BP(6,$out,"",0), &LB("edx"));
        &shr("edx",16);
&set_label("dj6");
        &movb(&BP(5,$out,"",0), &HB("edx"));
&set_label("dj5");
        &movb(&BP(4,$out,"",0), &LB("edx"));
&set_label("dj4");
        &mov(&DWP(0,$out,"",0), "ecx");
        &jmp(&label("djend"));
&set_label("dj3");
        &rotr("ecx",    16);
        &movb(&BP(2,$out,"",0), &LB("ecx"));
        &shl("ecx",16);
&set_label("dj2");
        &movb(&BP(1,$in,"",0),  &HB("ecx"));
&set_label("dj1");
        &movb(&BP(0,$in,"",0),  &LB("ecx"));
&set_label("djend");

        # final iv is still in eax:ebx
        &jmp(&label("finish"));


        ############################ FINISH #########################
        &set_label("finish",1);
        # The iv pointer is fetched while the work area is still on the
        # stack, i.e. before esp is adjusted below.
        &mov("ecx",     &wparam($iv_off));      # Get iv ptr

        #################################################
        # Drop the work area: 16 bytes of data/iv, the data pointer and one
        # slot per forwarded parameter (mirrors the pushes at the top).
        $total=16+4;
        $total+=4 if ($p1 > 0);
        $total+=4 if ($p2 > 0);
        $total+=4 if ($p3 > 0);
        &add("esp",$total);

        # Hand the final iv (eax:ebx) back to the caller.
        &mov(&DWP(0,"ecx","",0),        "eax"); # save iv
        &mov(&DWP(4,"ecx","",0),        "ebx"); # save iv

        &function_end_A($name);

        # Encrypt tail dispatch table, indexed by (length & 7); every entry
        # is an offset from PIC_point.  Entry 0 is never used because a zero
        # remainder branches straight to "finish".
        &set_label("cbc_enc_jmp_table",1);
        &data_word("0");
        &data_word(&label("ej1")."-".&label("PIC_point"));
        &data_word(&label("ej2")."-".&label("PIC_point"));
        &data_word(&label("ej3")."-".&label("PIC_point"));
        &data_word(&label("ej4")."-".&label("PIC_point"));
        &data_word(&label("ej5")."-".&label("PIC_point"));
        &data_word(&label("ej6")."-".&label("PIC_point"));
        &data_word(&label("ej7")."-".&label("PIC_point"));
        # not used
        #&set_label("cbc_dec_jmp_table",1);
        #&data_word("0");
        #&data_word(&label("dj1")."-".&label("PIC_point"));
        #&data_word(&label("dj2")."-".&label("PIC_point"));
        #&data_word(&label("dj3")."-".&label("PIC_point"));
        #&data_word(&label("dj4")."-".&label("PIC_point"));
        #&data_word(&label("dj5")."-".&label("PIC_point"));
        #&data_word(&label("dj6")."-".&label("PIC_point"));
        #&data_word(&label("dj7")."-".&label("PIC_point"));

        &function_end_B($name);

        }
348
349 1;