+&function_begin_B("OPENSSL_cleanse");
+ &mov ("edx",&wparam(0));
+ &mov ("ecx",&wparam(1));
+ &xor ("eax","eax");
+ &cmp ("ecx",7);
+ &jae (&label("lot"));
+ &cmp ("ecx",0);
+ &je (&label("ret"));
+&set_label("little");
+ &mov (&BP(0,"edx"),"al");
+ &sub ("ecx",1);
+ &lea ("edx",&DWP(1,"edx"));
+ &jnz (&label("little"));
+&set_label("ret");
+ &ret ();
+
+&set_label("lot",16);
+ &test ("edx",3);
+ &jz (&label("aligned"));
+ &mov (&BP(0,"edx"),"al");
+ &lea ("ecx",&DWP(-1,"ecx"));
+ &lea ("edx",&DWP(1,"edx"));
+ &jmp (&label("lot"));
+&set_label("aligned");
+ &mov (&DWP(0,"edx"),"eax");
+ &lea ("ecx",&DWP(-4,"ecx"));
+ &test ("ecx",-4);
+ &lea ("edx",&DWP(4,"edx"));
+ &jnz (&label("aligned"));
+ &cmp ("ecx",0);
+ &jne (&label("little"));
+ &ret ();
+&function_end_B("OPENSSL_cleanse");
+
+{
+my $lasttick = "esi";
+my $lastdiff = "ebx";
+my $out = "edi";
+my $cnt = "ecx";
+my $max = "ebp";
+
+&function_begin("OPENSSL_instrument_bus");
+ &mov ("eax",0);
+ if ($sse2) {
+ &picmeup("edx","OPENSSL_ia32cap_P");
+ &bt (&DWP(0,"edx"),4);
+ &jnc (&label("nogo")); # no TSC
+ &bt (&DWP(0,"edx"),19);
+ &jnc (&label("nogo")); # no CLFLUSH
+
+ &mov ($out,&wparam(0)); # load arguments
+ &mov ($cnt,&wparam(1));
+
+ # collect 1st tick
+ &rdtsc ();
+ &mov ($lasttick,"eax"); # lasttick = tick
+ &mov ($lastdiff,0); # lastdiff = 0
+ &clflush(&DWP(0,$out));
+ &data_byte(0xf0); # lock
+ &add (&DWP(0,$out),$lastdiff);
+ &jmp (&label("loop"));
+
+&set_label("loop",16);
+ &rdtsc ();
+ &mov ("edx","eax"); # put aside tick (yes, I neglect edx)
+ &sub ("eax",$lasttick); # diff
+ &mov ($lasttick,"edx"); # lasttick = tick
+ &mov ($lastdiff,"eax"); # lastdiff = diff
+ &clflush(&DWP(0,$out));
+ &data_byte(0xf0); # lock
+ &add (&DWP(0,$out),"eax"); # accumulate diff
+ &lea ($out,&DWP(4,$out)); # ++$out
+ &sub ($cnt,1); # --$cnt
+ &jnz (&label("loop"));
+
+ &mov ("eax",&wparam(1));
+&set_label("nogo");
+ }
+&function_end("OPENSSL_instrument_bus");
+
+&function_begin("OPENSSL_instrument_bus2");
+ &mov ("eax",0);
+ if ($sse2) {
+ &picmeup("edx","OPENSSL_ia32cap_P");
+ &bt (&DWP(0,"edx"),4);
+ &jnc (&label("nogo")); # no TSC
+ &bt (&DWP(0,"edx"),19);
+ &jnc (&label("nogo")); # no CLFLUSH
+
+ &mov ($out,&wparam(0)); # load arguments
+ &mov ($cnt,&wparam(1));
+ &mov ($max,&wparam(2));
+
+ &rdtsc (); # collect 1st tick
+ &mov ($lasttick,"eax"); # lasttick = tick
+ &mov ($lastdiff,0); # lastdiff = 0
+
+ &clflush(&DWP(0,$out));
+ &data_byte(0xf0); # lock
+ &add (&DWP(0,$out),$lastdiff);
+
+ &rdtsc (); # collect 1st diff
+ &mov ("edx","eax"); # put aside tick (yes, I neglect edx)
+ &sub ("eax",$lasttick); # diff
+ &mov ($lasttick,"edx"); # lasttick = tick
+ &mov ($lastdiff,"eax"); # lastdiff = diff
+ &jmp (&label("loop2"));
+
+&set_label("loop2",16);
+ &clflush(&DWP(0,$out));
+ &data_byte(0xf0); # lock
+ &add (&DWP(0,$out),"eax"); # accumulate diff
+
+ &sub ($max,1);
+ &jz (&label("done2"));
+
+ &rdtsc ();
+ &mov ("edx","eax"); # put aside tick (yes, I neglect edx)
+ &sub ("eax",$lasttick); # diff
+ &mov ($lasttick,"edx"); # lasttick = tick
+ &cmp ("eax",$lastdiff);
+ &mov ($lastdiff,"eax"); # lastdiff = diff
+ &mov ("edx",0);
+ &setne ("dl");
+ &sub ($cnt,"edx"); # conditional --$cnt
+ &lea ($out,&DWP(0,$out,"edx",4)); # conditional ++$out
+ &jnz (&label("loop2"));
+
+&set_label("done2");
+ &mov ("eax",&wparam(1));
+ &sub ("eax",$cnt);
+&set_label("nogo");
+ }
+&function_end("OPENSSL_instrument_bus2");
+}
+
+&function_begin_B("OPENSSL_ia32_rdrand");
+ &mov ("ecx",8);
+&set_label("loop");
+ &rdrand ("eax");
+ &jc (&label("break"));
+ &loop (&label("loop"));
+&set_label("break");
+ &cmp ("eax",0);
+ &cmove ("eax","ecx");
+ &ret ();
+&function_end_B("OPENSSL_ia32_rdrand");
+
+&function_begin_B("OPENSSL_ia32_rdseed");
+ &mov ("ecx",8);
+&set_label("loop");
+ &rdseed ("eax");
+ &jc (&label("break"));
+ &loop (&label("loop"));
+&set_label("break");
+ &cmp ("eax",0);
+ &cmove ("eax","ecx");
+ &ret ();
+&function_end_B("OPENSSL_ia32_rdseed");
+