this is a lot more efficient and also what is generally wanted.
perhaps the bit shuffling could be more efficient...
.type remquof,@function
remquof:
mov 12(%esp),%ecx
- fldl 4(%esp)
- fldl 8(%esp)
+ flds 8(%esp)  # y is a 4-byte float at 8(%esp): load single precision (fldl would read 8 bytes, spilling into the quo pointer)
+ flds 4(%esp)  # x is a 4-byte float at 4(%esp); leaves st(0) = x, st(1) = y for fprem1
+ mov 11(%esp),%dh  # top byte of y, whose bit 7 is y's sign
+ xor 7(%esp),%dh  # dh bit 7 = sign(x) ^ sign(y), i.e. the sign of the quotient
jmp 1f
.global remquol
.type remquol,@function
remquol:
mov 28(%esp),%ecx
- fldl 4(%esp)
- fldl 16(%esp)
+ fldt 16(%esp)  # y is an 80-bit extended value at 16(%esp): fldt loads all 10 bytes (fldl would misread it as a double)
+ fldt 4(%esp)  # x is an 80-bit extended value at 4(%esp); leaves st(0) = x, st(1) = y for fprem1
+ mov 25(%esp),%dh  # byte 9 of y (sign + high exponent bits); bit 7 is y's sign
+ xor 13(%esp),%dh  # dh bit 7 = sign(x) ^ sign(y), i.e. the sign of the quotient
jmp 1f
.global remquo
.type remquo,@function
remquo:
mov 20(%esp),%ecx
- fldl 4(%esp)
fldl 12(%esp)
-1: fld %st(1)
+ fldl 4(%esp)  # x (double at 4(%esp)) pushed last: st(0) = x, st(1) = y
+ mov 19(%esp),%dh  # top byte of y (double occupies 12..19(%esp)); bit 7 is y's sign
+ xor 11(%esp),%dh  # dh bit 7 = sign(x) ^ sign(y), i.e. the sign of the quotient
1: fprem1
fnstsw %ax
sahf
jp 1b
- fsubr %st(0),%st(2)
- fxch %st(2)
- fdivp
- mov $0x4f000000,%eax
- mov %eax,4(%esp)
- flds 4(%esp)
- fxch %st(1)
-1: fprem
- fnstsw %ax
- sahf
- jp 1b
- fistpl (%ecx)
- fstp %st(0)
+ fstp %st(1)  # overwrite y with the remainder and pop: st(0) = remainder, x87 stack depth 1
+ mov %ah,%dl  # ah = bits 15..8 of the status word stored by fnstsw %ax
+ shr %dl  # move C1 (ah bit 1) down to bit 0
+ and $1,%dl  # dl = quotient bit 0 (fprem1 reports Q0 in C1)
+ mov %ah,%al
+ shr $5,%al  # move C3 (ah bit 6) down to bit 1
+ and $2,%al  # al = quotient bit 1 (fprem1 reports Q1 in C3)
+ or %al,%dl
+ mov %ah,%al
+ shl $2,%al  # move C0 (ah bit 0) up to bit 2
+ and $4,%al  # al = quotient bit 2 (fprem1 reports Q2 in C0)
+ or %al,%dl  # dl = low three bits of |quotient|, range 0..7
+ test %dh,%dh  # SF = bit 7 of dh = sign(x) ^ sign(y)
+ jns 1f  # operand signs agree: keep the magnitude as is
+ neg %dl  # operand signs differ: make the stored quotient negative
+1: movsbl %dl,%edx  # sign-extend the signed byte to 32 bits
+ mov %edx,(%ecx)  # store through the pointer loaded into ecx at entry
ret