underflow is raised by an inexact subnormal float store,
since subnormal operations are slow, check the underflow
flag and skip the store if it's already raised
.type asinf,@function
asinf:
flds 4(%esp)
- jmp 1f
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
.global asinl
.type asinl,@function
.type asin,@function
asin:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
1: fld %st(0)
fld1
fsub %st(0),%st(1)
.type atan,@function
atan:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jb 1f
fld1
fpatan
ret
+ # subnormal x, return x with underflow
+1: fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
.type atanf,@function
atanf:
flds 4(%esp)
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jb 1f
fld1
fpatan
ret
+ # subnormal x, return x with underflow
+1: fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
.type expm1f,@function
expm1f:
flds 4(%esp)
- jmp 1f
+ mov 4(%esp),%eax
+ add %eax,%eax
+ cmp $0x01000000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fld %st(0)
+ fmul %st(1)
+ fstps 4(%esp)
+2: ret
.global expm1l
.type expm1l,@function
.type expm1,@function
expm1:
fldl 4(%esp)
+ mov 8(%esp),%eax
+ add %eax,%eax
+ cmp $0x00200000,%eax
+ jae 1f
+ # subnormal x, return x with underflow
+ fnstsw %ax
+ and $16,%ax
+ jnz 2f
+ fsts 4(%esp)
+2: ret
1: fldl2e
fmulp
+ mov $0xc2820000,%eax
+ push %eax
+ flds (%esp)
+ pop %eax
+ fucomp %st(1)
+ fnstsw %ax
+ sahf
fld1
- fld %st(1)
+ jb 1f
+ # x*log2e < -65, return -1 without underflow
+ fstp %st(1)
+ fchs
+ ret
+1: fld %st(1)
fabs
fucom %st(1)
fnstsw %ax
fldl 4(%esp)
cmp $0x3fd28f00,%eax
ja 1f
+ cmp $0x00100000,%eax
+ jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
+ # subnormal x, return x with underflow
+2: fnstsw %ax
+ and $16,%ax
+ jnz 1f
+ fsts 4(%esp)
+ fstp %st(1)
+1: ret
flds 4(%esp)
cmp $0x3e940000,%eax
ja 1f
+ cmp $0x00800000,%eax
+ jb 2f
fyl2xp1
ret
1: fld1
faddp
fyl2x
ret
+ # subnormal x, return x with underflow
+2: fnstsw %ax
+ and $16,%ax
+ jnz 1f
+ fxch
+ fmul %st(1)
+ fstps 4(%esp)
+1: ret