From 54807d47acecab778498ced88ce8f62bfa16e379 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sun, 19 Mar 2017 05:26:45 +0100 Subject: [PATCH] aarch64: add single instruction math functions this should increase performance and reduce code size on aarch64. the compiled code was checked against using __builtin_* instead of inline asm with gcc-6.2.0. lrint is two instructions. c with inline asm is used because it is safer than a pure asm implementation, this prevents ll{rint,round} from being an alias of l{rint,round} (because the types don't match) and depends on gcc style inline asm support. ceil, floor, round, trunc can either raise inexact on finite non-integer inputs or not raise any exceptions. the new implementation does not raise exceptions while the generic c code does. on aarch64, the underflow exception is signaled before rounding (ieee 754 allows both before and after rounding, but it must be consistent), the generic fma c code signals it after rounding so using a single instruction fixes a slight conformance issue too. 
--- src/math/aarch64/ceil.c | 7 +++++++ src/math/aarch64/ceilf.c | 7 +++++++ src/math/aarch64/fabs.c | 7 +++++++ src/math/aarch64/fabs.s | 6 ------ src/math/aarch64/fabsf.c | 7 +++++++ src/math/aarch64/fabsf.s | 6 ------ src/math/aarch64/floor.c | 7 +++++++ src/math/aarch64/floorf.c | 7 +++++++ src/math/aarch64/fma.c | 7 +++++++ src/math/aarch64/fmaf.c | 7 +++++++ src/math/aarch64/fmax.c | 7 +++++++ src/math/aarch64/fmaxf.c | 7 +++++++ src/math/aarch64/fmin.c | 7 +++++++ src/math/aarch64/fminf.c | 7 +++++++ src/math/aarch64/llrint.c | 10 ++++++++++ src/math/aarch64/llrintf.c | 10 ++++++++++ src/math/aarch64/llround.c | 8 ++++++++ src/math/aarch64/llroundf.c | 8 ++++++++ src/math/aarch64/lrint.c | 10 ++++++++++ src/math/aarch64/lrintf.c | 10 ++++++++++ src/math/aarch64/lround.c | 8 ++++++++ src/math/aarch64/lroundf.c | 8 ++++++++ src/math/aarch64/nearbyint.c | 7 +++++++ src/math/aarch64/nearbyintf.c | 7 +++++++ src/math/aarch64/rint.c | 7 +++++++ src/math/aarch64/rintf.c | 7 +++++++ src/math/aarch64/round.c | 7 +++++++ src/math/aarch64/roundf.c | 7 +++++++ src/math/aarch64/sqrt.c | 7 +++++++ src/math/aarch64/sqrt.s | 6 ------ src/math/aarch64/sqrtf.c | 7 +++++++ src/math/aarch64/sqrtf.s | 6 ------ src/math/aarch64/trunc.c | 7 +++++++ src/math/aarch64/truncf.c | 7 +++++++ 34 files changed, 226 insertions(+), 24 deletions(-) create mode 100644 src/math/aarch64/ceil.c create mode 100644 src/math/aarch64/ceilf.c create mode 100644 src/math/aarch64/fabs.c delete mode 100644 src/math/aarch64/fabs.s create mode 100644 src/math/aarch64/fabsf.c delete mode 100644 src/math/aarch64/fabsf.s create mode 100644 src/math/aarch64/floor.c create mode 100644 src/math/aarch64/floorf.c create mode 100644 src/math/aarch64/fma.c create mode 100644 src/math/aarch64/fmaf.c create mode 100644 src/math/aarch64/fmax.c create mode 100644 src/math/aarch64/fmaxf.c create mode 100644 src/math/aarch64/fmin.c create mode 100644 src/math/aarch64/fminf.c create mode 100644 src/math/aarch64/llrint.c 
create mode 100644 src/math/aarch64/llrintf.c create mode 100644 src/math/aarch64/llround.c create mode 100644 src/math/aarch64/llroundf.c create mode 100644 src/math/aarch64/lrint.c create mode 100644 src/math/aarch64/lrintf.c create mode 100644 src/math/aarch64/lround.c create mode 100644 src/math/aarch64/lroundf.c create mode 100644 src/math/aarch64/nearbyint.c create mode 100644 src/math/aarch64/nearbyintf.c create mode 100644 src/math/aarch64/rint.c create mode 100644 src/math/aarch64/rintf.c create mode 100644 src/math/aarch64/round.c create mode 100644 src/math/aarch64/roundf.c create mode 100644 src/math/aarch64/sqrt.c delete mode 100644 src/math/aarch64/sqrt.s create mode 100644 src/math/aarch64/sqrtf.c delete mode 100644 src/math/aarch64/sqrtf.s create mode 100644 src/math/aarch64/trunc.c create mode 100644 src/math/aarch64/truncf.c diff --git a/src/math/aarch64/ceil.c b/src/math/aarch64/ceil.c new file mode 100644 index 00000000..ac80c1dc --- /dev/null +++ b/src/math/aarch64/ceil.c @@ -0,0 +1,7 @@ +#include <math.h> + +double ceil(double x) +{ + __asm__ ("frintp %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/ceilf.c b/src/math/aarch64/ceilf.c new file mode 100644 index 00000000..1ef1e9c8 --- /dev/null +++ b/src/math/aarch64/ceilf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float ceilf(float x) +{ + __asm__ ("frintp %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabs.c b/src/math/aarch64/fabs.c new file mode 100644 index 00000000..5c3ecaf4 --- /dev/null +++ b/src/math/aarch64/fabs.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fabs(double x) +{ + __asm__ ("fabs %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabs.s b/src/math/aarch64/fabs.s deleted file mode 100644 index 8c04d091..00000000 --- a/src/math/aarch64/fabs.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global fabs -.type fabs,%function -fabs: - fabs d0, d0 - ret diff --git a/src/math/aarch64/fabsf.c b/src/math/aarch64/fabsf.c new file mode 100644 
index 00000000..7fde9817 --- /dev/null +++ b/src/math/aarch64/fabsf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fabsf(float x) +{ + __asm__ ("fabs %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fabsf.s b/src/math/aarch64/fabsf.s deleted file mode 100644 index 6e96dd43..00000000 --- a/src/math/aarch64/fabsf.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global fabsf -.type fabsf,%function -fabsf: - fabs s0, s0 - ret diff --git a/src/math/aarch64/floor.c b/src/math/aarch64/floor.c new file mode 100644 index 00000000..50ffdb28 --- /dev/null +++ b/src/math/aarch64/floor.c @@ -0,0 +1,7 @@ +#include <math.h> + +double floor(double x) +{ + __asm__ ("frintm %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/floorf.c b/src/math/aarch64/floorf.c new file mode 100644 index 00000000..8d007e9f --- /dev/null +++ b/src/math/aarch64/floorf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float floorf(float x) +{ + __asm__ ("frintm %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/fma.c b/src/math/aarch64/fma.c new file mode 100644 index 00000000..2450ea7e --- /dev/null +++ b/src/math/aarch64/fma.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fma(double x, double y, double z) +{ + __asm__ ("fmadd %d0, %d1, %d2, %d3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/aarch64/fmaf.c b/src/math/aarch64/fmaf.c new file mode 100644 index 00000000..9a147213 --- /dev/null +++ b/src/math/aarch64/fmaf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fmaf(float x, float y, float z) +{ + __asm__ ("fmadd %s0, %s1, %s2, %s3" : "=w"(x) : "w"(x), "w"(y), "w"(z)); + return x; +} diff --git a/src/math/aarch64/fmax.c b/src/math/aarch64/fmax.c new file mode 100644 index 00000000..86dcb3b4 --- /dev/null +++ b/src/math/aarch64/fmax.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fmax(double x, double y) +{ + __asm__ ("fmaxnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fmaxf.c b/src/math/aarch64/fmaxf.c new file mode 100644 
index 00000000..ee5eac2d --- /dev/null +++ b/src/math/aarch64/fmaxf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fmaxf(float x, float y) +{ + __asm__ ("fmaxnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fmin.c b/src/math/aarch64/fmin.c new file mode 100644 index 00000000..f1e99808 --- /dev/null +++ b/src/math/aarch64/fmin.c @@ -0,0 +1,7 @@ +#include <math.h> + +double fmin(double x, double y) +{ + __asm__ ("fminnm %d0, %d1, %d2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/fminf.c b/src/math/aarch64/fminf.c new file mode 100644 index 00000000..80468f67 --- /dev/null +++ b/src/math/aarch64/fminf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float fminf(float x, float y) +{ + __asm__ ("fminnm %s0, %s1, %s2" : "=w"(x) : "w"(x), "w"(y)); + return x; +} diff --git a/src/math/aarch64/llrint.c b/src/math/aarch64/llrint.c new file mode 100644 index 00000000..a9e07a93 --- /dev/null +++ b/src/math/aarch64/llrint.c @@ -0,0 +1,10 @@ +#include <math.h> + +long long llrint(double x) +{ + long long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/llrintf.c b/src/math/aarch64/llrintf.c new file mode 100644 index 00000000..12b6804f --- /dev/null +++ b/src/math/aarch64/llrintf.c @@ -0,0 +1,10 @@ +#include <math.h> + +long long llrintf(float x) +{ + long long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/llround.c b/src/math/aarch64/llround.c new file mode 100644 index 00000000..e09ddd48 --- /dev/null +++ b/src/math/aarch64/llround.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llround(double x) +{ + long long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/llroundf.c b/src/math/aarch64/llroundf.c new file mode 100644 index 00000000..16699598 --- /dev/null +++ b/src/math/aarch64/llroundf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long long llroundf(float x) +{ + long long n; + 
__asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/lrint.c b/src/math/aarch64/lrint.c new file mode 100644 index 00000000..cb7785ad --- /dev/null +++ b/src/math/aarch64/lrint.c @@ -0,0 +1,10 @@ +#include <math.h> + +long lrint(double x) +{ + long n; + __asm__ ( + "frintx %d1, %d1\n" + "fcvtzs %x0, %d1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/lrintf.c b/src/math/aarch64/lrintf.c new file mode 100644 index 00000000..4d750d69 --- /dev/null +++ b/src/math/aarch64/lrintf.c @@ -0,0 +1,10 @@ +#include <math.h> + +long lrintf(float x) +{ + long n; + __asm__ ( + "frintx %s1, %s1\n" + "fcvtzs %x0, %s1\n" : "=r"(n), "+w"(x)); + return n; +} diff --git a/src/math/aarch64/lround.c b/src/math/aarch64/lround.c new file mode 100644 index 00000000..85656c78 --- /dev/null +++ b/src/math/aarch64/lround.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lround(double x) +{ + long n; + __asm__ ("fcvtas %x0, %d1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/lroundf.c b/src/math/aarch64/lroundf.c new file mode 100644 index 00000000..32e51f3c --- /dev/null +++ b/src/math/aarch64/lroundf.c @@ -0,0 +1,8 @@ +#include <math.h> + +long lroundf(float x) +{ + long n; + __asm__ ("fcvtas %x0, %s1" : "=r"(n) : "w"(x)); + return n; +} diff --git a/src/math/aarch64/nearbyint.c b/src/math/aarch64/nearbyint.c new file mode 100644 index 00000000..9c3fdb44 --- /dev/null +++ b/src/math/aarch64/nearbyint.c @@ -0,0 +1,7 @@ +#include <math.h> + +double nearbyint(double x) +{ + __asm__ ("frinti %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/nearbyintf.c b/src/math/aarch64/nearbyintf.c new file mode 100644 index 00000000..8e7f61df --- /dev/null +++ b/src/math/aarch64/nearbyintf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float nearbyintf(float x) +{ + __asm__ ("frinti %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/rint.c b/src/math/aarch64/rint.c new file mode 100644 index 00000000..45b194b5 --- /dev/null +++ 
b/src/math/aarch64/rint.c @@ -0,0 +1,7 @@ +#include <math.h> + +double rint(double x) +{ + __asm__ ("frintx %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/rintf.c b/src/math/aarch64/rintf.c new file mode 100644 index 00000000..1ae7dd25 --- /dev/null +++ b/src/math/aarch64/rintf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float rintf(float x) +{ + __asm__ ("frintx %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/round.c b/src/math/aarch64/round.c new file mode 100644 index 00000000..897a84cc --- /dev/null +++ b/src/math/aarch64/round.c @@ -0,0 +1,7 @@ +#include <math.h> + +double round(double x) +{ + __asm__ ("frinta %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/roundf.c b/src/math/aarch64/roundf.c new file mode 100644 index 00000000..91637eaa --- /dev/null +++ b/src/math/aarch64/roundf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float roundf(float x) +{ + __asm__ ("frinta %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrt.c b/src/math/aarch64/sqrt.c new file mode 100644 index 00000000..fe93c3e6 --- /dev/null +++ b/src/math/aarch64/sqrt.c @@ -0,0 +1,7 @@ +#include <math.h> + +double sqrt(double x) +{ + __asm__ ("fsqrt %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrt.s b/src/math/aarch64/sqrt.s deleted file mode 100644 index 1917e18d..00000000 --- a/src/math/aarch64/sqrt.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global sqrt -.type sqrt,%function -sqrt: - fsqrt d0, d0 - ret diff --git a/src/math/aarch64/sqrtf.c b/src/math/aarch64/sqrtf.c new file mode 100644 index 00000000..275c7f39 --- /dev/null +++ b/src/math/aarch64/sqrtf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float sqrtf(float x) +{ + __asm__ ("fsqrt %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/sqrtf.s b/src/math/aarch64/sqrtf.s deleted file mode 100644 index 1639497b..00000000 --- a/src/math/aarch64/sqrtf.s +++ /dev/null @@ -1,6 +0,0 @@ -.text -.global sqrtf -.type sqrtf,%function -sqrtf: - fsqrt 
s0, s0 - ret diff --git a/src/math/aarch64/trunc.c b/src/math/aarch64/trunc.c new file mode 100644 index 00000000..e592147a --- /dev/null +++ b/src/math/aarch64/trunc.c @@ -0,0 +1,7 @@ +#include <math.h> + +double trunc(double x) +{ + __asm__ ("frintz %d0, %d1" : "=w"(x) : "w"(x)); + return x; +} diff --git a/src/math/aarch64/truncf.c b/src/math/aarch64/truncf.c new file mode 100644 index 00000000..20ef30f1 --- /dev/null +++ b/src/math/aarch64/truncf.c @@ -0,0 +1,7 @@ +#include <math.h> + +float truncf(float x) +{ + __asm__ ("frintz %s0, %s1" : "=w"(x) : "w"(x)); + return x; +} -- 2.25.1