arch/mips/math-emu/sp_maddf.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * IEEE754 floating point arithmetic
   4  * single precision: MADDF.f (Fused Multiply Add)
   5  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
   6  *
   7  * MIPS floating point support
   8  * Copyright (C) 2015 Imagination Technologies, Ltd.
   9  * Author: Markos Chandras <markos.chandras@imgtec.com>
  10  */
  11
  12 #include "ieee754sp.h"
  13
  14
  15 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
  16                                  union ieee754sp y, enum maddf_flags flags)
  17 {
  18         int re;
  19         int rs;
  20         unsigned int rm;
  21         u64 rm64;
  22         u64 zm64;
  23         int s;
  24
  25         COMPXSP;
  26         COMPYSP;
  27         COMPZSP;
  28
  29         EXPLODEXSP;
  30         EXPLODEYSP;
  31         EXPLODEZSP;
  32
  33         FLUSHXSP;
  34         FLUSHYSP;
  35         FLUSHZSP;
  36
  37         ieee754_clearcx();
  38
  39         /*
  40          * Handle the cases when at least one of x, y or z is a NaN.
  41          * Order of precedence is sNaN, qNaN and z, x, y.
  42          */
  43         if (zc == IEEE754_CLASS_SNAN)
  44                 return ieee754sp_nanxcpt(z);
  45         if (xc == IEEE754_CLASS_SNAN)
  46                 return ieee754sp_nanxcpt(x);
  47         if (yc == IEEE754_CLASS_SNAN)
  48                 return ieee754sp_nanxcpt(y);
  49         if (zc == IEEE754_CLASS_QNAN)
  50                 return z;
  51         if (xc == IEEE754_CLASS_QNAN)
  52                 return x;
  53         if (yc == IEEE754_CLASS_QNAN)
  54                 return y;
  55
  56         if (zc == IEEE754_CLASS_DNORM)
  57                 SPDNORMZ;
  58         /* ZERO z cases are handled separately below */
  59
  60         switch (CLPAIR(xc, yc)) {
  61
  62
  63         /*
  64          * Infinity handling
  65          */
  66         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  67         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  68                 ieee754_setcx(IEEE754_INVALID_OPERATION);
  69                 return ieee754sp_indef();
  70
  71         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
  72         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
  73         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
  74         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
  75         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
  76                 if ((zc == IEEE754_CLASS_INF) &&
  77                     ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
  78                      ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
  79                         /*
  80                          * Cases of addition of infinities with opposite signs
  81                          * or subtraction of infinities with same signs.
  82                          */
  83                         ieee754_setcx(IEEE754_INVALID_OPERATION);
  84                         return ieee754sp_indef();
  85                 }
  86                 /*
  87                  * z is here either not an infinity, or an infinity having the
  88                  * same sign as product (x*y) (in case of MADDF.D instruction)
  89                  * or product -(x*y) (in MSUBF.D case). The result must be an
  90                  * infinity, and its sign is determined only by the value of
  91                  * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
  92                  */
  93                 if (flags & MADDF_NEGATE_PRODUCT)
  94                         return ieee754sp_inf(1 ^ (xs ^ ys));
  95                 else
  96                         return ieee754sp_inf(xs ^ ys);
  97
  98         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
  99         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 100         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
 101         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 102         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 103                 if (zc == IEEE754_CLASS_INF)
 104                         return ieee754sp_inf(zs);
 105                 if (zc == IEEE754_CLASS_ZERO) {
 106                         /* Handle cases +0 + (-0) and similar ones. */
 107                         if ((!(flags & MADDF_NEGATE_PRODUCT)
 108                                         && (zs == (xs ^ ys))) ||
 109                             ((flags & MADDF_NEGATE_PRODUCT)
 110                                         && (zs != (xs ^ ys))))
 111                                 /*
 112                                  * Cases of addition of zeros of equal signs
 113                                  * or subtraction of zeroes of opposite signs.
 114                                  * The sign of the resulting zero is in any
 115                                  * such case determined only by the sign of z.
 116                                  */
 117                                 return z;
 118
 119                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 120                 }
 121                 /* x*y is here 0, and z is not 0, so just return z */
 122                 return z;
 123
 124         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 125                 SPDNORMX;
 126                 /* fall through */
 127
 128         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 129                 if (zc == IEEE754_CLASS_INF)
 130                         return ieee754sp_inf(zs);
 131                 SPDNORMY;
 132                 break;
 133
 134         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
 135                 if (zc == IEEE754_CLASS_INF)
 136                         return ieee754sp_inf(zs);
 137                 SPDNORMX;
 138                 break;
 139
 140         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
 141                 if (zc == IEEE754_CLASS_INF)
 142                         return ieee754sp_inf(zs);
 143                 /* continue to real computations */
 144         }
 145
 146         /* Finally get to do some computation */
 147
 148         /*
 149          * Do the multiplication bit first
 150          *
 151          * rm = xm * ym, re = xe + ye basically
 152          *
 153          * At this point xm and ym should have been normalized.
 154          */
 155
 156         /* rm = xm * ym, re = xe+ye basically */
 157         assert(xm & SP_HIDDEN_BIT);
 158         assert(ym & SP_HIDDEN_BIT);
 159
 160         re = xe + ye;
 161         rs = xs ^ ys;
 162         if (flags & MADDF_NEGATE_PRODUCT)
 163                 rs ^= 1;
 164
 165         /* Multiple 24 bit xm and ym to give 48 bit results */
 166         rm64 = (uint64_t)xm * ym;
 167
 168         /* Shunt to top of word */
 169         rm64 = rm64 << 16;
 170
 171         /* Put explicit bit at bit 62 if necessary */
 172         if ((int64_t) rm64 < 0) {
 173                 rm64 = rm64 >> 1;
 174                 re++;
 175         }
 176
 177         assert(rm64 & (1 << 62));
 178
 179         if (zc == IEEE754_CLASS_ZERO) {
 180                 /*
 181                  * Move explicit bit from bit 62 to bit 26 since the
 182                  * ieee754sp_format code expects the mantissa to be
 183                  * 27 bits wide (24 + 3 rounding bits).
 184                  */
 185                 rm = XSPSRS64(rm64, (62 - 26));
 186                 return ieee754sp_format(rs, re, rm);
 187         }
 188
 189         /* Move explicit bit from bit 23 to bit 62 */
 190         zm64 = (uint64_t)zm << (62 - 23);
 191         assert(zm64 & (1 << 62));
 192
 193         /* Make the exponents the same */
 194         if (ze > re) {
 195                 /*
 196                  * Have to shift r fraction right to align.
 197                  */
 198                 s = ze - re;
 199                 rm64 = XSPSRS64(rm64, s);
 200                 re += s;
 201         } else if (re > ze) {
 202                 /*
 203                  * Have to shift z fraction right to align.
 204                  */
 205                 s = re - ze;
 206                 zm64 = XSPSRS64(zm64, s);
 207                 ze += s;
 208         }
 209         assert(ze == re);
 210         assert(ze <= SP_EMAX);
 211
 212         /* Do the addition */
 213         if (zs == rs) {
 214                 /*
 215                  * Generate 64 bit result by adding two 63 bit numbers
 216                  * leaving result in zm64, zs and ze.
 217                  */
 218                 zm64 = zm64 + rm64;
 219                 if ((int64_t)zm64 < 0) {        /* carry out */
 220                         zm64 = XSPSRS1(zm64);
 221                         ze++;
 222                 }
 223         } else {
 224                 if (zm64 >= rm64) {
 225                         zm64 = zm64 - rm64;
 226                 } else {
 227                         zm64 = rm64 - zm64;
 228                         zs = rs;
 229                 }
 230                 if (zm64 == 0)
 231                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 232
 233                 /*
 234                  * Put explicit bit at bit 62 if necessary.
 235                  */
 236                 while ((zm64 >> 62) == 0) {
 237                         zm64 <<= 1;
 238                         ze--;
 239                 }
 240         }
 241
 242         /*
 243          * Move explicit bit from bit 62 to bit 26 since the
 244          * ieee754sp_format code expects the mantissa to be
 245          * 27 bits wide (24 + 3 rounding bits).
 246          */
 247         zm = XSPSRS64(zm64, (62 - 26));
 248
 249         return ieee754sp_format(zs, ze, zm);
 250 }
 251
 252 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
 253                                 union ieee754sp y)
 254 {
 255         return _sp_maddf(z, x, y, 0);
 256 }
 257
 258 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
 259                                 union ieee754sp y)
 260 {
 261         return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 262 }