1 /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
3 * ====================================================
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
6 * Developed at SunSoft, a Sun Microsystems, Inc. business.
7 * Permission to use, copy, modify, and distribute this
8 * software is freely granted, provided that this notice
10 * ====================================================
13 * __rem_pio2_large(x,y,e0,nx,prec)
14 * double x[],y[]; int e0,nx,prec;
16 * __rem_pio2_large return the last three digits of N with
20 * The method is to compute the integer (mod 8) and fraction parts of
21 * (2/pi)*x without doing the full multiplication. In general we
22 * skip the part of the product that are known to be a huge integer (
23 * more accurately, = 0 mod 8 ). Thus the number of operations are
24 * independent of the exponent of the input.
26 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
29 * x[] The input value (must be positive) is broken into nx
30 * pieces of 24-bit integers in double precision format.
31 * x[i] will be the i-th 24 bit of x. The scaled exponent
32 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
33 * match x's up to 24 bits.
35 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
43 * y[] ouput result in an array of double precision numbers.
44 * The dimension of y[] is:
49 * The actual value is the sum of them. Thus for 113-bit
50 * precison, one may have to do something like:
52 * long double t,w,r_head, r_tail;
53 * t = (long double)y[2] + (long double)y[1];
54 * w = (long double)y[0];
56 * r_tail = w - (r_head - t);
58 * e0 The exponent of x[0]. Must be <= 16360 or you need to
59 * expand the ipio2 table.
63 * prec an integer indicating the precision:
66 * 2 64 bits (extended)
70 * double scalbn(), floor();
73 * Here is the description of some local variables:
75 * jk jk+1 is the initial number of terms of ipio2[] needed
76 * in the computation. The minimum and recommended value
77 * for jk is 3,4,4,6 for single, double, extended, and quad.
78 * jk+1 must be 2 larger than you might expect so that our
79 * recomputation test works. (Up to 24 bits in the integer
80 * part (the 24 bits of it that we compute) and 23 bits in
81 * the fraction part may be lost to cancelation before we
84 * jz local integer variable indicating the number of
85 * terms of ipio2[] used.
89 * jv index for pointing to the suitable ipio2[] for the
90 * computation. In general, we want
91 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
93 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
94 * Hence jv = max(0,(e0-3)/24).
96 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
98 * q[] double array with integral value, representing the
99 * 24-bits chunk of the product of x and 2/pi.
101 * q0 the corresponding exponent of q[0]. Note that the
102 * exponent for q[i] would be q0-24*i.
104 * PIo2[] double precision array, obtained by cutting pi/2
105 * into 24 bits chunks.
107 * f[] ipio2[] in floating point
109 * iq[] integer array by breaking up q[] in 24-bits chunk.
111 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
113 * ih integer. If >0 it indicates q[] is >= 0.5, hence
114 * it also indicates the *sign* of the result.
119 * The hexadecimal values are the intended ones for the following
120 * constants. The decimal values may be used, provided that the
121 * compiler will convert from decimal to binary accurately enough
122 * to produce the hexadecimal values shown.
127 static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
130 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
132 * integer array, contains the (24*i)-th to (24*i+23)-th
133 * bit of 2/pi after binary point. The corresponding
136 * ipio2[i] * 2^(-24(i+1)).
138 * NB: This table must have at least (e0-3)/24 + jk terms.
139 * For quad precision (e0 <= 16360, jk = 6), this is 686.
141 static const int32_t ipio2[] = {
142 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
143 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
144 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
145 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
146 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
147 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
148 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
149 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
150 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
151 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
152 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
154 #if LDBL_MAX_EXP > 1024
155 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
156 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
157 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
158 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
159 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
160 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
161 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
162 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
163 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
164 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
165 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
166 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
167 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
168 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
169 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
170 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
171 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
172 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
173 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
174 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
175 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
176 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
177 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
178 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
179 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
180 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
181 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
182 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
183 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
184 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
185 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
186 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
187 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
188 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
189 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
190 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
191 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
192 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
193 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
194 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
195 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
196 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
197 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
198 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
199 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
200 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
201 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
202 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
203 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
204 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
205 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
206 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
207 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
208 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
209 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
210 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
211 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
212 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
213 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
214 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
215 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
216 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
217 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
218 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
219 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
220 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
221 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
222 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
223 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
224 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
225 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
226 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
227 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
228 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
229 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
230 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
231 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
232 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
233 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
234 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
235 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
236 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
237 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
238 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
239 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
240 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
241 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
242 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
243 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
244 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
245 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
246 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
247 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
248 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
249 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
250 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
251 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
252 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
253 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
254 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
255 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
256 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
257 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
258 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
262 static const double PIo2[] = {
263 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
264 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
265 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
266 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
267 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
268 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
269 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
270 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
273 int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
275 int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
276 double z,fw,f[20],fq[20],q[20];
282 /* determine jx,jv,q0, note that 3>q0 */
284 jv = (e0-3)/24; if(jv<0) jv=0;
287 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
288 j = jv-jx; m = jx+jk;
289 for (i=0; i<=m; i++,j++)
290 f[i] = j<0 ? 0.0 : (double)ipio2[j];
292 /* compute q[0],q[1],...q[jk] */
293 for (i=0; i<=jk; i++) {
294 for (j=0,fw=0.0; j<=jx; j++)
295 fw += x[j]*f[jx+i-j];
301 /* distill q[] into iq[] reversingly */
302 for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
303 fw = (double)(int32_t)(0x1p-24*z);
304 iq[i] = (int32_t)(z - 0x1p24*fw);
309 z = scalbn(z,q0); /* actual value of z */
310 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
314 if (q0 > 0) { /* need iq[jz-1] to determine n */
315 i = iq[jz-1]>>(24-q0); n += i;
316 iq[jz-1] -= i<<(24-q0);
317 ih = iq[jz-1]>>(23-q0);
319 else if (q0 == 0) ih = iq[jz-1]>>23;
320 else if (z >= 0.5) ih = 2;
322 if (ih > 0) { /* q > 0.5 */
324 for (i=0; i<jz; i++) { /* compute 1-q */
329 iq[i] = 0x1000000 - j;
332 iq[i] = 0xffffff - j;
334 if (q0 > 0) { /* rare case: chance is 1 in 12 */
337 iq[jz-1] &= 0x7fffff; break;
339 iq[jz-1] &= 0x3fffff; break;
349 /* check if recomputation is needed */
352 for (i=jz-1; i>=jk; i--) j |= iq[i];
353 if (j == 0) { /* need recomputation */
354 for (k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
356 for (i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
357 f[jx+i] = (double)ipio2[jv+i];
358 for (j=0,fw=0.0; j<=jx; j++)
359 fw += x[j]*f[jx+i-j];
367 /* chop off zero terms */
371 while (iq[jz] == 0) {
375 } else { /* break z into 24-bit if necessary */
378 fw = (double)(int32_t)(0x1p-24*z);
379 iq[jz] = (int32_t)(z - 0x1p24*fw);
382 iq[jz] = (int32_t)fw;
387 /* convert integer "bit" chunk to floating-point value */
389 for (i=jz; i>=0; i--) {
390 q[i] = fw*(double)iq[i];
394 /* compute PIo2[0,...,jp]*q[jz,...,0] */
395 for(i=jz; i>=0; i--) {
396 for (fw=0.0,k=0; k<=jp && k<=jz-i; k++)
397 fw += PIo2[k]*q[i+k];
401 /* compress fq[] into y[] */
405 for (i=jz; i>=0; i--)
407 y[0] = ih==0 ? fw : -fw;
412 for (i=jz; i>=0; i--)
414 // TODO: drop excess precision here once double_t is used
416 y[0] = ih==0 ? fw : -fw;
418 for (i=1; i<=jz; i++)
420 y[1] = ih==0 ? fw : -fw;
422 case 3: /* painful */
423 for (i=jz; i>0; i--) {
428 for (i=jz; i>1; i--) {
433 for (fw=0.0,i=jz; i>=2; i--)
436 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
438 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;