cde/lib/DtHelp/il/iljpgdedct.c

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: iljpgdedct.c /main/3 1995/10/23 15:55:19 rswiston $ */
  24 /**---------------------------------------------------------------------
  25 ***
  26 ***    (c)Copyright 1992 Hewlett-Packard Co.
  27 ***
  28 ***                             RESTRICTED RIGHTS LEGEND
  29 ***    Use, duplication, or disclosure by the U.S. Government is subject to
  30 ***    restrictions as set forth in sub-paragraph (c)(1)(ii) of the Rights in
  31 ***    Technical Data and Computer Software clause in DFARS 252.227-7013.
  32 ***                             Hewlett-Packard Company
  33 ***                             3000 Hanover Street
  34 ***                             Palo Alto, CA 94304 U.S.A.
  35 ***    Rights for non-DOD U.S. Government Departments and Agencies are as set
  36 ***    forth in FAR 52.227-19(c)(1,2).
  37 ***
  38 ***-------------------------------------------------------------------*/
  39
  40 #include "iljpgdecodeint.h"
  41 #include <math.h>
  42 #include <stdlib.h>
  43
  44     /*  Macros to check if "clipValue" (an int) is  outside range 0..255, and
  45         to branch to point named by second macro if so, which clips and returns.
  46         This is done to avoid taking a branch on the common case of value
  47         not out of range - significant speedup on RISC machine.
  48     */
  49 #define ILJPG_CLIP_256(_gotoLabel, _returnLabel) \
  50     if ((clipValue) >> 8) goto _gotoLabel; \
  51 _returnLabel:
  52
  53 #define ILJPG_CLIP_256_LABEL(_gotoLabel, _returnLabel) \
  54 _gotoLabel: \
  55     if ((clipValue) > 255) clipValue = 255; else clipValue = 0; \
  56     goto _returnLabel;
  57
  58
  59     /* compute 2-D DCT descaling matrix */
  60 static void _il_fwft_rev_scale (
  61     iljpgPtr q,                /* pointer to quantization matrix */
  62     float   *s                 /* pointer to pointer to descaling matrix */
  63     )
  64 {
  65   int   i, j, prevValue, value, QIndex;
  66   double         a, c0, b[8], pi;
  67   iljpgPtr qptr;
  68   float *sptr;
  69
  70   pi = 4.0 * atan(1.0);
  71   c0 = 1.0 / (2.0 * 0.707106718);
  72   a =  1.0 / (2 * c0);
  73   b[0] = a / 2.0;
  74   for (i = 1; i < 8; i++) {
  75       a = cos (i * pi / 16.0) / 2.0;
  76       b[i] = a;
  77   }
  78
  79   /* descaling matrix including dequantization effects */
  80   /* Note: given Q table is zigzag'd, as in JIF stream.  De-zigzag *qptr.
  81      Also: if an entry in the Q table is zero, then: if it is the first entry,
  82      store 16 (?) in the Q table, otherwise store the previous Q table value.
  83   */
  84
  85   sptr = s;
  86   qptr = q;
  87   prevValue = 16;               /* in case 1st Q table value is 0 */
  88   QIndex = 0;
  89
  90   for (i = 0; i < 8; i++) {
  91       for (j = 0; j < 8; j++) {
  92           value = qptr[_iljpgZigzagTable[QIndex++]];
  93           if (value == 0)
  94               value = prevValue;
  95           *sptr++ = b[i] * b[j] * value;
  96           prevValue = value;
  97       }
  98   }
  99 }
 100
 101     /*  -------------------- _iljpgDeDCTInit -------------------------- */
 102     /*  Called by iljpgDecode() to init for DCT decoding.
 103     */
 104     ILJPG_PRIVATE_EXTERN
 105 iljpgError _iljpgDeDCTInit (
 106     iljpgDecodePrivPtr  pPriv
 107     )
 108 {
 109     iljpgDataPtr        pData;
 110     int        i;
 111
 112     pData = pPriv->pData;
 113
 114         /*  Build a "rev scale" table for each QTable; store into private */
 115     for (i = 0; i < 4; i++)
 116         pPriv->DCTRevScaleTables[i] = (float *)NULL;
 117
 118     for (i = 0; i < 4; i++) {
 119         if (pData->QTables[i]) {
 120             if (!(pPriv->DCTRevScaleTables[i] = (float *)ILJPG_MALLOC(sizeof(float) * 64)))
 121                 return ILJPG_ERROR_DECODE_MALLOC;
 122             _il_fwft_rev_scale (pData->QTables[i], pPriv->DCTRevScaleTables[i]);
 123             }
 124         }
 125
 126     return 0;
 127 }
 128
 129
 130     /*  -------------------- _iljpgDeDCTCleanup -------------------------- */
 131     /*  Called by iljpgDecode() to cleanup after DCT decoding.
 132     */
 133     ILJPG_PRIVATE_EXTERN
 134 iljpgError _iljpgDeDCTCleanup (
 135     iljpgDecodePrivPtr  pPriv
 136     )
 137 {
 138     int        i;
 139
 140         /*  Free any "rev scale" tables that were allocated in iljpgDeDCTInit() */
 141     for (i = 0; i < 4; i++) {
 142         if (pPriv->DCTRevScaleTables[i])
 143             ILJPG_FREE (pPriv->DCTRevScaleTables[i]);
 144         }
 145
 146     return 0;
 147 }
 148
 149
 150 /*
 151  NAME
 152                float implementation of inverse quantize and inverse dct.
 153                input and output in integer format.
 154                Output is clamped to the range 0-255.
 155  PURPOSE
 156   perform inverse quantization and inverse DCT of a 8x8 array. During
 157   inverse quantization, the input array is processed in dezigzag order.
 158
 159  NOTES
 160   includes the +128 shift in inverse DCT as per JPEG spec. This shift is
 161   accomplished by shifting the DC value by 128 prior to performing the
 162   inverse DCT.
 163
 164   The constants b1,..,b5 used by the fast DCT method are declared as
 165   register variables; this allows the 'c89' compiler to perform the
 166   multplies as floating point 32 bit multiplies rather than promoting
 167   to double followed by a double to float conversion.
 168   (4/29/92, V. Bhaskaran)
 169
 170  METHOD
 171    8x8 INVERSE DCT
 172
 173    This implementation is based on computation of a 8 point inverse DCT
 174    using a 16 point Winograd Fourier Transform.
 175    The winograd fourier transform method requires 5 multiplies and 29
 176    additions for a 8 point inverse dct.
 177
 178    Let T[] be a 8x8 input DCT array for which we need to perform the
 179    inverse DCT.
 180
 181          | t00 t01 t02 t03 t04 t05 t06 t07 |
 182          | t10 t11 t12 t13 t14 t15 t16 t17 |
 183          | t20 t21 t22 t23 t24 t25 t26 t27 |
 184    T[] = | t30 t31 t32 t33 t34 t35 t36 t37 |
 185          | t40 t41 t42 t43 t44 t45 t46 t47 |
 186          | t50 t51 t52 t53 t54 t55 t56 t57 |
 187          | t60 t61 t62 t63 t64 t65 t66 t67 |
 188          | t70 t71 t72 t73 t74 t75 t76 t77 |
 189
 190    1. The T[] matrix values are descaled by the inverse DCT denormalization
 191       values and the quantization matrix values.
 192       If we denote T[] = {t(i,j)}, S[] = {s(i,j)}, and X[] = {y(i,j)},
 193       then,
 194              X(i,j) = s(i,j) * t(i,j)
 195       Here, S[] is the descaling matrix and includes quantization matrix.
 196
 197       Define a[n] as
 198          a[n] = cos (n pi / 16) / 2 C[n],   pi = 3.1415....
 199          and,
 200                 C[0] = 1 / sqrt(2), C[1] = C[2] = ... = C[7] = 1,
 201       Define b[n] as
 202          b0 = a[0]
 203          bi = 2 a[i], i = 1,2,...,7
 204
 205     Then, descaling matrix S[] has the following structure
 206     | b0b0q00 b1b0q01 b2b0q02 b3b0q03 b4b0q04 b5b0q05 b6b0q06 b7b0q07 |
 207     | b0b1q10 b1b1q11 b2b1q12 b3b1q13 b4b1q14 b5b1q15 b6b1q16 b7b1q17 |
 208     | b0b2q20 b1b2q21 b2b2q22 b3b2q23 b4b2q24 b5b2q25 b6b2q26 b7b2q27 |
 209     | b0b3q30 b1b3q31 b2b3q32 b3b3q33 b4b3q34 b5b3q35 b6b3q36 b7b3q37 |
 210     | b0b4q40 b1b4q41 b2b4q42 b3b4q43 b4b4q44 b5b4q45 b6b4q46 b7b4q47 |
 211     | b0b5q50 b1b5q51 b2b5q52 b3b5q53 b4b5q54 b5b5q55 b6b5q56 b7b5q57 |
 212     | b0b6q60 b1b6q61 b2b6q62 b3b6q63 b4b6q64 b5b6q65 b6b6q66 b7b6q67 |
 213     | b0b7q70 b1b7q71 b2b7q72 b3b7q73 b4b7q74 b5b7q75 b6b7q76 b7b7q77 |
 214
 215     Note that q00,q01,...,q60,..,q77 is the quantization matrix specified
 216     during the compression stage.
 217     Note that in the above descaling matrix description,
 218     bibj = bjbi, and bibjqji implies multiplying bi,bj and qji.
 219     The descaling matrix can be precomputed at the start of the component
 220     scan (JPEG terminology).
 221
 222    2. After T[] has been descaled, for each column of the descaled matrix,
 223       X[], perform a 8 point inverse dct and write results back to X[] in
 224       column order.
 225       Note that input to 8 point inverse dct is out of order, i.e. x0, x4,
 226       x2, x6, x5, x1, x7, x3, but output is in order.
 227       y0, y1, y2, y3, y4, y5, y6, y7
 228    3. After all eight column inverse dcts have been computed, perform
 229       8 point inverse dct on each row of X[] and write results to Y[]
 230       in row order.
 231    4. The Y[] matrix values must be rounded to meet the specifications of
 232       the input data that was compressed. Typically, for image data, this
 233       implies restricting Y[] to take on values only in the range 0 - 255.
 234
 235   NOTES
 236     Author: V. Bhaskaran, HPL, PaloAlto. Telnet: 7-7153.
 237             bhaskara@hplvab.hpl.hp.com
 238     Version: 0 (5-29-92).
 239 */
 240
 241
 242     /*  -------------------- _iljpgDeDCTFull -------------------------- */
 243     /*  Do a full 8x8 inverse DCT, from *pSrc to *pDst, where each "scan line"
 244         in the 8x8 block pointed to by pDst is "nBytesPerRow" bytes away.
 245         pRevScale is from DCTRevScaleTables[i], where i is the Q table index
 246         for this component.
 247     */
 248     ILJPG_PRIVATE_EXTERN
 249 void _iljpgDeDCTFull (
 250     int       *pSrc,
 251     long                nBytesPerRow,
 252     iljpgPtr            ix,                 /* RETURNED */
 253     float      *pRevScale
 254     )
 255 {
 256   int   i;
 257   int   *zptr;
 258   float           ox[64];
 259   float in0, in1, in2, in3, in4, in5, in6, in7;
 260   float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 261   float tmp;
 262   int   clipValue;
 263   iljpgPtr ixaddr;
 264   float *oxaddr;
 265   /* Constants needed by the 16 point Winograd Fourier Transform for inv. DCT */
 266   float b1 =  1.41421356;
 267   float b2 = -2.61312587;
 268   float b3 =  1.41421356;
 269   float b4 =  1.08239220;
 270   float b5 =  0.76536686;
 271
 272
 273   oxaddr = ox;
 274   zptr  = &_iljpgZigzagTable[0]; /* zigzag scanning order */
 275   tmp = 128.0;                   /* JPEG +128 shift in spatial domain for DCT output */
 276
 277           /* do 1-D inverse DCT along columns. setup input data, which is out of order */
 278   for (i = 0; i < 8; i++, oxaddr++, zptr++, pRevScale++, tmp = 0.0) {
 279
 280       in0 = *(pSrc + zptr[0])  * pRevScale[0] + tmp;  /* Add 128.0 to first DCT only */
 281       in1 = *(pSrc + zptr[32]) * pRevScale[32];
 282       in2 = *(pSrc + zptr[16]) * pRevScale[16];
 283       in3 = *(pSrc + zptr[48]) * pRevScale[48];
 284       in4 = *(pSrc + zptr[40]) * pRevScale[40];
 285       in5 = *(pSrc + zptr[8])  * pRevScale[8];
 286       in6 = *(pSrc + zptr[56]) * pRevScale[56];
 287       in7 = *(pSrc + zptr[24]) * pRevScale[24];
 288
 289       /* Stage 0 */
 290       tmp4 = in4 - in7;
 291       tmp5 = in5 + in6;
 292       tmp6 = in5 - in6;
 293       tmp7 = in4 + in7;
 294
 295       /* Stage 1 */
 296       tmp2 = in2 - in3;
 297       tmp3 = in2 + in3;
 298       in5  = tmp5 - tmp7;
 299       in7  = tmp5 + tmp7;
 300
 301       /* Stage 1.1 */
 302       tmp  = tmp4 - tmp6;
 303       tmp  = b5 * tmp;
 304
 305       /* Stage 2 */
 306       in2  = b1 * tmp2;
 307       in4  = b2 * tmp4;
 308       tmp5 = b3 * in5;
 309       in6  = b4 * tmp6;
 310       tmp0 = in0 + in1;
 311       tmp1 = in0 - in1;
 312       in2 -= tmp3;
 313       in4 += tmp;
 314       in6 -= tmp;
 315
 316       /* Stage 3 */
 317       in6  -= in7;
 318       tmp5 -= in6;
 319
 320       /* Stage 4 */
 321       in0   = tmp0 + tmp3;
 322       in1   = tmp1 + in2;
 323       tmp2  = tmp1 - in2;
 324       in3   = tmp0 - tmp3;
 325       tmp4  = in4 + tmp5;
 326
 327       /* Stage 5 */
 328       *oxaddr       = in0  + in7;    /* y0 */
 329       *(oxaddr+8)   = in1  + in6;    /* y1 */
 330       *(oxaddr+16)  = tmp2 + tmp5;   /* y2 */
 331       *(oxaddr+24)  = in3  - tmp4;   /* y3 */
 332       *(oxaddr+32)  = in3  + tmp4;   /* y4 */
 333       *(oxaddr+40)  = tmp2 - tmp5;   /* y5 */
 334       *(oxaddr+48)  = in1  - in6;    /* y6 */
 335       *(oxaddr+56)  = in0  - in7;    /* y7 */
 336   }
 337
 338   ixaddr = ix;
 339   oxaddr = ox;
 340   for (i = 0; i < 8; i++) { /* do 1-D inverse DCT along rows */
 341
 342       /* setup input data - input data is out of order */
 343       in0 = *oxaddr;
 344       in1 = *(oxaddr+4);
 345       in2 = *(oxaddr+2);
 346       in3 = *(oxaddr+6);
 347       in4 = *(oxaddr+5);
 348       in5 = *(oxaddr+1);
 349       in6 = *(oxaddr+7);
 350       in7 = *(oxaddr+3);
 351
 352       /* Stage 0 */
 353       tmp4 = in4 - in7;
 354       tmp5 = in5 + in6;
 355       tmp6 = in5 - in6;
 356       tmp7 = in4 + in7;
 357
 358       /* Stage 1 */
 359       tmp2 = in2 - in3;
 360       tmp3 = in2 + in3;
 361       in5  = tmp5 - tmp7;
 362       in7  = tmp5 + tmp7;
 363
 364       /* Stage 1.1 */
 365       tmp  = tmp4 - tmp6;
 366       tmp  = b5 * tmp;
 367
 368       /* Stage 2 */
 369       tmp0 = in0 + in1;
 370       tmp1 = in0 - in1;
 371       in2  = b1 * tmp2;
 372       in4  = b2 * tmp4 + tmp;
 373       tmp5 = b3 * in5;
 374       in6  = b4 * tmp6 - tmp;
 375       in2 -= tmp3;
 376
 377       /* Stage 2.1 */
 378       in6  -= in7;
 379       tmp5 -= in6;
 380
 381       /* Stage 3 */
 382       in0   = tmp0 + tmp3;
 383       in1   = tmp1 + in2;
 384       tmp2  = tmp1 - in2;
 385       in3   = tmp0 - tmp3;
 386       tmp4  = in4 + tmp5;
 387
 388       /* Stage 4: clip values to 0..255 and store */
 389       clipValue = (int)(in0 + in7);     /* y0 */
 390       ILJPG_CLIP_256 (fClipG0, fClipR0)
 391       *ixaddr = clipValue;
 392       clipValue = (int)(in1 + in6);     /* y1 */
 393       ILJPG_CLIP_256 (fClipG1, fClipR1)
 394       *(ixaddr+1) = clipValue;
 395       clipValue = (int)(tmp2 + tmp5);   /* y2 */
 396       ILJPG_CLIP_256 (fClipG2, fClipR2)
 397       *(ixaddr+2) = clipValue;
 398       clipValue = (int)(in3  - tmp4);   /* y3 */
 399       ILJPG_CLIP_256 (fClipG3, fClipR3)
 400       *(ixaddr+3) = clipValue;
 401       clipValue = (int)(in3  + tmp4);   /* y4 */
 402       ILJPG_CLIP_256 (fClipG4, fClipR4)
 403       *(ixaddr+4) = clipValue;
 404       clipValue = (int)(tmp2 - tmp5);   /* y5 */
 405       ILJPG_CLIP_256 (fClipG5, fClipR5)
 406       *(ixaddr+5) = clipValue;
 407       clipValue = (int)(in1  - in6);    /* y6 */
 408       ILJPG_CLIP_256 (fClipG6, fClipR6)
 409       *(ixaddr+6) = clipValue;
 410       clipValue = (int)(in0  - in7);    /* y7 */
 411       ILJPG_CLIP_256 (fClipG7, fClipR7)
 412       *(ixaddr+7) = clipValue;
 413
 414       oxaddr += 8;
 415       ixaddr += nBytesPerRow;
 416   }
 417
 418         /*  Goto points for above clip macros */
 419   return;
 420
 421   ILJPG_CLIP_256_LABEL (fClipG0, fClipR0)
 422   ILJPG_CLIP_256_LABEL (fClipG1, fClipR1)
 423   ILJPG_CLIP_256_LABEL (fClipG2, fClipR2)
 424   ILJPG_CLIP_256_LABEL (fClipG3, fClipR3)
 425   ILJPG_CLIP_256_LABEL (fClipG4, fClipR4)
 426   ILJPG_CLIP_256_LABEL (fClipG5, fClipR5)
 427   ILJPG_CLIP_256_LABEL (fClipG6, fClipR6)
 428   ILJPG_CLIP_256_LABEL (fClipG7, fClipR7)
 429 }
 430
 431
 432
 433 /*
 434                 float implementation of inverse quantize and inverse dct.
 435                 Assumes that only the first 4x4 submatrix of DCT
 436                 coefficients is non-zero.
 437                 Input and output in integer format.
 438
 439                This reduces the fast inverse DCT multiply and add count
 440                from 80 multiplies, 464 additions to 60 multiplies,
 441                252 additions.
 442  PURPOSE
 443   perform inverse quantization and inverse DCT of a 8x8 array.
 444   Data is dezigzagged during the inverse quantization process.
 445
 446  NOTES
 447   includes the +128 shift in inverse DCT as per JPEG spec. This shift is
 448   accomplished by shifting the DC value by 128 prior to performing the
 449   inverse DCT.
 450  METHOD
 451    8x8 INVERSE DCT
 452
 453    This implementation is based on computation of a 8 point inverse DCT
 454    using a 16 point Winograd Fourier Transform.
 455    The winograd fourier transform method requires 5 multiplies and 29
 456    additions for a 8 point inverse dct.
 457
 458    Let T[] be a 8x8 input DCT array for which we need to perform the
 459    inverse DCT.
 460    Note that the matrix shown below suggests that the DCT matrix has
 461    only 16 non-zero coefficients and the coefficient locations are
 462    as indicated in the matrix shown below.
 463
 464          | t00 t01 t02 t03 --- --- --- --- |
 465          | t10 t11 t12 t13 --- --- --- --- |
 466          | t20 t21 t22 t23 --- --- --- --- |
 467    T[] = | t30 t31 t32 t33 --- --- --- --- |
 468          | --- --- --- --- --- --- --- --- |
 469          | --- --- --- --- --- --- --- --- |
 470          | --- --- --- --- --- --- --- --- |
 471          | --- --- --- --- --- --- --- --- |
 472
 473    1. The T[] matrix values are descaled by the inverse DCT denormalization
 474       values and the quantization matrix values.
 475       If we denote T[] = {t(i,j)}, S[] = {s(i,j)}, and X[] = {y(i,j)},
 476       then,
 477              X(i,j) = s(i,j) * t(i,j)
 478       Here, S[] is the descaling matrix and includes quantization matrix.
 479       Since only 1/4th of the T[] matrix is non-zero we need to descale
 480       only these components. The rest are simply accounted for by setting
 481       the corresponding locations in X[] to zero.
 482
 483       Define a[n] as
 484          a[n] = cos (n pi / 16) / 2 C[n],   pi = 3.1415....
 485          and,
 486                 C[0] = 1 / sqrt(2), C[1] = C[2] = ... = C[7] = 1,
 487       Define b[n] as
 488          b0 = a[0]
 489          bi = 2 a[i], i = 1,2,...,7
 490
 491     Then, descaling matrix S[] has the following structure
 492     | b0b0q00 b1b0q01 b2b0q02 b3b0q03 b4b0q04 b5b0q05 b6b0q06 b7b0q07 |
 493     | b0b1q10 b1b1q11 b2b1q12 b3b1q13 b4b1q14 b5b1q15 b6b1q16 b7b1q17 |
 494     | b0b2q20 b1b2q21 b2b2q22 b3b2q23 b4b2q24 b5b2q25 b6b2q26 b7b2q27 |
 495     | b0b3q30 b1b3q31 b2b3q32 b3b3q33 b4b3q34 b5b3q35 b6b3q36 b7b3q37 |
 496     | b0b4q40 b1b4q41 b2b4q42 b3b4q43 b4b4q44 b5b4q45 b6b4q46 b7b4q47 |
 497     | b0b5q50 b1b5q51 b2b5q52 b3b5q53 b4b5q54 b5b5q55 b6b5q56 b7b5q57 |
 498     | b0b6q60 b1b6q61 b2b6q62 b3b6q63 b4b6q64 b5b6q65 b6b6q66 b7b6q67 |
 499     | b0b7q70 b1b7q71 b2b7q72 b3b7q73 b4b7q74 b5b7q75 b6b7q76 b7b7q77 |
 500
 501     Note that q00,q01,...,q60,..,q77 is the quantization matrix specified
 502     during the compression stage.
 503     Note that in the above descaling matrix description,
 504     bibj = bjbi, and bibjqji implies multiplying bi,bj and qji.
 505     The descaling matrix can be precomputed at the start of the component
 506     scan (JPEG terminology).
 507
 508    2. After T[] has been descaled, for each column of the descaled matrix,
 509       X[], perform a 8 point inverse dct and write results back to X[] in
 510       column order. A pruned dct which uses 4 input points and generates
 511       8 output points is used.
 512       Note that input to 8 point inverse dct is out of order, i.e. x0, x4,
 513       x2, x6, x5, x1, x7, x3, but output is in order.
 514       y0, y1, y2, y3, y4, y5, y6, y7
 515       Since we know that only the first four components of each column is
 516       nonzero, we exploit this in pruning our fast DCT computation, thus,
 517       reducing the multiplies/adds by a factor of two.
 518
 519    3. After the first four column inverse dcts have been computed, perform
 520       8 point inverse dct on each row of X[] and write results to Y[]
 521       in row order.
 522       Since we know that the first four components along each row could
 523       be non-zero (the remaining have to be zero as per the structure of
 524       the X[] matrix), we can exploit this in pruning the fast DCT
 525       computation, and thereby, reducing the multiplies/adds by a factor
 526       of two.
 527    4. The Y[] matrix values must be rounded to meet the specifications of
 528       the input data that was compressed. Typically, for image data, this
 529       implies restricting Y[] to take on values only in the range 0 - 255.
 530
 531    NOTES
 532     Author: V. Bhaskaran, HPL, PaloAlto. Telnet: 7-7153.
 533     Version: 0 (5-29-92).
 534 */
 535
 536     /*  -------------------- _iljpgDeDCT4x4 -------------------------- */
 537     /*  Do an inverse DCT, from *pSrc to *pDst, each a ptr to 64 ints.
 538         Assumes that only the top-left 4x4 DCT coefficients are non-zero.
 539         pRevScale is from DCTRevScaleTables[i], where i is the Q table index
 540         for this component.
 541     */
 542     ILJPG_PRIVATE_EXTERN
 543 void _iljpgDeDCT4x4 (
 544     int       *pSrc,
 545     long                nBytesPerRow,
 546     iljpgPtr            ix,                 /* RETURNED */
 547     float      *pRevScale
 548     )
 549 {
 550   int   i;
 551   int   *zptr;
 552   float           ox[64];
 553   float in0, in2, in3, in4, in5, in7;
 554   float tmp0, tmp1, tmp2, tmp5, tmp6, tmp7;
 555   float tmp;
 556   float *oxaddr;
 557   int   clipValue;
 558   iljpgPtr ixaddr;
 559   /* Constants needed by the 16 point Winograd Fourier Transform for inv. DCT */
 560   float b1 =  1.41421356;
 561   float b2 = -2.61312587;
 562   float b3 =  1.41421356;
 563   float b4 =  1.08239220;
 564   float b5 =  0.76536686;
 565
 566 #ifdef NOTDEF
 567   orxptr = ox;
 568   rsptr  = pRevScale;
 569   rzptr  = &_iljpgZigzagTable[0];
 570   /* descale-dequantize dct coefficients - need to do this for 4x4 submatrix */
 571   for (i = 0; i < 4; i++, orxptr += 8, rsptr += 8, rzptr += 8) {
 572       for (j = 0, ocxptr = orxptr, csptr = rsptr, czptr = rzptr;
 573            j < 4; j++, ocxptr++, czptr++) {
 574            k = *(pSrc + *czptr);
 575            *ocxptr   = (k * *csptr++);
 576       }
 577   }
 578
 579   *ox += 128.0; /* JPEG +128 shift in spatial domain for DCT output */
 580 #endif
 581
 582   oxaddr = ox;
 583   zptr  = &_iljpgZigzagTable[0]; /* zigzag scanning order */
 584   tmp = 128.0;                   /* JPEG +128 shift in spatial domain for DCT output */
 585
 586           /* do 1-D inverse DCT along columns. setup input data, which is out of order */
 587   for (i = 0; i < 4; i++, oxaddr++, zptr++, pRevScale++, tmp = 0.0) {
 588
 589       in0 = *(pSrc + zptr[0])  * pRevScale[0] + tmp;  /* Add 128.0 to first DCT only */
 590       in2 = *(pSrc + zptr[16])  * pRevScale[16];
 591       in5 = *(pSrc + zptr[8])  * pRevScale[8];
 592       in7 = *(pSrc + zptr[24])  * pRevScale[24];
 593
 594 #ifdef NOTDEF
 595       in0 = *oxaddr;
 596       in2 = *(oxaddr+16);
 597       in5 = *(oxaddr+8);
 598       in7 = *(oxaddr+24);
 599 #endif
 600
 601       /* Stage 1 */
 602       tmp5 = in5 - in7;
 603       tmp7 = in5 + in7;
 604
 605       /* Stage 2 */
 606       tmp  = -in7 -in5;
 607       tmp  = b5 * tmp;
 608
 609       tmp2  = b1 * in2;
 610       tmp6  = b4 * in5;
 611       in4   = b2 * -in7;
 612       in5   = b3 * tmp5;
 613       in4  += tmp;
 614       tmp6 -= tmp;
 615       tmp2 -= in2;
 616
 617       /* Stage 3 */
 618       tmp6 -= tmp7;
 619       in5  -= tmp6;
 620
 621       /* Stage 4 */
 622       tmp0  = in0 + in2;
 623       tmp1  = in0 + tmp2;
 624       in3   = in0 - in2;
 625       in4  += in5;
 626       tmp2  = in0 - tmp2;
 627
 628       /* Stage 5 */
 629       *oxaddr       = tmp0 + tmp7;  /* y0 */
 630       *(oxaddr+8)   = tmp1 + tmp6;  /* y1 */
 631       *(oxaddr+16)  = tmp2 + in5;   /* y2 */
 632       *(oxaddr+24)  = in3  - in4;   /* y3 */
 633       *(oxaddr+32)  = in3  + in4;   /* y4 */
 634       *(oxaddr+40)  = tmp2 - in5;   /* y5 */
 635       *(oxaddr+48)  = tmp1 - tmp6;  /* y6 */
 636       *(oxaddr+56)  = tmp0 - tmp7;  /* y7 */
 637   }
 638
 639   ixaddr = ix;
 640   oxaddr = ox;
 641   for (i = 0; i < 8; i++) { /* do 1-D inverse DCT along rows */
 642
 643       /* setup input data - input data is out of order */
 644       in0 = *oxaddr;
 645       in2 = *(oxaddr+2);
 646       in5 = *(oxaddr+1);
 647       in7 = *(oxaddr+3);
 648
 649       /* Stage 1 */
 650       tmp5 = in5 - in7;
 651       tmp7 = in5 + in7;
 652
 653       /* Stage 2 */
 654       tmp  = -in7 -in5;
 655       tmp  = b5 * tmp;
 656
 657       tmp2 = b1 * in2;
 658       tmp6 = b4 * in5;
 659       in4  = b2 * -in7;
 660       in5  = b3 * tmp5;
 661       in4  += tmp;
 662       tmp6 -= tmp;
 663
 664       /* Stage 3 */
 665       tmp2 -= in2;
 666
 667       /* Stage 3.1 */
 668       tmp6 -= tmp7;
 669       in5  -= tmp6;
 670
 671       /* Stage 4 */
 672       tmp0  = in0 + in2;
 673       tmp1  = in0 + tmp2;
 674       in3   = in0 - in2;
 675       in4  += in5;
 676       tmp2  = in0 - tmp2;
 677
 678       /* Stage 5 */
 679
 680       clipValue = (int)(tmp0  + tmp7);    /* y0 */
 681       ILJPG_CLIP_256 (pClipG0, pClipR0)
 682       *ixaddr = clipValue;
 683       clipValue = (int)(tmp1 + tmp6);   /* y1 */
 684       ILJPG_CLIP_256 (pClipG1, pClipR1)
 685       *(ixaddr+1) = clipValue;
 686       clipValue = (int)(tmp2 + in5);    /* y2 */
 687       ILJPG_CLIP_256 (pClipG2, pClipR2)
 688       *(ixaddr+2) = clipValue;
 689       clipValue = (int)(in3  - in4);    /* y3 */
 690       ILJPG_CLIP_256 (pClipG3, pClipR3)
 691       *(ixaddr+3) = clipValue;
 692       clipValue = (int)(in3  + in4);    /* y4 */
 693       ILJPG_CLIP_256 (pClipG4, pClipR4)
 694       *(ixaddr+4) = clipValue;
 695       clipValue = (int)(tmp2 - in5);    /* y5 */
 696       ILJPG_CLIP_256 (pClipG5, pClipR5)
 697       *(ixaddr+5) = clipValue;
 698       clipValue = (int)(tmp1 - tmp6);   /* y6 */
 699       ILJPG_CLIP_256 (pClipG6, pClipR6)
 700       *(ixaddr+6) = clipValue;
 701       clipValue = (int)(tmp0 - tmp7);   /* y7 */
 702       ILJPG_CLIP_256 (pClipG7, pClipR7)
 703       *(ixaddr+7) = clipValue;
 704
 705       ixaddr += nBytesPerRow;
 706       oxaddr += 8;
 707
 708   }
 709
 710         /*  Goto points for above clip macros */
 711   return;
 712
 713   ILJPG_CLIP_256_LABEL (pClipG0, pClipR0)
 714   ILJPG_CLIP_256_LABEL (pClipG1, pClipR1)
 715   ILJPG_CLIP_256_LABEL (pClipG2, pClipR2)
 716   ILJPG_CLIP_256_LABEL (pClipG3, pClipR3)
 717   ILJPG_CLIP_256_LABEL (pClipG4, pClipR4)
 718   ILJPG_CLIP_256_LABEL (pClipG5, pClipR5)
 719   ILJPG_CLIP_256_LABEL (pClipG6, pClipR6)
 720   ILJPG_CLIP_256_LABEL (pClipG7, pClipR7)
 721 }
 722
 723 /*
 724   float implementation of inverse quantize and inverse dct.
 725  PURPOSE
 726   perform inverse quantization and inverse DCT of a 8x8 array.
 727   Assumes that all coefficients of 8x8 DCT matrix except DC coefficient
 728   are zero.
 729   Input and output in integer format.
 730  NOTES
 731   includes the +128 shift in inverse DCT as per JPEG spec. This shift is
 732   accomplished by shifting the DC value by 128 prior to performing the
 733   inverse DCT.
 734  METHOD
 735    8x8 INVERSE DCT
 736
 737    Let T[] be a 8x8 input DCT array for which we need to perform the
 738    inverse DCT.
 739    Note that the matrix shown below suggests that the DCT matrix has
 740    only 16 non-zero coefficients and the coefficient locations are
 741    as indicated in the matrix shown below.
 742
 743          | t00 --- --- --- --- --- --- --- |
 744          | --- --- --- --- --- --- --- --- |
 745          | --- --- --- --- --- --- --- --- |
 746    T[] = | --- --- --- --- --- --- --- --- |
 747          | --- --- --- --- --- --- --- --- |
 748          | --- --- --- --- --- --- --- --- |
 749          | --- --- --- --- --- --- --- --- |
 750          | --- --- --- --- --- --- --- --- |
 751
 752    1. The T[] matrix values are descaled by the inverse DCT denormalization
 753       values and the quantization matrix values.
 754       If we denote T[] = {t(i,j)}, S[] = {s(i,j)}, and X[] = {y(i,j)},
 755       then,
 756              X(i,j) = s(i,j) * t(i,j)
 757       Here, S[] is the descaling matrix and includes quantization matrix.
 758       Since only 1/4th of the T[] matrix is non-zero we need to descale
 759       only these components. The rest are simply accounted for by setting
 760       the corresponding locations in X[] to zero.
 761
 762       Define a[n] as
 763          a[n] = cos (n pi / 16) / 2 C[n],   pi = 3.1415....
 764          and,
 765                 C[0] = 1 / sqrt(2), C[1] = C[2] = ... = C[7] = 1,
 766       Define b[n] as
 767          b0 = a[0]
 768          bi = 2 a[i], i = 1,2,...,7
 769
 770     Then, descaling matrix S[] has the following structure
 771     | b0b0q00 b1b0q01 b2b0q02 b3b0q03 b4b0q04 b5b0q05 b6b0q06 b7b0q07 |
 772     | b0b1q10 b1b1q11 b2b1q12 b3b1q13 b4b1q14 b5b1q15 b6b1q16 b7b1q17 |
 773     | b0b2q20 b1b2q21 b2b2q22 b3b2q23 b4b2q24 b5b2q25 b6b2q26 b7b2q27 |
 774     | b0b3q30 b1b3q31 b2b3q32 b3b3q33 b4b3q34 b5b3q35 b6b3q36 b7b3q37 |
 775     | b0b4q40 b1b4q41 b2b4q42 b3b4q43 b4b4q44 b5b4q45 b6b4q46 b7b4q47 |
 776     | b0b5q50 b1b5q51 b2b5q52 b3b5q53 b4b5q54 b5b5q55 b6b5q56 b7b5q57 |
 777     | b0b6q60 b1b6q61 b2b6q62 b3b6q63 b4b6q64 b5b6q65 b6b6q66 b7b6q67 |
 778     | b0b7q70 b1b7q71 b2b7q72 b3b7q73 b4b7q74 b5b7q75 b6b7q76 b7b7q77 |
 779
 780     Note that q00,q01,...,q60,..,q77 is the quantization matrix specified
 781     during the compression stage.
 782     Note that in the above descaling matrix description,
 783     bibj = bjbi, and bibjqji implies multiplying bi,bj and qji.
 784     The descaling matrix can be precomputed at the start of the component
 785     scan (JPEG terminology).
 786
 787    2. After T[] has been descaled, compute y(0,0) = t(0,0) * s(0,0) + 128.
 788       set y(i,j) = y(0,0), for all i = 0,7, j = 0,7.
 789    4. The Y[] matrix values must be rounded to meet the specifications of
 790       the input data that was compressed. Typically, for image data, this
 791       implies restricting Y[] to take on values only in the range 0 - 255.
 792    NOTES
 793     Author: V. Bhaskaran, HPL, PaloAlto. Telnet: 7-7153.
 794     Version: 0 (5-29-92).
 795 */
 796
 797     /*  -------------------- _iljpgDeDCTDCOnly -------------------------- */
 798     /*  Do an inverse DCT, from *pSrc to *pDst, each a ptr to 64 ints.
 799         Assumes that only the top-left coefficient (the DC) is non-zero.
 800         pRevScale is from DCTRevScaleTables[i], where i is the Q table index
 801         for this component.
 802     */
 803     ILJPG_PRIVATE_EXTERN
 804 void _iljpgDeDCTDCOnly (
 805     int                *pSrc,
 806     long                nBytesPerRow,
 807     iljpgPtr   pDst,               /* RETURNED */
 808     float              *pRevScale
 809     )
 810 {
 811   int   i, dc;
 812   int   j;
 813
 814   j       = *pSrc;
 815   j = (j < -2047) ? -2047 : ((j > 2047) ? 2047 : j);
 816   dc      = (int)(j * *pRevScale + 128.0);
 817   if (dc < 0) dc = 0; else if (dc > 255) dc = 255;
 818
 819   /*
 820      since only DC value is nonzero, inverse DCT is simply a copy of the
 821      descaled and dequantized DC value copied to rest of 8x8 array.
 822   */
 823   for (i = 0; i < 8; i++, pDst += nBytesPerRow) {
 824     pDst[0] = dc;
 825     pDst[1] = dc;
 826     pDst[2] = dc;
 827     pDst[3] = dc;
 828     pDst[4] = dc;
 829     pDst[5] = dc;
 830     pDst[6] = dc;
 831     pDst[7] = dc;
 832     }
 833 }
 834
 835