1 // SPDX-License-Identifier: (GPL-2.0 or BSD-2-Clause)
3 * Huffman decoder, part of New Generation Entropy library
4 * Copyright (C) 2013-2016, Yann Collet.
6 * You can contact the author at :
7 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
10 /* **************************************************************
12 ****************************************************************/
13 #define FORCE_INLINE static __always_inline
15 /* **************************************************************
17 ****************************************************************/
18 #include "bitstream.h" /* BIT_* */
19 #include "fse.h" /* header compression */
21 #include <linux/compiler.h>
22 #include <linux/kernel.h>
23 #include <linux/string.h> /* memcpy, memset */
25 /* **************************************************************
27 ****************************************************************/
28 #define HUF_STATIC_ASSERT(c) \
30 enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
31 } /* use only *after* variable declarations */
33 /*-***************************/
34 /* generic DTableDesc */
35 /*-***************************/
44 static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
47 memcpy(&dtd, table, sizeof(dtd));
51 /*-***************************/
52 /* single-symbol decoding */
53 /*-***************************/
58 } HUF_DEltX2; /* single-symbol decoding */
60 size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
65 void *const dtPtr = DTable + 1;
66 HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
70 size_t spaceUsed32 = 0;
72 rankVal = (U32 *)workspace + spaceUsed32;
73 spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
74 huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
75 spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
77 if ((spaceUsed32 << 2) > workspaceSize)
78 return ERROR(tableLog_tooLarge);
79 workspace = (U32 *)workspace + spaceUsed32;
80 workspaceSize -= (spaceUsed32 << 2);
82 HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
83 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
85 iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
86 if (HUF_isError(iSize))
91 DTableDesc dtd = HUF_getDTableDesc(DTable);
92 if (tableLog > (U32)(dtd.maxTableLog + 1))
93 return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
95 dtd.tableLog = (BYTE)tableLog;
96 memcpy(DTable, &dtd, sizeof(dtd));
99 /* Calculate starting value for each rank */
101 U32 n, nextRankStart = 0;
102 for (n = 1; n < tableLog + 1; n++) {
103 U32 const curr = nextRankStart;
104 nextRankStart += (rankVal[n] << (n - 1));
112 for (n = 0; n < nbSymbols; n++) {
113 U32 const w = huffWeight[n];
114 U32 const length = (1 << w) >> 1;
118 D.nbBits = (BYTE)(tableLog + 1 - w);
119 for (u = rankVal[w]; u < rankVal[w] + length; u++)
121 rankVal[w] += length;
128 static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
130 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
131 BYTE const c = dt[val].byte;
132 BIT_skipBits(Dstream, dt[val].nbBits);
136 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
138 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
139 if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
140 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
142 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
144 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
146 FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
148 BYTE *const pStart = p;
150 /* up to 4 symbols at a time */
151 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
152 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
153 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
154 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
155 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
158 /* closer to the end */
159 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
160 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
162 /* no more data to retrieve from bitstream, hence no need to reload */
164 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
166 return pEnd - pStart;
169 static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
171 BYTE *op = (BYTE *)dst;
172 BYTE *const oend = op + dstSize;
173 const void *dtPtr = DTable + 1;
174 const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
176 DTableDesc const dtd = HUF_getDTableDesc(DTable);
177 U32 const dtLog = dtd.tableLog;
180 size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
181 if (HUF_isError(errorCode))
185 HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
188 if (!BIT_endOfDStream(&bitD))
189 return ERROR(corruption_detected);
194 size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
196 DTableDesc dtd = HUF_getDTableDesc(DTable);
197 if (dtd.tableType != 0)
198 return ERROR(GENERIC);
199 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
202 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
204 const BYTE *ip = (const BYTE *)cSrc;
206 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
207 if (HUF_isError(hSize))
209 if (hSize >= cSrcSize)
210 return ERROR(srcSize_wrong);
214 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
217 static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
221 return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
224 const BYTE *const istart = (const BYTE *)cSrc;
225 BYTE *const ostart = (BYTE *)dst;
226 BYTE *const oend = ostart + dstSize;
227 const void *const dtPtr = DTable + 1;
228 const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
235 size_t const length1 = ZSTD_readLE16(istart);
236 size_t const length2 = ZSTD_readLE16(istart + 2);
237 size_t const length3 = ZSTD_readLE16(istart + 4);
238 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
239 const BYTE *const istart1 = istart + 6; /* jumpTable */
240 const BYTE *const istart2 = istart1 + length1;
241 const BYTE *const istart3 = istart2 + length2;
242 const BYTE *const istart4 = istart3 + length3;
243 const size_t segmentSize = (dstSize + 3) / 4;
244 BYTE *const opStart2 = ostart + segmentSize;
245 BYTE *const opStart3 = opStart2 + segmentSize;
246 BYTE *const opStart4 = opStart3 + segmentSize;
248 BYTE *op2 = opStart2;
249 BYTE *op3 = opStart3;
250 BYTE *op4 = opStart4;
252 DTableDesc const dtd = HUF_getDTableDesc(DTable);
253 U32 const dtLog = dtd.tableLog;
255 if (length4 > cSrcSize)
256 return ERROR(corruption_detected); /* overflow */
258 size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
259 if (HUF_isError(errorCode))
263 size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
264 if (HUF_isError(errorCode))
268 size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
269 if (HUF_isError(errorCode))
273 size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
274 if (HUF_isError(errorCode))
278 /* 16-32 symbols per loop (4-8 symbols per stream) */
279 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
280 for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
281 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
282 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
283 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
284 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
285 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
286 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
287 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
288 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
289 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
290 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
291 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
292 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
293 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
294 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
295 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
296 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
297 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
300 /* check corruption */
302 return ERROR(corruption_detected);
304 return ERROR(corruption_detected);
306 return ERROR(corruption_detected);
307 /* note : op4 supposed already verified within main loop */
309 /* finish bitStreams one by one */
310 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
311 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
312 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
313 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
316 endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
318 return ERROR(corruption_detected);
325 size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
327 DTableDesc dtd = HUF_getDTableDesc(DTable);
328 if (dtd.tableType != 0)
329 return ERROR(GENERIC);
330 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
333 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
335 const BYTE *ip = (const BYTE *)cSrc;
337 size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
338 if (HUF_isError(hSize))
340 if (hSize >= cSrcSize)
341 return ERROR(srcSize_wrong);
345 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
348 /* *************************/
349 /* double-symbols decoding */
350 /* *************************/
355 } HUF_DEltX4; /* double-symbols decoding */
362 /* HUF_fillDTableX4Level2() :
363 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
364 static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
365 const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
368 U32 rankVal[HUF_TABLELOG_MAX + 1];
370 /* get pre-calculated rankVal */
371 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
373 /* fill skipped values */
375 U32 i, skipSize = rankVal[minWeight];
376 ZSTD_writeLE16(&(DElt.sequence), baseSeq);
377 DElt.nbBits = (BYTE)(consumed);
379 for (i = 0; i < skipSize; i++)
386 for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
387 const U32 symbol = sortedSymbols[s].symbol;
388 const U32 weight = sortedSymbols[s].weight;
389 const U32 nbBits = nbBitsBaseline - weight;
390 const U32 length = 1 << (sizeLog - nbBits);
391 const U32 start = rankVal[weight];
393 const U32 end = start + length;
395 ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
396 DElt.nbBits = (BYTE)(nbBits + consumed);
400 } while (i < end); /* since length >= 1 */
402 rankVal[weight] += length;
407 typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
408 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
410 static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
411 rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
413 U32 rankVal[HUF_TABLELOG_MAX + 1];
414 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
415 const U32 minBits = nbBitsBaseline - maxWeight;
418 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
421 for (s = 0; s < sortedListSize; s++) {
422 const U16 symbol = sortedList[s].symbol;
423 const U32 weight = sortedList[s].weight;
424 const U32 nbBits = nbBitsBaseline - weight;
425 const U32 start = rankVal[weight];
426 const U32 length = 1 << (targetLog - nbBits);
428 if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
430 int minWeight = nbBits + scaleLog;
433 sortedRank = rankStart[minWeight];
434 HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
435 sortedListSize - sortedRank, nbBitsBaseline, symbol);
438 ZSTD_writeLE16(&(DElt.sequence), symbol);
439 DElt.nbBits = (BYTE)(nbBits);
442 U32 const end = start + length;
444 for (u = start; u < end; u++)
448 rankVal[weight] += length;
452 size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
454 U32 tableLog, maxW, sizeOfSort, nbSymbols;
455 DTableDesc dtd = HUF_getDTableDesc(DTable);
456 U32 const maxTableLog = dtd.maxTableLog;
458 void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
459 HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
462 rankValCol_t *rankVal;
465 sortedSymbol_t *sortedSymbol;
467 size_t spaceUsed32 = 0;
469 HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
471 rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
472 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
473 rankStats = (U32 *)workspace + spaceUsed32;
474 spaceUsed32 += HUF_TABLELOG_MAX + 1;
475 rankStart0 = (U32 *)workspace + spaceUsed32;
476 spaceUsed32 += HUF_TABLELOG_MAX + 2;
477 sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
478 spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
479 weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
480 spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
482 if ((spaceUsed32 << 2) > workspaceSize)
483 return ERROR(tableLog_tooLarge);
484 workspace = (U32 *)workspace + spaceUsed32;
485 workspaceSize -= (spaceUsed32 << 2);
487 rankStart = rankStart0 + 1;
488 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
490 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
491 if (maxTableLog > HUF_TABLELOG_MAX)
492 return ERROR(tableLog_tooLarge);
493 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
495 iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
496 if (HUF_isError(iSize))
500 if (tableLog > maxTableLog)
501 return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
504 for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
505 } /* necessarily finds a solution before 0 */
507 /* Get start index of each weight */
509 U32 w, nextRankStart = 0;
510 for (w = 1; w < maxW + 1; w++) {
511 U32 curr = nextRankStart;
512 nextRankStart += rankStats[w];
515 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
516 sizeOfSort = nextRankStart;
519 /* sort symbols by weight */
522 for (s = 0; s < nbSymbols; s++) {
523 U32 const w = weightList[s];
524 U32 const r = rankStart[w]++;
525 sortedSymbol[r].symbol = (BYTE)s;
526 sortedSymbol[r].weight = (BYTE)w;
528 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
533 U32 *const rankVal0 = rankVal[0];
535 int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
538 for (w = 1; w < maxW + 1; w++) {
539 U32 curr = nextRankVal;
540 nextRankVal += rankStats[w] << (w + rescale);
545 U32 const minBits = tableLog + 1 - maxW;
547 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
548 U32 *const rankValPtr = rankVal[consumed];
550 for (w = 1; w < maxW + 1; w++) {
551 rankValPtr[w] = rankVal0[w] >> consumed;
557 HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
559 dtd.tableLog = (BYTE)maxTableLog;
561 memcpy(DTable, &dtd, sizeof(dtd));
565 static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
567 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
568 memcpy(op, dt + val, 2);
569 BIT_skipBits(DStream, dt[val].nbBits);
570 return dt[val].length;
573 static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
575 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
576 memcpy(op, dt + val, 1);
577 if (dt[val].length == 1)
578 BIT_skipBits(DStream, dt[val].nbBits);
580 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
581 BIT_skipBits(DStream, dt[val].nbBits);
582 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
583 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
584 DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
590 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
592 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
593 if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
594 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
596 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
598 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
600 FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
602 BYTE *const pStart = p;
604 /* up to 8 symbols at a time */
605 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
606 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
607 HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
608 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
609 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
612 /* closer to end : up to 2 symbols at a time */
613 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
614 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
616 while (p <= pEnd - 2)
617 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
620 p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
625 static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
631 size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
632 if (HUF_isError(errorCode))
638 BYTE *const ostart = (BYTE *)dst;
639 BYTE *const oend = ostart + dstSize;
640 const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
641 const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
642 DTableDesc const dtd = HUF_getDTableDesc(DTable);
643 HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
647 if (!BIT_endOfDStream(&bitD))
648 return ERROR(corruption_detected);
654 size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
656 DTableDesc dtd = HUF_getDTableDesc(DTable);
657 if (dtd.tableType != 1)
658 return ERROR(GENERIC);
659 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
662 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
664 const BYTE *ip = (const BYTE *)cSrc;
666 size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
667 if (HUF_isError(hSize))
669 if (hSize >= cSrcSize)
670 return ERROR(srcSize_wrong);
674 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
677 static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
680 return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
683 const BYTE *const istart = (const BYTE *)cSrc;
684 BYTE *const ostart = (BYTE *)dst;
685 BYTE *const oend = ostart + dstSize;
686 const void *const dtPtr = DTable + 1;
687 const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
694 size_t const length1 = ZSTD_readLE16(istart);
695 size_t const length2 = ZSTD_readLE16(istart + 2);
696 size_t const length3 = ZSTD_readLE16(istart + 4);
697 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
698 const BYTE *const istart1 = istart + 6; /* jumpTable */
699 const BYTE *const istart2 = istart1 + length1;
700 const BYTE *const istart3 = istart2 + length2;
701 const BYTE *const istart4 = istart3 + length3;
702 size_t const segmentSize = (dstSize + 3) / 4;
703 BYTE *const opStart2 = ostart + segmentSize;
704 BYTE *const opStart3 = opStart2 + segmentSize;
705 BYTE *const opStart4 = opStart3 + segmentSize;
707 BYTE *op2 = opStart2;
708 BYTE *op3 = opStart3;
709 BYTE *op4 = opStart4;
711 DTableDesc const dtd = HUF_getDTableDesc(DTable);
712 U32 const dtLog = dtd.tableLog;
714 if (length4 > cSrcSize)
715 return ERROR(corruption_detected); /* overflow */
717 size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
718 if (HUF_isError(errorCode))
722 size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
723 if (HUF_isError(errorCode))
727 size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
728 if (HUF_isError(errorCode))
732 size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
733 if (HUF_isError(errorCode))
737 /* 16-32 symbols per loop (4-8 symbols per stream) */
738 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
739 for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
740 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
741 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
742 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
743 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
744 HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
745 HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
746 HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
747 HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
748 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
749 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
750 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
751 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
752 HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
753 HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
754 HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
755 HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
757 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
760 /* check corruption */
762 return ERROR(corruption_detected);
764 return ERROR(corruption_detected);
766 return ERROR(corruption_detected);
767 /* note : op4 already verified within main loop */
769 /* finish bitStreams one by one */
770 HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
771 HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
772 HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
773 HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
777 U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
779 return ERROR(corruption_detected);
787 size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
789 DTableDesc dtd = HUF_getDTableDesc(DTable);
790 if (dtd.tableType != 1)
791 return ERROR(GENERIC);
792 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
795 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
797 const BYTE *ip = (const BYTE *)cSrc;
799 size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
800 if (HUF_isError(hSize))
802 if (hSize >= cSrcSize)
803 return ERROR(srcSize_wrong);
807 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
810 /* ********************************/
811 /* Generic decompression selector */
812 /* ********************************/
814 size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
816 DTableDesc const dtd = HUF_getDTableDesc(DTable);
817 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
818 : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
821 size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
823 DTableDesc const dtd = HUF_getDTableDesc(DTable);
824 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
825 : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
832 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
833 /* single, double, quad */
834 {{0, 0}, {1, 1}, {2, 2}}, /* Q==0 : impossible */
835 {{0, 0}, {1, 1}, {2, 2}}, /* Q==1 : impossible */
836 {{38, 130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
837 {{448, 128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
838 {{556, 128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
839 {{714, 128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
840 {{883, 128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
841 {{897, 128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
842 {{926, 128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
843 {{947, 128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
844 {{1107, 128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
845 {{1177, 128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
846 {{1242, 128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
847 {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
848 {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
849 {{722, 128}, {1891, 145}, {1936, 146}}, /* Q ==15 : 93-99% */
852 /** HUF_selectDecoder() :
853 * Tells which decoder is likely to decode faster,
854 * based on a set of pre-determined metrics.
855 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
856 * Assumption : 0 < cSrcSize < dstSize <= 128 KB */
857 U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
859 /* decoder timing evaluation */
860 U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
861 U32 const D256 = (U32)(dstSize >> 8);
862 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
863 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
864 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
866 return DTime1 < DTime0;
869 typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
871 size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
873 /* validation checks */
875 return ERROR(dstSize_tooSmall);
876 if (cSrcSize > dstSize)
877 return ERROR(corruption_detected); /* invalid */
878 if (cSrcSize == dstSize) {
879 memcpy(dst, cSrc, dstSize);
881 } /* not compressed */
883 memset(dst, *(const BYTE *)cSrc, dstSize);
888 U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
889 return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
890 : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
894 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
896 /* validation checks */
898 return ERROR(dstSize_tooSmall);
899 if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
900 return ERROR(corruption_detected); /* invalid */
903 U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
904 return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
905 : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
909 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
911 /* validation checks */
913 return ERROR(dstSize_tooSmall);
914 if (cSrcSize > dstSize)
915 return ERROR(corruption_detected); /* invalid */
916 if (cSrcSize == dstSize) {
917 memcpy(dst, cSrc, dstSize);
919 } /* not compressed */
921 memset(dst, *(const BYTE *)cSrc, dstSize);
926 U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
927 return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
928 : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);