2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * $XConsortium: huffman.cc /main/3 1996/06/11 17:15:06 cde-hal $
26 * Copyright (c) 1993 HAL Computer Systems International, Ltd.
27 * All rights reserved. Unpublished -- rights reserved under
28 * the Copyright Laws of the United States. USE OF A COPYRIGHT
29 * NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
32 * THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
33 * SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
34 * DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
35 * PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
38 * RESTRICTED RIGHTS LEGEND
39 * Use, duplication, or disclosure by the Government is subject
40 * to the restrictions as set forth in subparagraph (c)(l)(ii)
41 * of the Rights in Technical Data and Computer Software clause
42 * at DFARS 252.227-7013.
44 * HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
51 #include "compression/huffman.h"
52 #include "dstr/heap.h"
54 ////////////////////////////////////////
56 ////////////////////////////////////////
58 htr_node::htr_node(encoding_unit* e, htr_node* lt, htr_node* rt):
59 left(lt), right(rt), eu(e), freq(e->freq), parent(0)
63 htr_node::htr_node(unsigned long f, htr_node* lt, htr_node* rt):
64 left(lt), right(rt), eu(0), freq(f), parent(0)
74 ////////////////////////////////////////
76 ////////////////////////////////////////
77 Boolean htr_eq(const void* n1, const void* n2)
79 if ( ((htr_node*)n1) -> freq == ((htr_node*)n2) -> freq )
85 Boolean htr_ls(const void* n1, const void* n2)
87 if ( ((htr_node*)n1) -> freq > ((htr_node*)n2) -> freq )
93 ////////////////////////////////////////
95 ////////////////////////////////////////
96 huff::huff(): compress_agent(HUFFMAN_AGENT_CODE),
97 e_units(0), cts(0), tri(new trie(26)), htr_root(0)
107 void huff::build_tree()
109 heap htr_node_set(htr_eq, htr_ls, cts);
112 for (int i=0; i<cts; i++ ) {
114 x = new htr_node(e_units[i]);
115 e_units[i] -> leaf_htr_node = x;
116 htr_node_set.insert(x);
120 htr_node_set.heapify();
122 htr_node *n1, *n2, *n3;
123 while ( htr_node_set.count() > 1 ) {
125 // max is the smallest element. see htr_ls()
126 n1 = (htr_node*)htr_node_set.max_elm() ;
127 htr_node_set.delete_max() ;
129 // max is the smallest element. see htr_ls()
130 n2 = (htr_node*)htr_node_set.max_elm() ;
131 htr_node_set.delete_max() ;
133 n3 = new htr_node(n1->freq+n2->freq, n1, n2);
135 n1 -> parent = n2 -> parent = n3;
137 htr_node_set.insert_heapify(n3);
140 htr_root = (htr_node*)htr_node_set.max_elm();
141 htr_node_set.delete_max() ;
144 void huff::calculate_code()
149 for (int i=0; i<cts; i++ ) {
151 if ( e_units[i] == 0 )
154 e_units[i] -> code = 0;
155 e_units[i] -> bits = 0;
157 x = e_units[i] -> leaf_htr_node;
160 parent = x -> parent;
165 e_units[i] -> code >>= 1;
167 if ( parent -> left == x ) {
168 e_units[i] -> code |= 0x80000000;
170 if ( parent -> right != x ) {
172 throw(stringException("huffman tree corrupted"));
176 e_units[i] -> bits++;
178 if ( e_units[i] -> bits > BITS_IN(unsigned long) ) {
179 debug(cerr, e_units[i] -> bits);
180 throw(stringException("huffman tree too deep"));
184 e_units[i] -> code >>= ( 32 - e_units[i] -> bits );
185 //debug(cerr, hex(e_units[i] -> code));
189 ostream& huff::print_alphabet(ostream& out)
191 unsigned long total_uncmp = 0;
192 unsigned long int total_cmp = 0;
194 for (int i=0; i<cts; i++ ) {
196 if ( e_units[i] == 0 )
199 total_uncmp += (e_units[i] -> word -> size()) * (e_units[i] -> freq);
200 total_cmp += (e_units[i] -> bits) * (e_units[i] -> freq);
202 out << *(e_units[i] -> word) << ":" << e_units[i]->bits << "\n";
204 total_cmp = total_cmp / 8 + total_cmp % 8;
207 debug(cerr, total_uncmp);
208 debug(cerr, total_cmp);
210 debug(cerr, 1 - float(total_cmp) / float(total_uncmp) );
216 // self modifying buf ptr after taking an encoding unit.
217 encoding_unit* huff::get_e_unit(unsigned char*& buf, int len)
219 encoding_unit* x = tri -> parse(buf, len) ;
221 //debug(cerr, *(x -> word));
223 buf += x -> word -> size();
227 int total_uncomp = 0;
230 void huff::compress(const buffer& uncompressed, buffer& compressed)
232 //debug(cerr, *(buffer*)&uncompressed);
233 if ( compressed.buf_sz() < uncompressed.buf_sz() )
234 compressed.expand_chunk(uncompressed.buf_sz());
237 unsigned short total_bits = 0;
239 int uncmp_sz = uncompressed.content_sz();
240 unsigned char* buf = (unsigned char*)uncompressed.get_base();
243 unsigned int code_buf = 0;
244 unsigned int rem_long = 0;
247 encoding_unit *e_ptr = 0;
249 while ( uncmp_sz > 0 ) {
251 //e_ptr = get_e_unit(buf, uncmp_sz);
253 e_ptr = tri -> parse(buf, uncmp_sz);
255 buf += e_ptr -> word -> size();
256 uncmp_sz -= e_ptr -> word -> size();
258 if ( rem_bits + e_ptr -> bits > 32 ) {
260 code_buf = e_ptr -> code; // shift bits to the higher end
262 rem_long <<= 32-rem_bits;
264 rem_bits += e_ptr -> bits - 32; // new rem_bits
266 code_buf >>= rem_bits; // get padding part
268 rem_long |= code_buf; // padding
270 compressed.put( rem_long );
272 // save remaining (rem_bits + e_ptr -> bits - 32) bits to rem_bits.
274 rem_long = e_ptr -> code & (~0L >> (32 - rem_bits));
277 rem_long <<= e_ptr -> bits;
278 rem_long |= e_ptr -> code;
279 rem_bits += e_ptr -> bits;
280 //debug(cerr, hex(rem_long));
283 total_bits += e_ptr -> bits;
284 total_bits &= 0x1f; // take the mod on 32
287 if ( rem_bits > 0 ) {
288 rem_long <<= 32 - rem_bits;
289 //MESSAGE(cerr, "PUT");
290 //debug(cerr, hex(rem_long));
291 compressed.put( rem_long );
294 //debug(cerr, total_bits);
295 compressed.put(char(total_bits));
297 // total_uncomp += uncompressed.content_sz();
298 // total_comp += compressed.content_sz();
301 debug(cerr, total_uncomp);
302 debug(cerr, total_comp);
305 1-float(compressed.content_sz()-1)/float(uncompressed.content_sz())
310 void huff::decompress(buffer& compressed, buffer& uncompressed)
312 char* buf_base = uncompressed.get_base();
318 int ct = (compressed.content_sz() - 1) >> 2;
324 htr_node *node_ptr = htr_root;
327 compressed.get(c); ct--;
330 compressed.get(rem_bits);
331 //debug(cerr, int(rem_bits));
332 bits_bound = rem_bits ;
335 for ( int i=0;i<bits_bound; i++ ) {
336 if ( node_ptr -> left == 0 && node_ptr -> right == 0 ) {
337 //for ( int j=0; j<node_ptr -> eu -> word -> size(); j++ ) {
338 // cerr << (node_ptr -> eu -> word -> get())[j];
341 str_len = node_ptr -> eu -> word -> size();
342 str = node_ptr -> eu -> word -> get();
344 if ( str_len == 1 ) {
349 // uncompressed.put((node_ptr -> eu -> word -> get())[0]);
353 memcpy(buf_base, str, str_len);
358 uncompressed.put( node_ptr -> eu -> word -> get(),
359 node_ptr -> eu -> word -> size()
367 if ( c & 0x80000000 )
368 node_ptr = node_ptr -> left;
370 node_ptr = node_ptr -> right;
378 //debug(cerr, buf_base-uncompressed.get_base());
379 uncompressed.set_content_sz(buf_base-uncompressed.get_base());
382 uncompressed.put( node_ptr -> eu -> word -> get(),
383 node_ptr -> eu -> word -> size()
387 //////////////////////////////////////////////////////////
389 //////////////////////////////////////////////////////////
393 int huff::cdr_sizeof()
395 return pstring::cdr_sizeof();
398 io_status huff::cdrOut(buffer& buf)
400 //MESSAGE(cerr, "huff::cdrOut");
401 //debug(cerr, my_oid());
402 static buffer v_out_buf(LBUFSIZ);
405 //MESSAGE(cerr, "huff::cdrOut: dict out");
406 int sz = sizeof(int);
407 for ( int i=0; i<cts; i++ ) {
408 sz += ( e_units[i] -> word -> size() +
409 sizeof(unsigned int) +
414 v_out_buf.expand_chunk(sz);
420 for ( i=0; i<cts; i++ ) {
421 word_sz = e_units[i] -> word -> size();
422 v_out_buf.put(char(word_sz));
424 v_out_buf.put(e_units[i] -> word -> get(), word_sz);
425 v_out_buf.put(e_units[i] -> freq);
428 pstring::update(v_out_buf.get_base(), v_out_buf.content_sz());
431 return pstring::cdrOut(buf);
436 // (len_byte word_chars freq_int)+
438 io_status huff::cdrIn(buffer& buf)
440 static buffer v_in_buf(0);
444 if ( pstring::size() > 0 ) {
446 v_in_buf.set_chunk(pstring::get(), pstring::size());
447 v_in_buf.set_content_sz(pstring::size());
451 char word_buf[BUFSIZ];
453 unsigned int word_freq;
456 for ( int i=0; i<cts; i++ ) {
458 v_in_buf.get(word_sz);
459 v_in_buf.get(word_buf, int(word_sz));
460 v_in_buf.get(word_freq);
463 z = new ostring((char*)word_buf, word_sz);
465 alphabet[alphabet_sz++] = new encoding_unit(z, word_freq);
468 tri -> add_to_alphabet((unsigned char*)word_buf, word_sz, word_freq);
471 e_units = tri -> get_alphabet(cts);
477 //print_alphabet(cerr);
486 void trie_add_wrap(unsigned char* buf, int len, int action_num)
488 switch ( action_num ) {
490 alphabet -> add(buf, len);
493 alphabet -> add_letters(buf, len);
497 debug(cerr, action_num);
498 throw(stringException("unknown action number"));
503 io_status huff::build_dict(lex_func_t f_lex, getchar_func_t f_getchar)
505 MESSAGE(cerr, "get to huff build dict");
506 fill_buf_func = f_getchar;
510 lex_action_func = trie_add_wrap;
512 if ( (*f_lex)() != 0 )
513 throw(stringException("huff::asciiIn(): Parsing input failed"));
515 e_units = tri -> get_alphabet(cts);
522 //print_alphabet(cerr);
524 set_mode(UPDATE, true);