2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * $XConsortium: sorter.cc /main/3 1996/06/11 17:20:47 cde-hal $
26 * Copyright (c) 1993 HAL Computer Systems International, Ltd.
27 * All rights reserved. Unpublished -- rights reserved under
28 * the Copyright Laws of the United States. USE OF A COPYRIGHT
29 * NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
32 * THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
33 * SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
34 * DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
35 * PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
38 * RESTRICTED RIGHTS LEGEND
39 * Use, duplication, or disclosure by the Government is subject
40 * to the restrictions as set forth in subparagraph (c)(l)(ii)
41 * of the Rights in Technical Data and Computer Software clause
42 * at DFARS 252.227-7013.
44 * HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
53 #include "hmphf/sorter.h"
55 /*#define NUM_BUCKETS 10 */
56 #define NUM_BUCKETS 5000
58 sorter::sorter(char* key_file) :
62 b_convertor(NUM_BUCKETS, 128)
64 fstream in(key_file, ios::in);
68 sorter::sorter(istream& in) :
72 b_convertor(NUM_BUCKETS, 128)
77 void sorter::_init(istream& in)
79 v_bucket_array = new bucketPtr[NUM_BUCKETS];
82 for ( i=0; i<NUM_BUCKETS; v_bucket_array[i++] = 0);
84 char key_buf[LBUFSIZ];
87 while ( in.getline(key_buf, LBUFSIZ) ) {
88 i = b_convertor.atoi(key_buf, strlen(key_buf), 0, NUM_BUCKETS);
90 if ( v_bucket_array[i] == 0 ) {
91 v_bucket_array[i] = new bucket(key_buf, i, true);
94 k = v_bucket_array[i] -> add_key(key_buf, true);
97 v_max_bucket_sz = MAX(v_max_bucket_sz, k);
99 in.getline(key_buf, LBUFSIZ); // skip the next info field
104 assemble_unique_keys();
110 for ( i=0; i<NUM_BUCKETS; delete v_bucket_array[i++] );
111 delete v_bucket_array;
113 for ( i=0; i<v_no_unique_keys; delete v_unique_keys[i++] );
114 delete v_unique_keys;
117 void sorter::filter_by_hash()
119 // a small hash table for keys to map to
120 v_map_table = new charPtr[2*v_max_bucket_sz];
123 for ( i=0; i<2*v_max_bucket_sz; v_map_table[i++] = 0 );
125 // an int table remembering slots in the v_map_table
126 // that have been mapped.
127 v_index_table = new int[v_max_bucket_sz];
129 // a charPtr table remembering possibly duplicated keys
130 v_check_table = new charPtr[v_max_bucket_sz];
132 // a charPtr table remembering dupcated keys
133 v_dup_table = new charPtr[v_max_bucket_sz];
135 for ( i=0; i<NUM_BUCKETS; i++ ) {
136 if ( v_bucket_array[i] )
137 filter_a_bucket(v_bucket_array[i]);
141 delete v_check_table;
142 delete v_index_table;
146 void sorter::filter_a_bucket(bucketPtr bkt)
148 slist_void_ptr_cell *lp = bkt -> key_ptr;
151 remove_keys(bkt, (char*)(lp -> void_ptr()), lp);
152 lp = (slist_void_ptr_cell*)(lp -> v_succ);
156 void sorter::remove_keys(bucketPtr bkt, char* key, slist_void_ptr_cell* lp)
160 slist_void_ptr_cell *llp = lp;
162 while ( llp && llp -> v_succ ) {
164 slist_void_ptr_cell *next_lp =
165 (slist_void_ptr_cell*)llp -> v_succ;
166 //debug(cerr, (char*)(next_lp -> void_ptr()));
168 if ( strcmp( key, (char*)(next_lp -> void_ptr()) ) == 0 ) {
169 //MESSAGE(cerr, "rmove key:");
170 //cerr << (char*)(next_lp -> void_ptr()) << "\n";
171 llp -> v_succ = next_lp -> v_succ;
173 delete (char*)(next_lp -> void_ptr());
178 llp = (slist_void_ptr_cell*)(llp -> v_succ);
189 // slist_void_ptr_cell *lp = bkt -> key_ptr;
193 //// maintaint the order of key chains in v_check_table[]!!!
195 // char* key2 = ((char*)(lp -> void_ptr()));
197 // int hash = b_convertor.atoi(
198 // key2, strlen(key2),
199 // 1, 2*v_max_bucket_sz
202 // if ( v_map_table[hash] != 0 ) {
204 // if ( v_map_table[hash] != (charPtr)1 ) {
205 // v_check_table[n_chk++] = v_map_table[hash];
206 // v_map_table[hash] = (charPtr)1;
207 // // set to occupied. so that the same
208 // // key will be in the v_check_table once.
209 // // assume 1 will never be the address
213 // v_check_table[n_chk++] = key2;
216 // //v_map_table[hash] = ((Ostring*)(lp -> void_ptr())) -> get();
217 // v_map_table[hash] = ((char*)(lp -> void_ptr()));
220 // v_index_table[n_idx++] = hash; // remember the slot being set
222 // lp = (slist_void_ptr_cell*)(lp -> succ);
224 ////debug(cerr, n_chk);
226 //// double check possiblly collided keys
228 // for ( int u=0; u<n_chk-1; u++ ) {
229 // for (int v=u+1; v<n_chk; v++ ) {
230 // if ( v_check_table[u] && v_check_table[v] &&
231 // strcmp(v_check_table[u], v_check_table[v]) == 0 )
233 //// mark v as a duplicated key
234 // v_dup_table[n_dup++] = v_check_table[v];
235 // v_check_table[v] = 0;
241 ////debug(cerr, n_dup);
243 //// remove the duplicates
244 // if ( n_dup > 0 ) {
245 // slist_void_ptr_cell *lp = bkt -> key_ptr;
246 // slist_void_ptr_cell *prev_lp = lp;
249 // lp = (slist_void_ptr_cell*)(lp -> succ);
251 // while ( lp && u<n_dup ) {
255 //debug(cerr, v_dup_table[u]);
256 //debug(cerr, int(v_dup_table[u]));
257 //debug(cerr, (char*)(lp -> void_ptr()));
258 //debug(cerr, int((char*)(lp -> void_ptr())));
262 // if ( v_dup_table[u] == ((char*)(lp -> void_ptr())) )
265 //MESSAGE(cerr, "key removed:");
266 //debug(cerr, v_dup_table[u]);
268 // prev_lp -> succ = lp -> succ;
269 // bkt -> v_no_keys --;
274 // lp = (slist_void_ptr_cell*)(lp -> succ);
279 //// clean v_map_table
280 // for ( int k=0; k<n_idx; k++ ) {
281 // v_map_table[v_index_table[k]] = 0;
284 ////MESSAGE(cerr, "SOS0");
287 void sorter::assemble_unique_keys()
290 for ( i=0; i<NUM_BUCKETS; i++ ) {
291 if ( v_bucket_array[i] ) {
292 v_no_unique_keys += v_bucket_array[i] -> v_no_keys;
296 v_unique_keys = new charPtr[v_no_unique_keys];
299 slist_void_ptr_cell *lp = 0;
301 for ( i=0; i<NUM_BUCKETS; i++ ) {
303 if ( v_bucket_array[i] == 0 ) continue;
305 lp = v_bucket_array[i] -> key_ptr;
309 v_unique_keys[j++] = ((char*)(lp -> void_ptr()));
312 lp = (slist_void_ptr_cell*)(lp -> v_succ);
317 ostream& operator<< (ostream& out, sorter& st)
319 debug(out, st.v_no_unique_keys);
321 for ( int i=0; i<st.v_no_unique_keys; i++ ) {
322 out << st.v_unique_keys[i] << "\n";