2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * $XConsortium: mphf_funcs.cc /main/4 1996/07/18 14:33:08 drk $
26 * Copyright (c) 1993 HAL Computer Systems International, Ltd.
27 * All rights reserved. Unpublished -- rights reserved under
28 * the Copyright Laws of the United States. USE OF A COPYRIGHT
29 * NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
32 * THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
33 * SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
34 * DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
35 * PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
38 * RESTRICTED RIGHTS LEGEND
39 * Use, duplication, or disclosure by the Government is subject
40 * to the restrictions as set forth in subparagraph (c)(l)(ii)
41 * of the Rights in Technical Data and Computer Software clause
42 * at DFARS 252.227-7013.
44 * HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
52 #include "hmphf/mphf_funcs.h"
54 #define NO_BACKTRACKS 50
55 #define NO_MAPPINGS 20
56 #define NO_SEARCHINGS 10
58 //compute_a_mphf(char* key_file, params& pms, char* mphf_spec_file)
60 compute_a_mphf(char** keys, params& pms, buffer& mphf_buffer)
62 mphf_hash_table ht(pms);
66 for ( k=0; k<NO_MAPPINGS; k++ ) {
68 //buckets bs(key_file, pms);
69 buckets bs(keys, pms);
71 //MESSAGE(cerr, "buckets built");
73 /* search for a MPHF */
74 for ( i = 0; i<NO_SEARCHINGS; i++) {
76 //MESSAGE(cerr, form("%dth search:", i+1));
78 bs.set_control_bit(-1);
81 if ( search(bs, ht, pms) == 0 ) {
82 //MESSAGE(cerr, "search done");
84 /* verify computed MPHF */
85 if ( verify(bs, ht, pms) == 0 ) {
87 /* output the computed MPHF */
88 return write_spec(bs, pms, mphf_buffer);
101 search(buckets& bs, mphf_hash_table& ht, params& pms)
107 int num_backtracks = 0;
108 int no_search_fails = bs.no_buckets() / 2;
110 int_pattern new_pattern(bs.max_bucket_sz());
112 while ( i < bs.no_buckets() && fails < no_search_fails ) {
114 if ( bs[i] == 0 || bs[i] -> no_keys() == 0 ) {
121 //MESSAGE(cerr, "before fit");
122 while ( bs.get_pattern(i, new_pattern, pms) == 0 ) {
124 //debug(cerr, new_pattern);
125 //cerr << new_pattern.no_elmts();
126 patternFit = ht.fast_fit(new_pattern);
128 if ( patternFit >= 0 ) {
129 //MESSAGE(cerr, " fit");
130 bs[i] -> set_g_value(patternFit);
133 //MESSAGE(cerr, " fail");
139 if ( patternFit == -1 ) {
141 //MESSAGE(cerr, "BACKTRACK");
147 if ( num_backtracks > bs.no_buckets() ) {
153 //MESSAGE(cerr, "increment i");
158 return ( patternFit >= 0 ) ? 0 : -1;
161 verify(buckets& bs, mphf_hash_table& ht, params& pms)
164 int_pattern new_pattern(bs.max_bucket_sz());
166 //debug(cerr, ht.num_filled_slots());
167 //debug(cerr, ht.no_slots());
168 //debug(cerr, pms.n);
170 if ( ht.num_filled_slots() != ht.no_slots() ) {
172 MESSAGE(cerr, "panic: hash table not full or 'too' full");
173 MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
174 MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
181 for ( i = 0; i < bs.no_buckets(); i++ ) {
183 if ( bs[i] == 0 || bs[i] -> no_keys() == 0 ) continue;
185 bs.use_current_params(i, new_pattern, pms, true);
186 //debug(cerr, bs[i] -> orig_pos());
187 //debug(cerr, new_pattern);
189 if ( ht.fit_hash_table(new_pattern) != 0 ) {
190 MESSAGE(cerr, "panic: collision occurred");
195 //debug(cerr, ht.num_filled_slots());
196 //debug(cerr, ht.no_slots());
198 if ( ht.num_filled_slots() != ht.no_slots() ) {
200 MESSAGE(cerr, "panic: hash table not full after test insertion");
201 MESSAGE(cerr, form("filled_slots = %d\n", ht.num_filled_slots()));
202 MESSAGE(cerr, form("no_slots = %d\n", ht.no_slots()));
205 MESSAGE(cerr, "verifying OK");
210 int write_spec(buckets& bs, params& pms, buffer& mphf_buffer)
212 unsigned int gv_bits = 0;
215 gv_bits = (int)(flog2(pms.v_n)) + 1; /* bits of each g value.*/
216 if ( floor(flog2(pms.v_n)) < flog2(pms.v_n) )
220 int uints_of_cmpat_gv = gv_bits * pms.v_b;
222 if ( uints_of_cmpat_gv % BITS_IN(unsigned) > 0 )
223 uints_of_cmpat_gv += BITS_IN(unsigned);
225 uints_of_cmpat_gv /= BITS_IN(unsigned);
228 unsigned int *c_array = new unsigned[uints_of_cmpat_gv];
230 compact(bs, c_array, gv_bits, mphf_buffer.get_swap_order());
232 unsigned int g_array_bytes = sizeof(unsigned int) * uints_of_cmpat_gv;
234 int spec_bytes = 7 * ( sizeof(unsigned int) + 1) + g_array_bytes + 1;
236 mphf_buffer.expand_chunk(spec_bytes);
239 mphf_buffer.put(pms.v_n); mphf_buffer.put('\n');
240 mphf_buffer.put(pms.v_b); mphf_buffer.put('\n');
241 mphf_buffer.put(pms.v_p1); mphf_buffer.put('\n');
242 mphf_buffer.put(pms.v_p2); mphf_buffer.put('\n');
243 mphf_buffer.put(pms.v_r); mphf_buffer.put('\n');
244 mphf_buffer.put(pms.v_seed); mphf_buffer.put('\n');
245 mphf_buffer.put(g_array_bytes); mphf_buffer.put('\t');
246 mphf_buffer.put((char*)c_array, g_array_bytes); mphf_buffer.put('\n');
249 ostrstream fout(mphf_buffer.get_base(), mphf_buffer.buf_sz(), ios::out);
251 fout << pms.v_n << "\n";
252 fout << pms.v_b << "\n";
253 fout << pms.v_p1 << "\n";
254 fout << pms.v_p2 << "\n";
257 int new_v_r = pms.v_r; SET_BIT(new_v_r, 0x80000000);
258 fout << new_v_r << "\n";
260 fout << pms.v_r << "\n";
262 fout << pms.v_seed << "\n";
264 fout << g_array_bytes << '\t';
265 fout.write((char*)c_array, g_array_bytes);
269 mphf_buffer.set_content_sz(spec_bytes);
276 int compact(buckets& bs, unsigned s[], int t, Boolean swap)
278 int target, k, i, remaining_bits, branch;
279 unsigned unsigned_g, high_part_bits, lower_part_bits;
281 remaining_bits = BITS_IN(unsigned);
284 unsigned* y = new unsigned[bs.no_buckets()];
285 for ( i = 0; i < bs.no_buckets(); y[i++] = 0 );
287 for ( i = 0; i < bs.no_buckets(); i++ ) {
289 if ( bs[i] && bs[i] -> no_keys() > 0 ) {
290 y[bs[i] -> orig_pos()] =
291 ((bs[i] -> g_value()) << 1) + bs[i] -> control_bit();
293 cerr << bs[i] -> orig_pos() << " ";
294 cerr << bs[i] -> g_value() << " ";
295 cerr << bs[i] -> control_bit() << " ";
296 cerr << y[bs[i] -> orig_pos()] << "\n";
303 MESSAGE(cerr, "=======BIT ARRAY:");
304 debug(cerr, bs.no_buckets());
305 for ( i = 0; i < bs.no_buckets(); i++ )
306 cerr << i << " " << y[i] << "\n";
310 //MESSAGE(cerr, "=======BIT ARRAY (before swap):");
312 for ( i = 0; i < bs.no_buckets(); i++ ) {
318 debug(cerr, hex(unsigned_g));
321 debug(cerr, form("%x", c_bit));
322 debug(cerr, "=====");
325 if (remaining_bits >= t ) {
326 unsigned_g <<= (remaining_bits -t);
327 target |= unsigned_g;
332 high_part_bits = getbits(unsigned_g,t,remaining_bits);
333 lower_part_bits = unsigned_g & ~(~0 << t-remaining_bits);
334 lower_part_bits <<= (BITS_IN(unsigned)- (t-remaining_bits));
336 s[k++] = target | high_part_bits;
340 ORDER_SWAP_UINT(s[k-1]);
343 target = lower_part_bits;
344 remaining_bits = BITS_IN(unsigned) - ( t - remaining_bits );
350 if ( bs.no_buckets() > 0 ) {
351 s[k] = ( branch == 0 ) ? target : lower_part_bits;
354 ORDER_SWAP_UINT(s[k]);
359 MESSAGE(cerr, "=======BIT ARRAY (after swap):");
361 for ( i = 0; i <= k; i++ )
362 cerr << i << " " << s[i] << "\n";
370 int wc(char* file_name, unsigned int& lines, unsigned int& max_length)
374 fstream in(file_name, ios::in);
377 MESSAGE(cerr, "can not open key file");
378 throw(streamException(in.rdstate()));
383 while ( in.getline(buf, BUFSIZ) ) {
384 max_length = MAX(strlen(buf)-1, max_length);
389 MESSAGE(cerr, "empty key file");