2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: tclHash.c /main/2 1996/08/08 14:44:13 cde-hp $ */
27 * Implementation of in-memory hash tables for Tcl and Tcl-based
30 * Copyright (c) 1991-1993 The Regents of the University of California.
31 * Copyright (c) 1994 Sun Microsystems, Inc.
33 * See the file "license.terms" for information on usage and redistribution
34 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
36 * SCCS: @(#) tclHash.c 1.15 96/02/15 11:50:23
42 * When there are this many entries per bucket, on average, rebuild
43 * the hash table to make it larger.
46 #define REBUILD_MULTIPLIER 3
50 * The following macro takes a preliminary integer hash value and
51 * produces an index into a hash tables bucket list. The idea is
52 * to make it so that preliminary values that are arbitrarily similar
53 * will end up in different buckets. The hash function was taken
54 * from a random-number generator.
57 #define RANDOM_INDEX(tablePtr, i) \
58 (((((long) (i))*1103515245) >> (tablePtr)->downShift) & (tablePtr)->mask)
61 * Procedure prototypes for static procedures in this file:
64 static Tcl_HashEntry * ArrayFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
66 static Tcl_HashEntry * ArrayCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
67 char *key, int *newPtr));
68 static Tcl_HashEntry * BogusFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
70 static Tcl_HashEntry * BogusCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
71 char *key, int *newPtr));
72 static unsigned int HashString _ANSI_ARGS_((char *string));
73 static void RebuildTable _ANSI_ARGS_((Tcl_HashTable *tablePtr));
74 static Tcl_HashEntry * StringFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
76 static Tcl_HashEntry * StringCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
77 char *key, int *newPtr));
78 static Tcl_HashEntry * OneWordFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
80 static Tcl_HashEntry * OneWordCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
81 char *key, int *newPtr));
84 *----------------------------------------------------------------------
86 * Tcl_InitHashTable --
88 * Given storage for a hash table, set up the fields to prepare
89 * the hash table for use.
95 * TablePtr is now ready to be passed to Tcl_FindHashEntry and
96 * Tcl_CreateHashEntry.
98 *----------------------------------------------------------------------
103 Tcl_HashTable *tablePtr, /* Pointer to table record, which
104 * is supplied by the caller. */
105 int keyType /* Type of keys to use in table:
106 * TCL_STRING_KEYS, TCL_ONE_WORD_KEYS,
107 * or an integer >= 2. */
110 tablePtr->buckets = tablePtr->staticBuckets;
111 tablePtr->staticBuckets[0] = tablePtr->staticBuckets[1] = 0;
112 tablePtr->staticBuckets[2] = tablePtr->staticBuckets[3] = 0;
113 tablePtr->numBuckets = TCL_SMALL_HASH_TABLE;
114 tablePtr->numEntries = 0;
115 tablePtr->rebuildSize = TCL_SMALL_HASH_TABLE*REBUILD_MULTIPLIER;
116 tablePtr->downShift = 28;
118 tablePtr->keyType = keyType;
119 if (keyType == TCL_STRING_KEYS) {
120 tablePtr->findProc = StringFind;
121 tablePtr->createProc = StringCreate;
122 } else if (keyType == TCL_ONE_WORD_KEYS) {
123 tablePtr->findProc = OneWordFind;
124 tablePtr->createProc = OneWordCreate;
126 tablePtr->findProc = ArrayFind;
127 tablePtr->createProc = ArrayCreate;
132 *----------------------------------------------------------------------
134 * Tcl_DeleteHashEntry --
136 * Remove a single entry from a hash table.
142 * The entry given by entryPtr is deleted from its table and
143 * should never again be used by the caller. It is up to the
144 * caller to free the clientData field of the entry, if that
147 *----------------------------------------------------------------------
152 Tcl_HashEntry *entryPtr
155 Tcl_HashEntry *prevPtr;
157 if (*entryPtr->bucketPtr == entryPtr) {
158 *entryPtr->bucketPtr = entryPtr->nextPtr;
160 for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) {
161 if (prevPtr == NULL) {
162 panic("malformed bucket chain in Tcl_DeleteHashEntry");
164 if (prevPtr->nextPtr == entryPtr) {
165 prevPtr->nextPtr = entryPtr->nextPtr;
170 entryPtr->tablePtr->numEntries--;
171 ckfree((char *) entryPtr);
175 *----------------------------------------------------------------------
177 * Tcl_DeleteHashTable --
179 * Free up everything associated with a hash table except for
180 * the record for the table itself.
186 * The hash table is no longer useable.
188 *----------------------------------------------------------------------
193 Tcl_HashTable *tablePtr /* Table to delete. */
196 Tcl_HashEntry *hPtr, *nextPtr;
200 * Free up all the entries in the table.
203 for (i = 0; i < tablePtr->numBuckets; i++) {
204 hPtr = tablePtr->buckets[i];
205 while (hPtr != NULL) {
206 nextPtr = hPtr->nextPtr;
207 ckfree((char *) hPtr);
213 * Free up the bucket array, if it was dynamically allocated.
216 if (tablePtr->buckets != tablePtr->staticBuckets) {
217 ckfree((char *) tablePtr->buckets);
221 * Arrange for panics if the table is used again without
225 tablePtr->findProc = BogusFind;
226 tablePtr->createProc = BogusCreate;
230 *----------------------------------------------------------------------
232 * Tcl_FirstHashEntry --
234 * Locate the first entry in a hash table and set up a record
235 * that can be used to step through all the remaining entries
239 * The return value is a pointer to the first entry in tablePtr,
240 * or NULL if tablePtr has no entries in it. The memory at
241 * *searchPtr is initialized so that subsequent calls to
242 * Tcl_NextHashEntry will return all of the entries in the table,
248 *----------------------------------------------------------------------
253 Tcl_HashTable *tablePtr, /* Table to search. */
254 Tcl_HashSearch *searchPtr /* Place to store information about
255 * progress through the table. */
258 searchPtr->tablePtr = tablePtr;
259 searchPtr->nextIndex = 0;
260 searchPtr->nextEntryPtr = NULL;
261 return Tcl_NextHashEntry(searchPtr);
265 *----------------------------------------------------------------------
267 * Tcl_NextHashEntry --
269 * Once a hash table enumeration has been initiated by calling
270 * Tcl_FirstHashEntry, this procedure may be called to return
271 * successive elements of the table.
274 * The return value is the next entry in the hash table being
275 * enumerated, or NULL if the end of the table is reached.
280 *----------------------------------------------------------------------
285 Tcl_HashSearch *searchPtr /* Place to store information about
286 * progress through the table. Must
287 * have been initialized by calling
288 * Tcl_FirstHashEntry. */
293 while (searchPtr->nextEntryPtr == NULL) {
294 if (searchPtr->nextIndex >= searchPtr->tablePtr->numBuckets) {
297 searchPtr->nextEntryPtr =
298 searchPtr->tablePtr->buckets[searchPtr->nextIndex];
299 searchPtr->nextIndex++;
301 hPtr = searchPtr->nextEntryPtr;
302 searchPtr->nextEntryPtr = hPtr->nextPtr;
307 *----------------------------------------------------------------------
311 * Return statistics describing the layout of the hash table
312 * in its hash buckets.
315 * The return value is a malloc-ed string containing information
316 * about tablePtr. It is the caller's responsibility to free
322 *----------------------------------------------------------------------
327 Tcl_HashTable *tablePtr /* Table for which to produce stats. */
330 #define NUM_COUNTERS 10
331 int count[NUM_COUNTERS], overflow, i, j;
337 * Compute a histogram of bucket usage.
340 for (i = 0; i < NUM_COUNTERS; i++) {
345 for (i = 0; i < tablePtr->numBuckets; i++) {
347 for (hPtr = tablePtr->buckets[i]; hPtr != NULL; hPtr = hPtr->nextPtr) {
350 if (j < NUM_COUNTERS) {
356 average += (tmp+1.0)*(tmp/tablePtr->numEntries)/2.0;
360 * Print out the histogram and a few other pieces of information.
363 result = (char *) ckalloc((unsigned) ((NUM_COUNTERS*60) + 300));
364 sprintf(result, "%d entries in table, %d buckets\n",
365 tablePtr->numEntries, tablePtr->numBuckets);
366 p = result + strlen(result);
367 for (i = 0; i < NUM_COUNTERS; i++) {
368 sprintf(p, "number of buckets with %d entries: %d\n",
372 sprintf(p, "number of buckets with %d or more entries: %d\n",
373 NUM_COUNTERS, overflow);
375 sprintf(p, "average search distance for entry: %.1f", average);
380 *----------------------------------------------------------------------
384 * Compute a one-word summary of a text string, which can be
385 * used to generate a hash index.
388 * The return value is a one-word summary of the information in
394 *----------------------------------------------------------------------
399 char *string /* String from which to compute hash value. */
406 * I tried a zillion different hash functions and asked many other
407 * people for advice. Many people had their own favorite functions,
408 * all different, but no-one had much idea why they were good ones.
409 * I chose the one below (multiply by 9 and add new character)
410 * because of the following reasons:
412 * 1. Multiplying by 10 is perfect for keys that are decimal strings,
413 * and multiplying by 9 is just about as good.
414 * 2. Times-9 is (shift-left-3) plus (old). This means that each
415 * character's bits hang around in the low-order bits of the
416 * hash value for ever, plus they spread fairly rapidly up to
417 * the high-order bits to fill out the hash value. This seems
418 * works well both for decimal and non-decimal strings.
428 result += (result<<3) + c;
434 *----------------------------------------------------------------------
438 * Given a hash table with string keys, and a string key, find
439 * the entry with a matching key.
442 * The return value is a token for the matching entry in the
443 * hash table, or NULL if there was no matching entry.
448 *----------------------------------------------------------------------
451 static Tcl_HashEntry *
453 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
454 char *key /* Key to use to find matching entry. */
461 index = HashString(key) & tablePtr->mask;
464 * Search all of the entries in the appropriate bucket.
467 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
468 hPtr = hPtr->nextPtr) {
469 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
482 *----------------------------------------------------------------------
486 * Given a hash table with string keys, and a string key, find
487 * the entry with a matching key. If there is no matching entry,
488 * then create a new entry that does match.
491 * The return value is a pointer to the matching entry. If this
492 * is a newly-created entry, then *newPtr will be set to a non-zero
493 * value; otherwise *newPtr will be set to 0. If this is a new
494 * entry the value stored in the entry will initially be 0.
497 * A new entry may be added to the hash table.
499 *----------------------------------------------------------------------
502 static Tcl_HashEntry *
504 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
505 char *key, /* Key to use to find or create matching
507 int *newPtr /* Store info here telling whether a new
508 * entry was created. */
515 index = HashString(key) & tablePtr->mask;
518 * Search all of the entries in this bucket.
521 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
522 hPtr = hPtr->nextPtr) {
523 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
535 * Entry not found. Add a new one to the bucket.
539 hPtr = (Tcl_HashEntry *) ckalloc((unsigned)
540 (sizeof(Tcl_HashEntry) + strlen(key) - (sizeof(hPtr->key) -1)));
541 hPtr->tablePtr = tablePtr;
542 hPtr->bucketPtr = &(tablePtr->buckets[index]);
543 hPtr->nextPtr = *hPtr->bucketPtr;
544 hPtr->clientData = 0;
545 strcpy(hPtr->key.string, key);
546 *hPtr->bucketPtr = hPtr;
547 tablePtr->numEntries++;
550 * If the table has exceeded a decent size, rebuild it with many
554 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
555 RebuildTable(tablePtr);
561 *----------------------------------------------------------------------
565 * Given a hash table with one-word keys, and a one-word key, find
566 * the entry with a matching key.
569 * The return value is a token for the matching entry in the
570 * hash table, or NULL if there was no matching entry.
575 *----------------------------------------------------------------------
578 static Tcl_HashEntry *
580 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
581 char *key /* Key to use to find matching entry. */
587 index = RANDOM_INDEX(tablePtr, key);
590 * Search all of the entries in the appropriate bucket.
593 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
594 hPtr = hPtr->nextPtr) {
595 if (hPtr->key.oneWordValue == key) {
603 *----------------------------------------------------------------------
607 * Given a hash table with one-word keys, and a one-word key, find
608 * the entry with a matching key. If there is no matching entry,
609 * then create a new entry that does match.
612 * The return value is a pointer to the matching entry. If this
613 * is a newly-created entry, then *newPtr will be set to a non-zero
614 * value; otherwise *newPtr will be set to 0. If this is a new
615 * entry the value stored in the entry will initially be 0.
618 * A new entry may be added to the hash table.
620 *----------------------------------------------------------------------
623 static Tcl_HashEntry *
625 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
626 char *key, /* Key to use to find or create matching
628 int *newPtr /* Store info here telling whether a new
629 * entry was created. */
635 index = RANDOM_INDEX(tablePtr, key);
638 * Search all of the entries in this bucket.
641 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
642 hPtr = hPtr->nextPtr) {
643 if (hPtr->key.oneWordValue == key) {
650 * Entry not found. Add a new one to the bucket.
654 hPtr = (Tcl_HashEntry *) ckalloc(sizeof(Tcl_HashEntry));
655 hPtr->tablePtr = tablePtr;
656 hPtr->bucketPtr = &(tablePtr->buckets[index]);
657 hPtr->nextPtr = *hPtr->bucketPtr;
658 hPtr->clientData = 0;
659 hPtr->key.oneWordValue = key;
660 *hPtr->bucketPtr = hPtr;
661 tablePtr->numEntries++;
664 * If the table has exceeded a decent size, rebuild it with many
668 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
669 RebuildTable(tablePtr);
675 *----------------------------------------------------------------------
679 * Given a hash table with array-of-int keys, and a key, find
680 * the entry with a matching key.
683 * The return value is a token for the matching entry in the
684 * hash table, or NULL if there was no matching entry.
689 *----------------------------------------------------------------------
692 static Tcl_HashEntry *
694 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
695 char *key /* Key to use to find matching entry. */
699 int *arrayPtr = (int *) key;
703 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
704 count > 0; count--, iPtr1++) {
707 index = RANDOM_INDEX(tablePtr, index);
710 * Search all of the entries in the appropriate bucket.
713 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
714 hPtr = hPtr->nextPtr) {
715 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
716 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
720 if (*iPtr1 != *iPtr2) {
729 *----------------------------------------------------------------------
733 * Given a hash table with one-word keys, and a one-word key, find
734 * the entry with a matching key. If there is no matching entry,
735 * then create a new entry that does match.
738 * The return value is a pointer to the matching entry. If this
739 * is a newly-created entry, then *newPtr will be set to a non-zero
740 * value; otherwise *newPtr will be set to 0. If this is a new
741 * entry the value stored in the entry will initially be 0.
744 * A new entry may be added to the hash table.
746 *----------------------------------------------------------------------
749 static Tcl_HashEntry *
751 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
752 char *key, /* Key to use to find or create matching
754 int *newPtr /* Store info here telling whether a new
755 * entry was created. */
759 int *arrayPtr = (int *) key;
763 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
764 count > 0; count--, iPtr1++) {
767 index = RANDOM_INDEX(tablePtr, index);
770 * Search all of the entries in the appropriate bucket.
773 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
774 hPtr = hPtr->nextPtr) {
775 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
776 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
781 if (*iPtr1 != *iPtr2) {
788 * Entry not found. Add a new one to the bucket.
792 hPtr = (Tcl_HashEntry *) ckalloc((unsigned) (sizeof(Tcl_HashEntry)
793 + (tablePtr->keyType*sizeof(int)) - 4));
794 hPtr->tablePtr = tablePtr;
795 hPtr->bucketPtr = &(tablePtr->buckets[index]);
796 hPtr->nextPtr = *hPtr->bucketPtr;
797 hPtr->clientData = 0;
798 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words, count = tablePtr->keyType;
799 count > 0; count--, iPtr1++, iPtr2++) {
802 *hPtr->bucketPtr = hPtr;
803 tablePtr->numEntries++;
806 * If the table has exceeded a decent size, rebuild it with many
810 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
811 RebuildTable(tablePtr);
817 *----------------------------------------------------------------------
821 * This procedure is invoked when an Tcl_FindHashEntry is called
822 * on a table that has been deleted.
825 * If panic returns (which it shouldn't) this procedure returns
831 *----------------------------------------------------------------------
835 static Tcl_HashEntry *
837 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
838 char *key /* Key to use to find matching entry. */
841 panic("called Tcl_FindHashEntry on deleted table");
846 *----------------------------------------------------------------------
850 * This procedure is invoked when an Tcl_CreateHashEntry is called
851 * on a table that has been deleted.
854 * If panic returns (which it shouldn't) this procedure returns
860 *----------------------------------------------------------------------
864 static Tcl_HashEntry *
866 Tcl_HashTable *tablePtr, /* Table in which to lookup entry. */
867 char *key, /* Key to use to find or create matching
869 int *newPtr /* Store info here telling whether a new
870 * entry was created. */
873 panic("called Tcl_CreateHashEntry on deleted table");
878 *----------------------------------------------------------------------
882 * This procedure is invoked when the ratio of entries to hash
883 * buckets becomes too large. It creates a new table with a
884 * larger bucket array and moves all of the entries into the
891 * Memory gets reallocated and entries get re-hashed to new
894 *----------------------------------------------------------------------
899 Tcl_HashTable *tablePtr /* Table to enlarge. */
902 int oldSize, count, index;
903 Tcl_HashEntry **oldBuckets;
904 Tcl_HashEntry **oldChainPtr, **newChainPtr;
907 oldSize = tablePtr->numBuckets;
908 oldBuckets = tablePtr->buckets;
911 * Allocate and initialize the new bucket array, and set up
912 * hashing constants for new array size.
915 tablePtr->numBuckets *= 4;
916 tablePtr->buckets = (Tcl_HashEntry **) ckalloc((unsigned)
917 (tablePtr->numBuckets * sizeof(Tcl_HashEntry *)));
918 for (count = tablePtr->numBuckets, newChainPtr = tablePtr->buckets;
919 count > 0; count--, newChainPtr++) {
922 tablePtr->rebuildSize *= 4;
923 tablePtr->downShift -= 2;
924 tablePtr->mask = (tablePtr->mask << 2) + 3;
927 * Rehash all of the existing entries into the new bucket array.
930 for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
931 for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
932 *oldChainPtr = hPtr->nextPtr;
933 if (tablePtr->keyType == TCL_STRING_KEYS) {
934 index = HashString(hPtr->key.string) & tablePtr->mask;
935 } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) {
936 index = RANDOM_INDEX(tablePtr, hPtr->key.oneWordValue);
941 for (index = 0, count = tablePtr->keyType,
942 iPtr = hPtr->key.words; count > 0; count--, iPtr++) {
945 index = RANDOM_INDEX(tablePtr, index);
947 hPtr->bucketPtr = &(tablePtr->buckets[index]);
948 hPtr->nextPtr = *hPtr->bucketPtr;
949 *hPtr->bucketPtr = hPtr;
954 * Free up the old bucket array, if it was dynamically allocated.
957 if (oldBuckets != tablePtr->staticBuckets) {
958 ckfree((char *) oldBuckets);