1 /* $XConsortium: tclHash.c /main/2 1996/08/08 14:44:13 cde-hp $ */
5 * Implementation of in-memory hash tables for Tcl and Tcl-based
8 * Copyright (c) 1991-1993 The Regents of the University of California.
9 * Copyright (c) 1994 Sun Microsystems, Inc.
11 * See the file "license.terms" for information on usage and redistribution
12 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
14 * SCCS: @(#) tclHash.c 1.15 96/02/15 11:50:23
20 * When there are this many entries per bucket, on average, rebuild
21 * the hash table to make it larger.
24 #define REBUILD_MULTIPLIER 3
28 * The following macro takes a preliminary integer hash value and
29 * produces an index into a hash tables bucket list. The idea is
30 * to make it so that preliminary values that are arbitrarily similar
31 * will end up in different buckets. The hash function was taken
32 * from a random-number generator.
35 #define RANDOM_INDEX(tablePtr, i) \
36 (((((long) (i))*1103515245) >> (tablePtr)->downShift) & (tablePtr)->mask)
39 * Procedure prototypes for static procedures in this file:
42 static Tcl_HashEntry * ArrayFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
44 static Tcl_HashEntry * ArrayCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
45 char *key, int *newPtr));
46 static Tcl_HashEntry * BogusFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
48 static Tcl_HashEntry * BogusCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
49 char *key, int *newPtr));
50 static unsigned int HashString _ANSI_ARGS_((char *string));
51 static void RebuildTable _ANSI_ARGS_((Tcl_HashTable *tablePtr));
52 static Tcl_HashEntry * StringFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
54 static Tcl_HashEntry * StringCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
55 char *key, int *newPtr));
56 static Tcl_HashEntry * OneWordFind _ANSI_ARGS_((Tcl_HashTable *tablePtr,
58 static Tcl_HashEntry * OneWordCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr,
59 char *key, int *newPtr));
62 *----------------------------------------------------------------------
64 * Tcl_InitHashTable --
66 * Given storage for a hash table, set up the fields to prepare
67 * the hash table for use.
73 * TablePtr is now ready to be passed to Tcl_FindHashEntry and
74 * Tcl_CreateHashEntry.
76 *----------------------------------------------------------------------
80 Tcl_InitHashTable(tablePtr, keyType)
81 register Tcl_HashTable *tablePtr; /* Pointer to table record, which
82 * is supplied by the caller. */
83 int keyType; /* Type of keys to use in table:
84 * TCL_STRING_KEYS, TCL_ONE_WORD_KEYS,
85 * or an integer >= 2. */
87 tablePtr->buckets = tablePtr->staticBuckets;
88 tablePtr->staticBuckets[0] = tablePtr->staticBuckets[1] = 0;
89 tablePtr->staticBuckets[2] = tablePtr->staticBuckets[3] = 0;
90 tablePtr->numBuckets = TCL_SMALL_HASH_TABLE;
91 tablePtr->numEntries = 0;
92 tablePtr->rebuildSize = TCL_SMALL_HASH_TABLE*REBUILD_MULTIPLIER;
93 tablePtr->downShift = 28;
95 tablePtr->keyType = keyType;
96 if (keyType == TCL_STRING_KEYS) {
97 tablePtr->findProc = StringFind;
98 tablePtr->createProc = StringCreate;
99 } else if (keyType == TCL_ONE_WORD_KEYS) {
100 tablePtr->findProc = OneWordFind;
101 tablePtr->createProc = OneWordCreate;
103 tablePtr->findProc = ArrayFind;
104 tablePtr->createProc = ArrayCreate;
109 *----------------------------------------------------------------------
111 * Tcl_DeleteHashEntry --
113 * Remove a single entry from a hash table.
119 * The entry given by entryPtr is deleted from its table and
120 * should never again be used by the caller. It is up to the
121 * caller to free the clientData field of the entry, if that
124 *----------------------------------------------------------------------
128 Tcl_DeleteHashEntry(entryPtr)
129 Tcl_HashEntry *entryPtr;
131 register Tcl_HashEntry *prevPtr;
133 if (*entryPtr->bucketPtr == entryPtr) {
134 *entryPtr->bucketPtr = entryPtr->nextPtr;
136 for (prevPtr = *entryPtr->bucketPtr; ; prevPtr = prevPtr->nextPtr) {
137 if (prevPtr == NULL) {
138 panic("malformed bucket chain in Tcl_DeleteHashEntry");
140 if (prevPtr->nextPtr == entryPtr) {
141 prevPtr->nextPtr = entryPtr->nextPtr;
146 entryPtr->tablePtr->numEntries--;
147 ckfree((char *) entryPtr);
151 *----------------------------------------------------------------------
153 * Tcl_DeleteHashTable --
155 * Free up everything associated with a hash table except for
156 * the record for the table itself.
162 * The hash table is no longer useable.
164 *----------------------------------------------------------------------
168 Tcl_DeleteHashTable(tablePtr)
169 register Tcl_HashTable *tablePtr; /* Table to delete. */
171 register Tcl_HashEntry *hPtr, *nextPtr;
175 * Free up all the entries in the table.
178 for (i = 0; i < tablePtr->numBuckets; i++) {
179 hPtr = tablePtr->buckets[i];
180 while (hPtr != NULL) {
181 nextPtr = hPtr->nextPtr;
182 ckfree((char *) hPtr);
188 * Free up the bucket array, if it was dynamically allocated.
191 if (tablePtr->buckets != tablePtr->staticBuckets) {
192 ckfree((char *) tablePtr->buckets);
196 * Arrange for panics if the table is used again without
200 tablePtr->findProc = BogusFind;
201 tablePtr->createProc = BogusCreate;
205 *----------------------------------------------------------------------
207 * Tcl_FirstHashEntry --
209 * Locate the first entry in a hash table and set up a record
210 * that can be used to step through all the remaining entries
214 * The return value is a pointer to the first entry in tablePtr,
215 * or NULL if tablePtr has no entries in it. The memory at
216 * *searchPtr is initialized so that subsequent calls to
217 * Tcl_NextHashEntry will return all of the entries in the table,
223 *----------------------------------------------------------------------
227 Tcl_FirstHashEntry(tablePtr, searchPtr)
228 Tcl_HashTable *tablePtr; /* Table to search. */
229 Tcl_HashSearch *searchPtr; /* Place to store information about
230 * progress through the table. */
232 searchPtr->tablePtr = tablePtr;
233 searchPtr->nextIndex = 0;
234 searchPtr->nextEntryPtr = NULL;
235 return Tcl_NextHashEntry(searchPtr);
239 *----------------------------------------------------------------------
241 * Tcl_NextHashEntry --
243 * Once a hash table enumeration has been initiated by calling
244 * Tcl_FirstHashEntry, this procedure may be called to return
245 * successive elements of the table.
248 * The return value is the next entry in the hash table being
249 * enumerated, or NULL if the end of the table is reached.
254 *----------------------------------------------------------------------
258 Tcl_NextHashEntry(searchPtr)
259 register Tcl_HashSearch *searchPtr; /* Place to store information about
260 * progress through the table. Must
261 * have been initialized by calling
262 * Tcl_FirstHashEntry. */
266 while (searchPtr->nextEntryPtr == NULL) {
267 if (searchPtr->nextIndex >= searchPtr->tablePtr->numBuckets) {
270 searchPtr->nextEntryPtr =
271 searchPtr->tablePtr->buckets[searchPtr->nextIndex];
272 searchPtr->nextIndex++;
274 hPtr = searchPtr->nextEntryPtr;
275 searchPtr->nextEntryPtr = hPtr->nextPtr;
280 *----------------------------------------------------------------------
284 * Return statistics describing the layout of the hash table
285 * in its hash buckets.
288 * The return value is a malloc-ed string containing information
289 * about tablePtr. It is the caller's responsibility to free
295 *----------------------------------------------------------------------
299 Tcl_HashStats(tablePtr)
300 Tcl_HashTable *tablePtr; /* Table for which to produce stats. */
302 #define NUM_COUNTERS 10
303 int count[NUM_COUNTERS], overflow, i, j;
305 register Tcl_HashEntry *hPtr;
309 * Compute a histogram of bucket usage.
312 for (i = 0; i < NUM_COUNTERS; i++) {
317 for (i = 0; i < tablePtr->numBuckets; i++) {
319 for (hPtr = tablePtr->buckets[i]; hPtr != NULL; hPtr = hPtr->nextPtr) {
322 if (j < NUM_COUNTERS) {
328 average += (tmp+1.0)*(tmp/tablePtr->numEntries)/2.0;
332 * Print out the histogram and a few other pieces of information.
335 result = (char *) ckalloc((unsigned) ((NUM_COUNTERS*60) + 300));
336 sprintf(result, "%d entries in table, %d buckets\n",
337 tablePtr->numEntries, tablePtr->numBuckets);
338 p = result + strlen(result);
339 for (i = 0; i < NUM_COUNTERS; i++) {
340 sprintf(p, "number of buckets with %d entries: %d\n",
344 sprintf(p, "number of buckets with %d or more entries: %d\n",
345 NUM_COUNTERS, overflow);
347 sprintf(p, "average search distance for entry: %.1f", average);
352 *----------------------------------------------------------------------
356 * Compute a one-word summary of a text string, which can be
357 * used to generate a hash index.
360 * The return value is a one-word summary of the information in
366 *----------------------------------------------------------------------
371 register char *string; /* String from which to compute hash value. */
373 register unsigned int result;
377 * I tried a zillion different hash functions and asked many other
378 * people for advice. Many people had their own favorite functions,
379 * all different, but no-one had much idea why they were good ones.
380 * I chose the one below (multiply by 9 and add new character)
381 * because of the following reasons:
383 * 1. Multiplying by 10 is perfect for keys that are decimal strings,
384 * and multiplying by 9 is just about as good.
385 * 2. Times-9 is (shift-left-3) plus (old). This means that each
386 * character's bits hang around in the low-order bits of the
387 * hash value for ever, plus they spread fairly rapidly up to
388 * the high-order bits to fill out the hash value. This seems
389 * works well both for decimal and non-decimal strings.
399 result += (result<<3) + c;
405 *----------------------------------------------------------------------
409 * Given a hash table with string keys, and a string key, find
410 * the entry with a matching key.
413 * The return value is a token for the matching entry in the
414 * hash table, or NULL if there was no matching entry.
419 *----------------------------------------------------------------------
422 static Tcl_HashEntry *
423 StringFind(tablePtr, key)
424 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
425 char *key; /* Key to use to find matching entry. */
427 register Tcl_HashEntry *hPtr;
428 register char *p1, *p2;
431 index = HashString(key) & tablePtr->mask;
434 * Search all of the entries in the appropriate bucket.
437 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
438 hPtr = hPtr->nextPtr) {
439 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
452 *----------------------------------------------------------------------
456 * Given a hash table with string keys, and a string key, find
457 * the entry with a matching key. If there is no matching entry,
458 * then create a new entry that does match.
461 * The return value is a pointer to the matching entry. If this
462 * is a newly-created entry, then *newPtr will be set to a non-zero
463 * value; otherwise *newPtr will be set to 0. If this is a new
464 * entry the value stored in the entry will initially be 0.
467 * A new entry may be added to the hash table.
469 *----------------------------------------------------------------------
472 static Tcl_HashEntry *
473 StringCreate(tablePtr, key, newPtr)
474 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
475 char *key; /* Key to use to find or create matching
477 int *newPtr; /* Store info here telling whether a new
478 * entry was created. */
480 register Tcl_HashEntry *hPtr;
481 register char *p1, *p2;
484 index = HashString(key) & tablePtr->mask;
487 * Search all of the entries in this bucket.
490 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
491 hPtr = hPtr->nextPtr) {
492 for (p1 = key, p2 = hPtr->key.string; ; p1++, p2++) {
504 * Entry not found. Add a new one to the bucket.
508 hPtr = (Tcl_HashEntry *) ckalloc((unsigned)
509 (sizeof(Tcl_HashEntry) + strlen(key) - (sizeof(hPtr->key) -1)));
510 hPtr->tablePtr = tablePtr;
511 hPtr->bucketPtr = &(tablePtr->buckets[index]);
512 hPtr->nextPtr = *hPtr->bucketPtr;
513 hPtr->clientData = 0;
514 strcpy(hPtr->key.string, key);
515 *hPtr->bucketPtr = hPtr;
516 tablePtr->numEntries++;
519 * If the table has exceeded a decent size, rebuild it with many
523 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
524 RebuildTable(tablePtr);
530 *----------------------------------------------------------------------
534 * Given a hash table with one-word keys, and a one-word key, find
535 * the entry with a matching key.
538 * The return value is a token for the matching entry in the
539 * hash table, or NULL if there was no matching entry.
544 *----------------------------------------------------------------------
547 static Tcl_HashEntry *
548 OneWordFind(tablePtr, key)
549 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
550 register char *key; /* Key to use to find matching entry. */
552 register Tcl_HashEntry *hPtr;
555 index = RANDOM_INDEX(tablePtr, key);
558 * Search all of the entries in the appropriate bucket.
561 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
562 hPtr = hPtr->nextPtr) {
563 if (hPtr->key.oneWordValue == key) {
571 *----------------------------------------------------------------------
575 * Given a hash table with one-word keys, and a one-word key, find
576 * the entry with a matching key. If there is no matching entry,
577 * then create a new entry that does match.
580 * The return value is a pointer to the matching entry. If this
581 * is a newly-created entry, then *newPtr will be set to a non-zero
582 * value; otherwise *newPtr will be set to 0. If this is a new
583 * entry the value stored in the entry will initially be 0.
586 * A new entry may be added to the hash table.
588 *----------------------------------------------------------------------
591 static Tcl_HashEntry *
592 OneWordCreate(tablePtr, key, newPtr)
593 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
594 register char *key; /* Key to use to find or create matching
596 int *newPtr; /* Store info here telling whether a new
597 * entry was created. */
599 register Tcl_HashEntry *hPtr;
602 index = RANDOM_INDEX(tablePtr, key);
605 * Search all of the entries in this bucket.
608 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
609 hPtr = hPtr->nextPtr) {
610 if (hPtr->key.oneWordValue == key) {
617 * Entry not found. Add a new one to the bucket.
621 hPtr = (Tcl_HashEntry *) ckalloc(sizeof(Tcl_HashEntry));
622 hPtr->tablePtr = tablePtr;
623 hPtr->bucketPtr = &(tablePtr->buckets[index]);
624 hPtr->nextPtr = *hPtr->bucketPtr;
625 hPtr->clientData = 0;
626 hPtr->key.oneWordValue = key;
627 *hPtr->bucketPtr = hPtr;
628 tablePtr->numEntries++;
631 * If the table has exceeded a decent size, rebuild it with many
635 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
636 RebuildTable(tablePtr);
642 *----------------------------------------------------------------------
646 * Given a hash table with array-of-int keys, and a key, find
647 * the entry with a matching key.
650 * The return value is a token for the matching entry in the
651 * hash table, or NULL if there was no matching entry.
656 *----------------------------------------------------------------------
659 static Tcl_HashEntry *
660 ArrayFind(tablePtr, key)
661 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
662 char *key; /* Key to use to find matching entry. */
664 register Tcl_HashEntry *hPtr;
665 int *arrayPtr = (int *) key;
666 register int *iPtr1, *iPtr2;
669 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
670 count > 0; count--, iPtr1++) {
673 index = RANDOM_INDEX(tablePtr, index);
676 * Search all of the entries in the appropriate bucket.
679 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
680 hPtr = hPtr->nextPtr) {
681 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
682 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
686 if (*iPtr1 != *iPtr2) {
695 *----------------------------------------------------------------------
699 * Given a hash table with one-word keys, and a one-word key, find
700 * the entry with a matching key. If there is no matching entry,
701 * then create a new entry that does match.
704 * The return value is a pointer to the matching entry. If this
705 * is a newly-created entry, then *newPtr will be set to a non-zero
706 * value; otherwise *newPtr will be set to 0. If this is a new
707 * entry the value stored in the entry will initially be 0.
710 * A new entry may be added to the hash table.
712 *----------------------------------------------------------------------
715 static Tcl_HashEntry *
716 ArrayCreate(tablePtr, key, newPtr)
717 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
718 register char *key; /* Key to use to find or create matching
720 int *newPtr; /* Store info here telling whether a new
721 * entry was created. */
723 register Tcl_HashEntry *hPtr;
724 int *arrayPtr = (int *) key;
725 register int *iPtr1, *iPtr2;
728 for (index = 0, count = tablePtr->keyType, iPtr1 = arrayPtr;
729 count > 0; count--, iPtr1++) {
732 index = RANDOM_INDEX(tablePtr, index);
735 * Search all of the entries in the appropriate bucket.
738 for (hPtr = tablePtr->buckets[index]; hPtr != NULL;
739 hPtr = hPtr->nextPtr) {
740 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words,
741 count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
746 if (*iPtr1 != *iPtr2) {
753 * Entry not found. Add a new one to the bucket.
757 hPtr = (Tcl_HashEntry *) ckalloc((unsigned) (sizeof(Tcl_HashEntry)
758 + (tablePtr->keyType*sizeof(int)) - 4));
759 hPtr->tablePtr = tablePtr;
760 hPtr->bucketPtr = &(tablePtr->buckets[index]);
761 hPtr->nextPtr = *hPtr->bucketPtr;
762 hPtr->clientData = 0;
763 for (iPtr1 = arrayPtr, iPtr2 = hPtr->key.words, count = tablePtr->keyType;
764 count > 0; count--, iPtr1++, iPtr2++) {
767 *hPtr->bucketPtr = hPtr;
768 tablePtr->numEntries++;
771 * If the table has exceeded a decent size, rebuild it with many
775 if (tablePtr->numEntries >= tablePtr->rebuildSize) {
776 RebuildTable(tablePtr);
782 *----------------------------------------------------------------------
786 * This procedure is invoked when an Tcl_FindHashEntry is called
787 * on a table that has been deleted.
790 * If panic returns (which it shouldn't) this procedure returns
796 *----------------------------------------------------------------------
800 static Tcl_HashEntry *
801 BogusFind(tablePtr, key)
802 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
803 char *key; /* Key to use to find matching entry. */
805 panic("called Tcl_FindHashEntry on deleted table");
810 *----------------------------------------------------------------------
814 * This procedure is invoked when an Tcl_CreateHashEntry is called
815 * on a table that has been deleted.
818 * If panic returns (which it shouldn't) this procedure returns
824 *----------------------------------------------------------------------
828 static Tcl_HashEntry *
829 BogusCreate(tablePtr, key, newPtr)
830 Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */
831 char *key; /* Key to use to find or create matching
833 int *newPtr; /* Store info here telling whether a new
834 * entry was created. */
836 panic("called Tcl_CreateHashEntry on deleted table");
841 *----------------------------------------------------------------------
845 * This procedure is invoked when the ratio of entries to hash
846 * buckets becomes too large. It creates a new table with a
847 * larger bucket array and moves all of the entries into the
854 * Memory gets reallocated and entries get re-hashed to new
857 *----------------------------------------------------------------------
861 RebuildTable(tablePtr)
862 register Tcl_HashTable *tablePtr; /* Table to enlarge. */
864 int oldSize, count, index;
865 Tcl_HashEntry **oldBuckets;
866 register Tcl_HashEntry **oldChainPtr, **newChainPtr;
867 register Tcl_HashEntry *hPtr;
869 oldSize = tablePtr->numBuckets;
870 oldBuckets = tablePtr->buckets;
873 * Allocate and initialize the new bucket array, and set up
874 * hashing constants for new array size.
877 tablePtr->numBuckets *= 4;
878 tablePtr->buckets = (Tcl_HashEntry **) ckalloc((unsigned)
879 (tablePtr->numBuckets * sizeof(Tcl_HashEntry *)));
880 for (count = tablePtr->numBuckets, newChainPtr = tablePtr->buckets;
881 count > 0; count--, newChainPtr++) {
884 tablePtr->rebuildSize *= 4;
885 tablePtr->downShift -= 2;
886 tablePtr->mask = (tablePtr->mask << 2) + 3;
889 * Rehash all of the existing entries into the new bucket array.
892 for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) {
893 for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) {
894 *oldChainPtr = hPtr->nextPtr;
895 if (tablePtr->keyType == TCL_STRING_KEYS) {
896 index = HashString(hPtr->key.string) & tablePtr->mask;
897 } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) {
898 index = RANDOM_INDEX(tablePtr, hPtr->key.oneWordValue);
903 for (index = 0, count = tablePtr->keyType,
904 iPtr = hPtr->key.words; count > 0; count--, iPtr++) {
907 index = RANDOM_INDEX(tablePtr, index);
909 hPtr->bucketPtr = &(tablePtr->buckets[index]);
910 hPtr->nextPtr = *hPtr->bucketPtr;
911 *hPtr->bucketPtr = hPtr;
916 * Free up the old bucket array, if it was dynamically allocated.
919 if (oldBuckets != tablePtr->staticBuckets) {
920 ckfree((char *) oldBuckets);