rpm  5.2.1
rpmhash.c
Go to the documentation of this file.
1 
6 #include "system.h"
7 #include <rpmiotypes.h>
8 #include <rpmio.h>
9 #include <rpmhash.h>
10 #include "debug.h"
11 
12 /*@unchecked@*/
13 int _ht_debug = 0;
14 
15 typedef /*@owned@*/ const void * voidptr;
16 
17 typedef struct hashBucket_s * hashBucket;
18 
21 struct hashBucket_s {
23 /*@owned@*/ voidptr * data;
24  int dataCount;
25 /*@dependent@*/hashBucket next;
26 };
27 
30 struct hashTable_s {
31  struct rpmioItem_s _item;
32  int numBuckets;
33  size_t keySize;
34  int freeData;
35  hashBucket * buckets;
36 /*@relnull@*/
38 /*@relnull@*/
40 #if defined(__LCLINT__)
41 /*@refs@*/
42  int nrefs;
43 #endif
44 };
45 
52 static /*@shared@*/ /*@null@*/
53 hashBucket findEntry(hashTable ht, const void * key)
54  /*@*/
55 {
56  rpmuint32_t hash = 0;
57  hashBucket b;
58 
59  /*@-modunconnomods@*/
60  hash = ht->fn(hash, key, 0) % ht->numBuckets;
61  b = ht->buckets[hash];
62 
63  while (b && b->key && ht->eq(b->key, key))
64  b = b->next;
65  /*@=modunconnomods@*/
66 
67  return b;
68 }
69 
70 int hashEqualityString(const void * key1, const void * key2)
71 {
72  const char *k1 = (const char *)key1;
73  const char *k2 = (const char *)key2;
74  return strcmp(k1, k2);
75 }
76 
77 rpmuint32_t hashFunctionString(rpmuint32_t h, const void * data, size_t size)
78 {
79  const char *key = data;
80 
81  if (size == 0)
82  size = strlen(key);
83 
84  /*
85  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
86  *
87  * This is Daniel J. Bernstein's popular `times 33' hash function as
88  * posted by him years ago on comp.lang.c. It basically uses a function
89  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
90  * known hash functions for strings. Because it is both computed very
91  * fast and distributes very well.
92  *
93  * The magic of number 33, i.e. why it works better than many other
94  * constants, prime or not, has never been adequately explained by
95  * anyone. So I try an explanation: if one experimentally tests all
96  * multipliers between 1 and 256 (as RSE did now) one detects that even
97  * numbers are not useable at all. The remaining 128 odd numbers
98  * (except for the number 1) work more or less all equally well. They
99  * all distribute in an acceptable way and this way fill a hash table
100  * with an average percent of approx. 86%.
101  *
102  * If one compares the Chi^2 values of the variants, the number 33 not
103  * even has the best value. But the number 33 and a few other equally
104  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
105  * advantage to the remaining numbers in the large set of possible
106  * multipliers: their multiply operation can be replaced by a faster
107  * operation based on just one shift plus either a single addition
108  * or subtraction operation. And because a hash function has to both
109  * distribute good _and_ has to be very fast to compute, those few
110  * numbers should be preferred and seems to be the reason why Daniel J.
111  * Bernstein also preferred it.
112  *
113  * Below you can find the variant implemented with the
114  * multiplication optimized via bit shifts and hash unrolled eight
115  * times for speed.
116  * -- Ralf S. Engelschall <rse@engelschall.com>
117  */
118  if (h == 0)
119  h = 5381;
120  for (; size >= 8; size -= 8) {
121  h = ((h << 5) + h) + (rpmuint32_t)*key++;
122  h = ((h << 5) + h) + (rpmuint32_t)*key++;
123  h = ((h << 5) + h) + (rpmuint32_t)*key++;
124  h = ((h << 5) + h) + (rpmuint32_t)*key++;
125  h = ((h << 5) + h) + (rpmuint32_t)*key++;
126  h = ((h << 5) + h) + (rpmuint32_t)*key++;
127  h = ((h << 5) + h) + (rpmuint32_t)*key++;
128  h = ((h << 5) + h) + (rpmuint32_t)*key++;
129  }
130  switch (size) {
131  case 7: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
132  case 6: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
133  case 5: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
134  case 4: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
135  case 3: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
136  case 2: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
137  case 1: h = ((h << 5) + h) + (rpmuint32_t)*key++; break;
138  default: /* case 0: */ break;
139  }
140 
141  return h;
142 }
143 
144 void htAddEntry(hashTable ht, const void * key, const void * data)
145 {
146  rpmuint32_t hash = 0;
147  hashBucket b;
148 
149  hash = ht->fn(hash, key, 0) % ht->numBuckets;
150  b = ht->buckets[hash];
151 
152  while (b && b->key && ht->eq(b->key, key))
153  b = b->next;
154 
155  if (b == NULL) {
156  b = xmalloc(sizeof(*b));
157  if (ht->keySize) {
158  char *k = xmalloc(ht->keySize);
159  memcpy(k, key, ht->keySize);
160  b->key = k;
161  } else {
162  b->key = key;
163  }
164  b->dataCount = 0;
165  b->next = ht->buckets[hash];
166  b->data = NULL;
167  ht->buckets[hash] = b;
168  }
169 
170  b->data = xrealloc(b->data, sizeof(*b->data) * (b->dataCount + 1));
171  b->data[b->dataCount++] = data;
172 }
173 
174 int htHasEntry(hashTable ht, const void * key)
175 {
176  hashBucket b;
177 
178  if (!(b = findEntry(ht, key))) return 0; else return 1;
179 }
180 
181 int htGetEntry(hashTable ht, const void * key, const void * data,
182  int * dataCount, const void * tableKey)
183 {
184  hashBucket b;
185 
186  if ((b = findEntry(ht, key)) == NULL)
187  return 1;
188 
189  if (data)
190  *(const void ***)data = (const void **) b->data;
191  if (dataCount)
192  *dataCount = b->dataCount;
193  if (tableKey)
194  *(const void **)tableKey = b->key;
195 
196  return 0;
197 }
198 
199 const void ** htGetKeys(hashTable ht)
200 {
201  const void ** keys = xcalloc(ht->numBuckets+1, sizeof(const void*));
202  const void ** keypointer = keys;
203  hashBucket b, n;
204  int i;
205 
206  for (i = 0; i < ht->numBuckets; i++) {
207  b = ht->buckets[i];
208  if (b == NULL)
209  continue;
210  if (b->data)
211  *(keys++) = b->key;
212  do {
213  n = b->next;
214  if(n != NULL)
215  *(keys++) = n->key;
216  } while ((b = n) != NULL);
217  }
218 
219  return keypointer;
220 }
221 
222 /*@-mustmod@*/ /* XXX splint on crack */
223 static void htFini(void * _ht)
224  /*@modifies _ht @*/
225 {
226  hashTable ht = _ht;
227  hashBucket b, n;
228  int i;
229 
230  for (i = 0; i < ht->numBuckets; i++) {
231  b = ht->buckets[i];
232  if (b == NULL)
233  continue;
234  ht->buckets[i] = NULL;
235  if (ht->keySize > 0)
236  b->key = _free(b->key);
237  do {
238  n = b->next;
239  if (b->data) {
240  if (ht->freeData)
241  *b->data = _free(*b->data);
242  b->data = _free(b->data);
243  }
244  b = _free(b);
245  } while ((b = n) != NULL);
246  }
247 
248  ht->buckets = _free(ht->buckets);
249 }
250 /*@=mustmod@*/
251 
252 /*@unchecked@*/ /*@only@*/ /*@null@*/
254 
255 static hashTable htGetPool(/*@null@*/ rpmioPool pool)
256  /*@globals _htPool, fileSystem @*/
257  /*@modifies pool, _htPool, fileSystem @*/
258 {
259  hashTable ht;
260 
261  if (_htPool == NULL) {
262  _htPool = rpmioNewPool("ht", sizeof(*ht), -1, _ht_debug,
263  NULL, NULL, htFini);
264  pool = _htPool;
265  }
266  return (hashTable) rpmioGetPool(pool, sizeof(*ht));
267 }
268 
269 hashTable htCreate(int numBuckets, size_t keySize, int freeData,
271 {
272  hashTable ht = htGetPool(_htPool);
273 
274  ht->numBuckets = numBuckets;
275  ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
276  ht->keySize = keySize;
277  ht->freeData = freeData;
278  /*@-assignexpose@*/
279  ht->fn = (fn != NULL ? fn : hashFunctionString);
280  ht->eq = (eq != NULL ? eq : hashEqualityString);
281  /*@=assignexpose@*/
282 
283  return htLink(ht);
284 }