GCS  0.2.3
gu_mmh3.h
1 // Copyright (C) 2012 Codership Oy <info@codership.com>
2 
12 #ifndef _gu_mmh3_h_
13 #define _gu_mmh3_h_
14 
15 #include "gu_byteswap.h"
16 
17 #include <string.h> // for memset() and memcpy()
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 //-----------------------------------------------------------------------------
24 // Finalization mix - force all bits of a hash block to avalanche
25 
26 static GU_FORCE_INLINE uint32_t _mmh3_fmix32 (uint32_t h)
27 {
28  h ^= h >> 16;
29  h *= 0x85ebca6b;
30  h ^= h >> 13;
31  h *= 0xc2b2ae35;
32  h ^= h >> 16;
33 
34  return h;
35 }
36 
37 static GU_FORCE_INLINE uint64_t _mmh3_fmix64 (uint64_t k)
38 {
39  k ^= k >> 33;
40  k *= GU_ULONG_LONG(0xff51afd7ed558ccd);
41  k ^= k >> 33;
42  k *= GU_ULONG_LONG(0xc4ceb9fe1a85ec53);
43  k ^= k >> 33;
44 
45  return k;
46 }
47 
48 //-----------------------------------------------------------------------------
49 
50 static uint32_t const _mmh3_32_c1 = 0xcc9e2d51;
51 static uint32_t const _mmh3_32_c2 = 0x1b873593;
52 
53 static GU_FORCE_INLINE void
54 _mmh3_block_32 (uint32_t k1, uint32_t* h1)
55 {
56  k1 *= _mmh3_32_c1;
57  k1 = GU_ROTL32(k1,15);
58  k1 *= _mmh3_32_c2;
59 
60  *h1 ^= k1;
61  *h1 = GU_ROTL32(*h1,13);
62  *h1 *= 5;
63  *h1 += 0xe6546b64;
64 }
65 
66 static GU_FORCE_INLINE void
67 _mmh3_blocks_32 (const uint32_t* const blocks,size_t const nblocks,uint32_t* h1)
68 {
69  //----------
70  // body
71 
72  size_t i;
73  for (i = 0; i < nblocks; i++)
74  {
75 //-----------------------------------------------------------------------------
76 // Block read - if your platform needs to do endian-swapping or can only
77 // handle aligned reads, do the conversion here
78  _mmh3_block_32 (gu_le32(blocks[i]), h1);/* convert from little-endian */
79  }
80 }
81 
82 static GU_FORCE_INLINE uint32_t
83 _mmh3_tail_32 (const uint8_t* const tail, size_t const len, uint32_t h1)
84 {
85  //----------
86  // tail
87 
88 #if 0 /* Reference implementation */
89  uint32_t k1 = 0;
90 
91  switch(len & 3)
92  {
93  case 3: k1 ^= tail[2] << 16;
94  case 2: k1 ^= tail[1] << 8;
95  case 1: k1 ^= tail[0];
96 
97  k1 *= _mmh3_32_c1; k1 = GU_ROTL32(k1,15); k1 *= _mmh3_32_c2; h1 ^= k1;
98  };
99 #else /* Optimized implementation */
100  size_t const shift = (len & 3) << 3;
101  if (shift)
102  {
103  uint32_t k1 = gu_le32(((uint32_t*)tail)[0]) & (0x00ffffff>>(24-shift));
104  k1 *= _mmh3_32_c1; k1 = GU_ROTL32(k1,15); k1 *= _mmh3_32_c2; h1 ^= k1;
105  }
106 #endif /* Optimized implementation */
107 
108  //----------
109  // finalization
110 
111  h1 ^= len;
112  h1 = _mmh3_fmix32(h1);
113 
114  return h1;
115 }
116 
117 static GU_FORCE_INLINE uint32_t
118 _mmh32_seed (const void* key, size_t const len, uint32_t seed)
119 {
120  size_t const nblocks = len >> 2;
121  const uint32_t* const blocks = (const uint32_t*)key;
122  const uint8_t* const tail = (const uint8_t*)(blocks + nblocks);
123 
124  _mmh3_blocks_32 (blocks, nblocks, &seed);
125 
126  return _mmh3_tail_32 (tail, len, seed);
127 }
128 
129 // same as FNV32 seed
130 static uint32_t const GU_MMH32_SEED = GU_ULONG(2166136261);
131 
133 #define gu_mmh32(_buf, _len) \
134  _mmh32_seed (_buf, _len, GU_MMH32_SEED);
135 
136 /*
137  * 128-bit MurmurHash3
138  */
139 static uint64_t const _mmh3_128_c1 = GU_ULONG_LONG(0x87c37b91114253d5);
140 static uint64_t const _mmh3_128_c2 = GU_ULONG_LONG(0x4cf5ad432745937f);
141 
142 static GU_FORCE_INLINE void
143 _mmh3_128_block (uint64_t k1, uint64_t k2, uint64_t* h1, uint64_t* h2)
144 {
145  k1 *= _mmh3_128_c1; k1 = GU_ROTL64(k1,31); k1 *= _mmh3_128_c2; *h1 ^= k1;
146 
147  *h1 = GU_ROTL64(*h1,27); *h1 += *h2; *h1 *= 5; *h1 += 0x52dce729;
148 
149  k2 *= _mmh3_128_c2; k2 = GU_ROTL64(k2,33); k2 *= _mmh3_128_c1; *h2 ^= k2;
150 
151  *h2 = GU_ROTL64(*h2,31); *h2 += *h1; *h2 *= 5; *h2 += 0x38495ab5;
152 }
153 
154 static GU_FORCE_INLINE void
155 _mmh3_128_blocks (const uint64_t* const blocks, size_t const nblocks,
156  uint64_t* h1, uint64_t* h2)
157 {
158  //----------
159  // body
160 
161  size_t i;
162  for(i = 0; i < nblocks; i++)
163  {
164 //-----------------------------------------------------------------------------
165 // Block read - if your platform needs to do endian-swapping or can only
166 // handle aligned reads, do the conversion here
167  uint64_t k1 = gu_le64(blocks[i]);
168  i++;
169  uint64_t k2 = gu_le64(blocks[i]);
170 
171  _mmh3_128_block (k1, k2, h1, h2);
172  }
173 }
174 
175 static GU_FORCE_INLINE void
176 _mmh3_128_tail (const uint8_t* const tail, size_t const len,
177  uint64_t h1, uint64_t h2, uint64_t* const out)
178 {
179  //----------
180  // tail
181 
182  uint64_t k1 = 0;
183  uint64_t k2 = 0;
184 
185  switch(len & 15)
186  {
187  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
188  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
189  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
190  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
191  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
192  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
193  case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
194  k2 *= _mmh3_128_c2; k2 = GU_ROTL64(k2,33); k2 *= _mmh3_128_c1; h2 ^= k2;
195  k1 = gu_le64(((uint64_t*)tail)[0]);
196  k1 *= _mmh3_128_c1; k1 = GU_ROTL64(k1,31); k1 *= _mmh3_128_c2; h1 ^= k1;
197  break;
198  case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
199  case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
200  case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
201  case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
202  case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
203  case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
204  case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
205  case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
206  k1 *= _mmh3_128_c1; k1 = GU_ROTL64(k1,31); k1 *= _mmh3_128_c2; h1 ^= k1;
207  };
208 
209  //----------
210  // finalization
211 
212  h1 ^= len; h2 ^= len;
213 
214  h1 += h2;
215  h2 += h1;
216 
217  h1 = _mmh3_fmix64(h1);
218  h2 = _mmh3_fmix64(h2);
219 
220  h1 += h2;
221  h2 += h1;
222 
223  out[0] = h1;
224  out[1] = h2;
225 }
226 
227 static GU_FORCE_INLINE void
228 _mmh3_128_seed (const void* const key, size_t const len,
229  uint64_t s1, uint64_t s2, uint64_t* const out)
230 {
231  size_t const nblocks = (len >> 4) << 1; /* using 64-bit half-blocks */
232  const uint64_t* const blocks = (const uint64_t*)(key);
233  const uint8_t* const tail = (const uint8_t*)(blocks + nblocks);
234 
235  _mmh3_128_blocks (blocks, nblocks, &s1, &s2);
236  _mmh3_128_tail (tail, len, s1, s2, out);
237 }
238 
239 // same as FNV128 seed
240 static uint64_t const GU_MMH128_SEED1 = GU_ULONG_LONG(0x6C62272E07BB0142);
241 static uint64_t const GU_MMH128_SEED2 = GU_ULONG_LONG(0x62B821756295C58D);
242 
243 /* returns hash in the canonical byte order, as a byte array */
244 static GU_FORCE_INLINE void
245 gu_mmh128 (const void* const msg, size_t const len, void* const out)
246 {
247  _mmh3_128_seed (msg, len, GU_MMH128_SEED1, GU_MMH128_SEED2, (uint64_t*)out);
248  uint64_t* const res = (uint64_t*)out;
249  res[0] = gu_le64(res[0]);
250  res[1] = gu_le64(res[1]);
251 }
252 
253 /* returns hash as an integer, in host byte-order */
254 static GU_FORCE_INLINE uint64_t
255 gu_mmh128_64 (const void* const msg, size_t len)
256 {
257  uint64_t res[2];
258  _mmh3_128_seed (msg, len, GU_MMH128_SEED1, GU_MMH128_SEED2, res);
259  return res[0];
260 }
261 
262 /* returns hash as an integer, in host byte-order */
263 static GU_FORCE_INLINE uint32_t
264 gu_mmh128_32 (const void* const msg, size_t len)
265 {
266  uint64_t res[2];
267  _mmh3_128_seed (msg, len, GU_MMH128_SEED1, GU_MMH128_SEED2, res);
268  return (uint32_t)res[0];
269 }
270 
271 /*
272  * Functions to hash message by parts
273  * (only 128-bit version, 32-bit is not relevant any more)
274  */
275 
276 typedef struct gu_mmh128_ctx
277 {
278  uint64_t hash[2];
279  uint64_t tail[2];
280  size_t length;
282 
286 static GU_INLINE void
287 _mmh128_init_seed (gu_mmh128_ctx_t* const mmh,
288  uint64_t const s1,
289  uint64_t const s2)
290 {
291  memset (mmh, 0, sizeof(*mmh));
292  mmh->hash[0] = s1;
293  mmh->hash[1] = s2;
294 }
295 
297 #define gu_mmh128_init(_mmh) \
298  _mmh128_init_seed (_mmh, GU_MMH128_SEED1, GU_MMH128_SEED2);
299 
301 static GU_INLINE void
302 gu_mmh128_append (gu_mmh128_ctx_t* const mmh,
303  const void* part,
304  size_t len)
305 {
306  size_t tail_len = mmh->length & 15;
307 
308  mmh->length += len;
309 
310  if (tail_len) /* there's something in the tail */// do we need this if()?
311  {
312  size_t const to_fill = 16 - tail_len;
313  void* const tail_end = (uint8_t*)mmh->tail + tail_len;
314 
315  if (len >= to_fill) /* we can fill a full block */
316  {
317  memcpy (tail_end, part, to_fill);
318  _mmh3_128_block (gu_le64(mmh->tail[0]), gu_le64(mmh->tail[1]),
319  &mmh->hash[0], &mmh->hash[1]);
320  part = ((char*)part) + to_fill;
321  len -= to_fill;
322  }
323  else
324  {
325  memcpy (tail_end, part, len);
326  return;
327  }
328  }
329 
330  size_t const nblocks = (len >> 4) << 1; /* using 64-bit half-blocks */
331  const uint64_t* const blocks = (const uint64_t*)(part);
332 
333  _mmh3_128_blocks (blocks, nblocks, &mmh->hash[0], &mmh->hash[1]);
334 
335  /* save possible trailing bytes to tail */
336  memcpy (mmh->tail, blocks + nblocks, len & 15);
337 }
338 
340 static GU_INLINE void
341 gu_mmh128_get (const gu_mmh128_ctx_t* const mmh, void* const res)
342 {
343  uint64_t* const r = (uint64_t*)res;
344  _mmh3_128_tail ((const uint8_t*)mmh->tail, mmh->length,
345  mmh->hash[0], mmh->hash[1], r);
346  r[0] = gu_le64(r[0]);
347  r[1] = gu_le64(r[1]);
348 }
349 
350 static GU_INLINE uint64_t
351 gu_mmh128_get64 (const gu_mmh128_ctx_t* const mmh)
352 {
353  uint64_t res[2];
354  _mmh3_128_tail ((const uint8_t*)mmh->tail, mmh->length,
355  mmh->hash[0], mmh->hash[1], res);
356  return res[0];
357 }
358 
359 static GU_INLINE uint32_t
360 gu_mmh128_get32 (const gu_mmh128_ctx_t* const mmh)
361 {
362  uint64_t res[2];
363  _mmh3_128_tail ((const uint8_t*)mmh->tail, mmh->length,
364  mmh->hash[0], mmh->hash[1], res);
365  return (uint32_t)res[0];
366 }
367 
368 /*
369  * Below are fuctions with reference signatures for implementation verification
370  */
371 extern void
372 gu_mmh3_32 (const void* key, int len, uint32_t seed, void* out);
373 
374 #if 0 /* x86 variant is faulty and unsuitable for short keys, ignore */
375 extern void
376 gu_mmh3_x86_128 (const void* key, int len, uint32_t seed, void* out);
377 #endif /* 0 */
378 
379 extern void
380 gu_mmh3_x64_128 (const void* key, int len, uint32_t seed, void* out);
381 
382 #ifdef __cplusplus
383 }
384 #endif
385 
386 #endif /* _gu_mmh3_h_ */
Definition: gu_mmh3.h:276