17 #include "gu_byteswap.h"
/* 12: the long-message routine keeps twelve 64-bit state words (h0..h11)
 * and advances its input pointer by this many 64-bit words per block. */
26 #define _spooky_numVars 12
/* Block size in bytes; 96 = 12 * 8, i.e. _spooky_numVars 64-bit words. */
27 #define _spooky_blockSize 96
/* 192 = 2 * _spooky_blockSize. gu_spooky_inline compares the message length
 * against this value when GU_USE_SPOOKY_SHORT is defined. */
28 #define _spooky_bufSize 192
/* Initial value for part of the hash state: c and d in the short routine,
 * h2, h5, h8 and h11 in the long one. */
29 static uint64_t
const _spooky_const = GU_ULONG_LONG(0xDEADBEEFDEADBEEF);
/*
 * Mixes twelve 64-bit input words, data[0]..data[11], into the twelve state
 * words s0..sB (all updated in place through the pointers).
 *
 * Each row below adds one input word, passed through gu_le64(), to one state
 * word, XORs state words into two others, rotates the first word left by a
 * row-specific count and ends with one more addition. Later rows read values
 * written by earlier rows, so the statement order must not be changed.
 *
 * NOTE(review): `data` is read here but its declaration is not among the
 * visible lines; callers in this file pass a pointer to 64-bit words as the
 * first argument - confirm against the full parameter list.
 * NOTE(review): gu_le64() is presumably the little-endian/host conversion
 * from gu_byteswap.h - confirm.
 */
44 static GU_FORCE_INLINE
void _spooky_mix(
46 uint64_t* s0, uint64_t* s1, uint64_t* s2, uint64_t* s3,
47 uint64_t* s4, uint64_t* s5, uint64_t* s6, uint64_t* s7,
48 uint64_t* s8, uint64_t* s9, uint64_t* sA, uint64_t* sB)
50 *s0 += gu_le64(data[0]); *s2 ^= *sA; *sB ^= *s0; *s0 =GU_ROTL64(*s0,11); *sB += *s1;
51 *s1 += gu_le64(data[1]); *s3 ^= *sB; *s0 ^= *s1; *s1 =GU_ROTL64(*s1,32); *s0 += *s2;
52 *s2 += gu_le64(data[2]); *s4 ^= *s0; *s1 ^= *s2; *s2 =GU_ROTL64(*s2,43); *s1 += *s3;
53 *s3 += gu_le64(data[3]); *s5 ^= *s1; *s2 ^= *s3; *s3 =GU_ROTL64(*s3,31); *s2 += *s4;
54 *s4 += gu_le64(data[4]); *s6 ^= *s2; *s3 ^= *s4; *s4 =GU_ROTL64(*s4,17); *s3 += *s5;
55 *s5 += gu_le64(data[5]); *s7 ^= *s3; *s4 ^= *s5; *s5 =GU_ROTL64(*s5,28); *s4 += *s6;
56 *s6 += gu_le64(data[6]); *s8 ^= *s4; *s5 ^= *s6; *s6 =GU_ROTL64(*s6,39); *s5 += *s7;
57 *s7 += gu_le64(data[7]); *s9 ^= *s5; *s6 ^= *s7; *s7 =GU_ROTL64(*s7,57); *s6 += *s8;
58 *s8 += gu_le64(data[8]); *sA ^= *s6; *s7 ^= *s8; *s8 =GU_ROTL64(*s8,55); *s7 += *s9;
59 *s9 += gu_le64(data[9]); *sB ^= *s7; *s8 ^= *s9; *s9 =GU_ROTL64(*s9,54); *s8 += *sA;
60 *sA += gu_le64(data[10]); *s0 ^= *s8; *s9 ^= *sA; *sA =GU_ROTL64(*sA,22); *s9 += *sB;
61 *sB += gu_le64(data[11]); *s1 ^= *s9; *sA ^= *sB; *sB =GU_ROTL64(*sB,46); *sA += *s0;
/*
 * One pass of final mixing over the twelve state words h0..h11, updated in
 * place. No input data is consumed: each row adds one state word into
 * another, XORs the sum into a third and rotates a word left by a
 * row-specific count. Rows feed each other, so their order is significant.
 * Called from _spooky_end().
 */
80 static GU_FORCE_INLINE
void _spooky_end_part(
81 uint64_t* h0, uint64_t* h1, uint64_t* h2, uint64_t* h3,
82 uint64_t* h4, uint64_t* h5, uint64_t* h6, uint64_t* h7,
83 uint64_t* h8, uint64_t* h9, uint64_t* h10,uint64_t* h11)
85 *h11+= *h1; *h2 ^= *h11; *h1 = GU_ROTL64(*h1,44);
86 *h0 += *h2; *h3 ^= *h0; *h2 = GU_ROTL64(*h2,15);
87 *h1 += *h3; *h4 ^= *h1; *h3 = GU_ROTL64(*h3,34);
88 *h2 += *h4; *h5 ^= *h2; *h4 = GU_ROTL64(*h4,21);
89 *h3 += *h5; *h6 ^= *h3; *h5 = GU_ROTL64(*h5,38);
90 *h4 += *h6; *h7 ^= *h4; *h6 = GU_ROTL64(*h6,33);
91 *h5 += *h7; *h8 ^= *h5; *h7 = GU_ROTL64(*h7,10);
92 *h6 += *h8; *h9 ^= *h6; *h8 = GU_ROTL64(*h8,13);
93 *h7 += *h9; *h10^= *h7; *h9 = GU_ROTL64(*h9,38);
94 *h8 += *h10; *h11^= *h8; *h10= GU_ROTL64(*h10,53);
95 *h9 += *h11; *h0 ^= *h9; *h11= GU_ROTL64(*h11,42);
96 *h10+= *h0; *h1 ^= *h10; *h0 = GU_ROTL64(*h0,54);
/*
 * Finalizes the twelve state words h0..h11 by applying _spooky_end_part()
 * repeatedly to them.
 *
 * NOTE(review): the visible lines show three direct calls followed by a
 * three-iteration loop making the same call. Gaps in the embedded line
 * numbering around both groups suggest that lines (possibly preprocessor
 * conditionals and the declaration of `i`) are missing from this view, so it
 * cannot be determined here whether both forms are compiled or only one.
 * Confirm against the complete file before changing anything.
 */
99 static GU_FORCE_INLINE
void _spooky_end(
100 uint64_t* h0, uint64_t* h1, uint64_t* h2, uint64_t* h3,
101 uint64_t* h4, uint64_t* h5, uint64_t* h6, uint64_t* h7,
102 uint64_t* h8, uint64_t* h9, uint64_t* h10,uint64_t* h11)
105 _spooky_end_part(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
106 _spooky_end_part(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
107 _spooky_end_part(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
110 for (i = 0; i < 3; i++)
112 _spooky_end_part(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
/*
 * Mixing step of the short-message hash: scrambles the four 64-bit state
 * words h0..h3 in place. Each row rotates one word left by a fixed count,
 * adds another word to it and XORs the result into a third. The twelve rows
 * cycle through the words in the order h2, h3, h0, h1 and each row uses the
 * output of the previous ones, so the sequence must be kept as is.
 */
131 static GU_FORCE_INLINE
void _spooky_short_mix(uint64_t* h0, uint64_t* h1,
132 uint64_t* h2, uint64_t* h3)
134 *h2 = GU_ROTL64(*h2,50); *h2 += *h3; *h0 ^= *h2;
135 *h3 = GU_ROTL64(*h3,52); *h3 += *h0; *h1 ^= *h3;
136 *h0 = GU_ROTL64(*h0,30); *h0 += *h1; *h2 ^= *h0;
137 *h1 = GU_ROTL64(*h1,41); *h1 += *h2; *h3 ^= *h1;
138 *h2 = GU_ROTL64(*h2,54); *h2 += *h3; *h0 ^= *h2;
139 *h3 = GU_ROTL64(*h3,48); *h3 += *h0; *h1 ^= *h3;
140 *h0 = GU_ROTL64(*h0,38); *h0 += *h1; *h2 ^= *h0;
141 *h1 = GU_ROTL64(*h1,37); *h1 += *h2; *h3 ^= *h1;
142 *h2 = GU_ROTL64(*h2,62); *h2 += *h3; *h0 ^= *h2;
143 *h3 = GU_ROTL64(*h3,34); *h3 += *h0; *h1 ^= *h3;
144 *h0 = GU_ROTL64(*h0,5); *h0 += *h1; *h2 ^= *h0;
145 *h1 = GU_ROTL64(*h1,36); *h1 += *h2; *h3 ^= *h1;
/*
 * Final mixing step of the short-message hash over the four 64-bit state
 * words h0..h3, updated in place. Each of the eleven rows XORs one word into
 * another, rotates the source word left by a fixed count and then adds the
 * rotated value to the destination. Rows are chained, so order matters.
 * Called once at the end of gu_spooky_short_host().
 */
160 static GU_FORCE_INLINE
void _spooky_short_end(uint64_t* h0, uint64_t* h1,
161 uint64_t* h2, uint64_t* h3)
163 *h3 ^= *h2; *h2 = GU_ROTL64(*h2,15); *h3 += *h2;
164 *h0 ^= *h3; *h3 = GU_ROTL64(*h3,52); *h0 += *h3;
165 *h1 ^= *h0; *h0 = GU_ROTL64(*h0,26); *h1 += *h0;
166 *h2 ^= *h1; *h1 = GU_ROTL64(*h1,51); *h2 += *h1;
167 *h3 ^= *h2; *h2 = GU_ROTL64(*h2,28); *h3 += *h2;
168 *h0 ^= *h3; *h3 = GU_ROTL64(*h3,9); *h0 += *h3;
169 *h1 ^= *h0; *h0 = GU_ROTL64(*h0,47); *h1 += *h0;
170 *h2 ^= *h1; *h1 = GU_ROTL64(*h1,54); *h2 += *h1;
171 *h3 ^= *h2; *h2 = GU_ROTL64(*h2,32); *h3 += *h2;
172 *h0 ^= *h3; *h3 = GU_ROTL64(*h3,25); *h0 += *h3;
173 *h1 ^= *h0; *h0 = GU_ROTL64(*h0,63); *h1 += *h0;
/*
 * Short-message hash: folds `length` bytes starting at `message` into four
 * 64-bit words a, b, c, d using _spooky_short_mix() and _spooky_short_end().
 *
 * NOTE(review): several lines of this function are not visible here (the
 * `length` parameter, the declaration of the pointer union `u`, of `a` and
 * `b`, the branch conditions / case labels for the tail handling, and the
 * stores into `hash`). Comments below describe only the visible statements.
 */
180 static GU_INLINE
void gu_spooky_short_host(
181 const void*
const message,
183 uint64_t*
const hash)
190 #if !GU_ALLOW_UNALIGNED_READS
195 u.p8 = (
const uint8_t *)message;
197 #if !GU_ALLOW_UNALIGNED_READS
/* Local copy of the message for platforms without unaligned reads; the
 * buffer holds 2 * _spooky_numVars 64-bit words.
 * NOTE(review): memcpy() copies `length` bytes, so this presumably relies on
 * callers keeping length within the buffer size - confirm. The condition
 * under which the copy is taken is not visible. */
200 uint64_t buf[_spooky_numVars << 1];
201 memcpy(buf, message, length);
/* Bytes left over after the whole 32-byte chunks (length modulo 32). */
206 size_t remainder = length & 0x1F;
214 uint64_t c = _spooky_const;
215 uint64_t d = _spooky_const;
/* One past the last whole 32-byte chunk: four 64-bit words per chunk. */
219 const uint64_t *end = u.p64 + ((length >> 5) << 2);
/* Per chunk: two words go into c and d, the state is mixed, then the other
 * two words go into a and b. */
222 for (; u.p64 < end; u.p64 += 4)
224 c += gu_le64(u.p64[0]);
225 d += gu_le64(u.p64[1]);
226 _spooky_short_mix(&a, &b, &c, &d);
227 a += gu_le64(u.p64[2]);
228 b += gu_le64(u.p64[3]);
/* Consumes two further words and mixes again.
 * NOTE(review): the guarding condition is not visible - presumably taken
 * when at least 16 bytes remain; confirm. */
234 c += gu_le64(u.p64[0]);
235 d += gu_le64(u.p64[1]);
236 _spooky_short_mix(&a, &b, &c, &d);
/* Puts the low byte of the length into the top byte of d.
 * NOTE(review): this is an assignment, so the value d held before this
 * statement is discarded - confirm this is intended before touching it. */
243 d = ((uint64_t)length) << 56;
/* Tail bytes are added into d (bytes 8..14) and c (bytes 0..7), each at the
 * bit position matching its byte offset within the word.
 * NOTE(review): the selecting labels are not visible; this looks like a
 * fall-through switch on the remaining byte count - confirm. */
247 d += ((uint64_t)u.p8[14]) << 48;
249 d += ((uint64_t)u.p8[13]) << 40;
251 d += ((uint64_t)u.p8[12]) << 32;
253 d += gu_le32(u.p32[2]);
254 c += gu_le64(u.p64[0]);
257 d += ((uint64_t)u.p8[10]) << 16;
259 d += ((uint64_t)u.p8[9]) << 8;
261 d += (uint64_t)u.p8[8];
263 c += gu_le64(u.p64[0]);
266 c += ((uint64_t)u.p8[6]) << 48;
268 c += ((uint64_t)u.p8[5]) << 40;
270 c += ((uint64_t)u.p8[4]) << 32;
272 c += gu_le32(u.p32[0]);
275 c += ((uint64_t)u.p8[2]) << 16;
277 c += ((uint64_t)u.p8[1]) << 8;
279 c += (uint64_t)u.p8[0];
/* Final scrambling of the four state words. */
286 _spooky_short_end(&a, &b, &c, &d);
/*
 * Wrapper around gu_spooky_short_host(): computes the short hash into the
 * memory at `hash`, viewed as two 64-bit words, then passes each word through
 * gu_le64() in place.
 *
 * NOTE(review): the parameter list is not among the visible lines; the body
 * uses `message`, `length` and `hash`. gu_le64() presumably converts between
 * host and little-endian byte order - confirm in gu_byteswap.h.
 */
293 static GU_FORCE_INLINE
void gu_spooky_short(
298 uint64_t*
const u64 = (uint64_t*)hash;
299 gu_spooky_short_host(message, length, u64);
300 u64[0] = gu_le64(u64[0]);
301 u64[1] = gu_le64(u64[1]);
/*
 * Long-message hash: feeds `message` through _spooky_mix() one
 * _spooky_blockSize-byte block at a time over twelve 64-bit state words,
 * mixes in a final zero-padded block and finishes with _spooky_end().
 *
 * NOTE(review): several lines are not visible here (the `length` parameter,
 * declarations of `u`, `end` and `remainder`, initialisation of the state
 * words other than h2/h5/h8/h11, the loop headers, and the stores into
 * `hash`). Comments below describe only the visible statements.
 */
305 static GU_INLINE
void gu_spooky_inline (
306 const void*
const message,
308 uint64_t*
const hash)
310 #ifdef GU_USE_SPOOKY_SHORT
/* Messages shorter than _spooky_bufSize are handed to the short routine.
 * NOTE(review): gu_spooky_short_base is not defined in the visible part of
 * this file; the short routine visible here is gu_spooky_short_host. If no
 * such function exists elsewhere, defining GU_USE_SPOOKY_SHORT would break
 * the build - confirm. */
311 if (length < _spooky_bufSize)
313 gu_spooky_short_base (message, length, hash);
318 uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
/* Scratch block of _spooky_numVars 64-bit words, used for unaligned input
 * and for the padded final block. */
319 uint64_t buf[_spooky_numVars];
326 #if !GU_ALLOW_UNALIGNED_READS
339 h2=h5=h8=h11 = _spooky_const;
341 u.p8 = (
const uint8_t*) message;
/* One past the last whole block: _spooky_numVars words per block. */
342 end = u.p64 + (length/_spooky_blockSize)*_spooky_numVars;
345 #if !GU_ALLOW_UNALIGNED_READS
/* Input address is a multiple of 8: blocks are mixed directly from the
 * message. */
346 if ((u.i & 0x7) == 0)
351 _spooky_mix(u.p64, &h0,&h1,&h2,&h3,&h4,&h5,&h6,&h7,&h8,&h9,&h10,&h11);
352 u.p64 += _spooky_numVars;
354 #if !GU_ALLOW_UNALIGNED_READS
/* Otherwise each block is first copied into buf and mixed from there. */
360 memcpy(buf, u.p64, _spooky_blockSize);
361 _spooky_mix(buf, &h0,&h1,&h2,&h3,&h4,&h5,&h6,&h7,&h8,&h9,&h10,&h11);
362 u.p64 += _spooky_numVars;
/* Final block: the bytes after the last whole block, zero-padded to a full
 * block, with the leftover byte count stored in the block's last byte. */
368 remainder = (length - ((
const uint8_t*)end - (
const uint8_t*)message));
369 memcpy(buf, end, remainder);
370 memset(((uint8_t*)buf) + remainder, 0, _spooky_blockSize - remainder);
371 ((uint8_t*)buf)[_spooky_blockSize - 1] = remainder;
372 _spooky_mix(buf, &h0,&h1,&h2,&h3,&h4,&h5,&h6,&h7,&h8,&h9,&h10,&h11);
375 _spooky_end(&h0,&h1,&h2,&h3,&h4,&h5,&h6,&h7,&h8,&h9,&h10,&h11);
/*
 * Declaration only; the definition is not in the visible part of this file.
 * Takes a message pointer, its length and an output pointer `res`; callers
 * in this file read res[0] and res[1] afterwards.
 * NOTE(review): the return type / storage class line is not visible here.
 */
387 gu_spooky128_host (const
void* const msg,
size_t const len, uint64_t* res);
/*
 * Computes the 128-bit hash of `len` bytes at `msg` into `res`.
 *
 * `res` is treated as an array of two uint64_t: gu_spooky128_host() fills it
 * and both words are then passed through gu_le64() in place.
 * NOTE(review): gu_le64() presumably converts between host and little-endian
 * byte order, which would make the stored result byte-order independent -
 * confirm in gu_byteswap.h. `res` is cast to uint64_t*, so it presumably
 * must be suitably aligned for 64-bit access - confirm with callers.
 */
390 static GU_FORCE_INLINE
void
391 gu_spooky128 (const
void* const msg,
size_t const len,
void* const res)
393 uint64_t*
const r = (uint64_t*)res;
394 gu_spooky128_host (msg, len, r);
395 r[0] = gu_le64(r[0]);
396 r[1] = gu_le64(r[1]);
/*
 * 64-bit hash of `len` bytes at `msg`, derived from gu_spooky128_host().
 *
 * NOTE(review): the declaration of `res` and the return statement are not
 * among the visible lines; presumably one 64-bit word of the 128-bit result
 * is returned - confirm against the full file.
 */
400 static GU_FORCE_INLINE uint64_t
401 gu_spooky64 (
const void*
const msg,
size_t const len)
404 gu_spooky128_host (msg, len, res);
/*
 * 32-bit hash of `len` bytes at `msg`: runs gu_spooky128_host() and returns
 * the first result word truncated to its low 32 bits.
 *
 * NOTE(review): the declaration of `res` is not among the visible lines.
 */
409 static GU_FORCE_INLINE uint32_t
410 gu_spooky32 (
const void*
const msg,
size_t const len)
413 gu_spooky128_host (msg, len, res);
/* The cast keeps only the low 32 bits of the first 64-bit word. */
414 return (uint32_t)res[0];