hash.h

Go to the documentation of this file.
00001 /* Copyright Benoit Hudson 2007 */
00002 #ifndef details_hash_HEADER
00003 #define details_hash_HEADER
00004 
00005 #ifdef HAVE_CONFIG_H
00006  #include <config.h>
00007 #endif
00008 
00009 #include <limits.h> // CHAR_BIT
00010 BOOST_STATIC_ASSERT(CHAR_BIT == 8);
00011 #include <assert.h>
00012 
00023 namespace hudson { namespace details {
00024 
00025   /************************************************************
00026    * Paul Hsieh's hash function, 32-bit only.
00027    *
00028    * This operates a word at a time, and is often reported to
00029    * be faster than Jenkin's function.
00030    ************************************************************/
00031   template <class iterator, class hashfn>
00032   uint32_t hsieh(iterator begin, const iterator& end, hashfn h) {
00033     BOOST_STATIC_ASSERT(sizeof(h(*begin)) * CHAR_BIT == 32);
00034     uint32_t hash = 0, tmp;
00035 
00036     if (begin == end) return 0;
00037 
00038     /* Main loop */
00039     for( ; begin != end; ++begin) {
00040       uint32_t hashed = h(*begin);
00041       uint32_t hi = hashed >> 16;
00042       uint32_t lo = hashed & ( (1<<16) - 1);
00043       hash += hi;
00044       tmp   = (lo << 11) ^ hash;
00045       hash  = (hash << 16) ^ tmp;
00046       hash += hash >> 11;
00047     }
00048 
00049     /* Force "avalanching" of final 127 bits */
00050     hash ^= hash << 3;
00051     hash += hash >> 5;
00052     hash ^= hash << 4;
00053     hash += hash >> 17;
00054     hash ^= hash << 25;
00055     hash += hash >> 6;
00056 
00057     return hash;
00058   }
00059 
00060   /************************************************************
00061    * Bob Jenkin's hash functions, 32- and 64-bit.
00062    * These are adapted to drop the assumption that we're hashing
00063    * characters.
00064    * @{
00065    ************************************************************/
00066   template <class Int> Int rot(Int x, unsigned k) {
00067     size_t nbits = sizeof(Int) * CHAR_BIT;
00068     return (x << k) | (x >> (nbits - k));
00069   }
00070 
00071   template <size_t wordsize> class mixer;
00072 
00073   template <> struct mixer<4> {
00074     typedef uint32_t word;
00075 
00079     static void mix(uint32_t& a, uint32_t& b, uint32_t& c) {
00080       a -= c;  a ^= rot(c, 4);  c += b;
00081       b -= a;  b ^= rot(a, 6);  a += c;
00082       c -= b;  c ^= rot(b, 8);  b += a;
00083       a -= c;  a ^= rot(c,16);  c += b;
00084       b -= a;  b ^= rot(a,19);  a += c;
00085       c -= b;  c ^= rot(b, 4);  b += a;
00086     }
00087 
00091     static uint32_t final(uint32_t a, uint32_t b, uint32_t c) {
00092       c ^= b; c -= rot(b,14);
00093       a ^= c; a -= rot(c,11);
00094       b ^= a; b -= rot(a,25);
00095       c ^= b; c -= rot(b,16);
00096       a ^= c; a -= rot(c,4);
00097       b ^= a; b -= rot(a,14);
00098       c ^= b; c -= rot(b,24);
00099       return c;
00100     }
00101 
00103     static uint32_t startvalue(uint32_t length) {
00104       return 0xdeadbeef + (((uint32_t)length)<<2);
00105     }
00106   };
00107 
00108 
00109   template <> struct mixer<8> {
00110     typedef uint64_t word;
00111 
00116     static void mix(uint64_t& a, uint64_t& b, uint64_t& c) {
00117       a -= b; a -= c; a ^= (c>>43);
00118       b -= c; b -= a; b ^= (a<<9);
00119       c -= a; c -= b; c ^= (b>>8);
00120       a -= b; a -= c; a ^= (c>>38);
00121       b -= c; b -= a; b ^= (a<<23);
00122       c -= a; c -= b; c ^= (b>>5);
00123       a -= b; a -= c; a ^= (c>>35);
00124       b -= c; b -= a; b ^= (a<<49);
00125       c -= a; c -= b; c ^= (b>>11);
00126       a -= b; a -= c; a ^= (c>>12);
00127       b -= c; b -= a; b ^= (a<<18);
00128       c -= a; c -= b; c ^= (b>>22);
00129     }
00130 
00135     static uint64_t final(uint64_t a, uint64_t b, uint64_t c) {
00136       mix(a,b,c);
00137       return c;
00138     }
00139 
00141     static uint64_t startvalue(uint64_t /*length*/) {
00142       return 0x9e3779b97f4a7c13LL;
00143     }
00144   };
00145 
00146 
00148   template <class iterator, class hasher>
00149   size_t
00150   jenkins(iterator begin, iterator end, const hasher& hash) {
00151     typedef mixer<sizeof(size_t)> M;
00152 
00153     M::word a,b,c;
00154     size_t length = (end - begin); // ideally we wouldn't need this
00155 
00156     /* Set up the internal state */
00157     a = b = c = M::startvalue(length);
00158 
00159     /* Handle most of the key */
00160     while (length > 3) {
00161       a += hash(*begin); ++begin;
00162       b += hash(*begin); ++begin;
00163       c += hash(*begin); ++begin;
00164       M::mix(a,b,c);
00165       length -= 3;
00166     }
00167 
00168     /* Handle the last 3 items */
00169     switch(length) {
00170       case 3: a += hash(*begin); ++begin; // fall-through
00171       case 2: b += hash(*begin); ++begin; // fall-through
00172       case 1: c += hash(*begin); //++begin;
00173               return M::final(a,b,c);
00174       case 0: return c;
00175     }
00176     /* Can't get here. */
00177     assert(0);
00178     return 0;
00179   }
00181 
00182   /************************************************************
00183    * Hash functions for a single word at a time.
00184    * @{
00185    ************************************************************/
00186 
00187   template <size_t wordsize> class hash_single;
00188 
00189   template <> struct hash_single<4> {
00190     /* Thomas Wang's 32-bit hash.  Jenkins claims it's good.
00191      * Should take a mere 9 cycles. */
00192     static uint32_t hash( uint32_t a) {
00193       a = (a ^ 61) ^ (a >> 16);
00194       a = a + (a << 3);
00195       a = a ^ (a >> 4);
00196       a = a * 0x27d4eb2d;
00197       a = a ^ (a >> 15);
00198       return a;
00199     }
00200 
00201 #if 0
00202     /* Renamed from jenkins' 6-shift 4-byte integer hash function.
00203      * This is a good hash for a gappy integer value (like a pointer).
00204      * Should take 12 cycles.  */
00205     static uint32_t hash(uint32_t a) {
00206       a = (a+0x7ed55d16) + (a<<12);
00207       a = (a^0xc761c23c) ^ (a>>19);
00208       a = (a+0x165667b1) + (a<<5);
00209       a = (a+0xd3a2646c) ^ (a<<9);
00210       a = (a+0xfd7046c5) + (a<<3);
00211       a = (a^0xb55a4f09) ^ (a>>16);
00212       return a;
00213     }
00214 #endif
00215   };
00216 
00217   template <> struct hash_single<8> {
00220     static uint64_t hash(uint64_t a) {
00221       a = (~a) + (a << 21); // a = (a << 21) - a - 1;
00222       a = a ^ (a >> 24);
00223       a = (a + (a << 3)) + (a << 8); // a * 265
00224       a = a ^ (a >> 14);
00225       a = (a + (a << 2)) + (a << 4); // a * 21
00226       a = a ^ (a >> 28);
00227       a = a + (a << 31);
00228       return a;
00229     }
00230   };
00232 
00233   /************************************************************
00234    * Hash functions for a pair of words.
00235    * In 32 bits, we can use the 64-bit word-at-a-time hash.
00236    * In 64 bits, I use Jenkin's function, which is overkill.
00237    * @{
00238    ************************************************************/
00239   template <size_t nbytes> struct hash_pair;
00240 
00241   template <> struct hash_pair<4> {
00242     static uint32_t hash(uint32_t a, uint32_t b) {
00243       // hash as a 64-bit quantity, then return the lower 32 bits
00244       uint64_t ab = (((uint64_t)a) << 32) | (uint64_t)b;
00245       uint64_t h = hash_single<8>::hash(ab);
00246       return (uint32_t) h;
00247     }
00248   };
00249 
00250   template <> struct hash_pair<8> {
00251     static uint64_t hash(uint64_t a, uint64_t b) {
00252       // The answer is to run jenkins' hash with a third value of 42
00253       return mixer<8>::final(a, b, 42);
00254     }
00255   };
00257 
00258   /************************************************************
00259    * Hash functions for a range of items.
00260    *
00261    * @{
00262    ************************************************************/
00263   template <size_t nbytes> struct hash_range;
00264 
00265   template <> struct hash_range<4> {
00266     template <class iterator, class hasher>
00267     static uint32_t hash(const iterator& begin, const iterator& end,
00268         const hasher& h) {
00269       return hsieh(begin, end, h);
00270     }
00271   };
00272 
00273   template <> struct hash_range<8> {
00274     template <class iterator, class hasher>
00275     static uint64_t hash(const iterator& begin, const iterator& end,
00276         const hasher& h) {
00277       return jenkins(begin, end, h);
00278     }
00279   };
00280 
00281 } } // close namespace hudson::details
00282 
00283 #endif

Generated on Thu Mar 27 19:04:14 2008 by  doxygen 1.4.6