00001 #ifndef FastHash_HEADER
00002 #define FastHash_HEADER
00003
00004
00005
00006
00007
00008 #ifdef HAVE_CONFIG_H
00009 # include <config.h>
00010 #endif
00011
00012 #include <assert.h>
00013 #include <boost/utility.hpp>
00014 #include <string.h>
00015 #include <utility>
00016 #include "SmallMemoryPool.h"
00017
00018 #include "bit-hacks.h"
00019 #include "hash.h"
00020 using namespace std;
00021
00057 namespace hudson {
00058
00061 struct FHS_Stats {
00062 size_t N;
00063 size_t sumnitems;
00064 size_t sumcapacity;
00065 size_t sumnonempty;
00066 vector<size_t> histogram;
00067 FHS_Stats() {
00068 memset(this, 0, sizeof(*this));
00069 }
00070 void print();
00071 };
00072 extern FHS_Stats global_stats;
00073
00080 template <typename Key, typename Value> struct KeyValue {
00081 Key key_;
00082 Value value_;
00083 KeyValue(const Key& k): key_(k) { }
00084
00085 template <typename KeyRef, typename ValueRef> struct Pair {
00086 KeyRef first;
00087 ValueRef second;
00088 Pair(KeyRef f, ValueRef s): first(f), second(s) { }
00089 };
00090 typedef Pair<Key&, Value&> ref;
00091 typedef Pair<const Key&, const Value&> const_ref;
00092 operator ref () { return ref(key_, value_); }
00093 operator const_ref() const { return const_ref(key_, value_); }
00094 operator const Key& () const { return key_; }
00095
00096 bool operator== (const Key& k) const { return key_ == k; }
00097 bool operator!= (const Key& k) const { return !(key_ == k); }
00098 };
00099
00106 template <typename Key> struct KeyValue<Key,void> {
00107 Key key_;
00108 KeyValue(const Key& k): key_(k) { }
00109
00110 typedef Key& ref;
00111 typedef const Key& const_ref;
00112 operator ref () { return key_; }
00113 operator const_ref () const { return key_; }
00114
00115 bool operator== (const Key& k) const { return key_ == k; }
00116 bool operator!= (const Key& k) const { return !(key_ == k); }
00117 };
00118
00125 template <typename Key, typename Value, class hasher, bool NeedsDelete, bool collectStats> class FastHashBase
00126 : public boost::noncopyable {
00127
00128
00129 typedef hudson::KeyValue<Key,Value> KeyValue;
00130
00131 struct link : public SmallMemoryItem<link> {
00132 KeyValue key_;
00133 link *next_;
00134 link(const Key& key): key_(key), next_(NULL) { }
00135 };
00136
00137 public:
00138 struct iterator {
00139 typedef typename KeyValue::ref value_type;
00140
00141 iterator(link *l): l_(l) { }
00142 iterator(const iterator& it): l_(it.l_) { }
00143 bool operator== (const iterator& other) const { return l_ == other.l_; }
00144 bool operator!= (const iterator& other) const { return l_ != other.l_; }
00145 value_type operator*() const { return value_type(this->l_->key_); }
00146
00147 private:
00148 link *l_;
00149 };
00150 struct const_iterator {
00151 typedef typename KeyValue::const_ref value_type;
00152
00153 const_iterator(const link *l): l_(l) { }
00154 const_iterator(const const_iterator& it): l_(it.l_) { }
00155 bool operator== (const const_iterator& other) const { return l_ == other.l_; }
00156 bool operator!= (const const_iterator& other) const { return l_ != other.l_; }
00157 value_type operator*() const { return value_type(this->l_->key_); }
00158
00159 private:
00160 const link *l_;
00161 };
00162
00163
00164
00165
00167 private:
00168 void allocateTable() {
00169 if (capacity() == 0) {
00170 table_ = 0;
00171 return;
00172 }
00173 size_t nbytes = sizeof(*table_) * capacity();
00174 table_ = (link**)SmallMemoryPool::allocate_n(nbytes);
00175 memset(table_, 0, nbytes);
00176 }
00177
00178 void freeTable(link **tab, size_t capacity) {
00179 if (tab != 0) {
00180 size_t nbytes = sizeof(*table_) * capacity;
00181 SmallMemoryPool::deallocate_n(tab, nbytes);
00182 }
00183 }
00185
00186 public:
00193 FastHashBase(size_t size_hint) {
00194 if (size_hint == 0) {
00195 capacity_ = 0;
00196 } else {
00197 capacity_ = bithacks::ceil_pow2(size_hint);
00198 }
00199 n_ = 0;
00200 allocateTable();
00201 }
00202
00206 FastHashBase() {
00207 capacity_ = 0;
00208 n_ = 0;
00209 allocateTable();
00210 }
00211
00219 void reserve(size_t capacity) {
00220 capacity = bithacks::ceil_pow2(capacity);
00221 if (capacity > capacity_) {
00222 resize(capacity);
00223 }
00224 }
00225
00226 public:
00238 ~FastHashBase() {
00239 if(collectStats) { collectstats(global_stats); }
00240 clear(true);
00241 }
00242
00249 void clear(bool free_table = false) {
00250 for (size_t i = 0; i < capacity(); ++i) {
00251 link *l = table_[i];
00252 while(l) {
00253 link *next = l->next_;
00254 delete l;
00255 l = next;
00256 }
00257 table_[i] = NULL;
00258 }
00259 if (free_table) {
00260 freeTable(table_, capacity());
00261 capacity_ = 0;
00262 table_ = NULL;
00263 }
00264 n_ = 0;
00265 }
00266
00267
00268
00269
00270
00271
00272
00273 public:
00274 void collectstats(FHS_Stats& s) {
00275 size_t nbuckets = bithacks::log2(n_) + 1;
00276 if (s.histogram.size() < nbuckets) {
00277 s.histogram.resize(nbuckets);
00278 }
00279
00280 s.N++;
00281 s.sumnitems += n_;
00282 s.sumcapacity += capacity();
00283 for(size_t i = 0; i < capacity(); ++i) {
00284 size_t n = 0;
00285 link *l = table_[i];
00286 while(l) { l = l->next_; n++; }
00287 if (n > 0) {
00288 s.sumnonempty++;
00289 s.histogram[bithacks::log2(n)]++;
00290 }
00291 }
00292 }
00293
00294
00295
00296 private:
00297 size_t mask() const {
00298 assert(capacity_ != 0);
00299 return capacity_ - 1;
00300 }
00301
00302 size_t index(const Key& key) const {
00303 hasher h;
00304 return h(key) & mask();
00305 }
00306
00307
00308 private:
00309 static link *findLink(const Key& key, link *l) {
00310 while (l && l->key_ != key) {
00311 l = l->next_;
00312 }
00313 return l;
00314 }
00315 static const link *findLink(const Key& key, const link *l) {
00316 while (l && l->key_ != key) {
00317 l = l->next_;
00318 }
00319 return l;
00320 }
00321
00322
00323
00324
00325
00326
00328 public:
00329 const_iterator find(const Key& key) const {
00330 if (size() == 0) { return end(); }
00331 size_t i = index(key);
00332 const link *l = findLink(key, table_[i]);
00333 return const_iterator(l);
00334 }
00335 iterator find(const Key& key) {
00336 if (size() == 0) { return end(); }
00337 size_t i = index(key);
00338 link *l = findLink(key, table_[i]);
00339 return iterator(l);
00340 }
00342
00343
00344
00345
00346
00347 public:
00348 pair<iterator,bool> insert(const Key& key) {
00349 if (capacity() == 0) {
00350 resize();
00351 }
00352
00353 hasher h;
00354 size_t hashval = h(key);
00355 size_t i = hashval & mask();
00356 link *l = findLink(key, table_[i]);
00357 if (l) {
00358
00359 return make_pair(iterator(l), false);
00360 } else {
00361
00362 if (n_ == capacity()) {
00363 resize();
00364 i = hashval & mask();
00365 }
00366 link *l = new link(key);
00367 l->next_ = table_[i];
00368 table_[i] = l;
00369 n_++;
00370 return make_pair(iterator(l), true);
00371 }
00372 }
00373
00377 private:
00378 void resize(size_t newcap) {
00379 link **oldtable = table_;
00380 size_t oldcap = capacity();
00381
00382 capacity_ = newcap;
00383
00384 allocateTable();
00385
00386
00387 for(size_t oldi = 0; oldi < oldcap; ++oldi) {
00388 link *l = oldtable[oldi];
00389 while(l) {
00390 link *next = l->next_;
00391 size_t i = index(l->key_);
00392 l->next_ = table_[i];
00393 table_[i] = l;
00394 l = next;
00395 }
00396 }
00397 freeTable(oldtable, oldcap);
00398 }
00399
00400 void resize() {
00401 size_t oldcap = capacity();
00402
00403 if (oldcap == 0) {
00404 resize(2);
00405 } else {
00406 resize(oldcap * 2);
00407 }
00408 }
00409
00411 public:
00412 template <class key_iterator> void insert(key_iterator begin, const key_iterator& end) {
00413 while(begin != end) {
00414 insert(*begin);
00415 ++begin;
00416 }
00417 }
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428 public:
00429 bool erase(const Key& key) {
00430 if (size() == 0) {
00431
00432
00433 return false;
00434 }
00435 size_t i = index(key);
00436 link *l = table_[i];
00437 if (!l) { return false; }
00438 if (l->key_ == key) {
00439 table_[i] = l->next_;
00440 goto remove;
00441 }
00442
00443 while(1) {
00444 link *prev = l;
00445 l = l->next_;
00446 if (!l) { return false; }
00447 if (l->key_ == key) {
00448 prev->next_ = l->next_;
00449 goto remove;
00450 }
00451 }
00452 remove:
00453 n_--;
00454 delete l;
00455
00456 return true;
00457 }
00458
00459
00460
00461
00462 public:
00464 size_t size() const { return n_; }
00465 size_t capacity() const { return capacity_; }
00466
00467
00468
00469
00470
00471
00473 iterator end() { return iterator(NULL); }
00474 const_iterator end() const { return const_iterator(NULL); }
00476
00477
00478
00479
00480 private:
00481 size_t capacity_;
00482 size_t n_;
00483 link **table_;
00484 };
00485
00486
00487
00488
00489
00490
00491
00493 template <typename Key, class hasher = hashers::hash<Key>, bool NeedsDelete = true, bool collectStats = false>
00494 class FastHashSet : public FastHashBase<Key, void, hasher, NeedsDelete, collectStats> {
00495 private:
00496 typedef FastHashBase<Key, void, hasher, NeedsDelete, collectStats> super;
00497 public:
00498 FastHashSet() : super() { }
00499 FastHashSet(size_t size_hint) : super(size_hint) { }
00500
00501 bool has(const Key& k) const { return this->find(k) != this->end(); }
00502 };
00503
00504
00505
00506
00508 template <typename Key, typename Value, class hasher = hashers::hash<Key>, bool NeedsDelete = true, bool collectStats = false>
00509 class FastHashMap : public FastHashBase<Key, Value, hasher, NeedsDelete, collectStats> {
00510 private:
00511 typedef FastHashBase<Key, Value, hasher, NeedsDelete, collectStats> super;
00512 public:
00513 FastHashMap() : super() { }
00514 FastHashMap(size_t size_hint) : super(size_hint) { }
00515
00517 Value& operator[] (const Key& k) {
00518
00519
00520 return (*(super::insert(k).first)).second;
00521 }
00522
00523 Value& findOrDie (const Key& k) {
00524 typename super::iterator it = this->find(k);
00525 assert(it != this->end());
00526 return (*it).second;
00527 }
00528
00529 const Value& findOrDie (const Key& k) const {
00530 typename super::const_iterator it = this->find(k);
00531 assert(it != this->end());
00532 return (*it).second;
00533 }
00534
00536 pair<typename super::iterator,bool> insert(const Key& key, const Value& val) {
00537 pair<typename super::iterator, bool> p = super::insert(key);
00538 if (p.second) {
00539 (*(p.first)).second = val;
00540 }
00541 return p;
00542 }
00543
00545 pair<typename super::iterator,bool> insert(const Key& key) {
00546 return super::insert(key);
00547 }
00548 };
00549
00550 }
00551
00552 #endif