densehashtable.h
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Craig Silverstein
//
// A dense hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory allocation.
// It does this by using an array to store all the data.  We
// steal a value from the key space to indicate "empty" array
// elements (i.e., indices where no item lives) and another to indicate
// "deleted" elements.
//
// (Note it is possible to change the value of the delete key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.  The empty
// value, however, can't be changed.)
//
// To minimize allocation and pointer overhead, we use internal
// probing, in which the hashtable is a single table, and collisions
// are resolved by trying to insert again in another bucket.  The
// most cache-efficient internal probing schemes are linear probing
// (which suffers, alas, from clumping) and quadratic probing, which
// is what we implement by default.
//
// Type requirements: value_type is required to be Copy Constructible
// and Default Constructible.  It is not required to be (and commonly
// isn't) Assignable.
//
// You probably shouldn't use this code directly.  Use
// <google/dense_hash_map> or <google/dense_hash_set> instead.

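// For illustration, a minimal usage sketch of the public wrapper
// (the include path follows the comment above; the sentinel keys
// chosen here are arbitrary "impossible" values for this key type):
//
//    #include <google/dense_hash_map>
//    #include <string>
//
//    google::dense_hash_map<std::string, int> counts;
//    counts.set_empty_key("");         // required before any use
//    counts.set_deleted_key("<del>");  // required before any erase()
//    counts["apple"] = 3;
//    counts.erase("apple");
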
// You can change the following below:
// HT_OCCUPANCY_PCT     -- how full before we double size
// HT_EMPTY_PCT         -- how empty before we halve size
// HT_MIN_BUCKETS       -- default smallest bucket size
//
// You can also change enlarge_factor (which defaults to
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
// HT_EMPTY_PCT) with set_resizing_parameters().
//
// How to decide what values to use?
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_factor, you can use this chart to try to trade off
// expected lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP        Successful          Unsuccessful
// Quadratic collision resolution   1 - ln(1-L) - L/2   1/(1-L) - L - ln(1-L)
// Linear collision resolution      [1+1/(1-L)]/2       [1+1/(1-L)^2]/2
//
// -- enlarge_factor --             0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup      1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup    1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup      1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup    1.12  2.5   3.6   8.5   13.0  50.0  5000.0

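// The quadratic-probing rows of the chart can be reproduced from the
// formulas above.  A standalone sketch (not part of this header):
//
//    #include <cmath>
//    #include <cstdio>
//
//    int main() {
//      const double loads[] = { 0.10, 0.50, 0.60, 0.75, 0.80, 0.90, 0.99 };
//      for (int i = 0; i < 7; ++i) {
//        const double L = loads[i];
//        std::printf("%.2f  %.2f  %.2f\n", L,
//                    1 - std::log(1 - L) - L / 2,           // successful
//                    1 / (1 - L) - L - std::log(1 - L));    // unsuccessful
//      }
//      return 0;
//    }
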
#ifndef _DENSEHASHTABLE_H_
#define _DENSEHASHTABLE_H_

// The probing method
// Linear probing
// #define JUMP_(key, num_probes)    ( 1 )
// Quadratic probing
#define JUMP_(key, num_probes)      ( num_probes )

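// With the quadratic JUMP_ above, the probe loops below add 1, 2, 3, ...
// to the bucket index on successive collisions, so relative to the home
// bucket the offsets visited are the triangular numbers 1, 3, 6, 10, ...
// (mod the table size).  For example (a sketch, assuming a 16-bucket
// table and a key whose hash lands in bucket 5):
//
//    probe 0: bucket  5    (home bucket)
//    probe 1: bucket  6    (5 + 1)
//    probe 2: bucket  8    (6 + 2)
//    probe 3: bucket 11    (8 + 3)
//    probe 4: bucket 15    (11 + 4)
//    probe 5: bucket  4    ((15 + 5) & 15)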

#include <stk_util/util/sparseconfig.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>      // for abort()
#include <algorithm>     // for swap(), e.g.
#include <stdexcept>     // for length_error
#include <iostream>      // for cerr
#include <memory>        // for uninitialized_fill, uninitialized_copy
#include <utility>       // for pair<>
#include <iterator>      // for facts about iterator tags
#include <limits>        // for numeric_limits<>
#include <stk_util/util/libc_allocator_with_realloc.h>
#include <stk_util/util/hashtable-common.h>
#include <stk_util/util/type_traits_google.h>  // for true_type, integral_constant, etc.

_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.

// Value: what is stored in the table (each bucket is a Value).
// Key: something in a 1-to-1 correspondence to a Value, that can be used
//      to search for a Value in the table (find() takes a Key).
// HashFcn: Takes a Key and returns an integer, the more unique the better.
// ExtractKey: given a Value, returns the unique Key associated with it.
//             Must inherit from unary_function, or at least have a
//             result_type enum indicating the return type of operator().
// SetKey: given a Value* and a Key, modifies the value such that
//         ExtractKey(value) == key.  We guarantee this is only called
//         with key == deleted_key or key == empty_key.
// EqualKey: Given two Keys, says whether they are the same (that is,
//           if they are both associated with the same Value).
// Alloc: STL allocator to use to allocate memory.

template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_const_iterator;

// We're just an array, but we need to skip over empty and deleted elements
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                           pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance) advance_past_empty_and_deleted();
  }
  dense_hashtable_iterator() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  pointer pos, end;
};


// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct dense_hashtable_const_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::const_reference reference;
  typedef typename value_alloc_type::const_pointer pointer;

  // "Real" constructor and default constructor
  dense_hashtable_const_iterator(
      const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
      pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance) advance_past_empty_and_deleted();
  }
  dense_hashtable_const_iterator()
    : ht(NULL), pos(pointer()), end(pointer()) { }
  // This lets us convert regular iterators to const iterators
  dense_hashtable_const_iterator(const iterator &it)
    : ht(it.ht), pos(it.pos), end(it.end) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }


  // The actual data
  const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  pointer pos, end;
};

template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class dense_hashtable {
 private:
  typedef typename Alloc::template rebind<Value>::other value_alloc_type;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, SetKey, EqualKey, Alloc>
      iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                         ExtractKey, SetKey, EqualKey, Alloc>
      const_iterator;

  // These come from tr1.  For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT;  // = 50 (out of 100)

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT;  // = 0.4 * HT_OCCUPANCY_PCT

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;

  // ITERATOR FUNCTIONS
  iterator begin()             { return iterator(this, table,
                                                 table + num_buckets, true); }
  iterator end()               { return iterator(this, table + num_buckets,
                                                 table + num_buckets, true); }
  const_iterator begin() const { return const_iterator(this, table,
                                                       table+num_buckets,true); }
  const_iterator end() const   { return const_iterator(this, table + num_buckets,
                                                       table+num_buckets,true); }

  // These come from tr1 unordered_map.  They iterate over 'bucket' n.
  // We'll just consider bucket n to be the n-th element of the table.
  local_iterator begin(size_type i) {
    return local_iterator(this, table + i, table + i+1, false);
  }
  local_iterator end(size_type i) {
    local_iterator it = begin(i);
    if (!test_empty(i) && !test_deleted(i))
      ++it;
    return it;
  }
  const_local_iterator begin(size_type i) const {
    return const_local_iterator(this, table + i, table + i+1, false);
  }
  const_local_iterator end(size_type i) const {
    const_local_iterator it = begin(i);
    if (!test_empty(i) && !test_deleted(i))
      ++it;
    return it;
  }

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const { return settings; }
  key_equal key_eq() const { return key_info; }
  allocator_type get_allocator() const {
    return allocator_type(val_info);
  }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }

 private:
  // Annoyingly, we can't copy values around, because they might have
  // const components (they're probably pair<const X, Y>).  We use
  // explicit destructor invocation and placement new to get around
  // this.  Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();   // delete the old value, if any
    new(dst) value_type(src);
  }
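
  // A sketch of why plain assignment won't work here: with
  // value_type == std::pair<const std::string, int>,
  //
  //    table[i] = v;              // ill-formed: const member blocks operator=
  //
  // does not compile, whereas destroy-then-copy-construct does:
  //
  //    table[i].~value_type();
  //    new (&table[i]) value_type(v);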

  void destroy_buckets(size_type first, size_type last) {
    for ( ; first != last; ++first)
      table[first].~value_type();
  }

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {            // gets rid of any deleted entries we have
    if ( num_deleted ) {             // get rid of deleted before writing
      dense_hashtable tmp(*this);    // copying will get rid of deleted
      swap(tmp);                     // now we are tmp
    }
    assert(num_deleted == 0);
  }

  bool test_deleted_key(const key_type& key) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    // Invariant: !use_deleted implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && equals(key_info.delkey, key);
  }

 public:
  void set_deleted_key(const key_type &key) {
    // the empty indicator (if specified) and the deleted indicator
    // must be different
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Passed the empty-key to set_deleted_key");
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  key_type deleted_key() const {
    assert(settings.use_deleted()
           && "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    return test_deleted_key(get_key(table[bucknum]));
  }
  bool test_deleted(const iterator &it) const {
    return test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator &it) const {
    return test_deleted_key(get_key(*it));
  }

 private:
  // Set it so test_deleted is true.  Returns true if the object wasn't
  // already deleted.
  bool set_deleted(iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  Returns true if the object used to
  // be deleted.
  bool clear_deleted(iterator &it) {
    assert(settings.use_deleted());
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow setting/clearing the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  Returns true if the object used to
  // be deleted.
  bool clear_deleted(const_iterator &it) {
    assert(settings.use_deleted());
    return test_deleted(it);
  }

  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is the "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(table[bucknum]));
  }
  bool test_empty(const iterator &it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(*it));
  }
  bool test_empty(const const_iterator &it) const {
    assert(settings.use_empty());  // we always need to know what's empty!
    return equals(get_key(val_info.emptyval), get_key(*it));
  }

 private:
  void fill_range_with_empty(pointer table_start, pointer table_end) {
    STL_NAMESPACE::uninitialized_fill(table_start, table_end, val_info.emptyval);
  }

 public:
  // TODO(csilvers): change all callers of this to pass in a key instead,
  //                 and take a const key_type instead of const value_type.
  void set_empty_key(const_reference val) {
    // Once you set the empty key, you can't change it
    assert(!settings.use_empty() && "Calling set_empty_key multiple times");
    // The deleted indicator (if specified) and the empty indicator
    // must be different.
    assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey))
           && "Setting the empty key the same as the deleted key");
    settings.set_use_empty(true);
    set_value(&val_info.emptyval, val);

    assert(!table);        // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
  }
  // TODO(sjackman): return a key_type rather than a value_type
  value_type empty_key() const {
    assert(settings.use_empty());
    return val_info.emptyval;
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const         { return num_elements - num_deleted; }
  size_type max_size() const     { return val_info.max_size(); }
  bool empty() const             { return size() == 0; }
  size_type bucket_count() const { return num_buckets; }
  size_type max_bucket_count() const { return max_size(); }
  size_type nonempty_bucket_count() const { return num_elements; }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes.  Returns true if we actually shrank.
  // TODO(csilvers): take a delta so we can take into account inserts
  //                 done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = num_elements - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < sz * shrink_factor) {
        sz /= 2;                            // stay a power of 2
      }
      dense_hashtable tmp(*this, sz);       // Do the actual resizing
      swap(tmp);                            // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);    // because we just considered it
    return retval;
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if ( settings.consider_shrink() ) {  // see if lots of deletes happened
      if ( maybe_shrink() )
        did_resize = true;
    }
    if (num_elements >= (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
      throw std::length_error("resize overflow");
    if ( bucket_count() >= HT_MIN_BUCKETS &&
         (num_elements + delta) <= settings.enlarge_threshold() )
      return did_resize;                 // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size = settings.min_buckets(num_elements + delta, 0);
    if ( needed_size <= bucket_count() )  // we have enough buckets
      return did_resize;

    size_type resize_to =
      settings.min_buckets(num_elements - num_deleted + delta, bucket_count());

    if (resize_to < needed_size &&    // may double resize_to
        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
        static_cast<size_type>(settings.shrink_size(resize_to*2));
      if (num_elements - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }
    dense_hashtable tmp(*this, resize_to);
    swap(tmp);                        // now we are tmp
    return true;
  }
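
  // A concrete walk-through of the policy above (the numbers follow
  // from HT_DEFAULT_STARTING_BUCKETS == 32 and HT_OCCUPANCY_PCT == 50,
  // assuming the default settings): a default-constructed table has 32
  // buckets and an enlarge threshold of 16.  The 17th insert calls
  // resize_delta(1), sees 16 + 1 > 16, and rehashes into 64 buckets,
  // so the load factor never exceeds 0.5.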

  // We require table be not-NULL and empty before calling this.
  void resize_table(size_type /*old_size*/, size_type new_size,
                    true_type) {
    table = val_info.realloc_or_die(table, new_size);
  }

  void resize_table(size_type old_size, size_type new_size, false_type) {
    val_info.deallocate(table, old_size);
    table = val_info.allocate(new_size);
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) {
    clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));

    // We use a normal iterator to get non-deleted buckets from ht.
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;   // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // (JUMP_ ignores its first argument, so it's fine that no 'key'
      // variable is in scope here.)
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           !test_empty(bucknum);  // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      set_value(&table[bucknum], *it);  // copies the value to here
      num_elements++;
    }
    settings.inc_num_ht_copies();
  }

  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {  // resize to this or larger
    if ( settings.consider_shrink() || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > num_elements )
      resize_delta(req_elements - num_elements);
  }

  // Get and change the value of shrink_factor and enlarge_factor.  The
  // description at the beginning of this file explains how to choose
  // the values.  Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type expected_max_items_in_table = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey(),
                           const SetKey& set = SetKey(),
                           const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        num_elements(0),
        num_buckets(expected_max_items_in_table == 0
                    ? HT_DEFAULT_STARTING_BUCKETS
                    : settings.min_buckets(expected_max_items_in_table, 0)),
        val_info(alloc_impl<value_alloc_type>(alloc)),
        table(NULL) {
    // table is NULL until emptyval is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptyval is set
    settings.reset_thresholds(bucket_count());
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht,
                  size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        num_elements(0),
        num_buckets(0),
        val_info(ht.val_info),
        table(NULL) {
    if (!ht.settings.use_empty()) {
      // If use_empty isn't set, copy_from will crash, so we do our own copying.
      assert(ht.empty());
      num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
      settings.reset_thresholds(bucket_count());
      return;
    }
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);  // copy_from() ignores deleted entries
  }

  dense_hashtable& operator=(const dense_hashtable& ht) {
    if (&ht == this) return *this;      // don't copy onto ourselves
    if (!ht.settings.use_empty()) {
      assert(ht.empty());
      dense_hashtable empty_table(ht);  // empty table with ht's thresholds
      this->swap(empty_table);
      return *this;
    }
    settings = ht.settings;
    key_info = ht.key_info;
    set_value(&val_info.emptyval, ht.val_info.emptyval);
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }

  ~dense_hashtable() {
    if (table) {
      destroy_buckets(0, num_buckets);
      val_info.deallocate(table, num_buckets);
    }
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    STL_NAMESPACE::swap(settings, ht.settings);
    STL_NAMESPACE::swap(key_info, ht.key_info);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(num_elements, ht.num_elements);
    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
    { value_type tmp;     // for annoying reasons, swap() doesn't work
      set_value(&tmp, val_info.emptyval);
      set_value(&val_info.emptyval, ht.val_info.emptyval);
      set_value(&ht.val_info.emptyval, tmp);
    }
    STL_NAMESPACE::swap(table, ht.table);
    settings.reset_thresholds(bucket_count());  // this also resets consider_shrink
    // ht's thresholds must be based on *its* (post-swap) bucket count
    ht.settings.reset_thresholds(ht.bucket_count());
    // we purposefully don't swap the allocator, which may not be swap-able
  }

 private:
  void clear_to_size(size_type new_num_buckets) {
    if (!table) {
      table = val_info.allocate(new_num_buckets);
    } else {
      destroy_buckets(0, num_buckets);
      if (new_num_buckets != num_buckets) {   // resize, if necessary
        typedef integral_constant<bool,
            is_same<value_alloc_type,
                    libc_allocator_with_realloc<value_type> >::value>
            realloc_ok;
        resize_table(num_buckets, new_num_buckets, realloc_ok());
      }
    }
    assert(table);
    fill_range_with_empty(table, table + new_num_buckets);
    num_elements = 0;
    num_deleted = 0;
    num_buckets = new_num_buckets;  // our new size
    settings.reset_thresholds(bucket_count());
  }

 public:
  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    // If the table is already empty, and the number of buckets is
    // already as we desire, there's nothing to do.
    const size_type new_num_buckets = settings.min_buckets(0, 0);
    if (num_elements == 0 && new_num_buckets == num_buckets) {
      return;
    }
    clear_to_size(new_num_buckets);
  }

  // Clear the table without resizing it.
  // Mimics the stl_hashtable's behaviour when clear()-ing in that it
  // does not modify the bucket count
  void clear_no_resize() {
    if (num_elements > 0) {
      assert(table);
      destroy_buckets(0, num_buckets);
      fill_range_with_empty(table, table + num_buckets);
    }
    // don't consider shrinking before another erase()
    settings.reset_thresholds(bucket_count());
    num_elements = 0;
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;               // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while ( 1 ) {                           // probe until something happens
      if ( test_empty(bucknum) ) {          // bucket is empty
        if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) { // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                         // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()
             && "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }
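
  // For example (a sketch): looking up a key k whose home bucket holds a
  // different key, followed by a deleted slot, followed by an empty slot,
  // probes three buckets and returns
  // (ILLEGAL_BUCKET, <index of the deleted slot>).  The deleted slot is
  // remembered as the insert position, so the table gradually reclaims
  // tombstones, while the empty slot proves k is absent.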

 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )  // alas, not there
      return end();
    else
      return iterator(this, table + pos.first, table + num_buckets, false);
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )  // alas, not there
      return end();
    else
      return const_iterator(this, table + pos.first, table+num_buckets, false);
  }

  // This is a tr1 method: the bucket a given key is in, or what bucket
  // it would be put in, if it were to be inserted.  Shrug.
  size_type bucket(const key_type& key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
  }

  // Counts how many elements have the given key.  For maps, it's either 0 or 1.
  size_type count(const key_type &key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    iterator pos = find(key);       // either an iterator or end
    if (pos == end()) {
      return pair<iterator,iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;
      return pair<iterator,iterator>(startpos, pos);
    }
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const_iterator pos = find(key); // either an iterator or end
    if (pos == end()) {
      return pair<const_iterator,const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return pair<const_iterator,const_iterator>(startpos, pos);
    }
  }


  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size())
      throw std::length_error("insert overflow");
    if ( test_deleted(pos) ) {  // just replace if it's been deleted
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert(num_deleted > 0);
      --num_deleted;            // used to be, now it isn't
    } else {
      ++num_elements;           // replacing an empty bucket
    }
    set_value(&table[pos], obj);
    return iterator(this, table + pos, table + num_buckets, false);
  }

  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey or emptyval
    assert((!settings.use_empty() || !equals(get_key(obj),
                                             get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false); // false: we didn't insert
    } else {                             // pos.second says where to put it
      return pair<iterator,bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
    size_t dist = STL_NAMESPACE::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)())
      throw std::length_error("insert-range overflow");
    resize_delta(static_cast<size_type>(dist));
    for ( ; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);              // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(key);
    DefaultValue default_value;
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {        // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(default_value(key)).first;
    } else {                             // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not trying to erase delkey or emptyval.
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Erasing the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Erasing the deleted key");
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));     // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      settings.set_consider_shrink(true);  // will think about shrink after next insert
      return 1;                       // because we deleted one thing
    } else {
      return 0;                       // because we deleted nothing
    }
  }

  // Erases the item at pos by marking it deleted; the slot is only
  // reclaimed by a later insert or resize.
  void erase(iterator pos) {
    if ( pos == end() ) return;  // sanity check
    if ( set_deleted(pos) ) {    // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(true);  // will think about shrink after next insert
    }
  }

  void erase(iterator f, iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )      // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(true);    // will think about shrink after next insert
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator.  This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer.  The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
  void erase(const_iterator pos) {
    if ( pos == end() ) return;  // sanity check
    if ( set_deleted(pos) ) {    // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(true);  // will think about shrink after next insert
    }
  }
  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )      // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(true);    // will think about shrink after next insert
  }


  // COMPARISON
  bool operator==(const dense_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for ( const_iterator it = begin(); it != end(); ++it ) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const dense_hashtable& ht) const {
    return !(*this == ht);
  }


  // I/O
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing.
  //
  // NOTE: These functions are currently TODO.  They have not been implemented.
  bool write_metadata(FILE * /*fp*/) {
    squash_deleted();  // so we don't have to worry about delkey
    return false;      // TODO
  }

  bool read_metadata(FILE* /*fp*/) {
    num_deleted = 0;   // since we got rid of them before writing
    assert(settings.use_empty() && "empty_key not set for read_metadata");
    if (table) val_info.deallocate(table, num_buckets);  // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    settings.reset_thresholds(bucket_count());
    table = val_info.allocate(num_buckets);
    assert(table);
    fill_range_with_empty(table, table + num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;      // TODO
  }

  // If your keys and values are simple enough, we can write them to
  // disk for you.  "Simple enough" means value_type is a POD type
  // that contains no pointers.  However, we don't try to normalize
  // endianness.
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
    }
    return false;      // TODO
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;      // TODO
  }

 private:
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) { }

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr, "realloc_or_die is only supported for "
                      "libc_allocator_with_realloc\n");
      exit(1);
      return NULL;
    }
  };

  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A> >
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) { }

    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        // We really should use PRIuS here, but I don't want to have to add
        // a whole new configure option, with concomitant macro namespace
        // pollution, just to print this (unlikely) error message.  So I cast.
        fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
                        "%lu elements for ptr %p",
                static_cast<unsigned long>(n), ptr);
        exit(1);
      }
      return retval;
    }
  };

  // Package allocator with emptyval to eliminate memory needed for
  // the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class ValInfo : public alloc_impl<value_alloc_type> {
   public:
    typedef typename alloc_impl<value_alloc_type>::value_type value_type;

    ValInfo(const alloc_impl<value_alloc_type>& a)
        : alloc_impl<value_alloc_type>(a), emptyval() { }
    ValInfo(const ValInfo& v)
        : alloc_impl<value_alloc_type>(v), emptyval(v.emptyval) { }

    value_type emptyval;  // which key marks unused entries
  };


  // Package functors with another class to eliminate memory needed for
  // zero-size functors.  Since ExtractKey and hasher's operator() might
  // have the same function signature, they must be packaged in
  // different classes.
  struct Settings :
      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };

  // Packages ExtractKey and SetKey functors.
  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
        : ExtractKey(ek),
          SetKey(sk),
          key_equal(eq) {
    }

    // We want to return the exact same type as ExtractKey: Key or const Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    bool equals(const key_type& a, const key_type& b) const {
      return key_equal::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  size_type hash(const key_type& v) const {
    return settings.hash(v);
  }
  bool equals(const key_type& a, const key_type& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const {
    key_info.set_key(v, k);
  }

 private:
  // Actual data
  Settings settings;
  KeyInfo key_info;

  size_type num_deleted;  // how many occupied buckets are marked deleted
  size_type num_elements;
  size_type num_buckets;
  ValInfo val_info;       // holds emptyval, and also the allocator
  pointer table;
};


// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
  dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though it saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
  = static_cast<int>(0.4 *
                     dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);

_END_GOOGLE_NAMESPACE_

#endif /* _DENSEHASHTABLE_H_ */