1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_rh_hash_map.h
30 // stl-like hash map/hash set, with predictable performance across platforms/compilers/C run-times libs/etc.
32 // Robin Hood hashing, open addressing, hashtable contains pointers to keys/values, backshifting on erasures.
33 // No support for items with duplicate keys (use vogl::map instead).
36 #include "vogl_core.h"
37 #include "vogl_hash.h"
38 #include "vogl_data_stream_serializer.h"
39 #include "vogl_object_pool.h"
55 void swap(heap_allocator &other)
57 VOGL_NOTE_UNUSED(other);
60 bool is_valid_ptr(void *p) const
62 return (p != NULL) && (reinterpret_cast<uintptr_t>(p) & (VOGL_MIN_ALLOC_ALIGNMENT - 1)) == 0;
65 T *alloc_no_construction()
67 return static_cast<T *>(vogl_malloc(sizeof(T)));
70 void destroy_no_destruction(void *p)
79 object_pool<T> m_pool;
91 void swap(pool_allocator &other)
93 m_pool.swap(other.m_pool);
96 bool is_valid_ptr(const T *p) const
98 return m_pool.is_valid_ptr(p);
101 T *alloc_no_construction()
103 return m_pool.alloc_no_construction();
106 void destroy_no_destruction(T *p)
108 m_pool.destroy_no_destruction(p);
112 // See vogl_types.h for the default hash function and more alternatives.
114 // With default template options the type should define operator size_t() (for hashing) and operator== (for equality).
115 // The Key and Value objects are stored contiguously in the hash table, and will move on rehashing.
116 // Iterators are invalidated on rehashing or erasing.
117 // The Hasher and Equals objects must be bitwise movable (i.e. using memcpy).
118 template <typename Key, typename Value = empty_type, typename Hasher = hasher<Key>, typename Equals = equal_to<Key>, typename Allocator = pool_allocator<std::pair<Key, Value> > >
121 friend class iterator;
122 friend class const_iterator;
130 typedef rh_hash_map<Key, Value, Hasher, Equals> rh_hash_map_type;
131 typedef std::pair<Key, Value> value_type;
132 typedef Key key_type;
133 typedef Value referent_type;
134 typedef Hasher hasher_type;
135 typedef Equals equals_type;
136 typedef Allocator allocator_type;
145 rh_hash_map(const rh_hash_map &other)
153 rh_hash_map &operator=(const rh_hash_map &other)
160 m_values.resize(other.m_values.size());
162 m_hash_shift = other.m_hash_shift;
163 m_grow_threshold = other.m_grow_threshold;
164 m_num_valid = other.m_num_valid;
165 m_hasher = other.m_hasher;
166 m_equals = other.m_equals;
168 const hash_entry *pSrc_entry = other.m_values.get_ptr();
170 hash_entry *pDst_entry = m_values.get_ptr();
171 hash_entry *pDst_entry_end = m_values.end();
173 while (pDst_entry != pDst_entry_end)
175 if (pSrc_entry->m_pValue)
177 pDst_entry->m_pValue = construct_value_type(pSrc_entry->m_pValue->first, pSrc_entry->m_pValue->second);
178 pDst_entry->m_hash = pSrc_entry->m_hash;
182 memset(pDst_entry, 0, sizeof(hash_entry));
192 inline ~rh_hash_map()
197 const Equals &get_equals() const
206 void set_equals(const Equals &equals)
211 const Hasher &get_hasher() const
220 void set_hasher(const Hasher &hasher)
225 const allocator_type &get_allocator() const
229 allocator_type &get_allocator()
234 void set_allocator(const allocator_type &alloc)
239 // erases container, but doesn't free the allocated memory block
242 uint num_remaining = m_num_valid;
246 hash_entry *p = m_values.begin();
247 hash_entry *p_end = m_values.end();
253 delete_value_type(p->m_pValue);
269 hash_entry *p = m_values.begin();
270 hash_entry *p_end = m_values.end();
272 uint num_remaining = m_num_valid;
275 value_type *pValue = p->m_pValue;
278 scalar_type<Key>::destruct(&pValue->first);
279 scalar_type<Value>::destruct(&pValue->second);
289 m_values.clear_no_destruction();
292 m_grow_threshold = 0;
298 // Returns the number of active items in the container.
299 inline uint size() const
304 // Returns the size of the hash table.
305 inline uint get_table_size() const
307 return m_values.size();
310 inline bool is_empty() const
315 // Before insertion, when the size() == get_grow_threshold() the container will rehash (double in size).
316 inline uint get_grow_threshold() const
318 return m_grow_threshold;
321 inline bool will_rehash_on_next_insertion() const
323 return m_num_valid >= m_grow_threshold;
326 inline void reserve(uint new_capacity)
331 uint new_hash_size = new_capacity;
333 new_hash_size = new_hash_size * 2U;
335 if (!math::is_power_of_2(new_hash_size))
336 new_hash_size = math::next_pow2(new_hash_size);
338 new_hash_size = math::maximum<uint>(cMinHashSize, new_hash_size);
340 if (new_hash_size > m_values.size())
341 rehash(new_hash_size);
344 class const_iterator;
348 friend class rh_hash_map<Key, Value, Hasher, Equals>;
349 friend class rh_hash_map<Key, Value, Hasher, Equals>::const_iterator;
353 : m_pTable(NULL), m_index(0)
356 inline iterator(rh_hash_map_type &table, uint index)
357 : m_pTable(&table), m_index(index)
360 inline iterator(const iterator &other)
361 : m_pTable(other.m_pTable), m_index(other.m_index)
365 inline iterator &operator=(const iterator &other)
367 m_pTable = other.m_pTable;
368 m_index = other.m_index;
373 inline iterator operator++(int)
375 iterator result(*this);
381 inline iterator &operator++()
387 inline value_type &operator*() const
391 inline value_type *operator->() const
396 inline bool operator==(const iterator &b) const
398 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
400 inline bool operator!=(const iterator &b) const
402 return !(*this == b);
404 inline bool operator==(const const_iterator &b) const
406 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
408 inline bool operator!=(const const_iterator &b) const
410 return !(*this == b);
413 inline uint get_index() const
419 rh_hash_map_type *m_pTable;
422 inline value_type *get_cur() const
424 VOGL_ASSERT(m_pTable && (m_index < m_pTable->m_values.size()));
425 VOGL_ASSERT(m_pTable->m_values[m_index].m_pValue != NULL);
426 return m_pTable->m_values[m_index].m_pValue;
431 VOGL_ASSERT(m_pTable);
432 m_index = m_pTable->find_next(m_index);
438 friend class rh_hash_map<Key, Value, Hasher, Equals>;
439 friend class rh_hash_map<Key, Value, Hasher, Equals>::iterator;
442 inline const_iterator()
443 : m_pTable(NULL), m_index(0)
446 inline const_iterator(const rh_hash_map_type &table, uint index)
447 : m_pTable(&table), m_index(index)
450 inline const_iterator(const iterator &other)
451 : m_pTable(other.m_pTable), m_index(other.m_index)
454 inline const_iterator(const const_iterator &other)
455 : m_pTable(other.m_pTable), m_index(other.m_index)
459 inline const_iterator &operator=(const const_iterator &other)
461 m_pTable = other.m_pTable;
462 m_index = other.m_index;
466 inline const_iterator &operator=(const iterator &other)
468 m_pTable = other.m_pTable;
469 m_index = other.m_index;
474 inline const_iterator operator++(int)
476 const_iterator result(*this);
482 inline const_iterator &operator++()
488 inline const value_type &operator*() const
492 inline const value_type *operator->() const
497 inline bool operator==(const const_iterator &b) const
499 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
501 inline bool operator!=(const const_iterator &b) const
503 return !(*this == b);
505 inline bool operator==(const iterator &b) const
507 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
509 inline bool operator!=(const iterator &b) const
511 return !(*this == b);
514 inline uint get_index() const
520 const rh_hash_map_type *m_pTable;
523 inline const value_type *get_cur() const
525 VOGL_ASSERT(m_pTable && (m_index < m_pTable->m_values.size()));
526 VOGL_ASSERT(m_pTable->m_values[m_index].m_pValue != NULL);
527 return m_pTable->m_values[m_index].m_pValue;
532 VOGL_ASSERT(m_pTable);
533 m_index = m_pTable->find_next(m_index);
537 inline const_iterator begin() const
542 return const_iterator(*this, find_next(-1));
545 inline const_iterator end() const
547 return const_iterator(*this, m_values.size());
550 inline iterator begin()
555 return iterator(*this, find_next(-1));
558 inline iterator end()
560 return iterator(*this, m_values.size());
563 // insert_result.first will always point to inserted key/value (or the already existing key/value).
564 // insert_result.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value).
565 // insert() may grow the container. After growing, any iterators (and indices) will be invalid.
566 typedef std::pair<iterator, bool> insert_result;
568 inline insert_result insert(const Key &k, const Value &v = Value())
570 insert_result result;
571 if (!insert_no_grow(result, k, v))
575 // This must succeed.
576 if (!insert_no_grow(result, k, v))
578 VOGL_FAIL("insert() failed");
585 inline insert_result insert(const value_type &v)
587 return insert(v.first, v.second);
590 inline bool insert_no_grow(insert_result &result, const Key &k, const Value &v = Value())
592 if ((!m_values.size()) || (m_num_valid >= m_grow_threshold))
595 value_type *pValue = NULL;
596 uint hash = hash_key(k);
597 uint index_init = hash >> m_hash_shift;
599 const uint size = m_values.size();
601 uint index_current = index_init & (size - 1);
602 uint probe_current = 0;
605 hash_entry *pEntry = &m_values[index_current];
607 if (!pEntry->m_pValue)
611 pValue = construct_value_type(k, v);
614 VOGL_ASSERT(m_num_valid <= size);
616 result.first = iterator(*this, index_current);
617 result.second = true;
620 pEntry->m_pValue = pValue;
621 pEntry->m_hash = hash;
625 if ((!pValue) && (m_equals(pEntry->m_pValue->first, k)))
627 result.first = iterator(*this, index_current);
628 result.second = false;
632 uint probe_distance = calc_distance(index_current);
633 if (probe_current > probe_distance)
637 pValue = construct_value_type(k, v);
640 VOGL_ASSERT(m_num_valid <= size);
642 result.first = iterator(*this, index_current);
643 result.second = true;
646 value_type *pTemp_value = pEntry->m_pValue;
647 uint temp_hash = pEntry->m_hash;
649 pEntry->m_pValue = pValue;
650 pEntry->m_hash = hash;
652 pValue = pTemp_value;
655 probe_current = probe_distance;
658 if (++index_current == size)
667 inline Value &operator[](const Key &key)
669 return (insert(key).first)->second;
672 // Returns const ref to value if key is found, otherwise returns the default.
673 inline const Value &value(const Key &key, const Value &def = Value()) const
675 const_iterator it(find(key));
681 // Returns index if found, or index==size() if not found.
682 uint find_index(const Key &k) const
684 uint hash = hash_key(k);
685 uint index_init = hash >> m_hash_shift;
687 const uint size = m_values.size();
689 for (uint i = 0; i < size; ++i)
691 uint index_current = (index_init + i) & (size - 1);
692 const hash_entry *pEntry = &m_values[index_current];
694 if (!pEntry->m_pValue)
697 uint probe_distance = calc_distance(index_current);
698 if (i > probe_distance)
701 if ((hash == pEntry->m_hash) && (m_equals(pEntry->m_pValue->first, k)))
702 return index_current;
708 inline const_iterator find(const Key &k) const
710 return const_iterator(*this, find_index(k));
713 inline iterator find(const Key &k)
715 return iterator(*this, find_index(k));
718 inline Value *find_value(const Key &key)
720 uint index = find_index(key);
721 if (index == m_values.size())
723 return &(m_values[index].m_pEntry->second);
726 inline const Value *find_value(const Key &key) const
728 uint index = find_index(key);
729 if (index == m_values.size())
731 return &(m_values[index].m_pEntry->second);
734 inline bool contains(const Key &key) const
736 return find_index(key) != m_values.size();
739 // All active iterators become invalid after erase().
740 inline bool erase(const Key &k)
745 uint hash = hash_key(k);
746 uint index_init = hash >> m_hash_shift;
748 uint index_current = 0;
750 const uint size = m_values.size();
752 for (uint i = 0; i < size; ++i)
754 index_current = (index_init + i) & (size - 1);
755 hash_entry *pEntry = &m_values[index_current];
757 if (!pEntry->m_pValue)
760 uint probe_distance = calc_distance(index_current);
761 if (i > probe_distance)
764 if ((hash == pEntry->m_hash) && (m_equals(pEntry->m_pValue->first, k)))
766 delete_value_type(pEntry->m_pValue);
773 for (uint i = 1; i < size; ++i)
775 uint prev = (index_current + i - 1) & (size - 1);
776 uint idx = (index_current + i) & (size - 1);
778 if (!m_values[idx].m_pValue)
780 m_values[prev].m_pValue = NULL;
784 uint dist = calc_distance(idx);
787 m_values[prev].m_pValue = NULL;
791 m_values[prev].m_pValue = m_values[idx].m_pValue;
792 m_values[prev].m_hash = m_values[idx].m_hash;
798 inline bool erase(const iterator &it)
800 uint index_current = it.m_index;
801 if ((index_current >= m_values.size()) || (!m_values[index_current].m_pValue))
807 const uint size = m_values.size();
809 delete_value_type(m_values[index_current].m_pValue);
812 for (uint i = 1; i < size; ++i)
814 uint prev = (index_current + i - 1) & (size - 1);
815 uint idx = (index_current + i) & (size - 1);
817 if (!m_values[idx].m_pValue)
819 m_values[prev].m_pValue = NULL;
823 uint dist = calc_distance(idx);
826 m_values[prev].m_pValue = NULL;
830 m_values[prev].m_pValue = m_values[idx].m_pValue;
831 m_values[prev].m_hash = m_values[idx].m_hash;
837 inline void swap(rh_hash_map_type &other)
839 m_values.swap(other.m_values);
840 utils::swap(m_hash_shift, other.m_hash_shift);
841 utils::swap(m_num_valid, other.m_num_valid);
842 utils::swap(m_grow_threshold, other.m_grow_threshold);
843 utils::swap(m_hasher, other.m_hasher);
844 utils::swap(m_equals, other.m_equals);
845 m_allocator.swap(other.m_allocator);
848 // Obviously, this method is very slow! It scans the entire hash table.
849 inline const_iterator search_table_for_value(const Value &val) const
851 for (const_iterator it = begin(); it != end(); ++it)
852 if (it->second == val)
857 // Obviously, this method is very slow! It scans the entire hash table.
858 inline iterator search_table_for_value(const Value &val)
860 for (iterator it = begin(); it != end(); ++it)
861 if (it->second == val)
866 // Obviously, this method is very slow! It scans the entire hash table.
867 inline uint search_table_for_value_get_count(const Value &val) const
870 for (const_iterator it = begin(); it != end(); ++it)
871 if (it->second == val)
876 bool operator==(const rh_hash_map &other) const
881 if (size() != other.size())
884 for (const_iterator it = begin(); it != end(); ++it)
886 const_iterator other_it(other.find(it->first));
887 if (other_it == other.end())
890 if (!(it->second == other_it->second))
897 bool operator!=(const rh_hash_map &other) const
899 return !(*this == other);
902 bool check_failure() const
909 if (!m_values.size())
911 if (m_hash_shift != 32)
912 return check_failure();
914 return check_failure();
915 if (m_grow_threshold)
916 return check_failure();
920 if (m_hash_shift != (32U - math::floor_log2i(m_values.size())))
921 return check_failure();
923 uint total_found = 0;
925 for (uint i = 0; i < m_values.size(); i++)
927 if (m_values[i].m_pValue)
929 if (!m_allocator.is_valid_ptr(m_values[i].m_pValue))
930 return check_failure();
932 uint actual_hash = hash_key(m_values[i].m_pValue->first);
933 if (m_values[i].m_hash != actual_hash)
934 return check_failure();
936 uint dist = calc_distance(i);
937 max_dist = math::maximum<uint>(max_dist, dist);
939 uint k = find_index(m_values[i].m_pValue->first);
941 return check_failure();
947 if (total_found != m_num_valid)
948 return check_failure();
953 // Direct hash table low-level manipulation
955 // index can be retrieved from a iterator by calling get_index()
956 inline bool is_valid_index(uint index) const
958 return (index < m_values.size()) && (m_values[index].m_pValue != NULL);
961 inline const value_type &value_type_at_index(uint index) const
963 return *m_values[index].m_pValue;
966 inline const Key &key_at_index(uint index) const
968 return m_values[index].m_pValue->first;
971 inline const Value &value_at_index(uint index) const
973 return m_values[index].m_pValue->second;
976 inline Value &value_at_index(uint index)
978 return m_values[index].m_pValue->second;
984 value_type *m_pValue;
988 typedef vogl::vector<hash_entry> hash_entry_vec;
990 hash_entry_vec m_values;
996 uint m_grow_threshold;
1001 allocator_type m_allocator;
1003 value_type *construct_value_type(const Key &k, const Value &v)
1005 //value_type *p = static_cast<value_type *>(vogl_malloc(sizeof(value_type)));
1006 value_type *p = m_allocator.alloc_no_construction();
1008 if (VOGL_IS_BITWISE_COPYABLE(Key))
1009 memcpy(&p->first, &k, sizeof(Key));
1011 scalar_type<Key>::construct(&p->first, k);
1013 if (VOGL_IS_BITWISE_COPYABLE(Value))
1014 memcpy(&p->second, &v, sizeof(Value));
1016 scalar_type<Value>::construct(&p->second, v);
1021 void delete_value_type(value_type *p)
1023 scalar_type<Key>::destruct(&p->first);
1024 scalar_type<Value>::destruct(&p->second);
1027 m_allocator.destroy_no_destruction(p);
1030 inline uint32 calc_distance(uint32 index_stored) const
1032 VOGL_ASSERT(m_values[index_stored].m_pValue);
1034 uint32 initial_index = m_values[index_stored].m_hash >> m_hash_shift;
1035 VOGL_ASSERT((index_stored < m_values.size()) && (initial_index < m_values.size()));
1037 uint32 distance = index_stored - initial_index;
1038 if (initial_index > index_stored)
1039 distance += m_values.size();
1044 inline uint hash_key(const Key &k) const
1046 VOGL_ASSERT((1U << (32U - m_hash_shift)) == m_values.size());
1048 uint hash = static_cast<uint>(m_hasher(k));
1050 // Fibonacci hashing
1051 return 2654435769U * hash;
1056 if (m_values.size() >= 0x80000000UL)
1058 // FIXME: This case (ginormous arrays on x64) will die.
1063 rehash(math::maximum<uint>(cMinHashSize, m_values.size() * 2U));
1066 inline void move_into_container(hash_entry *pEntry_to_move)
1069 VOGL_ASSERT(m_num_valid <= m_values.size());
1071 value_type *pValue = pEntry_to_move->m_pValue;
1072 uint hash = pEntry_to_move->m_hash;
1074 uint index_init = hash >> m_hash_shift;
1075 uint probe_current = 0;
1077 const uint size = m_values.size();
1080 for (i = 0; i < size; ++i, ++probe_current)
1082 uint index_current = (index_init + i) & (m_values.size() - 1);
1083 hash_entry *pEntry = &m_values[index_current];
1085 if (!pEntry->m_pValue)
1087 pEntry->m_pValue = pValue;
1088 pEntry->m_hash = hash;
1092 uint probe_distance = calc_distance(index_current);
1093 if (probe_current > probe_distance)
1095 value_type *pTemp_value = pEntry->m_pValue;
1096 uint temp_hash = pEntry->m_hash;
1098 pEntry->m_pValue = pValue;
1099 pEntry->m_hash = hash;
1101 pValue = pTemp_value;
1104 probe_current = probe_distance;
1108 VOGL_ASSERT(i != size);
1111 inline void rehash(uint new_hash_size)
1113 VOGL_ASSERT(new_hash_size >= m_num_valid);
1114 VOGL_ASSERT(math::is_power_of_2(new_hash_size));
1116 if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size()))
1119 rh_hash_map new_map;
1121 new_map.m_values.resize(new_hash_size);
1122 memset(new_map.m_values.get_ptr(), 0, new_map.m_values.size_in_bytes());
1124 new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size);
1125 VOGL_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift)));
1126 new_map.m_grow_threshold = UINT_MAX;
1128 // Move items from old array into new array
1129 hash_entry *pNode = reinterpret_cast<hash_entry *>(m_values.begin());
1130 hash_entry *pNode_end = pNode + m_values.size();
1132 while (pNode != pNode_end)
1134 if (pNode->m_pValue)
1136 new_map.move_into_container(pNode);
1138 if (new_map.m_num_valid == m_num_valid)
1146 new_map.m_grow_threshold = (new_hash_size * 4) / 5;
1149 m_values.clear_no_destruction();
1155 inline uint find_next(uint index) const
1157 if (++index >= m_values.size())
1160 const hash_entry *pEntry = &m_values[index];
1161 const hash_entry *pLast_entry = m_values.end();
1165 if ((pEntry->m_pValue) || (++pEntry == pLast_entry))
1169 return static_cast<uint>(pEntry - m_values.begin());
1173 bool rh_hash_map_test();
1179 template <typename Key, typename Value, typename Hasher, typename Equals, typename Allocator>
1180 inline void swap(vogl::rh_hash_map<Key, Value, Hasher, Equals, Allocator> &a, vogl::rh_hash_map<Key, Value, Hasher, Equals, Allocator> &b)