1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_hash_map.h
30 // stl-like hash map/hash set, with predictable performance across platforms/compilers/C run-times libs/etc.
31 // Hash function ref: "Data Structures and Algorithms with Object-Oriented Design Patterns in C++"
32 // http://www.brpreiss.com/books/opus4/html/page215.html
34 // Compared for performance against VC9's std::hash_map.
36 // Open addressing, linear probing, auto rehashes on ~50% load factor. Uses Knuth's multiplicative method (Fibonacci hashing).
37 // No support for items with duplicate keys (use vogl::map instead).
40 #include "vogl_core.h"
41 #include "vogl_hash.h"
42 #include "vogl_data_stream_serializer.h"
46 // See vogl_types.h for the default hash function and more alternatives.
48 // With default template options the type should define operator size_t() (for hashing) and operator== (for equality).
49 // The Key and Value objects are stored contiguously in the hash table, and will move on rehashing.
50 // Iterators are invalidated on rehashing or erasing.
51 // The Hasher and Equals objects must be bitwise movable (i.e. using memcpy).
52 template <typename Key, typename Value = empty_type, typename Hasher = hasher<Key>, typename Equals = equal_to<Key> >
55 friend class iterator;
56 friend class const_iterator;
70 typedef hash_map<Key, Value, Hasher, Equals> hash_map_type;
71 typedef std::pair<Key, Value> value_type;
73 typedef Value referent_type;
74 typedef Hasher hasher_type;
75 typedef Equals equals_type;
78 : m_hash_shift(32), m_num_valid(0), m_grow_threshold(0)
82 hash_map(const hash_map &other)
83 : m_values(other.m_values),
84 m_hash_shift(other.m_hash_shift),
85 m_hasher(other.m_hasher),
86 m_equals(other.m_equals),
87 m_num_valid(other.m_num_valid),
88 m_grow_threshold(other.m_grow_threshold)
92 hash_map &operator=(const hash_map &other)
99 m_values = other.m_values;
100 m_hash_shift = other.m_hash_shift;
101 m_num_valid = other.m_num_valid;
102 m_grow_threshold = other.m_grow_threshold;
103 m_hasher = other.m_hasher;
104 m_equals = other.m_equals;
114 const Equals &get_equals() const
123 void set_equals(const Equals &equals)
128 const Hasher &get_hasher() const
137 void set_hasher(const Hasher &hasher)
144 if (!m_values.is_empty())
146 if (VOGL_HAS_DESTRUCTOR(Key) || VOGL_HAS_DESTRUCTOR(Value))
148 node *p = &get_node(0);
149 node *p_end = p + m_values.size();
151 uint num_remaining = m_num_valid;
156 destruct_value_type(p);
166 m_values.clear_no_destruction();
170 m_grow_threshold = 0;
174 // erases container, but doesn't free the allocated memory block
180 if (VOGL_HAS_DESTRUCTOR(Key) || VOGL_HAS_DESTRUCTOR(Value))
182 node *p = &get_node(0);
183 node *p_end = p + m_values.size();
185 uint num_remaining = m_num_valid;
190 destruct_value_type(p);
191 p->state = cStateInvalid;
201 else if (sizeof(node) <= 32)
203 memset(&m_values[0], 0, m_values.size_in_bytes());
207 node *p = &get_node(0);
208 node *p_end = p + m_values.size();
210 uint num_remaining = m_num_valid;
215 p->state = cStateInvalid;
229 // Returns the number of active items in the container.
230 inline uint size() const
235 // Returns the size of the hash table.
236 inline uint get_table_size() const
238 return m_values.size();
241 inline bool is_empty() const
246 // Before insertion, when the size() == get_grow_threshold() the container will rehash (double in size).
247 inline uint get_grow_threshold() const
249 return m_grow_threshold;
252 inline bool will_rehash_on_next_insertion() const
254 return m_num_valid >= m_grow_threshold;
257 inline void reserve(uint new_capacity)
262 uint new_hash_size = new_capacity;
264 new_hash_size = new_hash_size * 2U;
266 if (!math::is_power_of_2(new_hash_size))
267 new_hash_size = math::next_pow2(new_hash_size);
269 new_hash_size = math::maximum<uint>(cMinHashSize, new_hash_size);
271 if (new_hash_size > m_values.size())
272 rehash(new_hash_size);
275 class const_iterator;
279 friend class hash_map<Key, Value, Hasher, Equals>;
280 friend class hash_map<Key, Value, Hasher, Equals>::const_iterator;
284 : m_pTable(NULL), m_index(0)
287 inline iterator(hash_map_type &table, uint index)
288 : m_pTable(&table), m_index(index)
291 inline iterator(const iterator &other)
292 : m_pTable(other.m_pTable), m_index(other.m_index)
296 inline iterator &operator=(const iterator &other)
298 m_pTable = other.m_pTable;
299 m_index = other.m_index;
304 inline iterator operator++(int)
306 iterator result(*this);
312 inline iterator &operator++()
318 inline value_type &operator*() const
322 inline value_type *operator->() const
327 inline bool operator==(const iterator &b) const
329 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
331 inline bool operator!=(const iterator &b) const
333 return !(*this == b);
335 inline bool operator==(const const_iterator &b) const
337 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
339 inline bool operator!=(const const_iterator &b) const
341 return !(*this == b);
344 inline uint get_index() const
350 hash_map_type *m_pTable;
353 inline value_type *get_cur() const
355 VOGL_ASSERT(m_pTable && (m_index < m_pTable->m_values.size()));
356 VOGL_ASSERT(m_pTable->get_node_state(m_index) == cStateValid);
358 return &m_pTable->get_node(m_index);
363 VOGL_ASSERT(m_pTable);
364 m_index = m_pTable->find_next(m_index);
370 friend class hash_map<Key, Value, Hasher, Equals>;
371 friend class hash_map<Key, Value, Hasher, Equals>::iterator;
374 inline const_iterator()
375 : m_pTable(NULL), m_index(0)
378 inline const_iterator(const hash_map_type &table, uint index)
379 : m_pTable(&table), m_index(index)
382 inline const_iterator(const iterator &other)
383 : m_pTable(other.m_pTable), m_index(other.m_index)
386 inline const_iterator(const const_iterator &other)
387 : m_pTable(other.m_pTable), m_index(other.m_index)
391 inline const_iterator &operator=(const const_iterator &other)
393 m_pTable = other.m_pTable;
394 m_index = other.m_index;
398 inline const_iterator &operator=(const iterator &other)
400 m_pTable = other.m_pTable;
401 m_index = other.m_index;
406 inline const_iterator operator++(int)
408 const_iterator result(*this);
414 inline const_iterator &operator++()
420 inline const value_type &operator*() const
424 inline const value_type *operator->() const
429 inline bool operator==(const const_iterator &b) const
431 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
433 inline bool operator!=(const const_iterator &b) const
435 return !(*this == b);
437 inline bool operator==(const iterator &b) const
439 return (m_pTable == b.m_pTable) && (m_index == b.m_index);
441 inline bool operator!=(const iterator &b) const
443 return !(*this == b);
446 inline uint get_index() const
452 const hash_map_type *m_pTable;
455 inline const value_type *get_cur() const
457 VOGL_ASSERT(m_pTable && (m_index < m_pTable->m_values.size()));
458 VOGL_ASSERT(m_pTable->get_node_state(m_index) == cStateValid);
460 return &m_pTable->get_node(m_index);
465 VOGL_ASSERT(m_pTable);
466 m_index = m_pTable->find_next(m_index);
470 inline const_iterator begin() const
475 return const_iterator(*this, find_next(-1));
478 inline const_iterator end() const
480 return const_iterator(*this, m_values.size());
483 inline iterator begin()
488 return iterator(*this, find_next(-1));
491 inline iterator end()
493 return iterator(*this, m_values.size());
496 // insert_result.first will always point to inserted key/value (or the already existing key/value).
497 // insert_result.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value).
498 // insert() may grow the container. After growing, any iterators (and indices) will be invalid.
499 typedef std::pair<iterator, bool> insert_result;
501 inline insert_result insert(const Key &k, const Value &v = Value())
503 insert_result result;
504 if (!insert_no_grow(result, k, v))
508 // This must succeed.
509 if (!insert_no_grow(result, k, v))
511 VOGL_FAIL("insert() failed");
518 inline insert_result insert(const value_type &v)
520 return insert(v.first, v.second);
523 inline bool insert_no_grow(insert_result &result, const Key &k, const Value &v = Value())
525 if (!m_values.size())
528 int index = hash_key(k);
529 node *pNode = &get_node(index);
533 if (m_equals(pNode->first, k))
535 result.first = iterator(*this, index);
536 result.second = false;
540 const int orig_index = index;
546 index = m_values.size() - 1;
547 pNode = &get_node(index);
555 if (orig_index == index)
561 if (m_equals(pNode->first, k))
563 result.first = iterator(*this, index);
564 result.second = false;
570 if (m_num_valid >= m_grow_threshold)
573 construct_value_type(pNode, k, v);
575 pNode->state = cStateValid;
578 VOGL_ASSERT(m_num_valid <= m_values.size());
580 result.first = iterator(*this, index);
581 result.second = true;
586 inline Value &operator[](const Key &key)
588 return (insert(key).first)->second;
591 // Returns const ref to value if key is found, otherwise returns the default.
592 inline const Value &value(const Key &key, const Value &def = Value()) const
594 const_iterator it(find(key));
600 inline const_iterator find(const Key &k) const
602 return const_iterator(*this, find_index(k));
605 inline iterator find(const Key &k)
607 return iterator(*this, find_index(k));
610 inline Value *find_value(const Key &key)
612 uint index = find_index(key);
613 if (index == m_values.size())
615 return &(get_node(index).second);
618 inline const Value *find_value(const Key &key) const
620 uint index = find_index(key);
621 if (index == m_values.size())
623 return &(get_node(index).second);
626 inline bool contains(const Key &key) const
628 return find_index(key) != m_values.size();
631 // All active iterators become invalid after erase().
632 inline bool erase(const Key &k)
634 int i = find_index(k);
636 if (i >= static_cast<int>(m_values.size()))
639 node *pDst = &get_node(i);
640 destruct_value_type(pDst);
641 pDst->state = cStateInvalid;
655 i = m_values.size() - 1;
667 r = hash_key(pSrc->first);
669 } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r));
671 move_node(pDst, pSrc);
677 inline void swap(hash_map_type &other)
679 m_values.swap(other.m_values);
680 utils::swap(m_hash_shift, other.m_hash_shift);
681 utils::swap(m_num_valid, other.m_num_valid);
682 utils::swap(m_grow_threshold, other.m_grow_threshold);
683 utils::swap(m_hasher, other.m_hasher);
684 utils::swap(m_equals, other.m_equals);
687 // Obviously, this method is very slow! It scans the entire hash table.
688 inline const_iterator search_table_for_value(const Value &val) const
690 for (const_iterator it = begin(); it != end(); ++it)
691 if (it->second == val)
696 // Obviously, this method is very slow! It scans the entire hash table.
697 inline iterator search_table_for_value(const Value &val)
699 for (iterator it = begin(); it != end(); ++it)
700 if (it->second == val)
705 // Obviously, this method is very slow! It scans the entire hash table.
706 inline uint search_table_for_value_get_count(const Value &val) const
709 for (const_iterator it = begin(); it != end(); ++it)
710 if (it->second == val)
715 bool operator==(const hash_map &other) const
720 if (size() != other.size())
723 for (const_iterator it = begin(); it != end(); ++it)
725 const_iterator other_it(other.find(it->first));
726 if (other_it == other.end())
729 if (!(it->second == other_it->second))
736 bool operator!=(const hash_map &other) const
738 return !(*this == other);
741 // Direct hash table low-level manipulation
743 // index can be retrieved from a iterator by calling get_index()
744 inline bool is_valid_index(uint index) const
746 return (index < m_values.size()) && (get_node(index).state != cStateInvalid);
749 inline const value_type &value_type_at_index(uint index) const
751 return get_node(index).value_type;
754 inline const Key &key_at_index(uint index) const
756 return get_node(index).value_type.first;
759 inline const Value &value_at_index(uint index) const
761 return get_node(index).value_type.first;
764 inline Value &value_at_index(uint index)
766 return get_node(index).value_type.first;
769 // Returns index if found, or index==size() if not found.
770 inline uint find_index(const Key &k) const
774 int index = hash_key(k);
775 const node *pNode = &get_node(index);
779 if (m_equals(pNode->first, k))
782 const int orig_index = index;
788 index = m_values.size() - 1;
789 pNode = &get_node(index);
797 if (index == orig_index)
803 if (m_equals(pNode->first, k))
809 return m_values.size();
812 // This method serializes out the *entire* hash table (including unused nodes), so the key/value must be POD's with no ptrs, etc.
813 bool raw_write_to_serializer(data_stream_serializer &serializer)
815 uint32 size = m_values.size();
816 uint32 inv_size = ~size;
818 serializer << size << m_hash_shift << m_num_valid << m_grow_threshold << static_cast<uint32>(sizeof(node)) << inv_size;
821 serializer.write(m_values.get_ptr(), m_values.size_in_bytes());
823 return !serializer.get_error();
826 uint64_t get_raw_write_size() const
828 return sizeof(uint32) * 6 + m_values.size_in_bytes();
831 // key/value must be POD's with no ptrs, it serializes the raw data.
832 bool raw_read_from_serializer(data_stream_serializer &serializer)
834 uint32 size = serializer.read_uint32();
835 uint32 hash_shift = serializer.read_uint32();
836 uint32 num_valid = serializer.read_uint32();
837 uint32 grow_threshold = serializer.read_uint32();
838 uint32 node_size = serializer.read_uint32();
839 uint32 inv_size = serializer.read_uint32();
841 if ((size != ~inv_size) || (node_size != sizeof(node)))
844 if (size != m_values.size())
846 if (!m_values.try_resize(size))
852 if (!serializer.read(m_values.get_ptr(), m_values.size_in_bytes()))
856 m_hash_shift = hash_shift;
857 m_num_valid = num_valid;
858 m_grow_threshold = grow_threshold;
864 struct node : public value_type
869 static inline void construct_value_type(value_type *pDst, const Key &k, const Value &v)
871 if (VOGL_IS_BITWISE_COPYABLE(Key))
872 memcpy(&pDst->first, &k, sizeof(Key));
874 scalar_type<Key>::construct(&pDst->first, k);
876 if (VOGL_IS_BITWISE_COPYABLE(Value))
877 memcpy(&pDst->second, &v, sizeof(Value));
879 scalar_type<Value>::construct(&pDst->second, v);
882 static inline void construct_value_type(value_type *pDst, const value_type *pSrc)
884 if ((VOGL_IS_BITWISE_COPYABLE(Key)) && (VOGL_IS_BITWISE_COPYABLE(Value)))
886 memcpy(pDst, pSrc, sizeof(value_type));
890 if (VOGL_IS_BITWISE_COPYABLE(Key))
891 memcpy(&pDst->first, &pSrc->first, sizeof(Key));
893 scalar_type<Key>::construct(&pDst->first, pSrc->first);
895 if (VOGL_IS_BITWISE_COPYABLE(Value))
896 memcpy(&pDst->second, &pSrc->second, sizeof(Value));
898 scalar_type<Value>::construct(&pDst->second, pSrc->second);
902 static inline void destruct_value_type(value_type *p)
904 scalar_type<Key>::destruct(&p->first);
905 scalar_type<Value>::destruct(&p->second);
908 // Moves *pSrc to *pDst efficiently.
909 // pDst should NOT be constructed on entry.
910 static inline void move_node(node *pDst, node *pSrc)
912 VOGL_ASSERT(!pDst->state);
914 if (VOGL_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && VOGL_IS_BITWISE_COPYABLE_OR_MOVABLE(Value))
916 memcpy(pDst, pSrc, sizeof(node));
920 if (VOGL_IS_BITWISE_COPYABLE_OR_MOVABLE(Key))
921 memcpy(&pDst->first, &pSrc->first, sizeof(Key));
924 scalar_type<Key>::construct(&pDst->first, pSrc->first);
925 scalar_type<Key>::destruct(&pSrc->first);
928 if (VOGL_IS_BITWISE_COPYABLE_OR_MOVABLE(Value))
929 memcpy(&pDst->second, &pSrc->second, sizeof(Value));
932 scalar_type<Value>::construct(&pDst->second, pSrc->second);
933 scalar_type<Value>::destruct(&pSrc->second);
936 pDst->state = cStateValid;
939 pSrc->state = cStateInvalid;
946 node *p = reinterpret_cast<node *>(this);
947 p->state = cStateInvalid;
952 node *p = reinterpret_cast<node *>(this);
954 hash_map_type::destruct_value_type(p);
957 inline raw_node(const raw_node &other)
959 node *pDst = reinterpret_cast<node *>(this);
960 const node *pSrc = reinterpret_cast<const node *>(&other);
964 hash_map_type::construct_value_type(pDst, pSrc);
965 pDst->state = cStateValid;
968 pDst->state = cStateInvalid;
971 inline raw_node &operator=(const raw_node &rhs)
976 node *pDst = reinterpret_cast<node *>(this);
977 const node *pSrc = reinterpret_cast<const node *>(&rhs);
983 pDst->first = pSrc->first;
984 pDst->second = pSrc->second;
988 hash_map_type::construct_value_type(pDst, pSrc);
989 pDst->state = cStateValid;
992 else if (pDst->state)
994 hash_map_type::destruct_value_type(pDst);
995 pDst->state = cStateInvalid;
1001 uint8 m_bits[sizeof(node)];
1004 typedef vogl::vector<raw_node> node_vector;
1006 node_vector m_values;
1014 uint m_grow_threshold;
1016 inline int hash_key(const Key &k) const
1018 VOGL_ASSERT((1U << (32U - m_hash_shift)) == m_values.size());
1020 uint hash = static_cast<uint>(m_hasher(k));
1022 // Fibonacci hashing
1023 hash = (2654435769U * hash) >> m_hash_shift;
1025 VOGL_ASSERT(hash < m_values.size());
1029 inline const node &get_node(uint index) const
1031 return *reinterpret_cast<const node *>(&m_values[index]);
1034 inline node &get_node(uint index)
1036 return *reinterpret_cast<node *>(&m_values[index]);
1039 inline state get_node_state(uint index) const
1041 return static_cast<state>(get_node(index).state);
1044 inline void set_node_state(uint index, bool valid)
1046 get_node(index).state = valid;
1051 if (m_values.size() >= 0x80000000UL)
1053 // FIXME: This case (ginormous arrays on x64) will die.
1058 rehash(math::maximum<uint>(cMinHashSize, m_values.size() * 2U));
1061 inline void rehash(uint new_hash_size)
1063 VOGL_ASSERT(new_hash_size >= m_num_valid);
1064 VOGL_ASSERT(math::is_power_of_2(new_hash_size));
1066 if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size()))
1070 new_map.m_values.resize(new_hash_size);
1071 new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size);
1072 VOGL_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift)));
1073 new_map.m_grow_threshold = UINT_MAX;
1075 // Move items from old array into new array
1076 node *pNode = reinterpret_cast<node *>(m_values.begin());
1077 node *pNode_end = pNode + m_values.size();
1079 while (pNode != pNode_end)
1083 new_map.move_into(pNode);
1085 if (new_map.m_num_valid == m_num_valid)
1092 // Account for 50% target load factor
1093 new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U;
1096 m_values.clear_no_destruction();
1102 inline uint find_next(int index) const
1106 if (index >= static_cast<int>(m_values.size()))
1109 const node *pNode = &get_node(index);
1116 if (++index >= static_cast<int>(m_values.size()))
1125 inline void move_into(node *pNode)
1127 int index = hash_key(pNode->first);
1128 node *pDst_node = &get_node(index);
1130 if (pDst_node->state)
1132 const int orig_index = index;
1138 index = m_values.size() - 1;
1139 pDst_node = &get_node(index);
1147 if (index == orig_index)
1153 if (!pDst_node->state)
1158 move_node(pDst_node, pNode);
1164 template <typename Key, typename Value, typename Hasher, typename Equals>
1165 struct bitwise_movable<hash_map<Key, Value, Hasher, Equals> >
1173 template <typename Key, typename Value, typename Hasher, typename Equals>
1174 inline void swap(hash_map<Key, Value, Hasher, Equals> &a, hash_map<Key, Value, Hasher, Equals> &b)
1179 bool hash_map_test();
1185 template <typename Key, typename Value, typename Hasher, typename Equals>
1186 inline void swap(vogl::hash_map<Key, Value, Hasher, Equals> &a, vogl::hash_map<Key, Value, Hasher, Equals> &b)