C++ unordered关联式容器
哈希是一种高效查询的方法,其大致思想是将每个值映射成下标并用容器存储,需要查询的时候能够在相应位置直接拿到数据,因此查询的效率是O(1)
一、unordered系列关联式容器
1.unordered_map
1. unordered_map是存储<key, value>键值对的关联式容器,其允许通过keys快速的索引到与
其对应的value。
2. 在unordered_map中,键值通常用于惟一地标识元素,而映射值是一个对象,其内容与此
键关联。键和映射值的类型可能不同。
3. 在内部,unordered_map没有对<key, value>按照任何特定的顺序排序, 为了能在常数范围内
找到key所对应的value,unordered_map将相同哈希值的键值对放在相同的桶中。
4. unordered_map容器通过key访问单个元素要比map快,但它通常在遍历元素子集的范围迭
代方面效率较低。
5. unordered_maps实现了直接访问操作符(operator[]),它允许使用key作为参数直接访问
value。
6. 它的迭代器至少是前向迭代器。
unordered_map存储的元素是pair<key,value>,通过key将pair映射到相应位置,查询的时候一般采用[]的方式
unordered_map模拟实现
#pragma once
#include<vector>
namespace myspace
{
// Default hash functor: works for any key type convertible to size_t
// (integers, pointers, enums, ...).
template<class T>
struct HashFunc
{
    size_t operator()(const T& key)
    {
        // static_cast instead of the original C-style cast.
        return static_cast<size_t>(key);
    }
};

// Specialization for std::string: polynomial rolling hash with base 31
// (the same scheme as Java's String.hashCode).
// NOTE(review): `e` is a (possibly signed) char, so bytes >= 0x80 may
// contribute negative values before wrapping — kept as-is to preserve
// the original hash values; confirm if non-ASCII keys matter.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& key)
    {
        size_t hash = 0;
        for (auto& e : key)
        {
            hash *= 31;
            hash += e;
        }
        return hash;
    }
};
// Singly linked node used by the hash table's separate-chaining buckets.
// Holds one stored value of type T and a pointer to the next node in
// the same bucket.
template<class T>
struct HashBucketNode
{
    HashBucketNode(const T& data = T())
        // Initializer order now matches the declaration order
        // (_next before _data) — the original had them reversed.
        : _next(nullptr)
        , _data(data)
    {}
    HashBucketNode* _next; // next node in this bucket's chain
    T _data;               // payload
};
template<class K, class V, class KeyOfValue, class HF>
struct HBIterator;
// 为了实现简单,在哈希桶的迭代器类中需要用到hashBucket本身,
template<class K, class V, class KeyOfValue, class HF>
class HashBucket
{
typedef HashBucketNode<V> Node;
HF hf;
KeyOfValue kot;
public:
typedef HBIterator<K, V, KeyOfValue, HF> iterator;
HashBucket()
:_n(0)
{
_ht.resize(10);
}
size_t BucketCount()
{
return _ht.size();
}
size_t BucketSize(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
size_t ret = 0;
while (cur)
{
++ret;
cur = cur->_next;
}
return ret;
}
pair<iterator, bool> Insert(const V& data)
{
if (_n == _ht.size())
{
size_t newsize = _ht.size() * 2;
vector<Node*>newht(newsize);
for (int i = 0; i < _ht.size(); i++)
{
Node* cur = _ht[i];
while (cur)
{
Node* next = cur->_next;
cur->_next = nullptr;
K key = kot(cur->_data);
size_t hashi = hf(key) % newsize;
if (newht[hashi])
{
Node* prev = newht[hashi];
while (prev->_next)
{
prev = prev->_next;
}
prev->_next = cur;
}
else
{
newht[hashi] = cur;
}
cur = next;
}
}
_ht.swap(newht);
return Insert(data);
}
K key = kot(data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur)
{
Node* prev = nullptr;
while (cur)
{
if (kot(cur->_data) == kot(data))
{
iterator it(cur, this);
return make_pair(it, false);
}
prev = cur;
cur = cur->_next;
}
Node* newnode = new Node(data);
prev->_next = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
else
{
Node* newnode = new Node(data);
_ht[hashi] = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
}
iterator Erase(iterator pos)
{
pos++;
K key = kot(pos->_data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur == pos->_pNode)
{
_ht[hashi] = nullptr;
delete cur;
}
else
{
while (cur->_next != pos->_pNode)
{
cur = cur->_next;
}
Node* next = cur->_next->_next;
delete cur->_next;
cur->_next = next;
}
--_n;
return pos;
}
size_t Count(const K& key)
{
size_t hashi = hf(key) % _ht.size();
size_t ret = 0;
Node* cur = _ht[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return 1;
}
cur = cur->_next;
}
return 0;
}
iterator Find(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
while (cur)
{
if (cur->_data.first == key)
{
return iterator(cur);
}
}
return end();
}
iterator begin()
{
for (int i = 0; i < BucketCount(); i++)
{
if (_ht[i])
return iterator(_ht[i]);
}
return iterator(nullptr);
}
iterator end()
{
return iterator(nullptr);
}
size_t size()
{
size_t ret = 0;
for (int i = 0; i < BucketCount(); i++)
{
Node* cur = _ht[i];
while (cur)
{
++ret;
cur = cur->_next;
}
}
return ret;
}
bool empty()
{
return size() == 0;
}
private:
vector<Node*> _ht;
size_t _n;
};
template <class K, class V, class KeyOfValue, class HF>
struct HBIterator
{
typedef HashBucket<K, V, KeyOfValue, HF> HashBucket;
typedef HashBucketNode<V> Node;
typedef HBIterator<K, V, KeyOfValue, HF> Self;
KeyOfValue kot;
HF hf;
HBIterator(Node* pNode = nullptr, HashBucket* pHt = nullptr)
:_pNode(pNode), _pHt(pHt)
{}
Self& operator++()
{
// 当前迭代器所指节点后还有节点时直接取其下一个节点
if (_pNode->_next)
_pNode = _pNode->_next;
else
{
// 找下一个不空的桶,返回该桶中第一个节点
size_t bucketNoempty = hf(kot(_pNode->_data)) % _pHt->BucketCount() + 1;
for (; bucketNoempty < _pHt->BucketCount(); ++bucketNoempty)
{
if (_pNode = _pHt->_ht[bucketNoempty])
break;
}
}
return *this;
}
Self operator++(int)
{
Self temp = *this;
++temp;
return temp;
}
V& operator*()
{
return _pNode->_data;
}
V* operator->()
{
return &_pNode->_data;
}
bool operator==(const Self& it) const
{
return _pNode == it._pNode;
}
bool operator!=(const Self& it) const
{
return !(*this == it);
}
Node* _pNode; // 当前迭代器关联的节点
HashBucket* _pHt; // 哈希桶--主要是为了找下一个空桶时候方便
};
}
#include"HashBucket.h"
namespace myspace
{
// unordered_map中存储的是pair<K, V>的键值对,K为key的类型,V为value的类型,HF哈希函数类型
// unordered_map在实现时,只需将hashbucket中的接口重新封装即可
template<class K, class V, class HF = HashFunc<K>>
class unordered_map
{
struct MapKeyOfValue
{
const K& operator()(const pair<K, V>& data)
{
return data.first;
}
};
typedef HashBucket<K, pair<K, V>, MapKeyOfValue, HF> HT;
// 通过key获取value的操作
public:
typedef typename HT::iterator iterator;
public:
unordered_map()
: _ht()
{}
iterator begin() { return _ht.begin(); }
iterator end() { return _ht.end(); }
//
// capacity
size_t size()const { return _ht.size(); }
bool empty()const { return _ht.empty(); }
/
// Acess
V& operator[](const K& key)
{
pair<iterator, bool> ret = _ht.Insert(pair<K, V>(key, V()));
return (ret.first)->second;
}
const V& operator[](const K& key)const
{
pair<iterator, bool> ret = _ht.Insert(pair<K, V>(key, V()));
return (ret.first)->second;
}
// lookup
iterator find(const K& key) { return _ht.Find(key); }
size_t count(const K& key) { return _ht.Count(key); }
/
// modify
pair<iterator, bool> insert(const pair<K, V>& value)
{
return _ht.Insert(value);
}
iterator erase(iterator position)
{
return _ht.Erase(position);
}
// bucket
size_t bucket_count() { return _ht.BucketCount(); }
size_t bucket_size(const K& key) { return _ht.BucketSize(key); }
private:
HT _ht;
};
}
2.unordered_set
unordered_set和unordered_map类似,因为他们底层都是哈希桶,即HashBucket。他们的区别是unordered_set存储的元素是key,即它的value=key,而unordered_map的value是pair<key,value>
unordered_set模拟实现
#pragma once
#include<vector>
namespace bit
{
// Default hash functor: works for any key type convertible to size_t
// (integers, pointers, enums, ...).
template<class T>
struct HashFunc
{
    size_t operator()(const T& key)
    {
        // static_cast instead of the original C-style cast.
        return static_cast<size_t>(key);
    }
};

// Specialization for std::string: polynomial rolling hash with base 31
// (the same scheme as Java's String.hashCode).
// NOTE(review): `e` is a (possibly signed) char, so bytes >= 0x80 may
// contribute negative values before wrapping — kept as-is to preserve
// the original hash values; confirm if non-ASCII keys matter.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& key)
    {
        size_t hash = 0;
        for (auto& e : key)
        {
            hash *= 31;
            hash += e;
        }
        return hash;
    }
};
// Singly linked node used by the hash table's separate-chaining buckets.
// Holds one stored value of type T and a pointer to the next node in
// the same bucket.
template<class T>
struct HashBucketNode
{
    HashBucketNode(const T& data = T())
        // Initializer order now matches the declaration order
        // (_next before _data) — the original had them reversed.
        : _next(nullptr)
        , _data(data)
    {}
    HashBucketNode* _next; // next node in this bucket's chain
    T _data;               // payload
};
template<class K, class V, class KeyOfValue, class HF>
struct HBIterator;
// 为了实现简单,在哈希桶的迭代器类中需要用到hashBucket本身,
template<class K, class V, class KeyOfValue, class HF>
class HashBucket
{
typedef HashBucketNode<V> Node;
HF hf;
KeyOfValue kot;
public:
typedef HBIterator<K, V, KeyOfValue, HF> iterator;
HashBucket()
:_n(0)
{
_ht.resize(10);
}
size_t BucketCount()
{
return _ht.size();
}
size_t BucketSize(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
size_t ret = 0;
while (cur)
{
++ret;
cur = cur->_next;
}
return ret;
}
pair<iterator, bool> Insert(const V& data)
{
if (_n == _ht.size())
{
size_t newsize = _ht.size() * 2;
vector<Node*>newht(newsize);
for (int i = 0; i < _ht.size(); i++)
{
Node* cur = _ht[i];
while (cur)
{
Node* next = cur->_next;
cur->_next = nullptr;
K key = kot(cur->_data);
size_t hashi = hf(key) % newsize;
if (newht[hashi])
{
Node* prev = newht[hashi];
while (prev->_next)
{
prev = prev->_next;
}
prev->_next = cur;
}
else
{
newht[hashi] = cur;
}
cur = next;
}
}
_ht.swap(newht);
return Insert(data);
}
K key = kot(data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur)
{
Node* prev = nullptr;
while (cur)
{
if (kot(cur->_data) == kot(data))
{
iterator it(cur, this);
return make_pair(it, false);
}
prev = cur;
cur = cur->_next;
}
Node* newnode = new Node(data);
prev->_next = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
else
{
Node* newnode = new Node(data);
_ht[hashi] = newnode;
iterator it(newnode, this);
++_n;
return make_pair(it, true);
}
}
iterator Erase(iterator pos)
{
pos++;
K key = kot(pos->_data);
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
if (cur == pos->_pNode)
{
_ht[hashi] = nullptr;
delete cur;
}
else
{
while (cur->_next != pos->_pNode)
{
cur = cur->_next;
}
Node* next = cur->_next->_next;
delete cur->_next;
cur->_next = next;
}
--_n;
return pos;
}
size_t Count(const K& key)
{
size_t hashi = hf(key) % _ht.size();
size_t ret = 0;
Node* cur = _ht[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return 1;
}
cur = cur->_next;
}
return 0;
}
iterator Find(const K& key)
{
size_t hashi = hf(key) % _ht.size();
Node* cur = _ht[hashi];
while (cur)
{
if (cur->_data.first == key)
{
return iterator(cur);
}
}
return end();
}
iterator begin()
{
for (int i = 0; i < BucketCount(); i++)
{
if (_ht[i])
return iterator(_ht[i]);
}
return iterator(nullptr);
}
iterator end()
{
return iterator(nullptr);
}
size_t size()
{
size_t ret = 0;
for (int i = 0; i < BucketCount(); i++)
{
Node* cur = _ht[i];
while (cur)
{
++ret;
cur = cur->_next;
}
}
return ret;
}
bool empty()
{
return size() == 0;
}
private:
vector<Node*> _ht;
size_t _n;
};
template <class K, class V, class KeyOfValue, class HF>
struct HBIterator
{
typedef HashBucket<K, V, KeyOfValue, HF> HashBucket;
typedef HashBucketNode<V> Node;
typedef HBIterator<K, V, KeyOfValue, HF> Self;
KeyOfValue kot;
HF hf;
HBIterator(Node* pNode = nullptr, HashBucket* pHt = nullptr)
:_pNode(pNode), _pHt(pHt)
{}
Self& operator++()
{
// 当前迭代器所指节点后还有节点时直接取其下一个节点
if (_pNode->_next)
_pNode = _pNode->_next;
else
{
// 找下一个不空的桶,返回该桶中第一个节点
size_t bucketNoempty = hf(kot(_pNode->_data)) % _pHt->BucketCount() + 1;
for (; bucketNoempty < _pHt->BucketCount(); ++bucketNoempty)
{
if (_pNode = _pHt->_ht[bucketNoempty])
break;
}
}
return *this;
}
Self operator++(int)
{
Self temp = *this;
++temp;
return temp;
}
V& operator*()
{
return _pNode->_data;
}
V* operator->()
{
return &_pNode->_data;
}
bool operator==(const Self& it) const
{
return _pNode == it._pNode;
}
bool operator!=(const Self& it) const
{
return !(*this == it);
}
Node* _pNode; // 当前迭代器关联的节点
HashBucket* _pHt; // 哈希桶--主要是为了找下一个空桶时候方便
};
}
#pragma once
#include"HashBucket.h"
namespace bit
{
    // unordered_set stores keys directly (value == key). It is a thin
    // wrapper re-exposing the HashBucket interface with standard-
    // library-style names.
    template<class K, class HF = HashFunc<K>>
    class unordered_set
    {
        // For a set the stored value *is* the key.
        struct SetKeyOfValue
        {
            const K& operator()(const K& data)
            {
                return data;
            }
        };
        typedef HashBucket<K, K, SetKeyOfValue, HF> HT;
    public:
        typedef typename HT::iterator iterator;

        unordered_set()
            : _ht()
        {}

        // NOTE(review): the iterator yields K& and so allows mutating a
        // stored key in place, which would corrupt the table — a const
        // iterator would be safer; kept as-is to preserve the interface.
        iterator begin() { return _ht.begin(); }
        iterator end() { return _ht.end(); }

        //////////////////////////////////////////////////////////////
        // capacity
        size_t size() const { return _ht.size(); }
        bool empty() const { return _ht.empty(); }

        //////////////////////////////////////////////////////////////
        // lookup
        iterator find(const K& key) { return _ht.Find(key); }
        size_t count(const K& key) { return _ht.Count(key); }

        //////////////////////////////////////////////////////////////
        // modify
        std::pair<iterator, bool> insert(const K& value)
        {
            return _ht.Insert(value);
        }

        iterator erase(iterator position)
        {
            return _ht.Erase(position);
        }

        // bucket interface
        size_t bucket_count() { return _ht.BucketCount(); }
        size_t bucket_size(const K& key) { return _ht.BucketSize(key); }
    private:
        HT _ht;
    };
}
二、哈希冲突
处理哈希冲突的方法一般是闭散列和开散列,这里实现的是开散列
闭散列:也叫开放定址法,当发生哈希冲突时,如果哈希表未被装满,说明在哈希表中必然还有空位置,那么可以把key存放到冲突位置中的“下一个”空位置中去。
寻找下一个空位置有不同的方法,包括线性探测和二次探测等。
开散列:选择在相应位置挂链表(如果同一位置元素过多则选择挂红黑树),映射到相同位置的元素直接在链表上头插或尾插即可。
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。 如若内容造成侵权/违法违规/事实不符,请联系我的编程经验分享网邮箱:veading@qq.com进行投诉反馈,一经查实,立即删除!