libstdc++: Simplify _Hashtable merge functions

I realised that _M_merge_unique and _M_merge_multi call extract(iter)
which then has to call _M_get_previous_node to iterate through the
bucket to find the node before the one iter points to. Since the merge
function is already iterating over the entire container, we had the
previous node a moment ago. Walking the whole bucket to find it again is
wasteful. We could just rewrite the loop in terms of node pointers
instead of iterators, and then call _M_extract_node directly. However,
this is only possible when the source container is the same type as the
destination, because otherwise we can't access the source's private
members (_M_before_begin, _M_begin, _M_extract_node etc.)

Add overloads of _M_merge_unique and _M_merge_multi that work with
source containers of the same type, to enable this optimization.

For both overloads of _M_merge_unique we can also remove the conditional
modifications to __n_elt and just consistently decrement it for every
element processed. Use a multiplier of one or zero that dictates whether
__n_elt is passed to _M_insert_unique_node or not. We can also remove
the repeated calls to size() and just keep track of the size in a local
variable.

Although _M_merge_unique and _M_merge_multi should be safe for
"self-merge", i.e. when doing c.merge(c), it's wasteful to search/insert
every element when we don't need to do anything. Add 'this == &source'
checks to the overloads taking an lvalue of the container's own type.
Because those checks aren't needed for the rvalue overloads, change
those to call the underlying _M_merge_xxx function directly instead of
going through the lvalue overload that checks the address.

I've also added more extensive tests for better coverage of the new
overloads added in this commit.

libstdc++-v3/ChangeLog:

	* include/bits/hashtable.h (_M_merge_unique): Add overload for
	merging from same type.
	(_M_merge_unique<Compatible>): Simplify size tracking. Add
	comment.
	(_M_merge_multi): Add overload for merging from same type.
	(_M_merge_multi<Compatible>): Add comment.
	* include/bits/unordered_map.h (unordered_map::merge): Check for
	self-merge in the lvalue overload. Call _M_merge_unique directly
	for the rvalue overload.
	(unordered_multimap::merge): Likewise.
	* include/bits/unordered_set.h (unordered_set::merge): Likewise.
	(unordered_multiset::merge): Likewise.
	* testsuite/23_containers/unordered_map/modifiers/merge.cc:
	Add more tests.
	* testsuite/23_containers/unordered_multimap/modifiers/merge.cc:
	Likewise.
	* testsuite/23_containers/unordered_multiset/modifiers/merge.cc:
	Likewise.
	* testsuite/23_containers/unordered_set/modifiers/merge.cc:
	Likewise.
This commit is contained in:
Jonathan Wakely 2024-11-07 17:04:49 +00:00 committed by Jonathan Wakely
parent 55dbf154ef
commit a147bfcaf1
No known key found for this signature in database
7 changed files with 626 additions and 28 deletions

View File

@ -1212,6 +1212,52 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __nh;
}
/// Merge from another container of the same type.
void
_M_merge_unique(_Hashtable& __src)
{
__glibcxx_assert(get_allocator() == __src.get_allocator());
auto __size = size();
auto __n_elt = __src.size();
size_type __first = 1;
// For a container of identical type we can use its private members.
auto __p = static_cast<__node_ptr>(&__src._M_before_begin);
while (__n_elt--)
{
const auto __prev = __p;
__p = __p->_M_next();
const auto& __node = *__p;
const key_type& __k = _ExtractKey{}(__node._M_v());
if (__size <= __small_size_threshold())
{
auto __n = _M_begin();
for (; __n; __n = __n->_M_next())
if (this->_M_key_equals(__k, *__n))
break;
if (__n)
continue;
}
__hash_code __code
= _M_src_hash_code(__src.hash_function(), __k, __node);
size_type __bkt = _M_bucket_index(__code);
if (__size > __small_size_threshold())
if (_M_find_node(__bkt, __k, __code) != nullptr)
continue;
__hash_code __src_code = __src.hash_function()(__k);
size_type __src_bkt = __src._M_bucket_index(__src_code);
auto __nh = __src._M_extract_node(__src_bkt, __prev);
_M_insert_unique_node(__bkt, __code, __nh._M_ptr,
__first * __n_elt + 1);
__nh.release();
__first = 0;
++__size;
__p = __prev;
}
}
/// Merge from a compatible container into one with unique keys.
template<typename _Compatible_Hashtable>
void
@ -1221,46 +1267,68 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
node_type>, "Node types are compatible");
__glibcxx_assert(get_allocator() == __src.get_allocator());
auto __size = size();
auto __n_elt = __src.size();
size_type __first = 1;
// For a compatible container we can only use the public API,
// so cbegin(), cend(), hash_function(), and extract(iterator).
for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
{
--__n_elt;
auto __pos = __i++;
const size_type __size = size();
const key_type& __k = _ExtractKey{}(*__pos);
if (__size <= __small_size_threshold())
{
bool __found = false;
for (auto __n = _M_begin(); __n; __n = __n->_M_next())
auto __n = _M_begin();
for (; __n; __n = __n->_M_next())
if (this->_M_key_equals(__k, *__n))
{
__found = true;
break;
}
if (__found)
{
if (__n_elt != 1)
--__n_elt;
continue;
}
break;
if (__n)
continue;
}
__hash_code __code
= _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur);
size_type __bkt = _M_bucket_index(__code);
if (__size <= __small_size_threshold()
|| _M_find_node(__bkt, __k, __code) == nullptr)
{
auto __nh = __src.extract(__pos);
_M_insert_unique_node(__bkt, __code, __nh._M_ptr, __n_elt);
__nh.release();
__n_elt = 1;
}
else if (__n_elt != 1)
--__n_elt;
if (__size > __small_size_threshold())
if (_M_find_node(__bkt, __k, __code) != nullptr)
continue;
auto __nh = __src.extract(__pos);
_M_insert_unique_node(__bkt, __code, __nh._M_ptr,
__first * __n_elt + 1);
__nh.release();
__first = 0;
++__size;
}
}
/// Merge from another container of the same type.
void
_M_merge_multi(_Hashtable& __src)
{
__glibcxx_assert(get_allocator() == __src.get_allocator());
if (__src.size() == 0) [[__unlikely__]]
return;
__node_ptr __hint = nullptr;
this->reserve(size() + __src.size());
// For a container of identical type we can use its private members.
auto __prev = static_cast<__node_ptr>(&__src._M_before_begin);
do
{
const auto& __node = *__prev->_M_next();
const key_type& __k = _ExtractKey{}(__node._M_v());
__hash_code __code = this->_M_hash_code(__k);
size_type __src_bkt = __src._M_bucket_index(__node);
auto __nh = __src._M_extract_node(__src_bkt, __prev);
__hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur;
__nh.release();
}
while (__prev->_M_nxt != nullptr);
}
/// Merge from a compatible container into one with equivalent keys.
template<typename _Compatible_Hashtable>
void
@ -1272,6 +1340,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__node_ptr __hint = nullptr;
this->reserve(size() + __src.size());
// For a compatible container we can only use the public API,
// so cbegin(), cend(), hash_function(), and extract(iterator).
for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
{
auto __pos = __i++;

View File

@ -821,6 +821,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
void
merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>& __source)
{
if constexpr (is_same_v<_H2, _Hash> && is_same_v<_P2, _Pred>)
if (std::__addressof(__source) == this) [[__unlikely__]]
return;
using _Merge_helper = _Hash_merge_helper<unordered_map, _H2, _P2>;
_M_h._M_merge_unique(_Merge_helper::_S_get_table(__source));
}
@ -828,7 +832,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
template<typename _H2, typename _P2>
void
merge(unordered_map<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
{ merge(__source); }
{
using _Merge_helper = _Hash_merge_helper<unordered_map, _H2, _P2>;
_M_h._M_merge_unique(_Merge_helper::_S_get_table(__source));
}
template<typename _H2, typename _P2>
void
@ -1764,6 +1771,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
void
merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>& __source)
{
if constexpr (is_same_v<_H2, _Hash> && is_same_v<_P2, _Pred>)
if (std::__addressof(__source) == this) [[__unlikely__]]
return;
using _Merge_helper
= _Hash_merge_helper<unordered_multimap, _H2, _P2>;
_M_h._M_merge_multi(_Merge_helper::_S_get_table(__source));
@ -1772,7 +1783,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
template<typename _H2, typename _P2>
void
merge(unordered_multimap<_Key, _Tp, _H2, _P2, _Alloc>&& __source)
{ merge(__source); }
{
using _Merge_helper
= _Hash_merge_helper<unordered_multimap, _H2, _P2>;
_M_h._M_merge_multi(_Merge_helper::_S_get_table(__source));
}
template<typename _H2, typename _P2>
void

View File

@ -602,6 +602,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
void
merge(unordered_set<_Value, _H2, _P2, _Alloc>& __source)
{
if constexpr (is_same_v<_H2, _Hash> && is_same_v<_P2, _Pred>)
if (std::__addressof(__source) == this) [[__unlikely__]]
return;
using _Merge_helper = _Hash_merge_helper<unordered_set, _H2, _P2>;
_M_h._M_merge_unique(_Merge_helper::_S_get_table(__source));
}
@ -609,7 +613,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
template<typename _H2, typename _P2>
void
merge(unordered_set<_Value, _H2, _P2, _Alloc>&& __source)
{ merge(__source); }
{
using _Merge_helper = _Hash_merge_helper<unordered_set, _H2, _P2>;
_M_h._M_merge_unique(_Merge_helper::_S_get_table(__source));
}
template<typename _H2, typename _P2>
void
@ -1452,6 +1459,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
void
merge(unordered_multiset<_Value, _H2, _P2, _Alloc>& __source)
{
if constexpr (is_same_v<_H2, _Hash> && is_same_v<_P2, _Pred>)
if (std::__addressof(__source) == this) [[__unlikely__]]
return;
using _Merge_helper
= _Hash_merge_helper<unordered_multiset, _H2, _P2>;
_M_h._M_merge_multi(_Merge_helper::_S_get_table(__source));
@ -1460,7 +1471,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
template<typename _H2, typename _P2>
void
merge(unordered_multiset<_Value, _H2, _P2, _Alloc>&& __source)
{ merge(__source); }
{
using _Merge_helper
= _Hash_merge_helper<unordered_multiset, _H2, _P2>;
_M_h._M_merge_multi(_Merge_helper::_S_get_table(__source));
}
template<typename _H2, typename _P2>
void

View File

@ -290,6 +290,133 @@ test06()
VERIFY( c2.empty() );
}
void
test07()
{
test_type c1{ {1, 1}, {3, 3}, {5, 5} };
test_type c2{ {2, 2}, {4, 4}, {6, 6} };
const test_type c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c5 = c3;
c2.merge(c5);
VERIFY( c2 == c3 );
VERIFY( c5 == c3 );
c5.emplace(9, 9);
c2.merge(c5);
VERIFY( c2.size() == c3.size() + 1 );
VERIFY( c5 == c3 );
}
void
test08()
{
test_type c1{ {1, 1}, {3, 3}, {5, 5} };
std::unordered_map<int, int, xhash<int>, equal> c2{ {2, 2}, {4, 4}, {6, 6} };
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( equal_elements(c1, c3) );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
auto c5 = c3;
c2.merge(c5);
VERIFY( c2 == c3 );
VERIFY( c5 == c3 );
c5.emplace(9, 9);
c2.merge(c5);
VERIFY( c2.size() == c3.size() + 1 );
VERIFY( c5 == c3 );
}
void
test09()
{
struct stateful_hash
{
size_t seed = 0;
auto operator()(const int& i) const noexcept
{ return std::hash<int>()(i) + seed; }
};
using map_type = std::unordered_map<int, int, stateful_hash>;
map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, stateful_hash{1});
map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, stateful_hash{2});
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c5{ {-1, 1}, {-3, 3}, {-5, 5} };
c2.merge(c5);
VERIFY( c2.size() == 6 );
VERIFY( c5.empty() );
auto c6 = c3;
c6.merge(c2);
VERIFY( c6.size() == 6 );
VERIFY( c2.size() == 3 );
}
int
main()
{
@ -299,4 +426,7 @@ main()
test04();
test05();
test06();
test07();
test08();
test09();
}

View File

@ -105,6 +105,122 @@ test04()
VERIFY( c2.empty() );
}
void
test07()
{
test_type c1{ {1, 1}, {3, 3}, {5, 5} };
test_type c2{ {2, 2}, {4, 4}, {6, 6} };
const test_type c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
auto c5 = c4;
c2.merge(c5);
VERIFY( c2.size() == 9 );
VERIFY( c5.empty() );
c5 = c4;
c5.emplace(9, 9);
c2.merge(c5);
VERIFY( c2.size() == 16 );
VERIFY( c5.empty() );
}
void
test08()
{
test_type c1{ {1, 1}, {3, 3}, {5, 5} };
std::unordered_map<int, int, hash, equal> c2{ {2, 2}, {4, 4}, {6, 6} };
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
}
void
test09()
{
struct stateful_hash
{
size_t seed = 0;
auto operator()(const int& i) const noexcept
{ return std::hash<int>()(i) + seed; }
};
using map_type = std::unordered_multimap<int, int, stateful_hash>;
map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, stateful_hash{1});
map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, stateful_hash{2});
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c4{ {-1, 0}, {-3, 0}, {-5, 0} };
c2.merge(c4);
VERIFY( c2.size() == 6 );
VERIFY( c4.empty() );
auto c6 = c3;
c6.merge(c2);
VERIFY( c6.size() == 9 );
VERIFY( c2.size() == 0 );
}
int
main()
{
@ -112,4 +228,7 @@ main()
test02();
test03();
test04();
test07();
test08();
test09();
}

View File

@ -126,6 +126,124 @@ test05()
VERIFY( c2.empty() );
}
void
test07()
{
test_type c1{ 1, 3, 5 };
test_type c2{ 2, 4, 6 };
const test_type c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
auto c5 = c4;
c2.merge(c5);
VERIFY( c2.size() == 9 );
VERIFY( c5.empty() );
c5 = c4;
c5.emplace(9);
c2.merge(c5);
VERIFY( c2.size() == 16 );
VERIFY( c5.empty() );
}
void
test08()
{
test_type c1{ 1, 3, 5 };
std::unordered_set<int, hash, equal> c2{ 2, 4, 6 };
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
const test_type c4 = c1;
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
}
void
test09()
{
struct stateful_hash
{
size_t seed = 0;
auto operator()(const int& i) const noexcept
{ return std::hash<int>()(i) + seed; }
};
using set_type = std::unordered_multiset<int, stateful_hash>;
set_type c1({ 1, 3, 5 }, 0, stateful_hash{1});
set_type c2({ 2, 4, 6 }, 0, stateful_hash{2});
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c4{ -1, -3, -5 };
c2.merge(c4);
VERIFY( c2.size() == 6 );
VERIFY( c4.empty() );
auto c6 = c3;
c6.merge(c2);
VERIFY( c6.size() == 9 );
VERIFY( c2.size() == 0 );
}
int
main()
{
@ -134,4 +252,7 @@ main()
test03();
test04();
test05();
test07();
test08();
test09();
}

View File

@ -167,6 +167,131 @@ test04()
VERIFY( c2.empty() );
}
void
test07()
{
test_type c1{ 1, 3, 5 };
test_type c2{ 2, 4, 6 };
const test_type c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c4 = c3;
c2.merge(c4);
VERIFY( c2 == c3 );
VERIFY( c4 == c3 );
c4.emplace(9);
c2.merge(c4);
VERIFY( c2.size() == c3.size() + 1 );
VERIFY( c4 == c3 );
}
void
test08()
{
test_type c1{ 1, 3, 5 };
std::unordered_set<int, hash, equal> c2{ 2, 4, 6 };
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( equal_elements(c1, c3) );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
auto c4 = c3;
c2.merge(c4);
VERIFY( c2 == c3 );
VERIFY( c4 == c3 );
c4.emplace(9);
c2.merge(c4);
VERIFY( c2.size() == c3.size() + 1 );
VERIFY( c4 == c3 );
}
void
test09()
{
struct stateful_hash
{
size_t seed = 0;
auto operator()(const int& i) const noexcept
{ return std::hash<int>()(i) + seed; }
};
using set_type = std::unordered_set<int, stateful_hash>;
set_type c1({ 1, 3, 5 }, 0, stateful_hash{1});
set_type c2({ 2, 4, 6 }, 0, stateful_hash{2});
const auto c3 = c2;
c1.merge(c2);
VERIFY( c1.size() == 6 );
VERIFY( c2.empty() );
c2 = c3;
c1.clear();
c1.merge(std::move(c2));
VERIFY( c1 == c3 );
VERIFY( c2.empty() );
c2.merge(std::move(c1));
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c1);
VERIFY( c1.empty() );
VERIFY( c2 == c3 );
c2.merge(c2);
VERIFY( c2 == c3 );
test_type c4{ -1, -3, -5 };
c2.merge(c4);
VERIFY( c2.size() == 6 );
VERIFY( c4.empty() );
auto c6 = c3;
c6.merge(c2);
VERIFY( c6.size() == 6 );
VERIFY( c2.size() == 3 );
}
int
main()
{
@ -174,4 +299,7 @@ main()
test02();
test03();
test04();
test07();
test08();
test09();
}