> e791e52ec329277474f3218d8a44cd37ded14ac3..8101d868d0c5f7ac4f97931a
> > ffcf71d826c88094 100644
> > > --- a/libstdc++-v3/include/bits/hashtable.h
> > > +++ b/libstdc++-v3/include/bits/hashtable.h
> > > @@ -2171,7 +2171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > >           if (this->_M_equals(__k, __code, *__p))
> > >             return __prev_p;
> > >
> > > -         if (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt)
> > > +         if (__builtin_expect (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt, 0))
> > >             break;
> > >           __prev_p = __p;
> > >         }
> > > @@ -2201,7 +2201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > >             if (this->_M_equals_tr(__k, __code, *__p))
> > >               return __prev_p;
> > >
> > > -           if (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt)
> > > +           if (__builtin_expect (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt, 0))
> > >               break;
> > >             __prev_p = __p;
> > >           }
> > > @@ -2228,7 +2228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > >                pointer_to(const_cast<__node_base&>(_M_before_begin));
> > >           while (__loc._M_before->_M_nxt)
> > >             {
> > > -             if (this->_M_key_equals(__k, *__loc._M_node()))
> > > +             if (__builtin_expect (this->_M_key_equals(__k, *__loc._M_node()), 1))
> > >                 return __loc;
> > >               __loc._M_before = __loc._M_before->_M_nxt;
> > >             }
> >
> > The first two changes make sense to me: we're typically going to loop
> > more than once when searching for an element, so the break is
> > unlikely. The third change is a bit more surprising, because although
> > it's likely we'll find an element *eventually* it is only likely once
> > out of N iterations, not likely on every iteration. But if the numbers
> > show it helps, then that just shows my intuition is probably wrong.
> >
> > If this is optimized for 'find' is it possible that this change
> > pessimizes insertion (which also has to use _M_locate to find where to
> > insert)?
> 
> You're right there seems to be a uniform slowdown for insertions in small 
> sized
> hashtables of ~10 entries.  They're about 10-14% slower with that change.
> 
> As expected just the ones on _M_find_before_node does not cause any issues.
> 
> Since the insertions into small hashtables don't run long enough, I've also
> increased the number of iterations to ~1 million to even out the score a bit
> more, but there are still 5 sizable regressions.
> 
> I've kicked off a longer run removing the change on _M_locate and with some
> more
> variable size finds/inserts.
> 
> So far it looks like the additional benefits gotten on _M_locate were
> mainly on two tests.  I'll look at the perf traces to figure out exactly why 
> but I'll
> respin
> the patch without the _M_locate change and send it tomorrow morning once
> benchmarks finish.
> 

Hi,

Here's a new patch and new numbers showing the improvements over the previous
one and the total improvement with the two together.  This change seems to be
mostly beneficial on larger-sized hashmaps, with otherwise no real losses on
Insert or Find (around the region of ~1%, which looks like noise).

This results in ~0-10% extra improvement on top of the previous patch.

In table form:

+-------------+---------------+-------+--------------------+-------------------+-----------------+
| benchmark   | Type          | Size  | Inline vs baseline | final vs baseline | final vs inline |
+-------------+---------------+-------+--------------------+-------------------+-----------------+
| find many   | uint64_t      | 11253 | -15.67%            | -22.96%           | -8.65%          |
| find many   | uint64_t      | 11253 | -16.74%            | -23.37%           | -7.96%          |
| find single | uint64_t      | 345   | -5.88%             | -11.54%           | -6.02%          |
| find many   | string        | 11253 | -4.50%             | -9.56%            | -5.29%          |
| find single | uint64_t      | 345   | -4.38%             | -9.41%            | -5.26%          |
| find single | shared string | 11253 | -6.67%             | -11.00%           | -4.64%          |
| find single | shared string | 11253 | -4.63%             | -9.03%            | -4.61%          |
| find single | shared string | 345   | -10.41%            | -14.44%           | -4.50%          |
| find many   | string        | 11253 | -3.41%             | -7.51%            | -4.24%          |
| find many   | shared string | 11253 | -2.30%             | -5.72%            | -3.50%          |
| find many   | string        | 13    | 2.86%              | -0.30%            | -3.07%          |
| find single | string        | 11253 | 4.47%              | 1.34%             | -3.00%          |
| find many   | custom string | 11253 | 0.25%              | -2.75%            | -2.99%          |
| find single | uint64_t      | 345   | 2.99%              | 0.01%             | -2.90%          |
| find single | shared string | 345   | -11.53%            | -13.67%           | -2.41%          |
| find single | uint64_t      | 11253 | 0.49%              | -1.59%            | -2.07%          |
+-------------+---------------+-------+--------------------+-------------------+-----------------+


Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

libstdc++-v3/ChangeLog:

        * include/bits/hashtable.h
        (_M_find_before_node, _M_find_before_node_tr): Use __builtin_expect to
        mark it likely that the map has at least one entry and so we do at
        least one iteration.

-- inline copy of patch --

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index e791e52ec329277474f3218d8a44cd37ded14a c3..51437619b44ce8b93be6a1d223828fda5a4a4c45 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -2171,7 +2171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          if (this->_M_equals(__k, __code, *__p))
            return __prev_p;
 
-         if (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt)
+         if (__builtin_expect (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt, 0))
            break;
          __prev_p = __p;
        }
@@ -2201,7 +2201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            if (this->_M_equals_tr(__k, __code, *__p))
              return __prev_p;
 
-           if (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt)
+           if (__builtin_expect (!__p->_M_nxt || _M_bucket_index(*__p->_M_next()) != __bkt, 0))
              break;
            __prev_p = __p;
          }

Attachment: rb19066.patch
Description: rb19066.patch

Reply via email to