Hi,
the following testcase:

bool f(const std::vector<bool>& v, std::size_t x) { return v[x]; }
is compiled as:

f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
        testq   %rsi, %rsi
        leaq    63(%rsi), %rax
        movq    (%rdi), %rdx
        cmovns  %rsi, %rax
        sarq    $6, %rax
        leaq    (%rdx,%rax,8), %rdx
        movq    %rsi, %rax
        sarq    $63, %rax
        shrq    $58, %rax
        addq    %rax, %rsi
        andl    $63, %esi
        subq    %rax, %rsi
        jns     .L2
        addq    $64, %rsi
        subq    $8, %rdx
.L2:
        movl    $1, %eax
        shlx    %rsi, %rax, %rax
        andq    (%rdx), %rax
        setne   %al
        ret

which is quite expensive for a simple bit access in a bitmap.  The reason
is that the bit access is implemented via iterators:

        return begin()[__n];

which in turn has to care about the situation where __n is negative,
yielding the extra conditional:

      _GLIBCXX20_CONSTEXPR
      void
      _M_incr(ptrdiff_t __i)
      {
        _M_assume_normalized();
        difference_type __n = __i + _M_offset;
        _M_p += __n / int(_S_word_bit);
        __n = __n % int(_S_word_bit);
        if (__n < 0)
          {
            __n += int(_S_word_bit);
            --_M_p;
          }
        _M_offset = static_cast<unsigned int>(__n);
      }

While we could use __builtin_unreachable to declare that __n is in the
range 0...max_size (), I think it is better to implement the access
directly, since the resulting code is shorter and much easier to optimize.
We now produce:

.LFB1248:
        .cfi_startproc
        movq    (%rdi), %rax
        movq    %rsi, %rdx
        shrq    $6, %rdx
        andq    (%rax,%rdx,8), %rsi
        andl    $63, %esi
        setne   %al
        ret

The testcase suggests:

        movq    (%rdi), %rax
        movl    %esi, %ecx
        shrq    $5, %rsi                # does still need to be 64-bit
        movl    (%rax,%rsi,4), %eax
        btl     %ecx, %eax
        setb    %al
        retq

which is still one instruction shorter.

Bootstrapped/regtested x86_64-linux, OK?

libstdc++-v3/ChangeLog:

	PR target/80813
	* include/bits/stl_bvector.h (vector<bool, _Alloc>::operator []):
	Do not use iterators.

gcc/testsuite/ChangeLog:

	PR target/80813
	* g++.dg/tree-ssa/bvector-3.C: New test.

diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
new file mode 100644
index 00000000000..feae791b20d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-3.C
@@ -0,0 +1,10 @@
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-optimized" }
+// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } }
+
+#include <vector>
+bool f(const std::vector<bool>& v, std::size_t x) {
+  return v[x];
+}
+// The bit access should be branch-free, so no conditionals may remain.
+// { dg-final { scan-tree-dump-not "if \\(" optimized } }
diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h
index 341eee33b21..975857bfdbd 100644
--- a/libstdc++-v3/include/bits/stl_bvector.h
+++ b/libstdc++-v3/include/bits/stl_bvector.h
@@ -1132,7 +1141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       operator[](size_type __n)
       {
 	__glibcxx_requires_subscript(__n);
-	return begin()[__n];
+	return _Bit_reference (this->_M_impl._M_start._M_p
+			       + __n / int(_S_word_bit),
+			       1UL << __n % int(_S_word_bit));
       }
 
       _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
@@ -1140,7 +1151,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
       operator[](size_type __n) const
       {
 	__glibcxx_requires_subscript(__n);
-	return begin()[__n];
+	return _Bit_reference (this->_M_impl._M_start._M_p
+			       + __n / int(_S_word_bit),
+			       1UL << __n % int(_S_word_bit));
       }
 
     protected:
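
P.S. For anyone who wants to reproduce the codegen comparison outside of
libstdc++, here is a minimal self-contained sketch (illustrative names
only, not part of the patch; 64-bit storage words assumed) of the same
word/bit computation the new operator[] performs:

// Illustration only: mirrors _M_start._M_p + __n / _S_word_bit and the
// 1UL << __n % _S_word_bit mask used by the new operator[], assuming
// 64-bit words.
#include <cstddef>
#include <cstdint>

bool
test_bit (const std::uint64_t *words, std::size_t n)
{
  const std::size_t word_bit = 64;              // stand-in for _S_word_bit
  const std::uint64_t *p = words + n / word_bit; // word holding the bit
  std::uint64_t mask = std::uint64_t (1) << n % word_bit; // bit within the word
  return (*p & mask) != 0;
}

Compiling this with -O2 should give essentially the same branch-free
shift/mask sequence shown above, without the header change.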