danlark created this revision.
danlark added reviewers: EricWF, mclow.lists.
Herald added subscribers: cfe-commits, christof.

Consider the following code.

  #include <memory>
  #include <vector>
  
  // Minimal reproducer: a class whose only member is a std::vector<char>
  // that is sized in the constructor's member-initializer list.
  class TestClass {
  public:
      // Constructs Data with `size` elements via std::vector's count
      // constructor, which is the path that reaches __construct_at_end.
      TestClass(size_t size)
          : Data(size)
      {
      }
  private:
      // Element type matters here: the slowdown described below is for `char`.
      std::vector<char> Data;
  };
  
  // Benchmark driver: creates a TestClass holding 0x10000 (65536) chars
  // 100000 times; each reset() destroys the previously held instance.
  int main(void) {
      std::unique_ptr<TestClass> test;
      for (int i = 0; i < 100000; ++i)
          test.reset(new TestClass(0x10000));
      return 0;
  }

With clang 5.0.1 this takes 14 seconds to run on my laptop. If you replace
`char` with `short` it becomes 35 times faster (wow). The main difference in the
generated code is that for `char` no `memset` is called inside the
`__construct_at_end` function.

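By advancing a local copy of the end pointer in the loop and storing it back to
`__end_` after each construction, this change lets the compiler optimize the
loop fully: for `char` it now becomes a single `memset` call.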

Prior to this change, the `<` lines below would be generated (on x86-64); with
this change, the `>` lines are generated instead:

  51,79c58,66
  <   movq  %rax, 8(%rbx)
  <   movq  %rax, (%rbx)
  <   movq  %rax, %rcx
  <   addq  $65536, %rcx            # imm = 0x10000
  <   movq  %rcx, 16(%rbx)
  <   movq  $-65536, %rcx           # imm = 0xFFFFFFFFFFFF0000
  <   .align  16, 0x90
  < .LBB0_4:                                #   Parent Loop BB0_1 Depth=1
  <                                         # =>  This Inner Loop Header: Depth=2
  <   movb  $0, (%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   leaq  1(%rax), %rdx
  <   movq  %rdx, 8(%rbx)
  <   movb  $0, 1(%rax)
  <   movq  8(%rbx), %rax
  <   incq  %rax
  <   movq  %rax, 8(%rbx)
  <   addq  $4, %rcx
  <   jne  .LBB0_4
  < # BB#5:                                 # %_ZN9TestClassC2Em.exit
  <                                         #   in Loop: Header=BB0_1 Depth=1
  ---
  >   movq  %rax, (%r12)
  >   movq  %rax, %rbx
  >   addq  $65536, %rbx            # imm = 0x10000
  >   movq  %rbx, 16(%r12)
  >   xorl  %esi, %esi
  >   movl  $65536, %edx            # imm = 0x10000
  >   movq  %rax, %rdi
  >   callq  memset
  >   movq  %rbx, 8(%r12)
  81,82c68,69


Repository:
  rCXX libc++

https://reviews.llvm.org/D44823

Files:
  libcxx/trunk/include/__split_buffer
  libcxx/trunk/include/vector


Index: libcxx/trunk/include/vector
===================================================================
--- libcxx/trunk/include/vector
+++ libcxx/trunk/include/vector
@@ -984,11 +984,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
@@ -1006,11 +1008,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
Index: libcxx/trunk/include/__split_buffer
===================================================================
--- libcxx/trunk/include/__split_buffer
+++ libcxx/trunk/include/__split_buffer
@@ -198,10 +198,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }
@@ -217,10 +219,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }


Index: libcxx/trunk/include/vector
===================================================================
--- libcxx/trunk/include/vector
+++ libcxx/trunk/include/vector
@@ -984,11 +984,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
@@ -1006,11 +1008,13 @@
 vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     allocator_type& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
         __RAII_IncreaseAnnotator __annotator(*this);
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
         __annotator.__done();
     } while (__n > 0);
Index: libcxx/trunk/include/__split_buffer
===================================================================
--- libcxx/trunk/include/__split_buffer
+++ libcxx/trunk/include/__split_buffer
@@ -198,10 +198,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_));
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end));
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }
@@ -217,10 +219,12 @@
 __split_buffer<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
 {
     __alloc_rr& __a = this->__alloc();
+    pointer __to_be_end = this->__end_;
     do
     {
-        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(this->__end_), __x);
-        ++this->__end_;
+        __alloc_traits::construct(__a, _VSTD::__to_raw_pointer(__to_be_end), __x);
+        ++__to_be_end;
+        this->__end_ = __to_be_end;
         --__n;
     } while (__n > 0);
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to