yaxunl created this revision.
Herald added subscribers: t-tye, Anastasia, tpr, dstuttard, wdng, kzhuravl.

AMDGPU backend requires global variables in global or constant address space 
and alloca in private address space. However, in C++ all variables are in 
default address space. Previously change has been made to cast automatic 
variables to default address space. However that is not sufficient since all 
temporary variables need to be casted to default address space.

This patch casts all temporary variables to default address space except those 
for passing indirect arguments since they are only used for load/store.

It also puts global variables in global or constant address spaces in a similar 
approach as CUDA.

It also fixes pointer size and removed OpenCL version metadata for C++ so that 
backend handles IR properly.

This patch only affects amdgcn target.


https://reviews.llvm.org/D33706

Files:
  lib/Basic/Targets.cpp
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGDecl.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/address-space.c
  test/CodeGen/default-address-space.c
  test/CodeGenCXX/amdgcn-automatic-variable.cpp
  test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp

Index: test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
===================================================================
--- test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
+++ test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN1: %clang_cc1 -std=c++11 -triple x86_64-none-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefixes=X86,CHECK %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa-amdgiz -DNO_TLS -emit-llvm -o - %s | FileCheck -check-prefixes=AMD,CHECK %s
 
 namespace std {
   typedef decltype(sizeof(int)) size_t;
@@ -46,62 +47,82 @@
   wantslist1(std::initializer_list<destroyme1>);
   ~wantslist1();
 };
-
-// CHECK: @_ZGR15globalInitList1_ = internal constant [3 x i32] [i32 1, i32 2, i32 3]
-// CHECK: @globalInitList1 = global %{{[^ ]+}} { i32* getelementptr inbounds ([3 x i32], [3 x i32]* @_ZGR15globalInitList1_, i32 0, i32 0), i{{32|64}} 3 }
+// X86: @_ZGR15globalInitList1_ = internal constant [3 x i32] [i32 1, i32 2, i32 3]
+// X86: @globalInitList1 = global %{{[^ ]+}} { i32* getelementptr inbounds ([3 x i32], [3 x i32]* @_ZGR15globalInitList1_, i32 0, i32 0), i{{32|64}} 3 }
+// AMD: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] [i32 1, i32 2, i32 3]
+// AMD: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast (i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 }
 std::initializer_list<int> globalInitList1 = {1, 2, 3};
 
+#ifndef NO_TLS
 namespace thread_local_global_array {
   // FIXME: We should be able to constant-evaluate this even though the
   // initializer is not a constant expression (pointers to thread_local
   // objects aren't really a problem).
   //
-  // CHECK: @_ZN25thread_local_global_array1xE = thread_local global
-  // CHECK: @_ZGRN25thread_local_global_array1xE_ = internal thread_local constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+  // X86: @_ZN25thread_local_global_array1xE = thread_local global
+  // X86: @_ZGRN25thread_local_global_array1xE_ = internal thread_local constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
   std::initializer_list<int> thread_local x = { 1, 2, 3, 4 };
 }
-
-// CHECK: @globalInitList2 = global %{{[^ ]+}} zeroinitializer
-// CHECK: @_ZGR15globalInitList2_ = internal global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
-
-// CHECK: @_ZN15partly_constant1kE = global i32 0, align 4
-// CHECK: @_ZN15partly_constant2ilE = global {{.*}} null, align 8
-// CHECK: @[[PARTLY_CONSTANT_OUTER:_ZGRN15partly_constant2ilE.*]] = internal global {{.*}} zeroinitializer, align 8
-// CHECK: @[[PARTLY_CONSTANT_INNER:_ZGRN15partly_constant2ilE.*]] = internal global [3 x {{.*}}] zeroinitializer, align 8
-// CHECK: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal constant [3 x i32] [i32 1, i32 2, i32 3], align 4
-// CHECK: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal global [2 x i32] zeroinitializer, align 4
-// CHECK: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
-
-// CHECK: @[[REFTMP1:.*]] = private constant [2 x i32] [i32 42, i32 43], align 4
-// CHECK: @[[REFTMP2:.*]] = private constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
+#endif
+
+// X86: @globalInitList2 = global %{{[^ ]+}} zeroinitializer
+// X86: @_ZGR15globalInitList2_ = internal global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
+// AMD: @globalInitList2 = addrspace(1) global %{{[^ ]+}} zeroinitializer
+// AMD: @_ZGR15globalInitList2_ = internal addrspace(1) global [2 x %[[WITHARG:[^ ]*]]] zeroinitializer
+
+// X86: @_ZN15partly_constant1kE = global i32 0, align 4
+// X86: @_ZN15partly_constant2ilE = global {{.*}} null, align 8
+// X86: @[[PARTLY_CONSTANT_OUTER:_ZGRN15partly_constant2ilE.*]] = internal global {{.*}} zeroinitializer, align 8
+// X86: @[[PARTLY_CONSTANT_INNER:_ZGRN15partly_constant2ilE.*]] = internal global [3 x {{.*}}] zeroinitializer, align 8
+// X86: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal constant [3 x i32] [i32 1, i32 2, i32 3], align 4
+// X86: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal global [2 x i32] zeroinitializer, align 4
+// X86: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
+// AMD: @_ZN15partly_constant1kE = addrspace(1) global i32 0, align 4
+// AMD: @_ZN15partly_constant2ilE = addrspace(2) global {{.*}} null, align 8
+// AMD: @[[PARTLY_CONSTANT_OUTER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global {{.*}} zeroinitializer, align 8
+// AMD: @[[PARTLY_CONSTANT_INNER:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global [3 x {{.*}}] zeroinitializer, align 8
+// AMD: @[[PARTLY_CONSTANT_FIRST:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
+// AMD: @[[PARTLY_CONSTANT_SECOND:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) global [2 x i32] zeroinitializer, align 4
+// AMD: @[[PARTLY_CONSTANT_THIRD:_ZGRN15partly_constant2ilE.*]] = internal addrspace(2) constant [4 x i32] [i32 5, i32 6, i32 7, i32 8], align 4
+
+// X86: @[[REFTMP1:.*]] = private constant [2 x i32] [i32 42, i32 43], align 4
+// X86: @[[REFTMP2:.*]] = private constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
+// AMD: @[[REFTMP1:.*]] = private addrspace(2) constant [2 x i32] [i32 42, i32 43], align 4
+// AMD: @[[REFTMP2:.*]] = private addrspace(2) constant [3 x %{{.*}}] [%{{.*}} { i32 1 }, %{{.*}} { i32 2 }, %{{.*}} { i32 3 }], align 4
 
 // CHECK: appending global
 
-
 // thread_local initializer:
-// CHECK-LABEL: define internal void
-// CHECK: store i32* getelementptr inbounds ([4 x i32], [4 x i32]* @_ZGRN25thread_local_global_array1xE_, i64 0, i64 0),
-// CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* @_ZN25thread_local_global_array1xE, i32 0, i32 0), align 8
-// CHECK: store i64 4, i64* getelementptr inbounds ({{.*}}, {{.*}}* @_ZN25thread_local_global_array1xE, i32 0, i32 1), align 8
-
-
-// CHECK-LABEL: define internal void
-// CHECK: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 0
-// CHECK: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 1
-// CHECK: __cxa_atexit
-// CHECK: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i64 0, i64 0),
-// CHECK:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 0), align 8
-// CHECK: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 1), align 8
-// CHECK: call void @_ZN10destroyme1D1Ev
+// X86-LABEL: define internal void @__cxx_global_var_init
+// X86: store i32* getelementptr inbounds ([4 x i32], [4 x i32]* @_ZGRN25thread_local_global_array1xE_, i64 0, i64 0),
+// X86:       i32** getelementptr inbounds ({{.*}}, {{.*}}* @_ZN25thread_local_global_array1xE, i32 0, i32 0), align 8
+// X86: store i64 4, i64* getelementptr inbounds ({{.*}}, {{.*}}* @_ZN25thread_local_global_array1xE, i32 0, i32 1), align 8
+
+// CHECK-LABEL: define internal void @__cxx_global_var_init
+// X86: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 0
+// X86: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i{{32|64}} 0, i{{32|64}} 1
+// AMD: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 0
+// AMD: call void @_ZN8witharg1C1ERK10destroyme1(%[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i{{32|64}} 0, i{{32|64}} 1
+// CHECK: call i32 @__cxa_atexit
+// X86: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* @_ZGR15globalInitList2_, i64 0, i64 0),
+// X86:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 0), align 8
+// X86: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* @globalInitList2, i32 0, i32 1), align 8
+// AMD: store %[[WITHARG]]* getelementptr inbounds ([2 x %[[WITHARG]]], [2 x %[[WITHARG]]]* addrspacecast ({{[^@]+}} @_ZGR15globalInitList2_ {{[^)]+}}), i64 0, i64 0),
+// AMD:       %[[WITHARG]]** getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 0), align 8
+// AMD: store i64 2, i64* getelementptr inbounds (%{{.*}}, %{{.*}}* addrspacecast ({{[^@]+}} @globalInitList2 {{[^)]+}}), i32 0, i32 1), align 8
 // CHECK: call void @_ZN10destroyme1D1Ev
+// CHECK-NEXT: call void @_ZN10destroyme1D1Ev
+// CHECK-NEXT: ret void
 std::initializer_list<witharg1> globalInitList2 = {
   witharg1(destroyme1()), witharg1(destroyme1())
 };
 
 void fn1(int i) {
   // CHECK-LABEL: define void @_Z3fn1i
   // temporary array
-  // CHECK: [[array:%[^ ]+]] = alloca [3 x i32]
+  // X86: [[array:%[^ ]+]] = alloca [3 x i32]
+  // AMD: [[alloca:%[^ ]+]] = alloca [3 x i32], align 4, addrspace(5)
+  // AMD: [[array:%[^ ]+]] = addrspacecast [3 x i32] addrspace(5)* [[alloca]] to [3 x i32]*
   // CHECK: getelementptr inbounds [3 x i32], [3 x i32]* [[array]], i{{32|64}} 0
   // CHECK-NEXT: store i32 1, i32*
   // CHECK-NEXT: getelementptr
@@ -175,7 +196,6 @@
   destroyme2 dm2;
   // CHECK: call void @_ZN10destroyme2D1Ev
 }
-
 void fn7() {
   // CHECK-LABEL: define void @_Z3fn7v
   // temps should be destroyed before dm2
@@ -366,37 +386,36 @@
   std::initializer_list<std::initializer_list<int>> &&il = { { 1, 2, 3 }, { 4, k }, { 5, 6, 7, 8 } };
   // First init list.
   // CHECK-NOT: @[[PARTLY_CONSTANT_FIRST]],
-  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_FIRST]], i64 0, i64 0),
-  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 0, i32 0)
-  // CHECK: store i64 3, i64* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 0, i32 1)
+  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_FIRST]]{{.*}}, i64 0, i64 0),
+  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 0, i32 0)
+  // CHECK: store i64 3, i64* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 0, i32 1)
   // CHECK-NOT: @[[PARTLY_CONSTANT_FIRST]],
   //
   // Second init list array (non-constant).
-  // CHECK: store i32 4, i32* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_SECOND]], i64 0, i64 0)
-  // CHECK: load i32, i32* @_ZN15partly_constant1kE
-  // CHECK: store i32 {{.*}}, i32* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_SECOND]], i64 0, i64 1)
+  // CHECK: store i32 4, i32* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_SECOND]]{{.*}}, i64 0, i64 0)
+  // CHECK: load i32, i32* {{.*}}@_ZN15partly_constant1kE
+  // CHECK: store i32 {{.*}}, i32* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_SECOND]]{{.*}}, i64 0, i64 1)
   //
   // Second init list.
-  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_SECOND]], i64 0, i64 0),
-  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 1, i32 0)
-  // CHECK: store i64 2, i64* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 1, i32 1)
+  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_SECOND]]{{.*}}, i64 0, i64 0),
+  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 1, i32 0)
+  // CHECK: store i64 2, i64* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 1, i32 1)
   //
   // Third init list.
   // CHECK-NOT: @[[PARTLY_CONSTANT_THIRD]],
-  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_THIRD]], i64 0, i64 0),
-  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 2, i32 0)
-  // CHECK: store i64 4, i64* getelementptr inbounds ({{.*}}, {{.*}}* @_ZGRN15partly_constant2ilE4_, i64 0, i64 2, i32 1)
+  // CHECK: store i32* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_THIRD]]{{.*}}, i64 0, i64 0),
+  // CHECK:       i32** getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 2, i32 0)
+  // CHECK: store i64 4, i64* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@_ZGRN15partly_constant2ilE4_{{.*}}, i64 0, i64 2, i32 1)
   // CHECK-NOT: @[[PARTLY_CONSTANT_THIRD]],
   //
   // Outer init list.
-  // CHECK: store {{.*}}* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_INNER]], i64 0, i64 0),
-  // CHECK:       {{.*}}** getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_OUTER]], i32 0, i32 0)
-  // CHECK: store i64 3, i64* getelementptr inbounds ({{.*}}, {{.*}}* @[[PARTLY_CONSTANT_OUTER]], i32 0, i32 1)
+  // CHECK: store {{.*}}* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_INNER]]{{.*}}, i64 0, i64 0),
+  // CHECK:       {{.*}}** getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_OUTER]]{{.*}}, i32 0, i32 0)
+  // CHECK: store i64 3, i64* getelementptr inbounds ({{.*}}, {{.*}}* {{.*}}@[[PARTLY_CONSTANT_OUTER]]{{.*}}, i32 0, i32 1)
   //
   // 'il' reference.
-  // CHECK: store {{.*}}* @[[PARTLY_CONSTANT_OUTER]], {{.*}}** @_ZN15partly_constant2ilE, align 8
+  // CHECK: store {{.*}}* {{.*}}@[[PARTLY_CONSTANT_OUTER]]{{.*}}, {{.*}}** {{.*}}@_ZN15partly_constant2ilE{{.*}}, align 8
 }
-
 namespace nested {
   struct A { A(); ~A(); };
   struct B { const A &a; ~B(); };
@@ -463,7 +482,7 @@
   template<int x> void f() { new MyClass({42, 43}); }
   template void f<0>();
   // CHECK-LABEL: define {{.*}} @_ZN7PR204451fILi0EEEvv(
-  // CHECK: store i32* getelementptr inbounds ([2 x i32], [2 x i32]* @[[REFTMP1]], i64 0, i64 0)
+  // CHECK: store i32* getelementptr inbounds ([2 x i32], [2 x i32]* {{.*}}@[[REFTMP1]]{{.*}}, i64 0, i64 0)
   // CHECK: call void @_ZN7PR204456vectorC1ESt16initializer_listIiE(
   // CHECK: call void @_ZN7PR204457MyClassC1ERKNS_6vectorE(
 }
@@ -476,9 +495,9 @@
   };
   void f(std::initializer_list<C>);
   void g() {
-// CHECK-LABEL: _ZN9ConstExpr1gEv
-// CHECK: store %"class.ConstExpr::C"* getelementptr inbounds ([3 x %"class.ConstExpr::C"], [3 x %"class.ConstExpr::C"]* @[[REFTMP2]], i64 0, i64 0)
-// CHECK: call void @_ZN9ConstExpr1fESt16initializer_listINS_1CEE
+    // CHECK-LABEL: _ZN9ConstExpr1gEv
+    // CHECK: store %"class.ConstExpr::C"* getelementptr inbounds ([3 x %"class.ConstExpr::C"], [3 x %"class.ConstExpr::C"]* {{.*}}@[[REFTMP2]]{{.*}}, i64 0, i64 0)
+    // CHECK: call void @_ZN9ConstExpr1fESt16initializer_listINS_1CEE
     f({C(1), C(2), C(3)});
   }
 }
@@ -498,11 +517,13 @@
   void f1() {
     // CHECK-LABEL: @_ZN9B197730102f1Ev
     testcase a{{"", ENUM_CONSTANT}};
-    // CHECK: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
+    // X86: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
+    // AMD: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(2)* @.ref.tmp{{.*}} to [1 x %"struct.B19773010::pair"] addrspace(2)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** %{{.*}}, align 8
   }
   void f2() {
     // CHECK-LABEL: @_ZN9B197730102f2Ev
-    // CHECK: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* @_ZZN9B197730102f2EvE1p, i64 0, i64 1, i32 0), align 16
+    // X86: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* bitcast ([1 x { i8*, i32 }]* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"]*), i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* @_ZZN9B197730102f2EvE1p, i64 0, i64 1, i32 0), align 16
+    // AMD: store %"struct.B19773010::pair"* getelementptr inbounds ([1 x %"struct.B19773010::pair"], [1 x %"struct.B19773010::pair"]* addrspacecast{{.*}} bitcast ([1 x { i8*, i32 }] addrspace(1)* @_ZGRZN9B197730102f2EvE1p_ to [1 x %"struct.B19773010::pair"] addrspace(1)*){{.*}}, i64 0, i64 0), %"struct.B19773010::pair"** getelementptr inbounds ([2 x %"class.std::initializer_list.10"], [2 x %"class.std::initializer_list.10"]* addrspacecast{{.*}}@_ZZN9B197730102f2EvE1p{{.*}}, i64 0, i64 1, i32 0), align 8
     static std::initializer_list<pair<const char *, E>> a, p[2] =
         {a, {{"", ENUM_CONSTANT}}};
   }
Index: test/CodeGenCXX/amdgcn-automatic-variable.cpp
===================================================================
--- test/CodeGenCXX/amdgcn-automatic-variable.cpp
+++ test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -3,19 +3,20 @@
 // CHECK-LABEL: define void @_Z5func1Pi(i32* %x)
 void func1(int *x) {
   // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5)
-  // CHECK: store i32* %x, i32* addrspace(5)* %[[x_addr]]
-  // CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]]
-  // CHECK: store i32 1, i32* %[[r0]]
+  // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32**
+  // CHECK: store i32* %x, i32** %[[r0]]
+  // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]]
+  // CHECK: store i32 1, i32* %[[r1]]
   *x = 1;
 }
 
 // CHECK-LABEL: define void @_Z5func2v()
 void func2(void) {
   // CHECK: %lv1 = alloca i32, align 4, addrspace(5)
   // CHECK: %lv2 = alloca i32, align 4, addrspace(5)
   // CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
-  // CHECK: %lp1 = alloca i32*, align 4, addrspace(5)
-  // CHECK: %lp2 = alloca i32*, align 4, addrspace(5)
+  // CHECK: %lp1 = alloca i32*, align 8, addrspace(5)
+  // CHECK: %lp2 = alloca i32*, align 8, addrspace(5)
   // CHECK: %lvc = alloca i32, align 4, addrspace(5)
 
   // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
@@ -33,12 +34,12 @@
   la[0] = 3;
 
   // CHECK: %[[r3:.*]] = addrspacecast i32* addrspace(5)* %lp1 to i32**
-  // CHECK: store i32* %[[r0]], i32** %[[r3]], align 4
+  // CHECK: store i32* %[[r0]], i32** %[[r3]], align 8
   int *lp1 = &lv1;
 
   // CHECK: %[[r4:.*]] = addrspacecast i32* addrspace(5)* %lp2 to i32**
   // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i32 0, i32 0
-  // CHECK: store i32* %[[arraydecay]], i32** %[[r4]], align 4
+  // CHECK: store i32* %[[arraydecay]], i32** %[[r4]], align 8
   int *lp2 = la;
 
   // CHECK: call void @_Z5func1Pi(i32* %[[r0]])
@@ -70,3 +71,14 @@
   // CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
   A a;
 }
+
+// CHECK-LABEL: define void @_Z5func4i
+void func4(int x) {
+  // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5)
+  // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32*
+  // CHECK: store i32 %x, i32* %[[r0]], align 4
+  // CHECK: call void @_Z5func1Pi(i32* %[[r0]])
+  func1(&x);
+}
+
+// CHECK-NOT: !opencl.ocl.version 
Index: test/CodeGen/default-address-space.c
===================================================================
--- test/CodeGen/default-address-space.c
+++ test/CodeGen/default-address-space.c
@@ -1,56 +1,59 @@
 // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=PIZ,COM %s
 // RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,COM %s
 
-// PIZ-DAG: @foo = common addrspace(4) global i32 0
-// CHECK-DAG: @foo = common global i32 0
+// PIZ-DAG: @foo = common addrspace(1) global i32 0
+// CHECK-DAG: @foo = common addrspace(1) global i32 0
 int foo;
 
-// PIZ-DAG: @ban = common addrspace(4) global [10 x i32] zeroinitializer
-// CHECK-DAG: @ban = common global [10 x i32] zeroinitializer
+// PIZ-DAG: @ban = common addrspace(1) global [10 x i32] zeroinitializer
+// CHECK-DAG: @ban = common addrspace(1) global [10 x i32] zeroinitializer
 int ban[10];
 
-// PIZ-DAG: @A = common addrspace(4) global i32 addrspace(4)* null
-// PIZ-DAG: @B = common addrspace(4) global i32 addrspace(4)* null
-// CHECK-DAG: @A = common global i32* null
-// CHECK-DAG: @B = common global i32* null
+// PIZ-DAG: @A = common addrspace(1) global i32 addrspace(4)* null
+// PIZ-DAG: @B = common addrspace(1) global i32 addrspace(4)* null
+// CHECK-DAG: @A = common addrspace(1) global i32* null
+// CHECK-DAG: @B = common addrspace(1) global i32* null
 int *A;
 int *B;
 
 // COM-LABEL: define i32 @test1()
-// PIZ: load i32, i32 addrspace(4)* @foo
-// CHECK: load i32, i32* @foo
+// PIZ: load i32, i32 addrspace(4)* addrspacecast{{[^@]+}} @foo
+// CHECK: load i32, i32* addrspacecast{{[^@]+}} @foo
 int test1() { return foo; }
 
 // COM-LABEL: define i32 @test2(i32 %i)
-// PIZ: load i32, i32 addrspace(4)*
+// COM: %[[addr:.*]] = getelementptr
+// PIZ: load i32, i32 addrspace(4)* %[[addr]]
 // PIZ-NEXT: ret i32
-// CHECK: load i32, i32*
+// CHECK: load i32, i32* %[[addr]]
 // CHECK-NEXT: ret i32
 int test2(int i) { return ban[i]; }
 
 // COM-LABEL: define void @test3()
-// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* @B
+// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* addrspacecast{{[^@]+}} @B
 // PIZ: load i32, i32 addrspace(4)*
-// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* @A
+// PIZ: load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* addrspacecast{{[^@]+}} @A
 // PIZ: store i32 {{.*}}, i32 addrspace(4)*
-// CHECK: load i32*, i32** @B
+// CHECK: load i32*, i32** addrspacecast{{.*}} @B
 // CHECK: load i32, i32*
-// CHECK: load i32*, i32** @A
+// CHECK: load i32*, i32** addrspacecast{{.*}} @A
 // CHECK: store i32 {{.*}}, i32*
 void test3() {
   *A = *B;
 }
 
 // PIZ-LABEL: define void @test4(i32 addrspace(4)* %a)
-// PIZ: %[[a_addr:.*]] = alloca i32 addrspace(4)*
-// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)** %[[a_addr]]
-// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[a_addr]]
+// PIZ: %[[alloca:.*]] = alloca i32 addrspace(4)*
+// PIZ: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32 addrspace(4)* addrspace(4)*
+// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
+// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]]
 // PIZ: %[[arrayidx:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[r0]]
 // PIZ: store i32 0, i32 addrspace(4)* %[[arrayidx]]
 // CHECK-LABEL: define void @test4(i32* %a)
-// CHECK: %[[a_addr:.*]] = alloca i32*, align 4, addrspace(5)
-// CHECK: store i32* %a, i32* addrspace(5)* %[[a_addr]]
-// CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[a_addr]]
+// CHECK: %[[alloca:.*]] = alloca i32*, align 8, addrspace(5)
+// CHECK: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32**
+// CHECK: store i32* %a, i32** %[[a_addr]]
+// CHECK: %[[r0:.*]] = load i32*, i32** %[[a_addr]]
 // CHECK: %[[arrayidx:.*]] = getelementptr inbounds i32, i32* %[[r0]]
 // CHECK: store i32 0, i32* %[[arrayidx]]
 void test4(int *a) {
Index: test/CodeGen/address-space.c
===================================================================
--- test/CodeGen/address-space.c
+++ test/CodeGen/address-space.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s
 // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s
-// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s
 
 // CHECK: @foo = common addrspace(1) global
 int foo __attribute__((address_space(1)));
@@ -24,11 +24,13 @@
 __attribute__((address_space(2))) int *A, *B;
 
 // CHECK-LABEL: define void @test3()
-// GIZ: load i32 addrspace(2)*, i32 addrspace(2)** @B
-// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* @B
+// X86: load i32 addrspace(2)*, i32 addrspace(2)** @B
+// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**)
+// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* addrspacecast (i32 addrspace(2)* addrspace(1)* @B to i32 addrspace(2)* addrspace(4)*)
 // CHECK: load i32, i32 addrspace(2)*
-// GIZ: load i32 addrspace(2)*, i32 addrspace(2)** @A
-// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* @A
+// X86: load i32 addrspace(2)*, i32 addrspace(2)** @A
+// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**)
+// PIZ: load i32 addrspace(2)*, i32 addrspace(2)* addrspace(4)* addrspacecast (i32 addrspace(2)* addrspace(1)* @A to i32 addrspace(2)* addrspace(4)*)
 // CHECK: store i32 {{.*}}, i32 addrspace(2)*
 void test3() {
   *A = *B;
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7363,7 +7363,8 @@
       F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
   }
 
-  appendOpenCLVersionMD(M);
+  if (M.getLangOpts().OpenCL)
+    appendOpenCLVersionMD(M);
 }
 
 unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -2471,13 +2471,34 @@
 
 unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
                                                  unsigned AddrSpace) {
-  if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) {
-    if (D->hasAttr<CUDAConstantAttr>())
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
-    else if (D->hasAttr<CUDASharedAttr>())
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
-    else
-      AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+  if (D) {
+    if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+      if (D->hasAttr<CUDAConstantAttr>())
+        AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
+      else if (D->hasAttr<CUDASharedAttr>())
+        AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
+      else
+        AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+    } else if (getTriple().getArch() == llvm::Triple::amdgcn) {
+      auto LangAddr = D->getType().getAddressSpace();
+      if (LangOpts.OpenCL) {
+        assert(LangAddr == LangAS::opencl_global ||
+               LangAddr == LangAS::opencl_constant ||
+               LangAddr == LangAS::opencl_local ||
+               LangAddr >= LangAS::FirstTargetAddressSpace);
+      } else {
+        assert(LangAddr == LangAS::Default ||
+               LangAddr >= LangAS::FirstTargetAddressSpace);
+      }
+      if (!LangOpts.OpenCL && LangAddr == LangAS::Default) {
+        if (isTypeConstant(D->getType(), false)) {
+          LangAddr = LangAS::opencl_constant;
+        } else {
+          LangAddr = LangAS::opencl_global;
+        }
+        AddrSpace = getContext().getTargetAddressSpace(LangAddr);
+      }
+    }
   }
 
   return AddrSpace;
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1915,13 +1915,26 @@
                             LValueBaseInfo *BaseInfo = nullptr);
   LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
 
-  /// CreateTempAlloca - This creates a alloca and inserts it into the entry
-  /// block. The caller is responsible for setting an appropriate alignment on
+  /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+  /// block if \p ArraySize is constant, otherwise inserts it at the current
+  /// insertion point of the builder. The caller is responsible for setting an
+  /// appropriate alignment on
   /// the alloca.
-  llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
-                                     const Twine &Name = "tmp");
+  ///
+  /// \p ArraySize is the number of array elements to be allocated if it
+  ///    is not nullptr.
+  ///
+  /// For certain targets the alloca address space may be different from the
+  /// default address space expected by the language. E.g. C++ expects a
+  /// temporary variable in default address space. If \p DoCast is true,
+  /// alloca will be casted to the address space expected by the language,
+  /// otherwise it stays in the alloca address space.
+  llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+                                     llvm::Value *ArraySize = nullptr);
   Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
-                           const Twine &Name = "tmp");
+                           const Twine &Name = "tmp",
+                           llvm::Value *ArraySize = nullptr,
+                           bool DoCast = true);
 
   /// CreateDefaultAlignedTempAlloca - This creates an alloca with the
   /// default ABI alignment of the given LLVM type.
@@ -1956,9 +1969,12 @@
   Address CreateIRTemp(QualType T, const Twine &Name = "tmp");
 
   /// CreateMemTemp - Create a temporary memory object of the given type, with
-  /// appropriate alignment.
-  Address CreateMemTemp(QualType T, const Twine &Name = "tmp");
-  Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp");
+  /// appropriate alignment. Cast it to the expected address space if \p DoCast
+  /// is true.
+  Address CreateMemTemp(QualType T, const Twine &Name = "tmp",
+                        bool DoCast = true);
+  Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp",
+                        bool DoCast = true);
 
   /// CreateAggTemp - Create a temporary memory object for the given
   /// aggregate type.
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -61,18 +61,35 @@
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
 /// block.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
-                                          const Twine &Name) {
-  auto Alloca = CreateTempAlloca(Ty, Name);
+                                          const Twine &Name,
+                                          llvm::Value *ArraySize, bool DoCast) {
+  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
   Alloca->setAlignment(Align.getQuantity());
-  return Address(Alloca, Align);
+  llvm::Value *V = Alloca;
+  // Alloca always returns a pointer in alloca address space, which may
+  // be different from the type defined by the language. For example,
+  // in C++ the auto variables are in the default address space. Therefore
+  // cast alloca to the expected address space when necessary.
+  if (DoCast && getASTAllocaAddressSpace() != LangAS::Default) {
+    auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+    V = getTargetHooks().performAddrSpaceCast(
+        *this, V, getASTAllocaAddressSpace(), DestAddrSpace,
+        Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
+  }
+
+  return Address(V, Align);
 }
 
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block.
+/// CreateTempAlloca - This creates an alloca and inserts it into the entry
+/// block if \p ArraySize is constant, otherwise inserts it at the current
+/// insertion point of the builder.
 llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
-                                                    const Twine &Name) {
+                                                    const Twine &Name,
+                                                    llvm::Value *ArraySize) {
+  if (ArraySize)
+    return Builder.CreateAlloca(Ty, ArraySize, Name);
   return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
-                              nullptr, Name, AllocaInsertPt);
+                              ArraySize, Name, AllocaInsertPt);
 }
 
 /// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -99,14 +116,15 @@
   return CreateTempAlloca(ConvertType(Ty), Align, Name);
 }
 
-Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) {
+Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
+                                       bool DoCast) {
   // FIXME: Should we prefer the preferred type alignment here?
-  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name);
+  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, DoCast);
 }
 
 Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
-                                       const Twine &Name) {
-  return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name);
+                                       const Twine &Name, bool DoCast) {
+  return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, DoCast);
 }
 
 /// EvaluateExprAsBool - Perform the usual unary conversions on the specified
@@ -331,9 +349,15 @@
         (Ty->isArrayType() || Ty->isRecordType()) &&
         CGF.CGM.isTypeConstant(Ty, true))
       if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) {
+        unsigned AddrSpace = 0;
+        if (CGF.CGM.getTriple().getArch() == llvm::Triple::amdgcn) {
+          AddrSpace =
+              CGF.getContext().getTargetAddressSpace(LangAS::opencl_constant);
+        }
         auto *GV = new llvm::GlobalVariable(
             CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
-            llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp");
+            llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp", nullptr,
+            llvm::GlobalValue::NotThreadLocal, AddrSpace);
         CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty);
         GV->setAlignment(alignment.getQuantity());
         // FIXME: Should we put the new global into a COMDAT?
@@ -419,8 +443,8 @@
   // Create and initialize the reference temporary.
   Address Object = createReferenceTemporary(*this, M, E);
   if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) {
-    Object = Address(llvm::ConstantExpr::getBitCast(
-        Var, ConvertTypeForMem(E->getType())->getPointerTo()),
+    Object = Address(llvm::ConstantExpr::getPointerCast(
+                         Var, ConvertTypeForMem(E->getType())->getPointerTo()),
                      Object.getAlignment());
     // If the temporary is a global and has a constant initializer or is a
     // constant temporary that we promoted to a global, we may have already
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1039,8 +1039,7 @@
       // Create the alloca.  Note that we set the name separately from
       // building the instruction so that it's there even in no-asserts
       // builds.
-      address = CreateTempAlloca(allocaTy, allocaAlignment);
-      address.getPointer()->setName(D.getName());
+      address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
 
       // Don't emit lifetime markers for MSVC catch parameters. The lifetime of
       // the catch parameter starts in the catchpad instruction, and we can't
@@ -1100,16 +1099,9 @@
     llvm::Type *llvmTy = ConvertTypeForMem(elementType);
 
     // Allocate memory for the array.
-    llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
-    vla->setAlignment(alignment.getQuantity());
-
-    address = Address(vla, alignment);
+    address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
   }
 
-  // Alloca always returns a pointer in alloca address space, which may
-  // be different from the type defined by the language. For example,
-  // in C++ the auto variables are in the default address space. Therefore
-  // cast alloca to the expected address space when necessary.
   auto T = D.getType();
   assert(T.getAddressSpace() == LangAS::Default);
   if (getASTAllocaAddressSpace() != LangAS::Default) {
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3801,7 +3801,8 @@
       assert(NumIRArgs == 1);
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
-        Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+        Address Addr =
+            CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), "tmp", false);
         IRCallArgs[FirstIRArg] = Addr.getPointer();
 
         LValue argLV = MakeAddrLValue(Addr, I->Ty);
@@ -3830,7 +3831,8 @@
                < Align.getQuantity()) ||
             (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
           // Create an aligned temporary, and copy to it.
-          Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+          Address AI =
+              CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), "tmp", false);
           IRCallArgs[FirstIRArg] = AI.getPointer();
           EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
         } else {
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2170,6 +2170,13 @@
     assert(DataLayout->getAllocaAddrSpace() == AS.Private);
 
     UseAddrSpaceMapMangling = true;
+
+    if (getMaxPointerWidth() == 64) {
+      LongWidth = LongAlign = 64;
+      SizeType = UnsignedLong;
+      PtrDiffType = SignedLong;
+      IntPtrType = SignedLong;
+    }
   }
 
   void adjust(LangOptions &Opts) override {
@@ -2181,6 +2188,9 @@
       AddrSpaceMap = Opts.OpenCL ? &AMDGPUOpenCLPrivateIsZeroMap
                                  : &AMDGPUNonOpenCLPrivateIsZeroMap;
     }
+    // Set default pointer width and alignment.
+    PointerWidth = getPointerWidthV(Opts.OpenCL ? AS.Private : AS.Generic);
+    PointerAlign = PointerWidth;
   }
 
   uint64_t getPointerWidthV(unsigned AddrSpace) const override {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to