[llvm-bugs] [Bug 33434] New: 256-bit double shuffles not optimal

via llvm-bugs Tue, 13 Jun 2017 03:14:05 -0700

https://bugs.llvm.org/show_bug.cgi?id=33434


            Bug ID: 33434
           Summary: 256-bit double shuffles not optimal
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedb...@nondot.org
          Reporter: tob...@grosser.es
                CC: llvm-bugs@lists.llvm.org

Hi,

I just tried to generate AVX2 code for some 256-bit AVX2 double shuffles, but
despite Chandler's outstanding work on improving X86 shuffles two years ago,
the shuffle sequences do not seem to be optimal (using llc out.ll -o -
-mcpu=x86-64 -mattr=+avx2 on r304555).

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"                     

; test_0: load two <4 x double> vectors, %A from %PA and %B from %PB, build two
; two-input shuffles of them, and store the results back through %PA and %PB.
; Mask indices 0-3 select lanes of %A; indices 4-7 select lanes of %B.
;   %SA = <A[0], B[0], A[2], A[3]>   (mask 0, 4, 2, 3)
;   %SB = <A[1], B[1], B[2], B[3]>   (mask 1, 5, 6, 7)
define void @test_0(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 1, i32 5, i32 6, i32 7>

; Shuffle sequence reported for this function (AT&T syntax):
;       vmovddup        %xmm1, %xmm2    # xmm2 = xmm1[0,0]
;       vblendpd        $2, %ymm2, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm2[1],ymm0[2,3]
;       vpermilpd       $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
;       vblendpd        $1, %ymm0, %ymm1, %ymm0 # ymm0 = ymm0[0],ymm1[1,2,3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}

; test_1: load two <4 x double> vectors, %A from %PA and %B from %PB, build two
; two-input shuffles of them, and store the results back through %PA and %PB.
; Mask indices 0-3 select lanes of %A; indices 4-7 select lanes of %B.
;   %SA = <B[0], B[1], A[0], B[2]>   (mask 4, 5, 0, 6)
;   %SB = <A[2], A[3], A[1], B[3]>   (mask 2, 3, 1, 7)
define void @test_1(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 4, i32 5, i32 0, i32 6>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 2, i32 3, i32 1, i32 7>

; Shuffle sequence reported for this function (AT&T syntax):
;       vinsertf128     $1, %xmm0, %ymm0, %ymm2
;       vpermilpd       $2, %ymm1, %ymm3 # ymm3 = ymm1[0,1,2,2]
;       vblendpd        $4, %ymm2, %ymm3, %ymm2 # ymm2 = ymm3[0,1],ymm2[2],ymm3[3]
;       vpermpd $222, %ymm0, %ymm0      # ymm0 = ymm0[2,3,1,3]
;       vblendpd        $8, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2],ymm1[3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}

; test_2: read one <4 x double> vector through each pointer, recombine them
; two elements at a time, and write the results back through the same pointers.
; With a = vector loaded from %PA and b = vector loaded from %PB:
;   stored to %PA: <a[0], a[1], b[0], b[1]>   (mask 0, 1, 4, 5)
;   stored to %PB: <a[2], a[3], b[2], b[3]>   (mask 2, 3, 6, 7)
define void @test_2(<4 x double>* %PA, <4 x double>* %PB) {
body:
  %va = load <4 x double>, <4 x double>* %PA
  %vb = load <4 x double>, <4 x double>* %PB
  %first = shufflevector <4 x double> %va, <4 x double> %vb, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %second = shufflevector <4 x double> %va, <4 x double> %vb, <4 x i32> <i32 2, i32 3, i32 6, i32 7>

; Shuffle sequence reported for this function (AT&T syntax):
;       vinsertf128     $1, %xmm1, %ymm0, %ymm2
;       vperm2f128      $49, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]

  store <4 x double> %first, <4 x double>* %PA
  store <4 x double> %second, <4 x double>* %PB
  ret void
}

Am I missing something or could these really be translated to at most two
vblendpd instructions?

Best,
Tobias

-- 
You are receiving this mail because:
You are on the CC list for the bug.

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

[llvm-bugs] [Bug 33434] New: 256-bit double shuffles not optimal

Reply via email to