Hello! The attached patch slightly improves the {TF,DF,SF,TI,DI,SI}mode move patterns by calculating various attributes in a clearer way. The patch also reorders operand alternatives to avoid checks for interleaved alternatives and implements various other janitorial improvements. In addition, the patch fixes the wrong declaration of the type attribute for alternatives 3,4 of the SFmode pattern and adds missing prefix_data16 calculations in the SFmode and DFmode patterns.
The latter fixes will be backported to other branches after 4.8.0 is released. 2013-03-19 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (*movti_internal): Set prefix attribute to maybe_vex for sselog1 and ssemov types. (*movdi_internal): Reorder operand constraints. (*movsi_internal): Ditto. Set prefix attribute to maybe_vex for sselog1 and ssemov types. (*movtf_internal): Set prefix attribute to maybe_vex for sselog1 and ssemov types. (*movdf_internal): Ditto. Set prefix_data16 attribute for DImode ssemov types. Reorder operand constraints. (*movsf_internal): Set type of alternatives 3,4 to imov. Set prefix attribute to maybe_vex for sselog1 and ssemov types. Set prefix_data16 attribute for SImode ssemov types. Reorder operand constraints. Tested on x86_64-pc-linux-gnu {,-m32}. Will be committed to mainline SVN once the servers are back in working order. Uros.
Index: i386.md =================================================================== --- i386.md (revision 196784) +++ i386.md (working copy) @@ -1832,7 +1832,10 @@ } [(set_attr "isa" "x64,x64,*,*,*") (set_attr "type" "*,*,sselog1,ssemov,ssemov") - (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "mode") (cond [(eq_attr "alternative" "0,1") (const_string "DI") @@ -1859,9 +1862,9 @@ (define_insn "*movdi_internal" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o ,r,r ,r,m ,*y,m*y,*y,?*y,?r ,?*Ym,*x,m ,*x,*x,?r ,?*Yi,?*x,?*Ym") + "=r ,o ,r,r ,r,m ,*y,m*y,*y,?*y,?r ,?*Ym,*x,*x,*x,m ,?r ,?*Yi,?*x,?*Ym") (match_operand:DI 1 "general_operand" - "riFo,riF,Z,rem,i,re,C ,*y ,m ,m ,*Ym,r ,C ,*x,*x,m ,*Yi,r ,*Ym,*x"))] + "riFo,riF,Z,rem,i,re,C ,*y ,m ,m ,*Ym,r ,C ,*x,m ,*x,*Yi,r ,*Ym,*x"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -1970,7 +1973,7 @@ (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") - (eq_attr "alternative" "12,14") + (eq_attr "alternative" "12,13") (cond [(ior (not (match_test "TARGET_SSE2")) (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") @@ -1981,7 +1984,7 @@ ] (const_string "TI")) - (and (eq_attr "alternative" "13,15") + (and (eq_attr "alternative" "14,15") (not (match_test "TARGET_SSE2"))) (const_string "V2SF") ] @@ -1998,9 +2001,9 @@ (define_insn "*movsi_internal" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + "=r,m ,*y,*y,?rm,?*y,*x,*x,*x,m ,?r ,?*Yi") (match_operand:SI 1 "general_operand" - "g ,re,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m"))] + "g ,re,C ,*y,*y ,rm ,C ,*x,m ,*x,*Yi,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2056,9 +2059,9 @@ ] (const_string "imov"))) (set (attr "prefix") - (if_then_else (eq_attr "alternative" 
"0,1,2,3,4,5") - (const_string "orig") - (const_string "maybe_vex"))) + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "prefix_data16") (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) (const_string "1") @@ -2067,17 +2070,17 @@ (cond [(eq_attr "alternative" "2,3") (const_string "DI") (eq_attr "alternative" "6,7") - (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (cond [(ior (not (match_test "TARGET_SSE2")) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") - (ior (not (match_test "TARGET_SSE2")) - (match_test "optimize_function_for_size_p (cfun)")) + (match_test "optimize_function_for_size_p (cfun)") (const_string "V4SF") ] (const_string "TI")) - (and (eq_attr "alternative" "8,9,10,11") + (and (eq_attr "alternative" "8,9") (not (match_test "TARGET_SSE2"))) (const_string "SF") ] @@ -2658,7 +2661,10 @@ } [(set_attr "isa" "*,*,*,x64,x64") (set_attr "type" "sselog1,ssemov,ssemov,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "mode") (cond [(eq_attr "alternative" "3,4") (const_string "DI") @@ -2722,9 +2728,9 @@ ;; Possible store forwarding (partial memory) stall in alternative 4. 
(define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,Yi,r") + "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,x,x,x,m,*x,*x,*x,m ,r ,Yi") (match_operand:DF 1 "general_operand" - "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,r ,Yi"))] + "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,x,m,x,C ,*x,m ,*x,Yi,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2831,11 +2837,13 @@ (const_string "8") (const_string "*"))) (set (attr "prefix") - (if_then_else (eq_attr "alternative" "0,1,2,3,4,5,6,7,8") - (const_string "orig") - (const_string "maybe_vex"))) + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "prefix_data16") - (if_then_else (eq_attr "mode" "V1DF") + (if_then_else + (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI")) + (eq_attr "mode" "V1DF")) (const_string "1") (const_string "*"))) (set (attr "mode") @@ -2894,9 +2902,9 @@ (define_insn "*movsf_internal" [(set (match_operand:SF 0 "nonimmediate_operand" - "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + "=Yf*f,m ,Yf*f,?r ,?m,x,x,x,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") (match_operand:SF 1 "general_operand" - "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "Yf*fm,Yf*f,G ,rmF,rF,C,x,m,x,Yi,r ,*y ,m ,*y,*Ym,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2925,27 +2933,32 @@ return standard_sse_constant_opcode (insn, operands[1]); case 6: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - if (TARGET_AVX) - return "vmovss\t{%1, %0, %0|%0, %0, %1}"; - case 7: case 8: - return "%vmovss\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_SF: + if (TARGET_AVX && REG_P 
(operands[0]) && REG_P (operands[1])) + return "vmovss\t{%1, %0, %0|%0, %0, %1}"; + return "%vmovss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } case 9: case 10: - case 14: - case 15: - return "movd\t{%1, %0|%0, %1}"; + return "%vmovd\t{%1, %0|%0, %1}"; case 11: - return "movq\t{%1, %0|%0, %1}"; - case 12: case 13: - return "%vmovd\t{%1, %0|%0, %1}"; + case 14: + case 15: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; default: gcc_unreachable (); @@ -2955,27 +2968,34 @@ (cond [(eq_attr "alternative" "0,1,2") (const_string "fmov") (eq_attr "alternative" "3,4") - (const_string "multi") + (const_string "imov") (eq_attr "alternative" "5") (const_string "sselog1") - (eq_attr "alternative" "9,10,11,14,15") + (eq_attr "alternative" "11,12,13,14,15") (const_string "mmxmov") ] (const_string "ssemov"))) (set (attr "prefix") - (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (if_then_else (eq_attr "type" "sselog1,ssemov") (const_string "maybe_vex") (const_string "orig"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) + (const_string "1") + (const_string "*"))) (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10") + (cond [(eq_attr "alternative" "3,4,9,10,14,15") (const_string "SI") + (eq_attr "alternative" "11") + (const_string "DI") (eq_attr "alternative" "5") - (cond [(match_test "TARGET_AVX") + (cond [(not (match_test "TARGET_SSE2")) + (const_string "V4SF") + (match_test "TARGET_AVX") (const_string "V4SF") - (ior (not (match_test "TARGET_SSE2")) - (match_test "optimize_function_for_size_p (cfun)")) - (const_string "V4SF") - (match_test "TARGET_SSE_LOAD0_BY_PXOR") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + (match_test "TARGET_SSE_LOAD0_BY_PXOR") (const_string "TI") ] (const_string "V4SF")) @@ -2990,15 +3010,12 @@ of instructions to load just part of the register. 
It is better to maintain the whole registers in single format to avoid problems on using packed logical operations. */ - (eq_attr "alternative" "6") - (if_then_else - (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (match_test "TARGET_SSE_SPLIT_REGS")) - (const_string "V4SF") - (const_string "SF")) - (eq_attr "alternative" "11") - (const_string "DI")] - (const_string "SF")))]) + (and (eq_attr "alternative" "6") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS"))) + (const_string "V4SF") + ] + (const_string "SF")))]) (define_split [(set (match_operand 0 "any_fp_register_operand")