Another operation that exists on a number of processors is an "average"
operation, which computes "(ext (A) + ext (B) + 1) >> 1" from two input
operands. Some of our backends try to represent this with complex RTL
expressions, others use unspecs. This patch adds two new rtx codes.

Bootstrapped and tested on i686-linux (with the usual pass39-frag test
failures). Ok?


Bernd
        * doc/rtl.texi (avg, uavg): Document.
        * rtl.def (AVG, UAVG): New.
        * simplify-rtx.c (simplify_binary_operation_1,
        simplify_const_binary_operation): Add them to default clauses.
        * config/i386/mmx.md (mmx_uavgv8qi3, *mmx_uavgv8qi3,
        mmx_uavgv4hi3, *mmx_uavgv4hi3): Rewrite to use UAVG.
        * config/i386/sse.md (sse2_uavgv16qi3, *sse2_uavgv16qi3,
        sse2_uavgv8hi3, *sse2_uavgv8hi3): Likewise.

Index: gcc/doc/rtl.texi
===================================================================
--- gcc/doc/rtl.texi    (revision 174339)
+++ gcc/doc/rtl.texi    (working copy)
@@ -2385,6 +2385,18 @@ Represents the absolute value of @var{x}
 @code{ss_abs} ensures that an out-of-bounds result saturates to the
 maximum signed value.
 
+@findex avg
+@cindex average
+@findex uavg
+@cindex unsigned average
+@item (avg:@var{m} @var{x} @var{y})
+@itemx (uavg:@var{m} @var{x} @var{y})
+Represents an averaging operation.  Two integer values are added, the
+constant 1 is added to the result, and the whole is shifted right by one
+to produce the result.  The result has the same mode as the inputs, but
+the operation uses intermediate values which are one bit wider.  The
+inputs are sign-extended to that wider precision for @code{avg},
+zero-extended for @code{uavg}.
 
 @findex sqrt
 @cindex square root
Index: gcc/rtl.def
===================================================================
--- gcc/rtl.def (revision 174339)
+++ gcc/rtl.def (working copy)
@@ -674,6 +674,15 @@ DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", "
    an integer multiple of the number of input parts.  */
 DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY)
 
+/* Describes an operation that averages two integer values by adding
+   them together, adding 1, and shifting the result right by 1.  The
+   result is as large as the inputs, but the operation uses
+   intermediate values with a precision one bit wider.  For AVG, the
+   input values are sign-extended to that wider precision, for UAVG,
+   they are zero-extended.  */
+DEF_RTL_EXPR(AVG, "avg", "ee", RTX_COMM_ARITH)
+DEF_RTL_EXPR(UAVG, "uavg", "ee", RTX_COMM_ARITH)
+
 /* Addition with signed saturation */
 DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH)
 
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c  (revision 174339)
+++ gcc/simplify-rtx.c  (working copy)
@@ -2962,6 +2962,8 @@ simplify_binary_operation_1 (enum rtx_co
     case US_MULT:
     case SS_DIV:
     case US_DIV:
+    case AVG:
+    case UAVG:
       /* ??? There are simplifications that can be done.  */
       return 0;
 
@@ -3671,6 +3673,8 @@ simplify_const_binary_operation (enum rt
        case US_DIV:
        case SS_ASHIFT:
        case US_ASHIFT:
+       case AVG:
+       case UAVG:
          /* ??? There are simplifications that can be done.  */
          return 0;
 
Index: gcc/config/i386/mmx.md
===================================================================
--- gcc/config/i386/mmx.md      (revision 174339)
+++ gcc/config/i386/mmx.md      (working copy)
@@ -1460,37 +1460,15 @@ (define_expand "vec_initv8qi"
 
 (define_expand "mmx_uavgv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "")
-       (truncate:V8QI
-         (lshiftrt:V8HI
-           (plus:V8HI
-             (plus:V8HI
-               (zero_extend:V8HI
-                 (match_operand:V8QI 1 "nonimmediate_operand" ""))
-               (zero_extend:V8HI
-                 (match_operand:V8QI 2 "nonimmediate_operand" "")))
-             (const_vector:V8HI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE || TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (truncate:V8QI
-         (lshiftrt:V8HI
-           (plus:V8HI
-             (plus:V8HI
-               (zero_extend:V8HI
-                 (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
-               (zero_extend:V8HI
-                 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
-             (const_vector:V8HI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
   "(TARGET_SSE || TARGET_3DNOW)
    && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
@@ -1511,33 +1489,15 @@ (define_insn "*mmx_uavgv8qi3"
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "")
-       (truncate:V4HI
-         (lshiftrt:V4SI
-           (plus:V4SI
-             (plus:V4SI
-               (zero_extend:V4SI
-                 (match_operand:V4HI 1 "nonimmediate_operand" ""))
-               (zero_extend:V4SI
-                 (match_operand:V4HI 2 "nonimmediate_operand" "")))
-             (const_vector:V4SI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-         (lshiftrt:V4SI
-           (plus:V4SI
-             (plus:V4SI
-               (zero_extend:V4SI
-                 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
-               (zero_extend:V4SI
-                 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-             (const_vector:V4SI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
   "pavgw\t{%2, %0|%0, %2}"
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md      (revision 174339)
+++ gcc/config/i386/sse.md      (working copy)
@@ -6734,45 +6734,15 @@ (define_expand "vec_unpacku_hi_<mode>"
 
 (define_expand "sse2_uavgv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "")
-       (truncate:V16QI
-         (lshiftrt:V16HI
-           (plus:V16HI
-             (plus:V16HI
-               (zero_extend:V16HI
-                 (match_operand:V16QI 1 "nonimmediate_operand" ""))
-               (zero_extend:V16HI
-                 (match_operand:V16QI 2 "nonimmediate_operand" "")))
-             (const_vector:V16QI [(const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
 
 (define_insn "*sse2_uavgv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x")
-       (truncate:V16QI
-         (lshiftrt:V16HI
-           (plus:V16HI
-             (plus:V16HI
-               (zero_extend:V16HI
-                 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
-               (zero_extend:V16HI
-                 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
-             (const_vector:V16QI [(const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)
-                                  (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
+                   (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
   "@
    pavgb\t{%2, %0|%0, %2}
@@ -6785,37 +6755,15 @@ (define_insn "*sse2_uavgv16qi3"
 
 (define_expand "sse2_uavgv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "")
-       (truncate:V8HI
-         (lshiftrt:V8SI
-           (plus:V8SI
-             (plus:V8SI
-               (zero_extend:V8SI
-                 (match_operand:V8HI 1 "nonimmediate_operand" ""))
-               (zero_extend:V8SI
-                 (match_operand:V8HI 2 "nonimmediate_operand" "")))
-             (const_vector:V8HI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
 
 (define_insn "*sse2_uavgv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-       (truncate:V8HI
-         (lshiftrt:V8SI
-           (plus:V8SI
-             (plus:V8SI
-               (zero_extend:V8SI
-                 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
-               (zero_extend:V8SI
-                 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
-             (const_vector:V8HI [(const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)
-                                 (const_int 1) (const_int 1)]))
-           (const_int 1))))]
+       (uavg:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
+                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
   "@
    pavgw\t{%2, %0|%0, %2}

Reply via email to