Hi Segher,
Pinging the patch. Please let me know if it is ok for trunk.

Regards,
Surya

On 06/05/24 1:54 pm, Surya Kumari Jangala wrote:
> Ping
> 
> On 08/01/24 11:19 am, Surya Kumari Jangala wrote:
>> Ping
>>
>> On 28/11/23 6:24 pm, Surya Kumari Jangala wrote:
>>> Ping
>>>
>>> On 10/11/23 12:27 pm, Surya Kumari Jangala wrote:
>>>> Ping
>>>>
>>>> On 03/11/23 1:14 pm, Surya Kumari Jangala wrote:
>>>>> Hi Segher,
>>>>> I have incorporated changes in the code as per the review comments 
>>>>> provided by you 
>>>>> for version 2 of the patch. Please review.
>>>>>
>>>>> Regards,
>>>>> Surya
>>>>>
>>>>>
>>>>> rs6000/p8swap: Fix incorrect lane extraction by vec_extract() [PR106770]
>>>>>
>>>>> In the routine rs6000_analyze_swaps(), special handling of swappable
>>>>> instructions is done even if the webs that contain the swappable 
>>>>> instructions
>>>>> are not optimized, i.e., the webs do not contain any permuting load/store
>>>>> instructions along with the associated register swap instructions. Doing 
>>>>> special
>>>>> handling in such webs will result in the extracted lane being adjusted
>>>>> unnecessarily for vec_extract.
>>>>>
>>>>> Another issue is that existing code treats non-permuting loads/stores as 
>>>>> special
>>>>> swappables. Non-permuting loads/stores (that have not yet been split into 
>>>>> a
>>>>> permuting load/store and a swap) are handled by converting them into a 
>>>>> permuting
>>>>> load/store (which effectively removes the swap). As a result, if special
>>>>> swappables are handled only in webs containing permuting loads/stores, 
>>>>> then
>>>>> non-optimal code is generated for non-permuting loads/stores.
>>>>>
>>>>> Hence, in this patch, all webs containing either permuting loads/ stores 
>>>>> or
>>>>> non-permuting loads/stores are marked as requiring special handling of
>>>>> swappables. Swaps associated with permuting loads/stores are marked for 
>>>>> removal,
>>>>> and non-permuting loads/stores are converted to permuting loads/stores. 
>>>>> Then the
>>>>> special swappables in the webs are fixed up.
>>>>>
>>>>> This patch also ensures that swappable instructions are not modified in 
>>>>> the
>>>>> following webs as it is incorrect to do so:
>>>>>  - webs containing permuting load/store instructions and associated swap
>>>>>    instructions that are transformed by converting the permuting memory
>>>>>    instructions into non-permuting instructions and removing the swap
>>>>>    instructions.
>>>>>  - webs where swap(load(vector constant)) instructions are replaced with
>>>>>    load(swapped vector constant).
>>>>>
>>>>> 2023-09-10  Surya Kumari Jangala  <jskum...@linux.ibm.com>
>>>>>
>>>>> gcc/
>>>>>   PR rtl-optimization/PR106770
>>>>>   * config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New function.
>>>>>   (handle_non_permuting_mem_insn): New function.
>>>>>   (rs6000_analyze_swaps): Handle swappable instructions only in certain
>>>>>   webs.
>>>>>   (web_requires_special_handling): New instance variable.
>>>>>   (handle_special_swappables): Remove handling of non-permuting load/store
>>>>>   instructions.
>>>>>
>>>>> gcc/testsuite/
>>>>>   PR rtl-optimization/PR106770
>>>>>   * gcc.target/powerpc/pr106770.c: New test.
>>>>> ---
>>>>>
>>>>> diff --git a/gcc/config/rs6000/rs6000-p8swap.cc 
>>>>> b/gcc/config/rs6000/rs6000-p8swap.cc
>>>>> index 0388b9bd736..02ea299bc3d 100644
>>>>> --- a/gcc/config/rs6000/rs6000-p8swap.cc
>>>>> +++ b/gcc/config/rs6000/rs6000-p8swap.cc
>>>>> @@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base
>>>>>    unsigned int special_handling : 4;
>>>>>    /* Set if the web represented by this entry cannot be optimized.  */
>>>>>    unsigned int web_not_optimizable : 1;
>>>>> +  /* Set if the swappable insns in the web represented by this entry
>>>>> +     have to be fixed. Swappable insns have to be fixed in:
>>>>> +       - webs containing permuting loads/stores and the swap insns
>>>>> +  in such webs have been marked for removal
>>>>> +       - webs where non-permuting loads/stores have been converted
>>>>> +  to permuting loads/stores  */
>>>>> +  unsigned int web_requires_special_handling : 1;
>>>>>    /* Set if this insn should be deleted.  */
>>>>>    unsigned int will_delete : 1;
>>>>>  };
>>>>> @@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry 
>>>>> *insn_entry, unsigned i)
>>>>>        if (dump_file)
>>>>>   fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
>>>>>        break;
>>>>> -    case SH_NOSWAP_LD:
>>>>> -      /* Convert a non-permuting load to a permuting one.  */
>>>>> -      permute_load (insn);
>>>>> -      break;
>>>>> -    case SH_NOSWAP_ST:
>>>>> -      /* Convert a non-permuting store to a permuting one.  */
>>>>> -      permute_store (insn);
>>>>> -      break;
>>>>>      case SH_EXTRACT:
>>>>>        /* Change the lane on an extract operation.  */
>>>>>        adjust_extract (insn);
>>>>> @@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun)
>>>>>    free (to_delete);
>>>>>  }
>>>>>  
>>>>> +/* Return true if insn is a non-permuting load/store.  */
>>>>> +static bool
>>>>> +non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i)
>>>>> +{
>>>>> +  return insn_entry[i].special_handling == SH_NOSWAP_LD
>>>>> +  || insn_entry[i].special_handling == SH_NOSWAP_ST;
>>>>> +}
>>>>> +
>>>>> +/* Convert a non-permuting load/store insn to a permuting one.  */
>>>>> +static void
>>>>> +convert_mem_insn (swap_web_entry *insn_entry, unsigned int i)
>>>>> +{
>>>>> +  rtx_insn *insn = insn_entry[i].insn;
>>>>> +  if (insn_entry[i].special_handling == SH_NOSWAP_LD)
>>>>> +    permute_load (insn);
>>>>> +  if (insn_entry[i].special_handling == SH_NOSWAP_ST)
>>>>> +    permute_store (insn);
>>>>> +}
>>>>> +
>>>>>  /* Main entry point for this pass.  */
>>>>>  unsigned int
>>>>>  rs6000_analyze_swaps (function *fun)
>>>>> @@ -2624,25 +2642,55 @@ rs6000_analyze_swaps (function *fun)
>>>>>        dump_swap_insn_table (insn_entry);
>>>>>      }
>>>>>  
>>>>> -  /* For each load and store in an optimizable web (which implies
>>>>> -     the loads and stores are permuting), find the associated
>>>>> -     register swaps and mark them for removal.  Due to various
>>>>> -     optimizations we may mark the same swap more than once.  Also
>>>>> -     perform special handling for swappable insns that require it.  */
>>>>> +  /* There are two kinds of optimizations that can be performed on an
>>>>> +     optimizable web:
>>>>> +     1. Remove the register swaps associated with permuting load/store
>>>>> + in an optimizable web
>>>>> +     2. Convert the vanilla loads/stores (that have not yet been split
>>>>> + into a permuting load/store and a swap) into a permuting
>>>>> + load/store (which effectively removes the swap)
>>>>> +     In both the cases, swappable instructions in the webs need
>>>>> +     special handling to fix them up.  */
>>>>>    for (i = 0; i < e; ++i)
>>>>> +    /* For each permuting load/store in an optimizable web, find
>>>>> +       the associated register swaps and mark them for removal.
>>>>> +       Due to various optimizations we may mark the same swap more
>>>>> +       than once.  */
>>>>>      if ((insn_entry[i].is_load || insn_entry[i].is_store)
>>>>>   && insn_entry[i].is_swap)
>>>>>        {
>>>>>   swap_web_entry* root_entry
>>>>>     = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>>>>   if (!root_entry->web_not_optimizable)
>>>>> -   mark_swaps_for_removal (insn_entry, i);
>>>>> +   {
>>>>> +     mark_swaps_for_removal (insn_entry, i);
>>>>> +     root_entry->web_requires_special_handling = true;
>>>>> +   }
>>>>>        }
>>>>> -    else if (insn_entry[i].is_swappable && 
>>>>> insn_entry[i].special_handling)
>>>>> +    /* Convert the non-permuting loads/stores into a permuting
>>>>> +       load/store.  */
>>>>> +    else if (insn_entry[i].is_swappable
>>>>> +      && non_permuting_mem_insn (insn_entry, i))
>>>>>        {
>>>>>   swap_web_entry* root_entry
>>>>>     = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>>>>   if (!root_entry->web_not_optimizable)
>>>>> +   {
>>>>> +     convert_mem_insn (insn_entry, i);
>>>>> +     root_entry->web_requires_special_handling = true;
>>>>> +   }
>>>>> +      }
>>>>> +
>>>>> +  /* Now that the webs which require special handling have been
>>>>> +     identified, modify the instructions that are sensitive to
>>>>> +     element order.  */
>>>>> +  for (i = 0; i < e; ++i)
>>>>> +    if (insn_entry[i].is_swappable && insn_entry[i].special_handling
>>>>> + && !non_permuting_mem_insn (insn_entry, i))
>>>>> +      {
>>>>> + swap_web_entry* root_entry
>>>>> +   = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
>>>>> + if (root_entry->web_requires_special_handling)
>>>>>     handle_special_swappables (insn_entry, i);
>>>>>        }
>>>>>  
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr106770.c 
>>>>> b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>>>>> new file mode 100644
>>>>> index 00000000000..5b300b94a41
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr106770.c
>>>>> @@ -0,0 +1,20 @@
>>>>> +/* { dg-require-effective-target powerpc_p8vector_ok } */
>>>>> +/* { dg-options "-mdejagnu-cpu=power8 -O2 " } */
>>>>> +/* The 2 xxpermdi instructions are generated by the two
>>>>> +   calls to vec_promote() */
>>>>> +/* { dg-final { scan-assembler-times {xxpermdi} 2 } } */
>>>>> +
>>>>> +/* Test case to resolve PR106770  */
>>>>> +
>>>>> +#include <altivec.h>
>>>>> +
>>>>> +int cmp2(double a, double b)
>>>>> +{
>>>>> +    vector double va = vec_promote(a, 1);
>>>>> +    vector double vb = vec_promote(b, 1);
>>>>> +    vector long long vlt = (vector long long)vec_cmplt(va, vb);
>>>>> +    vector long long vgt = (vector long long)vec_cmplt(vb, va);
>>>>> +    vector signed long long vr = vec_sub(vlt, vgt);
>>>>> +
>>>>> +    return vec_extract(vr, 1);
>>>>> +}

Reply via email to