================ @@ -145,17 +145,21 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) { // Shuffles the the lanes inside the wavefront according to the given index. _DEFAULT_FN_ATTRS static __inline__ uint32_t -__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) { - return __builtin_amdgcn_ds_bpermute(__idx << 2, __x); +__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, + uint32_t __width) { + uint32_t __lane = __idx + (__gpu_lane_id() & ~(__width - 1)); ---------------- Artem-B wrote:
It would be great to document what `__width` means here and that, IIUIC, it must match the "warp" size. Perhaps rename it to reflect that. Applies to other functions, too. https://github.com/llvm/llvm-project/pull/125896 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits