On 8/30/23 01:48, Song Gao wrote:
void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
{
- Int128 shft_res1, shft_res2, mask;
+ int i, j;
+ Int128 shft_res[4], mask;
VReg *Vd = (VReg *)vd;
VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
- if (imm == 0) {
- shft_res1 = Vj->Q(0);
- shft_res2 = Vd->Q(0);
- } else {
- shft_res1 = int128_urshift(Vj->Q(0), imm);
- shft_res2 = int128_urshift(Vd->Q(0), imm);
- }
mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- if (int128_ult(mask, shft_res1)) {
- Vd->D(0) = int128_getlo(mask);
- }else {
- Vd->D(0) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(1) = int128_getlo(mask);
- }else {
- Vd->D(1) = int128_getlo(shft_res2);
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ shft_res[2 * i] = Vj->Q(i);
+ shft_res[2 * i + 1] = Vd->Q(i);
+ } else {
+ shft_res[2 * i] = int128_urshift(Vj->Q(i), imm);
+ shft_res[2 * i + 1] = int128_urshift(Vd->Q(i), imm);
+ }
+ for (j = 2 * i; j <= 2 * i + 1; j++) {
+ if (int128_ult(mask, shft_res[j])) {
+ Vd->D(j) = int128_getlo(mask);
+ }else {
+ Vd->D(j) = int128_getlo(shft_res[j]);
+ }
+ }
}
}
This does not require an array of shft_res: each pair of results is consumed within the same loop iteration, so two temporaries are enough.
In fact, I encourage you to split out a helper.
r~