On 23.05.19 14:27, Richard Henderson wrote: > On 5/23/19 3:50 AM, David Hildenbrand wrote: >> /* >> * Returns the number of bits composing one element. >> */ >> static uint8_t get_element_bits(uint8_t es) >> { >> return (1 << es) * BITS_PER_BYTE; >> } >> >> /* >> * Returns the bitmask for a single element. >> */ >> static uint64_t get_single_element_mask(uint8_t es) >> { >> return -1ull >> (64 - get_element_bits(es)); >> } >> >> /* >> * Returns the bitmask for a single element (excluding the MSB). >> */ >> static uint64_t get_single_element_lsbs_mask(uint8_t es) >> { >> return -1ull >> (65 - get_element_bits(es)); >> } >> >> /* >> * Returns the bitmasks for multiple elements (excluding the MSBs). >> */ >> static uint64_t get_element_lsbs_mask(uint8_t es) >> { >> return dup_const(es, get_single_element_lsbs_mask(es)); >> } >> >> static int vfae(void *v1, const void *v2, const void *v3, bool in, >> bool rt, bool zs, uint8_t es) >> { >> const uint64_t mask = get_element_lsbs_mask(es); >> const int bits = get_element_bits(es); >> uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; >> uint64_t first_zero = 16; >> uint64_t first_equal; >> int i; >> >> a0 = s390_vec_read_element64(v2, 0); >> a1 = s390_vec_read_element64(v2, 1); >> b0 = s390_vec_read_element64(v3, 0); >> b1 = s390_vec_read_element64(v3, 1); >> e0 = 0; >> e1 = 0; >> /* compare against equality with every other element */ >> for (i = 0; i < 64; i += bits) { >> t0 = i ? rol64(b0, i) : b0; >> t1 = i ? rol64(b1, i) : b1; >> e0 |= zero_search(a0 ^ t0, mask); >> e0 |= zero_search(a0 ^ t1, mask); >> e1 |= zero_search(a1 ^ t0, mask); >> e1 |= zero_search(a1 ^ t1, mask); >> } > > I don't see that this is doing what you want. You're shifting one element of > B > down, but not broadcasting it so that it is compared against every element of > A. > > I'd expect something like > > t0 = dup_const(es, b0 >> i); > t1 = dup_const(es, b1 >> i); > > (I also don't see what rol is getting you that shift doesn't.)
Let's assume a0 = [0, 1, 2, 3] a1 = [4, 5, 6, 7] b0 = [8, 8, 8, 4] b1 = [8, 8, 8, 8] What I would check is First iteration a0 == [8, 8, 8, 4] -> no match a0 == [8, 8, 8, 8] -> no match a1 == [8, 8, 8, 4] -> no match a1 == [8, 8, 8, 8] -> no match Second iteration a0 == [8, 8, 4, 8] -> no match a0 == [8, 8, 8, 8] -> no match a1 == [8, 8, 4, 8] -> no match a1 == [8, 8, 8, 8] -> no match ... Last iteration a0 == [4, 8, 8, 8] -> no match a0 == [8, 8, 8, 8] -> no match a1 == [4, 8, 8, 8] -> match in first element a1 == [8, 8, 8, 8] -> no match What am I missing? -- Thanks, David / dhildenb