On Wed, 11 Mar 2020 at 06:44, Richard Henderson <richard.hender...@linaro.org> wrote: > > For contiguous predicated memory operations, we want to > minimize the number of tlb lookups performed. We have > open-coded this for sve_ld1_r, but for correctness with > MTE we will need this for all of the memory operations. > > Create a structure that holds the bounds of active elements, > and metadata for two pages. Add routines to find those > active elements, lookup the pages, and run watchpoints > for those pages. > > Temporarily mark the functions unused to avoid Werror. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/sve_helper.c | 240 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 240 insertions(+) > > diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c > index 8b470991db..3f653e46a0 100644 > --- a/target/arm/sve_helper.c > +++ b/target/arm/sve_helper.c > @@ -4155,6 +4155,246 @@ static intptr_t max_for_page(target_ulong base, > intptr_t mem_off, > return MIN(split, mem_max - mem_off) + mem_off; > } > > +/* > + * Resolve the guest virtual address to info->host and info->flags. > + * If @nofault, return false if the page is invalid, otherwise > + * exit via page fault exception. > + */ > + > +typedef struct { > + void *host; > + int flags; > + MemTxAttrs attrs; > +} SVEHostPage; > + > +static bool sve_probe_page(SVEHostPage *info, bool nofault, > + CPUARMState *env, target_ulong addr, > + int mem_off, MMUAccessType access_type, > + int mmu_idx, uintptr_t retaddr) > +{ > + int flags; > + > + addr += mem_off; > + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, > + &info->host, retaddr); > + info->flags = flags; > + > + if (flags & TLB_INVALID_MASK) { > + g_assert(nofault); > + return false; > + } > + > + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ > + info->host -= mem_off; > + > +#ifdef CONFIG_USER_ONLY > + memset(&info->attrs, 0, sizeof(info->attrs));
Could just write "info->attrs = {};"? > +#else > + /* > + * Find the iotlbentry for addr and return the transaction attributes. > + * This *must* be present in the TLB because we just found the mapping. > + */ > + { > + uintptr_t index = tlb_index(env, mmu_idx, addr); > + > +# ifdef CONFIG_DEBUG_TCG > + CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); > + target_ulong comparator = (access_type == MMU_DATA_LOAD > + ? entry->addr_read > + : tlb_addr_write(entry)); > + g_assert(tlb_hit(comparator, addr)); > +# endif > + > + CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; > + info->attrs = iotlbentry->attrs; > + } > +#endif > + > + return true; > +} > + > + > +/* > + * Analyse contiguous data, protected by a governing predicate. > + */ > + > +typedef enum { > + FAULT_NO, > + FAULT_FIRST, > + FAULT_ALL, > +} SVEContFault; > + > +typedef struct { > + /* First and last element wholy contained within the two pages. */ "wholly" > + int16_t mem_off_first[2]; > + int16_t reg_off_first[2]; > + int16_t reg_off_last[2]; It would be helpful to document what these actually are, and in particular what the behaviour is if the whole thing fits in a single page. (Judging by the code, the elements at index 1 for the 2nd page are set to -1?) > + > + /* One element that is misaligned and spans both pages. */ > + int16_t mem_off_split; > + int16_t reg_off_split; > + int16_t page_split; > + > + /* TLB data for the two pages. */ > + SVEHostPage page[2]; > +} SVEContLdSt; > + > +/* > + * Find first active element on each page, and a loose bound for the > + * final element on each page. Identify any single element that spans > + * the page boundary. Return true if there are any active elements.
> + */ > +static bool __attribute__((unused)) > +sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg, > + intptr_t reg_max, int esz, int msize) > +{ > + const int esize = 1 << esz; > + const uint64_t pg_mask = pred_esz_masks[esz]; > + intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split; > + intptr_t mem_off_last, mem_off_split; > + intptr_t page_split, elt_split; > + intptr_t i; intptr_t seems like a funny type to be using here, since these aren't actually related to pointers as far as I can tell. It's also odd that the type is not the same one used in the SVEContLdSt struct for the corresponding fields. > + > + /* Set all of the element indicies to -1, and the TLB data to 0. */ "indices" > + memset(info, -1, offsetof(SVEContLdSt, page)); I guess this isn't conceptually much different from zeroing out integer struct fields, but it feels a bit less safe somehow. > + memset(info->page, 0, sizeof(info->page)); > + > + /* Gross scan over the entire predicate to find bounds. */ > + i = 0; > + do { > + uint64_t pg = vg[i] & pg_mask; > + if (pg) { > + reg_off_last = i * 64 + 63 - clz64(pg); > + if (reg_off_first < 0) { > + reg_off_first = i * 64 + ctz64(pg); > + } > + } > + } while (++i * 64 < reg_max); > + > + if (unlikely(reg_off_first < 0)) { > + /* No active elements, no pages touched. */ > + return false; > + } > + tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max); > + > + info->reg_off_first[0] = reg_off_first; > + info->mem_off_first[0] = (reg_off_first >> esz) * msize; > + mem_off_last = (reg_off_last >> esz) * msize; > + > + page_split = -(addr | TARGET_PAGE_MASK); What is the negation for? > + if (likely(mem_off_last + msize <= page_split)) { > + /* The entire operation fits within a single page.
*/ > + info->reg_off_last[0] = reg_off_last; > + return true; > + } > + > + info->page_split = page_split; > + elt_split = page_split / msize; > + reg_off_split = elt_split << esz; > + mem_off_split = elt_split * msize; > + > + /* > + * This is the last full element on the first page, but it is not > + * necessarily active. If there is no full element, i.e. the first > + * active element is the one that's split, this value remains -1. > + * It is useful as iteration bounds. > + */ > + if (elt_split != 0) { > + info->reg_off_last[0] = reg_off_split - esize; > + } > + > + /* Determine if an unaligned element spans the pages. */ > + if (page_split % msize != 0) { > + /* It is helpful to know if the split element is active. */ > + if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) { > + info->reg_off_split = reg_off_split; > + info->mem_off_split = mem_off_split; > + > + if (reg_off_split == reg_off_last) { > + /* The page crossing element is last. */ > + return true; > + } > + } > + reg_off_split += esize; > + mem_off_split += msize; > + } > + > + /* > + * We do want the first active element on the second page, because > + * this may affect the address reported in an exception. > + */ > + reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz); > + tcg_debug_assert(reg_off_split <= reg_off_last); > + info->reg_off_first[1] = reg_off_split; > + info->mem_off_first[1] = (reg_off_split >> esz) * msize; > + info->reg_off_last[1] = reg_off_last; > + return true; > +} thanks -- PMM