On Wed, Oct 09, 2019 at 08:39:21PM -0400, Richard Henderson wrote: > On 10/4/19 8:03 AM, Andrew Jones wrote: > > +#ifdef TARGET_AARCH64 > > +static off_t sve_zreg_offset(uint32_t vq, int n) > > +{ > > + off_t off = sizeof(struct aarch64_user_sve_header); > > + return ROUND_UP(off, 16) + vq * 16 * n; > > +} > > +static off_t sve_preg_offset(uint32_t vq, int n) > > +{ > > + return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; > > +} > > +static off_t sve_fpsr_offset(uint32_t vq) > > +{ > > + off_t off = sve_preg_offset(vq, 17) + offsetof(struct aarch64_note, > > sve); > > + return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve); > > +} > > +static off_t sve_fpcr_offset(uint32_t vq) > > +{ > > + return sve_fpsr_offset(vq) + sizeof(uint32_t); > > +} > > +static uint32_t sve_current_vq(CPUARMState *env) > > +{ > > + return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; > > +} > > +static size_t sve_size_vq(uint32_t vq) > > +{ > > + off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t) + > > + offsetof(struct aarch64_note, sve); > > + return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve); > > +} > > +static size_t sve_size(CPUARMState *env) > > +{ > > + return sve_size_vq(sve_current_vq(env)); > > +} > > Watch the missing spaces between functions.
I'll put in the blank lines > > > + for (i = 0; i < 32; ++i) { > > +#ifdef HOST_WORDS_BIGENDIAN > > + uint64_t d[vq * 2]; > > + int j; > > + > > + for (j = 0; j < vq * 2; ++j) { > > + d[j] = bswap64(env->vfp.zregs[i].d[j]); > > + } > > +#else > > + uint64_t *d = &env->vfp.zregs[i].d[0]; > > +#endif > > + memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16); > > + } > > We should avoid the variable sized array here. > > It might be best to avoid the ifdef altogether: > > for (i = 0; i < 32; ++i) { > uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)]; > for (j = 0; j < vq * 2; ++j) { > d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]); > } > } > > The compiler may well transform the inner loop to memcpy for little-endian > host, but even if it doesn't core dumping is hardly performance sensitive. True. I even had something like the above at first, but then overcomplicated it with the #ifdef-ing. Thanks, drew