On Wednesday, March 14, 2018 10:19:11 AM PDT Lionel Landwerlin wrote: > There are a couple of ways we can get the fusing information from the > kernel : > > - Through DRM_I915_GETPARAM with the SLICE_MASK/SUBSLICE_MASK > parameters > > - Through the new DRM_IOCTL_I915_QUERY by requesting the > DRM_I915_QUERY_TOPOLOGY_INFO > > The second method is more accurate and also gives us the EUs fusing > masks. It's also a requirement for CNL as this platform has asymetric > subslices and the first method SUBSLICE_MASK value is assumed uniform > across slices. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> > --- > src/intel/dev/gen_device_info.c | 129 > ++++++++++++++++++++++++++++++++++++++++ > src/intel/dev/gen_device_info.h | 11 ++++ > 2 files changed, 140 insertions(+) > > diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c > index c1bdc997f2c..a8c9f7738b2 100644 > --- a/src/intel/dev/gen_device_info.c > +++ b/src/intel/dev/gen_device_info.c > @@ -28,8 +28,11 @@ > #include <unistd.h> > #include "gen_device_info.h" > #include "compiler/shader_enums.h" > +#include "util/bitscan.h" > #include "util/macros.h" > > +#include <i915_drm.h> > + > /** > * Get the PCI ID for the device name. > * > @@ -913,6 +916,132 @@ fill_masks(struct gen_device_info *devinfo) > } > } > > +static void > +reset_masks(struct gen_device_info *devinfo) > +{ > + devinfo->subslice_slice_stride = > + devinfo->eu_subslice_stride = > + devinfo->eu_slice_stride = 0; > + > + devinfo->num_slices = > + devinfo->num_eu_per_subslice = 0;
We tend to avoid chained assignments like this. Can we just do the following instead?

   devinfo->subslice_slice_stride = 0;
   devinfo->eu_subslice_stride = 0;
   devinfo->eu_slice_stride = 0;
   devinfo->num_slices = 0;
   devinfo->num_eu_per_subslice = 0;

With it already being on multiple lines, it's no more code, and it's easier to read IMHO.

> + memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
> +
> + memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
> + memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
> + memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
> +}
> +
> +void
> +gen_device_info_update_from_masks(struct gen_device_info *devinfo,
> + uint32_t slice_mask,
> + uint32_t subslice_mask,
> + uint32_t n_eus)

I wonder if it would be better to simply populate a drm_i915_query_topology_info structure from the masks, if we don't have the topology query uABI. Then, we could just have one codepath for filling out devinfo, with a "whoops, no kernel support" shim. I'm not sure whether it would be more or less code, but it might drop some of the duplicated complexity?

> +{
> + reset_masks(devinfo);
> +
> + assert((slice_mask & 0xff) == slice_mask);
> +
> + devinfo->slice_masks = slice_mask;
> + devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);

Please use _mesa_bitcount() here and elsewhere.

> +
> + uint32_t max_slices = util_last_bit(slice_mask);
> + uint32_t max_subslices = util_last_bit(subslice_mask);
> + devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
> + uint32_t n_subslices = 0;
> + for (int s = 0; s < util_last_bit(slice_mask); s++) {

Since max_slices was already computed above, this could be:

   for (int s = 0; s < max_slices; s++) {

> + if ((slice_mask & (1UL << s)) == 0)

Use 1u << s here.

Perhaps you like my topology-struct idea and will make a v3...if not...
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> > + continue; > + > + for (int b = 0; b < devinfo->subslice_slice_stride; b++) { > + int subslice_offset = s * devinfo->subslice_slice_stride + b; > + > + devinfo->subslice_masks[subslice_offset] = > + (subslice_mask >> (b * 8)) & 0xff; > + devinfo->num_subslices[s] += > + __builtin_popcount(devinfo->subslice_masks[subslice_offset]); > + } > + > + n_subslices += devinfo->num_subslices[s]; > + } > + > + /* We expect the total number of EUs to be uniformly distributed > throughout > + * the subslices. > + */ > + assert((n_eus % n_subslices) == 0); > + devinfo->num_eu_per_subslice = n_eus / n_subslices; > + > + devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, > 8); > + devinfo->eu_slice_stride = devinfo->eu_subslice_stride * max_subslices; > + > + for (int s = 0; s < max_slices; s++) { > + if ((slice_mask & (1UL << s)) == 0) > + continue; > + > + for (int ss = 0; ss < max_subslices; ss++) { > + if ((subslice_mask & (1UL << ss)) == 0) > + continue; > + > + for (int b = 0; b < devinfo->eu_subslice_stride; b++) { > + int eus_offset = s * devinfo->eu_slice_stride + > + ss * devinfo->eu_subslice_stride + b; > + > + devinfo->eu_masks[eus_offset] = > + (((1UL << devinfo->num_eu_per_subslice) - 1) >> (b * 8)) & > 0xff; > + } > + } > + } > +} > + > +void > +gen_device_info_update_from_topology(struct gen_device_info *devinfo, > + const struct > drm_i915_query_topology_info *topology) > +{ > + reset_masks(devinfo); > + > + devinfo->subslice_slice_stride = topology->subslice_stride; > + > + devinfo->eu_subslice_stride = > DIV_ROUND_UP(topology->max_eus_per_subslice, 8); > + devinfo->eu_slice_stride = topology->max_subslices * > devinfo->eu_subslice_stride; > + > + assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, > 8)); > + memcpy(&devinfo->slice_masks, topology->data, > DIV_ROUND_UP(topology->max_slices, 8)); > + devinfo->num_slices = __builtin_popcount(devinfo->slice_masks); > 
+ > + uint32_t subslice_mask_len = > + topology->max_slices * topology->subslice_stride; > + assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len); > + memcpy(devinfo->subslice_masks, > &topology->data[topology->subslice_offset], > + subslice_mask_len); > + > + uint32_t n_subslices = 0; > + for (int s = 0; s < topology->max_slices; s++) { > + if ((devinfo->slice_masks & (1UL << s)) == 0) > + continue; > + > + for (int b = 0; b < devinfo->subslice_slice_stride; b++) { > + devinfo->num_subslices[s] += > + __builtin_popcount(devinfo->subslice_masks[b]); > + } > + n_subslices += devinfo->num_subslices[s]; > + } > + > + uint32_t eu_mask_len = > + topology->eu_stride * topology->max_subslices * topology->max_slices; > + assert(sizeof(devinfo->eu_masks) >= eu_mask_len); > + memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], > eu_mask_len); > + > + uint32_t n_eus = 0; > + for (int b = 0; b < eu_mask_len; b++) > + n_eus += __builtin_popcount(devinfo->eu_masks[b]); > + > + /* We expect the total number of EUs to be uniformly distributed > throughout > + * the subslices. 
> + */ > + assert((n_eus % n_subslices) == 0); > + devinfo->num_eu_per_subslice = n_eus / n_subslices; > +} > + > bool > gen_get_device_info(int devid, struct gen_device_info *devinfo) > { > diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h > index 793ce094850..41aa54ab424 100644 > --- a/src/intel/dev/gen_device_info.h > +++ b/src/intel/dev/gen_device_info.h > @@ -34,6 +34,8 @@ > extern "C" { > #endif > > +struct drm_i915_query_topology_info; > + > #define GEN_DEVICE_MAX_SLICES (6) /* Maximum on gen10 */ > #define GEN_DEVICE_MAX_SUBSLICES (8) /* Maximum on gen11 */ > #define GEN_DEVICE_MAX_EUS_PER_SUBSLICE (10) /* Maximum on Haswell */ > @@ -248,6 +250,15 @@ int gen_device_name_to_pci_device_id(const char *name); > bool gen_get_device_info(int devid, struct gen_device_info *devinfo); > const char *gen_get_device_name(int devid); > > +/* Used with SLICE_MASK/SUBSLICE_MASK values from DRM_I915_GETPARAM. */ > +void gen_device_info_update_from_masks(struct gen_device_info *devinfo, > + uint32_t slice_mask, > + uint32_t subslice_mask, > + uint32_t n_eus); > +/* Used with DRM_IOCTL_I915_QUERY & DRM_I915_QUERY_TOPOLOGY_INFO. */ > +void gen_device_info_update_from_topology(struct gen_device_info *devinfo, > + const struct > drm_i915_query_topology_info *topology); > + > #ifdef __cplusplus > } > #endif >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev