On Thu, Mar 14, 2019 at 2:52 PM Eleni Maria Stea <es...@igalia.com> wrote:
> Allowing the user to set custom sample locations non-dynamically, by > filling the extension structs and chaining them to the pipeline structs > according to the Vulkan specification section [26.5. Custom Sample > Locations] for the following structures: > > 'VkPipelineSampleLocationsStateCreateInfoEXT' > 'VkSampleLocationsInfoEXT' > 'VkSampleLocationEXT' > > Once custom locations are used, the default locations are lost and need > to be re-emitted again in the next pipeline creation. For that, we emit > the 3DSTATE_SAMPLE_PATTERN at every pipeline creation. > > v2: In v1, we used the custom anv_sample struct to store the location > and the distance from the pixel center because we would then use > this distance to sort the locations and send them in increasing > monotonical order to the GPU. That was because the Skylake PRM Vol. > 2a "3DSTATE_SAMPLE_PATTERN" says that the samples must have > monotonically increasing distance from the pixel center to get the > correct centroid computation in the device. However, the Vulkan > spec seems to require that the samples occur in the order provided > through the API and this requirement is only for the standard > locations. As long as this only affects centroid calculations as > the docs say, we should be ok because OpenGL and Vulkan only > require that the centroid be some lit sample and that it's the same > for all samples in a pixel; they have no requirement that it be the > one closest to center. (Jason Ekstrand) > For that we made the following changes: > 1- We removed the custom structs and functions from anv_private.h > and anv_sample_locations.h and anv_sample_locations.c (the last > two files were removed). (Jason Ekstrand) > 2- We modified the macros used to take also the array as parameter > and we renamed them to start by GEN_. (Jason Ekstrand) > 3- We don't sort the samples anymore. (Jason Ekstrand) > --- > src/intel/common/gen_sample_positions.h | 57 ++++++++++++++++++ > src/intel/vulkan/anv_genX.h | 5 ++ > src/intel/vulkan/anv_private.h | 1 + > src/intel/vulkan/genX_pipeline.c | 79 +++++++++++++++++++++---- > src/intel/vulkan/genX_state.c | 72 ++++++++++++++++++++++ > 5 files changed, 201 insertions(+), 13 deletions(-) > > diff --git a/src/intel/common/gen_sample_positions.h > b/src/intel/common/gen_sample_positions.h > index da48dcb5ed0..850661931cf 100644 > --- a/src/intel/common/gen_sample_positions.h > +++ b/src/intel/common/gen_sample_positions.h > @@ -160,4 +160,61 @@ prefix##14YOffset = 0.9375; \ > prefix##15XOffset = 0.0625; \ > prefix##15YOffset = 0.0000; > > +/* Examples: > + * in case of GEN_GEN < 8: > + * GEN_SAMPLE_POS_ELEM(ms.Sample, info->pSampleLocations, 0); expands to: > + * ms.Sample0XOffset = info->pSampleLocations[0].pos.x; > + * ms.Sample0YOffset = info->pSampleLocations[0].y; > + * > + * in case of GEN_GEN >= 8: > + * GEN_SAMPLE_POS_ELEM(sp._16xSample, info->pSampleLocations, 0); expands > to: > + * sp._16xSample0XOffset = info->pSampleLocations[0].x; > + * sp._16xSample0YOffset = info->pSampleLocations[0].y; > + */ > + > +#define GEN_SAMPLE_POS_ELEM(prefix, arr, sample_idx) \ > +prefix##sample_idx##XOffset = arr[sample_idx].x; \ > +prefix##sample_idx##YOffset = arr[sample_idx].y; > + > +#define GEN_SAMPLE_POS_1X_ARRAY(prefix, arr)\ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); > + > +#define GEN_SAMPLE_POS_2X_ARRAY(prefix, arr) \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); > + > +#define GEN_SAMPLE_POS_4X_ARRAY(prefix, arr) \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 3); > + > +#define GEN_SAMPLE_POS_8X_ARRAY(prefix, arr) \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 7); > + > +#define GEN_SAMPLE_POS_16X_ARRAY(prefix, arr) \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 0); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 1); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 2); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 3); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 4); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 5); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 6); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 7); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 8); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 9); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 10); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 11); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 12); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 13); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 14); \ > +GEN_SAMPLE_POS_ELEM(prefix, arr, 15); > + > #endif /* GEN_SAMPLE_POSITIONS_H */ > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > index 8fd32cabf1e..fb7419b6347 100644 > --- a/src/intel/vulkan/anv_genX.h > +++ b/src/intel/vulkan/anv_genX.h > @@ -88,3 +88,8 @@ void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer > *cmd_buffer, > > void genX(blorp_exec)(struct blorp_batch *batch, > const struct blorp_params *params); > + > +void genX(emit_sample_locations)(struct anv_batch *batch, > + const VkSampleLocationEXT *sl, > + uint32_t num_samples, > + bool custom_locations); > You probably don't need the "custom_locations bool. You could just use sl == NULL or num_samples == 0 for that. > diff --git a/src/intel/vulkan/anv_private.h > b/src/intel/vulkan/anv_private.h > index eed282ff985..a39195733cd 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -165,6 +165,7 @@ struct gen_l3_config; > #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ > #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096 > #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 > +#define MAX_SAMPLE_LOCATIONS 16 > > /* The kernel relocation API has a limitation of a 32-bit delta value > * applied to the address before it is written which, in spite of it being > diff --git a/src/intel/vulkan/genX_pipeline.c > b/src/intel/vulkan/genX_pipeline.c > index 975052deb79..07b45db1988 100644 > --- a/src/intel/vulkan/genX_pipeline.c > +++ b/src/intel/vulkan/genX_pipeline.c > @@ -548,12 +548,9 @@ emit_rs_state(struct anv_pipeline *pipeline, > } > > static void > -emit_ms_state(struct anv_pipeline *pipeline, > - const VkPipelineMultisampleStateCreateInfo *info) > +emit_sample_mask(struct anv_pipeline *pipeline, > + const VkPipelineMultisampleStateCreateInfo *info) > { > - uint32_t samples = 1; > - uint32_t log2_samples = 0; > - > /* From the Vulkan 1.0 spec: > * If pSampleMask is NULL, it is treated as if the mask has all bits > * enabled, i.e. no coverage is removed from fragments. > @@ -566,14 +563,19 @@ emit_ms_state(struct anv_pipeline *pipeline, > uint32_t sample_mask = 0xff; > #endif > > - if (info) { > - samples = info->rasterizationSamples; > - log2_samples = __builtin_ffs(samples) - 1; > - } > - > if (info && info->pSampleMask) > sample_mask &= info->pSampleMask[0]; > > + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) { > + sm.SampleMask = sample_mask; > + } > +} > + > +static void > +emit_multisample(struct anv_pipeline *pipeline, > + uint32_t samples, > + uint32_t log2_samples) > +{ > Is there some particular reason why you're breaking emit_ms_state up into multiple functions? It seems unrelated to enabling this feature and it makes this patch very hard to read. > anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) { > ms.NumberofMultisamples = log2_samples; > > @@ -605,10 +607,61 @@ emit_ms_state(struct anv_pipeline *pipeline, > } > #endif > } > +} > > - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) { > - sm.SampleMask = sample_mask; > +static void > +emit_ms_state(struct anv_pipeline *pipeline, > + const VkPipelineMultisampleStateCreateInfo *info, > + const VkPipelineDynamicStateCreateInfo *dinfo) > +{ > +#if GEN_GEN >= 8 > + VkSampleLocationsInfoEXT *sl; > + bool custom_locations = false; > +#endif > + > + uint32_t samples = 1; > + uint32_t log2_samples = 0; > + > + emit_sample_mask(pipeline, info); > + > + if (info) { > + samples = info->rasterizationSamples; > + > +#if GEN_GEN >= 8 > + if (info->pNext) { > + VkPipelineSampleLocationsStateCreateInfoEXT *slinfo = > + (VkPipelineSampleLocationsStateCreateInfoEXT *)info->pNext; > + > + if (slinfo && > + (slinfo->sType == > + > VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT)) { > + if (slinfo->sampleLocationsEnable == VK_TRUE) { > + uint32_t i; > + > + for (i = 0; i < dinfo->dynamicStateCount; i++) { > + if (dinfo->pDynamicStates[i] == > + VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT) > + return; > + } > + > + if ((sl = &slinfo->sampleLocationsInfo)) > + samples = sl->sampleLocationsPerPixel; > You should be able to assert that these are equal > + > + custom_locations = true; > + } > + } > + } > There are two ways to do this both of which are easier and less complicated than what you have here: 1. vk_foreach_struct() and a switch statement like we do for the Properties2 queries 2. vk_find_struct_const() to just look for the one struct. > +#endif > + > + log2_samples = __builtin_ffs(samples) - 1; > } > + > + emit_multisample(pipeline, samples, log2_samples); > + > +#if GEN_GEN >= 8 > + genX(emit_sample_locations)(&pipeline->batch, sl->pSampleLocations, > + samples, custom_locations); > +#endif > } > > static const uint32_t vk_to_gen_logic_op[] = { > @@ -1938,7 +1991,7 @@ genX(graphics_pipeline_create)( > assert(pCreateInfo->pRasterizationState); > emit_rs_state(pipeline, pCreateInfo->pRasterizationState, > pCreateInfo->pMultisampleState, pass, subpass); > - emit_ms_state(pipeline, pCreateInfo->pMultisampleState); > + emit_ms_state(pipeline, pCreateInfo->pMultisampleState, > pCreateInfo->pDynamicState); > emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, > subpass); > emit_cb_state(pipeline, pCreateInfo->pColorBlendState, > pCreateInfo->pMultisampleState); > diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c > index cffd1e47247..3f628710b31 100644 > --- a/src/intel/vulkan/genX_state.c > +++ b/src/intel/vulkan/genX_state.c > @@ -435,3 +435,75 @@ VkResult genX(CreateSampler)( > > return VK_SUCCESS; > } > + > +void > +genX(emit_sample_locations)(struct anv_batch *batch, > + const VkSampleLocationEXT *sl, > + uint32_t num_samples, > + bool custom_locations) > +{ > + /* The Skylake PRM Vol. 2a "3DSTATE_SAMPLE_PATTERN" says: > + * "When programming the sample offsets (for NUMSAMPLES_4 or _8 and > + * MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7 for > 8X, > + * or 15 for 16X) must have monotonically increasing distance from the > + * pixel center. This is required to get the correct centroid > computation > + * in the device." > + * > + * However, the Vulkan spec seems to require that the the samples > occur in > + * the order provided through the API. The standard sample patterns > have > + * the above property that they have monotonically increasing distances > + * from the center but client-provided ones do not. As long as this > only > + * affects centroid calculations as the docs say, we should be ok > because > + * OpenGL and Vulkan only require that the centroid be some lit sample > and > + * that it's the same for all samples in a pixel; they have no > requirement > + * that it be the one closest to center. > + */ > + > +#if GEN_GEN >= 8 > + > +#if GEN_GEN == 10 > + gen10_emit_wa_cs_stall_flush(batch); > +#endif > + > + if (custom_locations) { > + anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) { > + switch (num_samples) { > + case 1: > + GEN_SAMPLE_POS_1X_ARRAY(sp._1xSample, sl); > + break; > + case 2: > + GEN_SAMPLE_POS_2X_ARRAY(sp._2xSample, sl); > + break; > + case 4: > + GEN_SAMPLE_POS_4X_ARRAY(sp._4xSample, sl); > + break; > + case 8: > + GEN_SAMPLE_POS_8X_ARRAY(sp._8xSample, sl); > + break; > +#if GEN_GEN >= 9 > + case 16: > + GEN_SAMPLE_POS_16X_ARRAY(sp._16xSample, sl); > + break; > +#endif > + default: > + break; > + } > + } > + } else { > + anv_batch_emit(batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) { > + GEN_SAMPLE_POS_1X(sp._1xSample); > + GEN_SAMPLE_POS_2X(sp._2xSample); > + GEN_SAMPLE_POS_4X(sp._4xSample); > + GEN_SAMPLE_POS_8X(sp._8xSample); > +#if GEN_GEN >= 9 > + GEN_SAMPLE_POS_16X(sp._16xSample); > +#endif > + } > + } > + > +#if GEN_GEN == 10 > + gen10_emit_wa_lri_to_cache_mode_zero(batch); > +#endif > + > +#endif > +} > -- > 2.20.1 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev