On Sat, 2016-11-12 at 13:34 -0800, Jason Ekstrand wrote: In this patch we no longer do:
ps.RenderTargetResolveEnable = false; It would be nice to be consistent and either initialise everything or just drop all the lines setting things to false/0 and let the memset do its job. > --- > src/intel/vulkan/gen7_pipeline.c | 57 +---------------------- > ----- > src/intel/vulkan/gen8_pipeline.c | 38 +------------------ > src/intel/vulkan/genX_pipeline_util.h | 70 > +++++++++++++++++++++++++++++++++++ > 3 files changed, 72 insertions(+), 93 deletions(-) > > diff --git a/src/intel/vulkan/gen7_pipeline.c > b/src/intel/vulkan/gen7_pipeline.c > index 40e1d81..dbec828 100644 > --- a/src/intel/vulkan/gen7_pipeline.c > +++ b/src/intel/vulkan/gen7_pipeline.c > @@ -107,6 +107,7 @@ genX(graphics_pipeline_create)( > emit_3dstate_vs(pipeline); > emit_3dstate_gs(pipeline); > emit_3dstate_sbe(pipeline); > + emit_3dstate_ps(pipeline); > > if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { > anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { > @@ -117,16 +118,7 @@ genX(graphics_pipeline_create)( > wm.EarlyDepthStencilControl = EDSC_NORMAL; > wm.PointRasterizationRule = > RASTRULE_UPPER_RIGHT; > } > - > - /* Even if no fragments are ever dispatched, the hardware > hangs if we > - * don't at least set the maximum number of threads. > - */ > - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { > - ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; > - } > } else { > - const struct anv_shader_bin *fs_bin = > - pipeline->shaders[MESA_SHADER_FRAGMENT]; > const struct brw_wm_prog_data *wm_prog_data = > get_wm_prog_data(pipeline); > > if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || > @@ -135,53 +127,6 @@ genX(graphics_pipeline_create)( > if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) > anv_finishme("primitive_id needs sbe swizzling setup"); > > - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { > - ps.KernelStartPointer0 = fs_bin->kernel.offset; > - ps.KernelStartPointer1 = 0; > - ps.KernelStartPointer2 = fs_bin->kernel.offset + > - wm_prog_data- > >prog_offset_2; > - > - ps.ScratchSpaceBasePointer = (struct anv_address) { > - .bo = anv_scratch_pool_alloc(device, &device- > >scratch_pool, > - MESA_SHADER_FRAGMENT, > - wm_prog_data- > >base.total_scratch), > - .offset = 0, > - }; > - ps.PerThreadScratchSpace = > scratch_space(&wm_prog_data->base); > - > - ps.SamplerCount = > get_sampler_count(fs_bin); > - ps.BindingTableEntryCount = > get_binding_table_entry_count(fs_bin); > - > - ps.MaximumNumberofThreads = devinfo->max_wm_threads > - 1; > - ps.PushConstantEnable = wm_prog_data- > >base.nr_params > 0; > - ps.AttributeEnable = wm_prog_data- > >num_varying_inputs > 0; > - ps.oMaskPresenttoRenderTarget = wm_prog_data- > >uses_omask; > - > - ps.RenderTargetFastClearEnable = false; > - ps.DualSourceBlendEnable = false; > - ps.RenderTargetResolveEnable = false; > - > - ps.PositionXYOffsetSelect = wm_prog_data- > >uses_pos_offset ? > - POSOFFSET_SAMPLE : > POSOFFSET_NONE; > - > - ps._32PixelDispatchEnable = false; > - ps._16PixelDispatchEnable = wm_prog_data- > >dispatch_16; > - ps._8PixelDispatchEnable = wm_prog_data- > >dispatch_8; > - > - ps.DispatchGRFStartRegisterForConstantSetupData0 = > - wm_prog_data->base.dispatch_grf_start_reg, > - ps.DispatchGRFStartRegisterForConstantSetupData1 = 0, > - ps.DispatchGRFStartRegisterForConstantSetupData2 = > - wm_prog_data->dispatch_grf_start_reg_2; > - > - /* Haswell requires the sample mask to be set in this > packet as well as > - * in 3DSTATE_SAMPLE_MASK; the values should match. */ > - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ > -#if GEN_IS_HASWELL > - ps.SampleMask = 0xff; > -#endif > - } > - > uint32_t samples = pCreateInfo->pMultisampleState ? > pCreateInfo->pMultisampleState- > >rasterizationSamples : 1; > > diff --git a/src/intel/vulkan/gen8_pipeline.c > b/src/intel/vulkan/gen8_pipeline.c > index f2499dc..56eb032 100644 > --- a/src/intel/vulkan/gen8_pipeline.c > +++ b/src/intel/vulkan/gen8_pipeline.c > @@ -112,49 +112,13 @@ genX(graphics_pipeline_create)( > emit_3dstate_gs(pipeline); > emit_3dstate_vs(pipeline); > emit_3dstate_sbe(pipeline); > + emit_3dstate_ps(pipeline); > > - const int num_thread_bias = GEN_GEN == 8 ? 2 : 1; > if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { > - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps); > anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), > extra) { > extra.PixelShaderValid = false; > } > } else { > - const struct anv_shader_bin *fs_bin = > - pipeline->shaders[MESA_SHADER_FRAGMENT]; > - > - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { > - ps.KernelStartPointer0 = fs_bin->kernel.offset; > - ps.KernelStartPointer1 = 0; > - ps.KernelStartPointer2 = fs_bin->kernel.offset + > - wm_prog_data->prog_offset_2; > - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; > - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; > - ps._32PixelDispatchEnable = false; > - ps.SingleProgramFlow = false; > - ps.VectorMaskEnable = true; > - ps.SamplerCount = get_sampler_count(fs_bin); > - ps.BindingTableEntryCount = > get_binding_table_entry_count(fs_bin); > - ps.PushConstantEnable = wm_prog_data->base.nr_params > > 0; > - ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset > ? > - POSOFFSET_SAMPLE: POSOFFSET_NONE; > - > - ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias; > - > - ps.ScratchSpaceBasePointer = (struct anv_address) { > - .bo = anv_scratch_pool_alloc(device, &device- > >scratch_pool, > - MESA_SHADER_FRAGMENT, > - wm_prog_data- > >base.total_scratch), > - .offset = 0, > - }; > - ps.PerThreadScratchSpace = scratch_space(&wm_prog_data- > >base); > - > - ps.DispatchGRFStartRegisterForConstantSetupData0 = > - wm_prog_data->base.dispatch_grf_start_reg; > - ps.DispatchGRFStartRegisterForConstantSetupData1 = 0; > - ps.DispatchGRFStartRegisterForConstantSetupData2 = > - wm_prog_data->dispatch_grf_start_reg_2; > - } > > anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) { > ps.PixelShaderValid = true; > diff --git a/src/intel/vulkan/genX_pipeline_util.h > b/src/intel/vulkan/genX_pipeline_util.h > index 3906529..1215e9b 100644 > --- a/src/intel/vulkan/genX_pipeline_util.h > +++ b/src/intel/vulkan/genX_pipeline_util.h > @@ -1134,4 +1134,74 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) > } > } > > +static void > +emit_3dstate_ps(struct anv_pipeline *pipeline) > +{ > + MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline- > >device->info; > + const struct anv_shader_bin *fs_bin = > + pipeline->shaders[MESA_SHADER_FRAGMENT]; > + > + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { > + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { > +#if GEN_GEN == 7 > + /* Even if no fragments are ever dispatched, gen7 hardware > hangs if > + * we don't at least set the maximum number of threads. > + */ > + ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; > +#endif > + } > + return; > + } > + > + const struct brw_wm_prog_data *wm_prog_data = > get_wm_prog_data(pipeline); > + > + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { > + ps.KernelStartPointer0 = fs_bin->kernel.offset; > + ps.KernelStartPointer1 = 0; > + ps.KernelStartPointer2 = fs_bin->kernel.offset + > + wm_prog_data->prog_offset_2; > + ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; > + ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; > + ps._32PixelDispatchEnable = false; > + > + ps.SingleProgramFlow = false; > + ps.VectorMaskEnable = true; > + ps.SamplerCount = get_sampler_count(fs_bin); > + ps.BindingTableEntryCount = > get_binding_table_entry_count(fs_bin); > + ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; > + ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? > + POSOFFSET_SAMPLE: POSOFFSET_NONE; > +#if GEN_GEN < 8 > + ps.AttributeEnable = wm_prog_data->num_varying_inputs > > 0; > + ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; Again if we are going to both trying to align the = column now is the time to get this right while we are already moving everything. > + ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend; This was previously false for gen7. If this is a fix maybe mention it in the commit message or even a separate patch? > +#endif > + > +#if GEN_IS_HASWELL > + /* Haswell requires the sample mask to be set in this packet > as well > + * as in 3DSTATE_SAMPLE_MASK; the values should match. > + */ > + ps.SampleMask = 0xff; > +#endif > + > +#if GEN_GEN >= 9 > + ps.MaximumNumberofThreadsPerPSD = 64 - 1; > +#elif GEN_GEN >= 8 This would make more sense as GEN_GEN == 8 > + ps.MaximumNumberofThreadsPerPSD = 64 - 2; > +#else > + ps.MaximumNumberofThreads = devinfo->max_wm_threads - > 1; > +#endif > + > + ps.DispatchGRFStartRegisterForConstantSetupData0 = > + wm_prog_data->base.dispatch_grf_start_reg; > + ps.DispatchGRFStartRegisterForConstantSetupData1 = 0; > + ps.DispatchGRFStartRegisterForConstantSetupData2 = > + wm_prog_data->dispatch_grf_start_reg_2; > + > + ps.PerThreadScratchSpace = get_scratch_space(fs_bin); > + ps.ScratchSpaceBasePointer = > + get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, > fs_bin); > + } > +} > + > #endif /* GENX_PIPELINE_UTIL_H */ _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev