Re: [Mesa-dev] [PATCH] anv: allow exporting an imported SYNC_FD semaphore type

2018-11-22 Thread Samuel Iglesias Gonsálvez
Reviewed-by: Samuel Iglesias Gonsálvez 

On 22/11/2018 08:41, Tapani Pälli wrote:
> Fixes issues with following SkQP tests:
> 
>unitTest_VulkanHardwareBuffer_Vulkan_EGL_Syncs
>unitTest_VulkanHardwareBuffer_Vulkan_Vulkan_Syncs
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/vulkan/anv_queue.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index 2a8ed2eb4ed..7887fbcb937 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -1056,7 +1056,8 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
>  
> case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
>if (device->has_exec_fence) {
> - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
> + pExternalSemaphoreProperties->exportFromImportedHandleTypes =
> +VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->compatibleHandleTypes =
>  VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->externalSemaphoreFeatures =
> 



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vulkan: Update the XML and headers to 1.1.93

2018-11-22 Thread Samuel Iglesias Gonsálvez
Patch series is,

Reviewed-by: Samuel Iglesias Gonsálvez 

Sam

On 19/11/2018 16:39, Jason Ekstrand wrote:
> ---
>  include/vulkan/vulkan_core.h | 35 ---
>  src/vulkan/registry/vk.xml   | 84 +---
>  2 files changed, 98 insertions(+), 21 deletions(-)
> 
> diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
> index 4cd8ed51dcd..35c06649aa5 100644
> --- a/include/vulkan/vulkan_core.h
> +++ b/include/vulkan/vulkan_core.h
> @@ -43,7 +43,7 @@ extern "C" {
>  #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
>  #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
>  // Version of this file
> -#define VK_HEADER_VERSION 91
> +#define VK_HEADER_VERSION 93
>  
>  
>  #define VK_NULL_HANDLE 0
> @@ -454,6 +454,8 @@ typedef enum VkStructureType {
>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 
> 1000211000,
>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 
> 1000212000,
>  VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000,
> +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = 
> 1000221000,
> +VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000,
>  VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
> VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
>  VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = 
> VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
>  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
> @@ -6101,9 +6103,10 @@ typedef enum VkDriverIdKHR {
>  VK_DRIVER_ID_IMAGINATION_PROPRIETARY_KHR = 7,
>  VK_DRIVER_ID_QUALCOMM_PROPRIETARY_KHR = 8,
>  VK_DRIVER_ID_ARM_PROPRIETARY_KHR = 9,
> +VK_DRIVER_ID_GOOGLE_PASTEL_KHR = 10,
>  VK_DRIVER_ID_BEGIN_RANGE_KHR = VK_DRIVER_ID_AMD_PROPRIETARY_KHR,
> -VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_ARM_PROPRIETARY_KHR,
> -VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_ARM_PROPRIETARY_KHR - 
> VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1),
> +VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_GOOGLE_PASTEL_KHR,
> +VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_GOOGLE_PASTEL_KHR - 
> VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1),
>  VK_DRIVER_ID_MAX_ENUM_KHR = 0x7FFFFFFF
>  } VkDriverIdKHR;
>  
> @@ -7791,8 +7794,6 @@ typedef struct 
> VkPipelineCoverageModulationStateCreateInfoNV {
>  
>  
>  #define VK_EXT_image_drm_format_modifier 1
> -#define VK_EXT_EXTENSION_159_SPEC_VERSION 0
> -#define VK_EXT_EXTENSION_159_EXTENSION_NAME "VK_EXT_extension_159"
>  #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_SPEC_VERSION 1
>  #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME 
> "VK_EXT_image_drm_format_modifier"
>  
> @@ -8806,6 +8807,18 @@ typedef struct VkPhysicalDevicePCIBusInfoPropertiesEXT 
> {
>  
>  
>  
> +#define VK_EXT_scalar_block_layout 1
> +#define VK_EXT_SCALAR_BLOCK_LAYOUT_SPEC_VERSION 1
> +#define VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME 
> "VK_EXT_scalar_block_layout"
> +
> +typedef struct VkPhysicalDeviceScalarBlockLayoutFeaturesEXT {
> +VkStructureType    sType;
> +void*  pNext;
> +VkBool32   scalarBlockLayout;
> +} VkPhysicalDeviceScalarBlockLayoutFeaturesEXT;
> +
> +
> +
>  #define VK_GOOGLE_hlsl_functionality1 1
>  #define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 0
>  #define VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME 
> "VK_GOOGLE_hlsl_functionality1"
> @@ -8816,6 +8829,18 @@ typedef struct VkPhysicalDevicePCIBusInfoPropertiesEXT 
> {
>  #define VK_GOOGLE_DECORATE_STRING_EXTENSION_NAME "VK_GOOGLE_decorate_string"
>  
>  
> +#define VK_EXT_separate_stencil_usage 1
> +#define VK_EXT_SEPARATE_STENCIL_USAGE_SPEC_VERSION 1
> +#define VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME 
> "VK_EXT_separate_stencil_usage"
> +
> +typedef struct VkImageStencilUsageCreateInfoEXT {
> +VkStructureType  sType;
> +const void*  pNext;
> +VkImageUsageFlags    stencilUsage;
> +} VkImageStencilUsageCreateInfoEXT;
> +
> +
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/vulkan/registry/vk.xml b/src/vulkan/registry/vk.xml
> index 24cc3ce7872..6cfa256d3b3 100644
> --- a/src/vulkan/registry/vk.xml
> +++ b/src/vulkan/registry/vk.xml
> @@ -146,7 +146,7 @@ server.
>  // Vulkan 1.1 version number
>  #define VK_API_VERSION_1_1 VK_MAKE_VERSION(1, 1, 
> 0)// Patch version should always be set to 0
>  // Version of this file
> -#define VK_HEADER_VERSION 91
> +#define VK_HEADER_VERSION 93
>  
>  
>  #define VK_DEFINE_HANDLE(object) typedef struct object##_T* 
> object;
> @@ -1253,7 +1253,7 @@ server.
>  VkFramebuffer  
> framebuffer
>  VkRect2D   
> renderArea
>  uint32_t   
> clearValueCount
> -const 
> VkClearValue*pClearValues
> +const VkClearValue*   
>  pClearValues
>  
>  
>

Re: [Mesa-dev] [PATCH] virgl: quadruple command buffer size

2018-11-22 Thread Erik Faye-Lund
On Wed, 2018-11-21 at 15:26 -0800, Gurchetan Singh wrote:
> Tested running WebGL aquarium on Nvidia host (10,000 fishes)
> 
> This moves us from 7 fps to 9 fps.  After quadrupling, performance
> gains diminish.
> 
> Change-Id: Iba3a9929ba2d17cf5a38233b92391dd413b79bc2

What's this Change-Id thing? I don't think we usually include things
like this...

With that dropped:
Reviewed-by: Erik Faye-Lund 

> ---
>  src/gallium/drivers/virgl/virgl_winsys.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/virgl/virgl_winsys.h
> b/src/gallium/drivers/virgl/virgl_winsys.h
> index d13212c5e8..96726b409d 100644
> --- a/src/gallium/drivers/virgl/virgl_winsys.h
> +++ b/src/gallium/drivers/virgl/virgl_winsys.h
> @@ -31,7 +31,7 @@ struct pipe_fence_handle;
>  struct winsys_handle;
>  struct virgl_hw_res;
>  
> -#define VIRGL_MAX_CMDBUF_DWORDS (16*1024)
> +#define VIRGL_MAX_CMDBUF_DWORDS (64 * 1024)
>  
>  struct virgl_drm_caps {
> union virgl_caps caps;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: Always return some fence in flush

2018-11-22 Thread Haehnle, Nicolai
On 22.11.18 06:40, Tomasz Figa wrote:
> Hi Brian, Keith,
> 
> +Some more Chromium folks for visibility.
> 
> On Wed, Aug 22, 2018 at 4:21 PM Tomasz Figa  wrote:
>>
>> Hi Michel,
>>
>> On Thu, Aug 16, 2018 at 6:43 PM Michel Dänzer  wrote:
>>>
>>> On 2018-08-16 11:34 AM, Tomasz Figa wrote:
 If there is no last fence, due to no rendering happening yet, just
 create a new signaled fence and return it, to match the expectations of
 the EGL sync fence API.

 Fixes random "Could not create sync fence 0x3003" assertion failures from
 Skia on Android, coming from the following code:

 https://android.googlesource.com/platform/frameworks/base/+/master/libs/hwui/pipeline/skia/SkiaOpenGLPipeline.cpp#427

 Reproducible especially with thread count >= 4.

 Signed-off-by: Tomasz Figa 
 ---
   src/gallium/drivers/llvmpipe/lp_setup.c | 8 +++-
   1 file changed, 7 insertions(+), 1 deletion(-)

 diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
 b/src/gallium/drivers/llvmpipe/lp_setup.c
 index b087369473..a6f1b54d69 100644
 --- a/src/gallium/drivers/llvmpipe/lp_setup.c
 +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
 @@ -360,7 +360,13 @@ lp_setup_flush( struct lp_setup_context *setup,
  set_scene_state( setup, SETUP_FLUSHED, reason );

  if (fence) {
 -  lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
 +  struct lp_fence *lp_fence = NULL;
 +
 +  lp_fence_reference(&lp_fence, setup->last_fence);
 +  if (!lp_fence)
 + lp_fence = lp_fence_create(0);
 +  lp_fence_reference((struct lp_fence **)fence, lp_fence);
 +  lp_fence_reference(&lp_fence, NULL);
  }
   }


>>>
>>> Other drivers keep around a reference to the last fence in the context,
>>> and return that if there's no new work to flush.
>>
>> Thanks for taking a look.
>>
>> Right, it sounds like a sane thing to do. LLVMpipe, however, seems to
>> explicitly destroy the fence whenever a rendering pass completes and I
>> didn't want to change that without understanding the intentions behind
>> that. Precisely, it's lp_scene_end_rasterization():
>> https://gitlab.freedesktop.org/mesa/mesa/blob/master/src/gallium/drivers/llvmpipe/lp_scene.c#L292
>>
>> Also, this still wouldn't solve the problem of an EGL sync fence being
>> created and waited on without any rendering happening at all, which is
>> also likely to happen with Android code pointed to in the commit
>> message. Obviously that could be dealt with by creating a signaled
>> fence in lp_setup_create(), though.
>>
>> Let me add Keith and Brian for more visibility.
> 
> Any thoughts on this?

Your analysis seems correct to me.

Note that I wouldn't worry too much about creating a new fence object. 
radeonsi creates a new Gallium pipe fence object on every call to 
pipe->flush, regardless of the state of the system. This is because 
internally, radeonsi's Gallium pipe fences are a union of an SDMA and a 
GFX fence.

So creating a new fence, as your patch does, should be perfectly fine if 
you know that all previous work has finished.

I do think your patch is needlessly convoluted though. Why not just

lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
+  if (!*fence)
+ *fence = (struct pipe_fence_handle *)lp_fence_create(0);

Cheers,
Nicolai

> 
> Best regards,
> Tomasz
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] llvmpipe: Always return some fence in flush

2018-11-22 Thread Tomasz Figa
Hi Nicolai,

On Thu, Nov 22, 2018 at 6:19 PM Haehnle, Nicolai
 wrote:
>
> On 22.11.18 06:40, Tomasz Figa wrote:
> > Hi Brian, Keith,
> >
> > +Some more Chromium folks for visibility.
> >
> > On Wed, Aug 22, 2018 at 4:21 PM Tomasz Figa  wrote:
> >>
> >> Hi Michel,
> >>
> >> On Thu, Aug 16, 2018 at 6:43 PM Michel Dänzer  wrote:
> >>>
> >>> On 2018-08-16 11:34 AM, Tomasz Figa wrote:
>  If there is no last fence, due to no rendering happening yet, just
>  create a new signaled fence and return it, to match the expectations of
>  the EGL sync fence API.
> 
>  Fixes random "Could not create sync fence 0x3003" assertion failures from
>  Skia on Android, coming from the following code:
> 
>  https://android.googlesource.com/platform/frameworks/base/+/master/libs/hwui/pipeline/skia/SkiaOpenGLPipeline.cpp#427
> 
>  Reproducible especially with thread count >= 4.
> 
>  Signed-off-by: Tomasz Figa 
>  ---
>    src/gallium/drivers/llvmpipe/lp_setup.c | 8 +++-
>    1 file changed, 7 insertions(+), 1 deletion(-)
> 
>  diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c 
>  b/src/gallium/drivers/llvmpipe/lp_setup.c
>  index b087369473..a6f1b54d69 100644
>  --- a/src/gallium/drivers/llvmpipe/lp_setup.c
>  +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
>  @@ -360,7 +360,13 @@ lp_setup_flush( struct lp_setup_context *setup,
>   set_scene_state( setup, SETUP_FLUSHED, reason );
> 
>   if (fence) {
>  -  lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
>  +  struct lp_fence *lp_fence = NULL;
>  +
>  +  lp_fence_reference(&lp_fence, setup->last_fence);
>  +  if (!lp_fence)
>  + lp_fence = lp_fence_create(0);
>  +  lp_fence_reference((struct lp_fence **)fence, lp_fence);
>  +  lp_fence_reference(&lp_fence, NULL);
>   }
>    }
> 
> 
> >>>
> >>> Other drivers keep around a reference to the last fence in the context,
> >>> and return that if there's no new work to flush.
> >>
> >> Thanks for taking a look.
> >>
> >> Right, it sounds like a sane thing to do. LLVMpipe, however, seems to
> >> explicitly destroy the fence whenever a rendering pass completes and I
> >> didn't want to change that without understanding the intentions behind
> >> that. Precisely, it's lp_scene_end_rasterization():
> >> https://gitlab.freedesktop.org/mesa/mesa/blob/master/src/gallium/drivers/llvmpipe/lp_scene.c#L292
> >>
> >> Also, this still wouldn't solve the problem of an EGL sync fence being
> >> created and waited on without any rendering happening at all, which is
> >> also likely to happen with Android code pointed to in the commit
> >> message. Obviously that could be dealt with by creating a signaled
> >> fence in lp_setup_create(), though.
> >>
> >> Let me add Keith and Brian for more visibility.
> >
> > Any thoughts on this?
>
> Your analysis seems correct to me.
>
> Note that I wouldn't worry too much about creating a new fence object.
> radeonsi creates a new Gallium pipe fence object on every call to
> pipe->flush, regardless of the state of the system. This is because
> internally, radeonsi's Gallium pipe fences are a union of an SDMA and a
> GFX fence.
>
> So creating a new fence, as your patch does, should be perfectly fine if
> you know that all previous work has finished.
>
> I do think your patch is needlessly convoluted though. Why not just
>
> lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
> +  if (!*fence)
> + *fence = (struct pipe_fence_handle *)lp_fence_create(0);

Indeed, there is no need for this reference dance, not sure what I had
in mind when writing that. Let me respin a simplified version. Thanks
for feedback!

Best regards,
Tomasz
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] virgl: don't mark buffers as unclean after a write

2018-11-22 Thread Erik Faye-Lund
On Wed, 2018-11-21 at 20:08 -0800, Gurchetan Singh wrote:
> We can mark the buffer unclean if it's ever bound as a TBO,
> SSBO, ABO, or image.
> 
> This improves
> 
> dEQP-
> GLES3.performance.buffer.data_upload.function_call.map_buffer_range.n
> ew_specified_buffer.flag_write_full.stream_draw
> 
> from 9.58 MB/s to 451.17 MB/s.
> 
> v2: Using buffer bindings to track cleanliness (Ilia).
> ---
>  src/gallium/drivers/virgl/virgl_buffer.c |  1 -
>  src/gallium/drivers/virgl/virgl_encode.c | 10 ++
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/virgl/virgl_buffer.c
> b/src/gallium/drivers/virgl/virgl_buffer.c
> index 88a22b56f9..f72c93f499 100644
> --- a/src/gallium/drivers/virgl/virgl_buffer.c
> +++ b/src/gallium/drivers/virgl/virgl_buffer.c
> @@ -106,7 +106,6 @@ static void virgl_buffer_transfer_unmap(struct
> pipe_context *ctx,
> if (trans->base.usage & PIPE_TRANSFER_WRITE) {
>if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
>   struct virgl_screen *vs = virgl_screen(ctx->screen);
> - vbuf->base.clean = FALSE;
>   vctx->num_transfers++;
>   vs->vws->transfer_put(vs->vws, vbuf->base.hw_res,
> &transfer->box, trans->base.stride,
> trans->base.layer_stride, trans->offset, transfer->level);
> diff --git a/src/gallium/drivers/virgl/virgl_encode.c
> b/src/gallium/drivers/virgl/virgl_encode.c
> index 400ba68474..6483f47031 100644
> --- a/src/gallium/drivers/virgl/virgl_encode.c
> +++ b/src/gallium/drivers/virgl/virgl_encode.c
> @@ -61,6 +61,12 @@ static void virgl_encoder_write_res(struct
> virgl_context *ctx,
> }
>  }
>  
> +static void virgl_modify_clean(struct virgl_resource *res, boolean
> value)
> +{
> +   if (res)
> +  res->clean = value;

It looks like not all call-sites can have res == NULL, so it would be
better to move the conditional out to the call-sites, I think. And then
I would argue the usefulness of the helper is diminished...

Anyway, if you decide to keep the helper: it seems like this is always
called with a FALSE-argument, perhaps this should be:

void virgl_dirty_res(struct virgl_resource *res)

... instead?

> +}
> +
>  int virgl_encode_bind_object(struct virgl_context *ctx,
>  uint32_t handle, uint32_t object)
>  {
> @@ -615,6 +621,7 @@ int virgl_encode_sampler_view(struct
> virgl_context *ctx,
> if (res->u.b.target == PIPE_BUFFER) {
>virgl_encoder_write_dword(ctx->cbuf, state->u.buf.offset /
> elem_size);
>virgl_encoder_write_dword(ctx->cbuf, (state->u.buf.offset +
> state->u.buf.size) / elem_size - 1);
> +  virgl_modify_clean(res, FALSE);
> } else {
>virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_layer
> | state->u.tex.last_layer << 16);
>virgl_encoder_write_dword(ctx->cbuf, state->u.tex.first_level
> | state->u.tex.last_level << 8);
> @@ -949,6 +956,7 @@ int virgl_encode_set_shader_buffers(struct
> virgl_context *ctx,
>   virgl_encoder_write_dword(ctx->cbuf,
> buffers[i].buffer_offset);
>   virgl_encoder_write_dword(ctx->cbuf,
> buffers[i].buffer_size);
>   virgl_encoder_write_res(ctx, res);
> + virgl_modify_clean(res, FALSE);
>} else {
>   virgl_encoder_write_dword(ctx->cbuf, 0);
>   virgl_encoder_write_dword(ctx->cbuf, 0);
> @@ -972,6 +980,7 @@ int virgl_encode_set_hw_atomic_buffers(struct
> virgl_context *ctx,
>   virgl_encoder_write_dword(ctx->cbuf,
> buffers[i].buffer_offset);
>   virgl_encoder_write_dword(ctx->cbuf,
> buffers[i].buffer_size);
>   virgl_encoder_write_res(ctx, res);
> + virgl_modify_clean(res, FALSE);
>} else {
>   virgl_encoder_write_dword(ctx->cbuf, 0);
>   virgl_encoder_write_dword(ctx->cbuf, 0);
> @@ -999,6 +1008,7 @@ int virgl_encode_set_shader_images(struct
> virgl_context *ctx,
>   virgl_encoder_write_dword(ctx->cbuf,
> images[i].u.buf.offset);
>   virgl_encoder_write_dword(ctx->cbuf, images[i].u.buf.size);
>   virgl_encoder_write_res(ctx, res);
> + virgl_modify_clean(res, FALSE);
>} else {
>   virgl_encoder_write_dword(ctx->cbuf, 0);
>   virgl_encoder_write_dword(ctx->cbuf, 0);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1.5/2] ac/surface/gfx9: let addrlib choose the preferred swizzle kind

2018-11-22 Thread Haehnle, Nicolai
Thanks. Any comments on the addrlib change itself?

Cheers,
Nicolai

On 21.11.18 21:32, Marek Olšák wrote:
> See what Gustaw said. Other than that:
> 
> Reviewed-by: Marek Olšák mailto:marek.ol...@amd.com>>
> 
> Marek
> 
> On Wed, Nov 21, 2018 at 10:21 AM Nicolai Hähnle  > wrote:
> 
> From: Nicolai Hähnle  >
> 
> Our choices here are simply redundant as long as sin.flags is set
> correctly.
> --
> This is the change I was talking about.
> ---
>   src/amd/common/ac_surface.c | 10 --
>   1 file changed, 10 deletions(-)
> 
> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> index edd710a968c..ad2cb585c9d 100644
> --- a/src/amd/common/ac_surface.c
> +++ b/src/amd/common/ac_surface.c
> @@ -1057,30 +1057,20 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE
> addrlib,
>          sin.forbiddenBlock.var = 1; /* don't allow the
> variable-sized swizzle modes */
>          sin.forbiddenBlock.linear = 1; /* don't allow linear
> swizzle modes */
>          sin.bpp = in->bpp;
>          sin.width = in->width;
>          sin.height = in->height;
>          sin.numSlices = in->numSlices;
>          sin.numMipLevels = in->numMipLevels;
>          sin.numSamples = in->numSamples;
>          sin.numFrags = in->numFrags;
> 
> -       if (flags & RADEON_SURF_SCANOUT) {
> -               sin.preferredSwSet.sw_D = 1;
> -               /* Raven only allows S for displayable surfaces with
> < 64 bpp, so
> -                * allow it as fallback */
> -               sin.preferredSwSet.sw_S = 1;
> -       } else if (in->flags.depth || in->flags.stencil || is_fmask)
> -               sin.preferredSwSet.sw_Z = 1;
> -       else
> -               sin.preferredSwSet.sw_S = 1;
> -
>          if (is_fmask) {
>                  sin.flags.display = 0;
>                  sin.flags.color = 0;
>                  sin.flags.fmask = 1;
>          }
> 
>          ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
>          if (ret != ADDR_OK)
>                  return ret;
> 
> -- 
> 2.19.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org 
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] winsys/amdgpu: explicitly declare whether buffer_map is permanent or not

2018-11-22 Thread Nicolai Hähnle

On 21.11.18 21:27, Marek Olšák wrote:
On Wed, Nov 21, 2018 at 12:57 PM Nicolai Hähnle > wrote:


From: Nicolai Hähnle mailto:nicolai.haeh...@amd.com>>

Introduce a new driver-private transfer flag RADEON_TRANSFER_TEMPORARY
that specifies whether the caller will use buffer_unmap or not. The
default behavior is set to permanent maps, because that's what drivers
do for Gallium buffer maps.

This should eliminate the need for hacks in libdrm. Assertions are added
to catch when the buffer_unmap calls don't match the (temporary)
buffer_map calls.

I did my best to update r600 and r300 as well for completeness (yes,
it's a no-op for r300 because it never calls buffer_unmap), even though
the radeon winsys ignores the new flag.


You didn't make any changes to r300.


Yeah, that's what I wrote :)



You can also drop all r600 changes, because the radeon winsys doesn't care.


I don't think it's a good idea, though. The interface of the two winsys 
is different, yes, but it's largely the same and it makes sense to keep 
it that way conceptually. Not that it matters much for the code itself.



[snip]

+enum radeon_transfer_flags {
+   /* Indicates that the caller will unmap the buffer.
+    *
+    * Not unmapping buffers is an important performance
optimization for
+    * OpenGL (avoids kernel overhead for frequently mapped
buffers). However,
+    * if you only map a buffer once and then use it indefinitely
from the GPU,
+    * it is much better to unmap it so that the kernel is free to
move it to
+    * non-visible VRAM.


The second half of the comment is misleading. The kernel will move 
buffers to invisible VRAM regardless of whether they're mapped, so CPU 
mappings have no effect on the placement. Buffers are only moved back to 
CPU-accessible memory on a CPU page fault. If a buffer is mapped and 
there is no CPU access, it will stay in invisible VRAM forever. The general 
recommendation is to keep those buffers mapped for CPU access just like 
GTT buffers.


Yeah, I'll change that.



+    */
+   RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0),
+};
+
  #define RADEON_SPARSE_PAGE_SIZE (64 * 1024)

  enum ring_type {
      RING_GFX = 0,
      RING_COMPUTE,
      RING_DMA,
      RING_UVD,
      RING_VCE,
      RING_UVD_ENC,
      RING_VCN_DEC,
@@ -287,23 +299,26 @@ struct radeon_winsys {
      struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
                                         uint64_t size,
                                         unsigned alignment,
                                         enum radeon_bo_domain domain,
                                         enum radeon_bo_flag flags);

      /**
       * Map the entire data store of a buffer object into the
client's address
       * space.
       *
+     * Callers are expected to unmap buffers again if and only if the
+     * RADEON_TRANSFER_TEMPORARY flag is set in \p usage.
+     *
       * \param buf       A winsys buffer object to map.
       * \param cs        A command stream to flush if the buffer is
referenced by it.
-     * \param usage     A bitmask of the PIPE_TRANSFER_* flags.
+     * \param usage     A bitmask of the PIPE_TRANSFER_* and
RADEON_TRANSFER_* flags.
       * \return          The pointer at the beginning of the buffer.
       */
      void *(*buffer_map)(struct pb_buffer *buf,
                          struct radeon_cmdbuf *cs,
                          enum pipe_transfer_usage usage);

      /**
       * Unmap a buffer object from the client's address space.
       *
       * \param buf       A winsys buffer object to unmap.
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index 19522cc97b1..d455fb5db6a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5286,21 +5286,22 @@ int si_shader_binary_upload(struct si_screen
*sscreen, struct si_shader *shader)
                                                 0 :
SI_RESOURCE_FLAG_READ_ONLY,
                                                PIPE_USAGE_IMMUTABLE,
                                                align(bo_size,
SI_CPDMA_ALIGNMENT),
                                                256);
         if (!shader->bo)
                 return -ENOMEM;

         /* Upload. */
         ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
                                         PIPE_TRANSFER_READ_WRITE |
-                                       PIPE_TRANSFER_UNSYNCHRONIZED);
+                                       PIPE_TRANSFER_UNSYNCHRONIZED |
+                                       RADEON_TRANSFER_TEMPORAR

[Mesa-dev] [PATCH v2] winsys/amdgpu: explicitly declare whether buffer_map is permanent or not

2018-11-22 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Introduce a new driver-private transfer flag RADEON_TRANSFER_TEMPORARY
that specifies whether the caller will use buffer_unmap or not. The
default behavior is set to permanent maps, because that's what drivers
do for Gallium buffer maps.

This should eliminate the need for hacks in libdrm. Assertions are added
to catch when the buffer_unmap calls don't match the (temporary)
buffer_map calls.

I did my best to update r600 for consistency (r300 needs no changes
because it never calls buffer_unmap), even though the radeon winsys
ignores the new flag.

As an added bonus, this should actually improve the performance of
the normal fast path, because we no longer call into libdrm at all
after the first map, and there's one less atomic in the winsys itself
(there are now no atomics left in the UNSYNCHRONIZED fast path).

Cc: Leo Liu 
v2:
- remove comment about visible VRAM (Marek)
- don't rely on amdgpu_bo_cpu_map doing an atomic write
---
 src/gallium/drivers/r600/evergreen_compute.c |  4 +-
 src/gallium/drivers/r600/r600_asm.c  |  4 +-
 src/gallium/drivers/r600/r600_shader.c   |  4 +-
 src/gallium/drivers/r600/radeon_uvd.c|  8 +-
 src/gallium/drivers/r600/radeon_vce.c|  4 +-
 src/gallium/drivers/r600/radeon_video.c  |  6 +-
 src/gallium/drivers/radeon/radeon_uvd.c  | 10 +-
 src/gallium/drivers/radeon/radeon_uvd_enc.c  |  6 +-
 src/gallium/drivers/radeon/radeon_vce.c  |  4 +-
 src/gallium/drivers/radeon/radeon_vcn_dec.c  | 18 ++--
 src/gallium/drivers/radeon/radeon_vcn_enc.c  |  4 +-
 src/gallium/drivers/radeon/radeon_video.c|  6 +-
 src/gallium/drivers/radeon/radeon_winsys.h   | 14 ++-
 src/gallium/drivers/radeonsi/si_shader.c |  3 +-
 src/gallium/include/pipe/p_defines.h |  8 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c| 96 +---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.h|  3 +-
 17 files changed, 140 insertions(+), 62 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index a77f58242e3..9085be4e2f3 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -431,21 +431,23 @@ static void *evergreen_create_compute_state(struct 
pipe_context *ctx,
COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n");
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
radeon_shader_binary_init(&shader->binary);
r600_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
 
/* Upload code + ROdata */
shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen,
shader->bc.ndw * 4);
-   p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, 
PIPE_TRANSFER_WRITE);
+   p = r600_buffer_map_sync_with_rings(
+   &rctx->b, shader->code_bo,
+   PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
//TODO: use util_memcpy_cpu_to_le32 ?
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
rctx->b.ws->buffer_unmap(shader->code_bo->buf);
 #endif
 
return shader;
 }
 
 static void evergreen_delete_compute_state(struct pipe_context *ctx, void 
*state)
 {
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 7029be24f4b..4ba77c535f9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2765,21 +2765,23 @@ void *r600_create_vertex_fetch_shader(struct 
pipe_context *ctx,
 
u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
 &shader->offset,
 (struct pipe_resource**)&shader->buffer);
if (!shader->buffer) {
r600_bytecode_clear(&bc);
FREE(shader);
return NULL;
}
 
-   bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, 
PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+   bytecode = r600_buffer_map_sync_with_rings
+   (&rctx->b, shader->buffer,
+   PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | 
RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
 
if (R600_BIG_ENDIAN) {
for (i = 0; i < fs_size / 4; ++i) {
bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
}
} else {
memcpy(bytecode, bc.bytecode, fs_size);
}
rctx->b.ws->buffer_unmap(shader->buffer->buf);
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 408939d1105..fc826470d69 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -134,21 +134,23 @@ static int store_shader(struct pipe_context *ctx,
 {
stru

[Mesa-dev] [Bug 108829] [meson] libglapi exports internal API

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108829

Bug ID: 108829
   Summary: [meson] libglapi exports internal API
   Product: Mesa
   Version: unspecified
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: emil.l.veli...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Seems like we're missing visibility flags here. We export 39 symbols, instead
of 19.

Might be worth having a look throughout mesa, comparing the meson and autotools
builds.
Personally I use `nm -CD --defined-only` but objdump and others can also be
utilised ;-)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108829] [meson] libglapi exports internal API

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108829

Emil Velikov  changed:

   What|Removed |Added

 CC||baker.dyla...@gmail.com,
   ||fdo-b...@engestrom.ch

--- Comment #1 from Emil Velikov  ---
Cc-ing some people. Props to the freedesktop-sdk people for spotting it.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] build: enable -Werror=incompatible-pointer-types

2018-11-22 Thread Emil Velikov
On 2018/11/21, Gert Wollny wrote:
> Am Mittwoch, den 21.11.2018, 13:14 + schrieb Emil Velikov:

> > +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-
> > types],[CFXXLAGS="$CXXFLAGS -Werror=incompatible-pointer-types"])
> For C++ this shouldn't be needed, because there this is already an
> error.
> 
Indeed - option is for C and Objective C only.

Seems like the meson patch will need the same tweak.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 30/30] mesa/st: require linear interpolation for ARB_texture_float

2018-11-22 Thread Mathias Fröhlich
Hi,

On Monday, 19 November 2018 20:17:38 CET Roland Scheidegger wrote:
> FWIW this looks like a rather similar incident to me what happened when mesa 
> began to verify the max vertex stride (which needs to be 2048 with GL 4.4 
> whereas r600 can only do 2047) where I argued it's a much better idea to lie 
> about the GL version there rather than the specific vertex stride bit, but I 
> was rather unsuccessful and not everybody apparently shares this view...

If I had fully tracked the mailing list, Roland, you would have gotten at least
my +1 on the lie in the GL version instead of the max stride.
The using application would still have had the chance to query the limiting
value - may be with an off by one surprise wrt the standard paper, but with a
value that you can finally rely on. Now with lying about the stride, the
application side has no way anymore to query what the real limit is. The
application just may not work correctly if really everything that you can
query is within limits.
For this max stride value, the only excuse is that it's highly unlikely to find
these huge strides for vertex attributes, so the problem is very unlikely to 
show up.

IMO, the application's point of view, or the application author's point of view,
is the one that should drive decisions. First of all because this driver library
has one really major purpose: to serve exactly those applications with a reliable
and predictable 3D API. But what is predictable when an application cannot rely
on the extensions and constants it queries?

Two cents of somebody writing mostly such applications.

best
Mathias


> 
> 
> From: mesa-dev  on behalf of Ilia 
> Mirkin 
> Sent: Monday, November 19, 2018 5:37:58 PM
> To: Erik Faye-Lund
> Cc: ML Mesa-dev; Timothy Arceri; Emil Velikov
> Subject: Re: [Mesa-dev] [PATCH 30/30] mesa/st: require linear interpolation 
> for ARB_texture_float
> 
> On Mon, Nov 19, 2018 at 11:30 AM Erik Faye-Lund
>  wrote:
> >
> > On Mon, 2018-11-19 at 11:13 -0500, Ilia Mirkin wrote:
> > > On Mon, Nov 19, 2018 at 10:40 AM Erik Faye-Lund
> > >  wrote:
> > > > On Mon, 2018-11-19 at 10:02 -0500, Ilia Mirkin wrote:
> > > > > Unfortunately this will drop GL 3.0 from Adreno A3xx. I think
> > > > > we'd
> > > > > rather fake linear interpolation with F32 textures which are
> > > > > never
> > > > > used than lose GL 3.0 there...
> > > >
> > > > Right...
> > > >
> > > > I guess this means that this GPU never really did support OpenGL
> > > > 3.0,
> > > > and will make some applications misbehave. There's definitely
> > > > applications out there that will lead to surprisingly bad problems
> > > > when
> > > > features like these are not supported.
> > > >
> > > > For instance if an application tries to take a local gradient by
> > > > sampling a texture twice with a tiny epsilon (a common trick in
> > > > tangent-free normal mapping, for instance), it will essentially get
> > > > garbage, which can cause close to useless rendering.
> > > >
> > > > I've worked on applications that would have had problems like these
> > > > if
> > > > drivers report the wrong version, but could work correctly if they
> > > > report the right version.
> > > >
> > > > Either way, I don't believe faking like that belongs in core Mesa.
> > > > So
> > > > if the Freedreno developers really want this kind of behavior,
> > > > perhaps
> > > > something like this could be a better move?
> > > >
> > > > ---8<---
> > > > diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c
> > > > b/src/gallium/drivers/freedreno/freedreno_screen.c
> > > > index 88d91a91234..de811371f05 100644
> > > > --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> > > > +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> > > > @@ -260,6 +260,11 @@ fd_screen_get_param(struct pipe_screen
> > > > *pscreen,
> > > > enum pipe_cap param)
> > > > return 0;
> > > >
> > > > case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
> > > > +   /* HACK: A330 doesn't support linear interpolation
> > > > of
> > > > FP32 textures, but
> > > > +* to keep OpenGL 3.0 support, we lie about it
> > > > here.
> > > > +*/
> > > > +   return is_a3xx(screen) || is_a4xx(screen) ||
> > > > is_a5xx(screen) || is_a6xx(screen);
> > > > +
> > > > case PIPE_CAP_CUBE_MAP_ARRAY:
> > > > case PIPE_CAP_SAMPLER_VIEW_TARGET:
> > > > case PIPE_CAP_TEXTURE_QUERY_LOD:
> > > > ---8<---
> > > >
> > > > Alternatively, they could ask users to override the GL-version for
> > > > applications that need GL 3.0, but doesn't have problems with the
> > > > lack
> > > > of FP32-interpolation...
> > >
> > > GL 3.0 brings SO much stuff in though, and GL 3.1 brings core
> > > profiles.
> > >
> > > Your proposed solution will also expose the OES_bla ext, which we
> > > definitely don't want to do. I'd instead keep it loose. The hardware
> > > that doesn't support this stuff is generally targeted 

Re: [Mesa-dev] [PATCH] st/xvmc: Add X11 include path.

2018-11-22 Thread Emil Velikov
On 2018/11/21, Vinson Lee wrote:
> This patch fixes this build error.
> 
>   CC   tests/xvmc_bench.o
> In file included from tests/xvmc_bench.c:35:
> tests/testlib.h:38:10: fatal error: 'X11/Xlib.h' file not found
>  ^~~~
> 
> Signed-off-by: Vinson Lee 

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Emil Velikov 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 29/30] mesa/main: do not require float-texture filtering for es3

2018-11-22 Thread Mathias Fröhlich
Hi Erik,

The series looks very reasonable, and I could not spot any negating ! getting
lost in the query logic, even though I have not had the time to double-check
every move against when each texture format was introduced in which GL ES
version.
So, what can I say now? Is that already a Reviewed-by?

What tests did you run on the series?
May be I can convince myself to go forward using this testing information ...

Well - and given the amount of discussion about #30, I mean #1-#29.

best
Mathias


On Monday, 19 November 2018 13:15:05 CET Erik Faye-Lund wrote:
> The OpenGL ES 3.0 specification, table 3.13 lists half-float textures as
> filterable, but not float textures. So we shouldn't depend on
> ARB_float_texture, which requires full filtering support for both.
> 
> Signed-off-by: Erik Faye-Lund 
> ---
>  src/mesa/main/version.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
> index 210caad097e..fb5e816db32 100644
> --- a/src/mesa/main/version.c
> +++ b/src/mesa/main/version.c
> @@ -509,7 +509,9 @@ compute_version_es2(const struct gl_extensions 
> *extensions,
>   extensions->ARB_internalformat_query &&
>   extensions->ARB_map_buffer_range &&
>   extensions->ARB_shader_texture_lod &&
> - extensions->ARB_texture_float &&
> + extensions->OES_texture_float &&
> + extensions->OES_texture_half_float &&
> + extensions->OES_texture_half_float_linear &&
>   extensions->ARB_texture_rg &&
>   extensions->ARB_depth_buffer_float &&
>   /* extensions->ARB_framebuffer_object && */
> 




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] virgl: avoid large inline transfers

2018-11-22 Thread Gert Wollny
I think Erik already pointed out the little problems with this series
and "virgl: quadruple command buffer size". 

I've tested the impact on performance of these three patches and the
results look great: Unigine Valley went from ~9 fps to 20 (Host 50) and
Unigine Heaven (no tesselation) from 12 fps to 26 (Host 68). (All on
r600 - 6870 HD). 

Tested-By: Gert Wollny  

Am Mittwoch, den 21.11.2018, 20:08 -0800 schrieb Gurchetan Singh:
> We flush every time the command buffer (16 kB) is full, which is
> quite costly.
> 
> This improves
> 
> dEQP-
> GLES3.performance.buffer.data_upload.function_call.buffer_data.new_bu
> ffer.usage_stream_draw
> 
> from 111.16 MB/s to 1930.36 MB/s.
> 
> In addition, I made the benchmark produce buffers from 0 -->
> VIRGL_MAX_CMDBUF_DWORDS * 4,
> and tried ((VIRGL_MAX_CMDBUF_DWORDS * 4) / 2),
> ((VIRGL_MAX_CMDBUF_DWORDS * 4) / 4), etc.
> 
> I didn't notice any clear differences, so let's just go with the most
> obvious
> heuristic.
> ---
>  src/gallium/drivers/virgl/virgl_resource.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/virgl/virgl_resource.c
> b/src/gallium/drivers/virgl/virgl_resource.c
> index db5e7dd61a..9174ec5cbb 100644
> --- a/src/gallium/drivers/virgl/virgl_resource.c
> +++ b/src/gallium/drivers/virgl/virgl_resource.c
> @@ -95,7 +95,11 @@ static void virgl_buffer_subdata(struct
> pipe_context *pipe,
>usage |= PIPE_TRANSFER_DISCARD_RANGE;
>  
> u_box_1d(offset, size, &box);
> -   virgl_transfer_inline_write(pipe, resource, 0, usage, &box, data,
> 0, 0);
> +
> +   if (size >= (VIRGL_MAX_CMDBUF_DWORDS * 4))
> +  u_default_buffer_subdata(pipe, resource, usage, offset, size,
> data);
> +   else
> +  virgl_transfer_inline_write(pipe, resource, 0, usage, &box,
> data, 0, 0);
>  }
>  
>  void virgl_init_context_resource_functions(struct pipe_context *ctx)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radv: allow to skip DCC decompressions with the new predicate

2018-11-22 Thread Samuel Pitoiset
Feral games aren't affected because they don't decompress DCC.
F1 2018 has one DCC decompression per frame, but I don't see
any performance improvements. This new predicate will
probably be more useful for DCC/MSAA.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_fast_clear.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index ce088d7332b..0f7a9132182 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -580,13 +580,14 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
 
 static void
 radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image, bool value)
+  struct radv_image *image,
+  uint64_t pred_offset, bool value)
 {
uint64_t va = 0;
 
if (value) {
va = radv_buffer_get_va(image->bo) + image->offset;
-   va += image->fce_pred_offset;
+   va += pred_offset;
}
 
si_emit_set_predication_state(cmd_buffer, true, va);
@@ -629,10 +630,13 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
pipeline = 
cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
}
 
-   if (!decompress_dcc && radv_image_has_dcc(image)) {
+   if (radv_image_has_dcc(image)) {
+   uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
+   image->fce_pred_offset;
+
old_predicating = cmd_buffer->state.predicating;
 
-   radv_emit_set_predication_state_from_image(cmd_buffer, image, 
true);
+   radv_emit_set_predication_state_from_image(cmd_buffer, image, 
pred_offset, true);
cmd_buffer->state.predicating = true;
}
for (uint32_t layer = 0; layer < layer_count; ++layer) {
@@ -697,10 +701,13 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
&cmd_buffer->pool->alloc);
 
}
-   if (!decompress_dcc && radv_image_has_dcc(image)) {
+   if (radv_image_has_dcc(image)) {
+   uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
+   image->fce_pred_offset;
+
cmd_buffer->state.predicating = old_predicating;
 
-   radv_emit_set_predication_state_from_image(cmd_buffer, image, 
false);
+   radv_emit_set_predication_state_from_image(cmd_buffer, image, 
pred_offset, false);
 
if (cmd_buffer->state.predication_type != -1) {
/* Restore previous conditional rendering user state. */
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radv: add a predicate for reflecting DCC decompression state

2018-11-22 Thread Samuel Pitoiset
It's somewhat similar to the FCE predicate.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 23 +++
 src/amd/vulkan/radv_image.c   |  3 ++-
 src/amd/vulkan/radv_meta_clear.c  |  3 +++
 src/amd/vulkan/radv_meta_fast_clear.c |  7 +++
 src/amd/vulkan/radv_private.h |  4 
 5 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f13768b4ada..50216b0ffdf 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1363,6 +1363,29 @@ radv_update_fce_metadata(struct radv_cmd_buffer 
*cmd_buffer,
radeon_emit(cmd_buffer->cs, pred_val >> 32);
 }
 
+/**
+ * Update the DCC predicate to reflect the compression state.
+ */
+void
+radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
+struct radv_image *image, bool value)
+{
+   uint64_t pred_val = value;
+   uint64_t va = radv_buffer_get_va(image->bo);
+   va += image->offset + image->dcc_pred_offset;
+
+   assert(radv_image_has_dcc(image));
+
+   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+   radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+   S_370_WR_CONFIRM(1) |
+   S_370_ENGINE_SEL(V_370_PFP));
+   radeon_emit(cmd_buffer->cs, va);
+   radeon_emit(cmd_buffer->cs, va >> 32);
+   radeon_emit(cmd_buffer->cs, pred_val);
+   radeon_emit(cmd_buffer->cs, pred_val >> 32);
+}
+
 /**
  * Update the fast clear color values if the image is bound as a color buffer.
  */
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 7492bf48b51..ebde5519d29 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -858,7 +858,8 @@ radv_image_alloc_dcc(struct radv_image *image)
/* + 16 for storing the clear values + dcc pred */
image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
image->fce_pred_offset = image->clear_value_offset + 8;
-   image->size = image->dcc_offset + image->surface.dcc_size + 16;
+   image->dcc_pred_offset = image->clear_value_offset + 16;
+   image->size = image->clear_value_offset + 24;
image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
 }
 
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index bf88d3a84d9..787f9f178fa 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -1248,6 +1248,9 @@ uint32_t
 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
   struct radv_image *image, uint32_t value)
 {
+   /* Mark the image as being compressed. */
+   radv_update_dcc_metadata(cmd_buffer, image, true);
+
return radv_fill_buffer(cmd_buffer, image->bo,
image->offset + image->dcc_offset,
image->surface.dcc_size, value);
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index a54b78c2a6c..ce088d7332b 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -715,6 +715,10 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
 * FMASK and DCC also imply a fast-clear eliminate.
 */
radv_update_fce_metadata(cmd_buffer, image, false);
+
+   /* Mark the image as being decompressed. */
+   if (decompress_dcc)
+   radv_update_dcc_metadata(cmd_buffer, image, false);
}
 
radv_meta_restore(&saved_state, cmd_buffer);
@@ -811,6 +815,9 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer 
*cmd_buffer,
 
radv_unaligned_dispatch(cmd_buffer, image->info.width, 
image->info.height, 1);
 
+   /* Mark this image as actually being decompressed. */
+   radv_update_dcc_metadata(cmd_buffer, image, false);
+
/* The fill buffer below does its own saving */
radv_meta_restore(&saved_state, cmd_buffer);
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ac756f2c247..e67f3ac1cd1 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1206,6 +1206,9 @@ void radv_update_color_clear_metadata(struct 
radv_cmd_buffer *cmd_buffer,
 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
  struct radv_image *image, bool value);
 
+void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image, bool value);
+
 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
  struct radeon_winsys_bo *bo,
  uint64_t offset, uint64_t size, uint32_t value);
@@ -1502,6 +1505,7 @@ struct radv_image {
struct radv_cmask_info cmask;
uint64_t clea

Re: [Mesa-dev] [PATCH] build: enable -Werror=incompatible-pointer-types

2018-11-22 Thread Eric Engestrom
On Thursday, 2018-11-22 12:07:03 +, Emil Velikov wrote:
> On 2018/11/21, Gert Wollny wrote:
> > Am Mittwoch, den 21.11.2018, 13:14 + schrieb Emil Velikov:
> 
> > > +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-
> > > types],[CFXXLAGS="$CXXFLAGS -Werror=incompatible-pointer-types"])
> > > For C++ this shouldn't be needed, because there this is already an
> > > error.
> > 
> Indeed - option is for C and Objective C only.
> 
> Seems like the meson patch will need the same tweak.

Indeed, thanks!

> Eric, feel free to squash this with your patch.

I'll respin a v2 with your patch and mine squashed, and the C++ bit
dropped.

Thanks!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] build: enable -Werror=incompatible-pointer-types

2018-11-22 Thread Eric Engestrom
On Wednesday, 2018-11-21 13:14:48 +, Emil Velikov wrote:
> From: Emil Velikov 
> 
> More or less any issue pointed out by the compiler is an error. Make
> sure we flag and bail loudly.
> 
> Cc: Eric Engestrom 
> Signed-off-by: Emil Velikov 
> ---
> Eric, feel free to squash this with your patch.
> ---
>  Android.common.mk | 1 +
>  configure.ac  | 2 ++
>  scons/gallium.py  | 1 +
>  3 files changed, 4 insertions(+)
> 
> diff --git a/Android.common.mk b/Android.common.mk
> index aa1b266a393..4bdbf70e30b 100644
> --- a/Android.common.mk
> +++ b/Android.common.mk
> @@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \
>  
>  MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
>  LOCAL_CFLAGS += \
> + -Werror=incompatible-pointer-types \
>   -Wno-error \

Doesn't that no-error cancel it right away?

>   -Wno-unused-parameter \
>   -Wno-pointer-arith \
> diff --git a/configure.ac b/configure.ac
> index 93a52475d50..2b632cda997 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -307,6 +307,7 @@ dnl
>  AX_CHECK_COMPILE_FLAG([-Wall], 
> [CFLAGS="$CFLAGS -Wall"])
>  AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], 
> [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
>  AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],
> [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
> +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-types],
> [CFLAGS="$CFLAGS -Werror=incompatible-pointer-types"])
>  AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],  
> [CFLAGS="$CFLAGS -Wmissing-prototypes"])
>  dnl Dylan Baker: gcc and clang always accepr -Wno-*, hence check for the 
> original warning, then set the no-* flag
>  AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],  
> [CFLAGS="$CFLAGS -Wno-missing-field-initializers"])
> @@ -326,6 +327,7 @@ dnl Check C++ compiler flags
>  dnl
>  AC_LANG_PUSH([C++])
>  AX_CHECK_COMPILE_FLAG([-Wall], 
> [CXXFLAGS="$CXXFLAGS -Wall"])
> +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-types],
> [CFXXLAGS="$CXXFLAGS -Werror=incompatible-pointer-types"])
>  AX_CHECK_COMPILE_FLAG([-fno-math-errno],   
> [CXXFLAGS="$CXXFLAGS -fno-math-errno"])
>  AX_CHECK_COMPILE_FLAG([-fno-trapping-math],
> [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
>  AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],   
> [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
> diff --git a/scons/gallium.py b/scons/gallium.py
> index 963834a5fbc..36f542d0d64 100755
> --- a/scons/gallium.py
> +++ b/scons/gallium.py
> @@ -451,6 +451,7 @@ def generate(env):
>  # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
>  ccflags += [
>  '-Wall',
> +'-Werror=incompatible-pointer-types'

Missing comma at the end of the line

>  '-Wno-long-long',
>  '-fmessage-length=0', # be nice to Eclipse
>  ]
> -- 
> 2.19.1
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa] build: make passing an incorrect pointer type a hard error

2018-11-22 Thread Eric Engestrom
More or less any instance of this issue pointed out by the compiler is
a coding error. Make sure we flag it and bail loudly.

v2: - apply the change to autotools and scons as well (Emil)
- C++ doesn't need this, it's already an error and the flag
  doesn't exist (Gert)

Signed-off-by: Eric Engestrom 
Reviewed-by: Dylan Baker  # v1
Reviewed-by: Emil Velikov  # v1
[Emil: apply the same change to autotools and scons]
Signed-off-by: Emil Velikov 
---
Emil, I dropped the Android bit as this `-Wno-error` makes it look to me
like they don't want compiler warnings to stop the build.
---
 configure.ac | 1 +
 meson.build  | 1 +
 scons/gallium.py | 1 +
 3 files changed, 3 insertions(+)

diff --git a/configure.ac b/configure.ac
index 93a52475d50e6ee65432..422c8e3c87c897a88b51 100644
--- a/configure.ac
+++ b/configure.ac
@@ -307,6 +307,7 @@ dnl
 AX_CHECK_COMPILE_FLAG([-Wall], 
[CFLAGS="$CFLAGS -Wall"])
 AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], 
[CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
 AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],
[CFLAGS="$CFLAGS -Werror=missing-prototypes"])
+AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-types],
[CFLAGS="$CFLAGS -Werror=incompatible-pointer-types"])
 AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],  
[CFLAGS="$CFLAGS -Wmissing-prototypes"])
 dnl Dylan Baker: gcc and clang always accepr -Wno-*, hence check for the 
original warning, then set the no-* flag
 AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],  
[CFLAGS="$CFLAGS -Wno-missing-field-initializers"])
diff --git a/meson.build b/meson.build
index 4ad41fde13a33f546d48..2fde2b23e24fd2e4321e 100644
--- a/meson.build
+++ b/meson.build
@@ -788,6 +788,7 @@ endif
 c_args = []
 foreach a : ['-Werror=implicit-function-declaration',
  '-Werror=missing-prototypes', '-Werror=return-type',
+ '-Werror=incompatible-pointer-types',
  '-fno-math-errno',
  '-fno-trapping-math', '-Qunused-arguments']
   if cc.has_argument(a)
diff --git a/scons/gallium.py b/scons/gallium.py
index 963834a5fbc12a378ad4..04175014925cf088e5ea 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -451,6 +451,7 @@ def generate(env):
 # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
 ccflags += [
 '-Wall',
+'-Werror=incompatible-pointer-types',
 '-Wno-long-long',
 '-fmessage-length=0', # be nice to Eclipse
 ]
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 32211] [GLSL] lower_jumps with continue-statements in for-loops prevents loop unrolling

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=32211

--- Comment #14 from Danylo  ---
(In reply to Timothy Arceri from comment #13)
> 
> None of that should matter. If the continue is removed there should be
> nothing stopping the loop from unrolling, and if the loop is unrolled
> both ifs should be able to be optimised away (assuming I'm reading the IR
> correctly). Is this not what you are seeing?

Unfortunately not, loop isn't unrolled.

To be on the same page the optimization I did is turning

   loop {
   ...
   if (cond) {
  do_work_1();
  continue;
   } else {
   }
   do_work_2();
}

into:

loop {
   ...
   if (cond) {
  do_work_1();
   } else {
  do_work_2();
   }
}

So in our case it effectively produces:

   ...
   if (cond) {
  i++;
  do_work_1();
   } else {
  i++;
  do_work_2();
   }
   ...

Looks like in previous comment I forgot to say that both branches have 'i++'.

A loop with such a condition cannot be unrolled because
'compute_induction_information' cannot find the induction variable, since 'i'
is modified inside control flow:

> /* If one of the sources is in a conditional or nested block then
>  * panic.
>  */
> if (src_var->in_control_flow)
>break;

To make loop unrollable 'i++' should be outside of conditional block and there
is no optimization that could pull it out as I wrote in previous comment.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 32211] [GLSL] lower_jumps with continue-statements in for-loops prevents loop unrolling

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=32211

--- Comment #15 from Danylo  ---
Created attachment 142567
  --> https://bugs.freedesktop.org/attachment.cgi?id=142567&action=edit
Removing unnecessary continue

Optimization in question.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa] glapi: add missing visibility args

2018-11-22 Thread Eric Engestrom
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108829
Cc: Emil Velikov 
Fixes: 3218056e0eb375eeda470 "meson: Build i965 and dri stack"
Signed-off-by: Eric Engestrom 
---
 src/mapi/shared-glapi/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mapi/shared-glapi/meson.build 
b/src/mapi/shared-glapi/meson.build
index 41da9b1c1c5ad0be39a8..828d14bfd4156d89f8f4 100644
--- a/src/mapi/shared-glapi/meson.build
+++ b/src/mapi/shared-glapi/meson.build
@@ -40,7 +40,7 @@ libglapi = shared_library(
   'glapi',
   [files_mapi_glapi, files_mapi_util, shared_glapi_mapi_tmp_h],
   c_args : [
-c_msvc_compat_args, '-DMAPI_MODE_GLAPI',
+c_msvc_compat_args, c_vis_args, '-DMAPI_MODE_GLAPI',
 '-DMAPI_ABI_HEADER="@0@"'.format(shared_glapi_mapi_tmp_h.full_path()),
   ],
   link_args : [ld_args_gc_sections],
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [ANNOUNCE] mesa 18.3.0-rc4

2018-11-22 Thread Emil Velikov
The fourth release candidate for Mesa 18.3.0 is now available.

As per the issue tracker [1] we have one outstanding bug blocking the
release.
[1] https://bugs.freedesktop.org/show_bug.cgi?id=108530


Andrii Simiklit (1):
  i965/batch: avoid reverting batch buffer if saved state is an empty

Emil Velikov (4):
  travis: drop unneeded x11proto-xf86vidmode-dev
  glx: make xf86vidmode mandatory for direct rendering
  travis: adding missing x11-xcb for meson+vulkan
  Update version to 18.3.0-rc4

Gert Wollny (1):
  r600: clean up the GS ring buffers when the context is destroyed

Lionel Landwerlin (1):
  egl/dri: fix error value with unknown drm format

Marek Olšák (1):
  radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED

Nicolai Hähnle (1):
  radeonsi: fix an out-of-bounds read reported by ASAN

git tag: mesa-18.3.0-rc4

https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.gz
MD5:  5401533e9fec6c65bdbcae9d0e064091  mesa-18.3.0-rc4.tar.gz
SHA1: 3b58f9917cdc102f5dfc161d452234ffa254a930  mesa-18.3.0-rc4.tar.gz
SHA256: f674aae203840d27e84cbf523384036d45c415d766a6e771aac93a2c71ce4c17  
mesa-18.3.0-rc4.tar.gz
SHA512: 
26965ba394e9691d76acf87e378ccc461752fcc840a11dd2b15109a145c704bdb5a959a91617397275bc95146e070d69cce00e9490c61c99ef3a431559ef56c3
  mesa-18.3.0-rc4.tar.gz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.gz.sig

https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.xz
MD5:  04083daf8985d3c95c061d9fda4f8d2e  mesa-18.3.0-rc4.tar.xz
SHA1: 5bf144ba0b5cad53c85a3b0cdf4d40bddb720c75  mesa-18.3.0-rc4.tar.xz
SHA256: a3fb029886dc9c12b888fafc76642fa503df0005b41523ba745c68d4fd9b9c39  
mesa-18.3.0-rc4.tar.xz
SHA512: 
ee1adf9f8237a4f72a8b44ba9aeeb58fad041240e98ab71da6203f2a634f4b9dcd2758b4b5c592104cd51e431acb5292e79d8dd5a61a21c18ae7f93a9d98af41
  mesa-18.3.0-rc4.tar.xz
PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.xz.sig



signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] build: make passing an incorrect pointer type a hard error

2018-11-22 Thread Eric Engestrom
On Thursday, 2018-11-22 13:15:05 +, Eric Engestrom wrote:
> More or less any of this issue pointed out by the compiler is
> a coding error. Make sure we flag it and bail loudly.
> 
> v2: - apply the change to autotools and scons as well (Emil)
> - C++ doesn't need this, it's already an error and the flag
>   doesn't exist (Gert)
> 
> Signed-off-by: Eric Engestrom 
> Reviewed-by: Dylan Baker  # v1
> Reviewed-by: Emil Velikov  # v1
> [Emil: apply the same change to autotools and scons]
> Signed-off-by: Emil Velikov 
> ---
> Emil, I dropped the Android bit as this `-Wno-error` makes it look to me
> like they don't want compiler warnings to stop the build.
> ---
>  configure.ac | 1 +
>  meson.build  | 1 +
>  scons/gallium.py | 1 +
>  3 files changed, 3 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index 93a52475d50e6ee65432..422c8e3c87c897a88b51 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -307,6 +307,7 @@ dnl
>  AX_CHECK_COMPILE_FLAG([-Wall], 
> [CFLAGS="$CFLAGS -Wall"])
>  AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], 
> [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
>  AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],
> [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
> +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-types],
> [CFLAGS="$CFLAGS -Werror=incompatible-pointer-types"])
>  AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],  
> [CFLAGS="$CFLAGS -Wmissing-prototypes"])
>  dnl Dylan Baker: gcc and clang always accepr -Wno-*, hence check for the 
> original warning, then set the no-* flag
>  AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],  
> [CFLAGS="$CFLAGS -Wno-missing-field-initializers"])
> diff --git a/meson.build b/meson.build
> index 4ad41fde13a33f546d48..2fde2b23e24fd2e4321e 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -788,6 +788,7 @@ endif
>  c_args = []
>  foreach a : ['-Werror=implicit-function-declaration',
>   '-Werror=missing-prototypes', '-Werror=return-type',
> + '-Werror=incompatible-pointer-types',
>   '-fno-math-errno',
>   '-fno-trapping-math', '-Qunused-arguments']
>if cc.has_argument(a)
> diff --git a/scons/gallium.py b/scons/gallium.py
> index 963834a5fbc12a378ad4..04175014925cf088e5ea 100755
> --- a/scons/gallium.py
> +++ b/scons/gallium.py
> @@ -451,6 +451,7 @@ def generate(env):
>  # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
>  ccflags += [
>  '-Wall',
> +'-Werror=incompatible-pointer-types',

Hmm, just got the results from travis, and I hadn't realised that these
flags aren't checked before being given to the compiler, so this just
breaks the scons build. Until someone adds a 'does the compiler
understand this flag' feature to scons, we just can't add this flag.

I'll just revert the scons bit as well, so it's gonna be just meson and
autotools.

>  '-Wno-long-long',
>  '-fmessage-length=0', # be nice to Eclipse
>  ]
> -- 
> Cheers,
>   Eric
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] anv: Put robust buffer access in the pipeline hash

2018-11-22 Thread Jason Ekstrand
Thanks!

On Thu, Nov 22, 2018 at 1:06 AM Iago Toral  wrote:

> For both patches:
>
> Reviewed-by: Iago Toral Quiroga 
>
> On Wed, 2018-11-21 at 17:20 -0600, Jason Ekstrand wrote:
> > It affects apply_pipeline_layout.  Shaders compiled with the wrong
> > value
> > will work but they may not be robust as requested by the app.
> >
> > Cc: mesa-sta...@lists.freedesktop.org
> > ---
> >  src/intel/vulkan/anv_pipeline.c | 6 ++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_pipeline.c
> > b/src/intel/vulkan/anv_pipeline.c
> > index ad0f08253e7..f170366d030 100644
> > --- a/src/intel/vulkan/anv_pipeline.c
> > +++ b/src/intel/vulkan/anv_pipeline.c
> > @@ -446,6 +446,9 @@ anv_pipeline_hash_graphics(struct anv_pipeline
> > *pipeline,
> > if (layout)
> >_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
> >
> > +   const bool rba = pipeline->device->robust_buffer_access;
> > +   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
> > +
> > for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
> >if (stages[s].entrypoint)
> >   anv_pipeline_hash_shader(&ctx, &stages[s]);
> > @@ -466,6 +469,9 @@ anv_pipeline_hash_compute(struct anv_pipeline
> > *pipeline,
> > if (layout)
> >_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
> >
> > +   const bool rba = pipeline->device->robust_buffer_access;
> > +   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
> > +
> > anv_pipeline_hash_shader(&ctx, stage);
> >
> > _mesa_sha1_final(&ctx, sha1_out);
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: allow exporting an imported SYNC_FD semaphore type

2018-11-22 Thread Jason Ekstrand
Should work.

Reviewed-by: Jason Ekstrand 

On Thu, Nov 22, 2018 at 1:41 AM Tapani Pälli  wrote:

> Fixes issues with following SkQP tests:
>
>unitTest_VulkanHardwareBuffer_Vulkan_EGL_Syncs
>unitTest_VulkanHardwareBuffer_Vulkan_Vulkan_Syncs
>
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/vulkan/anv_queue.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index 2a8ed2eb4ed..7887fbcb937 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -1056,7 +1056,8 @@ void
> anv_GetPhysicalDeviceExternalSemaphoreProperties(
>
> case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
>if (device->has_exec_fence) {
> - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
> + pExternalSemaphoreProperties->exportFromImportedHandleTypes =
> +VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->compatibleHandleTypes =
>  VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->externalSemaphoreFeatures =
> --
> 2.17.2
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] build: make passing an incorrect pointer type a hard error

2018-11-22 Thread Emil Velikov
On 2018/11/22, Eric Engestrom wrote:
> On Thursday, 2018-11-22 13:15:05 +, Eric Engestrom wrote:
> > More or less any of this issue pointed out by the compiler is
> > a coding error. Make sure we flag it and bail loudly.
> > 
> > v2: - apply the change to autotools and scons as well (Emil)
> > - C++ doesn't need this, it's already an error and the flag
> >   doesn't exist (Gert)
> > 
> > Signed-off-by: Eric Engestrom 
> > Reviewed-by: Dylan Baker  # v1
> > Reviewed-by: Emil Velikov  # v1
> > [Emil: apply the same change to autotools and scons]
> > Signed-off-by: Emil Velikov 
> > ---
> > Emil, I dropped the Android bit as this `-Wno-error` makes it look to me
> > like they don't want compiler warnings to stop the build.

By default Android builds with -Werror, which treats all warnings as
errors. In an extremely well controlled environment that is fine.

The individual -Werror=foo is a separate thing. The following example
illustrates that. Note that adding -Wno-error after -Werror=foo does not
change anything, rightfully so.

$ echo -e "#include <stdio.h>
int main(void)
{ int foo; return fprintf(\"test\"); }" >> eric.c
$ gcc -Wall -Werror  -Wno-error -Werror=incompatible-pointer-types eric.c

Please add the hunk back?

> > ---
> >  configure.ac | 1 +
> >  meson.build  | 1 +
> >  scons/gallium.py | 1 +
> >  3 files changed, 3 insertions(+)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 93a52475d50e6ee65432..422c8e3c87c897a88b51 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -307,6 +307,7 @@ dnl
> >  AX_CHECK_COMPILE_FLAG([-Wall], 
> > [CFLAGS="$CFLAGS -Wall"])
> >  AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], 
> > [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"])
> >  AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],
> > [CFLAGS="$CFLAGS -Werror=missing-prototypes"])
> > +AX_CHECK_COMPILE_FLAG([-Werror=incompatible-pointer-types],
> > [CFLAGS="$CFLAGS -Werror=incompatible-pointer-types"])
> >  AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],  
> > [CFLAGS="$CFLAGS -Wmissing-prototypes"])
> >  dnl Dylan Baker: gcc and clang always accepr -Wno-*, hence check for the 
> > original warning, then set the no-* flag
> >  AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],  
> > [CFLAGS="$CFLAGS -Wno-missing-field-initializers"])
> > diff --git a/meson.build b/meson.build
> > index 4ad41fde13a33f546d48..2fde2b23e24fd2e4321e 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -788,6 +788,7 @@ endif
> >  c_args = []
> >  foreach a : ['-Werror=implicit-function-declaration',
> >   '-Werror=missing-prototypes', '-Werror=return-type',
> > + '-Werror=incompatible-pointer-types',
> >   '-fno-math-errno',
> >   '-fno-trapping-math', '-Qunused-arguments']
> >if cc.has_argument(a)
> > diff --git a/scons/gallium.py b/scons/gallium.py
> > index 963834a5fbc12a378ad4..04175014925cf088e5ea 100755
> > --- a/scons/gallium.py
> > +++ b/scons/gallium.py
> > @@ -451,6 +451,7 @@ def generate(env):
> >  # - http://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
> >  ccflags += [
> >  '-Wall',
> > +'-Werror=incompatible-pointer-types',
> 
> Hmm, just got the results from travis, and I hadn't realised that these
> flags aren't checked before being given to the compiler, so this just
> breaks the scons build. Until someone adds a 'does the compiler
> understand this flag' feature to scons, we just can't add this flag.
> 
> I'll just revert the scons bit as well, so it's gonna be just meson and
> autotools.
> 
Right, I forgot that the toggle has only been present since GCC 5.0, while we
use an older one there.

Technically, one can reinstate the ccversion hunk from commit
c068610a7df and bar on v5.0 or later. I don't feel strongly about it
though.

With the Android piece, regardless of scons the patch is
Reviewed-by: Emil Velikov 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] compiler/types: Serialize/deserialize subpass input types correctly

2018-11-22 Thread Lionel Landwerlin

On 13/10/2018 01:08, Jason Ekstrand wrote:

They have glsl_sampler_dim enum values of 8 and 9 which don't work when
you & them with 0x7.  Fortunately, we have plenty of bits.



I think this needs a Cc: stable, right?


Reviewed-by: Lionel Landwerlin 



---
  src/compiler/glsl_types.cpp | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 70bce6ace8e..d3cfcf24dd8 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -2175,14 +2175,14 @@ decode_type_from_blob(struct blob_reader *blob)
 case GLSL_TYPE_INT64:
return glsl_type::get_instance(base_type, (u >> 4) & 0x0f, u & 0x0f);
 case GLSL_TYPE_SAMPLER:
-  return glsl_type::get_sampler_instance((enum glsl_sampler_dim) ((u >> 4) 
& 0x07),
+  return glsl_type::get_sampler_instance((enum glsl_sampler_dim) ((u >> 4) 
& 0x0f),
   (u >> 3) & 0x01,
   (u >> 2) & 0x01,
   (glsl_base_type) ((u >> 0) & 
0x03));
 case GLSL_TYPE_SUBROUTINE:
return glsl_type::get_subroutine_instance(blob_read_string(blob));
 case GLSL_TYPE_IMAGE:
-  return glsl_type::get_image_instance((enum glsl_sampler_dim) ((u >> 3) & 
0x07),
+  return glsl_type::get_image_instance((enum glsl_sampler_dim) ((u >> 3) & 
0x0f),
   (u >> 2) & 0x01,
   (glsl_base_type) ((u >> 0) & 
0x03));
 case GLSL_TYPE_ATOMIC_UINT:



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] anv/pipeline: Move wpos and input attachment lowering to lower_nir

2018-11-22 Thread Lionel Landwerlin

On 13/10/2018 01:08, Jason Ekstrand wrote:

This lets us make anv_pipeline_compile_to_nir take a device instead of a
pipeline.



Reviewed-by: Lionel Landwerlin 



---
  src/intel/vulkan/anv_pipeline.c | 19 ---
  1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index be05c11f45d..4e3ae9d094d 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -97,15 +97,13 @@ static const uint64_t stage_to_debug[] = {
   * we can't do that yet because we don't have the ability to copy nir.
   */
  static nir_shader *
-anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
+anv_shader_compile_to_nir(struct anv_device *device,
void *mem_ctx,
const struct anv_shader_module *module,
const char *entrypoint_name,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info)
  {
-   const struct anv_device *device = pipeline->device;
-
 const struct brw_compiler *compiler =
device->instance->physicalDevice.compiler;
 const nir_shader_compiler_options *nir_options =
@@ -209,9 +207,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
 NIR_PASS_V(nir, nir_remove_dead_variables,
nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
  
-   if (stage == MESA_SHADER_FRAGMENT)

-  NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
-
 NIR_PASS_V(nir, nir_propagate_invariant);
 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
entry_point->impl, true, false);
@@ -221,9 +216,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
  
 nir = brw_preprocess_nir(compiler, nir);
  
-   if (stage == MESA_SHADER_FRAGMENT)

-  NIR_PASS_V(nir, anv_nir_lower_input_attachments);
-
 return nir;
  }
  
@@ -484,6 +476,11 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,

 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
 nir_shader *nir = stage->nir;
  
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {

+  NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
+  NIR_PASS_V(nir, anv_nir_lower_input_attachments);
+   }
+
 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
  
 NIR_PASS_V(nir, anv_nir_lower_push_constants);

@@ -969,7 +966,7 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
   .sampler_to_descriptor = stages[s].sampler_to_descriptor
};
  
-  stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx,

+  stages[s].nir = anv_shader_compile_to_nir(pipeline->device, pipeline_ctx,
  stages[s].module,
  stages[s].entrypoint,
  stages[s].stage,
@@ -1137,7 +1134,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
  
void *mem_ctx = ralloc_context(NULL);
  
-  stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx,

+  stage.nir = anv_shader_compile_to_nir(pipeline->device, mem_ctx,
  stage.module,
  stage.entrypoint,
  stage.stage,



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] anv/pipeline_cache: Add support for caching NIR

2018-11-22 Thread Lionel Landwerlin

On 13/10/2018 01:08, Jason Ekstrand wrote:

Reviewed-by: Lionel Landwerlin 


---
  src/intel/vulkan/anv_pipeline_cache.c | 100 ++
  src/intel/vulkan/anv_private.h|  18 +
  2 files changed, 118 insertions(+)

diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index 3efa427279d..2add9e06b20 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -26,6 +26,7 @@
  #include "util/debug.h"
  #include "util/disk_cache.h"
  #include "util/mesa-sha1.h"
+#include "nir/nir_serialize.h"
  #include "anv_private.h"
  
  struct anv_shader_bin *

@@ -211,6 +212,18 @@ shader_bin_key_compare_func(const void *void_a, const void 
*void_b)
 return memcmp(a->data, b->data, a->size) == 0;
  }
  
+static uint32_t

+sha1_hash_func(const void *sha1)
+{
+   return _mesa_hash_data(sha1, 20);
+}
+
+static bool
+sha1_compare_func(const void *sha1_a, const void *sha1_b)
+{
+   return memcmp(sha1_a, sha1_b, 20) == 0;
+}
+
  void
  anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
  struct anv_device *device,
@@ -222,6 +235,8 @@ anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
 if (cache_enabled) {
cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
   shader_bin_key_compare_func);
+  cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+ sha1_compare_func);
 } else {
cache->cache = NULL;
 }
@@ -644,3 +659,88 @@ anv_device_upload_kernel(struct anv_device *device,
  
 return bin;

  }
+
+struct serialized_nir {
+   unsigned char sha1_key[20];
+   size_t size;
+   char data[0];
+};
+
+struct nir_shader *
+anv_device_search_for_nir(struct anv_device *device,
+  struct anv_pipeline_cache *cache,
+  const nir_shader_compiler_options *nir_options,
+  unsigned char sha1_key[20],
+  void *mem_ctx)
+{
+   if (cache) {
+  const struct serialized_nir *snir = NULL;
+
+  pthread_mutex_lock(&cache->mutex);
+  struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+  if (entry)
+ snir = entry->data;
+  pthread_mutex_unlock(&cache->mutex);
+
+  if (snir) {
+ struct blob_reader blob;
+ blob_reader_init(&blob, snir->data, snir->size);
+
+ nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob);
+ if (blob.overrun) {
+ralloc_free(nir);
+ } else {
+return nir;
+ }
+  }
+   }
+
+   return NULL;
+}
+
+void
+anv_device_upload_nir(struct anv_device *device,
+  struct anv_pipeline_cache *cache,
+  const struct nir_shader *nir,
+  unsigned char sha1_key[20])
+{
+   if (cache) {
+  pthread_mutex_lock(&cache->mutex);
+  struct hash_entry *entry =
+ _mesa_hash_table_search(cache->nir_cache, sha1_key);
+  pthread_mutex_unlock(&cache->mutex);
+  if (entry)
+ return;
+
+  struct blob blob;
+  blob_init(&blob);
+
+  nir_serialize(&blob, nir);
+  if (blob.out_of_memory) {
+ blob_finish(&blob);
+ return;
+  }
+
+  pthread_mutex_lock(&cache->mutex);
+  /* Because ralloc isn't thread-safe, we have to do all this inside the
+   * lock.  We could unlock for the big memcpy but it's probably not worth
+   * the hassle.
+   */
+  entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
+  if (entry) {
+ pthread_mutex_unlock(&cache->mutex);
+ return;
+  }
+
+  struct serialized_nir *snir =
+ ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
+  memcpy(snir->sha1_key, sha1_key, 20);
+  snir->size = blob.size;
+  memcpy(snir->data, blob.data, blob.size);
+
+  _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
+
+  pthread_mutex_unlock(&cache->mutex);
+   }
+}
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 5b4c286bf38..19f673f1563 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -932,6 +932,8 @@ struct anv_pipeline_cache {
 struct anv_device *  device;
 pthread_mutex_t  mutex;
  
+   struct hash_table *  nir_cache;

+
 struct hash_table *  cache;
  };
  
@@ -971,6 +973,22 @@ anv_device_upload_kernel(struct anv_device *device,

   uint32_t prog_data_size,
   const struct anv_pipeline_bind_map *bind_map);
  
+struct nir_shader;

+struct nir_shader_compiler_options;
+
+struct nir_shader *
+anv_device_search_for_nir(struct anv_device *device,
+  str

Re: [Mesa-dev] [PATCH 5/5] anv/pipeline: Cache the pre-lowered NIR

2018-11-22 Thread Lionel Landwerlin

On 13/10/2018 01:08, Jason Ekstrand wrote:

This adds a second level of caching for the pre-lowered NIR that's only
based off of the shader module, entrypoint and specialization constants.
This is enough for spirv_to_nir as well as our first round of lowering
and optimization.  Caching at this level should allow for faster shader
recompiles due to state changes.

The NIR caching does not get serialized to disk via either the
VkPipelineCache serialization mechanism or the transparent on-disk
cache.  We could but it's usually not that expensive to fall back to
SPIR-V for the odd cache miss especially if it only happens once for
several misses and it simplifies the cache.



Reviewed-by: Lionel Landwerlin 



---
  src/intel/vulkan/anv_pipeline.c | 49 ++---
  1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 481921840f3..cb204f62902 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -478,6 +478,41 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
 _mesa_sha1_final(&ctx, sha1_out);
  }
  
+static nir_shader *

+anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
+   struct anv_pipeline_cache *cache,
+   void *mem_ctx,
+   struct anv_pipeline_stage *stage)
+{
+   const struct brw_compiler *compiler =
+  pipeline->device->instance->physicalDevice.compiler;
+   const nir_shader_compiler_options *nir_options =
+  compiler->glsl_compiler_options[stage->stage].NirOptions;
+   nir_shader *nir;
+
+   nir = anv_device_search_for_nir(pipeline->device, cache,
+   nir_options,
+   stage->shader_sha1,
+   mem_ctx);
+   if (nir) {
+  assert(nir->info.stage == stage->stage);
+  return nir;
+   }
+
+   nir = anv_shader_compile_to_nir(pipeline->device,
+   mem_ctx,
+   stage->module,
+   stage->entrypoint,
+   stage->stage,
+   stage->spec_info);
+   if (nir) {
+  anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
+  return nir;
+   }
+
+   return NULL;
+}
+
  static void
  anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 void *mem_ctx,
@@ -985,11 +1020,9 @@ anv_pipeline_compile_graphics(struct anv_pipeline 
*pipeline,
   .sampler_to_descriptor = stages[s].sampler_to_descriptor
};
  
-  stages[s].nir = anv_shader_compile_to_nir(pipeline->device, pipeline_ctx,

-stages[s].module,
-stages[s].entrypoint,
-stages[s].stage,
-stages[s].spec_info);
+  stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache,
+ pipeline_ctx,
+ &stages[s]);
if (stages[s].nir == NULL) {
   result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   goto fail;
@@ -1158,11 +1191,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
  
void *mem_ctx = ralloc_context(NULL);
  
-  stage.nir = anv_shader_compile_to_nir(pipeline->device, mem_ctx,

-stage.module,
-stage.entrypoint,
-stage.stage,
-stage.spec_info);
+  stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
if (stage.nir == NULL) {
   ralloc_free(mem_ctx);
   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] anv/pipeline: Hash shader modules and spec constants separately

2018-11-22 Thread Lionel Landwerlin

On 13/10/2018 01:08, Jason Ekstrand wrote:

The stuff hashed by anv_pipeline_hash_shader is exactly the inputs to
anv_shader_compile_to_nir so it can be used for NIR caching.



Reviewed-by: Lionel Landwerlin 



---
  src/intel/vulkan/anv_pipeline.c | 54 -
  1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 4e3ae9d094d..481921840f3 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -391,6 +391,8 @@ struct anv_pipeline_stage {
 const char *entrypoint;
 const VkSpecializationInfo *spec_info;
  
+   unsigned char shader_sha1[20];

+
 union brw_any_prog_key key;
  
 struct {

@@ -408,20 +410,27 @@ struct anv_pipeline_stage {
  };
  
  static void

-anv_pipeline_hash_shader(struct mesa_sha1 *ctx,
- struct anv_pipeline_stage *stage)
+anv_pipeline_hash_shader(const struct anv_shader_module *module,
+ const char *entrypoint,
+ gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ unsigned char *sha1_out)
  {
-   _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1));
-   _mesa_sha1_update(ctx, stage->entrypoint, strlen(stage->entrypoint));
-   _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage));
-   if (stage->spec_info) {
-  _mesa_sha1_update(ctx, stage->spec_info->pMapEntries,
-stage->spec_info->mapEntryCount *
-sizeof(*stage->spec_info->pMapEntries));
-  _mesa_sha1_update(ctx, stage->spec_info->pData,
-stage->spec_info->dataSize);
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+
+   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
+   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
+   if (spec_info) {
+  _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+spec_info->mapEntryCount *
+sizeof(*spec_info->pMapEntries));
+  _mesa_sha1_update(&ctx, spec_info->pData,
+spec_info->dataSize);
 }
-   _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage));
+
+   _mesa_sha1_final(&ctx, sha1_out);
  }
  
  static void

@@ -440,8 +449,11 @@ anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
  
 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {

-  if (stages[s].entrypoint)
- anv_pipeline_hash_shader(&ctx, &stages[s]);
+  if (stages[s].entrypoint) {
+ _mesa_sha1_update(&ctx, stages[s].shader_sha1,
+   sizeof(stages[s].shader_sha1));
+ _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
+  }
 }
  
 _mesa_sha1_final(&ctx, sha1_out);

@@ -459,7 +471,9 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
 if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
  
-   anv_pipeline_hash_shader(&ctx, stage);

+   _mesa_sha1_update(&ctx, stage->shader_sha1,
+ sizeof(stage->shader_sha1));
+   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
  
 _mesa_sha1_final(&ctx, sha1_out);

  }
@@ -865,6 +879,11 @@ anv_pipeline_compile_graphics(struct anv_pipeline 
*pipeline,
stages[stage].module = anv_shader_module_from_handle(sinfo->module);
stages[stage].entrypoint = sinfo->pName;
stages[stage].spec_info = sinfo->pSpecializationInfo;
+  anv_pipeline_hash_shader(stages[stage].module,
+   stages[stage].entrypoint,
+   stage,
+   stages[stage].spec_info,
+   stages[stage].shader_sha1);
  
const struct gen_device_info *devinfo = &pipeline->device->info;

switch (stage) {
@@ -1115,6 +1134,11 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
   .stage = MESA_SHADER_COMPUTE,
}
 };
+   anv_pipeline_hash_shader(stage.module,
+stage.entrypoint,
+MESA_SHADER_COMPUTE,
+stage.spec_info,
+stage.shader_sha1);
  
 struct anv_shader_bin *bin = NULL;
  



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: allow exporting an imported SYNC_FD semaphore type

2018-11-22 Thread Emil Velikov
On 2018/11/22, Tapani Pälli wrote:
> Fixes issues with following SkQP tests:
> 
>unitTest_VulkanHardwareBuffer_Vulkan_EGL_Syncs
>unitTest_VulkanHardwareBuffer_Vulkan_Vulkan_Syncs
> 
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/vulkan/anv_queue.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index 2a8ed2eb4ed..7887fbcb937 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -1056,7 +1056,8 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
>  
> case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
>if (device->has_exec_fence) {
> - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
> + pExternalSemaphoreProperties->exportFromImportedHandleTypes =
> +VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->compatibleHandleTypes =
>  VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
>   pExternalSemaphoreProperties->externalSemaphoreFeatures =
> -- 
Hi Tapani,

Is it worth picking this for stable?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] mesa/main: fix incorrect depth-error

2018-11-22 Thread Erik Faye-Lund
If glGetTexImage or glGetnTexImage is called with a level that doesn't
exist, we get an error message of this form:

Mesa: User error: GL_INVALID_VALUE in glGetTexImage(depth = 0)

This is clearly nonsensical, because these APIs don't even have a
depth-parameter. The reason is that get_texture_image_dims() returns
all-zero dimensions for non-existent texture-images, and we go on to
validate these dimensions as if they were user-input, because
glGetTextureSubImage requires checking.

So let's split this logic in two, so glGetTextureSubImage can have
stricter input-validation. All arguments that are no longer validated
are generated internally by mesa, so there's no use in validating them.

Fixes: 42891dbaa12 "gettextsubimage: verify zoffset and depth are correct"
Signed-off-by: Erik Faye-Lund 
---
 src/mesa/main/texgetimage.c | 57 -
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 190f53d62fe..dabfcd06a52 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -1255,7 +1255,6 @@ static bool
 getteximage_error_check(struct gl_context *ctx,
 struct gl_texture_object *texObj,
 GLenum target, GLint level,
-GLint xoffset, GLint yoffset, GLint zoffset,
 GLsizei width, GLsizei height, GLsizei depth,
 GLenum format, GLenum type, GLsizei bufSize,
 GLvoid *pixels, const char *caller)
@@ -1269,6 +1268,49 @@ getteximage_error_check(struct gl_context *ctx,
   return true;
}
 
+   if (width == 0 || height == 0 || depth == 0) {
+  /* Not an error, but nothing to do.  Return 'true' so that the
+   * caller simply returns.
+   */
+  return true;
+   }
+
+   if (pbo_error_check(ctx, target, width, height, depth,
+   format, type, bufSize, pixels, caller)) {
+  return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, 0);
+   if (teximage_error_check(ctx, texImage, format, caller)) {
+  return true;
+   }
+
+   return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+gettexsubimage_error_check(struct gl_context *ctx,
+   struct gl_texture_object *texObj,
+   GLenum target, GLint level,
+   GLint xoffset, GLint yoffset, GLint zoffset,
+   GLsizei width, GLsizei height, GLsizei depth,
+   GLenum format, GLenum type, GLsizei bufSize,
+   GLvoid *pixels, const char *caller)
+{
+   struct gl_texture_image *texImage;
+
+   assert(texObj);
+
+   if (common_error_check(ctx, texObj, target, level, width, height, depth,
+  format, type, bufSize, pixels, caller)) {
+  return true;
+   }
+
if (dimensions_error_check(ctx, texObj, target, level,
   xoffset, yoffset, zoffset,
   width, height, depth, caller)) {
@@ -1417,7 +1459,7 @@ _mesa_GetnTexImage(GLenum target, GLint level, GLenum 
format, GLenum type,
get_texture_image_dims(texObj, target, level, &width, &height, &depth);
 
if (getteximage_error_check(ctx, texObj, target, level,
-   0, 0, 0, width, height, depth,
+   width, height, depth,
format, type, bufSize, pixels, caller)) {
   return;
}
@@ -1448,7 +1490,7 @@ _mesa_GetTexImage(GLenum target, GLint level, GLenum 
format, GLenum type,
get_texture_image_dims(texObj, target, level, &width, &height, &depth);
 
if (getteximage_error_check(ctx, texObj, target, level,
-   0, 0, 0, width, height, depth,
+   width, height, depth,
format, type, INT_MAX, pixels, caller)) {
   return;
}
@@ -1482,7 +1524,7 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum 
format, GLenum type,
   &width, &height, &depth);
 
if (getteximage_error_check(ctx, texObj, texObj->Target, level,
-   0, 0, 0, width, height, depth,
+   width, height, depth,
format, type, bufSize, pixels, caller)) {
   return;
}
@@ -1515,9 +1557,10 @@ _mesa_GetTextureSubImage(GLuint texture, GLint level,
   return;
}
 
-   if (getteximage_error_check(ctx, texObj, texObj->Target, level,
-   xoffset, yoffset, zoffset, width, height, depth,
-   format, type, bufSize, pixels, caller)) {
+   if (gettexsubimage_error_check(ctx, texObj, texObj->Target, level,
+  xoffset, yoffset, z

[Mesa-dev] [PATCH 5/6] mesa/main: check cube-completeness in common code

2018-11-22 Thread Erik Faye-Lund
This check is the only part of dimensions_error_check that isn't about
error-checking the offset and size arguments of
glGet[Compressed]TextureSubImage(), so it doesn't really belong in here.

This doesn't make a difference right now, apart from changing the
precedence of this error. But it will make a difference for the next
patch, where we no longer call this method from the non-sub tex-image
getters.

Signed-off-by: Erik Faye-Lund 
---
 src/mesa/main/texgetimage.c | 31 ---
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index cf316a865a9..190f53d62fe 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -981,21 +981,6 @@ dimensions_error_check(struct gl_context *ctx,
  "%s(zoffset + depth = %d)", caller, zoffset + depth);
  return true;
   }
-  /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image 
Queries"):
-   *
-   *   "An INVALID_OPERATION error is generated by GetTextureImage if the
-   *   effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
-   *   and the texture object is not cube complete or cube array complete,
-   *   respectively."
-   *
-   * This applies also to GetTextureSubImage, GetCompressedTexImage,
-   * GetCompressedTextureImage, and GetnCompressedTexImage.
-   */
-  if (!_mesa_cube_complete(texObj)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s(cube incomplete)", caller);
- return true;
-  }
   break;
default:
   ; /* nothing */
@@ -1242,6 +1227,22 @@ common_error_check(struct gl_context *ctx,
   return true;
}
 
+   /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"):
+*
+*   "An INVALID_OPERATION error is generated by GetTextureImage if the
+*   effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
+*   and the texture object is not cube complete or cube array complete,
+*   respectively."
+*
+* This applies also to GetTextureSubImage, GetCompressedTexImage,
+* GetCompressedTextureImage, and GetnCompressedTexImage.
+*/
+   if (target == GL_TEXTURE_CUBE_MAP && !_mesa_cube_complete(texObj)) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "%s(cube incomplete)", caller);
+  return true;
+   }
+
return false;
 }
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] mesa/main: factor out common error-checking

2018-11-22 Thread Erik Faye-Lund
This error checking is the same for teximage and texsubimage getters, so
let's factor it out to its own function.

This will be useful when getteximage and gettexsubimage gets their own
error checking routines a bit later.
---
 src/mesa/main/texgetimage.c | 46 +++--
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 8ee5cc0d729..cf316a865a9 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -1211,24 +1211,20 @@ teximage_error_check(struct gl_context *ctx,
 
 
 /**
- * Do error checking for all (non-compressed) get-texture-image functions.
- * \return true if any error, false if no errors.
+ * Do common teximage-related error checking for getting uncompressed images.
+ * \return true if there was an error
  */
 static bool
-getteximage_error_check(struct gl_context *ctx,
-struct gl_texture_object *texObj,
-GLenum target, GLint level,
-GLint xoffset, GLint yoffset, GLint zoffset,
-GLsizei width, GLsizei height, GLsizei depth,
-GLenum format, GLenum type, GLsizei bufSize,
-GLvoid *pixels, const char *caller)
+common_error_check(struct gl_context *ctx,
+   struct gl_texture_object *texObj,
+   GLenum target, GLint level,
+   GLsizei width, GLsizei height, GLsizei depth,
+   GLenum format, GLenum type, GLsizei bufSize,
+   GLvoid *pixels, const char *caller)
 {
-   struct gl_texture_image *texImage;
GLenum err;
GLint maxLevels;
 
-   assert(texObj);
-
if (texObj->Target == 0) {
   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
   return true;
@@ -1246,6 +1242,32 @@ getteximage_error_check(struct gl_context *ctx,
   return true;
}
 
+   return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+getteximage_error_check(struct gl_context *ctx,
+struct gl_texture_object *texObj,
+GLenum target, GLint level,
+GLint xoffset, GLint yoffset, GLint zoffset,
+GLsizei width, GLsizei height, GLsizei depth,
+GLenum format, GLenum type, GLsizei bufSize,
+GLvoid *pixels, const char *caller)
+{
+   struct gl_texture_image *texImage;
+
+   assert(texObj);
+
+   if (common_error_check(ctx, texObj, target, level, width, height, depth,
+  format, type, bufSize, pixels, caller)) {
+  return true;
+   }
+
if (dimensions_error_check(ctx, texObj, target, level,
   xoffset, yoffset, zoffset,
   width, height, depth, caller)) {
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] glGetTexImage fixes

2018-11-22 Thread Erik Faye-Lund
I was recently investigating a gl-error that appears some times while
using virgl, where I got a pretty nonsensical GL-error:

Mesa: User error: GL_INVALID_VALUE in glGetnTexImageARB(depth = 0)

Now, the reason why this is nonsensical, is that glGetnTexImage doesn't 
have a 'depth'-argument, this value is generated internally in mesa.

What happens is that virgl ends up asking for the teximage for a
non-existent mipmap-level. Whoops, that shouldn't be done, but while
we're in this area, let's clean this up.

My first fix was to revert 42891dbaa12 ("gettextsubimage: verify zoffset
and depth are correct"). But that would render that commit moot, even though
it actually does fix something. So I'd rather not; it's a good change, but
this logic should only apply to glGetTextureSubImage.

So I decided to give glGetTextureSubImage its own error checking.

While working on this, I also realized that the error introduced in
b37b35a5d26 ("getteximage: assume texture image is empty for non defined
levels") was flawed; it shouldn't apply in the case where *all* of
width, xoffset and the texture's width are zero. And since we end up
checking explicitly for these conditions later, this should simply be a
no-op as per the spec. And we already have some support-code for this.

No regressions found in piglit.

Erik Faye-Lund (6):
  mesa/main: remove ARB suffix from glGetnTexImage
  mesa/main: remove bogus error for zero-sized images
  mesa/main: factor out tex-image error-checking
  mesa/main: factor out common error-checking
  mesa/main: check cube-completeness in common code
  mesa/main: fix incorrect depth-error

 src/mapi/glapi/gen/ARB_robustness.xml |   2 +-
 src/mapi/glapi/gen/gl_API.xml |   9 +
 src/mesa/main/texgetimage.c   | 273 --
 src/mesa/main/texgetimage.h   |   4 +-
 4 files changed, 179 insertions(+), 109 deletions(-)

-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] mesa/main: factor out tex-image error-checking

2018-11-22 Thread Erik Faye-Lund
This will be useful when we split error-checking for getteximage and
gettexsubimage later.

Signed-off-by: Erik Faye-Lund 
---
 src/mesa/main/texgetimage.c | 110 +---
 1 file changed, 64 insertions(+), 46 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index c30ca703242..8ee5cc0d729 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -1139,53 +1139,15 @@ pbo_error_check(struct gl_context *ctx, GLenum target,
 
 
 /**
- * Do error checking for all (non-compressed) get-texture-image functions.
- * \return true if any error, false if no errors.
+ * Do teximage-related error checking for getting uncompressed images.
+ * \return true if there was an error
  */
 static bool
-getteximage_error_check(struct gl_context *ctx,
-struct gl_texture_object *texObj,
-GLenum target, GLint level,
-GLint xoffset, GLint yoffset, GLint zoffset,
-GLsizei width, GLsizei height, GLsizei depth,
-GLenum format, GLenum type, GLsizei bufSize,
-GLvoid *pixels, const char *caller)
+teximage_error_check(struct gl_context *ctx,
+ struct gl_texture_image *texImage,
+ GLenum format, const char *caller)
 {
-   struct gl_texture_image *texImage;
-   GLenum baseFormat, err;
-   GLint maxLevels;
-
-   assert(texObj);
-
-   if (texObj->Target == 0) {
-  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
-  return true;
-   }
-
-   maxLevels = _mesa_max_texture_levels(ctx, target);
-   if (level < 0 || level >= maxLevels) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
-  return true;
-   }
-
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-  _mesa_error(ctx, err, "%s(format/type)", caller);
-  return true;
-   }
-
-   if (dimensions_error_check(ctx, texObj, target, level,
-  xoffset, yoffset, zoffset,
-  width, height, depth, caller)) {
-  return true;
-   }
-
-   if (pbo_error_check(ctx, target, width, height, depth,
-   format, type, bufSize, pixels, caller)) {
-  return true;
-   }
-
-   texImage = select_tex_image(texObj, target, level, zoffset);
+   GLenum baseFormat;
assert(texImage);
 
/*
@@ -1218,8 +1180,8 @@ getteximage_error_check(struct gl_context *ctx,
   return true;
}
else if (_mesa_is_stencil_format(format)
-   && !_mesa_is_depthstencil_format(baseFormat)
-   && !_mesa_is_stencil_format(baseFormat)) {
+&& !_mesa_is_depthstencil_format(baseFormat)
+&& !_mesa_is_stencil_format(baseFormat)) {
   _mesa_error(ctx, GL_INVALID_OPERATION,
   "%s(format mismatch)", caller);
   return true;
@@ -1248,6 +1210,62 @@ getteximage_error_check(struct gl_context *ctx,
 }
 
 
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+getteximage_error_check(struct gl_context *ctx,
+struct gl_texture_object *texObj,
+GLenum target, GLint level,
+GLint xoffset, GLint yoffset, GLint zoffset,
+GLsizei width, GLsizei height, GLsizei depth,
+GLenum format, GLenum type, GLsizei bufSize,
+GLvoid *pixels, const char *caller)
+{
+   struct gl_texture_image *texImage;
+   GLenum err;
+   GLint maxLevels;
+
+   assert(texObj);
+
+   if (texObj->Target == 0) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+  return true;
+   }
+
+   maxLevels = _mesa_max_texture_levels(ctx, target);
+   if (level < 0 || level >= maxLevels) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
+  return true;
+   }
+
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+  _mesa_error(ctx, err, "%s(format/type)", caller);
+  return true;
+   }
+
+   if (dimensions_error_check(ctx, texObj, target, level,
+  xoffset, yoffset, zoffset,
+  width, height, depth, caller)) {
+  return true;
+   }
+
+   if (pbo_error_check(ctx, target, width, height, depth,
+   format, type, bufSize, pixels, caller)) {
+  return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   if (teximage_error_check(ctx, texImage, format, caller)) {
+  return true;
+   }
+
+   return false;
+}
+
+
 /**
  * Return the width, height and depth of a texture image.
  * This function must be resilient to bad parameter values since
-- 
2.19.1

___
mesa-dev mailing list

[Mesa-dev] [PATCH 2/6] mesa/main: remove bogus error for zero-sized images

2018-11-22 Thread Erik Faye-Lund
The explanation quotes the spec on the following wording to justify the
error:

"An INVALID_VALUE error is generated if xoffset + width is greater than
 the texture’s width, yoffset + height is greater than the  texture’s
 height, or zoffset + depth is greater than the texture’s depth."

However, this shouldn't generate an error in the case where *all three*
of width, xoffset and the texture's width are zero. In this case, we end
up generating an unspecified error.

So let's remove this check, and instead make sure that we consider this
as an empty texture.

So let's not generate an error; there's none mandated in the spec for the
xoffset/yoffset/zoffset = 0 case. We already avoid doing any work in
this case, because of the final, non-error generating check in this
function.

Fixes: b37b35a5d26 "getteximage: assume texture image is empty for non defined 
levels"
Signed-off-by: Erik Faye-Lund 
---
 src/mesa/main/texgetimage.c | 49 ++---
 1 file changed, 13 insertions(+), 36 deletions(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 2f44f94d37b..c30ca703242 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -900,8 +900,7 @@ select_tex_image(const struct gl_texture_object *texObj, 
GLenum target,
 
 /**
  * Error-check the offset and size arguments to
- * glGet[Compressed]TextureSubImage().  Also checks if the specified
- * texture image is missing.
+ * glGet[Compressed]TextureSubImage().
  * \return true if error, false if no error.
  */
 static bool
@@ -913,6 +912,7 @@ dimensions_error_check(struct gl_context *ctx,
const char *caller)
 {
const struct gl_texture_image *texImage;
+   GLuint imageWidth = 0, imageHeight = 0, imageDepth = 0;
 
if (xoffset < 0) {
   _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
@@ -1002,61 +1002,38 @@ dimensions_error_check(struct gl_context *ctx,
}
 
texImage = select_tex_image(texObj, target, level, zoffset);
-   if (!texImage) {
-  /* Trying to return a non-defined level is a valid operation per se, as
-   * OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries") does not
-   * handle this case as an error.
-   *
-   * Rather, we need to look at section 8.22 ("Texture State and Proxy
-   * State"):
-   *
-   *   "Each initial texture image is null. It has zero width, height, and
-   *depth, internal format RGBA, or R8 for buffer textures, component
-   *sizes set to zero and component types set to NONE, the compressed
-   *flag set to FALSE, a zero compressed size, and the bound buffer
-   *object name is zero."
-   *
-   * This means we need to assume the image for the non-defined level is
-   * an empty image. With this assumption, we can go back to section
-   * 8.11.4 and checking again the errors:
-   *
-   *   "An INVALID_VALUE error is generated if xoffset + width is greater
-   *than the texture’s width, yoffset + height is greater than the
-   *texture’s height, or zoffset + depth is greater than the texture’s
-   *depth."
-   *
-   * Thus why we return INVALID_VALUE.
-   */
-  _mesa_error(ctx, GL_INVALID_VALUE, "%s(missing image)", caller);
-  return true;
+   if (texImage) {
+  imageWidth = texImage->Width;
+  imageHeight = texImage->Height;
+  imageDepth = texImage->Depth;
}
 
-   if (xoffset + width > texImage->Width) {
+   if (xoffset + width > imageWidth) {
   _mesa_error(ctx, GL_INVALID_VALUE,
   "%s(xoffset %d + width %d > %u)",
-  caller, xoffset, width, texImage->Width);
+  caller, xoffset, width, imageWidth);
   return true;
}
 
-   if (yoffset + height > texImage->Height) {
+   if (yoffset + height > imageHeight) {
   _mesa_error(ctx, GL_INVALID_VALUE,
   "%s(yoffset %d + height %d > %u)",
-  caller, yoffset, height, texImage->Height);
+  caller, yoffset, height, imageHeight);
   return true;
}
 
if (target != GL_TEXTURE_CUBE_MAP) {
   /* Cube map error checking was done above */
-  if (zoffset + depth > texImage->Depth) {
+  if (zoffset + depth > imageDepth) {
  _mesa_error(ctx, GL_INVALID_VALUE,
  "%s(zoffset %d + depth %d > %u)",
- caller, zoffset, depth, texImage->Depth);
+ caller, zoffset, depth, imageDepth);
  return true;
   }
}
 
/* Extra checks for compressed textures */
-   {
+   if (texImage) {
   GLuint bw, bh, bd;
   _mesa_get_format_block_size_3d(texImage->TexFormat, &bw, &bh, &bd);
   if (bw > 1 || bh > 1 || bd > 1) {
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] mesa/main: remove ARB suffix from glGetnTexImage

2018-11-22 Thread Erik Faye-Lund
This function has been core since OpenGL 4.3, so naming the
implementation and reporting errors using an ARB-suffix can be
confusing.

Signed-off-by: Erik Faye-Lund 
---
 src/mapi/glapi/gen/ARB_robustness.xml | 2 +-
 src/mapi/glapi/gen/gl_API.xml | 9 +
 src/mesa/main/texgetimage.c   | 6 +++---
 src/mesa/main/texgetimage.h   | 4 ++--
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_robustness.xml 
b/src/mapi/glapi/gen/ARB_robustness.xml
index 1f6ac4696b6..47c417390df 100644
--- a/src/mapi/glapi/gen/ARB_robustness.xml
+++ b/src/mapi/glapi/gen/ARB_robustness.xml
@@ -73,7 +73,7 @@
 
 
 
-
+
 
 
 
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 929e5f6b024..f4d0808f13b 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -2817,6 +2817,15 @@
 
 
 
+
+
+
+
+
+
+
+
+
 
 
 
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 0ab9ed445d6..2f44f94d37b 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -1380,11 +1380,11 @@ get_texture_image(struct gl_context *ctx,
 
 
 void GLAPIENTRY
-_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type,
-  GLsizei bufSize, GLvoid *pixels)
+_mesa_GetnTexImage(GLenum target, GLint level, GLenum format, GLenum type,
+   GLsizei bufSize, GLvoid *pixels)
 {
GET_CURRENT_CONTEXT(ctx);
-   static const char *caller = "glGetnTexImageARB";
+   static const char *caller = "glGetnTexImage";
GLsizei width, height, depth;
struct gl_texture_object *texObj;
 
diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h
index 3928e7abb7b..9a5874300e7 100644
--- a/src/mesa/main/texgetimage.h
+++ b/src/mesa/main/texgetimage.h
@@ -56,8 +56,8 @@ extern void GLAPIENTRY
 _mesa_GetTexImage( GLenum target, GLint level,
GLenum format, GLenum type, GLvoid *pixels );
 extern void GLAPIENTRY
-_mesa_GetnTexImageARB( GLenum target, GLint level, GLenum format,
-   GLenum type, GLsizei bufSize, GLvoid *pixels );
+_mesa_GetnTexImage(GLenum target, GLint level, GLenum format,
+   GLenum type, GLsizei bufSize, GLvoid *pixels);
 extern void GLAPIENTRY
 _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
   GLenum type, GLsizei bufSize, GLvoid *pixels);
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: Set the FBO error state INCOMPLETE_ATTACHMENT only for SRGB_R8

2018-11-22 Thread Gert Wollny
Originally the driver reported GL_FRAMEBUFFER_UNSUPPORTED in all cases;
adding more specific error messages was not correct and broke many tests.
Mostly revert this and only report GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT
for MESA_FORMAT_R_SRGB8.

Fixes: ebcde3454552adc6d3fea8af2207aafaba857796
  i965: be more specific about FBO completeness errors
Signed-off-by: Gert Wollny 
---
 src/mesa/drivers/dri/i965/intel_fbo.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index 7e40d61a47..5bcd846a1b 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -719,7 +719,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
   "FBO incomplete: separate stencil unsupported\n");
 }
 if (stencil_mt->format != MESA_FORMAT_S_UINT8) {
-   fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT,
+   fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
   "FBO incomplete: separate stencil is %s "
   "instead of S8\n",
   _mesa_get_format_name(stencil_mt->format));
@@ -750,7 +750,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
*/
   rb = fb->Attachment[i].Renderbuffer;
   if (rb == NULL) {
-fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT,
+fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
"FBO incomplete: attachment without "
"renderbuffer\n");
 continue;
@@ -771,8 +771,15 @@ intel_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
 continue;
   }
 
+ if (rb->Format == MESA_FORMAT_R_SRGB8) {
+fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT,
+   "FBO incomplete: Format not color renderable: %s\n",
+   _mesa_get_format_name(rb->Format));
+continue;
+ }
+
   if (!brw_render_target_supported(brw, rb)) {
-fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT,
+fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED,
"FBO incomplete: Unsupported HW "
"texture/renderbuffer format attached: %s\n",
_mesa_get_format_name(intel_rb_format(irb)));
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Explicitly handle swizzles for MESA_FORMAT_R_SRGB8

2018-11-22 Thread Gert Wollny
The format is emulated by using ISL_FORMAT_L8_SRGB, therefore we need to
force swizzles for the GBA channels. However, doing this only based on the
data type GL_RED breaks other formats, therefore, test specifically for the
format.

Fixes: 5363869d4971780401b21bb75083ef2518c12be
  965: Force zero swizzles for unused components in GL_RED and GL_RG
Signed-off-by: Gert Wollny 
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 018bae98e8..4daa0e2add 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -420,11 +420,15 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
   }
   break;
case GL_RED:
-  swizzles[1] = SWIZZLE_ZERO;
+  if (img->TexFormat == MESA_FORMAT_R_SRGB8) {
+ swizzles[0] = SWIZZLE_X;
+ swizzles[1] = SWIZZLE_ZERO;
+ swizzles[2] = SWIZZLE_ZERO;
+ swizzles[3] = SWIZZLE_ONE;
+ break;
+  }
   /* fallthrough */
case GL_RG:
-  swizzles[2] = SWIZZLE_ZERO;
-  /* fallthrough */
case GL_RGB:
   if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
   img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radv: drop few useless state changes when doing color/depth decompressions

2018-11-22 Thread Samuel Pitoiset
Viewport/scissor don't need to be updated for array textures.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_decompress.c | 47 +--
 src/amd/vulkan/radv_meta_fast_clear.c | 55 +++
 2 files changed, 41 insertions(+), 61 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_decompress.c 
b/src/amd/vulkan/radv_meta_decompress.c
index 0bb77ce0583..fcfd8c2d65a 100644
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -308,34 +308,6 @@ cleanup:
return res;
 }
 
-static void
-emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
- const VkExtent2D *depth_decomp_extent,
- VkPipeline pipeline_h)
-{
-   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-
-   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-pipeline_h);
-
-   radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 
&(VkViewport) {
-   .x = 0,
-   .y = 0,
-   .width = depth_decomp_extent->width,
-   .height = depth_decomp_extent->height,
-   .minDepth = 0.0f,
-   .maxDepth = 1.0f
-   });
-
-   radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 
&(VkRect2D) {
-   .offset = { 0, 0 },
-   .extent = *depth_decomp_extent,
-   });
-
-   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
-}
-
-
 enum radv_depth_op {
DEPTH_DECOMPRESS,
DEPTH_RESUMMARIZE,
@@ -388,6 +360,23 @@ static void radv_process_depth_image_inplace(struct 
radv_cmd_buffer *cmd_buffer,
unreachable("unknown operation");
}
 
+   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+pipeline_h);
+
+   radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
+   .x = 0,
+   .y = 0,
+   .width = width,
+   .height = height,
+   .minDepth = 0.0f,
+   .maxDepth = 1.0f
+   });
+
+   radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
+   .offset = { 0, 0 },
+   .extent = { width, height },
+   });
+
for (uint32_t layer = 0; layer < radv_get_layerCount(image, 
subresourceRange); layer++) {
struct radv_image_view iview;
 
@@ -442,7 +431,7 @@ static void radv_process_depth_image_inplace(struct 
radv_cmd_buffer *cmd_buffer,
   },
   VK_SUBPASS_CONTENTS_INLINE);
 
-   emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, 
pipeline_h);
+   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
radv_CmdEndRenderPass(cmd_buffer_h);
 
radv_DestroyFramebuffer(device_h, fb_h,
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index 3f5d0054046..15285200b45 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -549,35 +549,6 @@ radv_device_init_meta_fast_clear_flush_state(struct 
radv_device *device, bool on
return radv_device_init_meta_fast_clear_flush_state_internal(device);
 }
 
-static void
-emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
- const VkExtent2D *resolve_extent,
- VkPipeline pipeline)
-{
-   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-
-   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-pipeline);
-
-   radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 
&(VkViewport) {
-   .x = 0,
-   .y = 0,
-   .width = resolve_extent->width,
-   .height = resolve_extent->height,
-   .minDepth = 0.0f,
-   .maxDepth = 1.0f
-   });
-
-   radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, 
&(VkRect2D) {
-   .offset = (VkOffset2D) { 0, 0 },
-   .extent = (VkExtent2D) { resolve_extent->width, 
resolve_extent->height },
-   });
-
-   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
-   cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-RADV_CMD_FLAG_FLUSH_AND_INV_CB_META);
-}
-
 static void
 radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
  struct radv_image *image, bool value)
@@ -638,6 +609,24 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
radv_emit_set_predication_state_from_image(cmd_buffer, image, 
true);
cmd_buffer->state.predicating = true;
}
+
+   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POI

[Mesa-dev] [PATCH 2/3] radv: remove unused pending_clears param in the transition path

2018-11-22 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7367e24fd28..1e7954434fe 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -59,8 +59,7 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
 VkImageLayout dst_layout,
 uint32_t src_family,
 uint32_t dst_family,
-const VkImageSubresourceRange *range,
-VkImageAspectFlags pending_clears);
+const VkImageSubresourceRange *range);
 
 const struct radv_dynamic_state default_dynamic_state = {
.viewport = {
@@ -2280,8 +2279,7 @@ static void radv_handle_subpass_image_transition(struct 
radv_cmd_buffer *cmd_buf
radv_handle_image_transition(cmd_buffer,
 view->image,
 
cmd_buffer->state.attachments[idx].current_layout,
-att.layout, 0, 0, &range,
-
cmd_buffer->state.attachments[idx].pending_clear_aspects);
+att.layout, 0, 0, &range);
 
cmd_buffer->state.attachments[idx].current_layout = att.layout;
 
@@ -4260,8 +4258,7 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
   VkImageLayout dst_layout,
   unsigned src_queue_mask,
   unsigned dst_queue_mask,
-  const VkImageSubresourceRange 
*range,
-  VkImageAspectFlags 
pending_clears)
+  const VkImageSubresourceRange 
*range)
 {
if (!radv_image_has_htile(image))
return;
@@ -4407,8 +4404,7 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
 VkImageLayout dst_layout,
 uint32_t src_family,
 uint32_t dst_family,
-const VkImageSubresourceRange *range,
-VkImageAspectFlags pending_clears)
+const VkImageSubresourceRange *range)
 {
if (image->exclusive && src_family != dst_family) {
/* This is an acquire or a release operation and there will be
@@ -4438,7 +4434,7 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
radv_handle_depth_image_transition(cmd_buffer, image,
   src_layout, dst_layout,
   src_queue_mask, 
dst_queue_mask,
-  range, pending_clears);
+  range);
} else {
radv_handle_color_image_transition(cmd_buffer, image,
   src_layout, dst_layout,
@@ -4512,8 +4508,7 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer,
 pImageMemoryBarriers[i].newLayout,
 
pImageMemoryBarriers[i].srcQueueFamilyIndex,
 
pImageMemoryBarriers[i].dstQueueFamilyIndex,
-
&pImageMemoryBarriers[i].subresourceRange,
-0);
+
&pImageMemoryBarriers[i].subresourceRange);
}
 
/* Make sure CP DMA is idle because the driver might have performed a
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radv: implement some SW counters

2018-11-22 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c  | 23 +++
 src/amd/vulkan/radv_device.c  |  4 ++
 src/amd/vulkan/radv_meta.c|  2 +
 src/amd/vulkan/radv_meta_buffer.c |  6 ++
 src/amd/vulkan/radv_meta_clear.c  | 11 
 src/amd/vulkan/radv_meta_copy.c   |  6 ++
 src/amd/vulkan/radv_meta_decompress.c |  1 +
 src/amd/vulkan/radv_meta_fast_clear.c |  3 +
 src/amd/vulkan/radv_meta_resolve.c|  2 +
 src/amd/vulkan/radv_meta_resolve_cs.c |  1 +
 src/amd/vulkan/radv_meta_resolve_fs.c |  2 +
 src/amd/vulkan/radv_private.h | 51 
 src/amd/vulkan/radv_wsi.c | 87 +++
 src/amd/vulkan/si_cmd_buffer.c| 19 ++
 14 files changed, 218 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f13768b4ada..7367e24fd28 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -907,6 +907,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
cmd_buffer->state.emitted_pipeline = pipeline;
 
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
+
+   cmd_buffer->counters.counters[RADV_NUM_EMITTED_PIPELINES]++;
 }
 
 static void
@@ -1822,6 +1824,10 @@ radv_flush_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
 
if (unlikely(cmd_buffer->device->trace_bo))
radv_save_descriptors(cmd_buffer, bind_point);
+
+   assert(cmd_buffer->cs->cdw <= cdw_max);
+
+   cmd_buffer->counters.counters[RADV_NUM_FLUSH_DESCRIPTORS]++;
 }
 
 static void
@@ -1878,6 +1884,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 
cmd_buffer->push_constant_stages &= ~stages;
assert(cmd_buffer->cs->cdw <= cdw_max);
+
+   cmd_buffer->counters.counters[RADV_NUM_FLUSH_CONSTANTS]++;
 }
 
 static void
@@ -1929,6 +1937,8 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
cmd_buffer->state.vb_va = va;
cmd_buffer->state.vb_size = count * 16;
cmd_buffer->state.prefetch_L2_mask |= 
RADV_PREFETCH_VBO_DESCRIPTORS;
+
+   cmd_buffer->counters.counters[RADV_NUM_VB_UPDATES]++;
}
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
 }
@@ -2463,6 +2473,7 @@ VkResult radv_BeginCommandBuffer(
}
 
memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
+   memset(&cmd_buffer->counters, 0, sizeof(cmd_buffer->counters));
cmd_buffer->state.last_primitive_reset_en = -1;
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.last_num_instances = -1;
@@ -2816,6 +2827,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
 
if (unlikely(cmd_buffer->device->trace_bo))
radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+
+   cmd_buffer->counters.counters[RADV_NUM_EMITTED_PIPELINES]++;
 }
 
 static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,
@@ -3710,6 +3723,8 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 
assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
+
+   cmd_buffer->counters.counters[RADV_NUM_DRAW_CALLS]++;
 }
 
 void radv_CmdDraw(
@@ -4105,6 +4120,8 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
}
 
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
+
+   cmd_buffer->counters.counters[RADV_NUM_DISPATCH_CALLS]++;
 }
 
 void radv_CmdDispatchBase(
@@ -4233,6 +4250,8 @@ static void radv_initialize_htile(struct radv_cmd_buffer 
*cmd_buffer,
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
 
radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
+
+   cmd_buffer->counters.counters[RADV_NUM_HTILE_INITS]++;
 }
 
 static void radv_handle_depth_image_transition(struct radv_cmd_buffer 
*cmd_buffer,
@@ -4283,6 +4302,8 @@ static void radv_initialise_cmask(struct radv_cmd_buffer 
*cmd_buffer,
state->flush_bits |= radv_clear_cmask(cmd_buffer, image, value);
 
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+   cmd_buffer->counters.counters[RADV_NUM_CMASK_INITS]++;
 }
 
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
@@ -4297,6 +4318,8 @@ void radv_initialize_dcc(struct radv_cmd_buffer 
*cmd_buffer,
 
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+   cmd_buffer->counters.counters[RADV_NUM_DCC_INITS]++;
 }
 
 /**
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 70084a2b605..91f8db6f3bc 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2772,6 +2772,10 @@ VkResult radv_QueueSubmit(
can_patch = false;
 
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+
+  

Re: [Mesa-dev] [PATCH 1/3] radv: implement some SW counters

2018-11-22 Thread Samuel Pitoiset

Oops.

I was not planning to send this one. Please ignore.

On 11/22/18 7:10 PM, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/radv_cmd_buffer.c  | 23 +++
  src/amd/vulkan/radv_device.c  |  4 ++
  src/amd/vulkan/radv_meta.c|  2 +
  src/amd/vulkan/radv_meta_buffer.c |  6 ++
  src/amd/vulkan/radv_meta_clear.c  | 11 
  src/amd/vulkan/radv_meta_copy.c   |  6 ++
  src/amd/vulkan/radv_meta_decompress.c |  1 +
  src/amd/vulkan/radv_meta_fast_clear.c |  3 +
  src/amd/vulkan/radv_meta_resolve.c|  2 +
  src/amd/vulkan/radv_meta_resolve_cs.c |  1 +
  src/amd/vulkan/radv_meta_resolve_fs.c |  2 +
  src/amd/vulkan/radv_private.h | 51 
  src/amd/vulkan/radv_wsi.c | 87 +++
  src/amd/vulkan/si_cmd_buffer.c| 19 ++
  14 files changed, 218 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f13768b4ada..7367e24fd28 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -907,6 +907,8 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
cmd_buffer->state.emitted_pipeline = pipeline;
  
  	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;

+
+   cmd_buffer->counters.counters[RADV_NUM_EMITTED_PIPELINES]++;
  }
  
  static void

@@ -1822,6 +1824,10 @@ radv_flush_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
  
  	if (unlikely(cmd_buffer->device->trace_bo))

radv_save_descriptors(cmd_buffer, bind_point);
+
+   assert(cmd_buffer->cs->cdw <= cdw_max);
+
+   cmd_buffer->counters.counters[RADV_NUM_FLUSH_DESCRIPTORS]++;
  }
  
  static void

@@ -1878,6 +1884,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
  
  	cmd_buffer->push_constant_stages &= ~stages;

assert(cmd_buffer->cs->cdw <= cdw_max);
+
+   cmd_buffer->counters.counters[RADV_NUM_FLUSH_CONSTANTS]++;
  }
  
  static void

@@ -1929,6 +1937,8 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer 
*cmd_buffer,
cmd_buffer->state.vb_va = va;
cmd_buffer->state.vb_size = count * 16;
cmd_buffer->state.prefetch_L2_mask |= 
RADV_PREFETCH_VBO_DESCRIPTORS;
+
+   cmd_buffer->counters.counters[RADV_NUM_VB_UPDATES]++;
}
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
  }
@@ -2463,6 +2473,7 @@ VkResult radv_BeginCommandBuffer(
}
  
  	memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));

+   memset(&cmd_buffer->counters, 0, sizeof(cmd_buffer->counters));
cmd_buffer->state.last_primitive_reset_en = -1;
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.last_num_instances = -1;
@@ -2816,6 +2827,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
  
  	if (unlikely(cmd_buffer->device->trace_bo))

radv_save_pipeline(cmd_buffer, pipeline, RING_COMPUTE);
+
+   cmd_buffer->counters.counters[RADV_NUM_EMITTED_PIPELINES]++;
  }
  
  static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,

@@ -3710,6 +3723,8 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
  
  	assert(cmd_buffer->cs->cdw <= cdw_max);

radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
+
+   cmd_buffer->counters.counters[RADV_NUM_DRAW_CALLS]++;
  }
  
  void radv_CmdDraw(

@@ -4105,6 +4120,8 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
}
  
  	radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);

+
+   cmd_buffer->counters.counters[RADV_NUM_DISPATCH_CALLS]++;
  }
  
  void radv_CmdDispatchBase(

@@ -4233,6 +4250,8 @@ static void radv_initialize_htile(struct radv_cmd_buffer 
*cmd_buffer,
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
  
  	radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);

+
+   cmd_buffer->counters.counters[RADV_NUM_HTILE_INITS]++;
  }
  
  static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,

@@ -4283,6 +4302,8 @@ static void radv_initialise_cmask(struct radv_cmd_buffer 
*cmd_buffer,
state->flush_bits |= radv_clear_cmask(cmd_buffer, image, value);
  
  	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;

+
+   cmd_buffer->counters.counters[RADV_NUM_CMASK_INITS]++;
  }
  
  void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,

@@ -4297,6 +4318,8 @@ void radv_initialize_dcc(struct radv_cmd_buffer 
*cmd_buffer,
  
  	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |

 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+   cmd_buffer->counters.counters[RADV_NUM_DCC_INITS]++;
  }
  
  /**

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 70084a2b605..91f8db6f3bc 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2772,6 +2772,10 @@ VkResult radv_QueueSubmit(

[Mesa-dev] [PATCH mesa] mesa: use binary search for MESA_EXTENSION_OVERRIDE

2018-11-22 Thread Eric Engestrom
Not a hot path obviously, but the table still has 425 extensions, which
you can go through in just 9 steps with a binary search.

The table is already sorted, as required by other parts of the code and
enforced by mesa's `main-test`.

Signed-off-by: Eric Engestrom 
---
 src/mesa/main/extensions.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4d95a072793fa4c13b69..0aeda39cc22152073f52 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -48,6 +48,13 @@ static char *unrecognized_extensions = NULL;
  */
 #define o(x) offsetof(struct gl_extensions, x)
 
+static int
+extension_name_compare(const void *name, const void *elem)
+{
+   const struct mesa_extension *entry = elem;
+   return strcmp(name, entry->name);
+}
+
 /**
  * Given an extension name, lookup up the corresponding member of struct
  * gl_extensions and return that member's index.  If the name is
@@ -59,15 +66,18 @@ static char *unrecognized_extensions = NULL;
 static int
 name_to_index(const char* name)
 {
-   unsigned i;
+   const struct mesa_extension *entry;
 
-   if (name == 0)
+   if (!name)
   return -1;
 
-   for (i = 0; i < MESA_EXTENSION_COUNT; ++i) {
-  if (strcmp(name, _mesa_extension_table[i].name) == 0)
-return i;
-   }
+   entry = bsearch(name,
+   _mesa_extension_table, MESA_EXTENSION_COUNT,
+   sizeof(_mesa_extension_table[0]),
+   extension_name_compare);
+
+   if (entry)
+  return entry - _mesa_extension_table;
 
return -1;
 }
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH mesa] meson: remove unused include_directories(vulkan)

2018-11-22 Thread Eric Engestrom
The correct include path is "vulkan/…".

Signed-off-by: Eric Engestrom 
---
 include/meson.build | 1 -
 src/vulkan/util/meson.build | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/meson.build b/include/meson.build
index 081c1bc00088c956d47e..389fde9fd9a5193f2594 100644
--- a/include/meson.build
+++ b/include/meson.build
@@ -19,7 +19,6 @@
 # SOFTWARE.
 
 inc_drm_uapi = include_directories('drm-uapi')
-inc_vulkan = include_directories('vulkan')
 inc_d3d9 = include_directories('D3D9')
 inc_gl_internal = include_directories('GL/internal')
 inc_haikugl = include_directories('HaikuGL')
diff --git a/src/vulkan/util/meson.build b/src/vulkan/util/meson.build
index 15e4ff491293d9d1d3db..df09dd35f85d8fff1ec1 100644
--- a/src/vulkan/util/meson.build
+++ b/src/vulkan/util/meson.build
@@ -39,7 +39,7 @@ vk_enum_to_str = custom_target(
 libvulkan_util = static_library(
   'vulkan_util',
   [files_vulkan_util, vk_enum_to_str],
-  include_directories : [inc_common, inc_vulkan],
+  include_directories : inc_common,
   c_args : [c_vis_args],
   build_by_default : false,
 )
-- 
Cheers,
  Eric

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108829] [meson] libglapi exports internal API

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108829

Eric Engestrom  changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |FIXED
             Status|NEW                         |RESOLVED

--- Comment #2 from Eric Engestrom  ---
Fixed as of:

commit 896c59d690e38e92682f9bc509b5e3658aba5670
Author: Eric Engestrom 
Date:   Thu Nov 22 13:33:28 2018 +

glapi: add missing visibility args

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108829
Fixes: 3218056e0eb375eeda470 "meson: Build i965 and dri stack"
Signed-off-by: Eric Engestrom 
Reviewed-by: Emil Velikov 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/22] nir: add type alignment support to lower_io

2018-11-22 Thread Rob Clark
On Wed, Nov 21, 2018 at 4:53 PM Jason Ekstrand  wrote:
>
> On Tue, Nov 13, 2018 at 9:48 AM Karol Herbst  wrote:
>>
>> From: Rob Clark 
>>
>> For cl we can have structs with 8/16/32/64 bit scalar types (as well as,
>> ofc, arrays/structs/etc), which are padded according to 'C' rules.  So
>> for lowering struct deref's we need to not just consider a field's size,
>> but also its alignment.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/compiler/nir/nir.h  | 10 +++
>>  src/compiler/nir/nir_lower_io.c | 52 -
>>  2 files changed, 49 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index c469e111b2c..11e3d18320a 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -2825,10 +2825,20 @@ typedef enum {
>>  */
>> nir_lower_io_force_sample_interpolation = (1 << 1),
>>  } nir_lower_io_options;
>> +typedef struct nir_memory_model {
>> +   int (*type_size)(const struct glsl_type *);
>> +   int (*type_align)(const struct glsl_type *);
>> +} nir_memory_model;
>
>
> I don't really like the name "memory model".  In my mind, that implies a lot 
> more than just a scheme for laying out memory.  Maybe nir_io_layout_cb or 
> nir_io_type_size_align_cb?
>

I guess it is the part of the memory-model that lower_io needs.. maybe
nir_io_memory_model?  But I guess I'd rather leave the name the same
and when we need to add other things memory model related we just add
to that same struct and pass it wherever else it is needed.  It seems
nice to only have one struct of call backs for all things memory model
related and re-use that everywhere that memory model needs to be
abstracted rather than a different one for each lowering pass that
needs to care.

Failing that, nir_io_layout_cb is fine; nir_io_type_size_align_cb
seems a bit clunky.

(my $0.02)

BR,
-R


> I made this comment to Karol on IRC but I did something similar but with a 
> different approach with glsl_get_natural_size_align.  I think I like this 
> approach better.  It's potentially a bit less efficient but it's way simpler. 
>  We should convert the constant lowering code over to it so we can be 
> consistent.
>
>>
>>  bool nir_lower_io(nir_shader *shader,
>>nir_variable_mode modes,
>>int (*type_size)(const struct glsl_type *),
>>nir_lower_io_options);
>> +// TEMP use different name to avoid fixing all the callers yet:
>> +bool nir_lower_io2(nir_shader *shader,
>> +  nir_variable_mode modes,
>> +  const nir_memory_model *mm,
>> +  nir_lower_io_options);
>> +
>>  nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
>>  nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
>>
>> diff --git a/src/compiler/nir/nir_lower_io.c 
>> b/src/compiler/nir/nir_lower_io.c
>> index 2a6c284de2b..292baf9e4fc 100644
>> --- a/src/compiler/nir/nir_lower_io.c
>> +++ b/src/compiler/nir/nir_lower_io.c
>> @@ -38,7 +38,7 @@
>>  struct lower_io_state {
>> void *dead_ctx;
>> nir_builder builder;
>> -   int (*type_size)(const struct glsl_type *type);
>> +   const nir_memory_model *mm;
>> nir_variable_mode modes;
>> nir_lower_io_options options;
>>  };
>> @@ -86,12 +86,26 @@ nir_is_per_vertex_io(const nir_variable *var, 
>> gl_shader_stage stage)
>> return false;
>>  }
>>
>> +static int
>> +default_type_align(const struct glsl_type *type)
>> +{
>> +   return 1;
>> +}
>> +
>> +static inline int
>> +align(int value, int alignment)
>> +{
>> +   return (value + alignment - 1) & ~(alignment - 1);
>> +}
>
>
> we have an ALIGN macro which should be accessible from here which does 
> exactly that.
>
>>
>> +
>>  static nir_ssa_def *
>>  get_io_offset(nir_deref_instr *deref, nir_ssa_def **vertex_index,
>>struct lower_io_state *state, unsigned *component)
>>  {
>> nir_builder *b = &state->builder;
>> -   int (*type_size)(const struct glsl_type *) = state->type_size;
>> +   int (*type_size)(const struct glsl_type *) = state->mm->type_size;
>> +   int (*type_align)(const struct glsl_type *) = state->mm->type_align ?
>> +  state->mm->type_align : default_type_align;
>> nir_deref_path path;
>> nir_deref_path_init(&path, deref, NULL);
>>
>> @@ -137,7 +151,10 @@ get_io_offset(nir_deref_instr *deref, nir_ssa_def 
>> **vertex_index,
>>
>>   unsigned field_offset = 0;
>>   for (unsigned i = 0; i < (*p)->strct.index; i++) {
>> -field_offset += type_size(glsl_get_struct_field(parent->type, 
>> i));
>> +const struct glsl_type *field_type =
>> +   glsl_get_struct_field(parent->type, i);
>> +field_offset = align(field_offset, type_align(field_type));
>> +field_offset += type_size(field_type);
>>   }
>>   offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
>>} else {
>> @@ -207,7 +224,7 @@ lower_load(nir_intrinsic_

Re: [Mesa-dev] [Mesa-announce] [ANNOUNCE] mesa 18.3.0-rc4

2018-11-22 Thread Marek Olšák
Also this one is probably a blocker:
https://patchwork.freedesktop.org/patch/262760/

Without it, the computer may run out of memory depending on the window
system.

Marek

On Thu, Nov 22, 2018 at 8:38 AM Emil Velikov 
wrote:

> The fourth release candidate for Mesa 18.3.0 is now available.
>
> As per the issue tracker [1] we have one outstanding bug blocking the
> release.
> [1] https://bugs.freedesktop.org/show_bug.cgi?id=108530
>
>
> Andrii Simiklit (1):
>   i965/batch: avoid reverting batch buffer if saved state is an empty
>
> Emil Velikov (4):
>   travis: drop unneeded x11proto-xf86vidmode-dev
>   glx: make xf86vidmode mandatory for direct rendering
>   travis: adding missing x11-xcb for meson+vulkan
>   Update version to 18.3.0-rc4
>
> Gert Wollny (1):
>   r600: clean up the GS ring buffers when the context is destroyed
>
> Lionel Landwerlin (1):
>   egl/dri: fix error value with unknown drm format
>
> Marek Olšák (1):
>   radeonsi: go back to using bottom-of-pipe for beginning of
> TIME_ELAPSED
>
> Nicolai Hähnle (1):
>   radeonsi: fix an out-of-bounds read reported by ASAN
>
> git tag: mesa-18.3.0-rc4
>
> https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.gz
> MD5:  5401533e9fec6c65bdbcae9d0e064091  mesa-18.3.0-rc4.tar.gz
> SHA1: 3b58f9917cdc102f5dfc161d452234ffa254a930  mesa-18.3.0-rc4.tar.gz
> SHA256: f674aae203840d27e84cbf523384036d45c415d766a6e771aac93a2c71ce4c17
> mesa-18.3.0-rc4.tar.gz
> SHA512:
> 26965ba394e9691d76acf87e378ccc461752fcc840a11dd2b15109a145c704bdb5a959a91617397275bc95146e070d69cce00e9490c61c99ef3a431559ef56c3
> mesa-18.3.0-rc4.tar.gz
> PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.gz.sig
>
> https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.xz
> MD5:  04083daf8985d3c95c061d9fda4f8d2e  mesa-18.3.0-rc4.tar.xz
> SHA1: 5bf144ba0b5cad53c85a3b0cdf4d40bddb720c75  mesa-18.3.0-rc4.tar.xz
> SHA256: a3fb029886dc9c12b888fafc76642fa503df0005b41523ba745c68d4fd9b9c39
> mesa-18.3.0-rc4.tar.xz
> SHA512:
> ee1adf9f8237a4f72a8b44ba9aeeb58fad041240e98ab71da6203f2a634f4b9dcd2758b4b5c592104cd51e431acb5292e79d8dd5a61a21c18ae7f93a9d98af41
> mesa-18.3.0-rc4.tar.xz
> PGP:  https://mesa.freedesktop.org/archive/mesa-18.3.0-rc4.tar.xz.sig
>
> ___
> mesa-announce mailing list
> mesa-annou...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-announce
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 3/5] amd/nir: Implement the new ordered/unordered comparisons

2018-11-22 Thread Jason Ekstrand
They map directly to LLVM opcodes so it's really easy.
---
 src/amd/common/ac_nir_to_llvm.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index c82c2ab548d..97a553505bc 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -708,8 +708,8 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
case nir_op_feq:
result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
break;
-   case nir_op_fneu:
-   result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
+   case nir_op_fne:
+   result = emit_float_cmp(&ctx->ac, LLVMRealONE, src[0], src[1]);
break;
case nir_op_flt:
result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
@@ -717,6 +717,18 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
case nir_op_fge:
result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
break;
+   case nir_op_fequ:
+   result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
+   break;
+   case nir_op_fneu:
+   result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
+   break;
+   case nir_op_fltu:
+   result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
+   break;
+   case nir_op_fgeu:
+   result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
+   break;
case nir_op_fabs:
result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
  ac_to_float_type(&ctx->ac, 
def_type), src[0]);
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 2/5] nir: Add unordered comparisons and ordered fne

2018-11-22 Thread Jason Ekstrand
---
 src/compiler/nir/nir.h|  8 
 src/compiler/nir/nir_loop_analyze.c   | 12 
 src/compiler/nir/nir_opcodes.py   |  4 
 src/compiler/nir/nir_opt_algebraic.py |  5 +
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4271f237235..20ff9a87297 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1561,6 +1561,10 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
case nir_op_flt:
case nir_op_fge:
case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fltu:
+   case nir_op_fgeu:
+   case nir_op_fequ:
case nir_op_fneu:
case nir_op_ilt:
case nir_op_ult:
@@ -2132,6 +2136,10 @@ typedef struct nir_shader_compiler_options {
 
bool lower_ldexp;
 
+   bool lower_fltu;
+   bool lower_fgeu;
+   bool lower_fne_to_fequ;
+
bool lower_pack_half_2x16;
bool lower_pack_unorm_2x16;
bool lower_pack_snorm_2x16;
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index d73314a8a44..d6cba541a10 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -376,6 +376,10 @@ get_iteration(nir_op cond_op, nir_const_value *initial, 
nir_const_value *step,
case nir_op_fge:
case nir_op_flt:
case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fgeu:
+   case nir_op_fltu:
+   case nir_op_fequ:
case nir_op_fneu: {
   float initial_val = initial->f32[0];
   float span = limit->f32[0] - initial_val;
@@ -547,10 +551,10 @@ find_trip_count(loop_info_state *state)
   bool limit_rhs = true;
 
   switch (alu->op) {
-  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
-  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
-  case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fneu: case nir_op_ine:
+  case nir_op_fgeu: case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
+  case nir_op_fltu: case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
+  case nir_op_fequ: case nir_op_feq:  case nir_op_ieq:
+  case nir_op_fneu: case nir_op_fne:  case nir_op_ine:
 
  /* We assume that the limit is the "right" operand */
  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 2375309aca6..032168bae49 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -491,6 +491,10 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
src1)")
 binop_compare("flt", tfloat, "", "src0 < src1")
 binop_compare("fge", tfloat, "", "src0 >= src1")
 binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "!isnan(src0) && !isnan(src1) && 
src0 != src1")
+binop_compare("fltu", tfloat, "", "!(src0 >= src1)")
+binop_compare("fgeu", tfloat, "", "!(src0 < src1)")
+binop_compare("fequ", tfloat, commutative, "isnan(src0) || isnan(src1) || src0 
== src1")
 binop_compare("fneu", tfloat, commutative, "src0 != src1")
 binop_compare("ilt", tint, "", "src0 < src1")
 binop_compare("ige", tint, "", "src0 >= src1")
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 131d2721934..64327708a66 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -164,6 +164,11 @@ optimizations = [
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),
 
+   # Comparison lowering
+   (('fltu', a, b), ('inot', ('fge', a, b)), 'options->lower_fltu'),
+   (('fgeu', a, b), ('inot', ('flt', a, b)), 'options->lower_fgeu'),
+   (('fne', a, b), ('inot', ('fequ', a, b)), 'options->lower_fne_to_fequ'),
+
# 0.0 >= b2f(a)
# b2f(a) <= 0.0
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 5/5] spirv: Emit the correct ordered/unordered NIR opcodes directly

2018-11-22 Thread Jason Ekstrand
---
 src/compiler/spirv/vtn_alu.c | 61 
 1 file changed, 6 insertions(+), 55 deletions(-)

diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c
index cff8c76b2a3..0dff64e66f0 100644
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -265,26 +265,26 @@ vtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b,
 * ordered.
 */
case SpvOpFOrdEqual:return nir_op_feq;
-   case SpvOpFUnordEqual:  return nir_op_feq;
+   case SpvOpFUnordEqual:  return nir_op_fequ;
case SpvOpINotEqual:return nir_op_ine;
-   case SpvOpFOrdNotEqual: return nir_op_fneu;
+   case SpvOpFOrdNotEqual: return nir_op_fne;
case SpvOpFUnordNotEqual:   return nir_op_fneu;
case SpvOpULessThan:return nir_op_ult;
case SpvOpSLessThan:return nir_op_ilt;
case SpvOpFOrdLessThan: return nir_op_flt;
-   case SpvOpFUnordLessThan:   return nir_op_flt;
+   case SpvOpFUnordLessThan:   return nir_op_fltu;
case SpvOpUGreaterThan:  *swap = true;  return nir_op_ult;
case SpvOpSGreaterThan:  *swap = true;  return nir_op_ilt;
case SpvOpFOrdGreaterThan:   *swap = true;  return nir_op_flt;
-   case SpvOpFUnordGreaterThan: *swap = true;  return nir_op_flt;
+   case SpvOpFUnordGreaterThan: *swap = true;  return nir_op_fltu;
case SpvOpULessThanEqual:*swap = true;  return nir_op_uge;
case SpvOpSLessThanEqual:*swap = true;  return nir_op_ige;
case SpvOpFOrdLessThanEqual: *swap = true;  return nir_op_fge;
-   case SpvOpFUnordLessThanEqual:   *swap = true;  return nir_op_fge;
+   case SpvOpFUnordLessThanEqual:   *swap = true;  return nir_op_fgeu;
case SpvOpUGreaterThanEqual:return nir_op_uge;
case SpvOpSGreaterThanEqual:return nir_op_ige;
case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
-   case SpvOpFUnordGreaterThanEqual:   return nir_op_fge;
+   case SpvOpFUnordGreaterThanEqual:   return nir_op_fgeu;
 
/* Conversions: */
case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
@@ -508,55 +508,6 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
   break;
}
 
-   case SpvOpFUnordEqual:
-   case SpvOpFUnordNotEqual:
-   case SpvOpFUnordLessThan:
-   case SpvOpFUnordGreaterThan:
-   case SpvOpFUnordLessThanEqual:
-   case SpvOpFUnordGreaterThanEqual: {
-  bool swap;
-  unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
-  unsigned dst_bit_size = glsl_get_bit_size(type);
-  nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
-  src_bit_size, dst_bit_size);
-
-  if (swap) {
- nir_ssa_def *tmp = src[0];
- src[0] = src[1];
- src[1] = tmp;
-  }
-
-  val->ssa->def =
- nir_ior(&b->nb,
- nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
- nir_ior(&b->nb,
- nir_fneu(&b->nb, src[0], src[0]),
- nir_fneu(&b->nb, src[1], src[1])));
-  break;
-   }
-
-   case SpvOpFOrdNotEqual: {
-  /* For all the SpvOpFOrd* comparisons apart from NotEqual, the value
-   * from the ALU will probably already be false if the operands are not
-   * ordered so we don’t need to handle it specially.
-   */
-  bool swap;
-  unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type);
-  unsigned dst_bit_size = glsl_get_bit_size(type);
-  nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap,
-  src_bit_size, dst_bit_size);
-
-  assert(!swap);
-
-  val->ssa->def =
- nir_iand(&b->nb,
-  nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL),
-  nir_iand(&b->nb,
-  nir_feq(&b->nb, src[0], src[0]),
-  nir_feq(&b->nb, src[1], src[1])));
-  break;
-   }
-
case SpvOpBitcast:
   /* From the definition of OpBitcast in the SPIR-V 1.2 spec:
*
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 4/5] intel/compiler: Implement unordered comparisons

2018-11-22 Thread Jason Ekstrand
The vec4 path has only been compile-tested as there's no easy way to
generate a vec4 shader with an unordered equality.
---
 src/intel/compiler/brw_compiler.c   |  3 +++
 src/intel/compiler/brw_fs_nir.cpp   | 20 +---
 src/intel/compiler/brw_vec4_nir.cpp | 21 +
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index fe632c5badc..f9e8fa09a34 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -42,6 +42,9 @@
.lower_fdiv = true,\
.lower_flrp64 = true,  \
.lower_ldexp = true,   \
+   .lower_fltu = true,\
+   .lower_fgeu = true,\
+   .lower_fne_to_fequ = true, \
.lower_cs_local_id_from_index = true,  \
.lower_device_index_to_zero = true,\
.native_integers = true,   \
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index a62d521bb5d..eba3611e447 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1049,6 +1049,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
case nir_op_flt:
case nir_op_fge:
case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fequ:
case nir_op_fneu: {
   fs_reg dest = result;
 
@@ -1056,26 +1058,30 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
   if (bit_size != 32)
  dest = bld.vgrf(op[0].type, 1);
 
-  brw_conditional_mod cond;
   switch (instr->op) {
   case nir_op_flt:
- cond = BRW_CONDITIONAL_L;
+ bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_L);
  break;
   case nir_op_fge:
- cond = BRW_CONDITIONAL_GE;
+ bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_GE);
  break;
   case nir_op_feq:
- cond = BRW_CONDITIONAL_Z;
+ bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z);
+ break;
+  case nir_op_fequ:
+ bld.CMP(dest, op[0], op[0], BRW_CONDITIONAL_NZ);
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */
+   bld.CMP(dest, op[1], op[1], BRW_CONDITIONAL_NZ));
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true, /* inverse */
+   bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_Z));
  break;
   case nir_op_fneu:
- cond = BRW_CONDITIONAL_NZ;
+ bld.CMP(dest, op[0], op[1], BRW_CONDITIONAL_NZ);
  break;
   default:
  unreachable("bad opcode");
   }
 
-  bld.CMP(dest, op[0], op[1], cond);
-
   if (bit_size > 32) {
  bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0));
   } else if(bit_size < 32) {
diff --git a/src/intel/compiler/brw_vec4_nir.cpp 
b/src/intel/compiler/brw_vec4_nir.cpp
index f7f46f5034c..32559e1aade 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1366,6 +1366,27 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
   break;
}
 
+   case nir_op_fequ: {
+  dst_reg cmp_res = dst;
+  if (nir_src_bit_size(instr->src[0].src) == 64)
+ cmp_res = dst_reg(this, glsl_type::dvec4_type);
+
+  vec4_instruction *inst;
+  inst = emit(CMP(cmp_res, op[0], op[0], BRW_CONDITIONAL_NZ));
+  inst = emit(CMP(cmp_res, op[1], op[1], BRW_CONDITIONAL_NZ));
+  inst->predicate = BRW_PREDICATE_NORMAL;
+  inst->predicate_inverse = true;
+  inst = emit(CMP(cmp_res, op[0], op[1], BRW_CONDITIONAL_Z));
+  inst->predicate = BRW_PREDICATE_NORMAL;
+  inst->predicate_inverse = true;
+
+  if (nir_src_bit_size(instr->src[0].src) == 64) {
+ dst_reg cmp_res32 = dst_reg(this, glsl_type::bvec4_type);
+ emit(VEC4_OPCODE_PICK_LOW_32BIT, cmp_res32, src_reg(cmp_res));
+ emit(MOV(dst, src_reg(cmp_res32)));
+  }
+   }
+
case nir_op_ball_iequal2:
case nir_op_ball_iequal3:
case nir_op_ball_iequal4:
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC 1/5] nir: Rename nir_op_fne to nir_op_fneu

2018-11-22 Thread Jason Ekstrand
This way, it's explicit in the opcode name that it's an unordered
comparison.
---
 src/amd/common/ac_nir_to_llvm.c   |  2 +-
 src/compiler/glsl/glsl_to_nir.cpp |  4 +-
 src/compiler/nir/nir.h|  2 +-
 src/compiler/nir/nir_builder.h|  2 +-
 src/compiler/nir/nir_loop_analyze.c   |  4 +-
 src/compiler/nir/nir_lower_alu_to_scalar.c|  2 +-
 src/compiler/nir/nir_lower_double_ops.c   |  6 +--
 src/compiler/nir/nir_opcodes.py   |  2 +-
 src/compiler/nir/nir_opt_algebraic.py | 46 +--
 src/compiler/spirv/vtn_alu.c  | 10 ++--
 src/compiler/spirv/vtn_glsl450.c  |  4 +-
 src/gallium/auxiliary/nir/tgsi_to_nir.c   |  4 +-
 .../drivers/freedreno/ir3/ir3_compiler_nir.c  |  2 +-
 src/gallium/drivers/vc4/vc4_program.c |  4 +-
 src/intel/compiler/brw_fs_nir.cpp |  4 +-
 src/intel/compiler/brw_vec4_nir.cpp   |  4 +-
 16 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a109f5a8156..c82c2ab548d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -708,7 +708,7 @@ static void visit_alu(struct ac_nir_context *ctx, const 
nir_alu_instr *instr)
case nir_op_feq:
result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
break;
-   case nir_op_fne:
+   case nir_op_fneu:
result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
break;
case nir_op_flt:
diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 9f73b721e39..fd6e0cc48ad 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1835,7 +1835,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_nequal:
   if (supports_ints) {
  if (type_is_float(types[0]))
-result = nir_fne(&b, srcs[0], srcs[1]);
+result = nir_fneu(&b, srcs[0], srcs[1]);
  else
 result = nir_ine(&b, srcs[0], srcs[1]);
   } else {
@@ -1878,7 +1878,7 @@ nir_visitor::visit(ir_expression *ir)
   if (supports_ints) {
  if (type_is_float(types[0])) {
 switch (ir->operands[0]->type->vector_elements) {
-   case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+   case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break;
case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a292ec73e1e..4271f237235 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1561,7 +1561,7 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
case nir_op_flt:
case nir_op_fge:
case nir_op_feq:
-   case nir_op_fne:
+   case nir_op_fneu:
case nir_op_ilt:
case nir_op_ult:
case nir_op_ige:
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index 30fa1d7ec8b..d520526e779 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -993,7 +993,7 @@ nir_compare_func(nir_builder *b, enum compare_func func,
case COMPARE_FUNC_EQUAL:
   return nir_feq(b, src0, src1);
case COMPARE_FUNC_NOTEQUAL:
-  return nir_fne(b, src0, src1);
+  return nir_fneu(b, src0, src1);
case COMPARE_FUNC_GREATER:
   return nir_flt(b, src1, src0);
case COMPARE_FUNC_GEQUAL:
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c
index 9c3fd2f286f..d73314a8a44 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -376,7 +376,7 @@ get_iteration(nir_op cond_op, nir_const_value *initial, 
nir_const_value *step,
case nir_op_fge:
case nir_op_flt:
case nir_op_feq:
-   case nir_op_fne: {
+   case nir_op_fneu: {
   float initial_val = initial->f32[0];
   float span = limit->f32[0] - initial_val;
   iter = span / step->f32[0];
@@ -550,7 +550,7 @@ find_trip_count(loop_info_state *state)
   case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
   case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
   case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fne:  case nir_op_ine:
+  case nir_op_fneu: case nir_op_ine:
 
  /* We assume that the limit is the "right" operand */
  basic_ind = get_loop_var(alu->src[0].src.ssa, state);
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c 
b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 7ef032cd164..ef580fb3821 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -200,7 +200,7 @@ lower_alu_instr_scalar(ni

Re: [Mesa-dev] [PATCH 1/2] i965: Explicitely handle swizzles for MESA_FORMAT_R_SRGB8

2018-11-22 Thread Emil Velikov
Hi Gert,

On 2018/11/22, Gert Wollny wrote:
> The format is emulated by using ISL_FORMAT_L8_SRGB, therefore we need to
> force swizzles for the GBA channels. However, doing this only based on the
> data type GL_RED breaks other formats, therefore, test specifically for the
> format.
> 
> Fixes: 5363869d4971780401b21bb75083ef2518c12be
>   965: Force zero swizzles for unused components in GL_RED and GL_RG
The first letter of both the sha and the commit summary is missing.
Using a git alias should make things a bit easier.

git config --global alias.fixes '!git show $1 -s --pretty="format:%h 
(\"%s\")%n"'

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/5] nir: Add unordered comparisons and ordered fne

2018-11-22 Thread Bas Nieuwenhuizen
On Thu, Nov 22, 2018 at 7:47 PM Jason Ekstrand  wrote:
>
> ---
>  src/compiler/nir/nir.h|  8 
>  src/compiler/nir/nir_loop_analyze.c   | 12 
>  src/compiler/nir/nir_opcodes.py   |  4 
>  src/compiler/nir/nir_opt_algebraic.py |  5 +
>  4 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 4271f237235..20ff9a87297 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1561,6 +1561,10 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
> case nir_op_flt:
> case nir_op_fge:
> case nir_op_feq:
> +   case nir_op_fne:
> +   case nir_op_fltu:
> +   case nir_op_fgeu:
> +   case nir_op_fequ:
> case nir_op_fneu:
> case nir_op_ilt:
> case nir_op_ult:
> @@ -2132,6 +2136,10 @@ typedef struct nir_shader_compiler_options {
>
> bool lower_ldexp;
>
> +   bool lower_fltu;
> +   bool lower_fgeu;
> +   bool lower_fne_to_fequ;
> +
> bool lower_pack_half_2x16;
> bool lower_pack_unorm_2x16;
> bool lower_pack_snorm_2x16;
> diff --git a/src/compiler/nir/nir_loop_analyze.c 
> b/src/compiler/nir/nir_loop_analyze.c
> index d73314a8a44..d6cba541a10 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -376,6 +376,10 @@ get_iteration(nir_op cond_op, nir_const_value *initial, 
> nir_const_value *step,
> case nir_op_fge:
> case nir_op_flt:
> case nir_op_feq:
> +   case nir_op_fne:
> +   case nir_op_fgeu:
> +   case nir_op_fltu:
> +   case nir_op_fequ:
> case nir_op_fneu: {
>float initial_val = initial->f32[0];
>float span = limit->f32[0] - initial_val;
> @@ -547,10 +551,10 @@ find_trip_count(loop_info_state *state)
>bool limit_rhs = true;
>
>switch (alu->op) {
> -  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
> -  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
> -  case nir_op_feq:  case nir_op_ieq:
> -  case nir_op_fneu: case nir_op_ine:
> +  case nir_op_fgeu: case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
> +  case nir_op_fltu: case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
> +  case nir_op_fequ: case nir_op_feq:  case nir_op_ieq:
> +  case nir_op_fneu: case nir_op_fne:  case nir_op_ine:
>
>   /* We assume that the limit is the "right" operand */
>   basic_ind = get_loop_var(alu->src[0].src.ssa, state);
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 2375309aca6..032168bae49 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -491,6 +491,10 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
> src1)")
>  binop_compare("flt", tfloat, "", "src0 < src1")
>  binop_compare("fge", tfloat, "", "src0 >= src1")
>  binop_compare("feq", tfloat, commutative, "src0 == src1")
> +binop_compare("fne", tfloat, commutative, "!isnan(src0) && !isnan(src1) && 
> src0 == src1")
> +binop_compare("fltu", tfloat, "", "!(src0 >= src1)")
> +binop_compare("fgeu", tfloat, "", "!(src0 < src1)")
> +binop_compare("fequ", tfloat, commutative, "isnan(src0) || isnan(src1) || 
> src0 == src1")

Please add a comment around this noting that the ones without a suffix are
ordered and the ones with a u suffix are unordered.

Otherwise

Reviewed-by: Bas Nieuwenhuizen

for 1-3, 5

>  binop_compare("fneu", tfloat, commutative, "src0 != src1")
>  binop_compare("ilt", tint, "", "src0 < src1")
>  binop_compare("ige", tint, "", "src0 >= src1")
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 131d2721934..64327708a66 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -164,6 +164,11 @@ optimizations = [
> (('inot', ('ieq', a, b)), ('ine', a, b)),
> (('inot', ('ine', a, b)), ('ieq', a, b)),
>
> +   # Comparison lowering
> +   (('fltu', a, b), ('inot', ('fge', a, b)), 'options->lower_fltu'),
> +   (('fgeu', a, b), ('inot', ('flt', a, b)), 'options->lower_fgeu'),
> +   (('fne', a, b), ('inot', ('fequ', a, b)), 'options->lower_fne_to_fequ'),
> +
> # 0.0 >= b2f(a)
> # b2f(a) <= 0.0
> # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/5] nir: Add unordered comparisons and ordered fne

2018-11-22 Thread Jason Ekstrand
On November 22, 2018 13:48:48 Bas Nieuwenhuizen  
wrote:



On Thu, Nov 22, 2018 at 7:47 PM Jason Ekstrand  wrote:



---
src/compiler/nir/nir.h|  8 
src/compiler/nir/nir_loop_analyze.c   | 12 
src/compiler/nir/nir_opcodes.py   |  4 
src/compiler/nir/nir_opt_algebraic.py |  5 +
4 files changed, 25 insertions(+), 4 deletions(-)


diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4271f237235..20ff9a87297 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1561,6 +1561,10 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
case nir_op_flt:
case nir_op_fge:
case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fltu:
+   case nir_op_fgeu:
+   case nir_op_fequ:
case nir_op_fneu:
case nir_op_ilt:
case nir_op_ult:
@@ -2132,6 +2136,10 @@ typedef struct nir_shader_compiler_options {


bool lower_ldexp;


+   bool lower_fltu;
+   bool lower_fgeu;
+   bool lower_fne_to_fequ;
+
bool lower_pack_half_2x16;
bool lower_pack_unorm_2x16;
bool lower_pack_snorm_2x16;
diff --git a/src/compiler/nir/nir_loop_analyze.c 
b/src/compiler/nir/nir_loop_analyze.c

index d73314a8a44..d6cba541a10 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -376,6 +376,10 @@ get_iteration(nir_op cond_op, nir_const_value 
*initial, nir_const_value *step,

case nir_op_fge:
case nir_op_flt:
case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_fgeu:
+   case nir_op_fltu:
+   case nir_op_fequ:
case nir_op_fneu: {
float initial_val = initial->f32[0];
float span = limit->f32[0] - initial_val;
@@ -547,10 +551,10 @@ find_trip_count(loop_info_state *state)
bool limit_rhs = true;


switch (alu->op) {
-  case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
-  case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
-  case nir_op_feq:  case nir_op_ieq:
-  case nir_op_fneu: case nir_op_ine:
+  case nir_op_fgeu: case nir_op_fge:  case nir_op_ige:  case nir_op_uge:
+  case nir_op_fltu: case nir_op_flt:  case nir_op_ilt:  case nir_op_ult:
+  case nir_op_fequ: case nir_op_feq:  case nir_op_ieq:
+  case nir_op_fneu: case nir_op_fne:  case nir_op_ine:


 /* We assume that the limit is the "right" operand */
 basic_ind = get_loop_var(alu->src[0].src.ssa, state);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 2375309aca6..032168bae49 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -491,6 +491,10 @@ binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / 
src1)")

binop_compare("flt", tfloat, "", "src0 < src1")
binop_compare("fge", tfloat, "", "src0 >= src1")
binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "!isnan(src0) && !isnan(src1) && 
src0 == src1")

+binop_compare("fltu", tfloat, "", "!(src0 >= src1)")
+binop_compare("fgeu", tfloat, "", "!(src0 < src1)")
+binop_compare("fequ", tfloat, commutative, "isnan(src0) || isnan(src1) || 
src0 == src1")


Please add a comment around this noting that the ones without a suffix are
ordered and the ones with a u suffix are unordered.


Yeah, I can do that.


Otherwise


Reviewed-by: Bas Nieuwenhuizen 

Thanks!




for 1-3, 5

binop_compare("fneu", tfloat, commutative, "src0 != src1")
binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py

index 131d2721934..64327708a66 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -164,6 +164,11 @@ optimizations = [
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),


+   # Comparison lowering
+   (('fltu', a, b), ('inot', ('fge', a, b)), 'options->lower_fltu'),
+   (('fgeu', a, b), ('inot', ('flt', a, b)), 'options->lower_fgeu'),
+   (('fne', a, b), ('inot', ('fequ', a, b)), 'options->lower_fne_to_fequ'),
+
# 0.0 >= b2f(a)
# b2f(a) <= 0.0
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
--
2.19.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] meson: Bump version to 0.46 for python module

2018-11-22 Thread Matt Turner
On Wed, Nov 21, 2018 at 10:48 AM Matt Turner  wrote:
>
> Thanks Arfrever and Dylan.
>
> Acked-by: Matt Turner 

Hmm, actually this doesn't seem to work for me. With it applied I get:

src/mesa/drivers/dri/meson.build:59:8: ERROR:  Python object does not
have method path.

which comes from:

  meson.add_install_script(
prog_python.path(),
join_paths(meson.source_root(), 'bin/install_megadrivers.py'),
libmesa_dri_drivers.full_path(),
dri_drivers_path,
dri_link,
  )
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108841] [RADV] SPIRV's control flow attributes do not propagate to LLVM

2018-11-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108841

Bug ID: 108841
   Summary: [RADV] SPIRV's control flow attributes do not
propagate to LLVM
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: godl...@ancient-ritual.com
QA Contact: mesa-dev@lists.freedesktop.org

Hi,

SPIR-V already supports control flow attributes (see SpvLoopControlMask and
SpvSelectionControlMask).

There is the GL_EXT_control_flow_attributes extension that exposes those in
GLSL and glslang already implements it. glslang also implements them in its
HLSL backend.

When I use those attributes in a project of mine I observe some nice reduction
in VGPR usage on AMDVLK. In RADV though I see no difference.

I grepped in mesa for those parameters and I didn't find any code using them.

Is it possible to add support?

Apart from my selfish reasons for asking this, I'd like to mention that DX11
games running through wine/proton/DXVK layers would probably make use of them
since HLSL has supported those for quite some time already.

Thanks.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radv: add a predicate for reflecting DCC decompression state

2018-11-22 Thread Bas Nieuwenhuizen
So to complicate things, drawing with DCC enabled also compresses
things again. (maybe set it on framebuffer CB emit time?)

On Thu, Nov 22, 2018 at 1:47 PM Samuel Pitoiset
 wrote:
>
> It's somehow similar to the FCE predicate.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  | 23 +++
>  src/amd/vulkan/radv_image.c   |  3 ++-
>  src/amd/vulkan/radv_meta_clear.c  |  3 +++
>  src/amd/vulkan/radv_meta_fast_clear.c |  7 +++
>  src/amd/vulkan/radv_private.h |  4 
>  5 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index f13768b4ada..50216b0ffdf 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1363,6 +1363,29 @@ radv_update_fce_metadata(struct radv_cmd_buffer 
> *cmd_buffer,
> radeon_emit(cmd_buffer->cs, pred_val >> 32);
>  }
>
> +/**
> + * Update the DCC predicate to reflect the compression state.
> + */
> +void
> +radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
> +struct radv_image *image, bool value)
> +{
> +   uint64_t pred_val = value;
> +   uint64_t va = radv_buffer_get_va(image->bo);
> +   va += image->offset + image->dcc_pred_offset;
> +
> +   assert(radv_image_has_dcc(image));
> +
> +   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
> +   radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
> +   S_370_WR_CONFIRM(1) |
> +   S_370_ENGINE_SEL(V_370_PFP));
> +   radeon_emit(cmd_buffer->cs, va);
> +   radeon_emit(cmd_buffer->cs, va >> 32);
> +   radeon_emit(cmd_buffer->cs, pred_val);
> +   radeon_emit(cmd_buffer->cs, pred_val >> 32);
> +}
> +
>  /**
>   * Update the fast clear color values if the image is bound as a color 
> buffer.
>   */
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index 7492bf48b51..ebde5519d29 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -858,7 +858,8 @@ radv_image_alloc_dcc(struct radv_image *image)
> /* + 16 for storing the clear values + dcc pred */
> image->clear_value_offset = image->dcc_offset + 
> image->surface.dcc_size;
> image->fce_pred_offset = image->clear_value_offset + 8;
> -   image->size = image->dcc_offset + image->surface.dcc_size + 16;
> +   image->dcc_pred_offset = image->clear_value_offset + 16;
> +   image->size = image->clear_value_offset + 24;
> image->alignment = MAX2(image->alignment, 
> image->surface.dcc_alignment);
>  }
>
> diff --git a/src/amd/vulkan/radv_meta_clear.c 
> b/src/amd/vulkan/radv_meta_clear.c
> index bf88d3a84d9..787f9f178fa 100644
> --- a/src/amd/vulkan/radv_meta_clear.c
> +++ b/src/amd/vulkan/radv_meta_clear.c
> @@ -1248,6 +1248,9 @@ uint32_t
>  radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
>struct radv_image *image, uint32_t value)
>  {
> +   /* Mark the image as being compressed. */
> +   radv_update_dcc_metadata(cmd_buffer, image, true);
> +
> return radv_fill_buffer(cmd_buffer, image->bo,
> image->offset + image->dcc_offset,
> image->surface.dcc_size, value);
> diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
> b/src/amd/vulkan/radv_meta_fast_clear.c
> index a54b78c2a6c..ce088d7332b 100644
> --- a/src/amd/vulkan/radv_meta_fast_clear.c
> +++ b/src/amd/vulkan/radv_meta_fast_clear.c
> @@ -715,6 +715,10 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
> *cmd_buffer,
>  * FMASK and DCC also imply a fast-clear eliminate.
>  */
> radv_update_fce_metadata(cmd_buffer, image, false);
> +
> +   /* Mark the image as being decompressed. */
> +   if (decompress_dcc)
> +   radv_update_dcc_metadata(cmd_buffer, image, false);
> }
>
> radv_meta_restore(&saved_state, cmd_buffer);
> @@ -811,6 +815,9 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer 
> *cmd_buffer,
>
> radv_unaligned_dispatch(cmd_buffer, image->info.width, 
> image->info.height, 1);
>
> +   /* Mark this image as actually being decompressed. */
> +   radv_update_dcc_metadata(cmd_buffer, image, false);
> +
> /* The fill buffer below does its own saving */
> radv_meta_restore(&saved_state, cmd_buffer);
>
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index ac756f2c247..e67f3ac1cd1 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1206,6 +1206,9 @@ void radv_update_color_clear_metadata(struct 
> radv_cmd_buffer *cmd_buffer,
>  void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
>   struct radv_image *image, bool value);
>
> +void radv_update_dcc_me

[Mesa-dev] [PATCH] nir: propagate known constant values into the if-then branch

2018-11-22 Thread Timothy Arceri
shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 7272 -> 7488 (2.97 %)
VGPRS: 4168 -> 4160 (-0.19 %)
Spilled SGPRs: 328 -> 327 (-0.30 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 268212 -> 268656 (0.17 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 452 -> 452 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 160 -> 160 (0.00 %)
VGPRS: 88 -> 88 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 18268 -> 18152 (-0.63 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 26 -> 26 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
 src/compiler/nir/nir_opt_if.c | 60 +++
 1 file changed, 60 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 8a971c43f2..b1b99b4e40 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -574,6 +574,65 @@ opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
return progress;
 }
 
+/* Perform optimisations based on the values we can derive from the evaluation
+ * of if-statement conditions.
+ */
+static bool
+opt_for_known_values(nir_builder *b, nir_if *nif)
+{
+   bool progress = false;
+
+   assert(nif->condition.is_ssa);
+   nir_ssa_def *if_cond = nif->condition.ssa;
+
+   if (if_cond->parent_instr->type != nir_instr_type_alu)
+  return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(if_cond->parent_instr);
+   switch (alu->op) {
+   case nir_op_feq:
+   case nir_op_ieq: {
+  nir_load_const_instr *load_const = NULL;
+  nir_ssa_def *unknown_val = NULL;
+
+  nir_ssa_def *src0 = alu->src[0].src.ssa;
+  nir_ssa_def *src1 = alu->src[1].src.ssa;
+  if (src0a->parent_instr->type == nir_instr_type_load_const) {
+ load_const = nir_instr_as_load_const(src0->parent_instr);
+ unknown_val = src1;
+  } else if (src1->parent_instr->type == nir_instr_type_load_const) {
+ load_const = nir_instr_as_load_const(src1->parent_instr);
+ unknown_val = src0;
+  }
+
+  if (!load_const)
+return false;
+
+  /* TODO: remove this and support swizzles? */
+  if (unknown_val->num_components != 1)
+return false;
+
+  /* Replace unknown ssa uses with the known constant */
+  nir_foreach_use_safe(use_src, unknown_val) {
+ nir_cursor cursor = nir_before_src(use_src, false);
+ nir_block *use_block = nir_cursor_current_block(cursor);
+ if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
+nir_instr_rewrite_src(use_src->parent_instr, use_src,
+  nir_src_for_ssa(&load_const->def));
+return true;
+ }
+  }
+
+  break;
+   }
+
+   default:
+  return false;
+   }
+
+   return false;
+}
+
 static bool
 opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
 {
@@ -625,6 +684,7 @@ opt_if_safe_cf_list(nir_builder *b, struct exec_list 
*cf_list)
  progress |= opt_if_safe_cf_list(b, &nif->then_list);
  progress |= opt_if_safe_cf_list(b, &nif->else_list);
  progress |= opt_if_evaluate_condition_use(b, nif);
+ progress |= opt_for_known_values(b, nif);
  break;
   }
 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: propagate known constant values into the if-then branch

2018-11-22 Thread Timothy Arceri

On 23/11/18 11:49 am, Timothy Arceri wrote:

shader-db results radeonsi (VEGA):

Totals from affected shaders:
SGPRS: 7272 -> 7488 (2.97 %)
VGPRS: 4168 -> 4160 (-0.19 %)
Spilled SGPRs: 328 -> 327 (-0.30 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 268212 -> 268656 (0.17 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 452 -> 452 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

vkpipeline-db results RADV (VEGA):

Totals from affected shaders:
SGPRS: 160 -> 160 (0.00 %)
VGPRS: 88 -> 88 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 18268 -> 18152 (-0.63 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 26 -> 26 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
  src/compiler/nir/nir_opt_if.c | 60 +++
  1 file changed, 60 insertions(+)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 8a971c43f2..b1b99b4e40 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -574,6 +574,65 @@ opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
 return progress;
  }
  
+/* Perform optimisations based on the values we can derive from the evaluation

+ * of if-statement conditions.
+ */
+static bool
+opt_for_known_values(nir_builder *b, nir_if *nif)
+{
+   bool progress = false;
+
+   assert(nif->condition.is_ssa);
+   nir_ssa_def *if_cond = nif->condition.ssa;
+
+   if (if_cond->parent_instr->type != nir_instr_type_alu)
+  return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(if_cond->parent_instr);
+   switch (alu->op) {
+   case nir_op_feq:
+   case nir_op_ieq: {
+  nir_load_const_instr *load_const = NULL;
+  nir_ssa_def *unknown_val = NULL;
+
+  nir_ssa_def *src0 = alu->src[0].src.ssa;
+  nir_ssa_def *src1 = alu->src[1].src.ssa;
+  if (src0a->parent_instr->type == nir_instr_type_load_const) {


 ^--- Whoops this is fixed locally

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] draw: fix infinite loop in line stippling

2018-11-22 Thread sroland
From: Roland Scheidegger 

The calculated length of a line may be infinite, if the coords we
get are bogus. This leads to an infinite loop in line stippling.
To prevent this, test for it explicitly (although technically
on at least x86 sse it would actually work without the explicit
test, as long as we use the int-converted length value).
While here also get rid of some always-true condition.

Note this does not actually solve the root cause, which is that
the coords we receive are bogus after clipping. This seems a difficult
problem to solve. One issue is that due to float arithmetic, clip w
may become 0 after clipping if the incoming geometry is
"sufficiently degenerate", hence x/y/z ndc (and window) coords will
be all inf (or nan). Even with w not quite 0, I believe it's possible
we produce values which are actually outside the view volume.
(Also, x=y=z=w=0 coords in clipspace would be not considered subject
to clipping, and similarly result in all NaN coords.) We just hope for
now other draw stages (and rasterizers) can handle those relatively
safely (llvmpipe itself should be sort of robust against this, certainly
converstion to fixed point will produce garbage, it might fail a couple
assertions but should neither hang nor crash otherwise).
---
 .../auxiliary/draw/draw_pipe_stipple.c| 26 +++
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c 
b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index d30572cc61..386b7649e4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -48,8 +48,8 @@
 struct stipple_stage {
struct draw_stage stage;
float counter;
-   uint pattern;
-   uint factor;
+   ushort pattern;
+   ushort factor;
bool smooth;
 };
 
@@ -110,7 +110,7 @@ emit_segment(struct draw_stage *stage, struct prim_header 
*header,
 
 
 static inline bool
-stipple_test(int counter, ushort pattern, int factor)
+stipple_test(int counter, ushort pattern, ushort factor)
 {
int b = (counter / factor) & 0xf;
return !!((1 << b) & pattern);
@@ -136,6 +136,10 @@ stipple_line(struct draw_stage *stage, struct prim_header 
*header)
 
float length;
int i;
+   int intlength;
+
+   if (header->flags & DRAW_PIPE_RESET_STIPPLE)
+  stipple->counter = 0;
 
if (stipple->smooth) {
   float dx = x1 - x0;
@@ -147,21 +151,21 @@ stipple_line(struct draw_stage *stage, struct prim_header 
*header)
   length = MAX2(dx, dy);
}
 
-   if (header->flags & DRAW_PIPE_RESET_STIPPLE)
-  stipple->counter = 0;
+   if (util_is_inf_or_nan(length))
+  intlength = 0;
+   else
+  intlength = ceilf(length);
 
/* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table.
 */
-   for (i = 0; i < length; i++) {
+   for (i = 0; i < intlength; i++) {
   bool result = stipple_test((int)stipple->counter + i,
- (ushort)stipple->pattern, stipple->factor);
+ stipple->pattern, stipple->factor);
   if (result != state) {
  /* changing from "off" to "on" or vice versa */
  if (state) {
-if (start != i) {
-   /* finishing an "on" segment */
-   emit_segment(stage, header, start / length, i / length);
-}
+/* finishing an "on" segment */
+emit_segment(stage, header, start / length, i / length);
  }
  else {
 /* starting an "on" segment */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: allow exporting an imported SYNC_FD semaphore type

2018-11-22 Thread Tapani Pälli

Hi;

On 11/22/18 6:20 PM, Emil Velikov wrote:

On 2018/11/22, Tapani Pälli wrote:

Fixes issues with following SkQP tests:

unitTest_VulkanHardwareBuffer_Vulkan_EGL_Syncs
unitTest_VulkanHardwareBuffer_Vulkan_Vulkan_Syncs

Signed-off-by: Tapani Pälli 
---
  src/intel/vulkan/anv_queue.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index 2a8ed2eb4ed..7887fbcb937 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -1056,7 +1056,8 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
  
 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:

if (device->has_exec_fence) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
   pExternalSemaphoreProperties->compatibleHandleTypes =
  VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
   pExternalSemaphoreProperties->externalSemaphoreFeatures =
--

Hi Tapani,

Is it worth picking this for stable?



Nope, as it exposes a new capability. I believe these tests should be 
made to skip earlier if this capability is not available, currently they 
realize it too late and fail in horrible ways. Will try to apply some 
fix there also.


// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 00/13] query validation fixes

2018-11-22 Thread Tapani Pälli

Ping: this series has not landed yet. Are you planning to land it soon?

On 11/9/18 7:58 AM, Tapani Pälli wrote:

Thanks, _mesa_has makes things a lot cleaner and simpler to manage;

Reviewed-by: Tapani Pälli 

On 11/8/18 2:42 PM, Erik Faye-Lund wrote:

Here's v2 of this series. Changes are as follows:

- 5/13: also check for ARB_occlusion_query2, for core-contexts
- 8/13: also check for EXT_disjoint_timer_query, for gles-contexts
- 9/13: also check for OES_geometry_shader, for gles-contexts
- 12/13: also check for EXT_disjoint_timer_query, for gles-contexts

In addition, there's a regression in a piglit-test, but that test is wrong
and uses invalid queries on GLES. A patch for that test has been sent to
the piglit mailing list, here:

https://patchwork.freedesktop.org/series/52216/

Thanks to Tapani Pälli for testing this on the intel-ci system :)

Erik Faye-Lund (13):
   mesa/main: correct requirement for EXT_occlusion_query_boolean
   mesa/main: correct year for EXT_occlusion_query_boolean
   mesa/main: use non-prefixed enums for consistency
   mesa/main: simplify pipeline-statistics query validation
   mesa/main: fix validation of GL_SAMPLES_PASSED
   mesa/main: fix validation of GL_ANY_SAMPLES_PASSED
   mesa/main: fix validation of GL_ANY_SAMPLES_PASSED_CONSERVATIVE
   mesa/main: fix validation of GL_TIME_ELAPSED
   mesa/main: fix validation of transform-feedback queries
   mesa/main: fix validation of transform-feedback overflow queries
   mesa/main: fix validation of ARB_query_buffer_object
   mesa/main: fix validation of GL_TIMESTAMP
   mesa/main: remove overly strict query-validation

  src/mesa/main/extensions_table.h |   2 +-
  src/mesa/main/queryobj.c | 112 ++-
  2 files changed, 52 insertions(+), 62 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa] mesa: use binary search for MESA_EXTENSION_OVERRIDE

2018-11-22 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 11/22/18 8:18 PM, Eric Engestrom wrote:

Not a hot path obviously, but the table still has 425 extensions, which
you can go through in just 9 steps with a binary search.

The table is already sorted, as required by other parts of the code and
enforced by mesa's `main-test`.

Signed-off-by: Eric Engestrom 
---
  src/mesa/main/extensions.c | 22 --
  1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4d95a072793fa4c13b69..0aeda39cc22152073f52 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -48,6 +48,13 @@ static char *unrecognized_extensions = NULL;
   */
  #define o(x) offsetof(struct gl_extensions, x)
  
+static int

+extension_name_compare(const void *name, const void *elem)
+{
+   const struct mesa_extension *entry = elem;
+   return strcmp(name, entry->name);
+}
+
  /**
   * Given an extension name, lookup up the corresponding member of struct
   * gl_extensions and return that member's index.  If the name is
@@ -59,15 +66,18 @@ static char *unrecognized_extensions = NULL;
  static int
  name_to_index(const char* name)
  {
-   unsigned i;
+   const struct mesa_extension *entry;
  
-   if (name == 0)

+   if (!name)
return -1;
  
-   for (i = 0; i < MESA_EXTENSION_COUNT; ++i) {

-  if (strcmp(name, _mesa_extension_table[i].name) == 0)
-return i;
-   }
+   entry = bsearch(name,
+   _mesa_extension_table, MESA_EXTENSION_COUNT,
+   sizeof(_mesa_extension_table[0]),
+   extension_name_compare);
+
+   if (entry)
+  return entry - _mesa_extension_table;
  
 return -1;

  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] XSync/XReply take too much CPU

2018-11-22 Thread yanhua
Hi, Tapani:
  Recently I ran into a problem. I have an OpenGL program; when the program 
runs, it takes up almost one core, and Xorg takes almost one core too.
  I found that this program continuously calls recvmsg on its connection to 
Xorg. I'm sure the CPU is being taken up by too many sendmsg/recvmsg calls. 

 Following is the backtrace of this program when it calls recvmsg:




#0  recvmsg () at ../sysdeps/unix/syscall-template.S:84
#1  0x7fb88abffdd7 in _xcb_in_read (c=c@entry=0x277e990) at 
../../src/xcb_in.c:982
#2  0x7fb88abfded1 in _xcb_conn_wait (c=c@entry=0x277e990, 
cond=cond@entry=0x7fb8667fa5c0, vector=vector@entry=0x0, 
count=count@entry=0x0) at ../../src/xcb_conn.c:495
#3  0x7fb88abff617 in wait_for_reply (c=c@entry=0x277e990, request=2575173, 
e=e@entry=0x7fb8667fa690)
at ../../src/xcb_in.c:516
#4  0x7fb88abff721 in xcb_wait_for_reply (c=0x277e990, request=2575173, 
e=0x7fb8667fa690) at ../../src/xcb_in.c:546
#5  0x7fb88dd64ae7 in _XReply () from /usr/lib/x86_64-linux-gnu/libX11.so.6
#6  0x7fb88dd6052d in XSync () from /usr/lib/x86_64-linux-gnu/libX11.so.6
#7  0x7fb88748c1de in dri2XcbSwapBuffers (pdraw=0x7fb849a058c0, 
remainder=, divisor=0, target_msc=0, 
dpy=0x277d6d0) at dri2_glx.c:815


I ran strace on the program's syscalls; 

the received data is almost all zeros. 

recvmsg(20, {msg_name(0)=NULL, 
msg_iov(1)=[{"g\0\325R\v\0@\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
 4096}], msg_controllen=0, msg_flags=0}, 0) = 2048



Could you tell me what the possible cause of this problem is?


---

Yanhua___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] XSync/XReply take too much CPU

2018-11-22 Thread Tapani Pälli

Hi;

On 11/23/18 9:06 AM, yanhua wrote:

Hi, Tapani:
   Recently I come into a problem. I have a opengl program, if the 
program run, it can take up almost one core, and the Xorg takes almost 
one core too.
   I found this program continuously recvmsg from the Xorg. I'm sure 
the CPU is being taken up by too many sendmsg/recvmsg calls.


There might be a lot of these calls, but typically it is the app itself 
that hogs the CPU. Does it use many threads? Does it use a physics engine 
like Bullet?


Also, is this with vblank_mode=0 or with throttling? Does it achieve 60fps?



Following is the backtrace of this program when it call recvmsg:


#0  recvmsg () at ../sysdeps/unix/syscall-template.S:84
#1  0x7fb88abffdd7 in _xcb_in_read (c=c@entry=0x277e990) at 
../../src/xcb_in.c:982
#2  0x7fb88abfded1 in _xcb_conn_wait (c=c@entry=0x277e990, 
cond=cond@entry=0x7fb8667fa5c0, vector=vector@entry=0x0,

     count=count@entry=0x0) at ../../src/xcb_conn.c:495
#3  0x7fb88abff617 in wait_for_reply (c=c@entry=0x277e990, 
request=2575173, e=e@entry=0x7fb8667fa690)

     at ../../src/xcb_in.c:516
#4  0x7fb88abff721 in xcb_wait_for_reply (c=0x277e990, 
request=2575173, e=0x7fb8667fa690) at ../../src/xcb_in.c:546
#5  0x7fb88dd64ae7 in _XReply () from 
/usr/lib/x86_64-linux-gnu/libX11.so.6
#6  0x7fb88dd6052d in XSync () from 
/usr/lib/x86_64-linux-gnu/libX11.so.6
#7  0x7fb88748c1de in dri2XcbSwapBuffers (pdraw=0x7fb849a058c0, 
remainder=, divisor=0, target_msc=0,

     dpy=0x277d6d0) at dri2_glx.c:815

I strace the program's syscall,
the data is almost zeros.
recvmsg(20, {msg_name(0)=NULL, 
msg_iov(1)=[{"g\0\325R\v\0@\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 
4096}], msg_controllen=0, msg_flags=0}, 0) = 2048


Could you tell me what's the possible cause  of this problem.

---
Yanhua

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev