[Mesa-dev] [PATCH] ac/nir: Use instance_rate_inputs per attribute, not per variable.

2018-01-22 Thread Bas Nieuwenhuizen
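The old code tested instance_rate_inputs only once per variable (using the
bit of its first attribute), which did the wrong thing if we had e.g. an
array for which only some of the attributes use the instance index. This
tripped up some new CTS tests.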

CC: 
---
 src/amd/common/ac_nir_to_llvm.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 243005a221..7defe0f491 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5321,21 +5321,20 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 
variable->data.driver_location = idx * 4;
 
-   if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
-   buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
-   ctx->abi.start_instance, "");
-   if (ctx->options->key.vs.as_ls) {
-   ctx->shader_info->vs.vgpr_comp_cnt =
-   MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
-   } else {
-   ctx->shader_info->vs.vgpr_comp_cnt =
-   MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
-   }
-   } else
-   buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
-   ctx->abi.base_vertex, "");
-
for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
+   if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + 
i))) {
+   buffer_index = LLVMBuildAdd(ctx->builder, 
ctx->abi.instance_id,
+   ctx->abi.start_instance, 
"");
+   if (ctx->options->key.vs.as_ls) {
+   ctx->shader_info->vs.vgpr_comp_cnt =
+   MAX2(2, 
ctx->shader_info->vs.vgpr_comp_cnt);
+   } else {
+   ctx->shader_info->vs.vgpr_comp_cnt =
+   MAX2(1, 
ctx->shader_info->vs.vgpr_comp_cnt);
+   }
+   } else
+   buffer_index = LLVMBuildAdd(ctx->builder, 
ctx->abi.vertex_id,
+   ctx->abi.base_vertex, "");
t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
 
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 07/24] anv/image: Support color aspects in layout_to_aux_usage

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:24PM -0800, Jason Ekstrand wrote:
> Reviewed-by: Nanley Chery 
> ---
>  src/intel/vulkan/anv_image.c | 48 
> ++--
>  1 file changed, 29 insertions(+), 19 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 84e4b96..e34ac95 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -774,12 +774,6 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
> const devinfo,
> /* Stencil has no aux */
> assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
>  
> -   /* The following switch currently only handles depth stencil aspects.
> -* TODO: Handle the color aspect.
> -*/
> -   if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)
> -  return image->planes[plane].aux_usage;
> -
> switch (layout) {
>  
> /* Invalid Layouts */
> @@ -799,28 +793,38 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
> const devinfo,
>  
>  
> /* Transfer Layouts
> -*
> -* This buffer could be a depth buffer used in a transfer operation. BLORP
> -* currently doesn't use HiZ for transfer operations so we must use the 
> main
> -* buffer for this layout. TODO: Enable HiZ in BLORP.
>  */
> case VK_IMAGE_LAYOUT_GENERAL:
> case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
> case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
> -  return ISL_AUX_USAGE_NONE;
> +  if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> + /* This buffer could be a depth buffer used in a transfer operation.
> +  * BLORP currently doesn't use HiZ for transfer operations so we 
> must
> +  * use the main buffer for this layout. TODO: Enable HiZ in BLORP.
> +  */
> + assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_HIZ);
> + return ISL_AUX_USAGE_NONE;
> +  } else {
> + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> + return image->planes[plane].aux_usage;
> +  }
>  
>  
> /* Sampling Layouts */
> case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
> +   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR:
>assert((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0);
>/* Fall-through */
> case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
> -   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR:
> -  assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
> -  if (anv_can_sample_with_hiz(devinfo, image))
> - return ISL_AUX_USAGE_HIZ;
> -  else
> - return ISL_AUX_USAGE_NONE;
> +  if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> + if (anv_can_sample_with_hiz(devinfo, image))
> +return ISL_AUX_USAGE_HIZ;
> + else
> +return ISL_AUX_USAGE_NONE;
> +  } else {
> + return image->planes[plane].aux_usage;
> +  }
> +
>  
> case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
>assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> @@ -845,8 +849,14 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
> const devinfo,
>  
> /* Rendering Layouts */
> case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
> -  assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> -  unreachable("Color images are not yet supported.");
> +  assert(aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> +  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) {
> + assert(image->samples == 1);
> + return ISL_AUX_USAGE_CCS_D;

Just checking that I understand what is going on. Earlier in this function
there is an early return for "image->planes[plane].aux_surface.isl.size == 0".
That means that we end up here only if there is an auxiliary surface. Moreover,
if CCS_E should be used, then image->planes[plane].aux_usage is set already?

> +  } else {
> + assert(image->planes[plane].aux_usage != ISL_AUX_USAGE_CCS_D);
> + return image->planes[plane].aux_usage;
> +  }
>  
> case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
> case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR:
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 09/24] anv/cmd_buffer: Generalize transition_color_buffer

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:26PM -0800, Jason Ekstrand wrote:
> This moves it to being based on layout_to_aux_usage instead of being
> hard-coded based on bits of a priori knowledge of how transitions
> interact with layouts.  This conceptually simplifies things because
> we're now using layout_to_aux_usage and layout_supports_fast_clear to
> make resolve decisions so changes to those functions will do what one
> expects.
> 
> This fixes a potential bug with window system integration on gen9+ where
> we wouldn't do a resolve when transitioning to the PRESENT_SRC layout
> because we just assume that everything that handles CCS_E can handle it
> all the time.  When handing a CCS_E image off to the window system, we
> may need to do a full resolve if the window system does not support the
> CCS_E modifier.  The only reason why this hasn't been a problem yet is
> because we don't support modifiers in Vulkan WSI and so we always get X
> tiling which implies no CCS on gen9+.
> 
> v2 (Jason Ekstrand):
>  - Make a few more things const
>  - Use the anv_fast_clear_support enum
> 
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 56 
> ++
>  1 file changed, 44 insertions(+), 12 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 6a6d8b2..fd27463 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -593,6 +593,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
>  VkImageLayout initial_layout,
>  VkImageLayout final_layout)
>  {
> +   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
> /* Validate the inputs. */
> assert(cmd_buffer);
> assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> @@ -733,17 +734,51 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>   VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
>   final_layout);
>}
> -   } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
> -  /* Resolves are only necessary if the subresource may contain blocks
> -   * fast-cleared to values unsupported in other layouts. This only 
> occurs
> -   * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> -   */
> -  return;
> -   } else if (image->samples > 1) {
> -  /* MCS buffers don't need resolving. */
>return;
> }
>  
> +   /* If initial aux usage is NONE, there is nothing to resolve */
> +   const enum isl_aux_usage initial_aux_usage =
> +  anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout);
> +   if (initial_aux_usage == ISL_AUX_USAGE_NONE)
> +  return;
> +
> +   enum isl_aux_op resolve_op = ISL_AUX_OP_NONE;
> +
> +   /* If the initial layout supports more fast clear than the final layout
> +* then we need at least a partial resolve.
> +*/
> +   const enum anv_fast_clear_type initial_fast_clear =
> +  anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout);
> +   const enum anv_fast_clear_type final_fast_clear =
> +  anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout);
> +   if (final_fast_clear < initial_fast_clear)
> +  resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE;
> +
> +   const enum isl_aux_usage final_aux_usage =
> +  anv_layout_to_aux_usage(devinfo, image, aspect, final_layout);
> +   if (initial_aux_usage == ISL_AUX_USAGE_CCS_E &&
> +   final_aux_usage != ISL_AUX_USAGE_CCS_E)
> +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> +
> +   /* CCS_D only supports full resolves and BLORP will assert on us if we try
> +* to do a partial resolve on a CCS_D surface.
> +*/
> +   if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
> +   initial_aux_usage == ISL_AUX_USAGE_CCS_D)
> +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> +
> +   if (resolve_op == ISL_AUX_OP_NONE)
> +  return;
> +
> +   /* Even though the above code can theoretically handle multiple resolve
> +* types such as CCS_D -> CCS_E, the predication code below can't.  We 
> only
> +* really handle a couple of cases.
> +*/
> +   assert(initial_aux_usage == ISL_AUX_USAGE_NONE ||

Earlier there is an explicit early return for
"initial_aux_usage == ISL_AUX_USAGE_NONE", so that case can never reach this
point. Just checking if you really meant to assert it here.

> +  final_aux_usage == ISL_AUX_USAGE_NONE ||
> +  initial_aux_usage == final_aux_usage);
> +
> /* Perform a resolve to synchronize data between the main and aux buffer.
>  * Before we begin, we must satisfy the cache flushing requirement 
> specified
>  * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
> @@ -774,10 +809,7 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>genX(load_needs_resolve_predicate)(cmd_buffer, image, aspect, level);
>  
>anv_image_ccs_op(cmd_buffe

[Mesa-dev] [PATCH v3 06/24] anv/image: Add a helper for determining when fast clears are supported

2018-01-22 Thread Jason Ekstrand
v2 (Jason Ekstrand):
 - Return an enum instead of a boolean

v3 (Jason Ekstrand):
 - Return ANV_FAST_CLEAR_NONE instead of false (Topi)
 - Rename ANV_FAST_CLEAR_ANY to ANV_FAST_CLEAR_DEFAULT_VALUE
 - Add documentation for the enum values

Reviewed-by: Topi Pohjolainen 
---
 src/intel/vulkan/anv_image.c   | 71 ++
 src/intel/vulkan/anv_private.h | 16 ++
 2 files changed, 87 insertions(+)

diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 0aa8cd9..4cd4fe1 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -861,6 +861,77 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
const devinfo,
unreachable("layout is not a VkImageLayout enumeration member.");
 }
 
+/**
+ * This function returns the level of unresolved fast-clear support of the
+ * given image in the given VkImageLayout.
+ *
+ * @param devinfo The device information of the Intel GPU.
+ * @param image The image that may contain a collection of buffers.
+ * @param aspect The aspect of the image to be accessed.
+ * @param layout The current layout of the image aspect(s).
+ */
+enum anv_fast_clear_type
+anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
+  const struct anv_image * const image,
+  const VkImageAspectFlagBits aspect,
+  const VkImageLayout layout)
+{
+   /* The aspect must be exactly one of the image aspects. */
+   assert(_mesa_bitcount(aspect) == 1 && (aspect & image->aspects));
+
+   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+
+   /* If there is no auxiliary surface allocated, there are no fast-clears */
+   if (image->planes[plane].aux_surface.isl.size == 0)
+  return ANV_FAST_CLEAR_NONE;
+
+   /* All images that use an auxiliary surface are required to be tiled. */
+   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+
+   /* Stencil has no aux */
+   assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
+
+   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+  /* For depth images (with HiZ), the layout supports fast-clears if and
+   * only if it supports HiZ.  However, we only support fast-clears to the
+   * default depth value.
+   */
+  enum isl_aux_usage aux_usage =
+ anv_layout_to_aux_usage(devinfo, image, aspect, layout);
+  return aux_usage == ISL_AUX_USAGE_HIZ ?
+ ANV_FAST_CLEAR_DEFAULT_VALUE : ANV_FAST_CLEAR_NONE;
+   }
+
+   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
+
+   /* Multisample fast-clear is not yet supported. */
+   if (image->samples > 1)
+  return ANV_FAST_CLEAR_NONE;
+
+   /* The only layout which actually supports fast-clears today is
+* VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL.  Some day in the future
+* this may change if our ability to track clear colors improves.
+*/
+   switch (layout) {
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+  return ANV_FAST_CLEAR_ANY;
+
+   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+  return ANV_FAST_CLEAR_NONE;
+
+   default:
+  /* If the image has CCS_E enabled all the time then we can use
+   * fast-clear as long as the clear color is the default value of zero
+   * since this is the default value we program into every surface state
+   * used for texturing.
+   */
+  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
+ return ANV_FAST_CLEAR_DEFAULT_VALUE;
+  else
+ return ANV_FAST_CLEAR_NONE;
+   }
+}
+
 
 static struct anv_state
 alloc_surface_state(struct anv_device *device)
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index cf82196..b96895b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2423,6 +2423,16 @@ struct anv_image {
} planes[3];
 };
 
+/* The ordering of this enum is important */
+enum anv_fast_clear_type {
+   /** Image does not have/support any fast-clear blocks */
+   ANV_FAST_CLEAR_NONE = 0,
+   /** Image has/supports fast-clear but only to the default value */
+   ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
+   /** Image has/supports fast-clear with an arbitrary fast-clear value */
+   ANV_FAST_CLEAR_ANY = 2,
+};
+
 /* Returns the number of auxiliary buffer levels attached to an image. */
 static inline uint8_t
 anv_image_aux_levels(const struct anv_image * const image,
@@ -2545,6 +2555,12 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
const devinfo,
 const VkImageAspectFlagBits aspect,
 const VkImageLayout layout);
 
+enum anv_fast_clear_type
+anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
+  const struct anv_image * const image,
+  const VkImageAspectFlagBits aspect,
+  const VkImageLayout layout);
+
 /* This is defined as a macro so that it works f

Re: [Mesa-dev] [PATCH v2 07/24] anv/image: Support color aspects in layout_to_aux_usage

2018-01-22 Thread Jason Ekstrand
On Mon, Jan 22, 2018 at 12:07 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 19, 2018 at 03:47:24PM -0800, Jason Ekstrand wrote:
> > Reviewed-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_image.c | 48 ++
> --
> >  1 file changed, 29 insertions(+), 19 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 84e4b96..e34ac95 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -774,12 +774,6 @@ anv_layout_to_aux_usage(const struct
> gen_device_info * const devinfo,
> > /* Stencil has no aux */
> > assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
> >
> > -   /* The following switch currently only handles depth stencil aspects.
> > -* TODO: Handle the color aspect.
> > -*/
> > -   if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV)
> > -  return image->planes[plane].aux_usage;
> > -
> > switch (layout) {
> >
> > /* Invalid Layouts */
> > @@ -799,28 +793,38 @@ anv_layout_to_aux_usage(const struct
> gen_device_info * const devinfo,
> >
> >
> > /* Transfer Layouts
> > -*
> > -* This buffer could be a depth buffer used in a transfer operation.
> BLORP
> > -* currently doesn't use HiZ for transfer operations so we must use
> the main
> > -* buffer for this layout. TODO: Enable HiZ in BLORP.
> >  */
> > case VK_IMAGE_LAYOUT_GENERAL:
> > case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
> > case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
> > -  return ISL_AUX_USAGE_NONE;
> > +  if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> > + /* This buffer could be a depth buffer used in a transfer
> operation.
> > +  * BLORP currently doesn't use HiZ for transfer operations so
> we must
> > +  * use the main buffer for this layout. TODO: Enable HiZ in
> BLORP.
> > +  */
> > + assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_HIZ);
> > + return ISL_AUX_USAGE_NONE;
> > +  } else {
> > + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > + return image->planes[plane].aux_usage;
> > +  }
> >
> >
> > /* Sampling Layouts */
> > case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
> > +   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR:
> >assert((image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) ==
> 0);
> >/* Fall-through */
> > case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
> > -   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR:
> > -  assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
> > -  if (anv_can_sample_with_hiz(devinfo, image))
> > - return ISL_AUX_USAGE_HIZ;
> > -  else
> > - return ISL_AUX_USAGE_NONE;
> > +  if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> > + if (anv_can_sample_with_hiz(devinfo, image))
> > +return ISL_AUX_USAGE_HIZ;
> > + else
> > +return ISL_AUX_USAGE_NONE;
> > +  } else {
> > + return image->planes[plane].aux_usage;
> > +  }
> > +
> >
> > case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
> >assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > @@ -845,8 +849,14 @@ anv_layout_to_aux_usage(const struct
> gen_device_info * const devinfo,
> >
> > /* Rendering Layouts */
> > case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
> > -  assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > -  unreachable("Color images are not yet supported.");
> > +  assert(aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > +  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) {
> > + assert(image->samples == 1);
> > + return ISL_AUX_USAGE_CCS_D;
>
> Just checking that I understand what is going on. Earlier in this function
> there is early return for "image->planes[plane].aux_surface.isl.size ==
> 0".
> That means that we end up here only if there is auxiliary. Moreover if
> CCS_E
> should be used then image->planes[plane].aux_usage is set already?
>

Yes and no.  Yes, we only get here if we actually have an aux surface.  For
color images, aux_usage is the "default" aux_usage.  Specifically, it's
CCS_E for images where we can enable proper compression and NONE when we
cannot.  It's a bit of an artifact of history and we should probably change
it at some point.  For aux_usage == NONE, we can still enable CCS_D for
color attachments and we do a full resolve when we transition from
COLOR_ATTACHMENT_OPTIMAL to basically any other layout.

We could also enable CCS_E if we wanted in certain cases, but it's not
clear that doing so would actually be faster since a full resolve of a
CCS_E image can actually be more expensive than a full resolve of CCS_D.
(For CCS_D the hardware can just ignore any blocks that aren't clear
whereas a full resolve with CCS_E will touch almost every pixel.)


> > +  } else {
> > + assert(image->plan

Re: [Mesa-dev] [PATCH] i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext

2018-01-22 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 01/20/2018 02:29 AM, Chris Wilson wrote:

The forward port of commit 6d87500fe12e ("dri: Change
__DriverApiRec::CreateContext to take a struct for attribs") failed to
adapt the set of allowed attributes for the earlier introduction of
context priorities (commit 1617fca6d12e "i965: Pass the EGL/DRI context
priority through to the kernel").

Fixes: 6d87500fe12e ("dri: Change __DriverApiRec::CreateContext to take a struct for 
attribs")
Signed-off-by: Chris Wilson 
Cc: Adam Jackson 
Cc: Nicolai Hähnle 
Cc: Emil Velikov 
Cc: Kenneth Graunke 
---
  src/mesa/drivers/dri/i965/brw_context.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index c30eae420c..0eb9aba948 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -860,7 +860,9 @@ brwCreateContext(gl_api api,
return false;
 }
  
-   if (ctx_config->attribute_mask & ~__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) {

+   if (ctx_config->attribute_mask &
+   ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
+ __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
*dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
return false;
 }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 09/24] anv/cmd_buffer: Generalize transition_color_buffer

2018-01-22 Thread Jason Ekstrand
On Mon, Jan 22, 2018 at 12:17 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Fri, Jan 19, 2018 at 03:47:26PM -0800, Jason Ekstrand wrote:
> > This moves it to being based on layout_to_aux_usage instead of being
> > hard-coded based on bits of a priori knowledge of how transitions
> > interact with layouts.  This conceptually simplifies things because
> > we're now using layout_to_aux_usage and layout_supports_fast_clear to
> > make resolve decisions so changes to those functions will do what one
> > expects.
> >
> > This fixes a potential bug with window system integration on gen9+ where
> > we wouldn't do a resolve when transitioning to the PRESENT_SRC layout
> > because we just assume that everything that handles CCS_E can handle it
> > all the time.  When handing a CCS_E image off to the window system, we
> > may need to do a full resolve if the window system does not support the
> > CCS_E modifier.  The only reason why this hasn't been a problem yet is
> > because we don't support modifiers in Vulkan WSI and so we always get X
> > tiling which implies no CCS on gen9+.
> >
> > v2 (Jason Ekstrand):
> >  - Make a few more things const
> >  - Use the anv_fast_clear_support enum
> >
> > Reviewed-by: Topi Pohjolainen 
> > ---
> >  src/intel/vulkan/genX_cmd_buffer.c | 56 ++
> 
> >  1 file changed, 44 insertions(+), 12 deletions(-)
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 6a6d8b2..fd27463 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -593,6 +593,7 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> >  VkImageLayout initial_layout,
> >  VkImageLayout final_layout)
> >  {
> > +   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
> > /* Validate the inputs. */
> > assert(cmd_buffer);
> > assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > @@ -733,17 +734,51 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> >   VK_IMAGE_LAYOUT_COLOR_
> ATTACHMENT_OPTIMAL,
> >   final_layout);
> >}
> > -   } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
> {
> > -  /* Resolves are only necessary if the subresource may contain
> blocks
> > -   * fast-cleared to values unsupported in other layouts. This only
> occurs
> > -   * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> > -   */
> > -  return;
> > -   } else if (image->samples > 1) {
> > -  /* MCS buffers don't need resolving. */
> >return;
> > }
> >
> > +   /* If initial aux usage is NONE, there is nothing to resolve */
> > +   const enum isl_aux_usage initial_aux_usage =
> > +  anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout);
> > +   if (initial_aux_usage == ISL_AUX_USAGE_NONE)
> > +  return;
> > +
> > +   enum isl_aux_op resolve_op = ISL_AUX_OP_NONE;
> > +
> > +   /* If the initial layout supports more fast clear than the final
> layout
> > +* then we need at least a partial resolve.
> > +*/
> > +   const enum anv_fast_clear_type initial_fast_clear =
> > +  anv_layout_to_fast_clear_type(devinfo, image, aspect,
> initial_layout);
> > +   const enum anv_fast_clear_type final_fast_clear =
> > +  anv_layout_to_fast_clear_type(devinfo, image, aspect,
> final_layout);
> > +   if (final_fast_clear < initial_fast_clear)
> > +  resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE;
> > +
> > +   const enum isl_aux_usage final_aux_usage =
> > +  anv_layout_to_aux_usage(devinfo, image, aspect, final_layout);
> > +   if (initial_aux_usage == ISL_AUX_USAGE_CCS_E &&
> > +   final_aux_usage != ISL_AUX_USAGE_CCS_E)
> > +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> > +
> > +   /* CCS_D only supports full resolves and BLORP will assert on us if
> we try
> > +* to do a partial resolve on a CCS_D surface.
> > +*/
> > +   if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
> > +   initial_aux_usage == ISL_AUX_USAGE_CCS_D)
> > +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> > +
> > +   if (resolve_op == ISL_AUX_OP_NONE)
> > +  return;
> > +
> > +   /* Even though the above code can theoretically handle multiple
> resolve
> > +* types such as CCS_D -> CCS_E, the predication code below can't.
> We only
> > +* really handle a couple of cases.
> > +*/
> > +   assert(initial_aux_usage == ISL_AUX_USAGE_NONE ||
>
> Earlier there is explicit early return for
> "initial_aux_usage == ISL_AUX_USAGE_NONE". Just checking if you really
> meant
> to assert it here.
>

I don't know.  The point of the assert is to document which cases the code
below can handle.  I can't do this assert any earlier because we don't have
final_aux_usage until we get here.


> > +  final_aux_usage == ISL_AUX_USAGE_NONE ||
> > +  ini

Re: [Mesa-dev] [PATCH v2 11/24] anv/blorp: Add src/dst_level helper variables in CmdCopyImage

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:28PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_blorp.c | 14 ++
>  1 file changed, 6 insertions(+), 8 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index ccb9d18..e4e4135 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -247,6 +247,7 @@ void anv_CmdCopyImage(
>VkExtent3D extent =
>   anv_sanitize_image_extent(src_image->type, pRegions[r].extent);
>  
> +  const uint32_t dst_level = pRegions[r].dstSubresource.mipLevel;
>unsigned dst_base_layer, layer_count;
>if (dst_image->type == VK_IMAGE_TYPE_3D) {
>   dst_base_layer = pRegions[r].dstOffset.z;
> @@ -257,6 +258,7 @@ void anv_CmdCopyImage(
>  anv_get_layerCount(dst_image, &pRegions[r].dstSubresource);
>}
>  
> +  const uint32_t src_level = pRegions[r].srcSubresource.mipLevel;
>unsigned src_base_layer;
>if (src_image->type == VK_IMAGE_TYPE_3D) {
>   src_base_layer = pRegions[r].srcOffset.z;
> @@ -283,10 +285,8 @@ void anv_CmdCopyImage(
>   ANV_AUX_USAGE_DEFAULT, &dst_surf);
>  
>  for (unsigned i = 0; i < layer_count; i++) {
> -   blorp_copy(&batch, &src_surf, 
> pRegions[r].srcSubresource.mipLevel,
> -  src_base_layer + i,
> -  &dst_surf, pRegions[r].dstSubresource.mipLevel,
> -  dst_base_layer + i,
> +   blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
> +  &dst_surf, dst_level, dst_base_layer + i,
>srcOffset.x, srcOffset.y,
>dstOffset.x, dstOffset.y,
>extent.width, extent.height);
> @@ -300,10 +300,8 @@ void anv_CmdCopyImage(
>ANV_AUX_USAGE_DEFAULT, &dst_surf);
>  
>   for (unsigned i = 0; i < layer_count; i++) {
> -blorp_copy(&batch, &src_surf, 
> pRegions[r].srcSubresource.mipLevel,
> -   src_base_layer + i,
> -   &dst_surf, pRegions[r].dstSubresource.mipLevel,
> -   dst_base_layer + i,
> +blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
> +   &dst_surf, dst_level, dst_base_layer + i,
> srcOffset.x, srcOffset.y,
> dstOffset.x, dstOffset.y,
> extent.width, extent.height);
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #6663 failed

2018-01-22 Thread AppVeyor



Build mesa 6663 failed


Commit 5048572352 by Jason Ekstrand on 1/12/2018 1:33 AM:

i965: Set tiling on BOs imported with modifiers

We need this to ensure that GTT maps work on buffers we get from Vulkan
on the off chance that someone does a readpixels or something.  Soon, we
will be removing GTT maps from i965 entirely and this can be reverted.
None the less, it's needed for stable.

Reviewed-by: Kenneth Graunke
Cc: mesa-sta...@lists.freedesktop.org


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Set tiling on BOs imported with modifiers

2018-01-22 Thread Jason Ekstrand
Note to back-porters:  I forgot to add a "fixes" tag but these and the
other two I pushed with them should go into 17.3 as they fix the modifiers
support we landed there.

On Sun, Jan 21, 2018 at 8:05 PM, Jason Ekstrand 
wrote:

> We need this to ensure that GTT maps work on buffers we get from Vulkan
> on the off chance that someone does a readpixels or something.  Soon, we
> will be removing GTT maps from i965 entirely and this can be reverted.
> None the less, it's needed for stable.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index b563bbf..e877f93 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1043,7 +1043,16 @@ intel_create_image_from_fds_common(__DRIscreen
> *dri_screen,
>
> image->planar_format = f;
>
> -   image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
> +   if (modifier != DRM_FORMAT_MOD_INVALID) {
> +  const struct isl_drm_modifier_info *mod_info =
> + isl_drm_modifier_get_info(modifier);
> +  uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling);
> +  image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr,
> fds[0],
> + tiling, strides[0]);
> +   } else {
> +  image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
> +   }
> +
> if (image->bo == NULL) {
>free(image);
>return NULL;
> --
> 2.5.0.400.gff86faf
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 09/24] anv/cmd_buffer: Generalize transition_color_buffer

2018-01-22 Thread Pohjolainen, Topi
On Mon, Jan 22, 2018 at 12:30:30AM -0800, Jason Ekstrand wrote:
> On Mon, Jan 22, 2018 at 12:17 AM, Pohjolainen, Topi <
> topi.pohjolai...@gmail.com> wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:26PM -0800, Jason Ekstrand wrote:
> > > This moves it to being based on layout_to_aux_usage instead of being
> > > hard-coded based on bits of a priori knowledge of how transitions
> > > interact with layouts.  This conceptually simplifies things because
> > > we're now using layout_to_aux_usage and layout_supports_fast_clear to
> > > make resolve decisions so changes to those functions will do what one
> > > expects.
> > >
> > > This fixes a potential bug with window system integration on gen9+ where
> > > we wouldn't do a resolve when transitioning to the PRESENT_SRC layout
> > > because we just assume that everything that handles CCS_E can handle it
> > > all the time.  When handing a CCS_E image off to the window system, we
> > > may need to do a full resolve if the window system does not support the
> > > CCS_E modifier.  The only reason why this hasn't been a problem yet is
> > > because we don't support modifiers in Vulkan WSI and so we always get X
> > > tiling which implies no CCS on gen9+.
> > >
> > > v2 (Jason Ekstrand):
> > >  - Make a few more things const
> > >  - Use the anv_fast_clear_support enum
> > >
> > > Reviewed-by: Topi Pohjolainen 
> > > ---
> > >  src/intel/vulkan/genX_cmd_buffer.c | 56 ++
> > 
> > >  1 file changed, 44 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > > index 6a6d8b2..fd27463 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -593,6 +593,7 @@ transition_color_buffer(struct anv_cmd_buffer
> > *cmd_buffer,
> > >  VkImageLayout initial_layout,
> > >  VkImageLayout final_layout)
> > >  {
> > > +   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
> > > /* Validate the inputs. */
> > > assert(cmd_buffer);
> > > assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > > @@ -733,17 +734,51 @@ transition_color_buffer(struct anv_cmd_buffer
> > *cmd_buffer,
> > >   VK_IMAGE_LAYOUT_COLOR_
> > ATTACHMENT_OPTIMAL,
> > >   final_layout);
> > >}
> > > -   } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
> > {
> > > -  /* Resolves are only necessary if the subresource may contain
> > blocks
> > > -   * fast-cleared to values unsupported in other layouts. This only
> > occurs
> > > -   * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> > > -   */
> > > -  return;
> > > -   } else if (image->samples > 1) {
> > > -  /* MCS buffers don't need resolving. */
> > >return;
> > > }
> > >
> > > +   /* If initial aux usage is NONE, there is nothing to resolve */
> > > +   const enum isl_aux_usage initial_aux_usage =
> > > +  anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout);
> > > +   if (initial_aux_usage == ISL_AUX_USAGE_NONE)
> > > +  return;
> > > +
> > > +   enum isl_aux_op resolve_op = ISL_AUX_OP_NONE;
> > > +
> > > +   /* If the initial layout supports more fast clear than the final
> > layout
> > > +* then we need at least a partial resolve.
> > > +*/
> > > +   const enum anv_fast_clear_type initial_fast_clear =
> > > +  anv_layout_to_fast_clear_type(devinfo, image, aspect,
> > initial_layout);
> > > +   const enum anv_fast_clear_type final_fast_clear =
> > > +  anv_layout_to_fast_clear_type(devinfo, image, aspect,
> > final_layout);
> > > +   if (final_fast_clear < initial_fast_clear)
> > > +  resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE;
> > > +
> > > +   const enum isl_aux_usage final_aux_usage =
> > > +  anv_layout_to_aux_usage(devinfo, image, aspect, final_layout);
> > > +   if (initial_aux_usage == ISL_AUX_USAGE_CCS_E &&
> > > +   final_aux_usage != ISL_AUX_USAGE_CCS_E)
> > > +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> > > +
> > > +   /* CCS_D only supports full resolves and BLORP will assert on us if
> > we try
> > > +* to do a partial resolve on a CCS_D surface.
> > > +*/
> > > +   if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
> > > +   initial_aux_usage == ISL_AUX_USAGE_CCS_D)
> > > +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> > > +
> > > +   if (resolve_op == ISL_AUX_OP_NONE)
> > > +  return;
> > > +
> > > +   /* Even though the above code can theoretically handle multiple
> > resolve
> > > +* types such as CCS_D -> CCS_E, the predication code below can't.
> > We only
> > > +* really handle a couple of cases.
> > > +*/
> > > +   assert(initial_aux_usage == ISL_AUX_USAGE_NONE ||
> >
> > Earlier there is explicit early return for
> > "initial_aux_usage == ISL_AUX_USAGE_NONE". Just checking if you really
> 

Re: [Mesa-dev] [PATCH 5/7] report.py: Add option to only display measurements that have changes

2018-01-22 Thread Ian Romanick
On 01/13/2018 04:23 AM, Dylan Baker wrote:
> Quoting Ian Romanick (2018-01-12 12:06:59)
>> From: Ian Romanick 
>>
>> This is useful for preparing data to go in a Mesa commit message.
>>
>> Signed-off-by: Ian Romanick 
>> ---
>>  report.py | 53 +++--
>>  1 file changed, 31 insertions(+), 22 deletions(-)
>>
>> diff --git a/report.py b/report.py
>> index e0068bc..72752c1 100755
>> --- a/report.py
>> +++ b/report.py
>> @@ -62,6 +62,8 @@ def main():
>>  help="comma-separated list of measurements to 
>> report")
>>  parser.add_argument("--summary-only", "-s", action="store_true", 
>> default=False,
>>  help="do not show the per-shader helped / hurt 
>> data")
>> +parser.add_argument("--changes-only", "-c", action="store_true", 
>> default=False,
>> +help="only show measurements that have changes")
>>  parser.add_argument("before", type=get_results, help="the output of the 
>> original code")
>>  parser.add_argument("after", type=get_results, help="the output of the 
>> new code")
>>  args = parser.parse_args()
>> @@ -116,14 +118,14 @@ def main():
>>  if len(helped) > 0:
>>  print("")
>>  
>> -hurt.sort(
>> -key=lambda k: args.after[k][m] if args.before[k][m] == 
>> 0 else float(args.after[k][m] - args.before[k][m]) / args.before[k][m])
>> -for p in hurt:
>> -namestr = p[0] + " " + p[1]
>> -print(m + " HURT:   " + get_result_string(
>> -namestr, args.before[p][m], args.after[p][m]))
>> -if len(hurt) > 0:
>> -print("")
>> +hurt.sort(
>> +key=lambda k: args.after[k][m] if args.before[k][m] == 0 
>> else float(args.after[k][m] - args.before[k][m]) / args.before[k][m])
>> +for p in hurt:
>> +namestr = p[0] + " " + p[1]
>> +print(m + " HURT:   " + get_result_string(
>> +namestr, args.before[p][m], args.after[p][m]))
>> +if len(hurt) > 0:
>> +print("")
>>  
>>  num_helped[m] = len(helped)
>>  num_hurt[m] = len(hurt)
>> @@ -153,21 +155,28 @@ def main():
>>  if len(gained) > 0:
>>  print("")
>>  
>> +any_helped_or_hurt = False
>>  for m in args.measurements:
>> -print("total {0} in shared programs: {1}\n"
>> -  "{0} in affected programs: {2}\n"
>> -  "helped: {3}\n"
>> -  "HURT: {4}\n".format(
>> -   m,
>> -   change(total_before[m], total_after[m]),
>> -   change(affected_before[m], affected_after[m]),
>> -   num_helped[m],
>> -   num_hurt[m]))
>> -
>> -
>> -print("LOST:   " + str(len(lost)))
>> -print("GAINED: " + str(len(gained)))
>> -
>> +if num_helped[m] > 0 or num_hurt[m] > 0:
>> +any_helped_or_hurt = True
>> +
>> +if num_helped[m] > 0 or num_hurt[m] > 0 or not args.changes_only:
> 
> Couldn't this be: `if any_helped_or_hurt or not args.changes_only:`

After running on that change for a few days... no.  any_helped_or_hurt
applies across all data whereas num_helped[m] > 0 or num_hurt[m] > 0
applies only to a single, specific piece of data.  I don't want to log
information for loops just because instructions were changed.

>> +print("total {0} in shared programs: {1}\n"
>> +  "{0} in affected programs: {2}\n"
>> +  "helped: {3}\n"
>> +  "HURT: {4}\n".format(
>> + m,
>> + change(total_before[m], total_after[m]),
>> + change(affected_before[m], affected_after[m]),
>> + num_helped[m],
>> + num_hurt[m]))
>> +
>> +if len(lost) > 0 or len(gained) > 0 or not args.changes_only:
> 
> Don't use len() for checking if a list has members:
> `if lost or gained or not args.changes_only:`
> 
>> +print("LOST:   " + str(len(lost)))
>> +print("GAINED: " + str(len(gained)))
>> +
>> +if args.changes_only and len(lost) == 0 and len(gained) == 0 and not 
>> any_helped_or_hurt:
> 
> same here:
> `if args.changes_only and not lost and not gained and not any_helped_or_hurt`:
> 
>> +print("No changes.")
>>  
>>  if __name__ == "__main__":
>>  main()
>> -- 
>> 2.9.5
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/21] anv: Follow the rules for vkGet*ProcAddr

2018-01-22 Thread Samuel Iglesias Gonsálvez
I don't see patches 4, 8, 9 and 18 in the mailing list. Are they blocked?

Sam


On 20/01/18 20:11, Jason Ekstrand wrote:
> Our previous scheme for Get*ProcAddr was to just return what we could and
> not care about the details.  This meant that GetInstanceProcAddr returned
> all anv_ entrypoints and GetDeviceProcAddr would return the per-gen
> entrypoint and fall back to anv_.  We figured that this was a perfectly
> reasonable and Vulkan thing to do and that the loader could sort out the
> nasty details.  We were wrong.
>
> The Vulkan spec has some very specific rules about what vkGet*ProcAddr is
> supposed to do in various cases.  In particular, you're supposed to return
> NULL for any extension entrypoints which have not explicitly been enabled.
> Also, vkGetInstanceProcAddr is supposed to return entrypoints for device
> functionality even if they have to be trampoline entrypoints.  In 99% of
> cases, the loader takes care of all these details for us.  However, what I
> hear from the loader people is that they can't do it all and that the
> drivers should also follow the rules.
>
> On the upside, this means that our driver, short of exposing a few symbols,
> is now a completely stand-alone Vulkan implementation and doesn't require a
> loader.
>
> Cc: Samuel Iglesias Gonsálvez 
>
> Jason Ekstrand (21):
>   anv/meson: Make anv_entrypoints_gen.py depend on anv_extensions.py
>   anv: Split anv_extensions.py into two files
>   anv/meson: Simplify some dependency and flag tracking
>   anv/extensions: Generate a header file with extension tables
>   anv: Use tables for instance extension wrangling
>   anv: Add a per-instance table of enabled extensions
>   anv: Use tables for device extension wrangling
>   anv: Add a per-device table of enabled extensions
>   anv/entrypoints: Add an Entrypoint class
>   anv/entrypoints: Add a LAYERS helper variable
>   anv/entrypoints: Split entrypoint index lookup into its own function
>   anv/entrypoints: Expose the different dispatch tables
>   anv/entrypoints: Parse entrypoints before extensions/features
>   anv/extensions: Fix VkVersion::c_vk_version for patch == None
>   anv: Properly NULL for GetInstanceProcAddr with a null instance
>   anv: Add a per-instance dispatch table
>   anv: Add a per-device dispatch table
>   anv: Only advertise enabled entrypoints
>   anv/entrypoints: Use an named tuple for params
>   anv: Return trampoline entrypoints from GetInstanceProcAddr
>   HACK: Return instance entrypoints from GetDeviceProcAddr
>
>  src/intel/Makefile.sources  |   3 +-
>  src/intel/Makefile.vulkan.am|  15 +-
>  src/intel/vulkan/anv_device.c   | 174 ++-
>  src/intel/vulkan/anv_entrypoints_gen.py | 297 
> +++-
>  src/intel/vulkan/anv_extensions.py  | 157 +
>  src/intel/vulkan/anv_extensions_gen.py  | 202 ++
>  src/intel/vulkan/anv_private.h  |  16 +-
>  src/intel/vulkan/meson.build|  50 --
>  8 files changed, 653 insertions(+), 261 deletions(-)
>  create mode 100644 src/intel/vulkan/anv_extensions_gen.py
>




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #6664 completed

2018-01-22 Thread AppVeyor


Build mesa 6664 completed



Commit 005375717b by Matthew Nicholls on 1/19/2018 2:11 PM:

radv: restore previous stencil reference after depth-stencil clear

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Samuel Pitoiset
Reviewed-by: Alex Smith


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/21] anv: Follow the rules for vkGet*ProcAddr

2018-01-22 Thread Jason Ekstrand
I don't think they're blocked but mailman decided to give up over the
week-end so it may have gotten lost.  Here's a branch:

https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/anv-proc-addr

On Mon, Jan 22, 2018 at 1:03 AM, Samuel Iglesias Gonsálvez <
sigles...@igalia.com> wrote:

> I don't see patches 4, 8, 9 and 18 in the mailing list. Are they blocked?
>
> Sam
>
>
> On 20/01/18 20:11, Jason Ekstrand wrote:
> > Our previous scheme for Get*ProcAddr was to just return what we could and
> > not care about the details.  This meant that GetInstanceProcAddr returned
> > all anv_ entrypoints and GetDeviceProcAddr would return the per-gen
> > entrypoint and fall back to anv_.  We figured that this was a perfectly
> > reasonable and Vulkan thing to do and that the loader could sort out the
> > nasty details.  We were wrong.
> >
> > The Vulkan spec has some very specific rules about what vkGet*ProcAddr is
> > supposed to do in various cases.  In particular, you're supposed to
> return
> > NULL for any extension entrypoints which have not explicitly been
> enabled.
> > Also, vkGetInstanceProcAddr is supposed to return entrypoints for device
> > functionality even if they have to be trampoline entrypoints.  In 99% of
> > cases, the loader takes care of all these details for us.  However, what I
> > hear from the loader people is that they can't do it all and that the
> > drivers should also follow the rules.
> >
> > On the upside, this means that our driver, short of exposing a few
> symbols,
> > is now a completely stand-alone Vulkan implementation and doesn't
> require a
> > loader.
> >
> > Cc: Samuel Iglesias Gonsálvez 
> >
> > Jason Ekstrand (21):
> >   anv/meson: Make anv_entrypoints_gen.py depend on anv_extensions.py
> >   anv: Split anv_extensions.py into two files
> >   anv/meson: Simplify some dependency and flag tracking
> >   anv/extensions: Generate a header file with extension tables
> >   anv: Use tables for instance extension wrangling
> >   anv: Add a per-instance table of enabled extensions
> >   anv: Use tables for device extension wrangling
> >   anv: Add a per-device table of enabled extensions
> >   anv/entrypoints: Add an Entrypoint class
> >   anv/entrypoints: Add a LAYERS helper variable
> >   anv/entrypoints: Split entrypoint index lookup into its own function
> >   anv/entrypoints: Expose the different dispatch tables
> >   anv/entrypoints: Parse entrypoints before extensions/features
> >   anv/extensions: Fix VkVersion::c_vk_version for patch == None
> >   anv: Properly NULL for GetInstanceProcAddr with a null instance
> >   anv: Add a per-instance dispatch table
> >   anv: Add a per-device dispatch table
> >   anv: Only advertise enabled entrypoints
> >   anv/entrypoints: Use an named tuple for params
> >   anv: Return trampoline entrypoints from GetInstanceProcAddr
> >   HACK: Return instance entrypoints from GetDeviceProcAddr
> >
> >  src/intel/Makefile.sources  |   3 +-
> >  src/intel/Makefile.vulkan.am|  15 +-
> >  src/intel/vulkan/anv_device.c   | 174 ++-
> >  src/intel/vulkan/anv_entrypoints_gen.py | 297
> +++-
> >  src/intel/vulkan/anv_extensions.py  | 157 +
> >  src/intel/vulkan/anv_extensions_gen.py  | 202 ++
> >  src/intel/vulkan/anv_private.h  |  16 +-
> >  src/intel/vulkan/meson.build|  50 --
> >  8 files changed, 653 insertions(+), 261 deletions(-)
> >  create mode 100644 src/intel/vulkan/anv_extensions_gen.py
> >
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 12/24] anv/cmd_buffer: Add a mark_image_written helper

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:29PM -0800, Jason Ekstrand wrote:
> Currently, this helper does nothing but we call it every place where an
> image is written through the render pipeline.  This will allow us to
> properly mark the aux state so that we can handle resolves correctly.

I can't tell 100% if we are missing a call somewhere but all these look good:

Reviewed-by: Topi Pohjolainen 

> ---
>  src/intel/vulkan/anv_blorp.c   | 44 
> +-
>  src/intel/vulkan/anv_cmd_buffer.c  | 15 +
>  src/intel/vulkan/anv_genX.h|  8 +++
>  src/intel/vulkan/anv_private.h |  9 
>  src/intel/vulkan/genX_cmd_buffer.c | 44 
> ++
>  5 files changed, 119 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index e4e4135..05efc6d 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -283,6 +283,10 @@ void anv_CmdCopyImage(
>  get_blorp_surf_for_anv_image(cmd_buffer->device,
>   dst_image, 1UL << aspect_bit,
>   ANV_AUX_USAGE_DEFAULT, &dst_surf);
> +anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
> +  1UL << aspect_bit,
> +  dst_surf.aux_usage, dst_level,
> +  dst_base_layer, layer_count);
>  
>  for (unsigned i = 0; i < layer_count; i++) {
> blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
> @@ -298,6 +302,9 @@ void anv_CmdCopyImage(
>ANV_AUX_USAGE_DEFAULT, &src_surf);
>   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, 
> dst_mask,
>ANV_AUX_USAGE_DEFAULT, &dst_surf);
> + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
> +   dst_surf.aux_usage, dst_level,
> +   dst_base_layer, layer_count);
>  
>   for (unsigned i = 0; i < layer_count; i++) {
>  blorp_copy(&batch, &src_surf, src_level, src_base_layer + i,
> @@ -386,6 +393,13 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
>  buffer_row_pitch, buffer_format,
>  &buffer.surf, &buffer_isl_surf);
>  
> +  if (&image == dst) {
> + anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
> +   aspect, dst->surf.aux_usage,
> +   dst->level,
> +   dst->offset.z, extent.depth);
> +  }
> +
>for (unsigned z = 0; z < extent.depth; z++) {
>   blorp_copy(&batch, &src->surf, src->level, src->offset.z,
>  &dst->surf, dst->level, dst->offset.z,
> @@ -545,6 +559,12 @@ void anv_CmdBlitImage(
>bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
>  
>const unsigned num_layers = dst_end - dst_start;
> +  anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
> +dst_res->aspectMask,
> +dst.aux_usage,
> +dst_res->mipLevel,
> +dst_start, num_layers);
> +
>for (unsigned i = 0; i < num_layers; i++) {
>   unsigned dst_z = dst_start + i;
>   unsigned src_z = src_start + i * src_z_step;
> @@ -558,7 +578,6 @@ void anv_CmdBlitImage(
>  dst_x0, dst_y0, dst_x1, dst_y1,
>  gl_filter, flip_x, flip_y);
>}
> -
> }
>  
> blorp_batch_finish(&batch);
> @@ -818,6 +837,11 @@ void anv_CmdClearColorImage(
>  layer_count = anv_minify(image->extent.depth, level);
>   }
>  
> + anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> +   pRanges[r].aspectMask,
> +   surf.aux_usage, level,
> +   base_layer, layer_count);
> +
>   blorp_clear(&batch, &surf,
>   src_format.isl_format, src_format.swizzle,
>   level, base_layer, layer_count,
> @@ -1215,6 +1239,13 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
>  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>} else {
>   assert(image->n_planes == 1);
> + anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> +   VK_IMAGE_ASPECT_COLOR_BIT,
> +   att_state->aux_usage,
> +   iview->pla

Re: [Mesa-dev] [PATCH] i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext

2018-01-22 Thread Chris Wilson
Quoting Tapani Pälli (2018-01-22 08:26:15)
> Reviewed-by: Tapani Pälli 
> 
> On 01/20/2018 02:29 AM, Chris Wilson wrote:
> > The forward port of commit 6d87500fe12e ("dri: Change
> > __DriverApiRec::CreateContext to take a struct for attribs") failed to
> > adapt the set of allowed attributes for the earlier introduction of
> > context priorities (commit 1617fca6d12e "i965: Pass the EGL/DRI context
> > priority through to the kernel").
> > 
> > Fixes: 6d87500fe12e ("dri: Change __DriverApiRec::CreateContext to take a 
> > struct for attribs")
> > Signed-off-by: Chris Wilson 
> > Cc: Adam Jackson 
> > Cc: Nicolai Hähnle 
> > Cc: Emil Velikov 
> > Cc: Kenneth Graunke 

Do I need to do anything special for this patch to make it into whatever
appropriate branch to fix the regression?
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] st/omx/tizonia: Add --enable-omx-tizonia flag and build files

2018-01-22 Thread Gurkirpal Singh
Since patch 1 was rejected this time around for being too large I've also
sent the series directly to Leo and Julien.

On Sat, Jan 20, 2018 at 8:01 AM, Gurkirpal Singh 
wrote:

> I've added the right patches as a reply.
>
> Sorry again for the inconvenience
>
> On Sat, Jan 20, 2018 at 3:38 AM, Gurkirpal Singh 
> wrote:
>
>>
>>
>> On Fri, Jan 19, 2018 at 8:30 PM, Leo Liu  wrote:
>>
>>>
>>>
>>> On 01/19/2018 04:46 AM, Julien Isorce wrote:
>>>
>>> Hi Leo,
>>>
>>> Do you have enough data to start the merge ?
>>>
>>> Hi Julien,
>>>
>>> What I can help with is getting the patches pushed upstream, which I
>>> think anyone with a write-permission account can also do.
>>>
>>> I would like to help push patches that apply cleanly and build
>>> successfully, and we can take care of how functional they are later.
>>>
>>> @Dylan, you said:
>>> >The patches in this thread don't apply, there is some other work on your
>>> tree not
>>> >included maybe? Or I'd see about adding it (I don't think it would be
>>> too hard).
>>> >Do you have this series in a public git repo I could pull from?
>>>
>>> I have the patch from this thread applied cleanly without any problem,
>>> re-attached as your reference.
>>>
>>> $ git am *.patch
>>> Applying: st/omx/bellagio: Rename st and target directories
>>> Applying: st/omx/tizonia: Add --enable-omx-tizonia flag and build files
>>> Applying: st/omx/tizonia: Add entrypoint
>>> Applying: st/omx/tizonia: Add H.264 decoder
>>> Applying: st/omx/tizonia: Add H.264 encoder
>>> Applying: st/omx/tizonia/h264d: Add EGLImage support
>>>
>>>
>>>
>>> >For Tizonia, what I'd like to do is change the -Dgallium-omx argument
>>> to accept
>>> >['auto', 'bellagio', 'tizonia', 'disabled'] (this mirrors the glx
>>> option), with
>>> >auto trying to find bellagio and tizonia in some order and falling back
>>> to
>>> >disabled if neither is found.
>>>
>>> Can you help to add this into current patch set ?  Thanks.
>>>
>>>
>>> @Gurkirpal, the patches (also re-attached) failed to build
>>>
>>
>> Ah my bad. I just realised these were slightly older version of same
>> patches that got mixed up. I'll send in the latest ones ASAP.
>>
>>>
>>>
>>> Making all in state_trackers/omx
>>> make[4]: Entering directory 'mesa/src/gallium/state_trackers/omx'
>>> Making all in bellagio
>>> make[5]: Entering directory 'mesa/src/gallium/state_tracke
>>> rs/omx/bellagio'
>>>   CC   entrypoint.lo
>>>   CC   vid_dec_mpeg12.lo
>>>   CC   vid_dec.lo
>>>   CC   vid_dec_h265.lo
>>>   CC   vid_enc.lo
>>>   CC   vid_dec_h264.lo
>>> vid_enc.c: In function ‘vid_enc_Constructor’:
>>> vid_enc.c:150:19: error: implicit declaration of function
>>> ‘omx_get_screen’; did you mean ‘os_get_option’?
>>> [-Werror=implicit-function-declaration]
>>> priv->screen = omx_get_screen("OMX_RENDER_NODE");
>>>^~
>>>os_get_option
>>> vid_enc.c:150:17: warning: assignment makes pointer from integer without
>>> a cast [-Wint-conversion]
>>> priv->screen = omx_get_screen("OMX_RENDER_NODE");
>>>  ^
>>> vid_enc.c: In function ‘vid_enc_Destructor’:
>>> vid_enc.c:276:7: error: implicit declaration of function
>>> ‘omx_put_screen’ [-Werror=implicit-function-declaration]
>>>omx_put_screen();
>>>^~
>>> cc1: some warnings being treated as errors
>>> Makefile:682: recipe for target 'vid_enc.lo' failed
>>> make[5]: *** [vid_enc.lo] Error 1
>>>
>>>
>>> Leo
>>>
>>>
>>>
>>>
>>> Thx
>>> Julien
>>>
>>> On 18 January 2018 at 02:28, Gurkirpal Singh 
>>> wrote:
>>>


 On Wed, Jan 17, 2018 at 10:39 PM, Dylan Baker 
 wrote:

> The meson build for omx-bellagio has landed along with the other media
> state
> trackers that are in tree. Currently there is a known bug with at
> least vdpau
> and va-api (I suspect that the omx, va, and xvmc are affected as well)
> which
> causes required symbols to not be exported, I'm still trying to
> resolve that.
>
> For Tizonia, what I'd like to do is change the -Dgallium-omx argument
> to accept
> ['auto', 'bellagio', 'tizonia', 'disabled'] (this mirrors the glx
> option), with
> auto trying to find bellagio and tizonia in some order and falling
> back to
> disabled if neither is found.
>
> The patches in this thread don't apply, there is some other work on
> your tree not
> included maybe? Or I'd see about adding it (I don't think it would be
> too hard).
> Do you have this series in a public git repo I could pull from?
>

 I decided not to commit incomplete work so there are only these commits
 in the repo.
 I have it on my github repo branch "refactor_stomx"
 https://github.com/gpalsingh/mesa/commits/refactor_stomx

 Cheers

>
> Dylan
>
> Quoting Leo Liu (2018-01-17 08:23:54)
> >
> >
> > On 01/17/2018 11:19 AM, Gurkirpal Singh wrote:
> >
> >
> >
> >

Re: [Mesa-dev] [PATCH] i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext

2018-01-22 Thread Jason Ekstrand
On Mon, Jan 22, 2018 at 1:22 AM, Chris Wilson 
wrote:

> Quoting Tapani Pälli (2018-01-22 08:26:15)
> > Reviewed-by: Tapani Pälli 
> >
> > On 01/20/2018 02:29 AM, Chris Wilson wrote:
> > > The forward port of commit 6d87500fe12e ("dri: Change
> > > __DriverApiRec::CreateContext to take a struct for attribs") failed to
> > > adapt the set of allowed attributes for the earlier introduction of
> > > context priorities (commit 1617fca6d12e "i965: Pass the EGL/DRI context
> > > priority through to the kernel").
> > >
> > > Fixes: 6d87500fe12e ("dri: Change __DriverApiRec::CreateContext to
> take a struct for attribs")
> > > Signed-off-by: Chris Wilson 
> > > Cc: Adam Jackson 
> > > Cc: Nicolai Hähnle 
> > > Cc: Emil Velikov 
> > > Cc: Kenneth Graunke 
>
> Do I need to do anything special for this patch to make it into whatever
> appropriate branch to fix the regression?
>

Cc: mesa-sta...@lists.freedesktop.org


> -Chris
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 15/24] anv/cmd_buffer: Pull the undefined layout condition into the if

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:32PM -0800, Jason Ekstrand wrote:
> Now that this isn't a multi-case if and it's just the one case, it's a
> bit clearer if the condition is just part of the if instead of being
> pulled out into a boolean variable.

Reviewed-by: Topi Pohjolainen 

> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 13 -
>  1 file changed, 4 insertions(+), 9 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 0f56719..67d3839 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -662,20 +662,15 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>anv_image_aux_layers(image, aspect, base_level) - 
> base_layer);
> last_level_num = base_level + level_count;
>  
> -   /* Record whether or not the layout is undefined. Pre-initialized images
> -* with auxiliary buffers have a non-linear layout and are thus undefined.
> -*/
> assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> -   const bool undef_layout = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
> - initial_layout == 
> VK_IMAGE_LAYOUT_PREINITIALIZED;
>  
> -   /* Do preparatory work before the resolve operation or return early if no
> -* resolve is actually needed.
> -*/
> -   if (undef_layout) {
> +   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
> +   initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
>/* A subresource in the undefined layout may have been aliased and
> * populated with any arrangement of bits. Therefore, we must 
> initialize
> * the related aux buffer and clear buffer entry with desirable values.
> +   * An initial layout of PREINITIALIZED is the same as UNDEFINED for
> +   * images with VK_IMAGE_TILING_OPTIMAL.
> *
> * Initialize the relevant clear buffer entries.
> */
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 16/24] anv/cmd_buffer: Re-arrange the logic around UNDEFINED fast-clears

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:33PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 31 ++-
>  1 file changed, 14 insertions(+), 17 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 67d3839..77fdadf 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -692,29 +692,26 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
> * We don't have any data to show that this is a problem, but we want 
> to
> * avoid causing difficult-to-debug problems.
> */
> -  if ((GEN_GEN >= 9 && image->samples == 1) || image->samples > 1) {
> +  if (GEN_GEN >= 9 && image->samples == 1) {
> + for (uint32_t l = 0; l < level_count; l++) {
> +const uint32_t level = base_level + l;
> +const uint32_t level_layer_count =
> +   MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
> +anv_image_ccs_op(cmd_buffer, image, aspect, level,
> + base_layer, level_layer_count,
> + ISL_AUX_OP_FAST_CLEAR, false);
> + }
> +  } else if (image->samples > 1) {
>   if (image->samples == 4 || image->samples == 16) {
>  anv_perf_warn(cmd_buffer->device->instance, image,
>"Doing a potentially unnecessary fast-clear to "
>"define an MCS buffer.");
>   }
>  
> - if (image->samples == 1) {
> -for (uint32_t l = 0; l < level_count; l++) {
> -   const uint32_t level = base_level + l;
> -   const uint32_t level_layer_count =
> -  MIN2(layer_count, anv_image_aux_layers(image, aspect, 
> level));
> -   anv_image_ccs_op(cmd_buffer, image, aspect, level,
> -base_layer, level_layer_count,
> -ISL_AUX_OP_FAST_CLEAR, false);
> -}
> - } else {
> -assert(image->samples > 1);
> -assert(base_level == 0 && level_count == 1);
> -anv_image_mcs_op(cmd_buffer, image, aspect,
> - base_layer, layer_count,
> - ISL_AUX_OP_FAST_CLEAR, false);
> - }
> + assert(base_level == 0 && level_count == 1);
> + anv_image_mcs_op(cmd_buffer, image, aspect,
> +  base_layer, layer_count,
> +  ISL_AUX_OP_FAST_CLEAR, false);
>}
>/* At this point, some elements of the CCS buffer may have the 
> fast-clear
> * bit-arrangement. As the user writes to a subresource, we need to 
> have
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 17/24] anv: Use blorp_ccs_ambiguate instead of fast-clears

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:34PM -0800, Jason Ekstrand wrote:
> Even though the blorp pass looks a bit on the sketchy side, the end
> result in the Vulkan driver is very nice.  Instead of having this weird
> case where you do a fast clear and then maybe have to resolve, we just
> do the ambiguate and are done with it.  The ambiguate does exactly what
> we want of setting all the CCS values to 0 which puts it into the
> pass-through state.
> 
> This should also improve performance a bit in certain cases.  For
> instance, if we did a transition from UNDEFINED to GENERAL for a surface
> that doesn't have CCS enabled all the time, we would end up doing a
> fast-clear and then a full resolve which ends up touching every byte in
> the main surface as well as the CCS.  With the ambiguate pass, that
> transition only touches the CCS.
> ---
>  src/intel/vulkan/anv_blorp.c   |  5 
>  src/intel/vulkan/genX_cmd_buffer.c | 54 
> +-
>  2 files changed, 17 insertions(+), 42 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 05efc6d..3698543 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1792,6 +1792,11 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
>  surf.surf->format, 
> isl_to_blorp_fast_clear_op(ccs_op));
>break;
> case ISL_AUX_OP_AMBIGUATE:
> +  for (uint32_t a = 0; a < layer_count; a++) {
> + const uint32_t layer = base_layer + a;
> + blorp_ccs_ambiguate(&batch, &surf, level, layer);
> +  }
> +  break;
> default:
>unreachable("Unsupported CCS operation");
> }
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 77fdadf..9e2eba3 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -486,15 +486,6 @@ init_fast_clear_state_entry(struct anv_cmd_buffer 
> *cmd_buffer,
> uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> enum isl_aux_usage aux_usage = image->planes[plane].aux_usage;
>  
> -   /* The resolve flag should updated to signify that fast-clear/compression
> -* data needs to be removed when leaving the undefined layout. Such data
> -* may need to be removed if it would cause accesses to the color buffer
> -* to return incorrect data. The fast clear data in CCS_D buffers should
> -* be removed because CCS_D isn't enabled all the time.
> -*/
> -   genX(set_image_needs_resolve)(cmd_buffer, image, aspect, level,
> - aux_usage == ISL_AUX_USAGE_NONE);
> -
> /* The fast clear value dword(s) will be copied into a surface state 
> object.
>  * Ensure that the restrictions of the fields in the dword(s) are 
> followed.
>  *
> @@ -677,10 +668,9 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>for (unsigned level = base_level; level < last_level_num; level++)
>   init_fast_clear_state_entry(cmd_buffer, image, aspect, level);
>  
> -  /* Initialize the aux buffers to enable correct rendering. This 
> operation
> -   * requires up to two steps: one to rid the aux buffer of data that may
> -   * cause GPU hangs, and another to ensure that writes done without aux
> -   * will be visible to reads done with aux.
> +  /* Initialize the aux buffers to enable correct rendering.  In order to
> +   * ensure that things such as storage images work correctly, aux 
> buffers
> +   * are initialized to the pass-through state.
> *
> * Having an aux buffer with invalid data is possible for CCS buffers
> * SKL+ and for MCS buffers with certain sample counts (2x and 8x). One
> @@ -692,16 +682,18 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
> * We don't have any data to show that this is a problem, but we want 
> to
> * avoid causing difficult-to-debug problems.
> */
> -  if (GEN_GEN >= 9 && image->samples == 1) {
> +  if (image->samples == 1) {
>   for (uint32_t l = 0; l < level_count; l++) {
>  const uint32_t level = base_level + l;
>  const uint32_t level_layer_count =
> MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
>  anv_image_ccs_op(cmd_buffer, image, aspect, level,
>   base_layer, level_layer_count,
> - ISL_AUX_OP_FAST_CLEAR, false);
> + ISL_AUX_OP_AMBIGUATE, false);
> +genX(set_image_needs_resolve)(cmd_buffer, image,
> +  aspect, level, false);
>   }
> -  } else if (image->samples > 1) {
> +  } else {
>   if (image->samples == 4 || image->samples == 16) {
>  anv_perf_warn(cmd_buffer->device->instance, image,
>"Doing a potentially unnec

Re: [Mesa-dev] [PATCH v2 18/24] anv/image: Simplify some verbose comments

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:35PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_image.c | 13 +++--
>  1 file changed, 3 insertions(+), 10 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index e34ac95..94b9ecb 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -235,18 +235,11 @@ add_fast_clear_state_buffer(struct anv_image *image,
> assert(image->planes[plane].aux_surface.isl.size > 0 &&
>image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
>  
> -   /* The offset to the buffer of clear values must be dword-aligned for GPU
> -* memcpy operations. It is located immediately after the auxiliary 
> surface.
> -*/
> -
> -   /* Tiled images are guaranteed to be 4K aligned, so the image alignment
> -* should also be dword-aligned.
> +   /* Compressed images must be tiled and therefore everything should be 4K
> +* aligned.  The CCS has the same alignment requirements.  This is good
> +* because we need at least dword-alignment for MI_LOAD/STORE operations.
>  */
> assert(image->alignment % 4 == 0);
> -
> -   /* Auxiliary buffers should be a multiple of 4K, so the start of the clear
> -* values buffer should already be dword-aligned.
> -*/
> assert((image->planes[plane].offset + image->planes[plane].size) % 4 == 
> 0);
>  
> /* This buffer should be at the very end of the plane. */
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 19/24] anv/cmd_buffer: Move the mi_alu helper higher up

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:36PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 34 +++---
>  1 file changed, 19 insertions(+), 15 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 9e2eba3..15e805f 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -431,6 +431,25 @@ genX(set_image_needs_resolve)(struct anv_cmd_buffer 
> *cmd_buffer,
> }
>  }
>  
> +#if GEN_IS_HASWELL || GEN_GEN >= 8
> +static inline uint32_t
> +mi_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
> +{
> +   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
> +  .ALUOpcode = opcode,
> +  .Operand1 = operand1,
> +  .Operand2 = operand2,
> +   };
> +
> +   uint32_t dw;
> +   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
> +
> +   return dw;
> +}
> +#endif
> +
> +#define CS_GPR(n) (0x2600 + (n) * 8)
> +
>  static void
>  genX(load_needs_resolve_predicate)(struct anv_cmd_buffer *cmd_buffer,
> const struct anv_image *image,
> @@ -2312,21 +2331,6 @@ void genX(CmdDrawIndexed)(
>  /* MI_MATH only exists on Haswell+ */
>  #if GEN_IS_HASWELL || GEN_GEN >= 8
>  
> -static uint32_t
> -mi_alu(uint32_t opcode, uint32_t op1, uint32_t op2)
> -{
> -   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
> -  .ALUOpcode = opcode,
> -  .Operand1 = op1,
> -  .Operand2 = op2,
> -   };
> -
> -   uint32_t dw;
> -   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
> -
> -   return dw;
> -}
> -
>  #define CS_GPR(n) (0x2600 + (n) * 8)

This needs to be removed as well. With that:

Reviewed-by: Topi Pohjolainen 

>  
>  /* Emit dwords to multiply GPR0 by N */
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext

2018-01-22 Thread Chris Wilson
Quoting Jason Ekstrand (2018-01-22 09:45:55)
> On Mon, Jan 22, 2018 at 1:22 AM, Chris Wilson  
> wrote:
> 
> Quoting Tapani Pälli (2018-01-22 08:26:15)
> > Reviewed-by: Tapani Pälli 
> >
> > On 01/20/2018 02:29 AM, Chris Wilson wrote:
> > > The forward port of commit 6d87500fe12e ("dri: Change
> > > __DriverApiRec::CreateContext to take a struct for attribs") failed to
> > > adapt the set of allowed attributes for the earlier introduction of
> > > context priorities (commit 1617fca6d12e "i965: Pass the EGL/DRI 
> context
> > > priority through to the kernel").
> > >
> > > Fixes: 6d87500fe12e ("dri: Change __DriverApiRec::CreateContext to 
> take
> a struct for attribs")
> > > Signed-off-by: Chris Wilson 
> > > Cc: Adam Jackson 
> > > Cc: Nicolai Hähnle 
> > > Cc: Emil Velikov 
> > > Cc: Kenneth Graunke 
> 
> Do I need to do anything special for this patch to make it into whatever
> appropriate branch to fix the regression?
> 
> 
> Cc: mesa-sta...@lists.freedesktop.org

Ta, thanks for the review and pushed. Hopefully nipping it in the bud,
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext

2018-01-22 Thread Emil Velikov
On 22 January 2018 at 10:27, Chris Wilson  wrote:
> Quoting Jason Ekstrand (2018-01-22 09:45:55)
>> On Mon, Jan 22, 2018 at 1:22 AM, Chris Wilson  
>> wrote:
>>
>> Quoting Tapani Pälli (2018-01-22 08:26:15)
>> > Reviewed-by: Tapani Pälli 
>> >
>> > On 01/20/2018 02:29 AM, Chris Wilson wrote:
>> > > The forward port of commit 6d87500fe12e ("dri: Change
>> > > __DriverApiRec::CreateContext to take a struct for attribs") failed 
>> to
>> > > adapt the set of allowed attributes for the earlier introduction of
>> > > context priorities (commit 1617fca6d12e "i965: Pass the EGL/DRI 
>> context
>> > > priority through to the kernel").
>> > >
>> > > Fixes: 6d87500fe12e ("dri: Change __DriverApiRec::CreateContext to 
>> take
>> a struct for attribs")
>> > > Signed-off-by: Chris Wilson 
>> > > Cc: Adam Jackson 
>> > > Cc: Nicolai Hähnle 
>> > > Cc: Emil Velikov 
>> > > Cc: Kenneth Graunke 
>>
>> Do I need to do anything special for this patch to make it into whatever
>> appropriate branch to fix the regression?
>>
>>
>> Cc: mesa-sta...@lists.freedesktop.org
>
> Ta, thanks for the review and pushed. Hopefully nipping it in the bud,
When in doubt - follow the kernel rules for stable nominations. Modulo
tweaked ML.

Thanks for fixing this Chris.
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 1/2] r600/sb: Set flags for GROUP_BARRIER instruction and force it into slot X

2018-01-22 Thread Nicolai Hähnle

On 11.01.2018 12:22, Ilia Mirkin wrote:

On Thu, Jan 11, 2018 at 3:51 AM, Gert Wollny  wrote:

Am Donnerstag, den 11.01.2018, 08:09 +0100 schrieb Gert Wollny:

Am Mittwoch, den 10.01.2018, 15:27 -0500 schrieb Ilia Mirkin:
[...]


If your hardware executes all the vertices in parallel, then a
barrier should be unnecessary.


My first try for this patch did not include forcing the barrier into
slot x, which in turn resulted in failing piglits, e.g.

tcs-output-array-float-index-rd-after-barrier

and I'm quite confident that the LDS r/w order was not broken - also
because just forcing the barrier into slot x fixed it, so I guess not
all vertices are always processed in parallel, which is also no
surprise since the number of shader units attributed to the TCS stage
is limited.

Dropping the barrier altogether doesn't have an impact on the
tessellation/barrier piglits, but I still wonder what happens if there
are more vertices to be processed than there are shader units so that
they cannot be processed in parallel ...


The max vertices in a patch is 32, so that is generally within a
hardware's ability to do in a single wave. If it's not, then you
definitely need a barrier.


At least on GCN, tessellation control shaders run multiple waves 
together in a workgroup, and if the number of invocations per patch 
doesn't cleanly divide 64 (the wave size), the patch will cross a wave 
boundary. Not sure if that applies to pre-GCN chips as well, but it 
could contribute to explaining some of the perceived weirdness.


Cheers,
Nicolai



   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] clover: add functions up to 2.2 to ICD dispatch table

2018-01-22 Thread Karol Herbst
using void* for functions we need newer CL headers for.

Signed-off-by: Karol Herbst 
---
 src/gallium/state_trackers/clover/api/dispatch.cpp |  29 +-
 src/gallium/state_trackers/clover/api/dispatch.hpp | 116 +
 2 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp 
b/src/gallium/state_trackers/clover/api/dispatch.cpp
index 8f4cfdc7fb..f362c84819 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.cpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
@@ -142,6 +142,33 @@ namespace clover {
   NULL, // clEnqueueReleaseD3D11ObjectsKHR
   NULL, // clGetDeviceIDsFromDX9MediaAdapterKHR
   NULL, // clEnqueueAcquireDX9MediaSurfacesKHR
-  NULL // clEnqueueReleaseDX9MediaSurfacesKHR
+  NULL, // clEnqueueReleaseDX9MediaSurfacesKHR
+  NULL, // clCreateFromEGLImageKHR
+  NULL, // clEnqueueAcquireEGLObjectsKHR
+  NULL, // clEnqueueReleaseEGLObjectsKHR
+  NULL, // clCreateEventFromEGLSyncKHR
+  NULL, // clCreateCommandQueueWithProperties
+  NULL, // clCreatePipe
+  NULL, // clGetPipeInfo
+  NULL, // clSVMAlloc
+  NULL, // clSVMFree
+  NULL, // clEnqueueSVMFree
+  NULL, // clEnqueueSVMMemcpy
+  NULL, // clEnqueueSVMMemFill
+  NULL, // clEnqueueSVMMap
+  NULL, // clEnqueueSVMUnmap
+  NULL, // clCreateSamplerWithProperties
+  NULL, // clSetKernelArgSVMPointer
+  NULL, // clSetKernelExecInfo
+  NULL, // clGetKernelSubGroupInfoKHR
+  NULL, // clCloneKernel
+  NULL, // clCreateProgramWithIL,
+  NULL, // clEnqueueSVMMigrateMem
+  NULL, // clGetDeviceAndHostTimer
+  NULL, // clGetHostTimer
+  NULL, // clGetKernelSubGroupInfo
+  NULL, // clSetDefaultDeviceCommandQueue
+  NULL, // clSetProgramReleaseCallback
+  NULL, // clSetProgramSpecializationConstant
};
 }
diff --git a/src/gallium/state_trackers/clover/api/dispatch.hpp 
b/src/gallium/state_trackers/clover/api/dispatch.hpp
index 0ec1b51fa6..0910e19422 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.hpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.hpp
@@ -765,6 +765,122 @@ struct _cl_icd_dispatch {
void *clGetDeviceIDsFromDX9MediaAdapterKHR;
void *clEnqueueAcquireDX9MediaSurfacesKHR;
void *clEnqueueReleaseDX9MediaSurfacesKHR;
+   void *clCreateFromEGLImageKHR;
+   void *clEnqueueAcquireEGLObjectsKHR;
+   void *clEnqueueReleaseEGLObjectsKHR;
+   void *clCreateEventFromEGLSyncKHR;
+   void *clCreateCommandQueueWithProperties;
+   void *clCreatePipe;
+   void *clGetPipeInfo;
+   void *clSVMAlloc;
+
+   CL_API_ENTRY void (CL_API_CALL *clSVMFree)(
+  cl_context context,
+  void *svm_pointer);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMFree)(
+  cl_command_queue command_queue,
+  cl_uint num_svm_pointers,
+  void **svm_pointers,
+  void (CL_CALLBACK *pfn_free_func)(cl_command_queue, cl_uint, void **, 
void *),
+  void *user_data,
+  cl_uint num_events_in_wait_list,
+  const cl_event *event_wait_list,
+  cl_event *event);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(
+  cl_command_queue command_queue,
+  cl_bool blocking_copy,
+  void *dst_ptr,
+  const void *src_ptr,
+  size_t size,
+  cl_uint num_events_in_wait_list,
+  const cl_event *event_wait_list,
+  cl_event *event);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMMemFill)(
+  cl_command_queue command_queue,
+  void *svm_ptr,
+  const void *pattern,
+  size_t pattern_size,
+  size_t size,
+  cl_uint num_events_in_wait_list,
+  const cl_event *event_wait_list,
+  cl_event *event);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMMap)(
+  cl_command_queue command_queue,
+  cl_bool blocking_map,
+  cl_map_flags map_flags,
+  void *svm_ptr,
+  size_t size,
+  cl_uint num_events_in_wait_list,
+  const cl_event *event_wait_list,
+  cl_event *event);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMUnmap)(
+  cl_command_queue command_queue,
+  void *svm_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event *event_wait_list,
+  cl_event *event);
+
+   void *clCreateSamplerWithProperties;
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(
+  cl_kernel kernel,
+  cl_uint arg_index,
+  const void *arg_value);
+
+   void *clSetKernelExecInfo;
+   void *clGetKernelSubGroupInfoKHR;
+
+   CL_API_ENTRY cl_kernel (CL_API_CALL *clCloneKernel)(
+  cl_kernel source_kernel,
+  cl_int *errcode_ret);
+
+   CL_API_ENTRY cl_program (CL_API_CALL *clCreateProgramWithIL)(
+  cl_context context,
+  const void *il,
+  size_t length,
+  cl_int *errcode_ret);
+
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueSVMMigrateMem)(
+  cl_command_queue command_queue,
+  cl_uint num_svm_pointers,
+  const void **svm_pointers,
+  const size_t *sizes,
+ 

Re: [Mesa-dev] [PATCH v2 1/2] ac/nir: Use correct 32-bit component writemask for 64-bit SSBO stores.

2018-01-22 Thread Nicolai Hähnle

On 18.01.2018 00:48, Bas Nieuwenhuizen wrote:

Fixes: 91074bb11bda "radv/ac: Implement Float64 SSBO stores."
---
  src/amd/common/ac_nir_to_llvm.c | 22 +-
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 337dfdb5ec..12f7772a5c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2408,6 +2408,16 @@ static LLVMValueRef visit_get_buffer_size(struct 
ac_nir_context *ctx,
  
  	return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);

  }
+
+static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
+{
+   uint32_t new_mask = 0;
+   for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
+   if (mask & (1u << i))
+   new_mask |= ((1u << multiplier) - 1u) << (i * 
multiplier);


I think using util_bitscan for the loop would be cleaner.

Apart from that, both patches:

Reviewed-by: Nicolai Hähnle 



+   return new_mask;
+}
+
  static void visit_store_ssbo(struct ac_nir_context *ctx,
   nir_intrinsic_instr *instr)
  {
@@ -2429,6 +2439,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
if (components_32bit > 1)
data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
  
+	writemask = widen_mask(writemask, elem_size_mult);

+
base_data = ac_to_float(&ctx->ac, src_data);
base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
@@ -2448,9 +2460,6 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
count = 2;
}
  
-		start *= elem_size_mult;

-   count *= elem_size_mult;
-
if (count > 4) {
writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
count = 4;
@@ -3261,17 +3270,12 @@ visit_store_var(struct ac_nir_context *ctx,
 NULL, NULL, &const_index, &indir_index);
  
  	if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {

-   int old_writemask = writemask;
  
  		src = LLVMBuildBitCast(ctx->ac.builder, src,

   LLVMVectorType(ctx->ac.f32, 
ac_get_llvm_num_components(src) * 2),
   "");
  
-		writemask = 0;

-   for (unsigned chan = 0; chan < 4; chan++) {
-   if (old_writemask & (1 << chan))
-   writemask |= 3u << (2 * chan);
-   }
+   writemask = widen_mask(writemask, 2);
}
  
  	switch (instr->variables[0]->var->data.mode) {





--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: fix visit_ssa_undef() for doubles

2018-01-22 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 18.01.2018 02:03, Timothy Arceri wrote:

Fixes: f4e499ec7914 "radv: add initial non-conformant radv vulkan driver"
---
  src/amd/common/ac_nir_to_llvm.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5d582cb0ed..43486fdd6c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5052,12 +5052,13 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,
const nir_ssa_undef_instr *instr)
  {
unsigned num_components = instr->def.num_components;
+   LLVMTypeRef type = instr->def.bit_size == 32 ? ctx->ac.i32 : 
ctx->ac.i64;
LLVMValueRef undef;
  
  	if (num_components == 1)

-   undef = LLVMGetUndef(ctx->ac.i32);
+   undef = LLVMGetUndef(type);
else {
-   undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, 
num_components));
+   undef = LLVMGetUndef(LLVMVectorType(type, num_components));
}
_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
  }




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3 3/3] anv: implement VK_EXT_global_priority extension

2018-01-22 Thread Tapani Pälli
v2: add ANV_CONTEXT_REALTIME_PRIORITY (Chris)
use unreachable with unknown priority (Samuel)

v3: add stubs in gem_stubs.c (Emil)
use priority defines from gen_defines.h

Signed-off-by: Tapani Pälli 
Reviewed-by: Samuel Iglesias Gonsálvez  (v2)
Reviewed-by: Chris Wilson  (v2)
---
 src/intel/vulkan/anv_device.c  | 25 +++
 src/intel/vulkan/anv_extensions.py |  2 ++
 src/intel/vulkan/anv_gem.c | 51 ++
 src/intel/vulkan/anv_gem_stubs.c   | 10 
 src/intel/vulkan/anv_private.h |  3 +++
 5 files changed, 91 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 777abd8757..42ebc19f2b 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -369,6 +369,9 @@ anv_physical_device_init(struct anv_physical_device *device,
device->has_syncobj_wait = device->has_syncobj &&
   anv_gem_supports_syncobj_wait(fd);
 
+   if (anv_gem_has_context_priority(fd))
+  device->has_context_priority = true;
+
bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
 
/* Starting with Gen10, the timestamp frequency of the command streamer may
@@ -1205,6 +1208,15 @@ VkResult anv_CreateDevice(
   }
}
 
+   /* Check if client specified queue priority. */
+   const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
+  vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
+   DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+
+   VkQueueGlobalPriorityEXT priority =
+  queue_priority ? queue_priority->globalPriority :
+ VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
+
device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
sizeof(*device), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -1234,6 +1246,19 @@ VkResult anv_CreateDevice(
   goto fail_fd;
}
 
+   /* As per spec, the driver implementation may deny requests to acquire
+* a priority above the default priority (MEDIUM) if the caller does not
+* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
+* is returned.
+*/
+   if (physical_device->has_context_priority) {
+  int err = anv_gem_set_context_priority(device, priority);
+  if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) {
+ result = vk_error(VK_ERROR_NOT_PERMITTED_EXT);
+ goto fail_fd;
+  }
+   }
+
device->info = physical_device->info;
device->isl_dev = physical_device->isl_dev;
 
diff --git a/src/intel/vulkan/anv_extensions.py 
b/src/intel/vulkan/anv_extensions.py
index adfebca985..aacf39248f 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -86,6 +86,8 @@ EXTENSIONS = [
 Extension('VK_KHX_multiview', 1, True),
 Extension('VK_EXT_debug_report',  8, True),
 Extension('VK_EXT_external_memory_dma_buf',   1, True),
+Extension('VK_EXT_global_priority',   1,
+  'device->has_context_priority'),
 ]
 
 class VkVersion:
diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
index 34c0989108..7f83820429 100644
--- a/src/intel/vulkan/anv_gem.c
+++ b/src/intel/vulkan/anv_gem.c
@@ -30,6 +30,7 @@
 #include 
 
 #include "anv_private.h"
+#include "common/gen_defines.h"
 
 static int
 anv_ioctl(int fd, unsigned long request, void *arg)
@@ -302,6 +303,56 @@ close_and_return:
return swizzled;
 }
 
+static int
+vk_priority_to_anv(int priority)
+{
+   switch (priority) {
+   case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
+  return GEN_CONTEXT_LOW_PRIORITY;
+   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
+  return GEN_CONTEXT_MEDIUM_PRIORITY;
+   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
+  return GEN_CONTEXT_HIGH_PRIORITY;
+   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
+  return GEN_CONTEXT_REALTIME_PRIORITY;
+   default:
+  unreachable("Invalid priority");
+   }
+}
+
+static int
+_anv_gem_set_context_priority(int fd,
+  int context_id,
+  int priority)
+{
+   struct drm_i915_gem_context_param p = {
+  .ctx_id = context_id,
+  .param = I915_CONTEXT_PARAM_PRIORITY,
+  .value = vk_priority_to_anv(priority),
+   };
+   int err = 0;
+
+   if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
+  err = -errno;
+
+   return err;
+}
+
+int
+anv_gem_set_context_priority(struct anv_device *device,
+ int priority)
+{
+   return _anv_gem_set_context_priority(device->fd, device->context_id,
+priority);
+}
+
+bool
+anv_gem_has_context_priority(int fd)
+{
+   return !_anv_gem_set_context_priority(fd, 0,
+ VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT);
+}
+
 int
 anv_gem_create_context(struct anv_device *device)
 {
diff --git a/src/intel/v

[Mesa-dev] [PATCH 1/3] intel: add new common header gen_defines.h

2018-01-22 Thread Tapani Pälli
Signed-off-by: Tapani Pälli 
---
 src/intel/common/gen_defines.h | 52 ++
 1 file changed, 52 insertions(+)
 create mode 100644 src/intel/common/gen_defines.h

diff --git a/src/intel/common/gen_defines.h b/src/intel/common/gen_defines.h
new file mode 100644
index 00..f8daa84431
--- /dev/null
+++ b/src/intel/common/gen_defines.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GEN_DEFINES_H
+#define GEN_DEFINES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * \file gen_defines.h
+ *
+ * Common defines we want to share between GL And Vulkan.
+ */
+
+#define GEN_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
+#define GEN_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
+#define GEN_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)
+/* We don't have a strict notion of RT (yet, and when we do it is likely
+ * to be more complicated than a mere priority value!), but we can give
+ * it the absolute most priority available to us. By convention, this
+ * is higher than any other client, except for blocked interactive
+ * clients.
+ */
+#define GEN_CONTEXT_REALTIME_PRIORITY I915_CONTEXT_MAX_USER_PRIORITY
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GEN_DEFINES_H */
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965: use context priority definitions from gen_defines.h

2018-01-22 Thread Tapani Pälli
Signed-off-by: Tapani Pälli 
---
 src/mesa/drivers/dri/i965/brw_bufmgr.h   | 4 
 src/mesa/drivers/dri/i965/brw_context.c  | 8 +---
 src/mesa/drivers/dri/i965/intel_screen.c | 8 +---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index a3745d6667..48c8407f87 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -323,10 +323,6 @@ int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
 
 uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);
 
-#define BRW_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
-#define BRW_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
-#define BRW_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)
-
 int brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
 uint32_t ctx_id,
 int priority);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index b830e71247..f0d036f938 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -75,6 +75,8 @@
 #include "util/debug.h"
 #include "isl/isl.h"
 
+#include "common/gen_defines.h"
+
 /***
  * Mesa's Driver Functions
  ***/
@@ -979,14 +981,14 @@ brwCreateContext(gl_api api,
  return false;
   }
 
-  int hw_priority = BRW_CONTEXT_MEDIUM_PRIORITY;
+  int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
   if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
  switch (ctx_config->priority) {
  case __DRI_CTX_PRIORITY_LOW:
-hw_priority = BRW_CONTEXT_LOW_PRIORITY;
+hw_priority = GEN_CONTEXT_LOW_PRIORITY;
 break;
  case __DRI_CTX_PRIORITY_HIGH:
-hw_priority = BRW_CONTEXT_HIGH_PRIORITY;
+hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
 break;
  }
   }
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index a4e34e9f2c..385b64ad94 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -43,6 +43,8 @@
 #include "utils.h"
 #include "util/xmlpool.h"
 
+#include "common/gen_defines.h"
+
 static const __DRIconfigOptionsExtension brw_config_options = {
.base = { __DRI_CONFIG_OPTIONS, 1 },
.xml =
@@ -1452,14 +1454,14 @@ brw_query_renderer_integer(__DRIscreen *dri_screen,
case __DRI2_RENDERER_HAS_CONTEXT_PRIORITY:
   value[0] = 0;
   if (brw_hw_context_set_priority(screen->bufmgr,
- 0, BRW_CONTEXT_HIGH_PRIORITY) == 0)
+ 0, GEN_CONTEXT_HIGH_PRIORITY) == 0)
  value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH;
   if (brw_hw_context_set_priority(screen->bufmgr,
- 0, BRW_CONTEXT_LOW_PRIORITY) == 0)
+ 0, GEN_CONTEXT_LOW_PRIORITY) == 0)
  value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_LOW;
   /* reset to default last, just in case */
   if (brw_hw_context_set_priority(screen->bufmgr,
- 0, BRW_CONTEXT_MEDIUM_PRIORITY) == 0)
+ 0, GEN_CONTEXT_MEDIUM_PRIORITY) == 0)
  value[0] |= __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM;
   return 0;
case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [GSOC] DriConf Replacement

2018-01-22 Thread Nicolai Hähnle

Hi Jean,

thanks for the interest :)

On 17.01.2018 21:50, Jean Hertel wrote:

Hello Michel,
Hello Nicolai,

I have spent some more time reading the Mesa source code and trying to figure 
out how the prime configuration works.
If I understood correctly, the driconf XML format should have an option inside each 
application indicating if PRIME should be used. This option is named 
"device_id" and defines the device on which this application should run.


Yes. The format is implicitly defined by loader_get_user_preferred_fd in 
src/loader/loader.c:


1: Just pick any render device which is non-default
pci-_XX_XX_X: The device given by its PCI bus address



So when Mesa loads the configuration, if the application has the "device_id" it 
will run under this driver. Am I correct?
If this is the case, then to correctly support PRIME the expected behavior of 
the configuration GUI should be:
1 - Find out if this is a PRIME setup
2 - If it's PRIME, give the user an option to select on which device this 
application should run.

Some more questions that I still have to understand:
- How do we detect at user-land that we are under a PRIME setup?


First of all, I'd say the best way to enumerate devices is to use 
libdrm's drmGetDevices2. Roughly speaking, you have a PRIME setup if you 
have at least two devices.


This function fills in data structures that include the pci bus info 
required to build the pci-_* string above.




- I suppose there is some way to query the device_id directly from Mesa (maybe 
using Xlib). Any idea which API can be called for this?


I take it the real question here on top of the above is, how do you find 
out which device is being used by default?


Assuming you're working under X, you can use the GLX_MESA_query_renderer 
extension to query the PCI vendor and device ID, and then search for 
those in the device list returned by drmGetDevices2. This will work for 
all PRIME laptops.


Where this fails is if somebody builds a system with two identical GPUs. 
I admit I can't think of an overly easy solution to this right now. What 
you could do is go to the level of libxcb-dri3 and basically copy what 
loader_dri3_open does to open the DRM fd. With that in hand, you can 
then use drmGetDevice2 to get the details you need. I have to say 
I'm not entirely sure whether that will work if the UI toolkit has 
already loaded OpenGL.


Neither of these work with native Wayland -- you can either rely on 
Xwayland or ask somebody who knows more about Wayland :)


If none of this works, we could always consider adding an OpenGL 
extension to return the relevant information. It *is* a bit silly that 
you can have an OpenGL context in your hand and not be able to tell 
which device it lives on...




- Having the device_id, how can I get additional data for this device (for 
example the vendor name and the full hardware description). I assume there is 
already an API that makes this possible, as the actual driconf is capable of 
identifying the hardware correctly.


Maybe look at what the existing driconf does? :)

More seriously though, here are some ways to get more information:

- Query the PCI IDs database; on Ubuntu that's part of the pciutils 
package. I don't know what the distro-portable way to do that is though.


- Create an OpenGL context on the device, by opening the DRM device file 
directly and then using libgbm + EGL with the GBM platform. From there 
you can query stuff like OpenGL version, extensions, and obviously the 
driver's vendor and renderer strings.




Sorry if I'm asking too many questions, but I'm really interested in understanding 
how this works.


Not at all, happy to help :)

Cheers,
Nicolai

P.S.: I tried to build your tool, but it needs some fairly recent 
versions of a bunch of dependencies. I'll give it a shot again after 
updating my systems.





Kind Regards,
Jean Hertel


De: Jean Hertel 
Enviado: domingo, 7 de janeiro de 2018 20:10
Para: Michel Dänzer; Nicolai Hähnle
Cc: mesa-dev@lists.freedesktop.org
Assunto: Re: [Mesa-dev] [GSOC] DriConf Replacement
   


Hello Michael,

Finally I found some free time to spend on this.
Can you please give me a small example on how I can get the device_ids ?

Currently to retrieve the available driver options I'm doing something like 
this:

1 - Count the number of screens using "ScreenCount" function from Xlib.
2 - For each screen retrieve the driver name with "glXGetScreenDriver".
3 - For each driver retrieve the available options with "glXGetDriverConfig".

Please note that my knowledge about openGL, x11 and mesa is almost zero, so if 
you have any documentation that I can read it would be nice too.

Kind Regards,
Jean Hertel

-
On 10/04/17 06:02 AM, Michel Dänzer wrote:
   

On 05/04/17 05:26 PM, Nicolai Hähnle wrote:

On 04.04.2017 01:52, Jean Hertel wrote:

2c) Consider adding an option to configure PRIME to driconf.

[2b and 2c will also require changes in Mesa; also, you may want to get
rid of the implicit 

Re: [Mesa-dev] [PATCH 2/3] i965: use context priority definitions from gen_defines.h

2018-01-22 Thread Chris Wilson
Quoting Tapani Pälli (2018-01-22 12:29:31)
> Signed-off-by: Tapani Pälli 

1&2 Reviewed-by: Chris Wilson 
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: add support for gl_HelperInvocation

2018-01-22 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 


On 19.01.2018 00:05, Timothy Arceri wrote:

---
  src/amd/common/ac_nir_to_llvm.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index cf0b3d998c..4f240db4ac 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3914,6 +3914,17 @@ static void emit_discard(struct ac_nir_context *ctx,
ac_build_kill_if_false(&ctx->ac, cond);
  }
  
+static LLVMValueRef

+visit_load_helper_invocation(struct ac_nir_context *ctx)
+{
+   LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
+"llvm.amdgcn.ps.live",
+ctx->ac.i1, NULL, 0,
+AC_FUNC_ATTR_READNONE);
+   result = LLVMBuildNot(ctx->ac.builder, result, "");
+   return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
+}
+
  static LLVMValueRef
  visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
  {
@@ -4316,6 +4327,9 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_load_front_face:
result = ctx->abi->front_face;
break;
+   case nir_intrinsic_load_helper_invocation:
+   result = visit_load_helper_invocation(ctx);
+   break;
case nir_intrinsic_load_instance_id:
result = ctx->abi->instance_id;
break;




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] ac: add stream handling to visit_end_primitive()

2018-01-22 Thread Nicolai Hähnle

Why hasn't this been noticed before? Does Vulkan not have multiple streams?

Anyway, series is:

Reviewed-by: Nicolai Hähnle 



On 18.01.2018 23:26, Timothy Arceri wrote:

---
  src/amd/common/ac_nir_to_llvm.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 12353943a5..9709514532 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4206,10 +4206,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned 
stream, LLVMValueRef *addr
  }
  
  static void

-visit_end_primitive(struct nir_to_llvm_context *ctx,
-   const nir_intrinsic_instr *instr)
+visit_end_primitive(struct nir_to_llvm_context *ctx, unsigned stream)
  {
-   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 
8), ctx->gs_wave_id);
+   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream 
<< 8), ctx->gs_wave_id);
  }
  
  static LLVMValueRef

@@ -4420,7 +4419,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
break;
case nir_intrinsic_end_primitive:
-   visit_end_primitive(ctx->nctx, instr);
+   visit_end_primitive(ctx->nctx, nir_intrinsic_stream_id(instr));
break;
case nir_intrinsic_load_tess_coord: {
LLVMTypeRef type = ctx->nctx ?




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir: add ARB_shader_group_vote support

2018-01-22 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 


On 22.01.2018 00:52, Timothy Arceri wrote:

---
  src/amd/common/ac_nir_to_llvm.c | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ac3a949bd3..eabdc83f16 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4454,6 +4454,21 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_load_patch_vertices_in:
result = ctx->abi->load_patch_vertices_in(ctx->abi);
break;
+   case nir_intrinsic_vote_all: {
+   LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, 
instr->src[0]));
+   result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+   break;
+   }
+   case nir_intrinsic_vote_any: {
+   LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, 
instr->src[0]));
+   result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+   break;
+   }
+   case nir_intrinsic_vote_eq: {
+   LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, get_src(ctx, 
instr->src[0]));
+   result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+   break;
+   }
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac/nir/radeonsi: add ARB_shader_ballot support

2018-01-22 Thread Nicolai Hähnle

On 22.01.2018 02:54, Timothy Arceri wrote:

---
  src/amd/common/ac_nir_to_llvm.c  | 38 
  src/gallium/drivers/radeonsi/si_shader_nir.c |  9 +++
  2 files changed, 47 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index eabdc83f16..2c9b85bf82 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4260,6 +4260,44 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
LLVMValueRef result = NULL;
  
  	switch (instr->intrinsic) {

+   case nir_intrinsic_ballot:
+   result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+   break;
+   case nir_intrinsic_read_invocation:
+   case nir_intrinsic_read_first_invocation: {
+   const char *intr_names[2] = {"llvm.amdgcn.readlane", 
"llvm.amdgcn.readfirstlane"};
+   LLVMValueRef args[2];
+
+   /* Value */
+   args[0] = get_src(ctx, instr->src[0]);
+
+   unsigned num_args;
+   const char *intr_name;
+   if (instr->intrinsic == nir_intrinsic_read_invocation) {
+   num_args = 2;
+   intr_name = intr_names[0];


Please just inline the intrinsic names here.

With that fixed:

Reviewed-by: Nicolai Hähnle 



+
+   /* Invocation */
+   args[1] = get_src(ctx, instr->src[1]);
+   } else {
+   num_args = 1;
+   intr_name = intr_names[1];
+   }
+
+   /* We currently have no other way to prevent LLVM from lifting 
the icmp
+* calls to a dominating basic block.
+*/
+   ac_build_optimization_barrier(&ctx->ac, &args[0]);
+
+   result = ac_build_intrinsic(&ctx->ac, intr_name,
+   ctx->ac.i32, args, num_args,
+   AC_FUNC_ATTR_READNONE |
+   AC_FUNC_ATTR_CONVERGENT);
+   break;
+   }
+   case nir_intrinsic_load_subgroup_invocation:
+   result = ac_get_thread_id(&ctx->ac);
+   break;
case nir_intrinsic_load_work_group_id: {
LLVMValueRef values[3];
  
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c

index 2f19bae813..5b941da949 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -566,6 +566,15 @@ si_lower_nir(struct si_shader_selector* sel)
};
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
  
+	const nir_lower_subgroups_options subgroups_options = {

+   .subgroup_size = 64,
+   .ballot_bit_size = 32,
+   .lower_to_scalar = true,
+   .lower_subgroup_masks = true,
+   .lower_vote_trivial = false,
+   };
+   NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
+
bool progress;
do {
progress = false;




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi/nir: fix fs output index

2018-01-22 Thread Nicolai Hähnle

On 22.01.2018 04:57, Timothy Arceri wrote:

Fixes the following piglit tests:

arb_blend_func_extended-fbo-extended-blend
arb_blend_func_extended-fbo-extended-blend-explicit
arb_blend_func_extended-fbo-extended-blend-explicit_gles3
arb_blend_func_extended-fbo-extended-blend-pattern
arb_blend_func_extended-fbo-extended-blend-pattern_gles2
arb_blend_func_extended-fbo-extended-blend-pattern_gles3
arb_blend_func_extended-fbo-extended-blend_gles3
ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend
ext_framebuffer_multisample/alpha-to-one-dual-src-blend
---
  src/gallium/drivers/radeonsi/si_shader_nir.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5b941da949..81cf503d60 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -378,6 +378,10 @@ void si_nir_scan_shader(const struct nir_shader *nir,
if (nir->info.stage == MESA_SHADER_FRAGMENT) {

tgsi_get_gl_frag_result_semantic(variable->data.location,
&semantic_name, &semantic_index);
+
+   if (variable->data.index > 0) {
+   semantic_index++;


Just to understand this correctly, variable->data.index == 1 indicates 
the second source in a dual source blend, and index values > 1 do not 
occur, right?


Could you please add a comment /* Adjust for dual source blending */ or 
similar?


With that:

Reviewed-by: Nicolai Hähnle 



+   }
} else {
tgsi_get_gl_varying_semantic(variable->data.location, 
true,
 &semantic_name, 
&semantic_index);




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] dri: add interface for EGL_ANDROID_blob_cache extension

2018-01-22 Thread Tapani Pälli
Signed-off-by: Tapani Pälli 
---
 include/GL/internal/dri_interface.h | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index 34a5c9fb01..de367d8f77 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -82,7 +82,7 @@ typedef struct __DRI2flushExtensionRec
__DRI2flushExtension;
 typedef struct __DRI2throttleExtensionRec  __DRI2throttleExtension;
 typedef struct __DRI2fenceExtensionRec  __DRI2fenceExtension;
 typedef struct __DRI2interopExtensionRec   __DRI2interopExtension;
-
+typedef struct __DRI2blobExtensionRec   __DRI2blobExtension;
 
 typedef struct __DRIimageLoaderExtensionRec __DRIimageLoaderExtension;
 typedef struct __DRIimageDriverExtensionRec __DRIimageDriverExtension;
@@ -336,6 +336,30 @@ struct __DRI2throttleExtensionRec {
enum __DRI2throttleReason reason);
 };
 
+/**
+ * Extension for EGL_ANDROID_blob_cache
+ */
+
+#define __DRI2_BLOB "DRI2_Blob"
+#define __DRI2_BLOB_VERSION 1
+
+typedef void
+(*__DRIblobCacheSet) (const void *key, signed long keySize,
+  const void *value, signed long valueSize);
+
+typedef signed long
+(*__DRIblobCacheGet) (const void *key, signed long keySize,
+  void *value, signed long valueSize);
+
+struct __DRI2blobExtensionRec {
+   __DRIextension base;
+
+   /**
+* Set cache functions for setting and getting cache entries.
+*/
+   void (*set_cache_funcs) (__DRIcontext *ctx,
+__DRIblobCacheSet set, __DRIblobCacheGet get);
+};
 
 /**
  * Extension for fences / synchronization objects.
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/5] egl: add support for EGL_ANDROID_blob_cache

2018-01-22 Thread Tapani Pälli
v2: cleanup, move callbacks to _egl_display struct (Emil Velikov)

Signed-off-by: Tapani Pälli 
---
 src/egl/drivers/dri2/egl_dri2.c | 39 +++
 src/egl/drivers/dri2/egl_dri2.h |  1 +
 src/egl/main/eglapi.c   | 30 ++
 src/egl/main/eglapi.h   |  4 
 src/egl/main/egldisplay.h   |  4 
 src/egl/main/eglentrypoint.h|  1 +
 6 files changed, 79 insertions(+)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index d5a4f72e86..c323a4989d 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -458,6 +458,7 @@ static const struct dri2_extension_match 
optional_core_extensions[] = {
{ __DRI2_INTEROP, 1, offsetof(struct dri2_egl_display, interop) },
{ __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
{ __DRI2_FLUSH_CONTROL, 1, offsetof(struct dri2_egl_display, flush_control) 
},
+   { __DRI2_BLOB, 1, offsetof(struct dri2_egl_display, blob) },
{ NULL, 0, 0 }
 };
 
@@ -727,6 +728,9 @@ dri2_setup_screen(_EGLDisplay *disp)
   }
}
 
+   if (dri2_dpy->blob)
+  disp->Extensions.ANDROID_blob_cache = EGL_TRUE;
+
disp->Extensions.KHR_reusable_sync = EGL_TRUE;
 
if (dri2_dpy->image) {
@@ -1470,6 +1474,28 @@ dri2_surf_update_fence_fd(_EGLContext *ctx,
dri2_surface_set_out_fence_fd(surf, fence_fd);
 }
 
+static void
+update_blob_cache_functions( _EGLDisplay *disp, _EGLContext *ctx)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
+
+   if (!ctx)
+  return;
+
+   /* No blob support. */
+   if (!dri2_dpy->blob)
+  return;
+
+   /* No functions to set. */
+   if (!disp->BlobCacheSet)
+  return;
+
+   dri2_dpy->blob->set_cache_funcs(dri2_ctx->dri_context,
+   disp->BlobCacheSet,
+   disp->BlobCacheGet);
+}
+
 /**
  * Called via eglMakeCurrent(), drv->API.MakeCurrent().
  */
@@ -1499,6 +1525,9 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *dsurf,
if (old_ctx)
   dri2_gl_flush();
 
+   /* Make sure cache functions are set for new context. */
+   update_blob_cache_functions(disp, ctx);
+
ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL;
@@ -3016,6 +3045,15 @@ dri2_dup_native_fence_fd(_EGLDriver *drv, _EGLDisplay 
*dpy, _EGLSync *sync)
return dup(sync->SyncFd);
 }
 
+static void
+dri2_set_blob_cache_funcs(_EGLDriver *drv, _EGLDisplay *dpy,
+  EGLSetBlobFuncANDROID set,
+  EGLGetBlobFuncANDROID get)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   update_blob_cache_functions(dpy, ctx);
+}
+
 static EGLint
 dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   EGLint flags, EGLTime timeout)
@@ -3234,6 +3272,7 @@ _eglBuiltInDriver(void)
dri2_drv->API.GLInteropQueryDeviceInfo = dri2_interop_query_device_info;
dri2_drv->API.GLInteropExportObject = dri2_interop_export_object;
dri2_drv->API.DupNativeFenceFDANDROID = dri2_dup_native_fence_fd;
+   dri2_drv->API.SetBlobCacheFuncsANDROID = dri2_set_blob_cache_funcs;
 
dri2_drv->Name = "DRI2";
 
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index cc76c73eab..c49156fbb6 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -171,6 +171,7 @@ struct dri2_egl_display
const __DRInoErrorExtension*no_error;
const __DRI2configQueryExtension *config;
const __DRI2fenceExtension *fence;
+   const __DRI2blobExtension *blob;
const __DRI2rendererQueryExtension *rendererQuery;
const __DRI2interopExtension *interop;
int   fd;
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 5110688f2d..f2ba260060 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -476,6 +476,7 @@ _eglCreateExtensionsString(_EGLDisplay *dpy)
char *exts = dpy->ExtensionsString;
 
/* Please keep these sorted alphabetically. */
+   _EGL_CHECK_EXTENSION(ANDROID_blob_cache);
_EGL_CHECK_EXTENSION(ANDROID_framebuffer_target);
_EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
_EGL_CHECK_EXTENSION(ANDROID_native_fence_sync);
@@ -2522,6 +2523,35 @@ eglQueryDmaBufModifiersEXT(EGLDisplay dpy, EGLint 
format, EGLint max_modifiers,
RETURN_EGL_EVAL(disp, ret);
 }
 
+static void EGLAPIENTRY
+eglSetBlobCacheFuncsANDROID(EGLDisplay *dpy, EGLSetBlobFuncANDROID set,
+EGLGetBlobFuncANDROID get)
+{
+   _EGLDisplay *disp = _eglLockDisplay(dpy);
+   _EGLDriver *drv = _eglCheckDisplay(disp, __func__);
+
+   if (!set || !get) {
+  _eglError(EGL_BAD_PARAMETER,
+"eglSetBlobCacheFuncsANDROID: NULL handler given");
+  _eglU

[Mesa-dev] [PATCH 0/5] EGL_ANDROID_blob_cache

2018-01-22 Thread Tapani Pälli
Another go at EGL_ANDROID_blob_cache, some patches are re-sent but I 
thought it would be easier to read when sending all patches at once.

Most important change here is that I've done refactoring to disk cache 
so that it can live without path/index. Path will get generated only 
when cache is actually used. This makes it possible to implement 
disk_cache_has_key and disk_cache_put_key for EGL_ANDROID_blob_cache.

This functionality can be tested on desktop with any EGL app using this
branch:

https://cgit.freedesktop.org/~tpalli/mesa/log/?h=ANDROID_blob_cache_debug_v2

I've tested regular disk cache and blob cache on desktop with these 
changes, and blob cache functionality on Android.

Thanks;

Tapani Pälli (5):
  dri: add interface for EGL_ANDROID_blob_cache extension
  egl: add support for EGL_ANDROID_blob_cache
  disk cache: initialize cache path and index only when used
  disk cache: add callback functionality
  i965: add __DRI2_BLOB support and set cache functions

 include/GL/internal/dri_interface.h  |  26 -
 src/egl/drivers/dri2/egl_dri2.c  |  39 
 src/egl/drivers/dri2/egl_dri2.h  |   1 +
 src/egl/main/eglapi.c|  30 ++
 src/egl/main/eglapi.h|   4 +
 src/egl/main/egldisplay.h|   4 +
 src/egl/main/eglentrypoint.h |   1 +
 src/mesa/drivers/dri/i965/intel_screen.c |  21 
 src/util/disk_cache.c| 163 +++
 src/util/disk_cache.h|  19 
 10 files changed, 266 insertions(+), 42 deletions(-)

-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] disk cache: initialize cache path and index only when used

2018-01-22 Thread Tapani Pälli
This patch makes disk_cache initialize path and index lazily so
that we can utilize disk_cache without a path using callback
functionality introduced by next patch.

Signed-off-by: Tapani Pälli 
---
 src/util/disk_cache.c | 114 --
 1 file changed, 73 insertions(+), 41 deletions(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 2884d3c9c1..2fb07699b5 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -77,6 +77,7 @@
 struct disk_cache {
/* The path to the cache directory. */
char *path;
+   bool path_init_failed;
 
/* Thread queue for compressing and writing cache entries to disk */
struct util_queue cache_queue;
@@ -178,27 +179,18 @@ concatenate_and_mkdir(void *ctx, const char *path, const 
char *name)
   return NULL;
 }
 
-#define DRV_KEY_CPY(_dst, _src, _src_size) \
-do {   \
-   memcpy(_dst, _src, _src_size);  \
-   _dst += _src_size;  \
-} while (0);
-
-struct disk_cache *
-disk_cache_create(const char *gpu_name, const char *timestamp,
-  uint64_t driver_flags)
+static bool
+disk_cache_path_init(struct disk_cache *cache)
 {
-   void *local;
-   struct disk_cache *cache = NULL;
-   char *path, *max_size_str;
-   uint64_t max_size;
+   void *local = NULL;
+   char *path;
int fd = -1;
struct stat sb;
size_t size;
 
/* If running as a users other than the real user disable cache */
if (geteuid() != getuid())
-  return NULL;
+  goto fail;
 
/* A ralloc context for transient data during this invocation. */
local = ralloc_context(NULL);
@@ -273,10 +265,6 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp,
  goto fail;
}
 
-   cache = ralloc(NULL, struct disk_cache);
-   if (cache == NULL)
-  goto fail;
-
cache->path = ralloc_strdup(cache, path);
if (cache->path == NULL)
   goto fail;
@@ -325,6 +313,58 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp,
cache->size = (uint64_t *) cache->index_mmap;
cache->stored_keys = cache->index_mmap + sizeof(uint64_t);
 
+   /* 1 thread was chosen because we don't really care about getting things
+* to disk quickly just that it's not blocking other tasks.
+*
+* The queue will resize automatically when it's full, so adding new jobs
+* doesn't stall.
+*/
+   util_queue_init(&cache->cache_queue, "disk_cache", 32, 1,
+   UTIL_QUEUE_INIT_RESIZE_IF_FULL |
+   UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY);
+
+   ralloc_free(local);
+
+   return true;
+
+ fail:
+   if (fd != -1)
+  close(fd);
+
+   if (local)
+  ralloc_free(local);
+
+   cache->path_init_failed = true;
+
+   return false;
+}
+
+#define DRV_KEY_CPY(_dst, _src, _src_size) \
+do {   \
+   memcpy(_dst, _src, _src_size);  \
+   _dst += _src_size;  \
+} while (0);
+
+struct disk_cache *
+disk_cache_create(const char *gpu_name, const char *timestamp,
+  uint64_t driver_flags)
+{
+   struct disk_cache *cache = NULL;
+   char *max_size_str;
+   uint64_t max_size;
+
+   /* If running as a users other than the real user disable cache */
+   if (geteuid() != getuid())
+  return NULL;
+
+   /* At user request, disable shader cache entirely. */
+   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", false))
+  return NULL;
+
+   cache = rzalloc(NULL, struct disk_cache);
+   if (cache == NULL)
+  return NULL;
+
max_size = 0;
 
max_size_str = getenv("MESA_GLSL_CACHE_MAX_SIZE");
@@ -360,16 +400,6 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp,
 
cache->max_size = max_size;
 
-   /* 1 thread was chosen because we don't really care about getting things
-* to disk quickly just that it's not blocking other tasks.
-*
-* The queue will resize automatically when it's full, so adding new jobs
-* doesn't stall.
-*/
-   util_queue_init(&cache->cache_queue, "disk_cache", 32, 1,
-   UTIL_QUEUE_INIT_RESIZE_IF_FULL |
-   UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY);
-
uint8_t cache_version = CACHE_VERSION;
size_t cv_size = sizeof(cache_version);
cache->driver_keys_blob_size = cv_size;
@@ -392,8 +422,10 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp,
 
cache->driver_keys_blob =
   ralloc_size(cache, cache->driver_keys_blob_size);
-   if (!cache->driver_keys_blob)
-  goto fail;
+   if (!cache->driver_keys_blob) {
+  ralloc_free(cache);
+  return NULL;
+   }
 
uint8_t *drv_key_blob = cache->driver_keys_blob;
DRV_KEY_CPY(drv_key_blob, &cache_version, cv_size)
@@ -405,18 +437,7 @@ disk_cache_create(const char *gpu_name, const char 
*timestamp,
/* Seed our rand function */
s_rand_xorshift128plus(cache->seed_xorshift128plus, true);
 
-   ralloc_free(local);
-
return cache;
-
- fail:
-   if 

[Mesa-dev] [PATCH 5/5] i965: add __DRI2_BLOB support and set cache functions

2018-01-22 Thread Tapani Pälli
Signed-off-by: Tapani Pälli 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index a4e34e9f2c..84009382c6 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -36,6 +36,7 @@
 #include "main/version.h"
 #include "swrast/s_renderbuffer.h"
 #include "util/ralloc.h"
+#include "util/disk_cache.h"
 #include "brw_defines.h"
 #include "brw_state.h"
 #include "compiler/nir/nir.h"
@@ -1493,6 +1494,19 @@ brw_query_renderer_string(__DRIscreen *dri_screen,
return -1;
 }
 
+static void
+brw_set_cache_funcs(__DRIcontext *dri_ctx,
+__DRIblobCacheSet set, __DRIblobCacheGet get)
+{
+   struct brw_context *brw = dri_ctx->driverPrivate;
+   struct gl_context *ctx = &brw->ctx;
+
+   if (!ctx->Cache)
+  return;
+
+   disk_cache_set_callbacks(ctx->Cache, set, get);
+}
+
 static const __DRI2rendererQueryExtension intelRendererQueryExtension = {
.base = { __DRI2_RENDERER_QUERY, 1 },
 
@@ -1504,6 +1518,11 @@ static const __DRIrobustnessExtension dri2Robustness = {
.base = { __DRI2_ROBUSTNESS, 1 }
 };
 
+static const __DRI2blobExtension intelBlobExtension = {
+   .base = { __DRI2_BLOB, 1 },
+   .set_cache_funcs = brw_set_cache_funcs
+};
+
 static const __DRIextension *screenExtensions[] = {
 &intelTexBufferExtension.base,
 &intelFenceExtension.base,
@@ -1512,6 +1531,7 @@ static const __DRIextension *screenExtensions[] = {
 &intelRendererQueryExtension.base,
 &dri2ConfigQueryExtension.base,
 &dri2NoErrorExtension.base,
+&intelBlobExtension.base,
 NULL
 };
 
@@ -1524,6 +1544,7 @@ static const __DRIextension 
*intelRobustScreenExtensions[] = {
 &dri2ConfigQueryExtension.base,
 &dri2Robustness.base,
 &dri2NoErrorExtension.base,
+&intelBlobExtension.base,
 NULL
 };
 
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/5] disk cache: add callback functionality

2018-01-22 Thread Tapani Pälli
v2: add disk_cache_has_key, disk_cache_put_key support
using blob cache (Nicolai, Jordan)

Signed-off-by: Tapani Pälli 
---
 src/util/disk_cache.c | 49 +
 src/util/disk_cache.h | 19 +++
 2 files changed, 68 insertions(+)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 2fb07699b5..ba98f3bba5 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -101,6 +101,9 @@ struct disk_cache {
/* Driver cache keys. */
uint8_t *driver_keys_blob;
size_t driver_keys_blob_size;
+
+   disk_cache_set_cb blob_set_cb;
+   disk_cache_get_cb blob_get_cb;
 };
 
 struct disk_cache_put_job {
@@ -1020,6 +1023,11 @@ disk_cache_put(struct disk_cache *cache, const cache_key 
key,
const void *data, size_t size,
struct cache_item_metadata *cache_item_metadata)
 {
+   if (cache->blob_set_cb) {
+  cache->blob_set_cb(key, CACHE_KEY_SIZE, data, size);
+  return;
+   }
+
struct disk_cache_put_job *dc_job =
   create_put_job(cache, key, data, size, cache_item_metadata);
 
@@ -1082,6 +1090,29 @@ disk_cache_get(struct disk_cache *cache, const cache_key 
key, size_t *size)
if (size)
   *size = 0;
 
+   if (cache->blob_get_cb) {
+/* This is what Android EGL defines as the maxValueSize in egl_cache_t
+ * class implementation.
+ */
+#define MAX_BLOB_SIZE 64 * 1024
+  void *blob = malloc(MAX_BLOB_SIZE);
+  if (!blob)
+ return NULL;
+
+  signed long bytes =
+ cache->blob_get_cb(key, CACHE_KEY_SIZE, blob, MAX_BLOB_SIZE);
+
+  if (!bytes) {
+ free(blob);
+ return NULL;
+  }
+
+  if (size)
+ *size = bytes;
+  return blob;
+#undef MAX_BLOB_SIZE
+   }
+
filename = get_cache_file(cache, key);
if (filename == NULL)
   goto fail;
@@ -1197,6 +1228,11 @@ disk_cache_put_key(struct disk_cache *cache, const 
cache_key key)
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
unsigned char *entry;
 
+   if (cache->blob_set_cb) {
+  cache->blob_set_cb(key, CACHE_KEY_SIZE, key_chunk, sizeof(uint32_t));
+  return;
+   }
+
if (!cache->path)
   return;
 
@@ -1219,6 +1255,11 @@ disk_cache_has_key(struct disk_cache *cache, const 
cache_key key)
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
unsigned char *entry;
 
+   if (cache->blob_get_cb) {
+  uint32_t blob;
+  return cache->blob_get_cb(key, CACHE_KEY_SIZE, &blob, sizeof(uint32_t));
+   }
+
/* Try initializing path only once. */
if (cache->path_init_failed ||
(!cache->path && !disk_cache_path_init(cache)))
@@ -1242,4 +1283,12 @@ disk_cache_compute_key(struct disk_cache *cache, const 
void *data, size_t size,
_mesa_sha1_final(&ctx, key);
 }
 
+void
+disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_set_cb set,
+ disk_cache_get_cb get)
+{
+   cache->blob_set_cb = set;
+   cache->blob_get_cb = get;
+}
+
 #endif /* ENABLE_SHADER_CACHE */
diff --git a/src/util/disk_cache.h b/src/util/disk_cache.h
index 488b297ead..3fae8a1358 100644
--- a/src/util/disk_cache.h
+++ b/src/util/disk_cache.h
@@ -50,6 +50,14 @@ typedef uint8_t cache_key[CACHE_KEY_SIZE];
 #define CACHE_ITEM_TYPE_UNKNOWN  0x0
 #define CACHE_ITEM_TYPE_GLSL 0x1
 
+typedef void
+(*disk_cache_set_cb) (const void *key, signed long keySize,
+  const void *value, signed long valueSize);
+
+typedef signed long
+(*disk_cache_get_cb) (const void *key, signed long keySize,
+  void *value, signed long valueSize);
+
 struct cache_item_metadata {
/**
 * The cache item type. This could be used to identify a GLSL cache item,
@@ -207,6 +215,10 @@ void
 disk_cache_compute_key(struct disk_cache *cache, const void *data, size_t size,
cache_key key);
 
+void
+disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_set_cb set,
+ disk_cache_get_cb get);
+
 #else
 
 static inline struct disk_cache *
@@ -260,6 +272,13 @@ disk_cache_compute_key(struct disk_cache *cache, const 
void *data, size_t size,
return;
 }
 
+static inline void
+disk_cache_set_callbacks(struct disk_cache *cache, disk_cache_set_cb set,
+ disk_cache_get_cb get)
+{
+   return;
+}
+
 #endif /* ENABLE_SHADER_CACHE */
 
 #ifdef __cplusplus
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] amd/common: only load used channels when sampling buffer views

2018-01-22 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 10.01.2018 20:12, Samuel Pitoiset wrote:

This allows reducing the number of dwords that are loaded
with buffer_load_format_xyzw. For example, when the only used
channel is 1, the driver will emit buffer_load_format_x instead.

Shader stats for DOW3 (with some local hacky scripts for SPIRV):

143 shaders in 143 tests
Totals:
SGPRS: 5344 -> 5352 (0.15 %)
VGPRS: 3476 -> 3452 (-0.69 %)
Spilled SGPRs: 30 -> 29 (-3.33 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 269860 -> 269808 (-0.02 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 1267 -> 1272 (0.39 %)
Wait states: 0 -> 0 (0.00 %)

The 'const' qualifier has to be removed to avoid a compilation
warning with nir_ssa_def_components_read().

Signed-off-by: Samuel Pitoiset 
---
  src/amd/common/ac_nir_to_llvm.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8f5df12e3d..bafbdbb250 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2242,16 +2242,19 @@ static LLVMValueRef radv_lower_gather4_integer(struct 
ac_llvm_context *ctx,
  }
  
  static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,

-   const nir_tex_instr *instr,
+   nir_tex_instr *instr,
bool lod_is_zero,
struct ac_image_args *args)
  {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+   unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+
return ac_build_buffer_load_format(&ctx->ac,
   args->resource,
   args->addr,
   ctx->ac.i32_0,
-  4, true);
+  util_last_bit(mask),
+  true);
}
  
  	args->opcode = ac_image_sample;





--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] gallium/u_threaded: don't run out of memory with staging uploads

2018-01-22 Thread Nicolai Hähnle

On 10.01.2018 20:49, Marek Olšák wrote:

From: Marek Olšák 

Cc: 17.2 17.3 
---
  src/gallium/auxiliary/util/u_threaded_context.c | 13 +
  src/gallium/auxiliary/util/u_threaded_context.h |  8 
  2 files changed, 21 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_threaded_context.c 
b/src/gallium/auxiliary/util/u_threaded_context.c
index ffa8247..7bd13cf 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -1508,35 +1508,47 @@ struct tc_resource_copy_region {
  };
  
  static void

  tc_resource_copy_region(struct pipe_context *_pipe,
  struct pipe_resource *dst, unsigned dst_level,
  unsigned dstx, unsigned dsty, unsigned dstz,
  struct pipe_resource *src, unsigned src_level,
  const struct pipe_box *src_box);
  
  static void

+tc_notify_staging_upload_done(struct threaded_context *tc, unsigned size)
+{
+   tc->unflushed_transfer_size += size;
+
+   if (tc->unflushed_transfer_size > TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE) {
+  tc->base.flush(&tc->base, NULL, PIPE_FLUSH_ASYNC);
+  tc->unflushed_transfer_size = 0;
+   }
+}
+
+static void
  tc_buffer_do_flush_region(struct threaded_context *tc,
struct threaded_transfer *ttrans,
const struct pipe_box *box)
  {
 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
  
 if (ttrans->staging) {

struct pipe_box src_box;
  
u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,

 box->width, &src_box);
  
/* Copy the staging buffer into the original one. */

tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
ttrans->staging, 0, &src_box);
+  tc_notify_staging_upload_done(tc, box->width);
 }
  
 util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);

  }
  
  static void

  tc_transfer_flush_region(struct pipe_context *_pipe,
   struct pipe_transfer *transfer,
   const struct pipe_box *rel_box)
  {
@@ -1653,20 +1665,21 @@ tc_buffer_subdata(struct pipe_context *_pipe,
  
 /* The upload is small. Enqueue it. */

 struct tc_buffer_subdata *p =
tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, 
size);
  
 tc_set_resource_reference(&p->resource, resource);

 p->usage = usage;
 p->offset = offset;
 p->size = size;
 memcpy(p->slot, data, size);
+   tc_notify_staging_upload_done(tc, size);
  }
  
  struct tc_texture_subdata {

 struct pipe_resource *resource;
 unsigned level, usage, stride, layer_stride;
 struct pipe_box box;
 char slot[0]; /* more will be allocated if needed */
  };
  
  static void

diff --git a/src/gallium/auxiliary/util/u_threaded_context.h 
b/src/gallium/auxiliary/util/u_threaded_context.h
index 53c5a7e..295464a 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -225,20 +225,27 @@ struct tc_unflushed_batch_token;
  /* Threshold for when to use the queue or sync. */
  #define TC_MAX_STRING_MARKER_BYTES  512
  
  /* Threshold for when to enqueue buffer/texture_subdata as-is.

   * If the upload size is greater than this, it will do instead:
   * - for buffers: DISCARD_RANGE is done by the threaded context
   * - for textures: sync and call the driver directly
   */
  #define TC_MAX_SUBDATA_BYTES320
  
+/* Every staging upload allocates memory. If we have too many uploads

+ * in a row without flushes, we might run out of memory. This limit controls
+ * how many bytes of queued uploads we can have at a time. If we go over,
+ * the threaded context triggers a context flush.
+ */
+#define TC_MAX_UNFLUSHED_STAGING_UPLOAD_SIZE (512 * 1024 * 1024)


This seems very aggressive. In reality, this should probably scale with 
free space in GART, but we don't know that here. Can you reduce it to 
something like 64MB? Unless there's concrete evidence that having it 
higher is beneficial, of course.


With that, patches 9 & 10:

Reviewed-by: Nicolai Hähnle 



+
  typedef void (*tc_replace_buffer_storage_func)(struct pipe_context *ctx,
 struct pipe_resource *dst,
 struct pipe_resource *src);
  typedef struct pipe_fence_handle *(*tc_create_fence_func)(struct pipe_context 
*ctx,
struct 
tc_unflushed_batch_token *token);
  
  struct threaded_resource {

 struct pipe_resource b;
 const struct u_resource_vtbl *vtbl;
  
@@ -346,20 +353,21 @@ struct tc_batch {

 struct tc_call call[TC_CALLS_PER_BATCH];
  };
  
  struct threaded_context {

 struct pipe_context base;
 struct pipe_context *pipe;
 s

Re: [Mesa-dev] [PATCH] radeonsi: don't ignore pitch for imported textures

2018-01-22 Thread Nicolai Hähnle

On 10.01.2018 20:49, Marek Olšák wrote:

From: Marek Olšák 


Please add a bug report or reference to what fails without it.

It does make sense though, so

Reviewed-by: Nicolai Hähnle 




Cc: 17.2 17.3 
---
  src/gallium/drivers/radeon/r600_texture.c | 14 --
  1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 34b3ab0..36eff40 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -291,25 +291,35 @@ static int r600_init_surface(struct si_screen *sscreen,
flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
  
  	r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe,

  array_mode, surface);
if (r) {
return r;
}
  
+	unsigned pitch = pitch_in_bytes_override / bpe;

+
if (sscreen->info.chip_class >= GFX9) {
-   assert(!pitch_in_bytes_override ||
-  pitch_in_bytes_override == surface->u.gfx9.surf_pitch * 
bpe);
+   if (pitch) {
+   surface->u.gfx9.surf_pitch = pitch;
+   surface->u.gfx9.surf_slice_size =
+   (uint64_t)pitch * surface->u.gfx9.surf_height * 
bpe;
+   }
surface->u.gfx9.surf_offset = offset;
} else {
+   if (pitch) {
+   surface->u.legacy.level[0].nblk_x = pitch;
+   surface->u.legacy.level[0].slice_size_dw =
+   ((uint64_t)pitch * 
surface->u.legacy.level[0].nblk_y * bpe) / 4;
+   }
if (offset) {
for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); 
++i)
surface->u.legacy.level[i].offset += offset;
}
}
return 0;
  }
  
  static void r600_texture_init_metadata(struct si_screen *sscreen,

   struct r600_texture *rtex,




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: add image and sampler (un)packing support to glsl to nir

2018-01-22 Thread Nicolai Hähnle

Both patches:

Reviewed-by: Nicolai Hähnle 


On 19.01.2018 05:12, Timothy Arceri wrote:

This is needed for ARB_bindless_texture support.
---
  src/compiler/glsl/glsl_to_nir.cpp | 4 
  1 file changed, 4 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 4e3e9c4610..1a579f41cd 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1645,11 +1645,15 @@ nir_visitor::visit(ir_expression *ir)
 case ir_unop_unpack_half_2x16:
result = nir_unpack_half_2x16(&b, srcs[0]);
break;
+   case ir_unop_pack_sampler_2x32:
+   case ir_unop_pack_image_2x32:
 case ir_unop_pack_double_2x32:
 case ir_unop_pack_int_2x32:
 case ir_unop_pack_uint_2x32:
result = nir_pack_64_2x32(&b, srcs[0]);
break;
+   case ir_unop_unpack_sampler_2x32:
+   case ir_unop_unpack_image_2x32:
 case ir_unop_unpack_double_2x32:
 case ir_unop_unpack_int_2x32:
 case ir_unop_unpack_uint_2x32:




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: image size builtin for GLSL_SAMPLER_DIM_3D

2018-01-22 Thread Nicolai Hähnle

On 18.01.2018 08:17, Timothy Arceri wrote:

This is what radeonsi does. Fixes remaining piglit subtest in:

./bin/arb_shader_image_size-builtin --quick -auto -fbo


Reviewed-by: Nicolai Hähnle 



---
  src/amd/common/ac_nir_to_llvm.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d5b8bea44f..ac6bbe02f1 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3788,7 +3788,8 @@ static LLVMValueRef visit_image_size(struct 
ac_nir_context *ctx,
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = instr->variables[0]->var->type;
bool da = glsl_sampler_type_is_array(var->type) ||
- glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
+ glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE ||
+ glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_3D;
if(instr->variables[0]->deref.child)
type = instr->variables[0]->deref.child->type;
  




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi/nir: add nir_intrinsic_load_sample_mask_in to ir scan

2018-01-22 Thread Nicolai Hähnle

On 19.01.2018 12:03, Timothy Arceri wrote:

Fixes a bunch of ARB_sample_shading piglit tests.


Reviewed-by: Nicolai Hähnle 



---
  src/gallium/drivers/radeonsi/si_shader_nir.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index a9e852c0bc..4455322392 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -98,6 +98,9 @@ static void scan_instruction(struct tgsi_shader_info *info,
case nir_intrinsic_load_primitive_id:
info->uses_primid = 1;
break;
+   case nir_intrinsic_load_sample_mask_in:
+   info->reads_samplemask = true;
+   break;
case nir_intrinsic_load_tess_level_inner:
case nir_intrinsic_load_tess_level_outer:
info->reads_tess_factors = true;




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi/nir: add primitive id to inputs scan

2018-01-22 Thread Nicolai Hähnle

On 19.01.2018 12:46, Timothy Arceri wrote:

Fixes the following piglit tests:

arb_tessellation_shader/fs-primitiveid-instanced
glsl-1.50/primitive-id-no-gs
glsl-1.50/primitive-id-no-gs-first-vertex
glsl-1.50/primitive-id-no-gs-instanced
glsl-1.50/primitive-id-no-gs-strip
glsl-1.50/primitive-id-no-gs-strip-first-vertex


Reviewed-by: Nicolai Hähnle 



---
  src/gallium/drivers/radeonsi/si_shader_nir.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 4455322392..2f19bae813 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -315,6 +315,9 @@ void si_nir_scan_shader(const struct nir_shader *nir,
info->input_semantic_name[i] = semantic_name;
info->input_semantic_index[i] = semantic_index;
  
+			if (semantic_name == TGSI_SEMANTIC_PRIMID)

+   info->uses_primid = true;
+
if (variable->data.sample)
info->input_interpolate_loc[i] = 
TGSI_INTERPOLATE_LOC_SAMPLE;
else if (variable->data.centroid)




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] EGL_ANDROID_blob_cache

2018-01-22 Thread Nicolai Hähnle

On 19.01.2018 07:05, Tapani Pälli wrote:



On 01/18/2018 05:49 PM, Nicolai Hähnle wrote:

On 17.01.2018 17:10, Tapani Pälli wrote:



On 17.01.2018 13:34, Nicolai Hähnle wrote:

On 15.01.2018 13:31, Tapani Pälli wrote:

Hello;

Here's a refactored series of EGL_ANDROID_blob_cache. Now cache
functions are stored in disk_cache struct and the functionality
called within existing disk_cache put/get code. Problems/errors
that existed with earlier series are gone.

On Android, the cache index file is created in MESA_GLSL_CACHE_DIR
and blobs are generated under '/data/user_de/0' in application-specific
paths:


Can't we let the "cache index" be managed by the ANDROID_blob_cache 
as well? That seems to me more in the spirit of what that extension 
is about, and would avoid polluting stuff like /sdcard.


The cache index file is used for two purposes:

- Keeping track of the size of the cache. This use is obsoleted 
entirely by ANDROID_blob_cache.


- Keeping track of keys where only their presence is relevant and no 
data is associated. These could easily be treated as empty (0 byte 
sized) blobs.


I guess it could be possible, for every app we would create index 
when disk_cache gets created. I guess only issue could be that 
Android might go and remove the index from cache when it decides to 
resize it and then we would loose it even though there would be items 
in cache. I can try how this would work.


Entries can be kicked out of the on-disk index (which is really a bit 
of a misnomer) as well if there's a hash collision.


I believe this functionality is only used as a marker to indicate that 
the GLSL compile step can be skipped. So losing an entry in there 
shouldn't cause any damage.


What I meant is that if we would store index by using set() function, 
the index itself would be just another entry for Android's cache, it can 
decide to remove it when it manages cache size.


Not sure if we're talking about the same thing, then? What I meant was 
that every call to disk_cache_put_key becomes a call to the 
Android-provided set() function, and every call to disk_cache_has_key 
becomes a call to the get() function.


Although storing the index itself in the blob cache is also an 
interesting idea.


Cheers,
Nicolai





Cheers,
Nicolai






Cheers,
Nicolai





androidia_64:/ # find /data/user_de/0/ -name *shader*
/data/user_de/0/com.android.settings/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.android.gallery3d/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.android.systemui/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.rovio.angrybirdsspace.ads/code_cache/com.android.opengl.shaders_cache 



(this part is managed by Android but may be interesting to know).

Also SurfaceFlinger manages its own cache as seen in the log output:
01-15 07:40:26.329  2129  2129 D SurfaceFlinger: shader cache 
generated - 24 shaders in 57.687504 ms


I'm not sure if /sdcard is a sane default but I've tried everything
else (/cache, /data/cache) and failed because of permission errors.

Thanks;

Tapani Pälli (7):
   dri: add interface for EGL_ANDROID_blob_cache extension
   egl: add support for EGL_ANDROID_blob_cache
   disk cache: add callback functionality
   disk cache: support setting MESA_GLSL_CACHE_DIR at compile time
   i965: add __DRI2_BLOB support and set cache functions
   android: ignore MESA_GLSL_CACHE_DISABLE setting
   android: set '/sdcard/' as MESA_GLSL_CACHE_DIR by default

  Android.common.mk  |  1 +
  include/GL/internal/dri_interface.h    | 26 +-
  src/egl/drivers/dri2/egl_dri2.c    | 43 


  src/egl/drivers/dri2/egl_dri2.h    |  4 +++
  src/egl/main/eglapi.c  | 29 
  src/egl/main/eglapi.h  |  4 +++
  src/egl/main/egldisplay.h  |  3 ++
  src/egl/main/eglentrypoint.h   |  1 +
  src/mesa/drivers/dri/i965/brw_disk_cache.c |  2 ++
  src/mesa/drivers/dri/i965/intel_screen.c   | 21 
  src/util/disk_cache.c  | 54 
+-

  src/util/disk_cache.h  | 19 +++
  12 files changed, 205 insertions(+), 2 deletions(-)










--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] ac: fix emit vertex stream parameter

2018-01-22 Thread Nicolai Hähnle

On 20.01.2018 02:38, Timothy Arceri wrote:

Fixes the following piglit test on radeonsi:

./bin/arb_enhanced_layouts-gs-stream-location-aliasing


Reviewed-by: Nicolai Hähnle 



---
  src/amd/common/ac_nir_to_llvm.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3741b74dc5..781e4a905b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4160,6 +4160,8 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned 
stream, LLVMValueRef *addr
int idx;
struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
  
+	assert(stream == 0);

+
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->builder,
   ctx->gs_next_vertex,
@@ -4429,8 +4431,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
result = visit_interp(ctx, instr);
break;
case nir_intrinsic_emit_vertex:
-   assert(instr->const_index[0] == 0);
-   ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
+   ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), 
ctx->outputs);
break;
case nir_intrinsic_end_primitive:
ctx->abi->emit_primitive(ctx->abi, 
nir_intrinsic_stream_id(instr));




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: fix image load store for GLSL_SAMPLER_DIM_3D

2018-01-22 Thread Nicolai Hähnle

On 20.01.2018 04:11, Timothy Arceri wrote:

Fixes the following piglit tests:

arb_shader_image_load_store/layer/image3d/layered binding test
arb_shader_image_load_store/max-size/image3d max size test/2048x8x8x1
arb_shader_image_load_store/max-size/image3d max size test/8x2048x8x1
arb_shader_image_load_store/max-size/image3d max size test/8x8x2048x1
arb_shader_image_load_store/semantics/imageload/vertex shader/rgba32f/image3d 
test


Reviewed-by: Nicolai Hähnle 



---
  src/amd/common/ac_nir_to_llvm.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 781e4a905b..ac3a949bd3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3634,6 +3634,7 @@ static LLVMValueRef visit_image_load(struct 
ac_nir_context *ctx,
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
 glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_CUBE ||
+glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D 
||
 glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_SUBPASS ||
 glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_SUBPASS_MS;
LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
@@ -3692,7 +3693,8 @@ static void visit_image_store(struct ac_nir_context *ctx,
   params, 6, 0);
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
-glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_CUBE;
+glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_CUBE ||
+glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D;
LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
LLVMValueRef slc = ctx->ac.i1false;
  




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: fix ac_build_varying_gather_values() for packed layouts

2018-01-22 Thread Nicolai Hähnle

On 19.01.2018 06:50, Timothy Arceri wrote:

This fixes a segfault for varyings not starting at component 0.


Reviewed-by: Nicolai Hähnle 



---
  src/amd/common/ac_llvm_build.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 3467bba693..6615a269f8 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -413,7 +413,7 @@ ac_build_varying_gather_values(struct ac_llvm_context *ctx, 
LLVMValueRef *values
for (unsigned i = component; i < value_count + component; i++) {
LLVMValueRef value = values[i];
  
-		if (!i)

+   if (i == component)
vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), 
value_count));
LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, 
false);
vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, 
"");




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] ac: remove arrays when when querying sampler info

2018-01-22 Thread Nicolai Hähnle

On 19.01.2018 06:07, Timothy Arceri wrote:

Fixes the following ARB_arrays_of_arrays piglit tests:

basic-imagestore-const-uniform-index
basic-imagestore-mixed-const-non-const-uniform-index
basic-imagestore-mixed-const-non-const-uniform-index2
basic-imagestore-non-const-uniform-index


Reviewed-by: Nicolai Hähnle 



---
  src/amd/common/ac_nir_to_llvm.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4f240db4ac..e88f64fb3b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3516,9 +3516,7 @@ static LLVMValueRef 
adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
  static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr)
  {
-   const struct glsl_type *type = instr->variables[0]->var->type;
-   if(instr->variables[0]->deref.child)
-   type = instr->variables[0]->deref.child->type;
+   const struct glsl_type *type = 
glsl_without_array(instr->variables[0]->var->type);
  
  	LLVMValueRef src0 = get_src(ctx, instr->src[0]);

LLVMValueRef coords[4];




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: fix image load store for GLSL_SAMPLER_DIM_3D

2018-01-22 Thread Kai Wasserbäch
Hey,
Nicolai Hähnle wrote on 22.01.2018 14:18:
> On 20.01.2018 04:11, Timothy Arceri wrote:
>> Fixes the following piglit tests:
>>
>> arb_shader_image_load_store/layer/image3d/layered binding test
>> arb_shader_image_load_store/max-size/image3d max size test/2048x8x8x1
>> arb_shader_image_load_store/max-size/image3d max size test/8x2048x8x1
>> arb_shader_image_load_store/max-size/image3d max size test/8x8x2048x1
>> arb_shader_image_load_store/semantics/imageload/vertex shader/rgba32f/image3d
>> test
> 
> Reviewed-by: Nicolai Hähnle 
> 
> 
>> ---
>>   src/amd/common/ac_nir_to_llvm.c | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>> b/src/amd/common/ac_nir_to_llvm.c
>> index 781e4a905b..ac3a949bd3 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -3634,6 +3634,7 @@ static LLVMValueRef visit_image_load(struct
>> ac_nir_context *ctx,
>>   } else {
>>   bool is_da = glsl_sampler_type_is_array(type) ||
>>    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
>> + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D ||
>>    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS ||
>>    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
>>   LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
>> @@ -3692,7 +3693,8 @@ static void visit_image_store(struct ac_nir_context 
>> *ctx,
>>  params, 6, 0);
>>   } else {
>>   bool is_da = glsl_sampler_type_is_array(type) ||
>> - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
>> + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
>> + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D;
>>   LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
>>   LLVMValueRef slc = ctx->ac.i1false;

just a note: if this patch goes in before my patch
(), assuming it'll be accepted,
I'd need to do a v2 patch, which applies cleanly on top of this change. The same
applies the other way around.

Cheers,
Kai



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/24] anv/cmd_buffer: Rework aux tracking

2018-01-22 Thread Pohjolainen, Topi

I read this thru a few times, and found just two typos:

Reviewed-by: Topi Pohjolainen 

There are a lot of details that could go wrong and another set of eyes
on it would be really good.

On Fri, Jan 19, 2018 at 03:47:37PM -0800, Jason Ekstrand wrote:
> This commit completely reworks aux tracking.  This includes a number of
> somewhat distinct changes:
> 
>  1) Since we are no longer fast-clearing multiple slices, we only need
> to track one fast clear color and one fast clear type.
> 
>  2) We store two bits for fast clear instead of one to let us
> distinguish between zero and non-zero fast clear colors.  This is
> needed so that we can do full resolves when transitioning to
> PRESENT_SRC_KHR with gen9 CCS images where we allow zero clear
> values in all sorts of places we wouldn't normally.
> 
>  3) We now track compression state as a boolean separate from fast clear
> type and this is tracked on a per-slice granularity.
> 
> The previous scheme had some issues when it came to individual slices of
> a multi-LOD image.  In particular, we only tracked "needs resolve"
> per-LOD but you could do a vkCmdPipelineBarrier that would only resolve
> a portion of the image and would set "needs resolve" to false anyway.
> Also, any transition from an undefined layout would reset the clear
> color for the entire LOD regardless of whether or not there was some
> clear color on some other slice.
> 
> As far as full/partial resolves go, he assumptions of the previous

  the

> scheme held because the one case where we do need a full resolve when
> CCS_E is enabled is for window-system images.  Since we only ever
> allowed X-tiled window-system images, CCS was entirely disabled on gen9+
> and we never got CCS_E.  With the advent of Y-tiled window-system
> buffers, we now need to properly support doing a full resolve of images
> marked CCS_E.
> ---
>  src/intel/vulkan/anv_blorp.c   |   3 +-
>  src/intel/vulkan/anv_image.c   |  96 ++-
>  src/intel/vulkan/anv_private.h |  53 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 340 
> +++--
>  4 files changed, 331 insertions(+), 161 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 3698543..594b0d8 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1757,8 +1757,7 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> * particular value and don't care about format or clear value.
> */
>const struct anv_address clear_color_addr =
> - anv_image_get_clear_color_addr(cmd_buffer->device, image,
> -aspect, level);
> + anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
>surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> }
>  
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 94b9ecb..d5f8dcf 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -190,46 +190,54 @@ all_formats_ccs_e_compatible(const struct 
> gen_device_info *devinfo,
>   * fast-clear values in non-trivial cases (e.g., outside of a render pass in
>   * which a fast clear has occurred).
>   *
> - * For the purpose of discoverability, the algorithm used to manage this 
> buffer
> - * is described here. A clear value in this buffer is updated when a fast 
> clear
> - * is performed on a subresource. One of two synchronization operations is
> - * performed in order for a following memory access to use the fast-clear
> - * value:
> - *a. Copy the value from the buffer to the surface state object used for
> - *   reading. This is done implicitly when the value is the clear value
> - *   predetermined to be the default in other surface state objects. This
> - *   is currently only done explicitly for the operation below.
> - *b. Do (a) and use the surface state object to resolve the subresource.
> - *   This is only done during layout transitions for decent performance.
> + * In order to avoid having multiple clear colors for a single plane of an
> + * image (hence a single RENDER_SURFACE_STATE), we only allow fast-clears on
> + * the first slice (level 0, layer 0).  At the time of our testing (Jan 17,
> + * 2018), there were known applications which would benefit from 
> fast-clearing

s/known/no known/ ?

> + * more than just the first slice.
>   *
> - * With the above scheme, we can fast-clear whenever the hardware allows 
> except
> - * for two cases in which synchronization becomes impossible or undesirable:
> - ** The subresource is in the GENERAL layout and is cleared to a value
> - *  other than the special default value.
> + * The fast clear portion of the image is laid out in the following order:
>   *
> - *  Performing a synchronization operation in order to read from the
> - *  subresource is undesi

Re: [Mesa-dev] [PATCH 2/7] egl: add support for EGL_ANDROID_blob_cache

2018-01-22 Thread Emil Velikov
On 19 January 2018 at 06:53, Tapani Pälli  wrote:
>
>
> On 01/18/2018 04:55 PM, Emil Velikov wrote:
>>
>> On 17 January 2018 at 16:11, Tapani Pälli  wrote:
>>>
>>>
>>>
>>> On 17.01.2018 13:28, Nicolai Hähnle wrote:


 On 16.01.2018 18:45, Emil Velikov wrote:
>
>
> Hi Tapani,
>
> On 15 January 2018 at 12:31, Tapani Pälli 
> wrote:
>
>> +static void
>> +update_blob_cache_functions(struct dri2_egl_display *dri2_dpy,
>> +struct dri2_egl_context *dri2_ctx)
>> +{
>> +   if (!dri2_dpy || !dri2_ctx)
>> +  return;
>
>
> AFAICT dri2_dpy can never be NULL.
>
>> +
>> +   /* No blob support. */
>> +   if (!dri2_dpy->blob)
>> +  return;
>> +
>> +   /* No functions to set. */
>> +   if (!dri2_dpy->blob_cache_set)
>> +  return;
>> +
>> +   dri2_dpy->blob->set_cache_funcs(dri2_ctx->dri_context,
>> +   dri2_dpy->blob_cache_set,
>> +   dri2_dpy->blob_cache_get);
>> +}
>> +
>
>
> I'm wondering why you opted to make set_cache_funcs dri_context
> specific as opposed to dri_screen.
> The latter seems to align better to EGLDisplay.
>
> Plus doing so will simplify the existing code - no hunk in
> dri2_make_current, no dri2_dpy->blob/blob_cache_set checks, etc.



 Yes, please make it a screen thing. It just makes more sense, and
 there's
 precedent in Gallium, where the disk-cache is a per-pipe_screen object
 as
 well.
>>>
>>>
>>>
>>> I chose context because eventually I need to access disk_cache which is
>>> part
>>> of gl_context. I'm not sure how would I propagate the set/get there from
>>> dri_screen?
>>>
>> Gallium does the following during create_context. I'm not sure if
>> there's any particular reason why i965 cannot do the same.
>> Tim, you've worked a fair bit in the area do you see any drawbacks?
>>
>> ctx->Cache = pipe->screen->get_disk_shader_cache(pipe->screen);
>>
>
> One problem is that client might set the callbacks only after context
> creation so we need to be able to do this during set_cache_funcs(). Now it
> works fine because we pass context there.
>
I don't see why that would be an issue. Both ctx::cache and
screen::cache are pointers to a single instance.
Hence, as we deref. screen::cache and update the callbacks, everything
will be fine from ctx POV - no need to update for each make_current
call/etc.

Am I having a dull moment here?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/24] anv: Allow fast-clearing the first slice of a multi-slice image

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:38PM -0800, Jason Ekstrand wrote:
> Now that we're tracking aux properly per-slice, we can enable this for
> applications which actually care.
> ---
>  src/intel/vulkan/anv_blorp.c   | 22 +++---
>  src/intel/vulkan/genX_cmd_buffer.c | 13 +
>  2 files changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 594b0d8..73a44fd 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1205,9 +1205,16 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
> image, VK_IMAGE_ASPECT_COLOR_BIT,
> att_state->aux_usage, &surf);
>  
> +  uint32_t base_layer = iview->planes[0].isl.base_array_layer;
> +  uint32_t layer_count = fb->layers;
> +
>if (att_state->fast_clear) {
>   surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
>  
> + /* We only support fast-clears on the first layer */
> + assert(iview->planes[0].isl.base_level == 0);
> + assert(iview->planes[0].isl.base_array_layer == 0);
> +

Before reading to the end of the patch I was wondering how this assumption
holds. Then I realized the logic in color_attachment_compute_aux_usage(). In
other words, if "att_state->fast_clear == true" then the view covers also the
very first slice.

>   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
>*
>*"After Render target fast clear, pipe-control with color cache
> @@ -1229,27 +1236,28 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer 
> *cmd_buffer)
>  
>   assert(image->n_planes == 1);
>   blorp_fast_clear(&batch, &surf, iview->planes[0].isl.format,
> -  iview->planes[0].isl.base_level,
> -  iview->planes[0].isl.base_array_layer, fb->layers,
> +  iview->planes[0].isl.base_level, base_layer, 1,

Given the design for "first slice only" and as there are the asserts earlier
I was expecting to see "0, 0, 1," here. Not a big deal though.

Reviewed-by: Topi Pohjolainen 

>render_area.offset.x, render_area.offset.y,
>render_area.offset.x + render_area.extent.width,
>render_area.offset.y + render_area.extent.height);
> + base_layer++;
> + layer_count--;
>  
>   cmd_buffer->state.pending_pipe_bits |=
>  ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> -  } else {
> +  }
> +
> +  if (layer_count > 0) {
>   assert(image->n_planes == 1);
>   anv_cmd_buffer_mark_image_written(cmd_buffer, image,
> VK_IMAGE_ASPECT_COLOR_BIT,
> att_state->aux_usage,
> iview->planes[0].isl.base_level,
> -   
> iview->planes[0].isl.base_array_layer,
> -   fb->layers);
> +   base_layer, layer_count);
>  
>   blorp_clear(&batch, &surf, iview->planes[0].isl.format,
>   anv_swizzle_for_render(iview->planes[0].isl.swizzle),
> - iview->planes[0].isl.base_level,
> - iview->planes[0].isl.base_array_layer, fb->layers,
> + iview->planes[0].isl.base_level, base_layer, 
> layer_count,
>   render_area.offset.x, render_area.offset.y,
>   render_area.offset.x + render_area.extent.width,
>   render_area.offset.y + render_area.extent.height,
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 4c83a5c..484246d 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -329,12 +329,17 @@ color_attachment_compute_aux_usage(struct anv_device * 
> device,
> */
>if (att_state->fast_clear &&
>(iview->planes[0].isl.base_level > 0 ||
> -   iview->image->type == VK_IMAGE_TYPE_3D ||
> -   iview->image->array_size > 0)) {
> +   iview->planes[0].isl.base_array_layer > 0 ||
> +   cmd_state->framebuffer->layers > 1)) {
>   anv_perf_warn(device->instance, iview->image,
> "Rendering to a multi-LOD or multi-layer framebuffer "
> -   "with LOAD_OP_CLEAR.  Not fast-clearing");
> - att_state->fast_clear = false;
> +   "with LOAD_OP_CLEAR.  Only fast-clearing the first "
> +   "slice");
> +
> + /* Leave fast_clear enabled if we are clearing the first slice. */
> + if (iview->planes[0].isl.base_level > 0 ||
> + iview->planes[0].isl.b

Re: [Mesa-dev] [PATCH] radeonsi/nir: fix fs output index

2018-01-22 Thread Timothy Arceri



On 22/01/18 23:45, Nicolai Hähnle wrote:

On 22.01.2018 04:57, Timothy Arceri wrote:

Fixes the following piglit tests:

arb_blend_func_extended-fbo-extended-blend
arb_blend_func_extended-fbo-extended-blend-explicit
arb_blend_func_extended-fbo-extended-blend-explicit_gles3
arb_blend_func_extended-fbo-extended-blend-pattern
arb_blend_func_extended-fbo-extended-blend-pattern_gles2
arb_blend_func_extended-fbo-extended-blend-pattern_gles3
arb_blend_func_extended-fbo-extended-blend_gles3
ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend
ext_framebuffer_multisample/alpha-to-one-dual-src-blend
---
  src/gallium/drivers/radeonsi/si_shader_nir.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c

index 5b941da949..81cf503d60 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -378,6 +378,10 @@ void si_nir_scan_shader(const struct nir_shader 
*nir,

  if (nir->info.stage == MESA_SHADER_FRAGMENT) {
  tgsi_get_gl_frag_result_semantic(variable->data.location,
  &semantic_name, &semantic_index);
+
+    if (variable->data.index > 0) {
+    semantic_index++;


Just to understand this correctly, variable->data.index == 1 indicates 
the second source in a dual source blend, and index values > 1 do not 
occur, right?


I believe so, the variable->data.index > 0 was just a copy and paste 
from the glsl to tgsi state tracker.




Could you please add a comment /* Adjust for dual source blending */ or 
similar?


Will do.



With that:

Reviewed-by: Nicolai Hähnle 



+    }
  } else {
  tgsi_get_gl_varying_semantic(variable->data.location, true,
   &semantic_name, &semantic_index);





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 22/24] intel/blorp: Use isl_aux_op instead of blorp_fast_clear_op

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:39PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/blorp/blorp.h   |  9 +--
>  src/intel/blorp/blorp_clear.c | 10 
>  src/intel/blorp/blorp_genX_exec.h | 16 ++--
>  src/intel/blorp/blorp_priv.h  |  2 +-
>  src/intel/vulkan/anv_blorp.c  | 14 +--
>  src/mesa/drivers/dri/i965/brw_blorp.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_blorp.h |  2 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 36 
> +--
>  8 files changed, 36 insertions(+), 55 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index 478a9af..f1be12b 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -189,19 +189,12 @@ blorp_clear_attachments(struct blorp_batch *batch,
>  bool clear_depth, float depth_value,
>  uint8_t stencil_mask, uint8_t stencil_value);
>  
> -enum blorp_fast_clear_op {
> -   BLORP_FAST_CLEAR_OP_NONE = 0,
> -   BLORP_FAST_CLEAR_OP_CLEAR,
> -   BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL,
> -   BLORP_FAST_CLEAR_OP_RESOLVE_FULL,
> -};
> -
>  void
>  blorp_ccs_resolve(struct blorp_batch *batch,
>struct blorp_surf *surf, uint32_t level,
>uint32_t start_layer, uint32_t num_layers,
>enum isl_format format,
> -  enum blorp_fast_clear_op resolve_op);
> +  enum isl_aux_op resolve_op);
>  
>  void
>  blorp_ccs_ambiguate(struct blorp_batch *batch,
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index fa2abd9..8b7c5a8 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -322,7 +322,7 @@ blorp_fast_clear(struct blorp_batch *batch,
> params.y1 = y1;
>  
> memset(&params.wm_inputs.clear_color, 0xff, 4*sizeof(float));
> -   params.fast_clear_op = BLORP_FAST_CLEAR_OP_CLEAR;
> +   params.fast_clear_op = ISL_AUX_OP_FAST_CLEAR;
>  
> get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf,
> &params.x0, &params.y0, &params.x1, &params.y1);
> @@ -720,7 +720,7 @@ blorp_ccs_resolve(struct blorp_batch *batch,
>struct blorp_surf *surf, uint32_t level,
>uint32_t start_layer, uint32_t num_layers,
>enum isl_format format,
> -  enum blorp_fast_clear_op resolve_op)
> +  enum isl_aux_op resolve_op)
>  {
> struct blorp_params params;
>  
> @@ -759,11 +759,11 @@ blorp_ccs_resolve(struct blorp_batch *batch,
> params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown;
>  
> if (batch->blorp->isl_dev->info->gen >= 9) {
> -  assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL ||
> - resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL);
> +  assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE ||
> + resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE);
> } else {
>/* Broadwell and earlier do not have a partial resolve */
> -  assert(resolve_op == BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> +  assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE);
> }
> params.fast_clear_op = resolve_op;
> params.num_layers = num_layers;
> diff --git a/src/intel/blorp/blorp_genX_exec.h 
> b/src/intel/blorp/blorp_genX_exec.h
> index 1968460..8d007b8 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -750,21 +750,21 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>   ps.MaximumNumberofThreadsPerPSD = 64 - 2;
>  
>switch (params->fast_clear_op) {
> -  case BLORP_FAST_CLEAR_OP_NONE:
> +  case ISL_AUX_OP_NONE:
>   break;
>  #if GEN_GEN >= 9
> -  case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
> +  case ISL_AUX_OP_PARTIAL_RESOLVE:
>   ps.RenderTargetResolveType = RESOLVE_PARTIAL;
>   break;
> -  case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
> +  case ISL_AUX_OP_FULL_RESOLVE:
>   ps.RenderTargetResolveType = RESOLVE_FULL;
>   break;
>  #else
> -  case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
> +  case ISL_AUX_OP_FULL_RESOLVE:
>   ps.RenderTargetResolveEnable = true;
>   break;
>  #endif
> -  case BLORP_FAST_CLEAR_OP_CLEAR:
> +  case ISL_AUX_OP_FAST_CLEAR:
>   ps.RenderTargetFastClearEnable = true;
>   break;
>default:
> @@ -852,12 +852,12 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>   ps.SamplerCount = 1; /* Up to 4 samplers */
>  
>switch (params->fast_clear_op) {
> -  case BLORP_FAST_CLEAR_OP_NONE:
> +  case ISL_AUX_OP_NONE:
>   break;
> -  case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
> +  case ISL_AUX_OP_FULL_RESOLVE:
>   ps.RenderTargetResolveEnable = true;
>   break;
> -  case BLORP_FAST_CLEAR_OP_CLEAR:
> +  case ISL_AUX_OP_FAST_CLEAR:
>   

Re: [Mesa-dev] [PATCH 1/3] ac: add stream handling to visit_end_primitive()

2018-01-22 Thread Timothy Arceri

On 22/01/18 23:37, Nicolai Hähnle wrote:
Why hasn't this been noticed before? 


I knew it was broken but the state tracker needed to be fixed first 
which I hadn't gotten to, fortunately Rob fixed it for me :) [1]



[1] 
https://cgit.freedesktop.org/mesa/mesa/commit/?id=4c69961daf45a6a64970e5831bd362307dca0cb2 




Does Vulkan not have multiple streams?


Vulkan does not have transform feedback.



Anyway, series is:

Reviewed-by: Nicolai Hähnle 



On 18.01.2018 23:26, Timothy Arceri wrote:

---
  src/amd/common/ac_nir_to_llvm.c | 7 +++
  1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c 
b/src/amd/common/ac_nir_to_llvm.c

index 12353943a5..9709514532 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4206,10 +4206,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, 
unsigned stream, LLVMValueRef *addr

  }
  static void
-visit_end_primitive(struct nir_to_llvm_context *ctx,
-    const nir_intrinsic_instr *instr)
+visit_end_primitive(struct nir_to_llvm_context *ctx, unsigned stream)
  {
-    ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | 
(0 << 8), ctx->gs_wave_id);
+    ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | 
(stream << 8), ctx->gs_wave_id);

  }
  static LLVMValueRef
@@ -4420,7 +4419,7 @@ static void visit_intrinsic(struct 
ac_nir_context *ctx,

  ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
  break;
  case nir_intrinsic_end_primitive:
-    visit_end_primitive(ctx->nctx, instr);
+    visit_end_primitive(ctx->nctx, nir_intrinsic_stream_id(instr));
  break;
  case nir_intrinsic_load_tess_coord: {
  LLVMTypeRef type = ctx->nctx ?





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 23/24] intel/blorp: Use isl_aux_op instead of blorp_hiz_op

2018-01-22 Thread Pohjolainen, Topi
On Fri, Jan 19, 2018 at 03:47:40PM -0800, Jason Ekstrand wrote:
> ---
>  src/intel/blorp/blorp.c   |  2 +-
>  src/intel/blorp/blorp.h   | 19 +
>  src/intel/blorp/blorp_clear.c |  2 +-
>  src/intel/blorp/blorp_genX_exec.h | 39 
> ++-
>  src/intel/blorp/blorp_priv.h  |  2 +-
>  src/intel/vulkan/anv_blorp.c  | 15 +--
>  src/mesa/drivers/dri/i965/brw_blorp.c | 16 +--
>  src/mesa/drivers/dri/i965/brw_blorp.h |  2 +-
>  src/mesa/drivers/dri/i965/brw_clear.c |  4 +--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 +-
>  10 files changed, 44 insertions(+), 71 deletions(-)

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
> index e8a2c61..f067515 100644
> --- a/src/intel/blorp/blorp.c
> +++ b/src/intel/blorp/blorp.c
> @@ -293,7 +293,7 @@ blorp_ensure_sf_program(struct blorp_context *blorp,
>  void
>  blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
>   uint32_t level, uint32_t start_layer, uint32_t num_layers,
> - enum blorp_hiz_op op)
> + enum isl_aux_op op)
>  {
> struct blorp_params params;
> blorp_params_init(&params);
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index f1be12b..ce3762c 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -207,27 +207,10 @@ blorp_mcs_partial_resolve(struct blorp_batch *batch,
>enum isl_format format,
>uint32_t start_layer, uint32_t num_layers);
>  
> -/**
> - * For an overview of the HiZ operations, see the following sections of the
> - * Sandy Bridge PRM, Volume 1, Part2:
> - *   - 7.5.3.1 Depth Buffer Clear
> - *   - 7.5.3.2 Depth Buffer Resolve
> - *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
> - *
> - * Of these, two get entered in the resolve map as needing to be done to the
> - * buffer: depth resolve and hiz resolve.
> - */
> -enum blorp_hiz_op {
> -   BLORP_HIZ_OP_NONE,
> -   BLORP_HIZ_OP_DEPTH_CLEAR,
> -   BLORP_HIZ_OP_DEPTH_RESOLVE,
> -   BLORP_HIZ_OP_HIZ_RESOLVE,
> -};
> -
>  void
>  blorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
>   uint32_t level, uint32_t start_layer, uint32_t num_layers,
> - enum blorp_hiz_op op);
> + enum isl_aux_op op);
>  
>  #ifdef __cplusplus
>  } /* end extern "C" */
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index 8b7c5a8..e9f5463 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -630,7 +630,7 @@ blorp_gen8_hiz_clear_attachments(struct blorp_batch 
> *batch,
> struct blorp_params params;
> blorp_params_init(&params);
> params.num_layers = 1;
> -   params.hiz_op = BLORP_HIZ_OP_DEPTH_CLEAR;
> +   params.hiz_op = ISL_AUX_OP_FAST_CLEAR;
> params.x0 = x0;
> params.y0 = y0;
> params.x1 = x1;
> diff --git a/src/intel/blorp/blorp_genX_exec.h 
> b/src/intel/blorp/blorp_genX_exec.h
> index 8d007b8..5e1312a 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -787,16 +787,16 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>  
> blorp_emit(batch, GENX(3DSTATE_WM), wm) {
>switch (params->hiz_op) {
> -  case BLORP_HIZ_OP_DEPTH_CLEAR:
> +  case ISL_AUX_OP_FAST_CLEAR:
>   wm.DepthBufferClear = true;
>   break;
> -  case BLORP_HIZ_OP_DEPTH_RESOLVE:
> +  case ISL_AUX_OP_FULL_RESOLVE:
>   wm.DepthBufferResolveEnable = true;
>   break;
> -  case BLORP_HIZ_OP_HIZ_RESOLVE:
> +  case ISL_AUX_OP_AMBIGUATE:
>   wm.HierarchicalDepthBufferResolveEnable = true;
>   break;
> -  case BLORP_HIZ_OP_NONE:
> +  case ISL_AUX_OP_NONE:
>   break;
>default:
>   unreachable("not reached");
> @@ -872,16 +872,16 @@ blorp_emit_ps_config(struct blorp_batch *batch,
>   batch->blorp->isl_dev->info->max_wm_threads - 1;
>  
>switch (params->hiz_op) {
> -  case BLORP_HIZ_OP_DEPTH_CLEAR:
> +  case ISL_AUX_OP_FAST_CLEAR:
>   wm.DepthBufferClear = true;
>   break;
> -  case BLORP_HIZ_OP_DEPTH_RESOLVE:
> +  case ISL_AUX_OP_FULL_RESOLVE:
>   wm.DepthBufferResolveEnable = true;
>   break;
> -  case BLORP_HIZ_OP_HIZ_RESOLVE:
> +  case ISL_AUX_OP_AMBIGUATE:
>   wm.HierarchicalDepthBufferResolveEnable = true;
>   break;
> -  case BLORP_HIZ_OP_NONE:
> +  case ISL_AUX_OP_NONE:
>   break;
>default:
>   unreachable("not reached");
> @@ -1014,7 +1014,7 @@ blorp_emit_depth_stencil_state(struct blorp_batch 
> *batch,
>ds.DepthBufferWriteEnable = true;
>  
>switch (params->hiz_op) {
> -  case BLORP_HIZ_OP_NONE:
> +  case ISL_AUX_OP_NONE:
>   ds.DepthTestEnab

[Mesa-dev] [Bug 104553] mat4: m[i][j] incorrect result with row_major UBO

2018-01-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104553

--- Comment #8 from Ilia Mirkin  ---
FYI idr had a separate script to reduce the giant test cases to the
smallest that would still fail. It should be in one of his piglit branches
in his personal fd.o git repo. (I'm on the go, hence no specific URL.)

On Jan 22, 2018 09:02,  wrote:

> florian.w...@googlemail.com changed bug 104553
> 
> What Removed Added
> Attachment #136630 is obsolete   1
>
> *Comment # 7  on
> bug 104553  from
> florian.w...@googlemail.com  *
>
> Created attachment 136878 
>  [details] 
>  [review] 
> 
> Changes to piglit UBO test generator
>
> I have now extended the random UBO piglit test generator python script (in a
> hackish way) to generate SSBO tests as well, and added std430 packing rules to
> generate std430 SSBO tests. My changes are in the attached patch file, but I'd
> say it's not suitable for piglit git (too ugly).
>
> It was helpful to validate the mesa patch I've attached to this bug report
> earlier. Using mesa git master, 391 out of the 540 generated UBO&SSBO tests
> fail. After applying my patch, only a few tests (3-7) fail. The failing tests
> are always very huge test files (some have more than 10k lines and sometimes 
> up
> to 5MB shader_test files). Apparently they hit something like an internal size
> limit for vertex shaders, because the tests pass when commenting out one half
> of the test conditions in the vertex shader, and they still pass when
> commenting out the other half of the vertex shader.
>
> So I'm now fairly confident that my patch improves the SSBO / UBO buffer 
> access
> behaviour when reading from SSBOs and UBOs.
>
> Is there anything else that should be tested? Or any comments about the patch
> by someone who knows the lower_buffer_access code better than I do?
>
> --
> You are receiving this mail because:
>
>- You are the assignee for the bug.
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] intel: add new common header gen_defines.h

2018-01-22 Thread Emil Velikov
On 22 January 2018 at 12:29, Tapani Pälli  wrote:
> Signed-off-by: Tapani Pälli 
> ---
>  src/intel/common/gen_defines.h | 52 
> ++
>  1 file changed, 52 insertions(+)
>  create mode 100644 src/intel/common/gen_defines.h
>
Please add the file to the src/intel/Makefile.sources COMMON_FILES list.


> diff --git a/src/intel/common/gen_defines.h b/src/intel/common/gen_defines.h
> new file mode 100644
> index 00..f8daa84431
> --- /dev/null
> +++ b/src/intel/common/gen_defines.h
> @@ -0,0 +1,52 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
> + * a copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sublicense, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial
> + * portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
> + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
> + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
> + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef GEN_DEFINES_H
> +#define GEN_DEFINES_H
> +
#include "i915_drm.h"

^^ is where the I915_CONTEXT*PRIORITY symbols are defined, so let's
include that.

With the two nitpicks, the series is:
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
Signed-off-by: Chuck Atkins 
---
 src/gallium/drivers/swr/swr_loader.cpp | 100 +
 src/gallium/drivers/swr/swr_public.h   |   6 +-
 src/gallium/drivers/swr/swr_screen.cpp |  26 +++--
 src/gallium/drivers/swr/swr_screen.h   |   3 +
 4 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_loader.cpp 
b/src/gallium/drivers/swr/swr_loader.cpp
index 7f28bdb536..1ebfaa8392 100644
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -29,96 +29,98 @@
 #include 
 
 // Helper function to resolve the backend filename based on architecture
-inline void get_swr_arch_filename(const char arch[], char filename[])
+static bool
+swr_initialize_screen_interface(struct swr_screen *screen, const char arch[])
 {
 #ifdef HAVE_SWR_BUILTIN
-   strcpy(filename , "builtin");
+   screen->pLibrary = NULL;
+   screen->pfnSwrGetInterface = SwrGetInterface;
+   fprintf(stderr, "(using: builtin).\n");
 #else
+   char filename[256] = { 0 };
sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
+
+   screen->pLibrary = util_dl_open(filename);
+   if (!screen->pLibrary) {
+  fprintf(stderr, "(skipping: %s)\n", util_dl_error());
+  return false;
+   }
+
+   util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
+  "SwrGetInterface");
+   if (!pApiProc) {
+  fprintf(stderr, "(skipping: %s)\n", util_dl_error());
+  util_dl_close(screen->pLibrary);
+  screen->pLibrary = NULL;
+  return false;
+   }
+
+   screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
+   fprintf(stderr, "(using: %s).\n", filename);
 #endif
+   return true;
 }
 
+
 struct pipe_screen *
 swr_create_screen(struct sw_winsys *winsys)
 {
-   char filename[256] = { 0 };
-   bool found = false;
-   bool is_knl = false;
-   PFNSwrGetInterface pfnSwrGetInterface = nullptr;
+   struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
+   if (!p_screen) {
+  return NULL;
+   }
+
+   struct swr_screen *screen = swr_screen(p_screen);
+   screen->is_knl = false;
 
util_cpu_detect();
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
   fprintf(stderr, "SWR detected KNL instruction support ");
 #ifndef HAVE_SWR_KNL
-  fprintf(stderr, "(skipping not built).\n");
+  fprintf(stderr, "(skipping: not built).\n");
 #else
-  get_swr_arch_filename("KNL", filename);
-  found = true;
-  is_knl = true;
+  if (swr_initialize_screen_interface(screen, "KNL")) {
+ screen->is_knl = true;
+ return p_screen;
+  }
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
   fprintf(stderr, "SWR detected SKX instruction support ");
 #ifndef HAVE_SWR_SKX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("SKX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "SKX"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx2) {
+   if (util_cpu_caps.has_avx2) {
   fprintf(stderr, "SWR detected AVX2 instruction support ");
 #ifndef HAVE_SWR_AVX2
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX2", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX2"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx) {
+   if (util_cpu_caps.has_avx) {
   fprintf(stderr, "SWR detected AVX instruction support ");
 #ifndef HAVE_SWR_AVX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX"))
+ return p_screen;
 #endif
}
 
-   if (!found) {
-  fprintf(stderr, "SWR could not detect a supported CPU architecture.\n");
-  exit(-1);
-   }
-
-   fprintf(stderr, "(using %s).\n", filename);
-
-#ifdef HAVE_SWR_BUILTIN
-   pfnSwrGetInterface = SwrGetInterface;
-#else
-   util_dl_library *pLibrary = util_dl_open(filename);
-   if (!pLibrary) {
-  fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   util_dl_proc pApiProc = util_dl_get_proc_address(pLibrary, 
"SwrGetInterface");
-   if (!pApiProc) {
-  fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
-#endif
-
-   struct pipe_screen *screen = swr_create_screen_internal(winsys);
-   swr_screen(screen)->is_knl = is_knl;
-   swr_screen(screen)->pfnSwrGetInterface = pfnSwrGetInterface;
+   fprintf(stderr, "SWR could not initialize a supported CPU architecture.\n");
+   swr_destroy_screen_internal(&screen);
 
-   return screen;
+   return NULL;
 }
 
 
diff -

[Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
Signed-off-by: Chuck Atkins 
---
 src/gallium/drivers/swr/swr_loader.cpp | 100 +
 src/gallium/drivers/swr/swr_public.h   |   6 +-
 src/gallium/drivers/swr/swr_screen.cpp |  26 +++--
 src/gallium/drivers/swr/swr_screen.h   |   3 +
 4 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_loader.cpp 
b/src/gallium/drivers/swr/swr_loader.cpp
index 7f28bdb536..01b9804646 100644
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -29,96 +29,98 @@
 #include 
 
 // Helper function to resolve the backend filename based on architecture
-inline void get_swr_arch_filename(const char arch[], char filename[])
+static bool
+swr_initialize_screen_interface(struct swr_screen *screen, const char arch[])
 {
 #ifdef HAVE_SWR_BUILTIN
-   strcpy(filename , "builtin");
+   screen->pLibrary = NULL;
+   screen->pfnSwrGetInterface = SwrGetInterface;
+   fprintf(stderr, "(using: builtin).\n");
 #else
+   char filename[256] = { 0 };
sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
+
+   screen->pLibrary = util_dl_open(filename);
+   if (!screen->pLibrary) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  return false;
+   }
+
+   util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
+  "SwrGetInterface");
+   if (!pApiProc) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  util_dl_close(screen->pLibrary);
+  screen->pLibrary = NULL;
+  return false;
+   }
+
+   screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
+   fprintf(stderr, "(using: %s).\n", filename);
 #endif
+   return true;
 }
 
+
 struct pipe_screen *
 swr_create_screen(struct sw_winsys *winsys)
 {
-   char filename[256] = { 0 };
-   bool found = false;
-   bool is_knl = false;
-   PFNSwrGetInterface pfnSwrGetInterface = nullptr;
+   struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
+   if (!p_screen) {
+  return NULL;
+   }
+
+   struct swr_screen *screen = swr_screen(p_screen);
+   screen->is_knl = false;
 
util_cpu_detect();
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
   fprintf(stderr, "SWR detected KNL instruction support ");
 #ifndef HAVE_SWR_KNL
-  fprintf(stderr, "(skipping not built).\n");
+  fprintf(stderr, "(skipping: not built).\n");
 #else
-  get_swr_arch_filename("KNL", filename);
-  found = true;
-  is_knl = true;
+  if (swr_initialize_screen_interface(screen, "KNL")) {
+ screen->is_knl = true;
+ return p_screen;
+  }
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
   fprintf(stderr, "SWR detected SKX instruction support ");
 #ifndef HAVE_SWR_SKX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("SKX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "SKX"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx2) {
+   if (util_cpu_caps.has_avx2) {
   fprintf(stderr, "SWR detected AVX2 instruction support ");
 #ifndef HAVE_SWR_AVX2
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX2", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX2"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx) {
+   if (util_cpu_caps.has_avx) {
   fprintf(stderr, "SWR detected AVX instruction support ");
 #ifndef HAVE_SWR_AVX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX"))
+ return p_screen;
 #endif
}
 
-   if (!found) {
-  fprintf(stderr, "SWR could not detect a supported CPU architecture.\n");
-  exit(-1);
-   }
-
-   fprintf(stderr, "(using %s).\n", filename);
-
-#ifdef HAVE_SWR_BUILTIN
-   pfnSwrGetInterface = SwrGetInterface;
-#else
-   util_dl_library *pLibrary = util_dl_open(filename);
-   if (!pLibrary) {
-  fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   util_dl_proc pApiProc = util_dl_get_proc_address(pLibrary, 
"SwrGetInterface");
-   if (!pApiProc) {
-  fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
-#endif
-
-   struct pipe_screen *screen = swr_create_screen_internal(winsys);
-   swr_screen(screen)->is_knl = is_knl;
-   swr_screen(screen)->pfnSwrGetInterface = pfnSwrGetInterface;
+   fprintf(stderr, "SWR could not initialize a supported CPU architecture.\n");
+   swr_destroy_screen_internal(&screen);
 
-   return screen;
+   return NULL;
 }
 
 
diff

[Mesa-dev] [PATCH] svga: Prevent use after free.

2018-01-22 Thread Jose Fonseca
Courtesy of clang static analyzer.

I was hunting for potential sources of memory corruption using Mesa with
a GL trace, and happened to find this (unrelated) issue.
---
 src/gallium/drivers/svga/svga_context.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/svga/svga_context.c 
b/src/gallium/drivers/svga/svga_context.c
index 546b9f48b1..af16a99b72 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -327,6 +327,7 @@ cleanup:
util_bitmask_destroy(svga->stream_output_id_bm);
util_bitmask_destroy(svga->query_id_bm);
FREE(svga);
+   svga = NULL;
 
 done:
SVGA_STATS_TIME_POP(svgascreen->sws);
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
For context, without this, the library handle from dlopen never gets
closed, even under successful operation, and the swr_screen created never
gets freed on error.  Also, error conditions resulted in exit() rather than
a NULL return.

- Chuck

On Mon, Jan 22, 2018 at 10:12 AM, Chuck Atkins 
wrote:

> Signed-off-by: Chuck Atkins 
> ---
>  src/gallium/drivers/swr/swr_loader.cpp | 100
> +
>  src/gallium/drivers/swr/swr_public.h   |   6 +-
>  src/gallium/drivers/swr/swr_screen.cpp |  26 +++--
>  src/gallium/drivers/swr/swr_screen.h   |   3 +
>  4 files changed, 79 insertions(+), 56 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/swr_loader.cpp
> b/src/gallium/drivers/swr/swr_loader.cpp
> index 7f28bdb536..01b9804646 100644
> --- a/src/gallium/drivers/swr/swr_loader.cpp
> +++ b/src/gallium/drivers/swr/swr_loader.cpp
> @@ -29,96 +29,98 @@
>  #include 
>
>  // Helper function to resolve the backend filename based on architecture
> -inline void get_swr_arch_filename(const char arch[], char filename[])
> +static bool
> +swr_initialize_screen_interface(struct swr_screen *screen, const char
> arch[])
>  {
>  #ifdef HAVE_SWR_BUILTIN
> -   strcpy(filename , "builtin");
> +   screen->pLibrary = NULL;
> +   screen->pfnSwrGetInterface = SwrGetInterface;
> +   fprintf(stderr, "(using: builtin).\n");
>  #else
> +   char filename[256] = { 0 };
> sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
> +
> +   screen->pLibrary = util_dl_open(filename);
> +   if (!screen->pLibrary) {
> +  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
> +  return false;
> +   }
> +
> +   util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
> +  "SwrGetInterface");
> +   if (!pApiProc) {
> +  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
> +  util_dl_close(screen->pLibrary);
> +  screen->pLibrary = NULL;
> +  return false;
> +   }
> +
> +   screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
> +   fprintf(stderr, "(using: %s).\n", filename);
>  #endif
> +   return true;
>  }
>
> +
>  struct pipe_screen *
>  swr_create_screen(struct sw_winsys *winsys)
>  {
> -   char filename[256] = { 0 };
> -   bool found = false;
> -   bool is_knl = false;
> -   PFNSwrGetInterface pfnSwrGetInterface = nullptr;
> +   struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
> +   if (!p_screen) {
> +  return NULL;
> +   }
> +
> +   struct swr_screen *screen = swr_screen(p_screen);
> +   screen->is_knl = false;
>
> util_cpu_detect();
>
> -   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er)
> {
> +   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
>fprintf(stderr, "SWR detected KNL instruction support ");
>  #ifndef HAVE_SWR_KNL
> -  fprintf(stderr, "(skipping not built).\n");
> +  fprintf(stderr, "(skipping: not built).\n");
>  #else
> -  get_swr_arch_filename("KNL", filename);
> -  found = true;
> -  is_knl = true;
> +  if (swr_initialize_screen_interface(screen, "KNL")) {
> + screen->is_knl = true;
> + return p_screen;
> +  }
>  #endif
> }
>
> -   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw)
> {
> +   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
>fprintf(stderr, "SWR detected SKX instruction support ");
>  #ifndef HAVE_SWR_SKX
>fprintf(stderr, "(skipping not built).\n");
>  #else
> -  get_swr_arch_filename("SKX", filename);
> -  found = true;
> +  if (swr_initialize_screen_interface(screen, "SKX"))
> + return p_screen;
>  #endif
> }
>
> -   if (!found && util_cpu_caps.has_avx2) {
> +   if (util_cpu_caps.has_avx2) {
>fprintf(stderr, "SWR detected AVX2 instruction support ");
>  #ifndef HAVE_SWR_AVX2
>fprintf(stderr, "(skipping not built).\n");
>  #else
> -  get_swr_arch_filename("AVX2", filename);
> -  found = true;
> +  if (swr_initialize_screen_interface(screen, "AVX2"))
> + return p_screen;
>  #endif
> }
>
> -   if (!found && util_cpu_caps.has_avx) {
> +   if (util_cpu_caps.has_avx) {
>fprintf(stderr, "SWR detected AVX instruction support ");
>  #ifndef HAVE_SWR_AVX
>fprintf(stderr, "(skipping not built).\n");
>  #else
> -  get_swr_arch_filename("AVX", filename);
> -  found = true;
> +  if (swr_initialize_screen_interface(screen, "AVX"))
> + return p_screen;
>  #endif
> }
>
> -   if (!found) {
> -  fprintf(stderr, "SWR could not detect a supported CPU
> architecture.\n");
> -  exit(-1);
> -   }
> -
> -   fprintf(stderr, "(using %s).\n", filename);
> -
> -#ifdef HAVE_SWR_BUILTIN
> -   pfnSwrGetInterface = SwrGetInterface;
> -#else
> -   util_dl_library *pLibrary = util_dl_open(filename);
> -   if (!pLibrary) {
> -  fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
> -  exit(-1);
> -   }
> -
> -   util_dl_proc pApiProc = util_dl_get_proc

[Mesa-dev] [PATCH] spirv/nir: Fix the stream ID when emitting a primitive or vertex

2018-01-22 Thread Neil Roberts
According to the SPIR-V spec:

“Stream must be an <id> of a constant instruction with a scalar
 integer type. That constant is the output-primitive stream number.”

The previous code was treating it as an integer literal.
---

This is part of the GL SPIR-V branch to enable streams for transform
feedback but seeing as it is a standalone fix for existing code I
thought it might be worth posting separately.

 src/compiler/spirv/spirv_to_nir.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index c6df764682e..c71ed51e4cf 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2983,8 +2983,12 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode,
nir_intrinsic_instr *intrin =
   nir_intrinsic_instr_create(b->shader, intrinsic_op);
 
-   if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive)
-  nir_intrinsic_set_stream_id(intrin, w[1]);
+   if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) {
+  struct vtn_value *stream_value =
+ vtn_value(b, w[1], vtn_value_type_constant);
+  unsigned stream = stream_value->constant->values[0].u32[0];
+  nir_intrinsic_set_stream_id(intrin, stream);
+   }
 
nir_builder_instr_insert(&b->nb, &intrin->instr);
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] svga: Prevent use after free.

2018-01-22 Thread Emil Velikov
On 22 January 2018 at 15:13, Jose Fonseca  wrote:
> Courtesy of clang static analyzer.
>
> I was hunting for potential sources of memory corruption using Mesa with
> a GL trace, and happened to find this (unrelated) issue.
> ---
>  src/gallium/drivers/svga/svga_context.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/drivers/svga/svga_context.c 
> b/src/gallium/drivers/svga/svga_context.c
> index 546b9f48b1..af16a99b72 100644
> --- a/src/gallium/drivers/svga/svga_context.c
> +++ b/src/gallium/drivers/svga/svga_context.c
> @@ -327,6 +327,7 @@ cleanup:
> util_bitmask_destroy(svga->stream_output_id_bm);
> util_bitmask_destroy(svga->query_id_bm);
> FREE(svga);
> +   svga = NULL;
>
>  done:
> SVGA_STATS_TIME_POP(svgascreen->sws);

Seems like the most important line is just outside of context. Namely:

return svga ? &svga->pipe:NULL;

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Emil Velikov
On 22 January 2018 at 15:12, Chuck Atkins  wrote:
> Signed-off-by: Chuck Atkins 
Please include your follow-up reply/context as commit message.

>  // arch-specific dll entry point
>  PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys 
> *winsys);
>
> +// cleanup for failed screen creation
> +PUBLIC void swr_destroy_screen_internal(struct swr_screen **screen);
> +

I'm fairly sure you don't need to make this function public. It's used
within the same binary - libmesaswr.la - regardless of whether built-in or
separate DSOs are used.

With the above two suggestions:

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: make brw_context::num_samples unsigned int

2018-01-22 Thread Emil Velikov
On 21 January 2018 at 16:15, Jason Ekstrand  wrote:
> Fair enough.  I wasn't sure what you meant.
>
> Reviewed-by: Jason Ekstrand 
>
Thanks, tweaked a bit and pushed to master.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Only require independent blending for GLES 3.2.

2018-01-22 Thread Roland Scheidegger
Am 22.01.2018 um 08:16 schrieb Tapani Pälli:
> 
> 
> On 01/22/2018 08:48 AM, Ilia Mirkin wrote:
>> What es3 does require, though, is mrt. Maybe add something that
>> ensures that mrt is supported?
> 
> I guess that would be EXT_draw_buffers? That is currently enabled with
> 'dummy_true'.
But only because it allows a dummy implementation - minimum of
MaxDrawBuffers allowed is 1 as per the extension spec.
GLES 3.0-3.2 would require 4, GL 3.0-4.6 would require 8 at a quick
glance...
I guess adding a check wouldn't hurt, but I suppose all drivers
currently exposing the affected gl/gles versions would already meet the
limit.

Roland

> 
> 
>> On Jan 19, 2018 08:39, "Eric Anholt" > > wrote:
>>
>>     We've been requiring this since GLES 3.0 was introduced, but the
>>     GLES 3.2
>>     spec is the one that has "Supporting blending on a per-draw-buffer
>>     basis"
>>     in the new features.  V3D 3.3 would require lowering blending to
>> shader
>>     code to implement independent blending.
>>     ---
>>   src/mesa/main/version.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>>     diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
>>     index 68079f4ebbba..1fce8fe7ca9c 100644
>>     --- a/src/mesa/main/version.c
>>     +++ b/src/mesa/main/version.c
>>     @@ -516,7 +516,6 @@ compute_version_es2(const struct gl_extensions
>>     *extensions,
>>                            extensions->ARB_texture_float &&
>>                            extensions->ARB_texture_rg &&
>>                            extensions->ARB_depth_buffer_float &&
>>     -                         extensions->EXT_draw_buffers2 &&
>>                            /* extensions->ARB_framebuffer_object
>> && */
>>                            extensions->EXT_framebuffer_sRGB &&
>>                            extensions->EXT_packed_float &&
>>     @@ -546,6 +545,7 @@ compute_version_es2(const struct gl_extensions
>>     *extensions,
>>                            extensions->ARB_gpu_shader5 &&
>>                            extensions->EXT_shader_integer_mix);
>>      const bool ver_3_2 = (ver_3_1 &&
>>     +                         extensions->EXT_draw_buffers2 &&
>>                            extensions->KHR_blend_equation_advanced &&
>>                            extensions->KHR_robustness &&
>>      
>> extensions->KHR_texture_compression_astc_ldr &&
>>     --
>>     2.15.0
>>
>>     ___
>>     mesa-dev mailing list
>>     mesa-dev@lists.freedesktop.org
>> 
>>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>     
>>
>>
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100951] vkcube fails with vkMapMemory failed

2018-01-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100951

--- Comment #4 from Kristian Høgsberg  ---
Can you really claim it's "known broken" when nobody told the maintainer?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] intel: add new common header gen_defines.h

2018-01-22 Thread Tapani Pälli



On 22.01.2018 17:10, Emil Velikov wrote:

On 22 January 2018 at 12:29, Tapani Pälli  wrote:

Signed-off-by: Tapani Pälli 
---
  src/intel/common/gen_defines.h | 52 ++
  1 file changed, 52 insertions(+)
  create mode 100644 src/intel/common/gen_defines.h


Please add the file to the src/intel/Makefile.sources COMMON_FILES list.


ok




diff --git a/src/intel/common/gen_defines.h b/src/intel/common/gen_defines.h
new file mode 100644
index 00..f8daa84431
--- /dev/null
+++ b/src/intel/common/gen_defines.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GEN_DEFINES_H
+#define GEN_DEFINES_H
+

#include "i915_drm.h"

^^ is where the I915_CONTEXT*PRIORITY symbols are defined, so let's
include that.


Will do, thanks Emil!


With the two nitpicks, the series is:
Reviewed-by: Emil Velikov 

-Emil



// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
Hi Emil,

Please include your follow-up reply/context as commit message.
>

Will do.


> +// cleanup for failed screen creation
> > +PUBLIC void swr_destroy_screen_internal(struct swr_screen **screen);
>
> I'm fairly sure you don't need to make this function public. It's used
> within the same binary - libmesaswr.la. Regardless if built-in or
> separate DSO's are used.
>

You are, of course, correct.  I was conflating static with not public and
thinking that since it's not in the same translation unit then it can't be
static so it must be public, which is certainly not the case.


With the above two suggestions:
>
> Cc: mesa-sta...@lists.freedesktop.org


I purposely left this out of mesa-stable since it requires the previous two
commits that initially refactored swr_create_screen to allow for builtin
arch, which were not part of stable.

- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103543] meson targets which link libgallium.a have unresolved symbol llvm::RTDyldMemoryManager

2018-01-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103543

Vedran Miletić  changed:

   What|Removed |Added

 Status|NEEDINFO|NEW
 CC||ved...@miletic.net

--- Comment #7 from Vedran Miletić  ---
I can reproduce on Fedora 27 and Fedora 28 (rawhide) with various LLVM versions
from 3.9 onwards. What info could be helpful?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
This makes the followingt changes to address cleanup issues:
- Error conditions now return null instead of calling exit()
- swr_screen is now freed upon error, rather than leaked.
- Library handle from dlopen is now closed upon swr_screen destruction

v2: Added additional context in commit msg and remove unnecessary "PUBLIC"

Signed-off-by: Chuck Atkins 
Reviewed-by: Emil Velikov 
Cc: Bruce Cherniak 
Cc: Tim Rowley 
---
 src/gallium/drivers/swr/swr_loader.cpp | 100 +
 src/gallium/drivers/swr/swr_public.h   |   6 +-
 src/gallium/drivers/swr/swr_screen.cpp |  26 +++--
 src/gallium/drivers/swr/swr_screen.h   |   3 +
 4 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_loader.cpp 
b/src/gallium/drivers/swr/swr_loader.cpp
index 7f28bdb536..01b9804646 100644
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -29,96 +29,98 @@
 #include 
 
 // Helper function to resolve the backend filename based on architecture
-inline void get_swr_arch_filename(const char arch[], char filename[])
+static bool
+swr_initialize_screen_interface(struct swr_screen *screen, const char arch[])
 {
 #ifdef HAVE_SWR_BUILTIN
-   strcpy(filename , "builtin");
+   screen->pLibrary = NULL;
+   screen->pfnSwrGetInterface = SwrGetInterface;
+   fprintf(stderr, "(using: builtin).\n");
 #else
+   char filename[256] = { 0 };
sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
+
+   screen->pLibrary = util_dl_open(filename);
+   if (!screen->pLibrary) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  return false;
+   }
+
+   util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
+  "SwrGetInterface");
+   if (!pApiProc) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  util_dl_close(screen->pLibrary);
+  screen->pLibrary = NULL;
+  return false;
+   }
+
+   screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
+   fprintf(stderr, "(using: %s).\n", filename);
 #endif
+   return true;
 }
 
+
 struct pipe_screen *
 swr_create_screen(struct sw_winsys *winsys)
 {
-   char filename[256] = { 0 };
-   bool found = false;
-   bool is_knl = false;
-   PFNSwrGetInterface pfnSwrGetInterface = nullptr;
+   struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
+   if (!p_screen) {
+  return NULL;
+   }
+
+   struct swr_screen *screen = swr_screen(p_screen);
+   screen->is_knl = false;
 
util_cpu_detect();
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
   fprintf(stderr, "SWR detected KNL instruction support ");
 #ifndef HAVE_SWR_KNL
-  fprintf(stderr, "(skipping not built).\n");
+  fprintf(stderr, "(skipping: not built).\n");
 #else
-  get_swr_arch_filename("KNL", filename);
-  found = true;
-  is_knl = true;
+  if (swr_initialize_screen_interface(screen, "KNL")) {
+ screen->is_knl = true;
+ return p_screen;
+  }
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
   fprintf(stderr, "SWR detected SKX instruction support ");
 #ifndef HAVE_SWR_SKX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("SKX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "SKX"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx2) {
+   if (util_cpu_caps.has_avx2) {
   fprintf(stderr, "SWR detected AVX2 instruction support ");
 #ifndef HAVE_SWR_AVX2
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX2", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX2"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx) {
+   if (util_cpu_caps.has_avx) {
   fprintf(stderr, "SWR detected AVX instruction support ");
 #ifndef HAVE_SWR_AVX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX"))
+ return p_screen;
 #endif
}
 
-   if (!found) {
-  fprintf(stderr, "SWR could not detect a supported CPU architecture.\n");
-  exit(-1);
-   }
-
-   fprintf(stderr, "(using %s).\n", filename);
-
-#ifdef HAVE_SWR_BUILTIN
-   pfnSwrGetInterface = SwrGetInterface;
-#else
-   util_dl_library *pLibrary = util_dl_open(filename);
-   if (!pLibrary) {
-  fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   util_dl_proc pApiProc = util_dl_get_proc_address(pLibrary, 
"SwrGetInterface");
-   if (!pApiProc) {
-  fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   pfnSwrGetInterface =

[Mesa-dev] [Bug 103543] meson targets which link libgallium.a have unresolved symbol llvm::RTDyldMemoryManager

2018-01-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103543

--- Comment #8 from Vedran Miletić  ---
Created attachment 136898
  --> https://bugs.freedesktop.org/attachment.cgi?id=136898&action=edit
build errors

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
This makes the following changes to address cleanup issues:
- Error conditions now return NULL instead of calling exit()
- swr_screen is now freed upon error, rather than leaked.
- Library handle from dlopen is now closed upon swr_screen destruction

v2: Added additional context in commit msg and remove unnecessary "PUBLIC"
v3: Fix typo in commit message.

Signed-off-by: Chuck Atkins 
Reviewed-by: Emil Velikov 
Cc: Bruce Cherniak 
Cc: Tim Rowley 
---
 src/gallium/drivers/swr/swr_loader.cpp | 100 +
 src/gallium/drivers/swr/swr_public.h   |   6 +-
 src/gallium/drivers/swr/swr_screen.cpp |  26 +++--
 src/gallium/drivers/swr/swr_screen.h   |   3 +
 4 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_loader.cpp 
b/src/gallium/drivers/swr/swr_loader.cpp
index 7f28bdb536..01b9804646 100644
--- a/src/gallium/drivers/swr/swr_loader.cpp
+++ b/src/gallium/drivers/swr/swr_loader.cpp
@@ -29,96 +29,98 @@
 #include 
 
 // Helper function to resolve the backend filename based on architecture
-inline void get_swr_arch_filename(const char arch[], char filename[])
+static bool
+swr_initialize_screen_interface(struct swr_screen *screen, const char arch[])
 {
 #ifdef HAVE_SWR_BUILTIN
-   strcpy(filename , "builtin");
+   screen->pLibrary = NULL;
+   screen->pfnSwrGetInterface = SwrGetInterface;
+   fprintf(stderr, "(using: builtin).\n");
 #else
+   char filename[256] = { 0 };
sprintf(filename, "%sswr%s%s", UTIL_DL_PREFIX, arch, UTIL_DL_EXT);
+
+   screen->pLibrary = util_dl_open(filename);
+   if (!screen->pLibrary) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  return false;
+   }
+
+   util_dl_proc pApiProc = util_dl_get_proc_address(screen->pLibrary,
+  "SwrGetInterface");
+   if (!pApiProc) {
+  fprintf(stderr, "(skipping: %s).\n", util_dl_error());
+  util_dl_close(screen->pLibrary);
+  screen->pLibrary = NULL;
+  return false;
+   }
+
+   screen->pfnSwrGetInterface = (PFNSwrGetInterface)pApiProc;
+   fprintf(stderr, "(using: %s).\n", filename);
 #endif
+   return true;
 }
 
+
 struct pipe_screen *
 swr_create_screen(struct sw_winsys *winsys)
 {
-   char filename[256] = { 0 };
-   bool found = false;
-   bool is_knl = false;
-   PFNSwrGetInterface pfnSwrGetInterface = nullptr;
+   struct pipe_screen *p_screen = swr_create_screen_internal(winsys);
+   if (!p_screen) {
+  return NULL;
+   }
+
+   struct swr_screen *screen = swr_screen(p_screen);
+   screen->is_knl = false;
 
util_cpu_detect();
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512er) {
   fprintf(stderr, "SWR detected KNL instruction support ");
 #ifndef HAVE_SWR_KNL
-  fprintf(stderr, "(skipping not built).\n");
+  fprintf(stderr, "(skipping: not built).\n");
 #else
-  get_swr_arch_filename("KNL", filename);
-  found = true;
-  is_knl = true;
+  if (swr_initialize_screen_interface(screen, "KNL")) {
+ screen->is_knl = true;
+ return p_screen;
+  }
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
+   if (util_cpu_caps.has_avx512f && util_cpu_caps.has_avx512bw) {
   fprintf(stderr, "SWR detected SKX instruction support ");
 #ifndef HAVE_SWR_SKX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("SKX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "SKX"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx2) {
+   if (util_cpu_caps.has_avx2) {
   fprintf(stderr, "SWR detected AVX2 instruction support ");
 #ifndef HAVE_SWR_AVX2
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX2", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX2"))
+ return p_screen;
 #endif
}
 
-   if (!found && util_cpu_caps.has_avx) {
+   if (util_cpu_caps.has_avx) {
   fprintf(stderr, "SWR detected AVX instruction support ");
 #ifndef HAVE_SWR_AVX
   fprintf(stderr, "(skipping not built).\n");
 #else
-  get_swr_arch_filename("AVX", filename);
-  found = true;
+  if (swr_initialize_screen_interface(screen, "AVX"))
+ return p_screen;
 #endif
}
 
-   if (!found) {
-  fprintf(stderr, "SWR could not detect a supported CPU architecture.\n");
-  exit(-1);
-   }
-
-   fprintf(stderr, "(using %s).\n", filename);
-
-#ifdef HAVE_SWR_BUILTIN
-   pfnSwrGetInterface = SwrGetInterface;
-#else
-   util_dl_library *pLibrary = util_dl_open(filename);
-   if (!pLibrary) {
-  fprintf(stderr, "SWR library load failure: %s\n", util_dl_error());
-  exit(-1);
-   }
-
-   util_dl_proc pApiProc = util_dl_get_proc_address(pLibrary, 
"SwrGetInterface");
-   if (!pApiProc) {
-  fprintf(stderr, "SWR library search failure: %s\n", util_dl_error());
-  exit(-1);
-

Re: [Mesa-dev] [PATCH 2/2] i965: Set tiling on BOs imported with modifiers

2018-01-22 Thread Kristian Høgsberg
On Sun, Jan 21, 2018 at 8:05 PM, Jason Ekstrand  wrote:
> We need this to ensure that GTT maps work on buffers we get from Vulkan
> on the off chance that someone does a readpixels or something.  Soon, we
> will be removing GTT maps from i965 entirely and this can be reverted.
> None the less, it's needed for stable.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index b563bbf..e877f93 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1043,7 +1043,16 @@ intel_create_image_from_fds_common(__DRIscreen 
> *dri_screen,
>
> image->planar_format = f;
>
> -   image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
> +   if (modifier != DRM_FORMAT_MOD_INVALID) {
> +  const struct isl_drm_modifier_info *mod_info =
> + isl_drm_modifier_get_info(modifier);

You need to handle mod_info == NULL for unknown modifiers.

Kristian

> +  uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling);
> +  image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr, fds[0],
> + tiling, strides[0]);
> +   } else {
> +  image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
> +   }
> +
> if (image->bo == NULL) {
>free(image);
>return NULL;
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] EGL_ANDROID_blob_cache

2018-01-22 Thread Tapani Pälli



On 22.01.2018 15:16, Nicolai Hähnle wrote:

On 19.01.2018 07:05, Tapani Pälli wrote:



On 01/18/2018 05:49 PM, Nicolai Hähnle wrote:

On 17.01.2018 17:10, Tapani Pälli wrote:



On 17.01.2018 13:34, Nicolai Hähnle wrote:

On 15.01.2018 13:31, Tapani Pälli wrote:

Hello;

Here's a refactored series of EGL_ANDROID_blob_cache. Now cache
functions are stored in disk_cache struct and the functionality
called within existing disk_cache put/get code. Problems/errors
that existed with earlier series are gone.

On Android, the cache index file is created in MESA_GLSL_CACHE_DIR
and blobs are  generated under '/data/user_de/0' in application
specific paths:


Can't we let the "cache index" be managed by the ANDROID_blob_cache 
as well? That seems to me more in the spirit of what that extension 
is about, and would avoid polluting stuff like /sdcard.


The cache index file is used for two purposes:

- Keeping track of the size of the cache. This use is obsoleted 
entirely by ANDROID_blob_cache.


- Keeping track of keys where only their presence is relevant and 
no data is associated. These could easily be treated as empty (0 
byte sized) blobs.


I guess it could be possible, for every app we would create index 
when disk_cache gets created. I guess only issue could be that 
Android might go and remove the index from cache when it decides to 
resize it and then we would lose it even though there would be 
items in cache. I can try how this would work.


Entries can be kicked out of the on-disk index (which is really a bit 
of a misnomer) as well if there's a hash collision.


I believe this functionality is only used as a marker to indicate 
that the GLSL compile step can be skipped. So losing an entry in 
there shouldn't cause any damage.


What I meant is that if we would store index by using set() function, 
the index itself would be just another entry for Android's cache, it 
can decide to remove it when it manages cache size.


Not sure if we're talking about the same thing, then? What I meant was 
that every call to disk_cache_put_key becomes a call to the 
Android-provided set() function, and every call to disk_cache_has_key 
becomes a call to the get() function.


Sorry for the confusion, this is how I implemented it now for the 
current new set I sent.


Although storing the index itself in the blob cache is also an 
interesting idea.


Cheers,
Nicolai





Cheers,
Nicolai






Cheers,
Nicolai





androidia_64:/ # find /data/user_de/0/ -name *shader*
/data/user_de/0/com.android.settings/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.android.gallery3d/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.android.systemui/code_cache/com.android.opengl.shaders_cache 

/data/user_de/0/com.rovio.angrybirdsspace.ads/code_cache/com.android.opengl.shaders_cache 



(this part is managed by Android but may be interesting to know).

Also SurfaceFlinger manages its own cache as seen in the log output:
01-15 07:40:26.329  2129  2129 D SurfaceFlinger: shader cache 
generated - 24 shaders in 57.687504 ms


I'm not sure if /sdcard is a sane default but I've tried everything
else (/cache, /data/cache) and failed because of permission errors.

Thanks;

Tapani Pälli (7):
   dri: add interface for EGL_ANDROID_blob_cache extension
   egl: add support for EGL_ANDROID_blob_cache
   disk cache: add callback functionality
   disk cache: support setting MESA_GLSL_CACHE_DIR at compile time
   i965: add __DRI2_BLOB support and set cache functions
   android: ignore MESA_GLSL_CACHE_DISABLE setting
   android: set '/sdcard/' as MESA_GLSL_CACHE_DIR by default

  Android.common.mk  |  1 +
  include/GL/internal/dri_interface.h    | 26 +-
  src/egl/drivers/dri2/egl_dri2.c    | 43 


  src/egl/drivers/dri2/egl_dri2.h    |  4 +++
  src/egl/main/eglapi.c  | 29 
  src/egl/main/eglapi.h  |  4 +++
  src/egl/main/egldisplay.h  |  3 ++
  src/egl/main/eglentrypoint.h   |  1 +
  src/mesa/drivers/dri/i965/brw_disk_cache.c |  2 ++
  src/mesa/drivers/dri/i965/intel_screen.c   | 21 
  src/util/disk_cache.c  | 54 
+-

  src/util/disk_cache.h  | 19 +++
  12 files changed, 205 insertions(+), 2 deletions(-)











___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: don't export swr_create_screen_internal

2018-01-22 Thread Emil Velikov
From: Emil Velikov 

With earlier rework the user and provider of the symbol are within the
same binary. Thus there's no point in exporting the function.

Spotted while reviewing patch from Chuck, that nearly added another
unneeded PUBLIC function.

Cc: Chuck Atkins 
Cc: Tim Rowley 
Fixes: f50aa21456d ("swr: build driver proper separate from rasterizer")
Signed-off-by: Emil Velikov 
---
The comment might need updating as well, although I'm short on
suggestions.
---
 src/gallium/drivers/swr/swr_public.h   | 2 +-
 src/gallium/drivers/swr/swr_screen.cpp | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/swr_public.h 
b/src/gallium/drivers/swr/swr_public.h
index 4b150705cd7..cb205479a91 100644
--- a/src/gallium/drivers/swr/swr_public.h
+++ b/src/gallium/drivers/swr/swr_public.h
@@ -36,7 +36,7 @@ extern "C" {
 struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
 
 // arch-specific dll entry point
-PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys 
*winsys);
+struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
 
 #ifdef _WIN32
 void swr_gdi_swap(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/swr/swr_screen.cpp 
b/src/gallium/drivers/swr/swr_screen.cpp
index b67ac25ac89..10b7e891aca 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1114,7 +1114,6 @@ swr_validate_env_options(struct swr_screen *screen)
 }
 
 
-PUBLIC
 struct pipe_screen *
 swr_create_screen_internal(struct sw_winsys *winsys)
 {
-- 
2.16.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Emil Velikov
On 22 January 2018 at 17:03, Chuck Atkins  wrote:
> Hi Emil,
>
>> Please include your follow-up reply/context as commit message.
>
>
> Will do.
>
>
>> > +// cleanup for failed screen creation
>> > +PUBLIC void swr_destroy_screen_internal(struct swr_screen **screen);
>>
>> I'm fairly sure you don't need to make this function public. It's used
>> within the same binary - libmesaswr.la. Regardless if built-in or
>> separate DSO's are used.
>
>
> You are, of course, correct.  I was conflating static with not public and
> thinking that since it's not in the same translation unit then it can't be
> static so it must be public, which is certainly not the case.
>
Hmm not your fault really - swr_create_screen_internal should have
lost its PUBLIC notation.
Will send a patch for it in a moment.

>
>> With the above two suggestions:
>>
>> Cc: mesa-sta...@lists.freedesktop.org
>
>
> I purposely left this out of mesa-stable since it requires the previous two
> commits that initially refactored swr_create_screen to allow for builtin
> arch, which were not part of stable.
>
Right this cleans up after the following commit - please include the tag.
Don't bother re-sending.

Fixes: a4be2bcee2f ("swr: allow a single swr architecture to be builtin")

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104732] [radv] Binding descriptor sets disturbs other pipeline bindings

2018-01-22 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104732

Bug ID: 104732
   Summary: [radv] Binding descriptor sets disturbs other pipeline
bindings
   Product: Mesa
   Version: git
  Hardware: All
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: joseph.ku...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Binding descriptor sets for the compute pipeline should not disturb bindings
for the graphics pipeline and vice versa.

The Vulkan spec states:
"pipelineBindPoint is a VkPipelineBindPoint indicating whether the descriptors
will be used by graphics pipelines or compute pipelines. There is a separate
set of bind points for each of graphics and compute, so binding one does not
disturb the other."

Test case:
https://cgit.freedesktop.org/mesa/crucible/commit/?id=0a52673d71b444df81828d75138377b082855d03

How to reproduce: ./bin/crucible run func.bind-points

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: refactor swr_create_screen to allow for proper cleanup on error

2018-01-22 Thread Chuck Atkins
Hi Emil,

Fixes: a4be2bcee2f ("swr: allow a single swr architecture to be builtin")
>

It doesn't fix anything that was broken from that commit.  The issues with
error handling were already present before then, it's just that the changes
in a4be2bcee2f were substantial so this commit works off the result of the
previous.  So really it would be "Depends on: " rather than "Fixes: ".



> Right this cleans up after the following commit - please include the tag.
> Don't bother re-sending.
>

Not sure how I would go about that since I don't have push access.  The ML
is really the only mechanism I have to send commits to the git repo.  Given
that I've been submitting patches from time to time for the past two years
or so (not often, but every few months I have a handful), should I go ahead
and apply for an account now?

- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Set tiling on BOs imported with modifiers

2018-01-22 Thread Jason Ekstrand

On January 22, 2018 09:10:52 Kristian Høgsberg  wrote:


On Sun, Jan 21, 2018 at 8:05 PM, Jason Ekstrand  wrote:

We need this to ensure that GTT maps work on buffers we get from Vulkan
on the off chance that someone does a readpixels or something.  Soon, we
will be removing GTT maps from i965 entirely and this can be reverted.
None the less, it's needed for stable.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/intel_screen.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c

index b563bbf..e877f93 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1043,7 +1043,16 @@ intel_create_image_from_fds_common(__DRIscreen 
*dri_screen,


image->planar_format = f;

-   image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
+   if (modifier != DRM_FORMAT_MOD_INVALID) {
+  const struct isl_drm_modifier_info *mod_info =
+ isl_drm_modifier_get_info(modifier);


You need to handle mod_info == NULL for unknown modifiers.


The code above this checks that the modifier is supported and bails if it 
isn't.



Kristian


+  uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling);
+  image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr, fds[0],
+ tiling, strides[0]);
+   } else {
+  image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
+   }
+
if (image->bo == NULL) {
   free(image);
   return NULL;
--
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965: Set tiling on BOs imported with modifiers

2018-01-22 Thread Kristian Høgsberg
On Mon, Jan 22, 2018 at 10:03 AM, Jason Ekstrand  wrote:
> On January 22, 2018 09:10:52 Kristian Høgsberg  wrote:
>
>> On Sun, Jan 21, 2018 at 8:05 PM, Jason Ekstrand 
>> wrote:
>>>
>>> We need this to ensure that GTT maps work on buffers we get from Vulkan
>>> on the off chance that someone does a readpixels or something.  Soon, we
>>> will be removing GTT maps from i965 entirely and this can be reverted.
>>> None the less, it's needed for stable.
>>>
>>> Cc: mesa-sta...@lists.freedesktop.org
>>> Cc: Kenneth Graunke 
>>> ---
>>>  src/mesa/drivers/dri/i965/intel_screen.c | 11 ++-
>>>  1 file changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
>>> b/src/mesa/drivers/dri/i965/intel_screen.c
>>> index b563bbf..e877f93 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_screen.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
>>> @@ -1043,7 +1043,16 @@ intel_create_image_from_fds_common(__DRIscreen
>>> *dri_screen,
>>>
>>> image->planar_format = f;
>>>
>>> -   image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
>>> +   if (modifier != DRM_FORMAT_MOD_INVALID) {
>>> +  const struct isl_drm_modifier_info *mod_info =
>>> + isl_drm_modifier_get_info(modifier);
>>
>>
>> You need to handle mod_info == NULL for unknown modifiers.
>
>
> The code above this checks that the modifier is supported and bails if it
> isn't.

So it does. Looking forward to our bright future without gtt maps. In
the interim, thanks for fixing this.

Reviewed-by: Kristian H. Kristensen 

Kristian

>
>
>> Kristian
>>
>>> +  uint32_t tiling = isl_tiling_to_i915_tiling(mod_info->tiling);
>>> +  image->bo = brw_bo_gem_create_from_prime_tiled(screen->bufmgr,
>>> fds[0],
>>> + tiling,
>>> strides[0]);
>>> +   } else {
>>> +  image->bo = brw_bo_gem_create_from_prime(screen->bufmgr, fds[0]);
>>> +   }
>>> +
>>> if (image->bo == NULL) {
>>>free(image);
>>>return NULL;
>>> --
>>> 2.5.0.400.gff86faf
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/bufmgr: Add a create_from_prime_tiled function

2018-01-22 Thread Kristian Høgsberg
On Sun, Jan 21, 2018 at 8:05 PM, Jason Ekstrand  wrote:
> This new function is an import and a set tiling in one go.
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 41 
> ++
>  src/mesa/drivers/dri/i965/brw_bufmgr.h |  4 
>  2 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 469895e..889350c 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -1133,8 +1133,9 @@ brw_bo_get_tiling(struct brw_bo *bo, uint32_t 
> *tiling_mode,
> return 0;
>  }
>
> -struct brw_bo *
> -brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
> +static struct brw_bo *
> +brw_bo_gem_create_from_prime_internal(struct brw_bufmgr *bufmgr, int 
> prime_fd,
> +  int tiling_mode, uint32_t stride)
>  {
> int ret;
> uint32_t handle;
> @@ -1185,14 +1186,18 @@ brw_bo_gem_create_from_prime(struct brw_bufmgr 
> *bufmgr, int prime_fd)
> bo->reusable = false;
> bo->external = true;
>
> -   memclear(get_tiling);
> -   get_tiling.handle = bo->gem_handle;
> -   if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
> -  goto err;
> +   if (tiling_mode < 0) {
> +  memclear(get_tiling);
> +  get_tiling.handle = bo->gem_handle;
> +  if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
> + goto err;
>
> -   bo->tiling_mode = get_tiling.tiling_mode;
> -   bo->swizzle_mode = get_tiling.swizzle_mode;
> -   /* XXX stride is unknown */
> +  bo->tiling_mode = get_tiling.tiling_mode;
> +  bo->swizzle_mode = get_tiling.swizzle_mode;
> +  /* XXX stride is unknown */
> +   } else {
> +  bo_set_tiling_internal(bo, tiling_mode, stride);
> +   }

Since most of this logic is specific to the tiling == -1 case, maybe
move the get_tiling path to brw_bo_gem_create_from_prime(). Either
move bo_set_tiling() to brw_bo_gem_create_from_prime_tiled() or call
it from here when stride > 0.

Either way,

Reviewed-by: Kristian H. Kristensen 

>
>  out:
> mtx_unlock(&bufmgr->lock);
> @@ -1204,6 +1209,24 @@ err:
> return NULL;
>  }
>
> +struct brw_bo *
> +brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr, int prime_fd)
> +{
> +   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd, -1, 0);
> +}
> +
> +struct brw_bo *
> +brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr, int prime_fd,
> +   uint32_t tiling_mode, uint32_t stride)
> +{
> +   assert(tiling_mode == I915_TILING_NONE ||
> +  tiling_mode == I915_TILING_X ||
> +  tiling_mode == I915_TILING_Y);
> +
> +   return brw_bo_gem_create_from_prime_internal(bufmgr, prime_fd,
> +tiling_mode, stride);
> +}
> +
>  static void
>  brw_bo_make_external(struct brw_bo *bo)
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 8bfb0e4..6811e78 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -339,6 +339,10 @@ void brw_destroy_hw_context(struct brw_bufmgr *bufmgr, 
> uint32_t ctx_id);
>  int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
>  struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
>  int prime_fd);
> +struct brw_bo *brw_bo_gem_create_from_prime_tiled(struct brw_bufmgr *bufmgr,
> +  int prime_fd,
> +  uint32_t tiling_mode,
> +  uint32_t stride);
>
>  uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);
>
> --
> 2.5.0.400.gff86faf
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 04/24] anv/blorp: Rework HiZ ops to look like MCS and CCS

2018-01-22 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:21PM -0800, Jason Ekstrand wrote:
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/vulkan/anv_blorp.c   | 38 
> ++
>  src/intel/vulkan/anv_private.h |  9 +
>  src/intel/vulkan/genX_cmd_buffer.c | 13 +++--
>  3 files changed, 34 insertions(+), 26 deletions(-)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 229d87e..ccb9d18 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1570,26 +1570,30 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer 
> *cmd_buffer,
> blorp_batch_finish(&batch);
>  }
>  
> -void
> -anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> -const struct anv_image *image,
> -enum blorp_hiz_op op)
> +static enum blorp_hiz_op
> +isl_to_blorp_hiz_op(enum isl_aux_op isl_op)
>  {
> -   assert(image);
> +   switch (isl_op) {
> +   case ISL_AUX_OP_FAST_CLEAR:   return BLORP_HIZ_OP_DEPTH_CLEAR;
> +   case ISL_AUX_OP_FULL_RESOLVE: return BLORP_HIZ_OP_DEPTH_RESOLVE;
> +   case ISL_AUX_OP_AMBIGUATE:return BLORP_HIZ_OP_HIZ_RESOLVE;
> +   default:
> +  unreachable("Unsupported HiZ aux op");
> +   }
> +}
>  
> +void
> +anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect, uint32_t level,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op hiz_op)
> +{
> +   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
> +   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 
> level));
> assert(anv_image_aspect_to_plane(image->aspects,
>  VK_IMAGE_ASPECT_DEPTH_BIT) == 0);
>  
> -   /* Don't resolve depth buffers without an auxiliary HiZ buffer and
> -* don't perform such a resolve on gens that don't support it.
> -*/
> -   if (cmd_buffer->device->info.gen < 8 ||
> -   image->planes[0].aux_usage != ISL_AUX_USAGE_HIZ)
> -  return;
> -
> -   assert(op == BLORP_HIZ_OP_HIZ_RESOLVE ||
> -  op == BLORP_HIZ_OP_DEPTH_RESOLVE);
> -
> struct blorp_batch batch;
> blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
>  
> @@ -1599,7 +1603,9 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer 
> *cmd_buffer,
>  ISL_AUX_USAGE_HIZ, &surf);
> surf.clear_color.f32[0] = ANV_HZ_FC_VAL;
>  
> -   blorp_hiz_op(&batch, &surf, 0, 0, 1, op);
> +   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count,
> +isl_to_blorp_hiz_op(hiz_op));
> +
> blorp_batch_finish(&batch);
>  }
>  
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index f93c114..a837860 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2535,10 +2535,11 @@ anv_can_sample_with_hiz(const struct gen_device_info 
> * const devinfo,
>  }
>  
>  void
> -anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> -const struct anv_image *image,
> -enum blorp_hiz_op op);
> -
> +anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + VkImageAspectFlagBits aspect, uint32_t level,
> + uint32_t base_layer, uint32_t layer_count,
> + enum isl_aux_op hiz_op);
>  void
>  anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
>   const struct anv_image *image,
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 939f3fa..be263f8 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -388,19 +388,20 @@ transition_depth_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
>VK_IMAGE_ASPECT_DEPTH_BIT, final_layout);
>  
> -   enum blorp_hiz_op hiz_op;
> +   enum isl_aux_op hiz_op;
> if (hiz_enabled && !enable_hiz) {
> -  hiz_op = BLORP_HIZ_OP_DEPTH_RESOLVE;
> +  hiz_op = ISL_AUX_OP_FULL_RESOLVE;
> } else if (!hiz_enabled && enable_hiz) {
> -  hiz_op = BLORP_HIZ_OP_HIZ_RESOLVE;
> +  hiz_op = ISL_AUX_OP_AMBIGUATE;
> } else {
>assert(hiz_enabled == enable_hiz);
>/* If the same buffer will be used, no resolves are necessary. */
> -  hiz_op = BLORP_HIZ_OP_NONE;
> +  hiz_op = ISL_AUX_OP_NONE;
> }
>  
> -   if (hiz_op != BLORP_HIZ_OP_NONE)
> -  anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> +   if (hiz_op != ISL_AUX_OP_NONE)
> +  anv_image_hiz_op(cmd_buffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
> +   0, 0, 1, hiz_op);
>  }
>  
>  #define MI_PREDICATE_SRC0  0x2400
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> me

Re: [Mesa-dev] [PATCH] i965: Drop render_target_start from binding table struct.

2018-01-22 Thread Kenneth Graunke
On Thursday, January 18, 2018 11:47:33 PM PST Iago Toral wrote:
> On Thu, 2018-01-18 at 15:49 -0800, Kenneth Graunke wrote:
> > We have to start render targets at binding table index 0 in order to
> > use
> > headerless FB write messages, and in fact already assume this in a
> > bunch
> > of places in the code.  Let's finish that off, and not bother storing
> > 0
> > in a struct to pretend to add it in a few places.
> > ---
> >  src/intel/blorp/blorp.c | 1 -
> >  src/intel/compiler/brw_compiler.h   | 1 -
> >  src/intel/compiler/brw_fs_generator.cpp | 6 ++
> >  src/mesa/drivers/dri/i965/brw_wm.c  | 1 -
> >  4 files changed, 2 insertions(+), 7 deletions(-)
> > 
> > diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
> > index e8a2c6135f5..b7238306eb1 100644
> > --- a/src/intel/blorp/blorp.c
> > +++ b/src/intel/blorp/blorp.c
> > @@ -177,7 +177,6 @@ blorp_compile_fs(struct blorp_context *blorp,
> > void *mem_ctx,
> > wm_prog_data->base.param = NULL;
> >  
> > /* BLORP always just uses the first two binding table entries */
> > -   wm_prog_data->biable.render_target_start =
> > BLORP_RENDERBUFFER_BT_INDEX;
> > wm_prog_data->base.binding_table.texture_start =
> > BLORP_TEXTURE_BT_INDEX;
> 
> Since the comment says that blorp uses the first two entries but it
> then only assigns one, maybe it would be nice to update the comment
> above to state that we assume render targets start at binding table
> index 0.

Thanks, updated to:

   /* BLORP always uses the first two binding table entries:
* - Surface 0 is the render target (which always starts from 0)
* - Surface 1 is the source texture
*/

> > nir = brw_preprocess
> > Wouldn't it be better to keep the assert? We still needs
> > render_target_start to be 0_nir(compiler, nir);
> > diff --git a/src/intel/compiler/brw_compiler.h
> > b/src/intel/compiler/brw_compiler.h
> > index 0060c381c0d..b1086bbcee5 100644
> > --- a/src/intel/compiler/brw_compiler.h
> > +++ b/src/intel/compiler/brw_compiler.h
> > @@ -681,7 +681,6 @@ struct brw_wm_prog_data {
> >/** @{
> > * surface indices the WM-specific surfaces
> > */
> > -  uint32_t render_target_start;
> >uint32_t render_target_read_start;
> >/** @} */
> > } binding_table;
> > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> > b/src/intel/compiler/brw_fs_generator.cpp
> > index 91bf0643084..cd5be054f69 100644
> > --- a/src/intel/compiler/brw_fs_generator.cpp
> > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > @@ -287,8 +287,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
> >  * messages set "Render Target Index" to 0.  Using a different
> > binding
> >  * table index would make it impossible to use headerless
> > messages.
> >  */
> > -   assert(prog_data->binding_table.render_target_start == 0);
> > -
> > const uint32_t surf_index = inst->target;
> >  
> > bool last_render_target = inst->eot ||
> > @@ -427,8 +425,8 @@ fs_generator::generate_fb_read(fs_inst *inst,
> > struct brw_reg dst,
> >  {
> > assert(inst->size_written % REG_SIZE == 0);
> > struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this-
> > >prog_data);
> > -   const unsigned surf_index =
> > -  prog_data->binding_table.render_target_start + inst->target;
> > +   /* We assume that render targets start at binding table index 0.
> > */
> > +   const unsigned surf_index = inst->target;
> >  
> > gen9_fb_READ(p, dst, payload, surf_index,
> >  inst->header_size, inst->size_written / REG_SIZE,
> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
> > b/src/mesa/drivers/dri/i965/brw_wm.c
> > index 08bacebd571..755a76eec71 100644
> > --- a/src/mesa/drivers/dri/i965/brw_wm.c
> > +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> > @@ -51,7 +51,6 @@ assign_fs_binding_table_offsets(const struct
> > gen_device_info *devinfo,
> > /* If there are no color regions, we still perform an FB write to
> > a null
> >  * renderbuffer, which we place at surface index 0.
> >  */
> > -   prog_data->binding_table.render_target_start =
> > next_binding_table_offset;
> > next_binding_table_offset += MAX2(key->nr_color_regions, 1);
> 
> Since we are no longer assigning next_binding_table_offset with value
> of  0, we might as well drop the initialization to that value (or just
> initialize the variable directly to the value right after the line you
> remove here).
> 
> Either way:
> Reviewed-by: Iago Toral Quiroga 

Good call, updated to:

   /* Render targets implicitly start at surface index 0.  Even if there are
* no color regions, we still perform an FB write to a null render target,
* which will be surface 0.
*/
   uint32_t next_binding_table_offset = MAX2(key->nr_color_regions, 1);

and pushed:

To ssh://git.freedesktop.org/git/mesa/mesa
   a9bb067e27c..60f15477dad  master -> master



signature.asc
Description: This is a digitally signed message part.
___

Re: [Mesa-dev] [PATCH v2 06/24] anv/image: Add a helper for determining when fast clears are supported

2018-01-22 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:23PM -0800, Jason Ekstrand wrote:
> v2 (Jason Ekstrand):
>  - Return an enum instead of a boolean
> 
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/vulkan/anv_image.c   | 70 
> ++
>  src/intel/vulkan/anv_private.h | 13 
>  2 files changed, 83 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 1218c00..84e4b96 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -863,6 +863,76 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
> const devinfo,
> unreachable("layout is not a VkImageLayout enumeration member.");
>  }
>  
> +/**
> + * This function returns the level of unresolved fast-clear support of the
> + * given image in the given VkImageLayout.
> + *
> + * @param devinfo The device information of the Intel GPU.
> + * @param image The image that may contain a collection of buffers.
> + * @param aspect The aspect of the image to be accessed.
> + * @param layout The current layout of the image aspect(s).
> + */
> +enum anv_fast_clear_type
> +anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
> +  const struct anv_image * const image,
> +  const VkImageAspectFlagBits aspect,
> +  const VkImageLayout layout)
> +{
> +   /* The aspect must be exactly one of the image aspects. */
> +   assert(_mesa_bitcount(aspect) == 1 && (aspect & image->aspects));
> +
> +   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> +
> +   /* If there is no auxiliary surface allocated, there are no fast-clears */
> +   if (image->planes[plane].aux_surface.isl.size == 0)
> +  return false;
> +
> +   /* All images that use an auxiliary surface are required to be tiled. */
> +   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> +
> +   /* Stencil has no aux */
> +   assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
> +
> +   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> +  /* For depth images (with HiZ), the layout supports fast-clears if and
> +   * only if it supports HiZ.
> +   */
> +  enum isl_aux_usage aux_usage =
> + anv_layout_to_aux_usage(devinfo, image, aspect, layout);
> +  return aux_usage == ISL_AUX_USAGE_HIZ ?
> + ANV_FAST_CLEAR_ANY : ANV_FAST_CLEAR_NONE;
> +   }
> +
> +   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> +
> +   /* Multisample fast-clear is not yet supported. */
> +   if (image->samples > 1)
> +  return false;
> +
> +   /* The only layout which actually supports fast-clears today is
> +* VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL.  Some day in the future
> +* this may change if our ability to track clear colors improves.
> +*/

Stale comment.

-Nanley

> +   switch (layout) {
> +   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
> +  return ANV_FAST_CLEAR_ANY;
> +
> +   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
> +  return ANV_FAST_CLEAR_NONE;
> +
> +   default:
> +  /* If the image has CCS_E enabled all the time then we can use
> +   * fast-clear as long as the clear color is zero since this is the
> +   * default value we program into every surface state used for
> +   * texturing.
> +   */
> +  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
> + return ANV_FAST_CLEAR_ZERO_ONLY;
> +  else
> + return ANV_FAST_CLEAR_NONE;
> +   }
> +}
> +
>  
>  static struct anv_state
>  alloc_surface_state(struct anv_device *device)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index a837860..f81f8e1 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2443,6 +2443,13 @@ struct anv_image {
> } planes[3];
>  };
>  
> +/* The ordering of this enum is important */
> +enum anv_fast_clear_type {
> +   ANV_FAST_CLEAR_NONE = 0,
> +   ANV_FAST_CLEAR_ZERO_ONLY = 1,
> +   ANV_FAST_CLEAR_ANY = 2,
> +};
> +
>  /* Returns the number of auxiliary buffer levels attached to an image. */
>  static inline uint8_t
>  anv_image_aux_levels(const struct anv_image * const image,
> @@ -2565,6 +2572,12 @@ anv_layout_to_aux_usage(const struct gen_device_info * 
> const devinfo,
>  const VkImageAspectFlagBits aspect,
>  const VkImageLayout layout);
>  
> +enum anv_fast_clear_type
> +anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
> +  const struct anv_image * const image,
> +  const VkImageAspectFlagBits aspect,
> +  const VkImageLayout layout);
> +
>  /* This is defined as a macro so that it works for both
>   * VkImageSubresourceRange and VkImageSubresourceLayers
>   */
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.f

Re: [Mesa-dev] [PATCH v3 3/3] anv: implement VK_EXT_global_priority extension

2018-01-22 Thread Jason Ekstrand
On Mon, Jan 22, 2018 at 4:29 AM, Tapani Pälli 
wrote:

> v2: add ANV_CONTEXT_REALTIME_PRIORITY (Chris)
> use unreachable with unknown priority (Samuel)
>
> v3: add stubs in gem_stubs.c (Emil)
> use priority defines from gen_defines.h
>
> Signed-off-by: Tapani Pälli 
> Reviewed-by: Samuel Iglesias Gonsálvez  (v2)
> Reviewed-by: Chris Wilson  (v2)
> ---
>  src/intel/vulkan/anv_device.c  | 25 +++
>  src/intel/vulkan/anv_extensions.py |  2 ++
>  src/intel/vulkan/anv_gem.c | 51 ++
> 
>  src/intel/vulkan/anv_gem_stubs.c   | 10 
>  src/intel/vulkan/anv_private.h |  3 +++
>  5 files changed, 91 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 777abd8757..42ebc19f2b 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -369,6 +369,9 @@ anv_physical_device_init(struct anv_physical_device
> *device,
> device->has_syncobj_wait = device->has_syncobj &&
>anv_gem_supports_syncobj_wait(fd);
>
> +   if (anv_gem_has_context_priority(fd))
> +  device->has_context_priority = true;
> +
> bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
>
> /* Starting with Gen10, the timestamp frequency of the command
> streamer may
> @@ -1205,6 +1208,15 @@ VkResult anv_CreateDevice(
>}
> }
>
> +   /* Check if client specified queue priority. */
> +   const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
> +  vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
> +   DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
> +
> +   VkQueueGlobalPriorityEXT priority =
> +  queue_priority ? queue_priority->globalPriority :
> + VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
> +
> device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
> sizeof(*device), 8,
> VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
> @@ -1234,6 +1246,19 @@ VkResult anv_CreateDevice(
>goto fail_fd;
> }
>
> +   /* As per spec, the driver implementation may deny requests to acquire
> +* a priority above the default priority (MEDIUM) if the caller does
> not
> +* have sufficient privileges. In this scenario
> VK_ERROR_NOT_PERMITTED_EXT
> +* is returned.
> +*/
> +   if (physical_device->has_context_priority) {
> +  int err = anv_gem_set_context_priority(device, priority);
> +  if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) {
> + result = vk_error(VK_ERROR_NOT_PERMITTED_EXT);
> + goto fail_fd;
> +  }
> +   }
> +
> device->info = physical_device->info;
> device->isl_dev = physical_device->isl_dev;
>
> diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_
> extensions.py
> index adfebca985..aacf39248f 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -86,6 +86,8 @@ EXTENSIONS = [
>  Extension('VK_KHX_multiview', 1, True),
>  Extension('VK_EXT_debug_report',  8, True),
>  Extension('VK_EXT_external_memory_dma_buf',   1, True),
> +Extension('VK_EXT_global_priority',   1,
> +  'device->has_context_priority'),
>  ]
>
>  class VkVersion:
> diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
> index 34c0989108..7f83820429 100644
> --- a/src/intel/vulkan/anv_gem.c
> +++ b/src/intel/vulkan/anv_gem.c
> @@ -30,6 +30,7 @@
>  #include 
>
>  #include "anv_private.h"
> +#include "common/gen_defines.h"
>
>  static int
>  anv_ioctl(int fd, unsigned long request, void *arg)
> @@ -302,6 +303,56 @@ close_and_return:
> return swizzled;
>  }
>
> +static int
> +vk_priority_to_anv(int priority)
> +{
> +   switch (priority) {
> +   case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
> +  return GEN_CONTEXT_LOW_PRIORITY;
> +   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
> +  return GEN_CONTEXT_MEDIUM_PRIORITY;
> +   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
> +  return GEN_CONTEXT_HIGH_PRIORITY;
> +   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
> +  return GEN_CONTEXT_REALTIME_PRIORITY;
> +   default:
> +  unreachable("Invalid priority");
> +   }
> +}
>

I think I'd rather have the conversion in anv_device.c and just make the
anv_gem functions take an i915 priority.

Other than that, and a couple other nits, this looks good to me.

One other question, do we have tests?  I quickly searched the piglit list
and didn't see any.  Writing a crucible test shouldn't be that hard.  You
just have to submit a bunch of command buffers and show that they get
re-ordered to favor the higher-priority context.  You could do that with a
bunch of compute shader invocations that "take a number" from a shared
atomic or something like that.

The current func.sync.semaphore-fd tests should also probably be modified
to use it.  They current

Re: [Mesa-dev] [PATCH v2 06/24] anv/image: Add a helper for determining when fast clears are supported

2018-01-22 Thread Jason Ekstrand
On Mon, Jan 22, 2018 at 10:48 AM, Nanley Chery 
wrote:

> On Fri, Jan 19, 2018 at 03:47:23PM -0800, Jason Ekstrand wrote:
> > v2 (Jason Ekstrand):
> >  - Return an enum instead of a boolean
> >
> > Reviewed-by: Topi Pohjolainen 
> > ---
> >  src/intel/vulkan/anv_image.c   | 70 ++
> 
> >  src/intel/vulkan/anv_private.h | 13 
> >  2 files changed, 83 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 1218c00..84e4b96 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -863,6 +863,76 @@ anv_layout_to_aux_usage(const struct
> gen_device_info * const devinfo,
> > unreachable("layout is not a VkImageLayout enumeration member.");
> >  }
> >
> > +/**
> > + * This function returns the level of unresolved fast-clear support of
> the
> > + * given image in the given VkImageLayout.
> > + *
> > + * @param devinfo The device information of the Intel GPU.
> > + * @param image The image that may contain a collection of buffers.
> > + * @param aspect The aspect of the image to be accessed.
> > + * @param layout The current layout of the image aspect(s).
> > + */
> > +enum anv_fast_clear_type
> > +anv_layout_to_fast_clear_type(const struct gen_device_info * const
> devinfo,
> > +  const struct anv_image * const image,
> > +  const VkImageAspectFlagBits aspect,
> > +  const VkImageLayout layout)
> > +{
> > +   /* The aspect must be exactly one of the image aspects. */
> > +   assert(_mesa_bitcount(aspect) == 1 && (aspect & image->aspects));
> > +
> > +   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> > +
> > +   /* If there is no auxiliary surface allocated, there are no
> fast-clears */
> > +   if (image->planes[plane].aux_surface.isl.size == 0)
> > +  return false;
> > +
> > +   /* All images that use an auxiliary surface are required to be
> tiled. */
> > +   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> > +
> > +   /* Stencil has no aux */
> > +   assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
> > +
> > +   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> > +  /* For depth images (with HiZ), the layout supports fast-clears
> if and
> > +   * only if it supports HiZ.
> > +   */
> > +  enum isl_aux_usage aux_usage =
> > + anv_layout_to_aux_usage(devinfo, image, aspect, layout);
> > +  return aux_usage == ISL_AUX_USAGE_HIZ ?
> > + ANV_FAST_CLEAR_ANY : ANV_FAST_CLEAR_NONE;
> > +   }
> > +
> > +   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > +
> > +   /* Multisample fast-clear is not yet supported. */
> > +   if (image->samples > 1)
> > +  return false;
> > +
> > +   /* The only layout which actually supports fast-clears today is
> > +* VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL.  Some day in the future
> > +* this may change if our ability to track clear colors improves.
> > +*/
>
> Stale comment.
>

Removed locally.  I think the switch and the one comment below are probably
documentation enough.

--Jason


> -Nanley
>
> > +   switch (layout) {
> > +   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
> > +  return ANV_FAST_CLEAR_ANY;
> > +
> > +   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
> > +  return ANV_FAST_CLEAR_NONE;
> > +
> > +   default:
> > +  /* If the image has CCS_E enabled all the time then we can use
> > +   * fast-clear as long as the clear color is zero since this is the
> > +   * default value we program into every surface state used for
> > +   * texturing.
> > +   */
> > +  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
> > + return ANV_FAST_CLEAR_ZERO_ONLY;
> > +  else
> > + return ANV_FAST_CLEAR_NONE;
> > +   }
> > +}
> > +
> >
> >  static struct anv_state
> >  alloc_surface_state(struct anv_device *device)
> > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> > index a837860..f81f8e1 100644
> > --- a/src/intel/vulkan/anv_private.h
> > +++ b/src/intel/vulkan/anv_private.h
> > @@ -2443,6 +2443,13 @@ struct anv_image {
> > } planes[3];
> >  };
> >
> > +/* The ordering of this enum is important */
> > +enum anv_fast_clear_type {
> > +   ANV_FAST_CLEAR_NONE = 0,
> > +   ANV_FAST_CLEAR_ZERO_ONLY = 1,
> > +   ANV_FAST_CLEAR_ANY = 2,
> > +};
> > +
> >  /* Returns the number of auxiliary buffer levels attached to an image.
> */
> >  static inline uint8_t
> >  anv_image_aux_levels(const struct anv_image * const image,
> > @@ -2565,6 +2572,12 @@ anv_layout_to_aux_usage(const struct
> gen_device_info * const devinfo,
> >  const VkImageAspectFlagBits aspect,
> >  const VkImageLayout layout);
> >
> > +enum anv_fast_clear_type
> > +anv_layout_to_fast_clear_type(const struct gen_device_info * const
> devinfo,
> > +  const stru

Re: [Mesa-dev] [PATCH v2 09/24] anv/cmd_buffer: Generalize transition_color_buffer

2018-01-22 Thread Nanley Chery
On Fri, Jan 19, 2018 at 03:47:26PM -0800, Jason Ekstrand wrote:
> This moves it to being based on layout_to_aux_usage instead of being
> hard-coded based on bits of a priori knowledge of how transitions
> interact with layouts.  This conceptually simplifies things because
> we're now using layout_to_aux_usage and layout_supports_fast_clear to
> make resolve decisions so changes to those functions will do what one
> expects.
> 
> This fixes a potential bug with window system integration on gen9+ where
^
This patch still doesn't fix the bug.

> we wouldn't do a resolve when transitioning to the PRESENT_SRC layout
> because we just assume that everything that handles CCS_E can handle it
> all the time.  When handing a CCS_E image off to the window system, we
> may need to do a full resolve if the window system does not support the
> CCS_E modifier.  The only reason why this hasn't been a problem yet is
> because we don't support modifiers in Vulkan WSI and so we always get X
> tiling which implies no CCS on gen9+.
> 
> v2 (Jason Ekstrand):
>  - Make a few more things const
>  - Use the anv_fast_clear_support enum
> 
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 56 
> ++
>  1 file changed, 44 insertions(+), 12 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 6a6d8b2..fd27463 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -593,6 +593,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
>  VkImageLayout initial_layout,
>  VkImageLayout final_layout)
>  {
> +   const struct gen_device_info *devinfo = &cmd_buffer->device->info;
> /* Validate the inputs. */
> assert(cmd_buffer);
> assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> @@ -733,17 +734,51 @@ transition_color_buffer(struct anv_cmd_buffer 
> *cmd_buffer,
>   VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
>   final_layout);
>}
> -   } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
> -  /* Resolves are only necessary if the subresource may contain blocks
> -   * fast-cleared to values unsupported in other layouts. This only 
> occurs
> -   * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> -   */
> -  return;
> -   } else if (image->samples > 1) {
> -  /* MCS buffers don't need resolving. */
>return;
> }
>  
> +   /* If initial aux usage is NONE, there is nothing to resolve */
> +   const enum isl_aux_usage initial_aux_usage =
> +  anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout);
> +   if (initial_aux_usage == ISL_AUX_USAGE_NONE)
> +  return;
> +
> +   enum isl_aux_op resolve_op = ISL_AUX_OP_NONE;
> +
> +   /* If the initial layout supports more fast clear than the final layout
> +* then we need at least a partial resolve.
> +*/
> +   const enum anv_fast_clear_type initial_fast_clear =
> +  anv_layout_to_fast_clear_type(devinfo, image, aspect, initial_layout);
> +   const enum anv_fast_clear_type final_fast_clear =
> +  anv_layout_to_fast_clear_type(devinfo, image, aspect, final_layout);
> +   if (final_fast_clear < initial_fast_clear)
> +  resolve_op = ISL_AUX_OP_PARTIAL_RESOLVE;
> +
> +   const enum isl_aux_usage final_aux_usage =
> +  anv_layout_to_aux_usage(devinfo, image, aspect, final_layout);
> +   if (initial_aux_usage == ISL_AUX_USAGE_CCS_E &&
> +   final_aux_usage != ISL_AUX_USAGE_CCS_E)
> +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> +
> +   /* CCS_D only supports full resolves and BLORP will assert on us if we try
> +* to do a partial resolve on a CCS_D surface.
> +*/
> +   if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE &&
> +   initial_aux_usage == ISL_AUX_USAGE_CCS_D)
> +  resolve_op = ISL_AUX_OP_FULL_RESOLVE;
> +
> +   if (resolve_op == ISL_AUX_OP_NONE)
> +  return;
> +
> +   /* Even though the above code can theoretically handle multiple resolve
> +* types such as CCS_D -> CCS_E, the predication code below can't.  We 
> only
> +* really handle a couple of cases.
> +*/
> +   assert(initial_aux_usage == ISL_AUX_USAGE_NONE ||
> +  final_aux_usage == ISL_AUX_USAGE_NONE ||
> +  initial_aux_usage == final_aux_usage);
> +

I'm finding this assertion and comment confusing. The comment says that
the predication code below can't handle CCS_D -> CCS_E (which requires a
no-op resolve), but the assertion below it allows initial_aux_usage to
be NONE (which would lead to a no-op resolve), and initial_aux_usage ==
final_aux_usage (which may lead to a no-op resolve).

As far as I can tell, the only problematic case this assertion would catch 
is a CCS_E -> CCS_D transition. This transition requires a FULL_RESOLVE. If
the CCS_E texture was fast-cleared to 

Re: [Mesa-dev] [PATCH v2 06/24] anv/image: Add a helper for determining when fast clears are supported

2018-01-22 Thread Nanley Chery
On Mon, Jan 22, 2018 at 11:03:07AM -0800, Jason Ekstrand wrote:
> On Mon, Jan 22, 2018 at 10:48 AM, Nanley Chery 
> wrote:
> 
> > On Fri, Jan 19, 2018 at 03:47:23PM -0800, Jason Ekstrand wrote:
> > > v2 (Jason Ekstrand):
> > >  - Return an enum instead of a boolean
> > >
> > > Reviewed-by: Topi Pohjolainen 
> > > ---
> > >  src/intel/vulkan/anv_image.c   | 70 ++
> > 
> > >  src/intel/vulkan/anv_private.h | 13 
> > >  2 files changed, 83 insertions(+)
> > >
> > > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > > index 1218c00..84e4b96 100644
> > > --- a/src/intel/vulkan/anv_image.c
> > > +++ b/src/intel/vulkan/anv_image.c
> > > @@ -863,6 +863,76 @@ anv_layout_to_aux_usage(const struct
> > gen_device_info * const devinfo,
> > > unreachable("layout is not a VkImageLayout enumeration member.");
> > >  }
> > >
> > > +/**
> > > + * This function returns the level of unresolved fast-clear support of
> > the
> > > + * given image in the given VkImageLayout.
> > > + *
> > > + * @param devinfo The device information of the Intel GPU.
> > > + * @param image The image that may contain a collection of buffers.
> > > + * @param aspect The aspect of the image to be accessed.
> > > + * @param layout The current layout of the image aspect(s).
> > > + */
> > > +enum anv_fast_clear_type
> > > +anv_layout_to_fast_clear_type(const struct gen_device_info * const
> > devinfo,
> > > +  const struct anv_image * const image,
> > > +  const VkImageAspectFlagBits aspect,
> > > +  const VkImageLayout layout)
> > > +{
> > > +   /* The aspect must be exactly one of the image aspects. */
> > > +   assert(_mesa_bitcount(aspect) == 1 && (aspect & image->aspects));
> > > +
> > > +   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> > > +
> > > +   /* If there is no auxiliary surface allocated, there are no
> > fast-clears */
> > > +   if (image->planes[plane].aux_surface.isl.size == 0)
> > > +  return false;
> > > +
> > > +   /* All images that use an auxiliary surface are required to be
> > tiled. */
> > > +   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> > > +
> > > +   /* Stencil has no aux */
> > > +   assert(aspect != VK_IMAGE_ASPECT_STENCIL_BIT);
> > > +
> > > +   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
> > > +  /* For depth images (with HiZ), the layout supports fast-clears
> > if and
> > > +   * only if it supports HiZ.
> > > +   */
> > > +  enum isl_aux_usage aux_usage =
> > > + anv_layout_to_aux_usage(devinfo, image, aspect, layout);
> > > +  return aux_usage == ISL_AUX_USAGE_HIZ ?
> > > + ANV_FAST_CLEAR_ANY : ANV_FAST_CLEAR_NONE;
> > > +   }
> > > +
> > > +   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > > +
> > > +   /* Multisample fast-clear is not yet supported. */
> > > +   if (image->samples > 1)
> > > +  return false;
> > > +
> > > +   /* The only layout which actually supports fast-clears today is
> > > +* VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL.  Some day in the future
> > > +* this may change if our ability to track clear colors improves.
> > > +*/
> >
> > Stale comment.
> >
> 
> Removed locally.  I think the switch and the one comment below are probably
> documentation enough.
> 
> --Jason
> 
> 

Agreed. 

-Nanley

> > -Nanley
> >
> > > +   switch (layout) {
> > > +   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
> > > +  return ANV_FAST_CLEAR_ANY;
> > > +
> > > +   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
> > > +  return ANV_FAST_CLEAR_NONE;
> > > +
> > > +   default:
> > > +  /* If the image has CCS_E enabled all the time then we can use
> > > +   * fast-clear as long as the clear color is zero since this is the
> > > +   * default value we program into every surface state used for
> > > +   * texturing.
> > > +   */
> > > +  if (image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E)
> > > + return ANV_FAST_CLEAR_ZERO_ONLY;
> > > +  else
> > > + return ANV_FAST_CLEAR_NONE;
> > > +   }
> > > +}
> > > +
> > >
> > >  static struct anv_state
> > >  alloc_surface_state(struct anv_device *device)
> > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> > private.h
> > > index a837860..f81f8e1 100644
> > > --- a/src/intel/vulkan/anv_private.h
> > > +++ b/src/intel/vulkan/anv_private.h
> > > @@ -2443,6 +2443,13 @@ struct anv_image {
> > > } planes[3];
> > >  };
> > >
> > > +/* The ordering of this enum is important */
> > > +enum anv_fast_clear_type {
> > > +   ANV_FAST_CLEAR_NONE = 0,
> > > +   ANV_FAST_CLEAR_ZERO_ONLY = 1,
> > > +   ANV_FAST_CLEAR_ANY = 2,
> > > +};
> > > +
> > >  /* Returns the number of auxiliary buffer levels attached to an image.
> > */
> > >  static inline uint8_t
> > >  anv_image_aux_levels(const struct anv_image * const image,
> > > @@ -2565,6 +2572,12 @@ anv_l

  1   2   >