[Mesa-dev] [PATCH] r600/atomic: add cayman version of atomic save/restore from GDS (v2)

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

On Cayman we don't use the append/consume counters (fglrx doesn't)
and they don't seem to work well with compute shaders.

This just uses GDS instead to do the atomic operations.

v1.1: remove unused line.
v2: use EOS on cayman, it appears to work.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c | 57 +-
 src/gallium/drivers/r600/r600_shader.c | 93 +++---
 2 files changed, 126 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 6bca35e850f..a1d2e0cd14b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2672,6 +2672,7 @@ static void cayman_init_atom_start_cs(struct r600_context 
*rctx)
r600_store_value(cb, 0x76543210); /* 
CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */
r600_store_value(cb, 0xfedcba98); /* 
CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */
 
+   r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff);
r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2);
r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */
r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */
@@ -4627,6 +4628,51 @@ static void evergreen_emit_event_write_eos(struct 
r600_context *rctx,
radeon_emit(cs, reloc);
 }
 
+static void cayman_emit_event_write_eos(struct r600_context *rctx,
+   struct r600_shader_atomic *atomic,
+   struct r600_resource *resource,
+   uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   uint32_t event = EVENT_TYPE_PS_DONE;
+   uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
+  resource,
+  RADEON_USAGE_WRITE,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
+   radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
+   radeon_emit(cs, (dst_offset) & 0x);
+   radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff));
+   radeon_emit(cs, (atomic->hw_idx) | (1 << 16));
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
+/* writes count from a buffer into GDS */
+static void cayman_write_count_to_gds(struct r600_context *rctx,
+ struct r600_shader_atomic *atomic,
+ struct r600_resource *resource,
+ uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
+  resource,
+  RADEON_USAGE_READ,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
+   radeon_emit(cs, dst_offset & 0x);
+   radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | 
((dst_offset >> 32) & 0xff));// GDS
+   radeon_emit(cs, atomic->hw_idx * 4);
+   radeon_emit(cs, 0);
+   radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
 bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
struct r600_shader_atomic 
*combined_atomics,
uint8_t *atomic_used_mask_p)
@@ -4674,7 +4720,10 @@ bool evergreen_emit_atomic_buffer_setup(struct 
r600_context *rctx,
struct r600_resource *resource = 
r600_resource(astate->buffer[atomic->buffer_id].buffer);
assert(resource);
 
-   evergreen_emit_set_append_cnt(rctx, atomic, resource, 
pkt_flags);
+   if (rctx->b.chip_class == CAYMAN)
+   cayman_write_count_to_gds(rctx, atomic, resource, 
pkt_flags);
+   else
+   evergreen_emit_set_append_cnt(rctx, atomic, resource, 
pkt_flags);
}
*atomic_used_mask_p = atomic_used_mask;
return true;
@@ -4702,8 +4751,12 @@ void evergreen_emit_atomic_buffer_save(struct 
r600_context *rctx,
struct r600_resource *resource = 
r600_resource(astate->buffer[atomic->buffer_id].buffer);
assert(resource);
 
-   evergreen_emit_event_write_eos(rctx, atomic, resource, 
pkt_flags);
+   if (rctx->b.chip_class == CAYMAN)
+   cayman_emit_event_write_eos(rctx, atomic, resource,

Re: [Mesa-dev] [PATCH] radeon/vcn: determine idr by pic type

2017-12-05 Thread Christian König

Am 04.12.2017 um 21:12 schrieb Zhang, Boyuan:

Am 30.11.2017 um 22:18 schrieb Leo Liu:


On 11/30/2017 04:12 PM, boyuan.zh...@amd.com wrote:

From: Boyuan Zhang 

Vaapi encode interface provides idr frame flags, where omx interface
doesn't.
Therefore, change to use picture type to determine idr frame, which
will work for both interfaces.

Signed-off-by: Boyuan Zhang 

Reviewed-by: Leo Liu 

Reviewed-by: Christian König 

As a consequence could you remove the is_idr flag from the picture structure or 
is that used somewhere else as well?

Regards,
Christian.

Since Vaapi interface provides the idr flag directly, it's better to have a 1:1 
mapping in driver side and use it directly.


I strongly disagree. The VA-API interface for codecs is broken in quite 
a number of ways.


We should rather use the specifications as source for internal 
structures and interfaces.



  And actually, we are using this "is_idr" flag in some functions in st/va now. 
So I think it's better to keep it for now.


I think we should fix this as well and use the picture type here as well.

Regards,
Christian.



Regards,
Boyuan


---
   src/gallium/drivers/radeon/radeon_vcn_enc.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c
b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 9806a69..5fc9fc7 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -47,7 +47,7 @@ static void radeon_vcn_enc_get_param(struct
radeon_encoder *enc, struct pipe_h26
   enc->enc_pic.ref_idx_l0 = pic->ref_idx_l0;
   enc->enc_pic.ref_idx_l1 = pic->ref_idx_l1;
   enc->enc_pic.not_referenced = pic->not_referenced;
-    enc->enc_pic.is_idr = pic->is_idr;
+    enc->enc_pic.is_idr = (pic->picture_type ==
PIPE_H264_ENC_PICTURE_TYPE_IDR);
   enc->enc_pic.crop_left = 0;
   enc->enc_pic.crop_right = (align(enc->base.width, 16) -
enc->base.width) / 2;
   enc->enc_pic.crop_top = 0;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Daniel Stone
Hi,

On 18 November 2017 at 00:10, Jason Ekstrand  wrote:
> This fixes a bug where we were taking the tiling from the BO regardless
> of what the modifier said.  When we got images in from Vulkan where it
> doesn't set the tiling on the BO, we would treat them as linear even
> though the modifier expressly said to treat it as Y-tiled.

For some reason I thought Ken had already reviewed this and it landed,
until Kristian mentioned last night. Oops. Series is:
Reviewed-by: Daniel Stone 

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-05 Thread Tomasz Figa
On Sat, Dec 2, 2017 at 4:43 AM, Rob Herring  wrote:
> On Fri, Dec 1, 2017 at 8:44 AM, Tomasz Figa  wrote:
>> On Fri, Dec 1, 2017 at 11:20 PM, Rob Herring  wrote:
>>> On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss  
>>> wrote:
 On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
> On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli  m> wrote:
> >
> >
> > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
> > >
> > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss  > > ra.com>
> > > wrote:
>
> [...]
>
> > > (As a side note, I had an idea to create a new interface,
> > > standardized
> > > by Mesa, let's say libdri_android, completely free of any
> > > gralloc-internals. It would have to be exposed additionally by
> > > any
> > > Android that intends to run Mesa. Given the need to deal with 3
> > > different gralloc versions already, it could be something easier
> > > to
> > > manage.)
> >
> >
> > Makes sense, it is a bit messy and we have bit too much patches on
> > our tree
> > because of these differences.
>
> Seems overly complicated to me. The information needed is within the
> ints in the native_handle in most/all implementations. I don't think
> there's another way to globally store dmabuf metadata unless you have
> a custom interface in your DRM driver. So standardizing to a common
> library implies a common handle struct here. I think the options are:
>
> - common struct definition (native_handle_t + dmabuf fd(s) + width,
> height, stride, format, usage, etc.)
> - common struct plus inline accessor functions
> - common opaque struct plus accessor library

 So these common parts would be much like what currently exists in
 minigbm/cros_gralloc_handle.h and gbm_gralloc/gralloc_drm_handle.h
 then, but extended with the above suggestions?
>>>
>>> Yes, but which part do you think needs to be extended?
>>>
>>> As we discussed on IRC, I think perhaps we just need to change the
>>> handle format field in gralloc_drm_handle.h to use fourcc (aka DRM and
>>> GBM formats) instead of the Android format. I think all the users just
>>> end up converting it to their own internal format anyway.
>>
>> We keep the handle opaque for consumers and even minigbm dereferences
>> it only when creating/registering the buffer, further using the handle
>> pointer only as a key to internal bookkeeping map.
>
> What you say implies that you don't need any metadata in the handle,
> but you do have pretty much all the same data. Whether you
>
>> Relying on the struct itself is not only error prone, as there is no
>> way to check if the struct on gralloc implementation side matches what
>> we expect, but also makes it difficult to change the handle struct at
>> our convenience.
>
> How does a library solve this?
>
> Everything in Android gets built together and the handle pretty much
> has to stay the same across components in any implementation I've
> seen. Maybe someday that will change and we'll need versioning and
> backwards compatibility, but for now that's unnecessary complexity.
> We'd have to get to a single, well controlled and defined handle first
> anyway before we start versioning.
>
> Anyone is still free to change things downstream however they want.
> We're only talking about what does mainline/upstream do.
>
> Also, I don't think whatever is standardized should live in Mesa.
> There's a need to support drm_hwcomposer (which has the same
> dependencies as mesa) with non-Mesa GL implementations (yes, vendor
> binaries).

 Excluding Mesa and the composer leaves us with the allocator or
 creating a new library.
 I would assume that creating a new library is the worse option.
>>>
>>> Why excluding the composer? If we have to pick, I'd put it there or
>>> perhaps libdrm?
>>
>> There is neither a single central composer nor libdrm is used on every
>> system... (The latter is not even used by Intel driver in Mesa
>> anymore.)
>
> I think you are confusing libdrm_intel which was removed with libdrm
> (the ioctl wrappers) which is still a dependency. I don't think there
> is any plan to remove libdrm completely.

Okay, my bad. libdrm is used for opening and manipulating DRI device
nodes after all.

>
> For cases where a user has different components, then they have to
> copy the struct.

Yeah, I guess that's not much different from what we're doing in
Chromium OS with proprietary vendor components already...

>
>> However I fully agree that there are other upstream components (e.g.
>> drm_hwcomposer), which would benefit from it and nobody wants to
>> include Mesa in the build just for one header. Should we have a
>> separate freedesktop project for it?
>
> I'm still going to say libdrm. If that's really a problem, then we can
> split it out later.

At least for Chromium OS, libdrm would work fine indeed.

Best regards,
Tomasz
___

Re: [Mesa-dev] [PATCH] nvir: Always split 64-bit IMAD/IMUL operations

2017-12-05 Thread Karol Herbst
On Tue, Dec 5, 2017 at 12:51 AM, Pierre Moreau  wrote:
> Those operations do not map to actual hardware instructions, therefore
> those should always be lowered to 32-bit instructions.
>
> Fixes: 009c54aa7af "nv50/ir: Split 64-bit integer MAD/MUL operations"
> Signed-off-by: Pierre Moreau 

Reviewed-by: Karol Herbst 

yeah, this is totally required to do in any case.

> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 61d4e6a2d0..14bdcea2ca 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -3794,7 +3794,7 @@ Program::optimizeSSA(int level)
> RUN_PASS(2, AlgebraicOpt, run);
> RUN_PASS(2, ModifierFolding, run); // before load propagation -> less 
> checks
> RUN_PASS(1, ConstantFolding, foldAll);
> -   RUN_PASS(1, Split64BitOpPreRA, run);
> +   RUN_PASS(0, Split64BitOpPreRA, run);
> RUN_PASS(1, LoadPropagation, run);
> RUN_PASS(1, IndirectPropagation, run);
> RUN_PASS(2, MemoryOpt, run);
> --
> 2.15.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Pohjolainen, Topi
On Tue, Dec 05, 2017 at 05:41:56AM +, Rogovin, Kevin wrote:
> Hi,
> 
>  The patch series I have submitted handles the case of needing to resolve 
> texture surfaces when a draw (or compute) accesses a texture which is 
> astc5x5. As it is quite clear you understand the issue and know the code of 
> i965 the patch centers on, you are an excellent person to review the code.
> 

We discussed earlier if we could handle the needed resolves in
brw_predraw_resolve_inputs(). I understood you had reservations on how it
would turn out and therefore I thought I try writing it out in code. I think
we could fit it there nicely.

I have to admit that I went quite a bit further. One thing I was looking for
was getting the code recording the needed bits into context closer to the
point consuming them. That looks doable as well, now reacting to the bits and
recording are both in gen9_astc5x5_sampler_wa(). It also becomes clear that
brw_predraw_resolve_inputs() does not depend on the bits in the context but
solely on the current texture settings.
This is important to me as I'm quite sure I'll be debugging things later on
where I need to pay attention to what happens with this workaround (among
other things).

> Here are my comments of the patch posted:
> 
>  1.  it is essentially replication and moving around of the code of the patch 
> series posted earlier but missing various
>   important bits: preventing the sampler from using the auxiliary buffer 
> (this requires to modify surface state
>   sent in brw_wm_surfaces.c). It also does not cover blorp sufficiently 
> (blorp might read from an ASTC5x5
>   and there are more paths in blorp than blorp_surf_for_miptree() that 
> sample from surfaces.
> 

Can you explain both more in detail? Resolves done in
brw_predraw_resolve_inputs() mean that there is nothing interesting in the
aux buffers and surface setup won't therefore enable auxiliary for texture
surfaces.

In case of blorp, as far as I know all operations sampling something should go
thru blorp_surf_for_miptree(). Can you point out cases that don't?

>  2.  using the check that the GPU is gen9 (and for that matter, using the 
> name gen9_ astc5x5_sampler_wa())
>   is not ideal; I would not be surprised that the bug might not be 
> present on various re-spins of Gen9 and/or
>   it might linger on to future Gens. I do NOT know; using a Boolean 
> assigned earlier makes the code more
>   future-ish proof.

This is quite common style of ours, workaround has the name of the platform
it was first introduced in.

> 
>  3.  the nature of GPU fragment dispatch is going to require a texture 
> invalidate even if the sampler only
>   have the bug in one direction; this is because a subslice is not 
> guaranteed to process fragments in any
>   order. The crux is that a single sampler serves an entire sub-slice 
> which has more than 1 EU. It is quite
>   possible that one EU has threads of a draw call ahead of the other and 
> depending on the timing, portions
>   of those fragments' coming after might be processed by the sampler of 
> those before of those fragments
>   coming before in batchbuffer order. Indeed a single EU might have 
> threads from separate draws as well.
>   A texture invalidate places a barrier in the pipeline preventing the 
> mixing (and means that efficiency of 
>  GPU drops quite a bit with every texture invalidate sadly). 
> 

Right. In the case of sampling both aux and astc5x5 in the same draw cycle the
only thing to do is to disable aux. With my question of direction I meant the
texture cache flush between two cycles. Do we need to flush in both cases

1) ASTC5x5 in first cycle and AUX in the following
2) AUX in first cycle and ASTC5x5 in the following

>  4. With 3 in mind, using the bit-masks is not a good idea as we want to then 
> enforce at the code level
>   that only one of the two is possible without texture invalidates.

Can you elaborate this a little more? It tells if aux is/was used and it tells
if astc5x5 is/was used. That is all we need, right?

> 
> -Kevin
> 
> 
> -Original Message-
> From: Topi Pohjolainen [mailto:topi.pohjolai...@gmail.com] 
> Sent: Monday, December 4, 2017 12:49 PM
> To: mesa-dev@lists.freedesktop.org
> Cc: Rogovin, Kevin 
> Subject: [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug
> 
> This is just drafting some thoughts and only compile tested.
> 
> CC: "Rogovin, Kevin" 
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c   |  8 +
>  src/mesa/drivers/dri/i965/brw_context.h | 10 ++
>  src/mesa/drivers/dri/i965/brw_draw.c| 54 
> -
>  3 files changed, 71 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 680121b6ab..b3f84ab8ca 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -186,11 +186,19 @@ blorp_surf_for_miptree(st

Re: [Mesa-dev] [PATCH] r600/atomic: add cayman version of atomic save/restore from GDS (v2)

2017-12-05 Thread Nicolai Hähnle

On 05.12.2017 09:16, Dave Airlie wrote:

From: Dave Airlie 

On Cayman we don't use the append/consume counters (fglrx doesn't)
and they don't seem to work well with compute shaders.

This just uses GDS instead to do the atomic operations.

v1.1: remove unused line.
v2: use EOS on cayman, it appears to work.

Signed-off-by: Dave Airlie 


I can't say much about the assembly, but the CP packets look reasonable 
to me.


Acked-by: Nicolai Hähnle 



---
  src/gallium/drivers/r600/evergreen_state.c | 57 +-
  src/gallium/drivers/r600/r600_shader.c | 93 +++---
  2 files changed, 126 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 6bca35e850f..a1d2e0cd14b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2672,6 +2672,7 @@ static void cayman_init_atom_start_cs(struct r600_context 
*rctx)
r600_store_value(cb, 0x76543210); /* 
CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */
r600_store_value(cb, 0xfedcba98); /* 
CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */
  
+	r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff);

r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2);
r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */
r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */
@@ -4627,6 +4628,51 @@ static void evergreen_emit_event_write_eos(struct 
r600_context *rctx,
radeon_emit(cs, reloc);
  }
  
+static void cayman_emit_event_write_eos(struct r600_context *rctx,

+   struct r600_shader_atomic *atomic,
+   struct r600_resource *resource,
+   uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   uint32_t event = EVENT_TYPE_PS_DONE;
+   uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
+  resource,
+  RADEON_USAGE_WRITE,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
+   radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
+   radeon_emit(cs, (dst_offset) & 0x);
+   radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0xff));
+   radeon_emit(cs, (atomic->hw_idx) | (1 << 16));
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
+/* writes count from a buffer into GDS */
+static void cayman_write_count_to_gds(struct r600_context *rctx,
+ struct r600_shader_atomic *atomic,
+ struct r600_resource *resource,
+ uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
+  resource,
+  RADEON_USAGE_READ,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
+   radeon_emit(cs, dst_offset & 0x);
+   radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset 
>> 32) & 0xff));// GDS
+   radeon_emit(cs, atomic->hw_idx * 4);
+   radeon_emit(cs, 0);
+   radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
  bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
struct r600_shader_atomic 
*combined_atomics,
uint8_t *atomic_used_mask_p)
@@ -4674,7 +4720,10 @@ bool evergreen_emit_atomic_buffer_setup(struct 
r600_context *rctx,
struct r600_resource *resource = 
r600_resource(astate->buffer[atomic->buffer_id].buffer);
assert(resource);
  
-		evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);

+   if (rctx->b.chip_class == CAYMAN)
+   cayman_write_count_to_gds(rctx, atomic, resource, 
pkt_flags);
+   else
+   evergreen_emit_set_append_cnt(rctx, atomic, resource, 
pkt_flags);
}
*atomic_used_mask_p = atomic_used_mask;
return true;
@@ -4702,8 +4751,12 @@ void evergreen_emit_atomic_buffer_save(struct 
r600_context *rctx,
struct r600_resource *resource = 
r600_resource(astate->buffer[atomic->buffer_id].buffer);
assert(resource);
  
-		evergreen_emit_event_write_eos(rctx, atomic, resource, pkt_

Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Rogovin, Kevin
Hi,


>> Here are my comments of the patch posted:
>> 
>>  1.  it is essentially replication and moving around of the code of the 
>> patch series posted earlier but missing various
>>   important bits: preventing the sampler from using the auxiliary buffer 
>> (this requires to modify surface state
>>   sent in brw_wm_surfaces.c). It also does not cover blorp sufficiently 
>> (blorp might read from an ASTC5x5
>>   and there are more paths in blorp than blorp_surf_for_miptree() that 
>> sample from surfaces.
>> 

>Can you explain both more in detail? Resolves done in
>brw_predraw_resolve_inputs() mean that there is nothing interesting in the aux 
>buffers and surface setup won't therefore enable auxiliary for texture 
>surfaces.

That there is nothing interesting is irrelevant to the sampler if the 
SURFACE_STATE fed includes the auxiliary buffer, thus when one sets up the 
SURFACE_STATE for sampler, the auxiliary buffer cannot be mentioned in the GPU 
command; The sampler will always try to read the auxiliary buffer if it is 
given the opportunity to do so. Indeed, it is quite feasible that less 
bandwidth is consumed if the sampler is given the chance to read an auxiliary 
buffer in place of the buffer; as such even if the surface is resolved one may 
wish to feed the sampler the auxiliary buffer. Indeed, for HiZ, i965 programs 
to use the HiZ auxiliary buffer even if the depth buffer is fully resolved (see 
inte_mipmap_tree_sample_with_hiz() in intel_mipmap_tree.c).

> In case of blorp, as far as I know all operations sampling something should 
> go thru blorp_surf_for_miptree(). Can you point out cases that don't?

Blorp is used in more than blorp_surf_for_miptree(), for example implementing 
GetTexImage(). Indeed, it is possible for blorp to sample from an ASTC5x5 (you 
can see this handled in the patch series I posted). I chose the hammer that the 
default is to just assume blorp is going to access auxiliary buffers unless a 
flag is set when the caller knows that blorp is going to sample from an astc5x5 
(against see my patch series).

>Right. In the case of sampling both aux and astc5x5 in the same draw cycle the 
>only thing to do is to disable aux. With my question of direction I meant the 
>texture 
> cache flush between two cycles. Do we need to flush in both cases
> 1) ASTC5x5 in first cycle and AUX in the following
> 2) AUX in first cycle and ASTC5x5 in the following

YES we need to flush in both cases. What is happening is that the sampler 
hardware is bugged. Let us suppose it was bugged in only 1 direction, take 1. 
Then if the sampler first samples from an ASTC5x5 then an AUX it would not 
hang, but the other way it would. However, if there are multiple draws in 
flight where one samples from an ASTC5x5 and the other does not, the command 
buffer order gives ZERO guarantee that the sampler will sample in that order 
because fragments get executed out-of-order even across draw calls even within 
a subslice (this is why sendc is needed at end of shader in GEN).

>>  4. With 3 in mind, using the bit-masks is not a good idea as we want to 
>> then enforce at the code level
>>   that only one of the two is possible without texture invalidates.
> Can you elaborate this a little more? It tells if aux is/was used and it 
> tells if astc5x5 is/was used. That is all we need, right?

WRONG. We must enforce that a given draw call can have neither or only one. By 
having bitmasks it is possible to support a state having both.

At any rate, please review the patch series I have posted and I am happy to 
take suggestions to improve that patch series that I have tested.

-Kevin
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] r600 ssbo/image fixes

2017-12-05 Thread Dave Airlie
I've been running deqp-gles31 over the r600 ssbo/image code
it uses compute shaders, but I've found a few bugs in the in-tree
code, so just sending some fixes out for those first.

ssbo seems to pass all the tests, images have some heisenbug
where they pass sometimes and not others.

Dave.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] r600/shader: fix thread id loading.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

This just changes how thread id loading is done, it makes
smaller shaders if we don't use thread id gprs.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 5d78e4f8ad..cf5d261007 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -355,6 +355,7 @@ struct r600_shader_ctx {
unsignedtess_input_info; /* temp with 
tess input offsets */
unsignedtess_output_info; /* temp with 
tess input offsets */
unsignedthread_id_gpr; /* temp with 
thread id calculated for images */
+   bool thread_id_gpr_loaded;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -2981,6 +2982,9 @@ static int load_thread_id_gpr(struct r600_shader_ctx *ctx)
struct r600_bytecode_alu alu;
int r;
 
+   if (ctx->thread_id_gpr_loaded)
+   return 0;
+
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT;
alu.dst.sel = ctx->temp_reg;
@@ -3025,6 +3029,7 @@ static int load_thread_id_gpr(struct r600_shader_ctx *ctx)
   ctx->temp_reg, 0);
if (r)
return r;
+   ctx->thread_id_gpr_loaded = true;
return 0;
 }
 
@@ -3128,6 +3133,7 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.fragcoord_input = -1;
ctx.colors_used = 0;
ctx.clip_vertex_write = 0;
+   ctx.thread_id_gpr_loaded = false;
 
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
@@ -3235,11 +3241,10 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
ctx.temp_reg = ctx.bc->ar_reg + 3;
}
 
-   if (shader->uses_images && ctx.type == PIPE_SHADER_FRAGMENT) {
-   ctx.thread_id_gpr = ctx.temp_reg;
-   ctx.temp_reg++;
-   } else
-   ctx.thread_id_gpr = 0;
+   if (shader->uses_images) {
+   ctx.thread_id_gpr = ctx.temp_reg++;
+   ctx.thread_id_gpr_loaded = false;
+   }
 
shader->max_arrays = 0;
shader->num_arrays = 0;
@@ -3393,10 +3398,6 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
}
}
 
-   if (ctx.thread_id_gpr) {
-   load_thread_id_gpr(&ctx);
-   }
-
if (ctx.type == PIPE_SHADER_GEOMETRY) {
struct r600_bytecode_alu alu;
int r;
@@ -7990,6 +7991,10 @@ static int tgsi_load_rat(struct r600_shader_ctx *ctx)
unsigned rat_index_mode;
unsigned immed_base;
 
+   r = load_thread_id_gpr(ctx);
+   if (r)
+   return r;
+
rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // 
CF_INDEX_1 : CF_INDEX_NONE
 
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
@@ -8233,6 +8238,10 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx 
*ctx)
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
rat_base = ctx->shader->rat_base;
 
+   r = load_thread_id_gpr(ctx);
+   if (r)
+   return r;
+
 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
immed_base += ctx->info.file_count[TGSI_FILE_IMAGE];
rat_base += ctx->info.file_count[TGSI_FILE_IMAGE];
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] r600/ssbo: refactor out buffer coord calcs and use for atomic path.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

The atomic rat path has a bug in the ssbo path, refactor out the
address calcs from the load/store paths and reuse to fix the bug
in the buffer rat atomic path.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 71 ++
 1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index dbc75171fa..99d1cd3232 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7905,22 +7905,13 @@ static int load_index_src(struct r600_shader_ctx *ctx, 
int src_index, int *idx_g
return 0;
 }
 
-static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
+static int load_buffer_coord(struct r600_shader_ctx *ctx, int src_idx,
+int temp_reg)
 {
struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
-   /* have to work out the offset into the RAT immediate return buffer */
-   struct r600_bytecode_vtx vtx;
-   struct r600_bytecode_cf *cf;
int r;
-   int temp_reg = r600_get_temp(ctx);
-   unsigned rat_index_mode;
-   unsigned base;
-
-   rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // 
CF_INDEX_1 : CF_INDEX_NONE
-   base = R600_IMAGE_REAL_RESOURCE_OFFSET + 
ctx->info.file_count[TGSI_FILE_IMAGE];
-
-   if (inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE) {
-   int value = (ctx->literals[4 * inst->Src[1].Register.Index + 
inst->Src[1].Register.SwizzleX]);
+   if (inst->Src[src_idx].Register.File == TGSI_FILE_IMMEDIATE) {
+   int value = (ctx->literals[4 * 
inst->Src[src_idx].Register.Index + inst->Src[src_idx].Register.SwizzleX]);
r = single_alu_op2(ctx, ALU_OP1_MOV,
   temp_reg, 0,
   V_SQ_ALU_SRC_LITERAL, value >> 2,
@@ -7931,7 +7922,7 @@ static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_LSHR_INT;
-   r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
+   r600_bytecode_src(&alu.src[0], &ctx->src[src_idx], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = 2;
alu.dst.sel = temp_reg;
@@ -7941,6 +7932,26 @@ static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
if (r)
return r;
}
+   return 0;
+}
+
+static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
+{
+   struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
+   /* have to work out the offset into the RAT immediate return buffer */
+   struct r600_bytecode_vtx vtx;
+   struct r600_bytecode_cf *cf;
+   int r;
+   int temp_reg = r600_get_temp(ctx);
+   unsigned rat_index_mode;
+   unsigned base;
+
+   rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // 
CF_INDEX_1 : CF_INDEX_NONE
+   base = R600_IMAGE_REAL_RESOURCE_OFFSET + 
ctx->info.file_count[TGSI_FILE_IMAGE];
+
+   r = load_buffer_coord(ctx, 1, temp_reg);
+   if (r)
+   return r;
ctx->bc->cf_last->barrier = 1;
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
vtx.op = FETCH_OP_VFETCH;
@@ -8078,22 +8089,9 @@ static int tgsi_store_buffer_rat(struct r600_shader_ctx 
*ctx)
int lasti;
int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx);
 
-   if (inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
-   int value = (ctx->literals[4 * inst->Src[0].Register.Index + 
inst->Src[0].Register.SwizzleX]);
-   r = single_alu_op2(ctx, ALU_OP1_MOV,
-  treg2, 0,
-  V_SQ_ALU_SRC_LITERAL, value >> 2,
-  0, 0);
-   if (r)
-   return r;
-   } else {
-   r = single_alu_op2(ctx, ALU_OP2_LSHR_INT,
-  treg2, 0,
-  ctx->src[0].sel, ctx->src[0].swizzle[0],
-  V_SQ_ALU_SRC_LITERAL, 2);
-   if (r)
-   return r;
-   }
+   r = load_buffer_coord(ctx, 0, treg2);
+   if (r)
+   return r;
 
rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // 
CF_INDEX_1 : CF_INDEX_NONE
if (rat_index_mode)
@@ -8247,14 +8245,19 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx 
*ctx)
 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
immed_base += ctx->info.file_count[TGSI_FILE_IMAGE];
rat_base += ctx->info.file_count[TGSI_FILE_IMAGE];
+
+   r = load_buffer_coord(ctx, 1, ctx->temp_reg);
+   if (r)
+  

[Mesa-dev] [PATCH 3/6] r600: refactor out the immediate setup code.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

This just refactors the same code out of the images/buffers paths.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c | 66 +-
 1 file changed, 28 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 4a5c1aa6ae..89b6c3bb88 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3852,6 +3852,32 @@ static void evergreen_set_tess_state(struct pipe_context 
*ctx,
rctx->tess_state_dirty = true;
 }
 
+static void evergreen_setup_immed_buffer(struct r600_context *rctx,
+struct r600_image_view *rview,
+enum pipe_format pformat)
+{
+   struct r600_screen *rscreen = (struct r600_screen *)rctx->b.b.screen;
+   uint32_t immed_size = rscreen->b.info.max_se * 256 * 64 * 
util_format_get_blocksize(pformat);
+   struct eg_buf_res_params buf_params;
+   bool skip_reloc = false;
+   struct r600_resource *resource = (struct r600_resource 
*)rview->base.resource;
+   if (!resource->immed_buffer) {
+   eg_resource_alloc_immed(&rscreen->b, resource, immed_size);
+   }
+
+   memset(&buf_params, 0, sizeof(buf_params));
+   buf_params.pipe_format = pformat;
+   buf_params.size = resource->immed_buffer->b.b.width0;
+   buf_params.swizzle[0] = PIPE_SWIZZLE_X;
+   buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
+   buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
+   buf_params.swizzle[3] = PIPE_SWIZZLE_W;
+   buf_params.uncached = 1;
+   evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b,
+&buf_params, &skip_reloc,
+rview->immed_resource_words);
+}
+
 static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
unsigned start_slot,
unsigned count,
@@ -3890,7 +3916,6 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
 const struct pipe_shader_buffer 
*buffers)
 {
struct r600_context *rctx = (struct r600_context *)ctx;
-   struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
struct r600_image_state *istate = NULL;
struct r600_image_view *rview;
struct r600_tex_color_info color;
@@ -3898,7 +3923,6 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
struct r600_resource *resource;
int i, idx;
unsigned old_mask;
-   bool skip_reloc = false;
 
if (shader != PIPE_SHADER_FRAGMENT && count == 0)
return;
@@ -3923,11 +3947,8 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
pipe_resource_reference((struct pipe_resource 
**)&rview->base.resource, buf->buffer);
 
resource = (struct r600_resource *)rview->base.resource;
-   if (!resource->immed_buffer) {
-   int immed_size = (rscreen->b.info.max_se * 256 * 64) * 
util_format_get_blocksize(resource->b.b.format);
 
-   eg_resource_alloc_immed(&rscreen->b, resource, 
immed_size);
-   }
+   evergreen_setup_immed_buffer(rctx, rview, resource->b.b.format);
 
color.offset = 0;
color.view = 0;
@@ -3951,18 +3972,6 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
rview->cb_color_fmask = color.fmask;
rview->cb_color_fmask_slice = color.fmask_slice;
 
-   memset(&buf_params, 0, sizeof(buf_params));
-   buf_params.pipe_format = resource->b.b.format;
-   buf_params.size = resource->immed_buffer->b.b.width0;
-   buf_params.swizzle[0] = PIPE_SWIZZLE_X;
-   buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
-   buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
-   buf_params.swizzle[3] = PIPE_SWIZZLE_W;
-   buf_params.uncached = 1;
-   evergreen_fill_buffer_resource_words(rctx, 
&resource->immed_buffer->b.b,
-&buf_params, &skip_reloc,
-
rview->immed_resource_words);
-
memset(&buf_params, 0, sizeof(buf_params));
buf_params.pipe_format = PIPE_FORMAT_R32_FLOAT;
buf_params.offset = buf->buffer_offset;
@@ -4000,7 +4009,6 @@ static void evergreen_set_shader_images(struct 
pipe_context *ctx,
const struct pipe_image_view *images)
 {
struct r600_context *rctx = (struct r600_context *)ctx;
-   struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
int i;
struct r600_imag

[Mesa-dev] [PATCH 5/6] r600/ssbo: fix multi-dword buffer loads.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

This fixes loading from different channels.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 11372595fb..dbc75171fa 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7958,17 +7958,19 @@ static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
vtx.format_comp_all = 1;
vtx.srf_mode_all = 0;
 
-   if (inst->Dst[0].Register.WriteMask == 0xf) {
+   if (inst->Dst[0].Register.WriteMask & 8) {
vtx.data_format = FMT_32_32_32_32;
vtx.use_const_fields = 0;
-   } else if (inst->Dst[0].Register.WriteMask == 0x7) {
+   } else if (inst->Dst[0].Register.WriteMask & 4) {
vtx.data_format = FMT_32_32_32;
vtx.use_const_fields = 0;
-   } else if (inst->Dst[0].Register.WriteMask == 0x3) {
+   } else if (inst->Dst[0].Register.WriteMask & 2) {
vtx.data_format = FMT_32_32;
vtx.use_const_fields = 0;
-   } else
-   vtx.use_const_fields = 1;
+   } else {
+   vtx.data_format = FMT_32;
+   vtx.use_const_fields = 0;
+   }
 
r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
if (r)
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] r600/shader: fix ssbo atomic operations formats.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

Don't try and use the image format for ssbo, just 32-bit uint.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/r600_shader.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index cf5d261007..11372595fb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -8313,10 +8313,19 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx 
*ctx)
cf->barrier = 1;
cf->cf_addr = 1;
 
-   desc = util_format_description(inst->Memory.Format);
-   r600_vertex_data_type(inst->Memory.Format,
- &format, &num_format, &format_comp, &endian);
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+   if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
+   desc = util_format_description(inst->Memory.Format);
+   r600_vertex_data_type(inst->Memory.Format,
+ &format, &num_format, &format_comp, 
&endian);
+   vtx.dst_sel_x = desc->swizzle[0];
+   } else {
+   format = FMT_32;
+   num_format = 1;
+   format_comp = 0;
+   endian = 0;
+   vtx.dst_sel_x = 0;
+   }
vtx.op = FETCH_OP_VFETCH;
vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
vtx.buffer_index_mode = rat_index_mode;
@@ -8324,7 +8333,6 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
vtx.src_gpr = ctx->thread_id_gpr;
vtx.src_sel_x = 1;
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
inst->Dst[0].Register.Index;
-   vtx.dst_sel_x = desc->swizzle[0];
vtx.dst_sel_y = 7;
vtx.dst_sel_z = 7;
vtx.dst_sel_w = 7;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] r600/ssbo: use r32ui format for ssbo resources.

2017-12-05 Thread Dave Airlie
From: Dave Airlie 

This works best for returning the correct values and sizes in
tests.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/r600/evergreen_state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 89b6c3bb88..12662c831b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3948,12 +3948,12 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
 
resource = (struct r600_resource *)rview->base.resource;
 
-   evergreen_setup_immed_buffer(rctx, rview, resource->b.b.format);
+   evergreen_setup_immed_buffer(rctx, rview, PIPE_FORMAT_R32_UINT);
 
color.offset = 0;
color.view = 0;
evergreen_set_color_surface_buffer(rctx, resource,
-  PIPE_FORMAT_R32_FLOAT,
+  PIPE_FORMAT_R32_UINT,
   buf->buffer_offset,
   buf->buffer_offset + 
buf->buffer_size,
   &color);
@@ -3973,7 +3973,7 @@ static void evergreen_set_shader_buffers(struct 
pipe_context *ctx,
rview->cb_color_fmask_slice = color.fmask_slice;
 
memset(&buf_params, 0, sizeof(buf_params));
-   buf_params.pipe_format = PIPE_FORMAT_R32_FLOAT;
+   buf_params.pipe_format = PIPE_FORMAT_R32_UINT;
buf_params.offset = buf->buffer_offset;
buf_params.size = buf->buffer_size;
buf_params.swizzle[0] = PIPE_SWIZZLE_X;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Pohjolainen, Topi
On Tue, Dec 05, 2017 at 10:26:33AM +, Rogovin, Kevin wrote:
> Hi,
> 
> 
> >> Here are my comments of the patch posted:
> >> 
> >>  1.  it is essentially replication and moving around of the code of the 
> >> patch series posted earlier but missing various
> >>   important bits: preventing the sampler from using the auxiliary 
> >> buffer (this requires to modify surface state
> >>   sent in brw_wm_surfaces.c). It also does not cover blorp 
> >> sufficiently (blorp might read from an ASTC5x5
> >>   and there are more paths in blorp than blorp_surf_for_miptree() that 
> >> sample from surfaces.
> >> 
> 
> >Can you explain both more in detail? Resolves done in
> >brw_predraw_resolve_inputs() mean that there is nothing interesting in the 
> >aux buffers and surface setup won't therefore enable auxiliary for texture 
> >surfaces.
> 
> That there is nothing interesting is irrelevant to the sampler if the 
> SURFACE_STATE fed includes the auxiliary buffer, thus when one sets up the 
> SURFACE_STATE for sampler, the auxiliary buffer cannot be mentioned in the 
> GPU command; The sampler will always try to read the auxiliary buffer if it 
> is given the opportunity to do so. Indeed, it is quite feasible that less 
> bandwidth is consumed if the sampler is given the chance to read an auxiliary 
> buffer in place of the buffer; as such even if the surface is resolved one 
> may wish to feed the sampler the auxiliary buffer. Indeed, for HiZ, i965 
> programs to use the HiZ auxiliary buffer even if the depth buffer is fully 
> resolved (see inte_mipmap_tree_sample_with_hiz() in intel_mipmap_tree.c).

If you take a look at brw_update_texture_surface(), just in the end before
brw_emit_surface_state() the logic explictly consults for
intel_miptree_texture_aux_usage(). This in turn tells if the auxiliary buffer
is resolved and it doesn't need to be programmed.

> 
> > In case of blorp, as far as I know all operations sampling something should 
> > go thru blorp_surf_for_miptree(). Can you point out cases that don't?
> 
> Blorp is used in more than blorp_surf_for_miptree(), for example implementing 
> GetTexImage(). Indeed, it is possible for blorp to sample from an ASTC5x5 
> (you can see this handled in the patch series I posted). I chose the hammer 
> that the default is to just assume blorp is going to access auxiliary buffers 
> unless a flag is set when the caller knows that blorp is going to sample from 
> an astc5x5 (against see my patch series).

This path goes thru brw_blorp_download_miptree() which in turn takes either
brw_blorp_copy_miptrees() or brw_blorp_blit_miptrees(). Both in turn consult
blorp_surf_for_miptree().

> 
> >Right. In the case of sampling both aux and astc5x5 in the same draw cycle 
> >the only thing to do is to disable aux. With my question of direction I 
> >meant the texture 
> > cache flush between two cycles. Do we need to flush in both cases
> > 1) ASTC5x5 in first cycle and AUX in the following
> > 2) AUX in first cycle and ASTC5x5 in the following
> 
> YES we need to flush in both cases. What is happening is that the sampler 
> hardware is bugged. Let us suppose it was bugged in only 1 direction, take 1. 
> Then if the sampler first samples from an ASTC5x5 then an AUX it would not 
> hang, but the other way it would. However, if there are multiple draws in 
> flight where one samples from an ASTC5x5 and the other does not, the command 
> buffer order gives ZERO guarantee that the sampler will sample in that order 
> because fragments get executed out-of-order even across draw calls even 
> within a subslice (this is why sendc is needed at end of shader in GEN).

This would be a nice a piece of documentation to add into the code. Thanks
for explaining.

> 
> >>  4. With 3 in mind, using the bit-masks is not a good idea as we want to 
> >> then enforce at the code level
> >>   that only one of the two is possible without texture invalidates.
> > Can you elaborate this a little more? It tells if aux is/was used and it 
> > tells if astc5x5 is/was used. That is all we need, right?
> 
> WRONG. We must enforce that a given draw call can have neither or only one. 
> By having bitmasks it is possible to support a state having both.

I don't see how the bit mask prevents one from forcing anything. I now do see
a flaw in the RFC related to this. In brw_predraw_resolve_inputs() one would
actually need to smash down the recorded AUX mask bit when it reacts to ASTC5x5
being present.

> 
> At any rate, please review the patch series I have posted and I am happy to 
> take suggestions to improve that patch series that I have tested.

Well, if nothing else, I would really like to see you used
brw_predraw_resolve_inputs() for the resolves.

> 
> -Kevin
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nvir/gm107: iterate over all defs in SchedDataCalculatorGM107::findFirstUse

2017-12-05 Thread Karol Herbst
In the sched data calculator we have to track first use of defs by iterating
over all defs of an instruction, not just the first one.

Signed-off-by: Karol Herbst 
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 26 --
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index af7e2d4fd8..db1585818c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -3974,18 +3974,20 @@ SchedDataCalculatorGM107::findFirstUse(const 
Instruction *bari) const
 
   for (int s = 0; insn->srcExists(s); ++s) {
  const Value *src = insn->src(s).rep();
- if (bari->def(0).getFile() == FILE_GPR) {
-if (insn->src(s).getFile() != FILE_GPR ||
-src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
-src->reg.data.id > maxGPR)
-   continue;
-return insn;
- } else
- if (bari->def(0).getFile() == FILE_PREDICATE) {
-if (insn->src(s).getFile() != FILE_PREDICATE ||
-src->reg.data.id != minGPR)
-   continue;
-return insn;
+ for (int d = 0; bari->defExists(d); ++d) {
+if (bari->def(d).getFile() == FILE_GPR) {
+   if (insn->src(s).getFile() != FILE_GPR ||
+   src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
+   src->reg.data.id > maxGPR)
+  continue;
+   return insn;
+} else
+if (bari->def(d).getFile() == FILE_PREDICATE) {
+   if (insn->src(s).getFile() != FILE_PREDICATE ||
+   src->reg.data.id != minGPR)
+  continue;
+   return insn;
+}
  }
   }
}
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nvir/gm107: consider FILE_FLAGS dependencies in SchedDataCalculatorGM107

2017-12-05 Thread Karol Herbst
currently while insterting barriers, writes and reads to FILE_FLAGS aren't
considered. This can lead to WaR hazards in some situations.

Fixes OpenCL CTS tests on Maxwell+:
basic/test_basic.intmath_long.LONG_MAD
basic/test_basic.intmath_long2.LONG_MAD
basic/test_basic.intmath_long4.LONG_MAD

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index db1585818c..cd75d531df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -3949,6 +3949,7 @@ SchedDataCalculatorGM107::needWrDepBar(const Instruction 
*insn) const
 
for (int d = 0; insn->defExists(d); ++d) {
   if (insn->def(d).getFile() == FILE_GPR ||
+  insn->def(d).getFile() == FILE_FLAGS ||
   insn->def(d).getFile() == FILE_PREDICATE)
  return true;
}
@@ -3988,6 +3989,9 @@ SchedDataCalculatorGM107::findFirstUse(const Instruction 
*bari) const
   continue;
return insn;
 }
+if (bari->def(d).getFile() == FILE_FLAGS) {
+   return insn;
+}
  }
   }
}
@@ -4007,7 +4011,8 @@ SchedDataCalculatorGM107::findFirstDef(const Instruction 
*bari) const
 
   for (int d = 0; insn->defExists(d); ++d) {
  const Value *def = insn->def(d).rep();
- if (insn->def(d).getFile() != FILE_GPR)
+ if (insn->def(d).getFile() != FILE_GPR &&
+ insn->def(d).getFile() != FILE_FLAGS)
 continue;
 
  minGPR = def->reg.data.id;
@@ -4015,7 +4020,11 @@ SchedDataCalculatorGM107::findFirstDef(const Instruction 
*bari) const
 
  for (int s = 0; bari->srcExists(s); ++s) {
 const Value *src = bari->src(s).rep();
+if (bari->src(s).getFile() == FILE_FLAGS &&
+insn->def(d).getFile() == FILE_FLAGS)
+   return insn;
 if (bari->src(s).getFile() != FILE_GPR ||
+insn->def(d).getFile() != FILE_GPR ||
 src->reg.data.id + src->reg.size / 4 - 1 < minGPR ||
 src->reg.data.id > maxGPR)
continue;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Pohjolainen, Topi
On Tue, Dec 05, 2017 at 01:00:28PM +0200, Pohjolainen, Topi wrote:
> On Tue, Dec 05, 2017 at 10:26:33AM +, Rogovin, Kevin wrote:
> > Hi,
> > 
> > 
> > >> Here are my comments of the patch posted:
> > >> 
> > >>  1.  it is essentially replication and moving around of the code of the 
> > >> patch series posted earlier but missing various
> > >>   important bits: preventing the sampler from using the auxiliary 
> > >> buffer (this requires to modify surface state
> > >>   sent in brw_wm_surfaces.c). It also does not cover blorp 
> > >> sufficiently (blorp might read from an ASTC5x5
> > >>   and there are more paths in blorp than blorp_surf_for_miptree() 
> > >> that sample from surfaces.
> > >> 
> > 
> > >Can you explain both more in detail? Resolves done in
> > >brw_predraw_resolve_inputs() mean that there is nothing interesting in the 
> > >aux buffers and surface setup won't therefore enable auxiliary for texture 
> > >surfaces.
> > 
> > That there is nothing interesting is irrelevant to the sampler if the 
> > SURFACE_STATE fed includes the auxiliary buffer, thus when one sets up the 
> > SURFACE_STATE for sampler, the auxiliary buffer cannot be mentioned in the 
> > GPU command; The sampler will always try to read the auxiliary buffer if it 
> > is given the opportunity to do so. Indeed, it is quite feasible that less 
> > bandwidth is consumed if the sampler is given the chance to read an 
> > auxiliary buffer in place of the buffer; as such even if the surface is 
> > resolved one may wish to feed the sampler the auxiliary buffer. Indeed, for 
> > HiZ, i965 programs to use the HiZ auxiliary buffer even if the depth buffer 
> > is fully resolved (see inte_mipmap_tree_sample_with_hiz() in 
> > intel_mipmap_tree.c).
> 
> If you take a look at brw_update_texture_surface(), just in the end before
> brw_emit_surface_state() the logic explictly consults for
> intel_miptree_texture_aux_usage(). This in turn tells if the auxiliary buffer
> is resolved and it doesn't need to be programmed.
> 
> > 
> > > In case of blorp, as far as I know all operations sampling something 
> > > should go thru blorp_surf_for_miptree(). Can you point out cases that 
> > > don't?
> > 
> > Blorp is used in more than blorp_surf_for_miptree(), for example 
> > implementing GetTexImage(). Indeed, it is possible for blorp to sample from 
> > an ASTC5x5 (you can see this handled in the patch series I posted). I chose 
> > the hammer that the default is to just assume blorp is going to access 
> > auxiliary buffers unless a flag is set when the caller knows that blorp is 
> > going to sample from an astc5x5 (against see my patch series).
> 
> This path goes thru brw_blorp_download_miptree() which in turn takes either
> brw_blorp_copy_miptrees() or brw_blorp_blit_miptrees(). Both in turn consult
> blorp_surf_for_miptree().
> 
> > 
> > >Right. In the case of sampling both aux and astc5x5 in the same draw cycle 
> > >the only thing to do is to disable aux. With my question of direction I 
> > >meant the texture 
> > > cache flush between two cycles. Do we need to flush in both cases
> > > 1) ASTC5x5 in first cycle and AUX in the following
> > > 2) AUX in first cycle and ASTC5x5 in the following
> > 
> > YES we need to flush in both cases. What is happening is that the sampler 
> > hardware is bugged. Let us suppose it was bugged in only 1 direction, take 
> > 1. Then if the sampler first samples from an ASTC5x5 then an AUX it would 
> > not hang, but the other way it would. However, if there are multiple draws 
> > in flight where one samples from an ASTC5x5 and the other does not, the 
> > command buffer order gives ZERO guarantee that the sampler will sample in 
> > that order because fragments get executed out-of-order even across draw 
> > calls even within a subslice (this is why sendc is needed at end of shader 
> > in GEN).
> 
> This would be a nice a piece of documentation to add into the code. Thanks
> for explaining.
> 
> > 
> > >>  4. With 3 in mind, using the bit-masks is not a good idea as we want to 
> > >> then enforce at the code level
> > >>   that only one of the two is possible without texture invalidates.
> > > Can you elaborate this a little more? It tells if aux is/was used and it 
> > > tells if astc5x5 is/was used. That is all we need, right?
> > 
> > WRONG. We must enforce that a given draw call can have neither or only one. 
> > By having bitmasks it is possible to support a state having both.
> 
> I don't see how the bit mask prevents one from forcing anything. I now do see
> a flaw in the RFC related to this. In brw_predraw_resolve_inputs() one would
> actually need to smash down the recorded AUX mask bit when it reacts to 
> ASTC5x5
> being present.
> 
> > 
> > At any rate, please review the patch series I have posted and I am happy to 
> > take suggestions to improve that patch series that I have tested.
> 
> Well, if nothing else, I would really like to see you used
> brw_predraw_resolve_in

Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Rogovin, Kevin

> If you take a look at brw_update_texture_surface(), just in the end before
> brw_emit_surface_state() the logic explictly consults for 
> intel_miptree_texture_aux_usage(). 
> This in turn tells if the auxiliary buffer is resolved and it doesn't need to 
> be programmed.

The full stack trace is this:

 brw_update_texture_surface() calls intel_miptree_texture_usage() which for HiZ 
calls the function intel_miptree_sample_with_hiz() which, as the name suggest, 
returns true if and only if it is ok to use the HiZ to speed up depth texture 
reads. Indeed, the function calls intel_miptree_texture_usage() switches on 
intel_mipmap_tree::aux_usage which the documentation states is about the 
intended usage of the auxiliary buffer. Indeed, if the value is 
ISL_AUX_USAGE_NONE that means, quoting the comment tag above the field, "that 
auxiliary compression is permanently disabled". The conclusion then is that 
even if a buffer is fully resolved, the value of aux_usage is not 
ISL_AUX_USAGE_NONE and that the return value of calls 
intel_miptree_texture_usage() gives the return value assuming that the 
auxiliary buffer can be used with respect to that it holds good values enough 
for the sampler.

>This path goes thru brw_blorp_download_miptree() which in turn takes either
> brw_blorp_copy_miptrees() or brw_blorp_blit_miptrees(). Both in turn consult 
> blorp_surf_for_miptree().

If you are 100% sure (because I am not) that ALL blorp paths walk through that, 
then I have no real objection except it needs to do the magic of checking if it 
is reading from an ASTC5x5 or a surface with an auxiliary and update the 
enumeration appropriately.

> This would be a nice a piece of documentation to add into the code. Thanks 
> for explaining.

I can add this, though I do freely admit I take this for granted and an axiom 
of HW accelerated graphics.

>I don't see how the bit mask prevents one from forcing anything.

By not being able to encode such a state (both are present) such a state is 
impossible to store and cannot be reached. From the point of view of code, an 
enum is slightly more pleasant to read than bitmaks IMO.

> I now do see a flaw in the RFC related to this. In 
> brw_predraw_resolve_inputs() one would 
> actually need to smash down the recorded AUX mask bit when it reacts to 
> ASTC5x5 being present.

Indeed, really two passes are needed over the textures. The first pass to 
detect if ASTC5x5 is present and a second to resolve auxiliary using textures 
if they are present.

>Well, if nothing else, I would really like to see you used 
>brw_predraw_resolve_inputs() for the resolves.

I am happy with that as that walks through the textures anyways BUT it is 
called before brw_predraw_resolve_framebuffer() which might do some resolving 
too. The easy way out is to permute their calling order unless there is some 
other dangling assumption preventing permuting the call order of those two.

> 
> -Kevin
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Fwd: errors for mesa master Android build 1698

2017-12-05 Thread Emil Velikov
On 4 December 2017 at 23:12, Rob Herring  wrote:
> Looks like Jason is the actual culprit here. Unfortunately, the build
> can't keep up with every commit. Looks to me like these should be
> fixed.
>
> I wonder how long until I give up and just set Android back to -Wno-error...
>
Please go back to -Wno-error. In practise it's impossible to keep it working.
Everyone uses different compiler and/or versions, which in turn
produce different warnings.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Android: enable noreturn and returns_nonnull attributes

2017-12-05 Thread Emil Velikov
On 5 December 2017 at 02:21, Rob Herring  wrote:
> Commit 94ca8e04adf6 ("spirv: Add vtn_fail and vtn_assert helpers") broke
> Android builds which have -Werror enabled with the following errors:
>
> external/mesa3d/src/compiler/spirv/spirv_to_nir.c:272:1: error: control may 
> reach end of non-void function [-Werror,-Wreturn-type]
> external/mesa3d/src/compiler/spirv/spirv_to_nir.c:810:1: error: control may 
> reach end of non-void function [-Werror,-Wreturn-type]
> ...
>
> The problem is the noreturn attribute is not enabled and we to define
> HAVE_FUNC_ATTRIBUTE_NORETURN.
>
> Auditing src/util/macros.h, we're also missing
> HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL, so add it too.
>
> Fixes: 94ca8e04adf6 ("spirv: Add vtn_fail and vtn_assert helpers")
> Cc: Jason Ekstrand 
> Signed-off-by: Rob Herring 
Reviewed-by: Emil Velikov 

The following shows the macros not set on Android. Feel free to check
if they're supported and toggle them on.

$ for i in `git grep "if.*def.*HAVE_FUNC_ATTRIBUTE_" origin/master  --
 | grep -o "HAVE_FUNC_[A-Z_]*" | sort -u`; do git grep $i
origin/master  | grep
-qi android || echo missing $i; done
missing HAVE_FUNC_ATTRIBUTE_CONST
missing HAVE_FUNC_ATTRIBUTE_MALLOC
missing HAVE_FUNC_ATTRIBUTE_NORETURN
missing HAVE_FUNC_ATTRIBUTE_PURE
missing HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
missing HAVE_FUNC_ATTRIBUTE_VISIBILITY
missing HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT
missing HAVE_FUNC_ATTRIBUTE_WEAK

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Android: gallium/radeon: fix libmesa_amd_common dependency

2017-12-05 Thread Emil Velikov
On 4 December 2017 at 15:13, Marek Olšák  wrote:
> Reviewed-by: Marek Olšák 
>
... and pushed to master.

Thanks gents.
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] drirc: add option to disable ARB_draw_indirect

2017-12-05 Thread Rob Clark
This is a bit sad/annoying.  But with current GPU firmware (at least on
a5xx) we can support both draw-indirect and base-instance.  But we can't
support draw-indirect with a non-zero base-instance specified.  So add a
driconf option to hide the extension from games that are known to use
both.

Signed-off-by: Rob Clark 
---
Tbh, I'm also not really sure what to do when/if we got updated firmware
which handled draw-indirect with base-instance, since we'd need to make
this option conditional on fw version.  For STK that probably isn't a
big deal since it doesn't use draw-indirect in a particularly useful way
(the indirect buffer is generated on CPU).

 src/gallium/auxiliary/pipe-loader/driinfo_gallium.h |  1 +
 src/gallium/include/state_tracker/st_api.h  |  1 +
 src/gallium/state_trackers/dri/dri_screen.c |  2 ++
 src/mesa/state_tracker/st_extensions.c  |  3 +++
 src/util/drirc  | 10 ++
 src/util/xmlpool/t_options.h|  5 +
 6 files changed, 22 insertions(+)

diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 
b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
index 003a3d7089e..9c1705bd9a8 100644
--- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
+++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
@@ -32,4 +32,5 @@ DRI_CONF_SECTION_END
 DRI_CONF_SECTION_MISCELLANEOUS
DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
DRI_CONF_GLSL_ZERO_INIT("false")
+   DRI_CONF_DISABLE_DRAW_INDIRECT("false")
 DRI_CONF_SECTION_END
diff --git a/src/gallium/include/state_tracker/st_api.h 
b/src/gallium/include/state_tracker/st_api.h
index 44d6b474f8f..20a7843992a 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -217,6 +217,7 @@ struct st_config_options
boolean disable_blend_func_extended;
boolean disable_glsl_line_continuations;
boolean disable_shader_bit_encoding;
+   boolean disable_draw_indirect;
boolean force_glsl_extensions_warn;
unsigned force_glsl_version;
boolean allow_glsl_extension_directive_midshader;
diff --git a/src/gallium/state_trackers/dri/dri_screen.c 
b/src/gallium/state_trackers/dri/dri_screen.c
index 1ca511612ad..035406a771e 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -68,6 +68,8 @@ dri_fill_st_options(struct dri_screen *screen)
   driQueryOptionb(optionCache, "disable_glsl_line_continuations");
options->disable_shader_bit_encoding =
   driQueryOptionb(optionCache, "disable_shader_bit_encoding");
+   options->disable_draw_indirect =
+  driQueryOptionb(optionCache, "disable_draw_indirect");
options->force_glsl_extensions_warn =
   driQueryOptionb(optionCache, "force_glsl_extensions_warn");
options->force_glsl_version =
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 9ef0df1e926..0c7e7e3abf1 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -909,6 +909,9 @@ void st_init_extensions(struct pipe_screen *screen,
   }
}
 
+   if (options->disable_draw_indirect)
+  extensions->ARB_draw_indirect = GL_FALSE;
+
/* Expose the extensions which directly correspond to gallium formats. */
init_format_extensions(screen, extensions, rendertarget_mapping,
   ARRAY_SIZE(rendertarget_mapping), PIPE_TEXTURE_2D,
diff --git a/src/util/drirc b/src/util/drirc
index 9d27330036e..cea80ecc3d6 100644
--- a/src/util/drirc
+++ b/src/util/drirc
@@ -275,4 +275,14 @@ TODO: document the other workarounds.
 
 
 
+
+
+
+
+
+
 
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index bd553085c86..0d1c597a830 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -379,6 +379,11 @@ DRI_CONF_OPT_BEGIN_B(glsl_zero_init, def) \
 DRI_CONF_DESC(en,gettext("Force uninitialized variables to default to 
zero")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_DISABLE_DRAW_INDIRECT(def) \
+DRI_CONF_OPT_BEGIN_B(disable_draw_indirect, def) \
+   DRI_CONF_DESC(en, gettext("Disable the GL_ARB_draw_indirect extension")) \
+DRI_CONF_OPT_END
+
 /**
  * \brief Initialization configuration options
  */
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] drirc: add option to disable ARB_draw_indirect

2017-12-05 Thread Ilia Mirkin
On Tue, Dec 5, 2017 at 7:54 AM, Rob Clark  wrote:
> This is a bit sad/annoying.  But with current GPU firmware (at least on
> a5xx) we can support both draw-indirect and base-instance.  But we can't
> support draw-indirect with a non-zero base-instance specified.  So add a

That means you should only expose one of those features, probably the
draw-indirect one. If someone desperately wants that functionality,
they can force it with MESA_EXTENSION_OVERRIDE. ARB_base_instance has
a specc'd interaction with ARB_indirect_draw...

  -ilia

> driconf option to hide the extension from games that are known to use
> both.
>
> Signed-off-by: Rob Clark 
> ---
> Tbh, I'm also not really sure what to do when/if we got updated firmware
> which handled draw-indirect with base-instance, since we'd need to make
> this option conditional on fw version.  For STK that probably isn't a
> big deal since it doesn't use draw-indirect in a particularly useful way
> (the indirect buffer is generated on CPU).
>
>  src/gallium/auxiliary/pipe-loader/driinfo_gallium.h |  1 +
>  src/gallium/include/state_tracker/st_api.h  |  1 +
>  src/gallium/state_trackers/dri/dri_screen.c |  2 ++
>  src/mesa/state_tracker/st_extensions.c  |  3 +++
>  src/util/drirc  | 10 ++
>  src/util/xmlpool/t_options.h|  5 +
>  6 files changed, 22 insertions(+)
>
> diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 
> b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> index 003a3d7089e..9c1705bd9a8 100644
> --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
> @@ -32,4 +32,5 @@ DRI_CONF_SECTION_END
>  DRI_CONF_SECTION_MISCELLANEOUS
> DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
> DRI_CONF_GLSL_ZERO_INIT("false")
> +   DRI_CONF_DISABLE_DRAW_INDIRECT("false")
>  DRI_CONF_SECTION_END
> diff --git a/src/gallium/include/state_tracker/st_api.h 
> b/src/gallium/include/state_tracker/st_api.h
> index 44d6b474f8f..20a7843992a 100644
> --- a/src/gallium/include/state_tracker/st_api.h
> +++ b/src/gallium/include/state_tracker/st_api.h
> @@ -217,6 +217,7 @@ struct st_config_options
> boolean disable_blend_func_extended;
> boolean disable_glsl_line_continuations;
> boolean disable_shader_bit_encoding;
> +   boolean disable_draw_indirect;
> boolean force_glsl_extensions_warn;
> unsigned force_glsl_version;
> boolean allow_glsl_extension_directive_midshader;
> diff --git a/src/gallium/state_trackers/dri/dri_screen.c 
> b/src/gallium/state_trackers/dri/dri_screen.c
> index 1ca511612ad..035406a771e 100644
> --- a/src/gallium/state_trackers/dri/dri_screen.c
> +++ b/src/gallium/state_trackers/dri/dri_screen.c
> @@ -68,6 +68,8 @@ dri_fill_st_options(struct dri_screen *screen)
>driQueryOptionb(optionCache, "disable_glsl_line_continuations");
> options->disable_shader_bit_encoding =
>driQueryOptionb(optionCache, "disable_shader_bit_encoding");
> +   options->disable_draw_indirect =
> +  driQueryOptionb(optionCache, "disable_draw_indirect");
> options->force_glsl_extensions_warn =
>driQueryOptionb(optionCache, "force_glsl_extensions_warn");
> options->force_glsl_version =
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index 9ef0df1e926..0c7e7e3abf1 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -909,6 +909,9 @@ void st_init_extensions(struct pipe_screen *screen,
>}
> }
>
> +   if (options->disable_draw_indirect)
> +  extensions->ARB_draw_indirect = GL_FALSE;
> +
> /* Expose the extensions which directly correspond to gallium formats. */
> init_format_extensions(screen, extensions, rendertarget_mapping,
>ARRAY_SIZE(rendertarget_mapping), PIPE_TEXTURE_2D,
> diff --git a/src/util/drirc b/src/util/drirc
> index 9d27330036e..cea80ecc3d6 100644
> --- a/src/util/drirc
> +++ b/src/util/drirc
> @@ -275,4 +275,14 @@ TODO: document the other workarounds.
>   />
>  
>  
> +
> +
> +
> +
> +
> +
>  
> diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
> index bd553085c86..0d1c597a830 100644
> --- a/src/util/xmlpool/t_options.h
> +++ b/src/util/xmlpool/t_options.h
> @@ -379,6 +379,11 @@ DRI_CONF_OPT_BEGIN_B(glsl_zero_init, def) \
>  DRI_CONF_DESC(en,gettext("Force uninitialized variables to default 
> to zero")) \
>  DRI_CONF_OPT_END
>
> +#define DRI_CONF_DISABLE_DRAW_INDIRECT(def) \
> +DRI_CONF_OPT_BEGIN_B(disable_draw_indirect, def) \
> +   DRI_CONF_DESC(en, gettext("Disable the GL_ARB_draw_indirect extension")) \
> +DRI_CONF_OPT_END
> +
>  /**
>   * \brief Initialization configuration options
>   */
> --
> 2.13.6
>
> ___
> mesa-dev mailing list
> m

Re: [Mesa-dev] [PATCH] drirc: add option to disable ARB_draw_indirect

2017-12-05 Thread Rob Clark
On Tue, Dec 5, 2017 at 8:02 AM, Ilia Mirkin  wrote:
> On Tue, Dec 5, 2017 at 7:54 AM, Rob Clark  wrote:
>> This is a bit sad/annoying.  But with current GPU firmware (at least on
>> a5xx) we can support both draw-indirect and base-instance.  But we can't
>> support draw-indirect with a non-zero base-instance specified.  So add a
>
> That means you should only expose one of those features, probably the
> draw-indirect one. If someone desperately wants that functionality,
> they can force it with MESA_EXTENSION_OVERRIDE. ARB_base_instance has
> a specc'd interaction with ARB_indirect_draw...
>

In that case, we should have driconf to force override extension (ie.
not rely on user to remember magic MESA_EXTENSION_OVERRIDE
incantations) for any games that might use base-instance but not
draw-indirect..

BR,
-R

>   -ilia
>
>> driconf option to hide the extension from games that are known to use
>> both.
>>
>> Signed-off-by: Rob Clark 
>> ---
>> Tbh, I'm also not really sure what to do when/if we got updated firmware
>> which handled draw-indirect with base-instance, since we'd need to make
>> this option conditional on fw version.  For STK that probably isn't a
>> big deal since it doesn't use draw-indirect in a particularly useful way
>> (the indirect buffer is generated on CPU).
>>
>>  src/gallium/auxiliary/pipe-loader/driinfo_gallium.h |  1 +
>>  src/gallium/include/state_tracker/st_api.h  |  1 +
>>  src/gallium/state_trackers/dri/dri_screen.c |  2 ++
>>  src/mesa/state_tracker/st_extensions.c  |  3 +++
>>  src/util/drirc  | 10 ++
>>  src/util/xmlpool/t_options.h|  5 +
>>  6 files changed, 22 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h 
>> b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
>> index 003a3d7089e..9c1705bd9a8 100644
>> --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
>> +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
>> @@ -32,4 +32,5 @@ DRI_CONF_SECTION_END
>>  DRI_CONF_SECTION_MISCELLANEOUS
>> DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false")
>> DRI_CONF_GLSL_ZERO_INIT("false")
>> +   DRI_CONF_DISABLE_DRAW_INDIRECT("false")
>>  DRI_CONF_SECTION_END
>> diff --git a/src/gallium/include/state_tracker/st_api.h 
>> b/src/gallium/include/state_tracker/st_api.h
>> index 44d6b474f8f..20a7843992a 100644
>> --- a/src/gallium/include/state_tracker/st_api.h
>> +++ b/src/gallium/include/state_tracker/st_api.h
>> @@ -217,6 +217,7 @@ struct st_config_options
>> boolean disable_blend_func_extended;
>> boolean disable_glsl_line_continuations;
>> boolean disable_shader_bit_encoding;
>> +   boolean disable_draw_indirect;
>> boolean force_glsl_extensions_warn;
>> unsigned force_glsl_version;
>> boolean allow_glsl_extension_directive_midshader;
>> diff --git a/src/gallium/state_trackers/dri/dri_screen.c 
>> b/src/gallium/state_trackers/dri/dri_screen.c
>> index 1ca511612ad..035406a771e 100644
>> --- a/src/gallium/state_trackers/dri/dri_screen.c
>> +++ b/src/gallium/state_trackers/dri/dri_screen.c
>> @@ -68,6 +68,8 @@ dri_fill_st_options(struct dri_screen *screen)
>>driQueryOptionb(optionCache, "disable_glsl_line_continuations");
>> options->disable_shader_bit_encoding =
>>driQueryOptionb(optionCache, "disable_shader_bit_encoding");
>> +   options->disable_draw_indirect =
>> +  driQueryOptionb(optionCache, "disable_draw_indirect");
>> options->force_glsl_extensions_warn =
>>driQueryOptionb(optionCache, "force_glsl_extensions_warn");
>> options->force_glsl_version =
>> diff --git a/src/mesa/state_tracker/st_extensions.c 
>> b/src/mesa/state_tracker/st_extensions.c
>> index 9ef0df1e926..0c7e7e3abf1 100644
>> --- a/src/mesa/state_tracker/st_extensions.c
>> +++ b/src/mesa/state_tracker/st_extensions.c
>> @@ -909,6 +909,9 @@ void st_init_extensions(struct pipe_screen *screen,
>>}
>> }
>>
>> +   if (options->disable_draw_indirect)
>> +  extensions->ARB_draw_indirect = GL_FALSE;
>> +
>> /* Expose the extensions which directly correspond to gallium formats. */
>> init_format_extensions(screen, extensions, rendertarget_mapping,
>>ARRAY_SIZE(rendertarget_mapping), PIPE_TEXTURE_2D,
>> diff --git a/src/util/drirc b/src/util/drirc
>> index 9d27330036e..cea80ecc3d6 100644
>> --- a/src/util/drirc
>> +++ b/src/util/drirc
>> @@ -275,4 +275,14 @@ TODO: document the other workarounds.
>>  > value="true" />
>>  
>>  
>> +
>> +
>> +
>> +
>> +
>> +
>>  
>> diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
>> index bd553085c86..0d1c597a830 100644
>> --- a/src/util/xmlpool/t_options.h
>> +++ b/src/util/xmlpool/t_options.h
>> @@ -379,6 +379,11 @@ DRI_CONF_OPT_BEGIN_B(glsl_zero_init, def) \
>>  DRI_CONF_DESC(en,gettext("Force uninitialized variables to default 
>> to

Re: [Mesa-dev] [PATCH] drirc: add option to disable ARB_draw_indirect

2017-12-05 Thread Emil Velikov
Hi Rob,

On 5 December 2017 at 12:54, Rob Clark  wrote:
> This is a bit sad/annoying.  But with current GPU firmware (at least on
> a5xx) we can support both draw-indirect and base-instance.  But we can't
> support draw-indirect with a non-zero base-instance specified.  So add a
> driconf option to hide the extension from games that are known to use
> both.
>
> Signed-off-by: Rob Clark 
> ---
> Tbh, I'm also not really sure what to do when/if we got updated firmware
> which handled draw-indirect with base-instance, since we'd need to make
> this option conditional on fw version.  For STK that probably isn't a
> big deal since it doesn't use draw-indirect in a particularly useful way
> (the indirect buffer is generated on CPU).
>
Couldn't freedreno just return 0 for PIPE_CAP_DRAW_INDIRECT (aka
disable the extension) as it detects buggy FW?
This is what radeons have been doing as they encounter iffy firmware or LLVM.

AFAICT freedreno doesn't do GL 4.0 or GLES 3.1 so one should be safe.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] drirc: add option to disable ARB_draw_indirect

2017-12-05 Thread Ilia Mirkin
On Tue, Dec 5, 2017 at 8:18 AM, Emil Velikov  wrote:
> Hi Rob,
>
> On 5 December 2017 at 12:54, Rob Clark  wrote:
>> This is a bit sad/annoying.  But with current GPU firmware (at least on
>> a5xx) we can support both draw-indirect and base-instance.  But we can't
>> support draw-indirect with a non-zero base-instance specified.  So add a
>> driconf option to hide the extension from games that are known to use
>> both.
>>
>> Signed-off-by: Rob Clark 
>> ---
>> Tbh, I'm also not really sure what to do when/if we got updated firmware
>> which handled draw-indirect with base-instance, since we'd need to make
>> this option conditional on fw version.  For STK that probably isn't a
>> big deal since it doesn't use draw-indirect in a particularly useful way
>> (the indirect buffer is generated on CPU).
>>
> Couldn't freedreno just return 0 for PIPE_CAP_DRAW_INDIRECT (aka
> disable the extension) as it detects buggy FW?
> This is what radeons have been doing as they encounter iffy firmware or LLVM.
>
> AFAICT freedreno doesn't do GL 4.0 or GLES 3.1 so one should be safe.

Rob is this -><- close to ES 3.1, so that's not a great option.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvir/nvc0: fix CVT lowering for dType == sType

2017-12-05 Thread Karol Herbst
The lowering code can't really handle that situation well and we just get away
with converting it to OP_MOV in this case.

Signed-off-by: Karol Herbst 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index b0834a8035..da2f979e66 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2808,6 +2808,11 @@ NVC0LoweringPass::handleCVT(Instruction *i)
if (isFloatType(i->dType) || isFloatType(i->sType))
   return true;
 
+   if (i->dType == i->sType) {
+  i->op = OP_MOV;
+  return true;
+   }
+
if (i->saturate && (typeSizeof(i->sType) > typeSizeof(i->dType))) {
   if (isSignedIntType(i->sType) && !isSignedIntType(i->dType)) {
  // Signed to unsigned: only need to clamp to 0
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvir/nvc0: fix CVT lowering for dType == sType

2017-12-05 Thread Karol Herbst
uhh, seems like this code isn't in master yet, so ignore this please.

On Tue, Dec 5, 2017 at 3:17 PM, Karol Herbst  wrote:
> The lowering code can't really handle that situation well and we just get away
> with converting it to OP_MOV in this case.
>
> Signed-off-by: Karol Herbst 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index b0834a8035..da2f979e66 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2808,6 +2808,11 @@ NVC0LoweringPass::handleCVT(Instruction *i)
> if (isFloatType(i->dType) || isFloatType(i->sType))
>return true;
>
> +   if (i->dType == i->sType) {
> +  i->op = OP_MOV;
> +  return true;
> +   }
> +
> if (i->saturate && (typeSizeof(i->sType) > typeSizeof(i->dType))) {
>if (isSignedIntType(i->sType) && !isSignedIntType(i->dType)) {
>   // Signed to unsigned: only need to clamp to 0
> --
> 2.14.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvir/nvc0: fix CVT lowering for dType == sType

2017-12-05 Thread Ilia Mirkin
An in any case, CVT with stype == dtype is illegal - whatever
generates that should be fixed.

On Tue, Dec 5, 2017 at 9:23 AM, Karol Herbst  wrote:
> uhh, seems like this code isn't in master yet, so ignore this please.
>
> On Tue, Dec 5, 2017 at 3:17 PM, Karol Herbst  wrote:
>> The lowering code can't really handle that situation well and we just get 
>> away
>> with converting it to OP_MOV in this case.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index b0834a8035..da2f979e66 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -2808,6 +2808,11 @@ NVC0LoweringPass::handleCVT(Instruction *i)
>> if (isFloatType(i->dType) || isFloatType(i->sType))
>>return true;
>>
>> +   if (i->dType == i->sType) {
>> +  i->op = OP_MOV;
>> +  return true;
>> +   }
>> +
>> if (i->saturate && (typeSizeof(i->sType) > typeSizeof(i->dType))) {
>>if (isSignedIntType(i->sType) && !isSignedIntType(i->dType)) {
>>   // Signed to unsigned: only need to clamp to 0
>> --
>> 2.14.3
>>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Android: enable noreturn and returns_nonnull attributes

2017-12-05 Thread Rob Herring
On Tue, Dec 5, 2017 at 5:56 AM, Emil Velikov  wrote:
> On 5 December 2017 at 02:21, Rob Herring  wrote:
>> Commit 94ca8e04adf6 ("spirv: Add vtn_fail and vtn_assert helpers") broke
>> Android builds which have -Werror enabled with the following errors:
>>
>> external/mesa3d/src/compiler/spirv/spirv_to_nir.c:272:1: error: control may 
>> reach end of non-void function [-Werror,-Wreturn-type]
>> external/mesa3d/src/compiler/spirv/spirv_to_nir.c:810:1: error: control may 
>> reach end of non-void function [-Werror,-Wreturn-type]
>> ...
>>
>> The problem is the noreturn attribute is not enabled and we to define
>> HAVE_FUNC_ATTRIBUTE_NORETURN.
>>
>> Auditing src/util/macros.h, we're also missing
>> HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL, so add it too.
>>
>> Fixes: 94ca8e04adf6 ("spirv: Add vtn_fail and vtn_assert helpers")
>> Cc: Jason Ekstrand 
>> Signed-off-by: Rob Herring 
> Reviewed-by: Emil Velikov 
>
> The following shows the macros not set on Android. Feel free to check
> if they're supported and toggle them on.

I did this, but did another check of it.

> $ for i in `git grep "if.*def.*HAVE_FUNC_ATTRIBUTE_" origin/master  --
>  | grep -o "HAVE_FUNC_[A-Z_]*" | sort -u`; do git grep $i
> origin/master  | grep
> -qi android || echo missing $i; done
> missing HAVE_FUNC_ATTRIBUTE_CONST
> missing HAVE_FUNC_ATTRIBUTE_MALLOC
> missing HAVE_FUNC_ATTRIBUTE_NORETURN
> missing HAVE_FUNC_ATTRIBUTE_PURE
> missing HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
> missing HAVE_FUNC_ATTRIBUTE_VISIBILITY
> missing HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT

At least according to documentation for clang, this is the only other
one that is supported. So I turned it on too and pushed the commit.

> missing HAVE_FUNC_ATTRIBUTE_WEAK
>
> -Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa/st: move cloning of NIR shader for compute

2017-12-05 Thread Rob Clark
Since in the NIR case, driver takes ownership of the NIR shader, we need
to clone what is passed to the driver.  Normally this is done as part of
creating the shader variant (where is clone is anyways needed).  But
compute shaders have no variants, so we were cloning earlier.

The problem is that after the NIR linking optimizations, we ended up
cloning *before* all the lowering passes where done.

So move this into st_get_cp_variant(), to make compute shaders work more
like other shader stages.

Signed-off-by: Rob Clark 
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +-
 src/mesa/state_tracker/st_program.c   | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 5d18e7b62bf..36adf55cd45 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -431,7 +431,7 @@ set_st_program(struct gl_program *prog,
   stcp = (struct st_compute_program *)prog;
   stcp->shader_program = shader_program;
   stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
-  stcp->tgsi.prog = nir_shader_clone(NULL, nir);
+  stcp->tgsi.prog = nir;
   break;
default:
   unreachable("unknown shader stage");
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 5c0a58104fc..258f5e47cbe 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -1659,7 +1659,10 @@ st_get_cp_variant(struct st_context *st,
   v = CALLOC_STRUCT(st_basic_variant);
   if (v) {
  /* fill in new variant */
- v->driver_shader = pipe->create_compute_state(pipe, tgsi);
+ struct pipe_compute_state cs = *tgsi;
+ if (tgsi->ir_type == PIPE_SHADER_IR_NIR)
+cs.prog = nir_shader_clone(NULL, tgsi->prog);;
+ v->driver_shader = pipe->create_compute_state(pipe, &cs);
  v->key = key;
 
  /* insert into list */
-- 
2.13.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] glapi/check_table: Remove 'extern "C"' block

2017-12-05 Thread Emil Velikov
On 4 December 2017 at 23:57, Dylan Baker  wrote:
> Quoting Emil Velikov (2017-11-23 11:04:34)
>> On 20 November 2017 at 23:12, Dylan Baker  wrote:
>> > This doesn't actually accomplish what it's meant to do, as extern C
>> > doesn't undefine __cplusplus, so the included headers define a template
>> > (because __cplusplus is defined), but then that code is in an 'extern
>> > "C"' block, and explosion.
>> >
>> The commit does sound a bit off, admittedly I don't fully grok what
>> you're trying to say.
>>
>> The extern "C" { } construct annotates anything within as if it were a
>> normal C code. Thus functions will have the C linkage (otherwise
>> they'll have the C++ mangling) and structs are using the C rules.
>> So if you have a C++ header further down the chain it will be
>> considered as C and hell will break loose.
>>
>> This patch is correct, just sent a fix for glapitable.h, while glapi.h
>> was handled with d38839a498b38b745f5bdf6b96fbafa2792670be.
>>
>> As-is "struct _glapi_table" will be interpret as C++ one, which may
>> not have the same guarantees about sizeof and pointer arithmetic that
>> we're using in the test.
>>
>> Hope the above provides some inspiration towards a better commit message.
>>
>> Thanks
>> Emil
>
> So here's the problem I run into:
>
> In file included from ../include/c99_compat.h:28:0,
>  from ../src/util/macros.h:29,
>  from ../src/mapi/glapi/glapi.h:47,
>  from ../src/mapi/glapi/tests/check_table.cpp:28:
> ../include/no_extern_c.h:47:1: error: template with C linkage
>  template class _IncludeInsideExternCNotPortable;
>  ^~~~
> [54/93] Compiling C++ object 'src/gtest/gtest@sta/src_gtest-all.cc.o'.
>
> So, my commit message still makes sense to me, since you can't put a template 
> in
> an extern "C" block, and that template is guarded by __cplusplus, which is 
> still
> defined even in an extern "C" block.
>
This is one symptom, of the issue described before. The code in
c99_compat.h was explicitly added by Jose to make the issue obvious.

> Do you have a commit message in mind that is better?

Feel free to reuse as much or as little of my explanation. Borrowing
from analogous fixes also works ;-)
Some examples:
237dcb4aa7c39c59bfd225ae3d73caf709be216d
a177a13033cf9356eb26e8757055037a54268a18
36ad8265489fc66ab45b9b74dafa353a93bdb03b

I'm going through 4/8 - seems like there's a bunch of other bugs around :-\
Feel free to push the remainder of the series with the nitpicks addressed.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvir/nvc0: fix CVT lowering for dType == sType

2017-12-05 Thread Ilia Mirkin
On Tue, Dec 5, 2017 at 9:34 AM, Ilia Mirkin  wrote:
> An in any case, CVT with stype == dtype is illegal - whatever
> generates that should be fixed.

Without source modifiers, that is :) "cvt f32 dst neg src" is just
fine of course.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

Bug ID: 104119
   Summary: radv: OpBitFieldInsert produces 0 with a loop counter
for Insert
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: jl...@feralinteractive.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 135980
  --> https://bugs.freedesktop.org/attachment.cgi?id=135980&action=edit
test application that reproduces the bug

On RADV, the SPIR-V OpBitFieldInsert opcode produces 0 when the Insert
parameter derives from a variable used as a loop counter. For example, the
following GLSL compute shader writes 0 to the first 8 elements of the buffer at
binding 0:

#version 450 core
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout (std430, set = 0, binding = 0)
restrict writeonly buffer u2_cs { uint u2[]; };

void main()
{
for (int i = 0; i < 8; i++)
u2[i] = bitfieldInsert(0, i, 16, 2);
}


I've attached a program that reproduces the bug.

I'm using the LLVM release_50 branch at revision 318947 and the Mesa master
branch at 20d37da597653201d2c524434907e817bd03b1d0.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

--- Comment #1 from James Legg  ---
Created attachment 135981
  --> https://bugs.freedesktop.org/attachment.cgi?id=135981&action=edit
SPIR-V disassembly of test shader

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

--- Comment #2 from James Legg  ---
Created attachment 135982
  --> https://bugs.freedesktop.org/attachment.cgi?id=135982&action=edit
Output from RADV_DEBUG=shaders

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Jason Ekstrand
On Tue, Dec 5, 2017 at 1:22 AM, Daniel Stone  wrote:

> Hi,
>
> On 18 November 2017 at 00:10, Jason Ekstrand  wrote:
> > This fixes a bug where we were taking the tiling from the BO regardless
> > of what the modifier said.  When we got images in from Vulkan where it
> > doesn't set the tiling on the BO, we would treat them as linear even
> > though the modifier expressly said to treat it as Y-tiled.
>
> For some reason I thought Ken had already reviewed this and it landed,
> until Kristian mentioned last night.


There's been some discussion about what the right thing to do is here.
I've got a patch (which is now landed) which will make us set the tiling
from Vulkan just like GL does.  It's rather annoying but I think that's
marginally better.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx: GLX_MESA_multithread_makecurrent is direct-only

2017-12-05 Thread Adam Jackson
This extension is not defined for indirect contexts. Marking it as
"client only", as the old code did here, would make the extension
available in indirect contexts, even though the server would certainly
not have it in its extension list.

Cc: 
Signed-off-by: Adam Jackson 
---
 src/glx/glxextensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index af6ffbf660..638d8bcbbe 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -152,7 +152,7 @@ static const struct extension_info known_glx_extensions[] = 
{
{ GLX(ATI_pixel_format_float),  VER(0,0), N, N, N, N },
{ GLX(INTEL_swap_event),VER(0,0), Y, N, N, N },
{ GLX(MESA_copy_sub_buffer),VER(0,0), Y, N, N, N },
-   { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
+   { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, N, Y },
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
{ GLX(MESA_swap_control),   VER(0,0), Y, N, N, Y },
{ GLX(NV_float_buffer), VER(0,0), N, N, N, N },
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Kristian Høgsberg
On Tue, Dec 5, 2017 at 7:57 AM, Jason Ekstrand  wrote:
> On Tue, Dec 5, 2017 at 1:22 AM, Daniel Stone  wrote:
>>
>> Hi,
>>
>> On 18 November 2017 at 00:10, Jason Ekstrand  wrote:
>> > This fixes a bug where we were taking the tiling from the BO regardless
>> > of what the modifier said.  When we got images in from Vulkan where it
>> > doesn't set the tiling on the BO, we would treat them as linear even
>> > though the modifier expressly said to treat it as Y-tiled.
>>
>> For some reason I thought Ken had already reviewed this and it landed,
>> until Kristian mentioned last night.
>
>
> There's been some discussion about what the right thing to do is here.  I've
> got a patch (which is now landed) which will make us set the tiling from
> Vulkan just like GL does.  It's rather annoying but I think that's
> marginally better.

I don't know that it's better - it reinforces the notion that you have
to make the kernel-side tiling match for the dma-buf import extension
to work. I think it'd be better to land these patches (btw, Rb: me
(can you even do parenthetical Rbs?)) and call set-tiling in mesa. The
assumption being that if the tiling doesn't match the modifier, then
maybe the producer didn't care about kernel tiling? Alternatively,
could we set bo->tiling = INCONSISTENT or such in mesa and then use
that to avoid the gtt map paths?

Kristian

>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Jason Ekstrand
On Tue, Dec 5, 2017 at 8:23 AM, Kristian Høgsberg 
wrote:

> On Tue, Dec 5, 2017 at 7:57 AM, Jason Ekstrand 
> wrote:
> > On Tue, Dec 5, 2017 at 1:22 AM, Daniel Stone 
> wrote:
> >>
> >> Hi,
> >>
> >> On 18 November 2017 at 00:10, Jason Ekstrand 
> wrote:
> >> > This fixes a bug where we were taking the tiling from the BO
> regardless
> >> > of what the modifier said.  When we got images in from Vulkan where it
> >> > doesn't set the tiling on the BO, we would treat them as linear even
> >> > though the modifier expressly said to treat it as Y-tiled.
> >>
> >> For some reason I thought Ken had already reviewed this and it landed,
> >> until Kristian mentioned last night.
> >
> >
> > There's been some discussion about what the right thing to do is here.
> I've
> > got a patch (which is now landed) which will make us set the tiling from
> > Vulkan just like GL does.  It's rather annoying but I think that's
> > marginally better.
>
> I don't know that it's better - it reinforces the notion that you have
> to make the kernel-side tiling match for the dma-buf import extension
> to work. I think it'd be better to land these patches (btw, Rb: me
> (can you even do parenthetical Rbs?))


I'll allow it. :)


> and call set-tiling in mesa.


Yeah, I think that's reasonable.  Really, this should only be a problem if
we have a bunch of users trying to use the same BO with different
modifiers.  It can happen in theory (immagine two images in the same BO,
one X and one Y) but it's a pretty crazy case.


> The
> assumption being that if the tiling doesn't match the modifier, then
> maybe the producer didn't care about kernel tiling? Alternatively,
> could we set bo->tiling = INCONSISTENT or such in mesa and then use
> that to avoid the gtt map paths?
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr/scons: Fix another intermittent build failure

2017-12-05 Thread George Kyriazis
gen_BackendPixelRate*.cpp depends on gen_ar_eventhandler.hpp.
Fix missing dependency.
---
 src/gallium/drivers/swr/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/swr/SConscript 
b/src/gallium/drivers/swr/SConscript
index 9204ecb..eca4830 100644
--- a/src/gallium/drivers/swr/SConscript
+++ b/src/gallium/drivers/swr/SConscript
@@ -146,6 +146,7 @@ env.CodeGenerate(
 Depends(backendPixelRateFiles,
 ['rasterizer/core/backends/gen_BackendPixelRate.hpp',
  'rasterizer/archrast/gen_ar_event.hpp',
+ 'rasterizer/archrast/gen_ar_eventhandler.hpp',
  'rasterizer/codegen/gen_knobs.h']
 )
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] st/mesa: whitespace fixes in st_format.c

2017-12-05 Thread Brian Paul
---
 src/mesa/state_tracker/st_format.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_format.c 
b/src/mesa/state_tracker/st_format.c
index 1ae677d..da8b5e2 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -58,7 +58,8 @@
  * Translate Mesa format to Gallium format.
  */
 enum pipe_format
-st_mesa_format_to_pipe_format(const struct st_context *st, mesa_format 
mesaFormat)
+st_mesa_format_to_pipe_format(const struct st_context *st,
+  mesa_format mesaFormat)
 {
switch (mesaFormat) {
case MESA_FORMAT_A8B8G8R8_UNORM:
@@ -1687,7 +1688,7 @@ static const struct format_mapping format_map[] = {
   { PIPE_FORMAT_R8G8B8A8_SINT, 0 }
},
{
-  { GL_RGB_INTEGER_EXT, 
+  { GL_RGB_INTEGER_EXT,
 GL_BGR_INTEGER_EXT,
 GL_RGB8I_EXT,
 GL_BLUE_INTEGER_EXT, 0 },
@@ -2005,6 +2006,7 @@ find_supported_format(struct pipe_screen *screen,
return PIPE_FORMAT_NONE;
 }
 
+
 struct exact_format_mapping
 {
GLenum format;
@@ -2040,6 +2042,7 @@ static const struct exact_format_mapping rgbx_tbl[] =
{ 0,   0,  0  }
 };
 
+
 /**
  * For unsized/base internal formats, we may choose a convenient effective
  * internal format for {format, type}. If one exists, return that, otherwise
@@ -2074,6 +2077,7 @@ find_exact_format(GLint internalFormat, GLenum format, 
GLenum type)
return PIPE_FORMAT_NONE;
 }
 
+
 /**
  * Given an OpenGL internalFormat value for a texture or surface, return
  * the best matching PIPE_FORMAT_x, or PIPE_FORMAT_NONE if there's no match.
@@ -2173,7 +2177,7 @@ st_choose_renderbuffer_format(struct st_context *st,
  */
 enum pipe_format
 st_choose_matching_format(struct st_context *st, unsigned bind,
- GLenum format, GLenum type, GLboolean swapBytes)
+  GLenum format, GLenum type, GLboolean swapBytes)
 {
struct pipe_screen *screen = st->pipe->screen;
mesa_format mesa_format;
@@ -2253,7 +2257,7 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum 
target,
 internalFormat == GL_RGBA16F ||
 internalFormat == GL_RGB32F ||
 internalFormat == GL_RGBA32F)
-bindings |= PIPE_BIND_RENDER_TARGET;
+  bindings |= PIPE_BIND_RENDER_TARGET;
 
/* GLES allows the driver to choose any format which matches
 * the format+type combo, because GLES only supports unsized internal
@@ -2279,7 +2283,9 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum 
target,
 return st_pipe_format_to_mesa_format(pFormat);
 
  if (!is_renderbuffer) {
-/* try choosing format again, this time without render target 
bindings */
+/* try choosing format again, this time without render
+ * target bindings.
+ */
 pFormat = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
 format, type,
 ctx->Unpack.SwapBytes);
@@ -2369,6 +2375,7 @@ st_QuerySamplesForFormat(struct gl_context *ctx, GLenum 
target,
return num_sample_counts;
 }
 
+
 /**
  * ARB_internalformat_query2 driver hook.
  */
@@ -2428,6 +2435,7 @@ st_QueryInternalFormat(struct gl_context *ctx, GLenum 
target,
}
 }
 
+
 /**
  * This is used for translating texture border color and the clear
  * color.  For example, the clear color is interpreted according to
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] st/mesa: remove unneeded #include in st_format.h

2017-12-05 Thread Brian Paul
---
 src/mesa/state_tracker/st_format.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_format.h 
b/src/mesa/state_tracker/st_format.h
index 3dd9c10..466b5d0 100644
--- a/src/mesa/state_tracker/st_format.h
+++ b/src/mesa/state_tracker/st_format.h
@@ -33,7 +33,6 @@
 #include "main/formats.h"
 #include "main/glheader.h"
 
-#include "pipe/p_defines.h"
 #include "pipe/p_format.h"
 
 #ifdef __cplusplus
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] st/mesa: rename a few vars to 'bindings'

2017-12-05 Thread Brian Paul
To be consistent.
---
 src/mesa/state_tracker/st_format.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_format.c 
b/src/mesa/state_tracker/st_format.c
index da8b5e2..3f7e55e 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1988,13 +1988,13 @@ find_supported_format(struct pipe_screen *screen,
   const enum pipe_format formats[],
   enum pipe_texture_target target,
   unsigned sample_count,
-  unsigned tex_usage,
+  unsigned bindings,
   boolean allow_dxt)
 {
uint i;
for (i = 0; formats[i]; i++) {
   if (screen->is_format_supported(screen, formats[i], target,
-  sample_count, tex_usage)) {
+  sample_count, bindings)) {
  if (!allow_dxt && util_format_is_s3tc(formats[i])) {
 /* we can't return a dxt format, continue searching */
 continue;
@@ -2158,13 +2158,13 @@ enum pipe_format
 st_choose_renderbuffer_format(struct st_context *st,
   GLenum internalFormat, unsigned sample_count)
 {
-   uint usage;
+   unsigned bindings;
if (_mesa_is_depth_or_stencil_format(internalFormat))
-  usage = PIPE_BIND_DEPTH_STENCIL;
+  bindings = PIPE_BIND_DEPTH_STENCIL;
else
-  usage = PIPE_BIND_RENDER_TARGET;
+  bindings = PIPE_BIND_RENDER_TARGET;
return st_choose_format(st, internalFormat, GL_NONE, GL_NONE,
-   PIPE_TEXTURE_2D, sample_count, usage, FALSE);
+   PIPE_TEXTURE_2D, sample_count, bindings, FALSE);
 }
 
 
@@ -2411,17 +2411,17 @@ st_QueryInternalFormat(struct gl_context *ctx, GLenum 
target,
* the driver, and if so return the same internal format, otherwise
* return GL_NONE.
*/
-  uint usage;
+  unsigned bindings;
   if (_mesa_is_depth_or_stencil_format(internalFormat))
- usage = PIPE_BIND_DEPTH_STENCIL;
+ bindings = PIPE_BIND_DEPTH_STENCIL;
   else
- usage = PIPE_BIND_RENDER_TARGET;
+ bindings = PIPE_BIND_RENDER_TARGET;
   enum pipe_format pformat = st_choose_format(st,
   internalFormat,
   GL_NONE,
   GL_NONE,
   PIPE_TEXTURE_2D, 1,
-  usage, FALSE);
+  bindings, FALSE);
   if (pformat)
  params[0] = internalFormat;
   break;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] mesa: simplify/improve some _mesa_error() calls in teximage.c

2017-12-05 Thread Brian Paul
---
 src/mesa/main/teximage.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 41de966..572e380 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -5778,14 +5778,10 @@ texture_image_multisample(struct gl_context *ctx, 
GLuint dims,
}
 
if (!check_multisample_target(dims, target, dsa)) {
-  if (dsa) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "%s(target)", func);
- return;
-  }
-  else {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", func);
- return;
-  }
+  GLenum err = dsa ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
+  _mesa_error(ctx, err, "%s(target=%s)", func,
+  _mesa_enum_to_string(target));
+  return;
}
 
/* check that the specified internalformat is 
color/depth/stencil-renderable;
@@ -5826,7 +5822,7 @@ texture_image_multisample(struct gl_context *ctx, GLuint 
dims,
 *However, if samples is not supported, then no error is generated.
 */
if (!samplesOK && !_mesa_is_proxy_texture(target)) {
-  _mesa_error(ctx, sample_count_error, "%s(samples)", func);
+  _mesa_error(ctx, sample_count_error, "%s(samples=%d)", func, samples);
   return;
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] mesa: add const qualifier in test_attachment_completeness()

2017-12-05 Thread Brian Paul
---
 src/mesa/main/fbobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 8116563..f7702f1 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -793,7 +793,7 @@ test_attachment_completeness(const struct gl_context *ctx, 
GLenum format,
/* Look for reasons why the attachment might be incomplete */
if (att->Type == GL_TEXTURE) {
   const struct gl_texture_object *texObj = att->Texture;
-  struct gl_texture_image *texImage;
+  const struct gl_texture_image *texImage;
   GLenum baseFormat;
 
   if (!texObj) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] mesa: trivial whitespace fixes in transformfeedback.c

2017-12-05 Thread Brian Paul
---
 src/mesa/main/transformfeedback.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index a5ea2a5..e4cc1db 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -346,7 +346,7 @@ _mesa_compute_max_transform_feedback_vertices(struct 
gl_context *ctx,
 
  /* Skip any inactive buffers, which have a stride of 0. */
  if (stride == 0)
-   continue;
+continue;
 
  max_for_this_buffer = obj->Size[i] / (4 * stride);
  max_index = MIN2(max_index, max_for_this_buffer);
@@ -605,7 +605,7 @@ _mesa_validate_buffer_range_xfb(struct gl_context *ctx,
   _mesa_error(ctx, GL_INVALID_VALUE, "%s(size=%d must be a multiple of "
   "four)", gl_methd_name, (int) size);
   return false;
-   }  
+   }
 
if (offset & 0x3) {
   /* OpenGL 4.5 core profile, 6.7, pdf page 103: multiple of 4 */
@@ -732,13 +732,13 @@ _mesa_TransformFeedbackBufferBase(GLuint xfb, GLuint 
index, GLuint buffer)
 
obj = lookup_transform_feedback_object_err(ctx, xfb,
   "glTransformFeedbackBufferBase");
-   if(!obj) {
+   if (!obj) {
   return;
}
 
bufObj = lookup_transform_feedback_bufferobj_err(ctx, buffer,
   "glTransformFeedbackBufferBase");
-   if(!bufObj) {
+   if (!bufObj) {
   return;
}
 
@@ -755,13 +755,13 @@ _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint 
index, GLuint buffer,
 
obj = lookup_transform_feedback_object_err(ctx, xfb,
   
"glTransformFeedbackBufferRange");
-   if(!obj) {
+   if (!obj) {
   return;
}
 
bufObj = lookup_transform_feedback_bufferobj_err(ctx, buffer,
   
"glTransformFeedbackBufferRange");
-   if(!bufObj) {
+   if (!bufObj) {
   return;
}
 
@@ -1337,7 +1337,7 @@ _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, 
GLint *param)
 
 obj = lookup_transform_feedback_object_err(ctx, xfb,
"glGetTransformFeedbackiv");
-if(!obj) {
+if (!obj) {
return;
 }
 
@@ -1363,7 +1363,7 @@ _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, 
GLuint index,
 
obj = lookup_transform_feedback_object_err(ctx, xfb,
   "glGetTransformFeedbacki_v");
-   if(!obj) {
+   if (!obj) {
   return;
}
 
@@ -1392,7 +1392,7 @@ _mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, 
GLuint index,
 
obj = lookup_transform_feedback_object_err(ctx, xfb,
   "glGetTransformFeedbacki64_v");
-   if(!obj) {
+   if (!obj) {
   return;
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] mesa: s/%u/%d/ in _mesa_error() call in check_layer()

2017-12-05 Thread Brian Paul
The layer parameter is signed.  Fixes the error message seen when
running the arb_texture_multisample-errors test which checks a
negative layer value.
---
 src/mesa/main/fbobject.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index f7702f1..30287ab 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3189,8 +3189,7 @@ check_layer(struct gl_context *ctx, GLenum target, GLint 
layer,
 * and layer is negative."
 */
if (layer < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE,
-  "%s(layer %u < 0)", caller, layer);
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(layer %d < 0)", caller, layer);
   return false;
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-05 Thread Robert Foss
On Tue, 2017-12-05 at 18:22 +0900, Tomasz Figa wrote:
> On Sat, Dec 2, 2017 at 4:43 AM, Rob Herring  wrote:
> > On Fri, Dec 1, 2017 at 8:44 AM, Tomasz Figa 
> > wrote:
> > > On Fri, Dec 1, 2017 at 11:20 PM, Rob Herring 
> > > wrote:
> > > > On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss  > > > ora.com> wrote:
> > > > > On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
> > > > > > On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli  > > > > > i...@intel.co
> > > > > > m> wrote:
> > > > > > > 
> > > > > > > 
> > > > > > > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
> > > > > > > > 
> > > > > > > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss  > > > > > > > ss@collabo
> > > > > > > > ra.com>
> > > > > > > > wrote:
> > 
> > [...]
> > 
> > > > > > > > (As a side note, I had an idea to create a new
> > > > > > > > interface,
> > > > > > > > standardized
> > > > > > > > by Mesa, let's say libdri_android, completely free of
> > > > > > > > any
> > > > > > > > gralloc-internals. It would have to be exposed
> > > > > > > > additionally by
> > > > > > > > any
> > > > > > > > Android that intends to run Mesa. Given the need to
> > > > > > > > deal with 3
> > > > > > > > different gralloc versions already, it could be
> > > > > > > > something easier
> > > > > > > > to
> > > > > > > > manage.)
> > > > > > > 
> > > > > > > 
> > > > > > > Makes sense, it is a bit messy and we have bit too much
> > > > > > > patches on
> > > > > > > our tree
> > > > > > > because of these differences.
> > > > > > 
> > > > > > Seems overly complicated to me. The information needed is
> > > > > > within the
> > > > > > ints in the native_handle in most/all implementations. I
> > > > > > don't think
> > > > > > there's another way to globally store dmabuf metadata
> > > > > > unless you have
> > > > > > a custom interface in your DRM driver. So standardizing to
> > > > > > a common
> > > > > > library implies a common handle struct here. I think the
> > > > > > options are:
> > > > > > 
> > > > > > - common struct definition (native_handle_t + dmabuf fd(s)
> > > > > > + width,
> > > > > > height, stride, format, usage, etc.)
> > > > > > - common struct plus inline accessor functions
> > > > > > - common opaque struct plus accessor library
> > > > > 
> > > > > So these common parts would be much like what currently
> > > > > exists in
> > > > > minigbm/cros_gralloc_handle.h and
> > > > > gbm_gralloc/gralloc_drm_handle.h
> > > > > then, but extended with the above suggestions?
> > > > 
> > > > Yes, but which part do you think needs to be extended?
> > > > 
> > > > As we discussed on IRC, I think perhaps we just need to change
> > > > the
> > > > handle format field in gralloc_drm_handle.h to use fourcc (aka
> > > > DRM and
> > > > GBM formats) instead of the Android format. I think all the
> > > > users just
> > > > end up converting it to their own internal format anyway.
> > > 
> > > We keep the handle opaque for consumers and even minigbm
> > > dereferences
> > > it only when creating/registering the buffer, further using the
> > > handle
> > > pointer only as a key to internal bookkeeping map.
> > 
> > What you say implies that you don't need any metadata in the
> > handle,
> > but you do have pretty much all the same data. Whether you
> > 
> > > Relying on the struct itself is not only error prone, as there is
> > > no
> > > way to check if the struct on gralloc implementation side matches
> > > what
> > > we expect, but also makes it difficult to change the handle
> > > struct at
> > > our convenience.
> > 
> > How does a library solve this?
> > 
> > Everything in Android gets built together and the handle pretty
> > much
> > has to stay the same across components in any implementation I've
> > seen. Maybe someday that will change and we'll need versioning and
> > backwards compatibility, but for now that's unnecessary complexity.
> > We'd have to get to a single, well controlled and defined handle
> > first
> > anyway before we start versioning.
> > 
> > Anyone is still free to change things downstream however they want.
> > We're only talking about what does mainline/upstream do.
> > 
> > > > > > Also, I don't think whatever is standardized should live in
> > > > > > Mesa.
> > > > > > There's a need to support drm_hwcomposer (which has the
> > > > > > same
> > > > > > dependencies as mesa) with non-Mesa GL implementations
> > > > > > (yes, vendor
> > > > > > binaries).
> > > > > 
> > > > > Excluding Mesa and the composer leaves us with the allocator
> > > > > or
> > > > > creating a new library.
> > > > > I would assume that creating a new library is the worse
> > > > > option.
> > > > 
> > > > Why excluding the composer? If we have to pick, I'd put it
> > > > there or
> > > > perhaps libdrm?
> > > 
> > > There is neither a single central composer nor libdrm is used on
> > > every
> > > system... (The latter is not even used by Intel driver in Mesa
> > > anymore.)
> > 
> > I think you are confusing libdrm_intel which was removed with
> > li

Re: [Mesa-dev] [PATCH 3/3] st/mesa: remove unneeded #include in st_format.h

2017-12-05 Thread Ilia Mirkin
Series is

Reviewed-by: Ilia Mirkin 

On Tue, Dec 5, 2017 at 11:59 AM, Brian Paul  wrote:
> ---
>  src/mesa/state_tracker/st_format.h | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_format.h 
> b/src/mesa/state_tracker/st_format.h
> index 3dd9c10..466b5d0 100644
> --- a/src/mesa/state_tracker/st_format.h
> +++ b/src/mesa/state_tracker/st_format.h
> @@ -33,7 +33,6 @@
>  #include "main/formats.h"
>  #include "main/glheader.h"
>
> -#include "pipe/p_defines.h"
>  #include "pipe/p_format.h"
>
>  #ifdef __cplusplus
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/1] radv: use a faster version for nir_op_pack_half_2x16

2017-12-05 Thread Samuel Pitoiset
This patch is ported from RadeonSI and it has two effects.

It fixes a rendering issue which affects F1 2017 and Dawn
of War 3 (Vega only) because LLVM was ending up by generating
the new v_mad_mix_{hi,lo} instructions which appear to be
buggy in some way. Not sure if Mesa is generating something
wrong or if the issue is in LLVM only. Anyway, that explains why
the DOW3 issue can't be reproduced with GL on Vega.

It also improves performance because v_cvt_pkrtz_f16 is faster,
and because I guess the rounding mode behaviour is similar between
GL and VK, we can use it. About performance, it improves Talos
by +3/4% but I don't see any other impacts.

No CTS regressions on Polaris (Vega in progress).

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_nir_to_llvm.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 96ba289a81..663b27d265 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1426,23 +1426,13 @@ static LLVMValueRef emit_bitfield_insert(struct 
ac_llvm_context *ctx,
 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
LLVMValueRef src0)
 {
-   LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-   int i;
LLVMValueRef comp[2];
 
src0 = ac_to_float(ctx, src0);
comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
-   for (i = 0; i < 2; i++) {
-   comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
-   comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
-   comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
-   }
-
-   comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
-   comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
 
-   return comp[0];
+   return ac_build_cvt_pkrtz_f16(ctx, comp);
 }
 
 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/1] radv: bugfix for F1 2017 and DOW3 on Vega

2017-12-05 Thread Samuel Pitoiset
Hi folks,

It took me a while to figure out the issue which is addressed by the
following patch, but I think it should be the right one (see the patch
description for more explanations).

Keep in mind that DOW3 is still affected by one other issue (Vega only).
Hopefully, I will be able to write a patch for that one soon.

Thanks.

Samuel Pitoiset (1):
  radv: use a faster version for nir_op_pack_half_2x16

 src/amd/common/ac_nir_to_llvm.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 23/44] i965/fs: Enables 16-bit load_ubo with sampler

2017-12-05 Thread Chema Casanova
El 30/11/17 a las 23:58, Jason Ekstrand escribió:
> On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo
> mailto:jmcasan...@igalia.com>> wrote:
>
> load_ubo is using 32-bit loads as uniforms surfaces have a 32-bit
> surface format defined. So when reading 16-bit components with the
> sampler we need to unshuffle two 16-bit components from each 32-bit
> component.
>
> Using the sampler avoids the use of the byte_scattered_read message
> that needs one message for each component and is supposed to be
> slower.
>
> In the case of SKL+ we take advantage of a hardware feature that
> automatically defines a channel mask based on the rlen value, so on
> SKL+ we only use half of the registers without using a header in the
> payload.
> ---
>  src/intel/compiler/brw_fs.cpp           | 31
> +++
>  src/intel/compiler/brw_fs_generator.cpp | 10 --
>  2 files changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> index 1ca4d416b2..9c543496ba 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -184,9 +184,17 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> fs_builder &bld,
>      * a double this means we are only loading 2 elements worth of
> data.
>      * We also want to use a 32-bit data type for the dst of the
> load operation
>      * so other parts of the driver don't get confused about the
> size of the
> -    * result.
> +    * result. On the case of 16-bit data we only need half of the
> 32-bit
> +    * components on SKL+ as we take advance of using message
> return size to
> +    * define an xy channel mask.
>      */
> -   fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> +   fs_reg vec4_result;
> +   if (type_sz(dst.type) == 2 && (devinfo->gen >= 9)) {
> +      vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
> +      vec4_result = retype(vec4_result, BRW_REGISTER_TYPE_HF);
> +   } else {
> +      vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> +   }
>
>     fs_inst *inst =
> bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
>                              vec4_result, surf_index, vec4_offset);
>     inst->size_written = 4 *
> vec4_result.component_size(inst->exec_size);
> @@ -197,8 +205,23 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> fs_builder &bld,
>     }
>
>     vec4_result.type = dst.type;
> -   bld.MOV(dst, offset(vec4_result, bld,
> -                       (const_offset & 0xf) /
> type_sz(vec4_result.type)));
> +
> +   if (type_sz(dst.type) == 2) {
> +      /* 16-bit types need to be unshuffled as each pair of
> 16-bit components
> +       * is packed on a 32-bit compoment because we are using a
> 32-bit format
> +       * in the surface of uniform that is read by the sampler.
> +       * TODO: On BDW+ mark when an uniform has 16-bit type so we
> could setup a
> +       * surface format of 16-bit and use the 16-bit return
> format at the
> +       * sampler.
> +       */
> +      vec4_result.stride = 2;
> +      bld.MOV(dst, byte_offset(offset(vec4_result, bld,
> +                                      (const_offset & 0x7) / 4),
> +                               (const_offset & 0x7) / 2 % 2 * 2));
> +   } else {
> +      bld.MOV(dst, offset(vec4_result, bld,
> +                          (const_offset & 0xf) /
> type_sz(vec4_result.type)));
> +   }
>
>
> This seems overly complicated.  How about something like

> fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4);
> switch (type_sz(dst.type)) {
> case 2:
>    shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1);
>    bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1));
>    break;
> case 4:
>    bld.MOV(dst, dw);
>    break;
> case 8:
>    shuffle_32bit_load_result_to_64bit_data(bld, dst, dw, 1);
>    break;
> default:
>    unreachable();
> }

This implementation it is really more clear. Tested and works perfectly
fine.

>  
>
>  }
>
>  /**
> diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> index a3861cd68e..00a4e29147 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -1381,12 +1381,18 @@
> fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
>     uint32_t simd_mode, rlen, mlen;
>     if (inst->exec_size == 16) {
>        mlen = 2;
> -      rlen = 8;
> +      if (type_sz(dst.type) == 2 && (devinfo->gen >= 9))
> +         rlen = 4;
> +      else
> +         rlen = 8;
>
>
> I'm not sure what I think of this.  We intentionally use a vec4 today
> instead of a single float

Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2017-12-05 Thread Mark Janes
Jordan Justen  writes:

> On 2017-11-08 17:26:47, Timothy Arceri wrote:
>> Reviewed-by: Timothy Arceri 
>> 
>> Mark may want to consider adding some of the once a day type CI runs for 
>> this. For example running the test suite for two consecutive runs on the 
>> same build so that the second run uses the shader cache and also a 
>> second run the uses MESA_GLSL=cache_fb to force testing of the cache 
>> fallback path.
>
> Yeah. We discussed this previously, but I don't think it's been
> implemented yet. My opinion is that it could perhaps be a weekly test.

This automation is implemented now. It found the issues reported in
https://bugs.freedesktop.org/show_bug.cgi?id=103988

My opinion is that this test strategy is inadequate.  Adding a dimension
to the test matrix has high cost, especially when combined with other
dimensions of the test matrix (does shader cache need to be tested for
32/64 bit builds? For hswgt1/hswgt2/hswgt3e?).

Since 103988 is not hardware specific, and is not a regression, it's not
something that could only have been caught by CI.  I'm surprised to find
it at this point, considering piglit was used to validate the feature.

I'd be happy if there was at least a smoke test where complex programs
are cached, with the intent of exercising the shader cache mechanism
directly.  It doesn't have to be exhaustive to be effective.  Seems like
a good idea to at least directly test the issue that was fixed in
103988 tests.

> We also discussed a nir serialization test, similar to our current nir
> clone daily test. I don't think this is implemented yet either.
>
> -Jordan
>
>> 
>> On 09/11/17 11:58, Jordan Justen wrote:
>> > f9d5a7add42af5a2e4410526d1480a08f41317ae along with
>> > a16dc04ad51c32e5c7d136e4dd6273d983385d3f appears to have fixed the one
>> > known regression with shader cache. (Deus Ex instability.)
>> > 
>> > We should enable the shader cache by default to stabilize it before
>> > the next major Mesa release.
>> > 
>> > Signed-off-by: Jordan Justen 
>> > ---
>> >   docs/relnotes/17.4.0.html  | 2 +-
>> >   src/mesa/drivers/dri/i965/brw_disk_cache.c | 3 ---
>> >   2 files changed, 1 insertion(+), 4 deletions(-)
>> > 
>> > diff --git a/docs/relnotes/17.4.0.html b/docs/relnotes/17.4.0.html
>> > index f81b5bd62d3..48dcd5cce38 100644
>> > --- a/docs/relnotes/17.4.0.html
>> > +++ b/docs/relnotes/17.4.0.html
>> > @@ -44,7 +44,7 @@ Note: some of the new features are only available with 
>> > certain drivers.
>> >   
>> >   
>> >   
>> > -Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE 
>> > environment variable is set to "0" or "false"
>> > +Disk shader cache support for i965
>> >   
>> >   
>> >   Bug fixes
>> > diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c 
>> > b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> > index 853ea98af03..cd0524c5cbf 100644
>> > --- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> > +++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
>> > @@ -420,9 +420,6 @@ void
>> >   brw_disk_cache_init(struct brw_context *brw)
>> >   {
>> >   #ifdef ENABLE_SHADER_CACHE
>> > -   if (env_var_as_boolean("MESA_GLSL_CACHE_DISABLE", true))
>> > -  return;
>> > -
>> >  char renderer[10];
>> >  MAYBE_UNUSED int len = snprintf(renderer, sizeof(renderer), 
>> > "i965_%04x",
>> >  brw->screen->deviceID);
>> > 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

--- Comment #3 from Samuel Pitoiset  ---
Something is wrong in the NIR.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 00/44] anv: SPV_KHR_16bit_storage/VK_KHR_16bit_storage for gen8+

2017-12-05 Thread Chema Casanova
El 05/12/17 a las 06:16, Jason Ekstrand escribió:
> A couple of notes:
>
>  1) I *think* I gave you enough reviews to land the UBO/SSBO part and
> the optimizations in 26-28.  If reviews are still missing anywhere,
> please let me know.  If not, let's try and get that part landed.

The series is almost ready to land, I have only pending to address your
feedback about use untyped_read for reading vec3 ssbos.

The only missing explicit R-b is that " [PATCH v4 28/44] i965/fs: Use
untyped_surface_read for 16-bit load_ssbo" and "[PATCH v4 23/44]
i965/fs: Enables 16-bit load_ubo with sampler" i've just answered your
review to confirm the R-b.

I expect to finish today vec3 ssbo and send the series to Jenkins before
landing, confirm your "pending" R-b, do a last rebase to master and ask
for a push.

>
>  2) I send out a patch to rewrite assign_constant_locations which I
> think should make it automatically handle 8 and 16-bit values as
> well.  I'd rather do that than more special casing if everything works
> out ok.

I'm testing this patch with 16-bits and make sure whatever is needed to
have 16-bit working.

>
>  3) I sent out a series of patches to enable pushing of UBOs in
> Vulkan.  If we're not careful, these will clash with 16bit storage as
> UBO support suddenly has to imply push constant support.  That said,
> I"m willing to wait at least a little while before landing them to let
> us get 16bit push constant support sorted out.  The UBO pushing
> patches give us a nice little performance boost but we're nowhere near
> a release and I don't want it blocking you.

That would be my next priority, so we would only have pending to land
the 16-bit input/output support to finish this extension.

Chema

> On Wed, Nov 29, 2017 at 6:07 PM, Jose Maria Casanova Crespo
> mailto:jmcasan...@igalia.com>> wrote:
>
> Hello,
>
> this is the V4 series for the implementation of the
> SPV_KHR_16bit_storage
> and VK_KHR_16bit_storage extensions on the anv vulkan driver, in
> addition
> to the GLSL and NIR support needed.
>
> The original series can be found here [1], the following v2 [2]
> and v3 [3].
>
> In short V4 includes the following:
>
>  * Reorder the series to enable features as they are implemented,
> the series
>    now enables first UBO and SSBO support, and then inputs/outputs and
>    finally push constants.
>  * Support the byte scattered read/write messages with different
> bit sizes
>    byte/word/dword.
>  * Refactor of the store_ssbo code and also fix stores when
> writemask was .yz
>  * Uses the sampler for load_ubo avoiding the initial
> implementation of
>    the series using byte_scattered_read.
>  * Addressed all the feedback provided by Jason and Topi on v3 review.
>
> This series is also available at:
>
> https://github.com/Igalia/mesa/tree/wip/VK_KHR_16bit_storage-rc4
> 
>
> The objective is to start landing part of this series, all
> feedback has been
> addressed for SSBO and UBO. But for input/outputs features it will
> probably
> need another iteration as was not completely reviewed. It is also
> needed
> to define the approach for push constants issues before of after
> landing
> the support with this implementation.
>
> Patches 1-5 and 8-17 have already been reviewed. Patch 7 was already
> reviewed but as it has changed too much i would appreciate another
> review. When patches until 25 or 28 are reviewed we could land
> UBOs and
> SSBOs support.
>
> Finally an updated overview of the patches:
>
> Patches 1-2 add 16-bit float, int and uint types to GLSL. This is
> needed because NIR uses GLSL types internally. We use the enums
> already defined at AMD_gpu_shader_half_float and NV_gpu_shader
> extensions. Patch 2 updates mesa/st, in order to avoid warnings for
> types not handled on a switch.
>
> Patches 3-6 add NIR support for those new GLSL 16-bit types,
> conversion opcodes, and rounding modes for float to half-float
> conversions.
>
> Patches 7-9 add the SPIR-V (SPV_KHR_16bit_storage) to NIR support.
>
> Patches 10-12 add general 16-bit support for i965. This includes
> handling of new types on several general purpose methods,
> update/remove some asserts.
>
> Patches 14-17 add support for 32 to 16-bit conversions for i965,
> including rounding mode opcodes (needed for float to half-float
> conversions), and an optimization that removes superfluous rounding
> mode sets.
>
> Patches 18-21 add and use two new messages: byte scattered read and
> write. Those were needed because untyped surface message has a fixed
> 32-bit write size. Those messages are used on the 16-bit support of
> store SSBO, load SSBO and load shared.
>
> Patch 22 adds helpers to allow un/shuffle 16-bit components in 32-

Re: [Mesa-dev] [PATCH 3/8] glapi/check_table: Remove 'extern "C"' block

2017-12-05 Thread Dylan Baker
Quoting Emil Velikov (2017-12-05 07:36:25)
> On 4 December 2017 at 23:57, Dylan Baker  wrote:
> > Quoting Emil Velikov (2017-11-23 11:04:34)
> >> On 20 November 2017 at 23:12, Dylan Baker  wrote:
> >> > This doesn't actually accomplish what it's meant to do, as extern C
> >> > doesn't undefine __cplusplus, so the included headers define a template
> >> > (because __cplusplus is defined), but then that code is in an 'extern
> >> > "C"' block, and explosion.
> >> >
> >> The commit does sound a bit off, admittedly I don't fully grok what
> >> you're trying to say.
> >>
> >> The extern "C" { } construct annotates anything within as if it were a
> >> normal C code. Thus functions will have the C linkage (otherwise
> >> they'll have the C++ mangling) and structs are using the C rules.
> >> So if you have a C++ header further down the chain it will be
> >> considered as C and hell will break loose.
> >>
> >> This patch is correct, just sent a fix for glapitable.h, while glapi.h
> >> was handled with d38839a498b38b745f5bdf6b96fbafa2792670be.
> >>
> >> As-is "struct _glapi_table" will be interpret as C++ one, which may
> >> not have the same guarantees about sizeof and pointer arithmetic that
> >> we're using in the test.
> >>
> >> Hope the above provides some inspiration towards a better commit message.
> >>
> >> Thanks
> >> Emil
> >
> > So here's the problem I run into:
> >
> > In file included from ../include/c99_compat.h:28:0,
> >  from ../src/util/macros.h:29,
> >  from ../src/mapi/glapi/glapi.h:47,
> >  from ../src/mapi/glapi/tests/check_table.cpp:28:
> > ../include/no_extern_c.h:47:1: error: template with C linkage
> >  template class _IncludeInsideExternCNotPortable;
> >  ^~~~
> > [54/93] Compiling C++ object 'src/gtest/gtest@sta/src_gtest-all.cc.o'.
> >
> > So, my commit message still makes sense to me, since you can't put a 
> > template in
> > an extern "C" block, and that template is guarded by __cplusplus, which is 
> > still
> > defined even in an extern "C" block.
> >
> This is one symptom, of the issue described before. The code in
> c99_compat.h was explicitly added by Jose to make the issue obvious.
> 
> > Do you have a commit message in mind that is better?
> 
> Feel free to reuse as much or as little of my explanation. Borrowing
> from analogous fixes also works ;-)
> Some examples:
> 237dcb4aa7c39c59bfd225ae3d73caf709be216d
> a177a13033cf9356eb26e8757055037a54268a18
> 36ad8265489fc66ab45b9b74dafa353a93bdb03b
> 
> I'm going through 4/8 - seems like there's a bunch of other bugs around :-\
> Feel free to push the remainder of the series with the nitpicks addressed.
> 
> Thanks
> Emil

The APPLE extensions one?


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

--- Comment #4 from Samuel Pitoiset  ---
Possible fix:

diff --git i/src/amd/vulkan/radv_shader.c w/src/amd/vulkan/radv_shader.c
index 4a3fdfa80e..0b19d23fa2 100644
--- i/src/amd/vulkan/radv_shader.c
+++ w/src/amd/vulkan/radv_shader.c
@@ -66,6 +66,7 @@ static const struct nir_shader_compiler_options nir_options =
{
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_ffma = true,
+   .lower_bitfield_insert = true,
.max_unroll_iterations = 32
 };

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 103126] glthread doesn't offload anything in Witcher 2

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103126

--- Comment #2 from Alexander Tsoy  ---
I had the same problem with other eON ports: Spec Ops: The Line and Overlord at
least. It's turned out that it's due to disabled assembly in my 32-bit mesa
build (--disable-asm). Not sure if it worth fixing this. Leaving the bug open
for developers to decide.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: Fix building gallium media targets with gallium-xlib glx

2017-12-05 Thread Dylan Baker
Signed-off-by: Dylan Baker 
---
 meson.build | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 3e8ea7d17e0..bf6bd551f08 100644
--- a/meson.build
+++ b/meson.build
@@ -1107,9 +1107,9 @@ if with_platform_x11
 dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
 dep_xxf86vm = dependency('xxf86vm', required : false)
   endif
-  if with_any_vk or with_glx == 'dri' or
-  (with_gallium_vdpau or with_gallium_xvmc or with_gallium_omx or
-   with_gallium_xa)
+  if (with_any_vk or with_glx == 'dri' or
+   (with_gallium_vdpau or with_gallium_xvmc or with_gallium_omx or
+with_gallium_xa))
 dep_xcb = dependency('xcb')
 dep_x11_xcb = dependency('x11-xcb')
   endif
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Kristian Høgsberg
On Tue, Dec 5, 2017 at 8:49 AM, Jason Ekstrand  wrote:
> On Tue, Dec 5, 2017 at 8:23 AM, Kristian Høgsberg 
> wrote:
>>
>> On Tue, Dec 5, 2017 at 7:57 AM, Jason Ekstrand 
>> wrote:
>> > On Tue, Dec 5, 2017 at 1:22 AM, Daniel Stone 
>> > wrote:
>> >>
>> >> Hi,
>> >>
>> >> On 18 November 2017 at 00:10, Jason Ekstrand 
>> >> wrote:
>> >> > This fixes a bug where we were taking the tiling from the BO
>> >> > regardless
>> >> > of what the modifier said.  When we got images in from Vulkan where
>> >> > it
>> >> > doesn't set the tiling on the BO, we would treat them as linear even
>> >> > though the modifier expressly said to treat it as Y-tiled.
>> >>
>> >> For some reason I thought Ken had already reviewed this and it landed,
>> >> until Kristian mentioned last night.
>> >
>> >
>> > There's been some discussion about what the right thing to do is here.
>> > I've
>> > got a patch (which is now landed) which will make us set the tiling from
>> > Vulkan just like GL does.  It's rather annoying but I think that's
>> > marginally better.
>>
>> I don't know that it's better - it reinforces the notion that you have
>> to make the kernel-side tiling match for the dma-buf import extension
>> to work. I think it'd be better to land these patches (btw, Rb: me
>> (can you even do parenthetical Rbs?))
>
>
> I'll allow it. :)
>
>>
>> and call set-tiling in mesa.
>
>
> Yeah, I think that's reasonable.  Really, this should only be a problem if
> we have a bunch of users trying to use the same BO with different modifiers.
> It can happen in theory (immagine two images in the same BO, one X and one
> Y) but it's a pretty crazy case.

It's not a complete solution, of course, but at least we're kicking
the can down the road of increasingly esoteric use cases.

>>
>> The
>> assumption being that if the tiling doesn't match the modifier, then
>> maybe the producer didn't care about kernel tiling? Alternatively,
>> could we set bo->tiling = INCONSISTENT or such in mesa and then use
>> that to avoid the gtt map paths?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/miptree: Use the tiling from the modifier instead of the BO

2017-12-05 Thread Kristian Høgsberg
On Tue, Dec 5, 2017 at 9:43 AM, Kristian Høgsberg  wrote:
> On Tue, Dec 5, 2017 at 8:49 AM, Jason Ekstrand  wrote:
>> On Tue, Dec 5, 2017 at 8:23 AM, Kristian Høgsberg 
>> wrote:
>>>
>>> On Tue, Dec 5, 2017 at 7:57 AM, Jason Ekstrand 
>>> wrote:
>>> > On Tue, Dec 5, 2017 at 1:22 AM, Daniel Stone 
>>> > wrote:
>>> >>
>>> >> Hi,
>>> >>
>>> >> On 18 November 2017 at 00:10, Jason Ekstrand 
>>> >> wrote:
>>> >> > This fixes a bug where we were taking the tiling from the BO
>>> >> > regardless
>>> >> > of what the modifier said.  When we got images in from Vulkan where
>>> >> > it
>>> >> > doesn't set the tiling on the BO, we would treat them as linear even
>>> >> > though the modifier expressly said to treat it as Y-tiled.
>>> >>
>>> >> For some reason I thought Ken had already reviewed this and it landed,
>>> >> until Kristian mentioned last night.
>>> >
>>> >
>>> > There's been some discussion about what the right thing to do is here.
>>> > I've
>>> > got a patch (which is now landed) which will make us set the tiling from
>>> > Vulkan just like GL does.  It's rather annoying but I think that's
>>> > marginally better.
>>>
>>> I don't know that it's better - it reinforces the notion that you have
>>> to make the kernel-side tiling match for the dma-buf import extension
>>> to work. I think it'd be better to land these patches (btw, Rb: me
>>> (can you even do parenthetical Rbs?))
>>
>>
>> I'll allow it. :)
>>
>>>
>>> and call set-tiling in mesa.
>>
>>
>> Yeah, I think that's reasonable.  Really, this should only be a problem if
>> we have a bunch of users trying to use the same BO with different modifiers.
>> It can happen in theory (immagine two images in the same BO, one X and one
>> Y) but it's a pretty crazy case.
>
> It's not a complete solution, of course, but at least we're kicking
> the can down the road of increasingly esoteric use cases.
>
>>>
>>> The
>>> assumption being that if the tiling doesn't match the modifier, then
>>> maybe the producer didn't care about kernel tiling? Alternatively,
>>> could we set bo->tiling = INCONSISTENT or such in mesa and then use
>>> that to avoid the gtt map paths?

Actually, for compressed textures, you already must have a way to deal
with glTexSubImage2D and similar without falling back to GTT maps. Can
you just handle miptrees with mismatched modifiers (or perhaps just
any valid modifier) the same way?

Kristian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radv: enable lowering of nir_op_bitfield_extract

2017-12-05 Thread Samuel Pitoiset
This instruction should also be lowered correctly.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 0b19d23fa2..044bcd0641 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -67,6 +67,7 @@ static const struct nir_shader_compiler_options nir_options = 
{
.lower_extract_word = true,
.lower_ffma = true,
.lower_bitfield_insert = true,
+   .lower_bitfield_extract = true,
.max_unroll_iterations = 32
 };
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radv: enable lowering of nir_op_bitfield_insert

2017-12-05 Thread Samuel Pitoiset
Otherwise it's replaced by
"vec1 32 ssa_108 = load_const (0x /* 0.00 */)", which
looks clearly wrong.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104119
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 4a3fdfa80e..0b19d23fa2 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -66,6 +66,7 @@ static const struct nir_shader_compiler_options nir_options = 
{
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_ffma = true,
+   .lower_bitfield_insert = true,
.max_unroll_iterations = 32
 };
 
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104119] radv: OpBitFieldInsert produces 0 with a loop counter for Insert

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104119

--- Comment #5 from Samuel Pitoiset  ---
Patch is on the list.

https://patchwork.freedesktop.org/series/34930/

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/st: move cloning of NIR shader for compute

2017-12-05 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Dec 5, 2017 at 4:05 PM, Rob Clark  wrote:
> Since in the NIR case, driver takes ownership of the NIR shader, we need
> to clone what is passed to the driver.  Normally this is done as part of
> creating the shader variant (where is clone is anyways needed).  But
> compute shaders have no variants, so we were cloning earlier.
>
> The problem is that after the NIR linking optimizations, we ended up
> cloning *before* all the lowering passes where done.
>
> So move this into st_get_cp_variant(), to make compute shaders work more
> like other shader stages.
>
> Signed-off-by: Rob Clark 
> ---
>  src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +-
>  src/mesa/state_tracker/st_program.c   | 5 -
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
> b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> index 5d18e7b62bf..36adf55cd45 100644
> --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
> @@ -431,7 +431,7 @@ set_st_program(struct gl_program *prog,
>stcp = (struct st_compute_program *)prog;
>stcp->shader_program = shader_program;
>stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
> -  stcp->tgsi.prog = nir_shader_clone(NULL, nir);
> +  stcp->tgsi.prog = nir;
>break;
> default:
>unreachable("unknown shader stage");
> diff --git a/src/mesa/state_tracker/st_program.c 
> b/src/mesa/state_tracker/st_program.c
> index 5c0a58104fc..258f5e47cbe 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -1659,7 +1659,10 @@ st_get_cp_variant(struct st_context *st,
>v = CALLOC_STRUCT(st_basic_variant);
>if (v) {
>   /* fill in new variant */
> - v->driver_shader = pipe->create_compute_state(pipe, tgsi);
> + struct pipe_compute_state cs = *tgsi;
> + if (tgsi->ir_type == PIPE_SHADER_IR_NIR)
> +cs.prog = nir_shader_clone(NULL, tgsi->prog);;
> + v->driver_shader = pipe->create_compute_state(pipe, &cs);
>   v->key = key;
>
>   /* insert into list */
> --
> 2.13.6
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 104035] When will the egl introp for vaapi be implemented

2017-12-05 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104035

Vedran Miletić  changed:

   What|Removed |Added

 CC||ved...@miletic.net

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Pohjolainen, Topi
On Tue, Dec 05, 2017 at 10:26:33AM +, Rogovin, Kevin wrote:
> Hi,
> 
> 
> >> Here are my comments of the patch posted:
> >> 
> >>  1.  it is essentially replication and moving around of the code of the 
> >> patch series posted earlier but missing various
> >>   important bits: preventing the sampler from using the auxiliary 
> >> buffer (this requires to modify surface state
> >>   sent in brw_wm_surfaces.c). It also does not cover blorp 
> >> sufficiently (blorp might read from an ASTC5x5
> >>   and there are more paths in blorp than blorp_surf_for_miptree() that 
> >> sample from surfaces.
> >> 
> 
> >Can you explain both more in detail? Resolves done in
> >brw_predraw_resolve_inputs() mean that there is nothing interesting in the 
> >aux buffers and surface setup won't therefore enable auxiliary for texture 
> >surfaces.
> 
> That there is nothing interesting is irrelevant to the sampler if the 
> SURFACE_STATE fed includes the auxiliary buffer, thus when one sets up the 
> SURFACE_STATE for sampler, the auxiliary buffer cannot be mentioned in the 
> GPU command; The sampler will always try to read the auxiliary buffer if it 
> is given the opportunity to do so. Indeed, it is quite feasible that less 
> bandwidth is consumed if the sampler is given the chance to read an auxiliary 
> buffer in place of the buffer; as such even if the surface is resolved one 
> may wish to feed the sampler the auxiliary buffer. Indeed, for HiZ, i965 
> programs to use the HiZ auxiliary buffer even if the depth buffer is fully 
> resolved (see inte_mipmap_tree_sample_with_hiz() in intel_mipmap_tree.c).
> 
> > In case of blorp, as far as I know all operations sampling something should 
> > go thru blorp_surf_for_miptree(). Can you point out cases that don't?
> 
> Blorp is used in more than blorp_surf_for_miptree(), for example implementing 
> GetTexImage(). Indeed, it is possible for blorp to sample from an ASTC5x5 
> (you can see this handled in the patch series I posted). I chose the hammer 
> that the default is to just assume blorp is going to access auxiliary buffers 
> unless a flag is set when the caller knows that blorp is going to sample from 
> an astc5x5 (against see my patch series).
> 
> >Right. In the case of sampling both aux and astc5x5 in the same draw cycle 
> >the only thing to do is to disable aux. With my question of direction I 
> >meant the texture 
> > cache flush between two cycles. Do we need to flush in both cases
> > 1) ASTC5x5 in first cycle and AUX in the following
> > 2) AUX in first cycle and ASTC5x5 in the following
> 
> YES we need to flush in both cases. What is happening is that the sampler 
> hardware is bugged. Let us suppose it was bugged in only 1 direction, take 1. 
> Then if the sampler first samples from an ASTC5x5 then an AUX it would not 
> hang, but the other way it would. However, if there are multiple draws in 
> flight where one samples from an ASTC5x5 and the other does not, the command 
> buffer order gives ZERO guarantee that the sampler will sample in that order 
> because fragments get executed out-of-order even across draw calls even 
> within a subslice (this is why sendc is needed at end of shader in GEN).
> 

I need to clarify this bit a little more. In the current setup we have only
one draw cycle in flight per context that uses sample engine. There may be
blitter calls in parallel (although I'm not sure) but they don't use sampling
engine anyway.

The draw cycle itself may have multiple draws programmed into it but as they
all are based on the same surface setup there is naturally no flushing problem.
Surfaces with auxiliary would be resolved before the draw and programmed
without auxiliary buffer.

Are you saying that this bug extends over hardware context?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Jason Ekstrand
On Tue, Dec 5, 2017 at 10:17 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Tue, Dec 05, 2017 at 10:26:33AM +, Rogovin, Kevin wrote:
> > Hi,
> >
> >
> > >> Here are my comments of the patch posted:
> > >>
> > >>  1.  it is essentially replication and moving around of the code of
> the patch series posted earlier but missing various
> > >>   important bits: preventing the sampler from using the auxiliary
> buffer (this requires to modify surface state
> > >>   sent in brw_wm_surfaces.c). It also does not cover blorp
> sufficiently (blorp might read from an ASTC5x5
> > >>   and there are more paths in blorp than blorp_surf_for_miptree()
> that sample from surfaces.
> > >>
> >
> > >Can you explain both more in detail? Resolves done in
> > >brw_predraw_resolve_inputs() mean that there is nothing interesting in
> the aux buffers and surface setup won't therefore enable auxiliary for
> texture surfaces.
> >
> > That there is nothing interesting is irrelevant to the sampler if the
> SURFACE_STATE fed includes the auxiliary buffer, thus when one sets up the
> SURFACE_STATE for sampler, the auxiliary buffer cannot be mentioned in the
> GPU command; The sampler will always try to read the auxiliary buffer if it
> is given the opportunity to do so. Indeed, it is quite feasible that less
> bandwidth is consumed if the sampler is given the chance to read an
> auxiliary buffer in place of the buffer; as such even if the surface is
> resolved one may wish to feed the sampler the auxiliary buffer. Indeed, for
> HiZ, i965 programs to use the HiZ auxiliary buffer even if the depth buffer
> is fully resolved (see inte_mipmap_tree_sample_with_hiz() in
> intel_mipmap_tree.c).
>

We do have code which checks for the resolve state and disables the
auxiliary buffer.  However, I don't like relying on it for correctness as
that's bitten us before.  I think it'd be better to check the W/A state for
ASTC5x5, assert that everything is resolved, and manually disable aux in
that case.


> > > In case of blorp, as far as I know all operations sampling something
> should go thru blorp_surf_for_miptree(). Can you point out cases that don't?
> >
> > Blorp is used in more than blorp_surf_for_miptree(), for example
> implementing GetTexImage(). Indeed, it is possible for blorp to sample from
> an ASTC5x5 (you can see this handled in the patch series I posted). I chose
> the hammer that the default is to just assume blorp is going to access
> auxiliary buffers unless a flag is set when the caller knows that blorp is
> going to sample from an astc5x5 (against see my patch series).
>

This isn't true.  100% of the intel_mipmap_tree -> blorp_surf translations
are handled by that function.  It's a perfectly reasonable place to handle
these things.  It could also be handled in genX(blorp_exec) if that makes
someone more comfortable.

> >Right. In the case of sampling both aux and astc5x5 in the same draw
> cycle the only thing to do is to disable aux. With my question of direction
> I meant the texture
> > > cache flush between two cycles. Do we need to flush in both cases
> > > 1) ASTC5x5 in first cycle and AUX in the following
> > > 2) AUX in first cycle and ASTC5x5 in the following
> >
> > YES we need to flush in both cases. What is happening is that the
> sampler hardware is bugged. Let us suppose it was bugged in only 1
> direction, take 1. Then if the sampler first samples from an ASTC5x5 then
> an AUX it would not hang, but the other way it would. However, if there are
> multiple draws in flight where one samples from an ASTC5x5 and the other
> does not, the command buffer order gives ZERO guarantee that the sampler
> will sample in that order because fragments get executed out-of-order even
> across draw calls even within a subslice (this is why sendc is needed at
> end of shader in GEN).
> >
>
> I need to clarify this bit a little more. In the current setup we have only
> one draw cycle in flight per context that uses sample engine. There may be
> blitter calls in parallel (although I'm not sure) but they don't use
> sampling
> engine anyway.
>
> The draw cycle itself may have multiple draws programmed into it but as
> they
> all are based on the same surface setup there is naturally no flushing
> problem.
> Surfaces with auxiliary would be resolved before the draw and programmed
> without auxiliary buffer.
>

The problem is that a single invalidate doesn't actually cause a
synchronization point in the rendering operation.  All it does is torch the
texture cache and it does so immediately and in parallel with currently
active rendering.  If you can't have ASTC5x5 in the texture cache with a
CCS_E surface, then we need to do a CS stall to ensure that the previous
draw is complete before we invalidate.  Otherwise, if the draw with ASTC5x5
is still in-flight, the texture cache will immediately start to get
populated with ASTC5x5 data again.
___
mesa-dev mailing list

Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Rogovin, Kevin

> Are you saying that this bug extends over hardware context?

Different HW contexts imply different execbuffer2 ioctl's. The kernel inserts a 
full blown flush of everything after (or before, I cannot remember) each 
execbuffer2 call. This way there is context isolation in HW buggineness.

-Kevin
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Rogovin, Kevin
Hi,

>This isn't true.  100% of the intel_mipmap_tree -> blorp_surf translations are 
>handled by that function. 
>  It's a perfectly reasonable place to handle these things.  It could also be 
> handled in genX(blorp_exec) if that makes someone more comfortable.

This is where I placed the ASTC enumeration setting, in genX(blorp_exec). I 
added a boolean signaling if the caller to blorp believed it would sample from 
an ASTC, if it did it sets the enum as ASTC otherwise as AUX.

I confess I am not super familiar with blorp, but as far as I can tell, the 
only way for blorp to read an ASTC is for GetTexImage/GetTexSubImage since an 
ASTC surface cannot be used for an FBO.

> The problem is that a single invalidate doesn't actually cause a 
> synchronization point in the rendering operation.  All it does is torch the 
> texture cache and it 
> does so immediately and in parallel with currently active rendering.  If you 
> can't have ASTC5x5 in the texture cache with a CCS_E surface, then we need to
> do a CS stall to ensure that the previous draw is complete before we 
> invalidate.  Otherwise, if the draw with ASTC5x5 is still in-flight, the 
> texture cache will 
> immediately start to get populated with ASTC5x5 data again.

Ahh futz, I forget to add that stall.. by luck the guilty application worked 
anyways (when I first wrote the work I did intel_batchbuffer_flush() and 
relaxed it down to texture invalidate).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/mesa: swizzle argument when there's a vector size mismatch

2017-12-05 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Dec 5, 2017 at 5:26 AM, Ilia Mirkin  wrote:
> GLSL IR operation arguments can sometimes have an implicit swizzle as a
> result of a vector arg and a scalar arg, where the scalar argument is
> implicitly expanded to the size of the vector argument.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103955
> Signed-off-by: Ilia Mirkin 
> ---
>
> v1 -> v2:
>  - fix typo that caused compilation failure
>  - remove ir_binop_interpolate_at_* from getting the resizing treatment
>
> I went through all the ir_binops and ir_triops, and those two seem like the
> only ones with weird arguments. I think.
>
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 25 -
>  1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 8eeae86dabb..740c197c74b 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -1341,10 +1341,33 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
> ir, st_src_reg *op)
> st_dst_reg result_dst;
>
> int vector_elements = ir->operands[0]->type->vector_elements;
> -   if (ir->operands[1]) {
> +   if (ir->operands[1] &&
> +   ir->operation != ir_binop_interpolate_at_offset &&
> +   ir->operation != ir_binop_interpolate_at_sample) {
> +  st_src_reg *swz_op = NULL;
> +  if (vector_elements > ir->operands[1]->type->vector_elements) {
> + assert(ir->operands[1]->type->vector_elements == 1);
> + swz_op = &op[1];
> +  } else if (vector_elements < ir->operands[1]->type->vector_elements) {
> + assert(ir->operands[0]->type->vector_elements == 1);
> + swz_op = &op[0];
> +  }
> +  if (swz_op) {
> + uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0);
> + swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
> + swizzle_x, swizzle_x);
> +  }
>vector_elements = MAX2(vector_elements,
>   ir->operands[1]->type->vector_elements);
> }
> +   if (ir->operands[2] &&
> +   ir->operands[2]->type->vector_elements != vector_elements) {
> +  /* This can happen with ir_triop_lrp, i.e. glsl mix */
> +  assert(ir->operands[2]->type->vector_elements == 1);
> +  uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0);
> +  op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
> +swizzle_x, swizzle_x);
> +   }
>
> this->result.file = PROGRAM_UNDEFINED;
>
> --
> 2.13.6
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] mesa: s/%u/%d/ in _mesa_error() call in check_layer()

2017-12-05 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Tue, Dec 5, 2017 at 6:00 PM, Brian Paul  wrote:
> The layer parameter is signed.  Fixes the error message seen when
> running the arb_texture_multisample-errors test which checks a
> negative layer value.
> ---
>  src/mesa/main/fbobject.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
> index f7702f1..30287ab 100644
> --- a/src/mesa/main/fbobject.c
> +++ b/src/mesa/main/fbobject.c
> @@ -3189,8 +3189,7 @@ check_layer(struct gl_context *ctx, GLenum target, 
> GLint layer,
>  * and layer is negative."
>  */
> if (layer < 0) {
> -  _mesa_error(ctx, GL_INVALID_VALUE,
> -  "%s(layer %u < 0)", caller, layer);
> +  _mesa_error(ctx, GL_INVALID_VALUE, "%s(layer %d < 0)", caller, layer);
>return false;
> }
>
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

2017-12-05 Thread Jason Ekstrand
On Tue, Dec 5, 2017 at 10:34 AM, Rogovin, Kevin 
wrote:

> Hi,
>
> >This isn't true.  100% of the intel_mipmap_tree -> blorp_surf
> translations are handled by that function.
> >  It's a perfectly reasonable place to handle these things.  It could
> also be handled in genX(blorp_exec) if that makes someone more comfortable.
>
> This is where I placed the ASTC enumeration setting, in genX(blorp_exec).
> I added a boolean signaling if the caller to blorp believed it would sample
> from an ASTC, if it did it sets the enum as ASTC otherwise as AUX.
>
> I confess I am not super familiar with blorp, but as far as I can tell,
> the only way for blorp to read an ASTC is for GetTexImage/GetTexSubImage
> since an ASTC surface cannot be used for an FBO.
>

I'd have to think about it harder but even those are not likely to actually
get ASTC decode.  Maybe for some sort of decompression thing but if you're
doing GetCompressedTexImage, it'll probably turn into a blorp_copy which
will use R32G32B32A32_UINT.


> > The problem is that a single invalidate doesn't actually cause a
> synchronization point in the rendering operation.  All it does is torch the
> texture cache and it
> > does so immediately and in parallel with currently active rendering.  If
> you can't have ASTC5x5 in the texture cache with a CCS_E surface, then we
> need to
> > do a CS stall to ensure that the previous draw is complete before we
> invalidate.  Otherwise, if the draw with ASTC5x5 is still in-flight, the
> texture cache will
> > immediately start to get populated with ASTC5x5 data again.
>
> Ahh futz, I forget to add that stall.. by luck the guilty application
> worked anyways (when I first wrote the work I did intel_batchbuffer_flush()
> and relaxed it down to texture invalidate).
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] st/mesa: remove unneeded #include in st_format.h

2017-12-05 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Tue, Dec 5, 2017 at 5:59 PM, Brian Paul  wrote:
> ---
>  src/mesa/state_tracker/st_format.h | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_format.h 
> b/src/mesa/state_tracker/st_format.h
> index 3dd9c10..466b5d0 100644
> --- a/src/mesa/state_tracker/st_format.h
> +++ b/src/mesa/state_tracker/st_format.h
> @@ -33,7 +33,6 @@
>  #include "main/formats.h"
>  #include "main/glheader.h"
>
> -#include "pipe/p_defines.h"
>  #include "pipe/p_format.h"
>
>  #ifdef __cplusplus
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 23/44] i965/fs: Enables 16-bit load_ubo with sampler

2017-12-05 Thread Jason Ekstrand
On Tue, Dec 5, 2017 at 9:08 AM, Chema Casanova 
wrote:

> El 30/11/17 a las 23:58, Jason Ekstrand escribió:
> > On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo
> > mailto:jmcasan...@igalia.com>> wrote:
> >
> > load_ubo is using 32-bit loads as uniforms surfaces have a 32-bit
> > surface format defined. So when reading 16-bit components with the
> > sampler we need to unshuffle two 16-bit components from each 32-bit
> > component.
> >
> > Using the sampler avoids the use of the byte_scattered_read message
> > that needs one message for each component and is supposed to be
> > slower.
> >
> > In the case of SKL+ we take advantage of a hardware feature that
> > automatically defines a channel mask based on the rlen value, so on
> > SKL+ we only use half of the registers without using a header in the
> > payload.
> > ---
> >  src/intel/compiler/brw_fs.cpp   | 31
> > +++
> >  src/intel/compiler/brw_fs_generator.cpp | 10 --
> >  2 files changed, 35 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_fs.cpp
> > b/src/intel/compiler/brw_fs.cpp
> > index 1ca4d416b2..9c543496ba 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -184,9 +184,17 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> > fs_builder &bld,
> >  * a double this means we are only loading 2 elements worth of
> > data.
> >  * We also want to use a 32-bit data type for the dst of the
> > load operation
> >  * so other parts of the driver don't get confused about the
> > size of the
> > -* result.
> > +* result. On the case of 16-bit data we only need half of the
> > 32-bit
> > +* components on SKL+ as we take advance of using message
> > return size to
> > +* define an xy channel mask.
> >  */
> > -   fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> > +   fs_reg vec4_result;
> > +   if (type_sz(dst.type) == 2 && (devinfo->gen >= 9)) {
> > +  vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
> > +  vec4_result = retype(vec4_result, BRW_REGISTER_TYPE_HF);
> > +   } else {
> > +  vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> > +   }
> >
> > fs_inst *inst =
> > bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
> >  vec4_result, surf_index, vec4_offset);
> > inst->size_written = 4 *
> > vec4_result.component_size(inst->exec_size);
> > @@ -197,8 +205,23 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> > fs_builder &bld,
> > }
> >
> > vec4_result.type = dst.type;
> > -   bld.MOV(dst, offset(vec4_result, bld,
> > -   (const_offset & 0xf) /
> > type_sz(vec4_result.type)));
> > +
> > +   if (type_sz(dst.type) == 2) {
> > +  /* 16-bit types need to be unshuffled as each pair of
> > 16-bit components
> > +   * is packed on a 32-bit compoment because we are using a
> > 32-bit format
> > +   * in the surface of uniform that is read by the sampler.
> > +   * TODO: On BDW+ mark when an uniform has 16-bit type so we
> > could setup a
> > +   * surface format of 16-bit and use the 16-bit return
> > format at the
> > +   * sampler.
> > +   */
> > +  vec4_result.stride = 2;
> > +  bld.MOV(dst, byte_offset(offset(vec4_result, bld,
> > +  (const_offset & 0x7) / 4),
> > +   (const_offset & 0x7) / 2 % 2 * 2));
> > +   } else {
> > +  bld.MOV(dst, offset(vec4_result, bld,
> > +  (const_offset & 0xf) /
> > type_sz(vec4_result.type)));
> > +   }
> >
> >
> > This seems overly complicated.  How about something like
>
> > fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4);
> > switch (type_sz(dst.type)) {
> > case 2:
> >shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1);
> >bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1));
> >break;
> > case 4:
> >bld.MOV(dst, dw);
> >break;
> > case 8:
> >shuffle_32bit_load_result_to_64bit_data(bld, dst, dw, 1);
> >break;
> > default:
> >unreachable();
> > }
>
> This implementation it is really more clear. Tested and works perfectly
> fine.
>
> >
> >
> >  }
> >
> >  /**
> > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> > b/src/intel/compiler/brw_fs_generator.cpp
> > index a3861cd68e..00a4e29147 100644
> > --- a/src/intel/compiler/brw_fs_generator.cpp
> > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > @@ -1381,12 +1381,18 @@
> > fs_generator::generate_varying_pull_constant_load_gen7(fs_inst
> *inst,
> > uint32_t simd_mode, rlen, mlen;
> > if (inst->e

[Mesa-dev] [PATCH 2/6] radeonsi: print the buffer list for CHECK_VM

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_debug.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index 22609b7..385ce39 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1069,20 +1069,21 @@ void si_check_vm_faults(struct r600_common_context *ctx,
fprintf(f, "Last apitrace call: %u\n\n",
sctx->apitrace_call_number);
 
switch (ring) {
case RING_GFX: {
struct u_log_context log;
u_log_context_init(&log);
 
si_log_draw_state(sctx, &log);
si_log_compute_state(sctx, &log);
+   si_log_cs(sctx, &log, true);
 
u_log_new_page_print(&log, f);
u_log_context_destroy(&log);
break;
}
case RING_DMA:
si_dump_dma(sctx, saved, f);
break;
 
default:
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] winsys/amdgpu: make IBs use read-only memory

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 089a358..63cd632 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -642,20 +642,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys 
*ws, struct amdgpu_ib *ib,
   buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
   break;
default:
   unreachable("unhandled IB type");
}
 
pb = ws->base.buffer_create(&ws->base, buffer_size,
ws->info.gart_page_size,
RADEON_DOMAIN_GTT,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
+   RADEON_FLAG_READ_ONLY |
(ring_type == RING_GFX ||
 ring_type == RING_COMPUTE ||
 ring_type == RING_DMA ?
RADEON_FLAG_GTT_WC : 0));
if (!pb)
   return false;
 
mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
if (!mapped) {
   pb_reference(&pb, NULL);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] radeonsi: use a separate allocator for fine fences

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_pipe_common.c | 7 +++
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
 src/gallium/drivers/radeonsi/si_fence.c   | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index d85f9f0..9090e65 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -447,20 +447,25 @@ bool si_common_context_init(struct r600_common_context 
*rctx,
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
  0, PIPE_USAGE_STREAM, 0);
if (!rctx->b.stream_uploader)
return false;
 
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
 0, PIPE_USAGE_DEFAULT, 0);
if (!rctx->b.const_uploader)
return false;
 
+   rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
+0, PIPE_USAGE_STAGING, 0);
+   if (!rctx->cached_gtt_allocator)
+   return false;
+
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
return false;
 
if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & 
DBG(NO_ASYNC_DMA))) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
   r600_flush_dma_ring,
   rctx);
rctx->dma.flush = r600_flush_dma_ring;
}
@@ -491,20 +496,22 @@ void si_common_context_cleanup(struct r600_common_context 
*rctx)
rctx->ws->cs_destroy(rctx->gfx.cs);
if (rctx->dma.cs)
rctx->ws->cs_destroy(rctx->dma.cs);
if (rctx->ctx)
rctx->ws->ctx_destroy(rctx->ctx);
 
if (rctx->b.stream_uploader)
u_upload_destroy(rctx->b.stream_uploader);
if (rctx->b.const_uploader)
u_upload_destroy(rctx->b.const_uploader);
+   if (rctx->cached_gtt_allocator)
+   u_upload_destroy(rctx->cached_gtt_allocator);
 
slab_destroy_child(&rctx->pool_transfers);
slab_destroy_child(&rctx->pool_transfers_unsync);
 
if (rctx->allocator_zeroed_memory) {
u_suballocator_destroy(rctx->allocator_zeroed_memory);
}
rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL);
rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
r600_resource_reference(&rctx->eop_bug_scratch, NULL);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d1fdea0..a8e632c 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -388,20 +388,21 @@ struct r600_common_context {
struct si_screen*screen;
struct radeon_winsys*ws;
struct radeon_winsys_ctx*ctx;
enum radeon_family  family;
enum chip_class chip_class;
struct r600_ringgfx;
struct r600_ringdma;
struct pipe_fence_handle*last_gfx_fence;
struct pipe_fence_handle*last_sdma_fence;
struct r600_resource*eop_bug_scratch;
+   struct u_upload_mgr *cached_gtt_allocator;
unsignednum_gfx_cs_flushes;
unsignedinitial_gfx_cs_size;
unsignedgpu_reset_counter;
unsignedlast_dirty_tex_counter;
unsignedlast_compressed_colortex_counter;
unsignedlast_num_draw_calls;
 
struct threaded_context *tc;
struct u_suballocator   *allocator_zeroed_memory;
struct slab_child_pool  pool_transfers;
diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index 0d165a1..3c4d754 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -144,21 +144,21 @@ static bool si_fine_fence_signaled(struct radeon_winsys 
*rws,
 
 static void si_fine_fence_set(struct si_context *ctx,
  struct si_fine_fence *fine,
  unsigned flags)
 {
uint32_t *fence_ptr;
 
assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | 
PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);
 
/* Use uncached system memory for the fence. */
-   u_upload_alloc(ctx->b.b.stream_uploader, 0, 4, 4,
+   u_upload_alloc(ctx->b.cached_gtt_allocator, 0, 4, 4,
   &fine->offset, (struct pipe_resource **)&fine->buf, 
(void **)&fence_ptr);

[Mesa-dev] [PATCH 4/6] radeonsi/gfx9: make shader binaries use read-only memory

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_buffer_common.c | 3 +++
 src/gallium/drivers/radeon/r600_pipe_common.h   | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c  | 2 ++
 src/gallium/drivers/radeonsi/si_pipe.h  | 1 +
 src/gallium/drivers/radeonsi/si_shader.c| 9 ++---
 5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index ec282d5..55400ab 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -167,20 +167,23 @@ void si_init_resource_fields(struct si_screen *sscreen,
 
/* Displayable and shareable surfaces are not suballocated. */
if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
else
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
if (sscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;
 
+   if (res->b.b.flags & R600_RESOURCE_FLAG_READ_ONLY)
+   res->flags |= RADEON_FLAG_READ_ONLY;
+
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
res->max_forced_staging_uploads = 0;
res->b.max_forced_staging_uploads = 0;
 
if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;
 
res->max_forced_staging_uploads =
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 498a741..d1fdea0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -46,20 +46,21 @@
 
 struct u_log_context;
 struct si_screen;
 struct si_context;
 
 #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV << 
0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 
3)
 #define R600_RESOURCE_FLAG_UNMAPPABLE  (PIPE_RESOURCE_FLAG_DRV_PRIV << 
4)
+#define R600_RESOURCE_FLAG_READ_ONLY   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
5)
 
 /* Debug flags. */
 enum {
/* Shader logging options: */
DBG_VS = PIPE_SHADER_VERTEX,
DBG_PS = PIPE_SHADER_FRAGMENT,
DBG_GS = PIPE_SHADER_GEOMETRY,
DBG_TCS = PIPE_SHADER_TESS_CTRL,
DBG_TES = PIPE_SHADER_TESS_EVAL,
DBG_CS = PIPE_SHADER_COMPUTE,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5d7837d..676d199 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -822,20 +822,22 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->info.family == CHIP_STONEY ||
 sscreen->info.family == CHIP_RAVEN);
}
 
sscreen->dcc_msaa_allowed =
!(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
(sscreen->debug_flags & DBG(DCC_MSAA) ||
 sscreen->info.chip_class == VI);
 
+   sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
+
(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
sscreen->use_monolithic_shaders =
(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
 
sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
if (sscreen->info.chip_class <= VI) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= 
SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 7a09937..3a959f9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -116,20 +116,21 @@ struct si_screen {
booldpbb_allowed;
booldfsm_allowed;
boolllvm_has_working_vgpr_indexing;
 
/* Whether shaders are monolithic (1-part) or separate (3-part). */
booluse_monolithic_shaders;
boolrecord_llvm_ir;
boolhas_rbplus; /* if RB+ registers 
exist */
boolrbplus_allowed; /* if RB+ is allowed */
booldcc_msaa_allowed;
+   boolcpdma_prefetch_writes_memory;
 
struct slab_parent_pool   

[Mesa-dev] [PATCH 1/6] radeonsi: allow DMABUF exports for local buffers

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

Cc: 17.3 
---
 src/gallium/drivers/radeon/r600_texture.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..07f7c33 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -739,22 +739,26 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
stride = rtex->surface.u.gfx9.surf_pitch *
 rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
 rtex->surface.bpe;
slice_size = 
(uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
+   /* Buffer exports are for the OpenCL interop. */
/* Move a suballocated buffer into a non-suballocated 
allocation. */
-   if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+   if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+   /* A DMABUF export always fails if the BO is local. */
+   (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING 
&&
+whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);
 
/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;
 
struct pipe_resource *newb =
screen->resource_create(screen, &templ);
if (!newb)
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeonsi: make const and stream uploaders allocate read-only memory

2017-12-05 Thread Marek Olšák
From: Marek Olšák 

and anything that clones these uploaders, like u_threaded_context.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 9090e65..9e45a9f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,29 @@ bool si_common_context_init(struct r600_common_context 
*rctx,
return false;
}
 
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, sscreen->info.gart_page_size,
  0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;
 
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;
 
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
-0, PIPE_USAGE_DEFAULT, 0);
+0, PIPE_USAGE_DEFAULT,
+
sscreen->cpdma_prefetch_writes_memory ?
+   0 : 
R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;
 
rctx->cached_gtt_allocator = u_upload_create(&rctx->b, 16 * 1024,
 0, PIPE_USAGE_STAGING, 0);
if (!rctx->cached_gtt_allocator)
return false;
 
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] mesa: add const qualifier on _mesa_base_fbo_format()

2017-12-05 Thread Brian Paul
---
 src/mesa/main/fbobject.c | 2 +-
 src/mesa/main/fbobject.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 30287ab..d23916d 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1804,7 +1804,7 @@ _mesa_CreateRenderbuffers(GLsizei n, GLuint 
*renderbuffers)
  * \return the base internal format, or 0 if internalFormat is illegal
  */
 GLenum
-_mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat)
+_mesa_base_fbo_format(const struct gl_context *ctx, GLenum internalFormat)
 {
/*
 * Notes: some formats such as alpha, luminance, etc. were added
diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h
index e4846e8..f52fd15 100644
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -112,7 +112,7 @@ extern GLboolean
 _mesa_is_legal_color_format(const struct gl_context *ctx, GLenum baseFormat);
 
 extern GLenum
-_mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat);
+_mesa_base_fbo_format(const struct gl_context *ctx, GLenum internalFormat);
 
 extern bool
 _mesa_detach_renderbuffer(struct gl_context *ctx,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] mesa: add const qualifier on _mesa_is_renderable_texture_format()

2017-12-05 Thread Brian Paul
---
 src/mesa/main/teximage.c | 3 ++-
 src/mesa/main/teximage.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 572e380..e5f8bb0 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -5712,7 +5712,8 @@ _mesa_TextureBufferRange(GLuint texture, GLenum 
internalFormat, GLuint buffer,
 }
 
 GLboolean
-_mesa_is_renderable_texture_format(struct gl_context *ctx, GLenum 
internalformat)
+_mesa_is_renderable_texture_format(const struct gl_context *ctx,
+   GLenum internalformat)
 {
/* Everything that is allowed for renderbuffers,
 * except for a base format of GL_STENCIL_INDEX, unless supported.
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 18452db..2e950bf 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -216,7 +216,8 @@ bool
 _mesa_format_no_online_compression(const struct gl_context *ctx, GLenum 
format);
 
 GLboolean
-_mesa_is_renderable_texture_format(struct gl_context *ctx, GLenum 
internalformat);
+_mesa_is_renderable_texture_format(const struct gl_context *ctx,
+   GLenum internalformat);
 
 extern void
 _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radv: enable lowering of nir_op_bitfield_insert

2017-12-05 Thread Connor Abbott
lower_bitfield_insert lowers nir_op_bitfield_insert to DX10-style
nir_op_bfi and nir_op_bfm, both of which aren't handled by
ac_nir_to_llvm, so unless I'm missing something this will just break
them even harder. We probably should use this lowering after adding
support for bfi and bfm, since AMD does have native instructions for
bfi and bfm, but first I'd like to see the actual bug fixed. Have you
tried running it with NIR_PRINT=true to pin down which optimization
pass is going wrong?

On Tue, Dec 5, 2017 at 12:50 PM, Samuel Pitoiset
 wrote:
> Otherwise it's replaced by
> "vec1 32 ssa_108 = load_const (0x /* 0.00 */)", which
> looks clearly wrong.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104119
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 4a3fdfa80e..0b19d23fa2 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -66,6 +66,7 @@ static const struct nir_shader_compiler_options nir_options 
> = {
> .lower_extract_byte = true,
> .lower_extract_word = true,
> .lower_ffma = true,
> +   .lower_bitfield_insert = true,
> .max_unroll_iterations = 32
>  };
>
> --
> 2.15.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radv: enable lowering of nir_op_bitfield_extract

2017-12-05 Thread Connor Abbott
Same comment as the previous patch.

On Tue, Dec 5, 2017 at 12:50 PM, Samuel Pitoiset
 wrote:
> This instruction should also be lowered correctly.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_shader.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 0b19d23fa2..044bcd0641 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -67,6 +67,7 @@ static const struct nir_shader_compiler_options nir_options 
> = {
> .lower_extract_word = true,
> .lower_ffma = true,
> .lower_bitfield_insert = true,
> +   .lower_bitfield_extract = true,
> .max_unroll_iterations = 32
>  };
>
> --
> 2.15.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] glx: Implement GLX_EXT_no_config_context (v3)

2017-12-05 Thread Adam Jackson
On Thu, 2017-11-30 at 16:10 +, Emil Velikov wrote:
> On 14 November 2017 at 20:13, Adam Jackson  wrote:
> 
> > @@ -562,6 +562,10 @@ dri2_convert_glx_attribs(unsigned num_attribs,
> > const uint32_t *attribs,
> >  return false;
> >   }
> >   break;
> > +  case GLX_SCREEN:
> > + /* Implies GLX_EXT_no_config_context */
> > + *render_type = GLX_DONT_CARE;
> > + break;
> 
> We should fall-through (and fail) when GLX_SCREEN is set but the
> extension is missing.

Nah. This function does not make reference to the screen we're trying
to create a context for. And, for direct contexts (that call this
conversion helper) this extension is now always present anyway.

- ajax
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-05 Thread Rob Herring
On Tue, Dec 5, 2017 at 11:01 AM, Robert Foss  wrote:
> On Tue, 2017-12-05 at 18:22 +0900, Tomasz Figa wrote:
>> On Sat, Dec 2, 2017 at 4:43 AM, Rob Herring  wrote:
>> > On Fri, Dec 1, 2017 at 8:44 AM, Tomasz Figa 
>> > wrote:
>> > > On Fri, Dec 1, 2017 at 11:20 PM, Rob Herring 
>> > > wrote:
>> > > > On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss > > > > ora.com> wrote:
>> > > > > On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
>> > > > > > On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli > > > > > > i...@intel.co
>> > > > > > m> wrote:
>> > > > > > >
>> > > > > > >
>> > > > > > > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
>> > > > > > > >
>> > > > > > > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss > > > > > > > > ss@collabo
>> > > > > > > > ra.com>
>> > > > > > > > wrote:
>> >
>> > [...]


>> > > However I fully agree that there are other upstream components
>> > > (e.g.
>> > > drm_hwcomposer), which would benefit from it and nobody wants to
>> > > include Mesa in the build just for one header. Should we have a
>> > > separate freedesktop project for it?
>> >
>> > I'm still going to say libdrm. If that's really a problem, then we
>> > can
>> > split it out later.
>>
>> At least for Chromium OS, libdrm would work fine indeed.
>
> Excellent, so with the question of where (libdrm) covered, that leaves
> us with what.
>
> Currently this is what what the XXX_handle structs look like:
> gbm_gralloc: https://github.com/robherring/gbm_gralloc/blob/master/gral
> loc_drm_handle.h#L36
>
> minigbm: https://chromium.googlesource.com/chromiumos/platform/minigbm/
> +/master/cros_gralloc/cros_gralloc_handle.h#20
>
> intel-minigbm: https://github.com/intel/minigbm/blob/master/cros_grallo
> c/cros_gralloc_handle.h#L20
>
> A lot seems to be shared between the 3, gbm_gralloc seems to have
> modifier support, but lack multi-planar YUV support.
>
> DRV_MAX_PLANES sounds to be a bit of a misnomer, as (I think) it refers
> to YUV planes and not planes supported by the driver/hw.
>
> I would assume that all shared variables would be available in the
> libdrm-struct, as well the ones planar YUV support.
>
> As for the non-obvious ones, maybe the should be listed so that we can
> dig into if they are really needed, implementation specific or unused.

My plan is similar to what I was thinking in the move to hwc. Move the
struct as is to libdrm and get the dependencies (gbm_gralloc,
drm_gralloc?, drm_hwc, mesa) switched over to use it. Then start
modifying the struct contents. Here's my list:

- versioning of the handle
- remove .name (GEM handles. only dmabuf support)
- better way to do buffer registration tracking (than a pid and ptr)?
- multi-planar
- switch format to fourcc (or add the fourcc format)
- switch to accessor functions or library calls

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Enable disk shader cache by default

2017-12-05 Thread Jordan Justen
On 2017-12-05 09:13:11, Mark Janes wrote:
> Jordan Justen  writes:
> 
> > On 2017-11-08 17:26:47, Timothy Arceri wrote:
> >> Reviewed-by: Timothy Arceri 
> >> 
> >> Mark may want to consider adding some of the once a day type CI runs for 
> >> this. For example running the test suite for two consecutive runs on the 
> >> same build so that the second run uses the shader cache and also a 
> >> second run the uses MESA_GLSL=cache_fb to force testing of the cache 
> >> fallback path.
> >
> > Yeah. We discussed this previously, but I don't think it's been
> > implemented yet. My opinion is that it could perhaps be a weekly test.
> 
> This automation is implemented now. It found the issues reported in
> https://bugs.freedesktop.org/show_bug.cgi?id=103988
> 
> My opinion is that this test strategy is inadequate.

Meaning you think we cannot enable i965 shader cache for the 18.0
release?

We are already ~1.5 months away from the next stable branch point. If
we want to enable this in 18.0, we should be using this time to see if
enabling the cache by default has some large unexpected side effect in
our graphics stack, or breaks real-world apps.

> Adding a dimension
> to the test matrix has high cost, especially when combined with other
> dimensions of the test matrix (does shader cache need to be tested for
> 32/64 bit builds? For hswgt1/hswgt2/hswgt3e?).

Are you saying this is too high cost to run per check-in? Do you need
to disable it for the health of CI? I think I proposed that daily, or
perhaps even weekly would be adequate.

These tests are already identifying some corner case issues. I'm not
sure these would have impacted real-world apps yet, but I do think it
is a good idea to run these tests regularly.

You say this test strategy is inadequate. Perhaps. I do think it needs
to be part of our test strategy. There is no way we are going to hit
all the corners of the API better than running all of our tests with
the cache enabled. Do you agree?

> Since 103988 is not hardware specific, and is not a regression, it's not
> something that could only have been caught by CI.  I'm surprised to find
> it at this point, considering piglit was used to validate the feature.
> 
> I'd be happy if there was at least a smoke test where complex programs
> are cached, with the intent of exercising the shader cache mechanism
> directly.  It doesn't have to be exhaustive to be effective.  Seems like
> a good idea to at least directly test the issue that was fixed in
> 103988 tests.

It could be interesting to define a MESA extension to control or query
the shader cache. Today, the shader cache is a nearly 'invisible'
feature. There are a few env-vars, but I wouldn't call a public
interface.

The downside of defining an extension is that it might constrain
changes to the shader cache implementation. Or, if the interface is
too complex, then it may be tough for some drivers to implement.

But, what if we get around to defining and implementing this extenion,
(and a few piglit tests that makes use of it), by mid-January? It'll
still be pretty difficult to argue we should enable the shader cache
on i965 for 18.0, since we burned all the time to let it be tested on
master against real-world apps.

Also, while you say our current test strategy is inadequate, I do
think it is still far more comprehensive than anything we will
accomplish by defining a new extension and writing a few, or even a
few hundred tests against the new extension.

-Jordan
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radv: fix a case statement in GetMemoryFdPropertiesKHR

2017-12-05 Thread Fredrik Höglund
The handle type in the case statement is supposed to be VK_EXTERNAL_-
MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT.

Signed-off-by: Fredrik Höglund 
---
 src/amd/vulkan/radv_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 1b7cd355938..2538472bea6 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3557,7 +3557,7 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
   VkMemoryFdPropertiesKHR 
*pMemoryFdProperties)
 {
switch (handleType) {
-   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
   pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
   return VK_SUCCESS;
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] anv: fix a case statement in GetMemoryFdPropertiesKHR

2017-12-05 Thread Fredrik Höglund
The handle type in the case statement is supposed to be VK_EXTERNAL_-
MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT.

Signed-off-by: Fredrik Höglund 
---
 src/intel/vulkan/anv_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 97124154b69..af804612654 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1714,7 +1714,7 @@ VkResult anv_GetMemoryFdPropertiesKHR(
struct anv_physical_device *pdevice = &device->instance->physicalDevice;
 
switch (handleType) {
-   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
   /* dma-buf can be imported as any memory type */
   pMemoryFdProperties->memoryTypeBits =
  (1 << pdevice->memory.type_count) - 1;
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv: fix a case statement in GetMemoryFdPropertiesKHR

2017-12-05 Thread Jason Ekstrand
Oops.  Both are

Reviewed-by: Jason Ekstrand 

On Tue, Dec 5, 2017 at 12:51 PM, Fredrik Höglund  wrote:

> The handle type in the case statement is supposed to be VK_EXTERNAL_-
> MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT.
>
> Signed-off-by: Fredrik Höglund 
> ---
>  src/intel/vulkan/anv_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 97124154b69..af804612654 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1714,7 +1714,7 @@ VkResult anv_GetMemoryFdPropertiesKHR(
> struct anv_physical_device *pdevice = &device->instance->
> physicalDevice;
>
> switch (handleType) {
> -   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
> +   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
>/* dma-buf can be imported as any memory type */
>pMemoryFdProperties->memoryTypeBits =
>   (1 << pdevice->memory.type_count) - 1;
> --
> 2.15.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] glx: Move vertex array protocol state into the indirect backend (v2)

2017-12-05 Thread Adam Jackson
Only relevant for indirect contexts, so let's get that code out of the
common path.

v2: Add the necessary context setup before calling GetString

Signed-off-by: Adam Jackson 
---
 src/glx/glxcurrent.c   | 12 
 src/glx/indirect_glx.c | 26 ++
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/glx/glxcurrent.c b/src/glx/glxcurrent.c
index fd04929b89..9f8bf7cee1 100644
--- a/src/glx/glxcurrent.c
+++ b/src/glx/glxcurrent.c
@@ -238,18 +238,6 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
 
__glXUnlock();
 
-   /* The indirect vertex array state must to be initialised after we
-* have setup the context, as it needs to query server attributes.
-*/
-   if (gc && !gc->isDirect) {
-  __GLXattribute *state = gc->client_state_private;
-  if (state && state->array_state == NULL) {
- glGetString(GL_EXTENSIONS);
- glGetString(GL_VERSION);
- __glXInitVertexArrayState(gc);
-  }
-   }
-
return GL_TRUE;
 }
 
diff --git a/src/glx/indirect_glx.c b/src/glx/indirect_glx.c
index cfae12f6c0..5482d768ff 100644
--- a/src/glx/indirect_glx.c
+++ b/src/glx/indirect_glx.c
@@ -34,7 +34,7 @@
 
 #include "glapi.h"
 #include "glxclient.h"
-
+#include "indirect.h"
 #include "util/debug.h"
 
 #ifndef GLX_USE_APPLEGL
@@ -148,9 +148,27 @@ indirect_bind_context(struct glx_context *gc, struct 
glx_context *old,
sent = SendMakeCurrentRequest(dpy, gc->xid, tag, draw, read,
 &gc->currentContextTag);
 
-   if (!IndirectAPI)
-  IndirectAPI = __glXNewIndirectAPI();
-   _glapi_set_dispatch(IndirectAPI);
+   if (sent) {
+  if (!IndirectAPI)
+ IndirectAPI = __glXNewIndirectAPI();
+  _glapi_set_dispatch(IndirectAPI);
+
+  /* The indirect vertex array state must to be initialised after we
+   * have setup the context, as it needs to query server attributes.
+   *
+   * At the point this is called gc->currentDpy is not initialized
+   * nor is the thread's current context actually set. Hence the
+   * cleverness before the GetString calls.
+   */
+  __GLXattribute *state = gc->client_state_private;
+  if (state && state->array_state == NULL) {
+ gc->currentDpy = gc->psc->dpy;
+ __glXSetCurrentContext(gc);
+ __indirect_glGetString(GL_EXTENSIONS);
+ __indirect_glGetString(GL_VERSION);
+ __glXInitVertexArrayState(gc);
+  }
+   }
 
return !sent;
 }
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] glx: Lift sending the MakeCurrent request to top-level code (v2)

2017-12-05 Thread Adam Jackson
Somewhat terrifyingly, we never sent this for direct contexts, which
means the server never knew the context/drawable bindings.

To handle this sanely, pull the request code up out of the indirect
backend, and rewrite the context switch path to call it as appropriate.
This attempts to preserve the existing behavior of not calling unbind()
on the context if its refcount would not drop to zero, which is why the
diff is a little uglier than I'd like.

Fixes glx-make-context, glx-multi-context-single-window and
glx-query-drawable-glx_fbconfig_id-window with both direct and indirect
contexts. glx-make-glxdrawable-current regresses, but I believe the test
is in error, as the fbconfig it uses to create the context is just
"something double-buffered and rgba" as opposed to "corresponds to the
visual I used to create the windows".

v2: Style fixups (Ian Romanick)

Signed-off-by: Adam Jackson 
---
 src/glx/glxcurrent.c   | 182 +
 src/glx/indirect_glx.c | 140 +++--
 2 files changed, 162 insertions(+), 160 deletions(-)

diff --git a/src/glx/glxcurrent.c b/src/glx/glxcurrent.c
index 9f8bf7cee1..0ce80670d2 100644
--- a/src/glx/glxcurrent.c
+++ b/src/glx/glxcurrent.c
@@ -165,17 +165,85 @@ glXGetCurrentDrawable(void)
return gc->currentDrawable;
 }
 
-/**
- * Make a particular context current.
- *
- * \note This is in this file so that it can access dummyContext.
- */
+static Bool
+SendMakeCurrentRequest(Display * dpy, GLXContextID gc_id,
+   GLXContextTag gc_tag, GLXDrawable draw,
+   GLXDrawable read, GLXContextTag *out_tag)
+{
+   xGLXMakeCurrentReply reply;
+   Bool ret;
+   int opcode = __glXSetupForCommand(dpy);
+
+   LockDisplay(dpy);
+
+   if (draw == read) {
+  xGLXMakeCurrentReq *req;
+
+  GetReq(GLXMakeCurrent, req);
+  req->reqType = opcode;
+  req->glxCode = X_GLXMakeCurrent;
+  req->drawable = draw;
+  req->context = gc_id;
+  req->oldContextTag = gc_tag;
+   }
+   else {
+  struct glx_display *priv = __glXInitialize(dpy);
+
+  if ((priv->majorVersion > 1) || (priv->minorVersion >= 3)) {
+ xGLXMakeContextCurrentReq *req;
+
+ GetReq(GLXMakeContextCurrent, req);
+ req->reqType = opcode;
+ req->glxCode = X_GLXMakeContextCurrent;
+ req->drawable = draw;
+ req->readdrawable = read;
+ req->context = gc_id;
+ req->oldContextTag = gc_tag;
+  }
+  else {
+ xGLXVendorPrivateWithReplyReq *vpreq;
+ xGLXMakeCurrentReadSGIReq *req;
+
+ GetReqExtra(GLXVendorPrivateWithReply,
+ sz_xGLXMakeCurrentReadSGIReq -
+ sz_xGLXVendorPrivateWithReplyReq, vpreq);
+ req = (xGLXMakeCurrentReadSGIReq *) vpreq;
+ req->reqType = opcode;
+ req->glxCode = X_GLXVendorPrivateWithReply;
+ req->vendorCode = X_GLXvop_MakeCurrentReadSGI;
+ req->drawable = draw;
+ req->readable = read;
+ req->context = gc_id;
+ req->oldContextTag = gc_tag;
+  }
+   }
+
+   ret = _XReply(dpy, (xReply *) &reply, 0, False);
+
+   if (out_tag)
+  *out_tag = reply.contextTag;
+
+   UnlockDisplay(dpy);
+   SyncHandle();
+
+   return ret;
+}
+
+static void
+SetGC(struct glx_context *gc, Display *dpy, GLXDrawable draw, GLXDrawable read)
+{
+   gc->currentDpy = dpy;
+   gc->currentDrawable = draw;
+   gc->currentReadable = read;
+}
+
 static Bool
 MakeContextCurrent(Display * dpy, GLXDrawable draw,
GLXDrawable read, GLXContext gc_user)
 {
struct glx_context *gc = (struct glx_context *) gc_user;
struct glx_context *oldGC = __glXGetCurrentContext();
+   Bool ret = GL_FALSE;
 
/* Make sure that the new context has a nonzero ID.  In the request,
 * a zero context ID is used only to mean that we bind to no current
@@ -186,59 +254,87 @@ MakeContextCurrent(Display * dpy, GLXDrawable draw,
}
 
_glapi_check_multithread();
-
__glXLock();
+
if (oldGC == gc &&
-   gc->currentDrawable == draw && gc->currentReadable == read) {
-  __glXUnlock();
-  return True;
+   gc->currentDrawable == draw &&
+   gc->currentReadable == read) {
+  /* Same context and drawables: no op, just return */
+  ret = GL_TRUE;
}
 
-   if (oldGC != &dummyContext) {
-  if (--oldGC->thread_refcount == 0) {
-oldGC->vtable->unbind(oldGC, gc);
-oldGC->currentDpy = 0;
+   else if (oldGC == gc) {
+  /* Same context and new drawbles: update drawable bindings */
+  if (!SendMakeCurrentRequest(dpy, gc->xid, gc->currentContextTag,
+  draw, read, &gc->currentContextTag)) {
+ goto out;
   }
-   }
 
-   if (gc) {
-  /* Attempt to bind the context.  We do this before mucking with
-   * gc and __glXSetCurrentContext to properly handle our state in
-   * case of an error.
-   *
-   * If an error occ

[Mesa-dev] [PATCH 3/3] glx: Implement GLX_EXT_no_config_context (v4)

2017-12-05 Thread Adam Jackson
This more or less ports EGL_KHR_no_config_context to GLX.

v2: Enable the extension only for those backends that support it.
v3: Fix glvnd path and dri2_convert_glx_attribs()
v4: Screeching signedness correctness, and disable a now
inappropriate test.

Khronos: https://github.com/KhronosGroup/OpenGL-Registry/pull/102
Reviewed-by: Kenneth Graunke 
Signed-off-by: Adam Jackson 
Reviewed-by: Ian Romanick 
---
 src/glx/applegl_glx.c |  3 +++
 src/glx/create_context.c  | 37 +++
 src/glx/dri2_glx.c|  1 +
 src/glx/dri3_glx.c|  1 +
 src/glx/dri_common.c  |  4 
 src/glx/drisw_glx.c   |  1 +
 src/glx/driwindows_glx.c  |  2 +-
 src/glx/g_glxglvnddispatchfuncs.c | 14 +++-
 src/glx/glxcmds.c |  2 +-
 src/glx/glxextensions.c   |  1 +
 src/glx/glxextensions.h   |  1 +
 src/glx/tests/create_context_unittest.cpp |  9 
 12 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/src/glx/applegl_glx.c b/src/glx/applegl_glx.c
index c086e5146a..85f70058c6 100644
--- a/src/glx/applegl_glx.c
+++ b/src/glx/applegl_glx.c
@@ -134,6 +134,9 @@ applegl_create_context(struct glx_screen *psc,
/* TODO: Integrate this with apple_glx_create_context and make
 * struct apple_glx_context inherit from struct glx_context. */
 
+   if (!config)
+  return NULL;
+
gc = calloc(1, sizeof(*gc));
if (gc == NULL)
   return NULL;
diff --git a/src/glx/create_context.c b/src/glx/create_context.c
index 38e949ab4c..bc39ffef1d 100644
--- a/src/glx/create_context.c
+++ b/src/glx/create_context.c
@@ -47,21 +47,11 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config,
xcb_generic_error_t *err;
xcb_void_cookie_t cookie;
unsigned dummy_err = 0;
+   int screen = -1;
 
-
-   if (dpy == NULL || cfg == NULL)
-  return NULL;
-
-   /* This means that either the caller passed the wrong display pointer or
-* one of the internal GLX data structures (probably the fbconfig) has an
-* error.  There is nothing sensible to do, so return an error.
-*/
-   psc = GetGLXScreenConfigs(dpy, cfg->screen);
-   if (psc == NULL)
+   if (dpy == NULL)
   return NULL;
 
-   assert(cfg->screen == psc->scr);
-
/* Count the number of attributes specified by the application.  All
 * attributes appear in pairs, except the terminating None.
 */
@@ -70,6 +60,25 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config,
 /* empty */ ;
}
 
+   if (cfg) {
+  screen = cfg->screen;
+   } else {
+  for (unsigned int i = 0; i < num_attribs; i++) {
+ if (attrib_list[i * 2] == GLX_SCREEN)
+screen = attrib_list[i * 2 + 1];
+  }
+   }
+
+   /* This means that either the caller passed the wrong display pointer or
+* one of the internal GLX data structures (probably the fbconfig) has an
+* error.  There is nothing sensible to do, so return an error.
+*/
+   psc = GetGLXScreenConfigs(dpy, screen);
+   if (psc == NULL)
+  return NULL;
+
+   assert(screen == psc->scr);
+
if (direct && psc->vtable->create_context_attribs) {
   /* GLX drops the error returned by the driver.  The expectation is that
* an error will also be returned by the server.  The server's error
@@ -104,8 +113,8 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config,
cookie =
   xcb_glx_create_context_attribs_arb_checked(c,
 gc->xid,
-cfg->fbconfigID,
-cfg->screen,
+cfg ? cfg->fbconfigID : 0,
+screen,
 gc->share_xid,
 gc->isDirect,
 num_attribs,
diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 0f44635725..eeec4f0d60 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -1129,6 +1129,7 @@ dri2BindExtensions(struct dri2_screen *psc, struct 
glx_display * priv,
 
   __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
   __glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
+  __glXEnableDirectExtension(&psc->base, "GLX_EXT_no_config_context");
 
   if ((mask & ((1 << __DRI_API_GLES) |
(1 << __DRI_API_GLES2) |
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index f280a8cef7..f0560edeb5 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -720,6 +720,7 @@ dri3_bind_extensions(struct dri3_screen *psc, struct 
glx_display * priv,
 
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_

[Mesa-dev] [PATCH 0/3] GLX_EXT_no_config_context v4

2017-12-05 Thread Adam Jackson
With one exception (noted in 2/3) this is now regression-free in piglit,
and passes make check.

- ajax

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 23/44] i965/fs: Enables 16-bit load_ubo with sampler

2017-12-05 Thread Chema Casanova
On 05/12/17 19:53, Jason Ekstrand wrote:
> On Tue, Dec 5, 2017 at 9:08 AM, Chema Casanova  > wrote:
> 
> El 30/11/17 a las 23:58, Jason Ekstrand escribió:
> > On Wed, Nov 29, 2017 at 6:50 PM, Jose Maria Casanova Crespo
> > mailto:jmcasan...@igalia.com>
> >> wrote:
> >
> >     load_ubo is using 32-bit loads as uniforms surfaces have a 32-bit
> >     surface format defined. So when reading 16-bit components with the
> >     sampler we need to unshuffle two 16-bit components from each
> 32-bit
> >     component.
> >
> >     Using the sampler avoids the use of the byte_scattered_read
> message
> >     that needs one message for each component and is supposed to be
> >     slower.
> >
> >     In the case of SKL+ we take advantage of a hardware feature that
> >     automatically defines a channel mask based on the rlen value,
> so on
> >     SKL+ we only use half of the registers without using a header
> in the
> >     payload.
> >     ---
> >      src/intel/compiler/brw_fs.cpp           | 31
> >     +++
> >      src/intel/compiler/brw_fs_generator.cpp | 10 --
> >      2 files changed, 35 insertions(+), 6 deletions(-)
> >
> >     diff --git a/src/intel/compiler/brw_fs.cpp
> >     b/src/intel/compiler/brw_fs.cpp
> >     index 1ca4d416b2..9c543496ba 100644
> >     --- a/src/intel/compiler/brw_fs.cpp
> >     +++ b/src/intel/compiler/brw_fs.cpp
> >     @@ -184,9 +184,17 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> >     fs_builder &bld,
> >          * a double this means we are only loading 2 elements worth of
> >     data.
> >          * We also want to use a 32-bit data type for the dst of the
> >     load operation
> >          * so other parts of the driver don't get confused about the
> >     size of the
> >     -    * result.
> >     +    * result. On the case of 16-bit data we only need half of the
> >     32-bit
> >     +    * components on SKL+ as we take advance of using message
> >     return size to
> >     +    * define an xy channel mask.
> >          */
> >     -   fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> >     +   fs_reg vec4_result;
> >     +   if (type_sz(dst.type) == 2 && (devinfo->gen >= 9)) {
> >     +      vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
> >     +      vec4_result = retype(vec4_result, BRW_REGISTER_TYPE_HF);
> >     +   } else {
> >     +      vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> >     +   }
> >
> >         fs_inst *inst =
> >     bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
> >                                  vec4_result, surf_index,
> vec4_offset);
> >         inst->size_written = 4 *
> >     vec4_result.component_size(inst->exec_size);
> >     @@ -197,8 +205,23 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const
> >     fs_builder &bld,
> >         }
> >
> >         vec4_result.type = dst.type;
> >     -   bld.MOV(dst, offset(vec4_result, bld,
> >     -                       (const_offset & 0xf) /
> >     type_sz(vec4_result.type)));
> >     +
> >     +   if (type_sz(dst.type) == 2) {
> >     +      /* 16-bit types need to be unshuffled as each pair of
> >     16-bit components
> >     +       * is packed on a 32-bit compoment because we are using a
> >     32-bit format
> >     +       * in the surface of uniform that is read by the sampler.
> >     +       * TODO: On BDW+ mark when an uniform has 16-bit type so we
> >     could setup a
> >     +       * surface format of 16-bit and use the 16-bit return
> >     format at the
> >     +       * sampler.
> >     +       */
> >     +      vec4_result.stride = 2;
> >     +      bld.MOV(dst, byte_offset(offset(vec4_result, bld,
> >     +                                      (const_offset & 0x7) / 4),
> >     +                               (const_offset & 0x7) / 2 % 2 *
> 2));
> >     +   } else {
> >     +      bld.MOV(dst, offset(vec4_result, bld,
> >     +                          (const_offset & 0xf) /
> >     type_sz(vec4_result.type)));
> >     +   }
> >
> >
> > This seems overly complicated.  How about something like
> 
> > fs_reg dw = offset(vec4_result, bld, (const_offset & 0xf) / 4);
> > switch (type_sz(dst.type)) {
> > case 2:
> >    shuffle_32bit_load_result_to_16bit_data(bld, dst, dw, 1);
> >    bld.MOV(dst, subscript(dw, dst.type, (const_offset / 2) & 1));
> >    break;
> > case 4:
> >    bld.MOV(dst, dw);
> >    break;
> > case 8:
> >    shuffle_32bit_load_result_to_64bit_data(bld, dst, dw, 1);
> >    break;
> > default

[Mesa-dev] [PATCH v4 28/44] i965/fs: Use untyped_surface_read for 16-bit load_ssbo (v2)

2017-12-05 Thread Jose Maria Casanova Crespo
SSBO loads were using byte_scattered read messages as they allow
reading 16-bit size components. byte_scattered messages can only
operate one component at a time so we needed to emit as many messages
as components.

But for vec2 and vec4 of 16-bit, being multiple of 32-bit we can use the
untyped_surface_read message to read pairs of 16-bit components using
only one message. Once each pair is read it is unshuffled to return the
proper 16-bit components. vec3 case is assimilated to vec4 but the 4th
component is ignored.

16-bit scalars are read using one byte_scattered_read message.

v2: Removed use of stride = 2 on sources (Jason Ekstrand)
Rework optimization using unshuffle 16 reads (Chema Casanova)
v3: Use W and D types insead of HF and F in shuffle to avoid rounding
erros (Jason Ekstrand)
Use untyped_surface_read for 16-bit vec3. (Jason Ekstrand)

CC: Jason Ekstrand 
---
 src/intel/compiler/brw_fs_nir.cpp | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index e11e75e6332..8deec082d59 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2303,16 +2303,31 @@ do_untyped_vector_read(const fs_builder &bld,
unsigned num_components)
 {
if (type_sz(dest.type) <= 2) {
-  fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
-  bld.MOV(read_offset, offset_reg);
-  for (unsigned i = 0; i < num_components; i++) {
- fs_reg read_reg =
-emit_byte_scattered_read(bld, surf_index, read_offset,
+  assert(dest.stride == 1);
+
+  if (num_components > 1) {
+ /* Pairs of 16-bit components can be read with untyped read, for 
16-bit
+  * vec3 4th component is ignored.
+  */
+ fs_reg read_result =
+emit_untyped_read(bld, surf_index, offset_reg,
+  1 /* dims */, DIV_ROUND_UP(num_components, 2),
+  BRW_PREDICATE_NONE);
+ shuffle_32bit_load_result_to_16bit_data(bld,
+   retype(dest, BRW_REGISTER_TYPE_W),
+   retype(read_result, BRW_REGISTER_TYPE_D),
+   num_components);
+  } else {
+ assert(num_components == 1);
+ /* scalar 16-bit are read using one byte_scattered_read message */
+ fs_reg read_result =
+emit_byte_scattered_read(bld, surf_index, offset_reg,
  1 /* dims */, 1,
  type_sz(dest.type) * 8 /* bit_size */,
  BRW_PREDICATE_NONE);
- bld.MOV(offset(dest, bld, i), subscript(read_reg, dest.type, 0));
- bld.ADD(read_offset, read_offset, brw_imm_ud(type_sz(dest.type)));
+ read_result.type = dest.type;
+ read_result.stride = 2;
+ bld.MOV(dest, read_result);
   }
} else if (type_sz(dest.type) == 4) {
   fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >