date:20150909

[Mesa-dev] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings

2015-09-09 Thread Ilia Mirkin

CB updates to bound buffers need to go through the CB_DATA endpoints,
otherwise the shader may not notice that the updates happened.
Furthermore, these updates have to go in to the same address as the
bound buffer, otherwise, again, the shader may not notice updates.

So we keep track of all the places where a constbuf is bound, and
iterate over all of them when updating data. If a binding is found that
encompasses the region to be updated, then we use the settings of that
binding for the upload. Otherwise we upload as a regular data update.

This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well
as blurriness in Witcher2.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890
Signed-off-by: Ilia Mirkin 
Cc: "11.0" 
---
 src/gallium/drivers/nouveau/nouveau_buffer.c   |  4 +-
 src/gallium/drivers/nouveau/nouveau_buffer.h   |  2 +
 src/gallium/drivers/nouveau/nouveau_context.h  |  5 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  8 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |  2 +
 .../drivers/nouveau/nvc0/nvc0_state_validate.c |  3 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   | 46 --
 7 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 912b778..4937dae 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct 
nouveau_transfer *tx,
   nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
 tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-  nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+  nv->push_cb(nv, buf,
   base, size / 4, (const uint32_t *)data);
else
   nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h 
b/src/gallium/drivers/nouveau/nouveau_buffer.h
index 7e6a6cc..d45bf7a 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -41,6 +41,8 @@ struct nv04_resource {
uint8_t status;
uint8_t domain;
 
+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
struct nouveau_fence *fence;
struct nouveau_fence *fence_wr;
 
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h 
b/src/gallium/drivers/nouveau/nouveau_context.h
index 24deb7e..decb271 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -6,6 +6,8 @@
 
 #define NOUVEAU_MAX_SCRATCH_BUFS 4
 
+struct nv04_resource;
+
 struct nouveau_context {
struct pipe_context pipe;
struct nouveau_screen *screen;
@@ -23,8 +25,7 @@ struct nouveau_context {
  unsigned, const void *);
/* base, size refer to the whole constant buffer */
void (*push_cb)(struct nouveau_context *,
-   struct nouveau_bo *, unsigned domain,
-   unsigned base, unsigned size,
+   struct nv04_resource *,
unsigned offset, unsigned words, const uint32_t *);
 
/* @return: @ref reduced by nr of references found in context */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 6ed79cf..30bee3a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
   struct nouveau_bo *dst, unsigned offset, unsigned domain,
   unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
- struct nouveau_bo *bo, unsigned domain,
- unsigned base, unsigned size,
- unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+struct nouveau_bo *bo, unsigned domain,
+unsigned base, unsigned size,
+unsigned offset, unsigned words, const uint32_t *data);
 
 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index ee29912..c5bfd03 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint 
shader, uint index,
}
nvc0->constbuf_dirty[s] |= 1 << i;
 
+   if (nvc0->constbuf[s][i].u.buf)
+  nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
pipe_resource_reference(&nvc0->constbuf[s][i].

Re: [Mesa-dev] [PATCH 1/3] gallium/radeon: add separate stencil level dirty flags

2015-09-09 Thread Michel Dänzer

On 07.09.2015 07:17, Marek Olšák wrote:
> From: Marek Olšák 
> 
> We will only do depth-only or stencil-only decompress blits, whichever is
> needed by textures, instead of always doing both.
> ---
>  src/gallium/drivers/r600/evergreen_state.c| 4 ++--
>  src/gallium/drivers/r600/r600_state_common.c  | 3 +++
>  src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
>  src/gallium/drivers/radeonsi/cik_sdma.c   | 4 ++--
>  src/gallium/drivers/radeonsi/si_dma.c | 4 ++--
>  src/gallium/drivers/radeonsi/si_state_draw.c  | 3 +++
>  6 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 6f4cb55..8ecc498 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3372,11 +3372,11 @@ static void evergreen_dma_copy(struct pipe_context 
> *ctx,
>   }
>  
>   if (src->format != dst->format || src_box->depth > 1 ||
> - rdst->dirty_level_mask != 0) {
> + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << 
> dst_level)) {
>   goto fallback;
>   }
>  
> - if (rsrc->dirty_level_mask) {
> + if ((rsrc->dirty_level_mask | rsrc->stencil_dirty_level_mask) & (1 << 
> src_level)) {
>   ctx->flush_resource(ctx, src);
>   }

AFAICT ctx->flush_resource only decompresses colour resources, so is the
second change really necessary? Same in si_dma_copy().


With that fixed, or if it's correct as is,

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] radeonsi: only do depth-only or stencil-only in-place decompression

2015-09-09 Thread Michel Dänzer

On 07.09.2015 07:17, Marek Olšák wrote:
> From: Marek Olšák 
> 
> instead of always doing both.
> Usually, only depth is needed, so stencil decompression is useless.

[...]

> @@ -247,6 +256,7 @@ void si_flush_depth_textures(struct si_context *sctx,
>   assert(tex->is_depth && !tex->is_flushing_texture);
>  
>   si_blit_decompress_depth_in_place(sctx, tex,
> +   ((struct 
> si_sampler_view*)view)->is_stencil_sampler,

This cast is a bit ugly, please use a separate local variable of type
struct si_sampler_view* for this.


With that fixed,

Reviewed-by: Michel Dänzer 


Correspondingly for patch 3.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-09 Thread Chris Wilson

On Thu, Sep 03, 2015 at 11:08:11AM -0700, Ian Romanick wrote:
> On 09/03/2015 09:05 AM, Chris Wilson wrote:
> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
> > and specifies a generic unsized GL_RGB internal format, match that to a
> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
> > 
> > Noticed while playing with mesa-demos/teximage:
> > 
> >   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
> >   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
> >   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
> > 
> > becomes
> > 
> >   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
> >   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
> >   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
> > 
> > on a puny Baytrail which is still far from what it is capable of. The
> > reason for the disparity is that the teximage demo uses a busy texture
> > which is performs an accelerated pixel conversion from the user's B5G6R5
> > into the native X8B8G8R8. After the patch, no conversion is required
> > allowing use of the blitter and memcpy fast paths.
> > 
> > Signed-off-by: Chris Wilson 
> > ---
> >  src/mesa/main/texformat.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
> > index fd9f335..866c7b3 100644
> > --- a/src/mesa/main/texformat.c
> > +++ b/src/mesa/main/texformat.c
> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum 
> > target,
> > case GL_RGB:
> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
> >   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
> > +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);
> 
> Oh man... I hate this function.  It's such a big pile of hacks.  I think
> this change is good for now, but I think you're other idea is in the
> right direction.  This function really should check, "If internalFormat
> is a generic base format that matches , is there supported
> format that matches  exactly?"

An issue that cropped in testing was glGenerateMipmap(). As this
switches to using B5G6R5 internally we also use that format for storing
the intermediate layers when generating the mipmap (thus incurring
quantisation errors at each step). What I have in mind to remedy this is
to convert the texture to floating point, generate the mipmap, then
convert back to the internal format. (However, I expect this also to
change test results!) Does that seem reasonable?

"The internal formats of the derived mipmap images all match those of
the levelbase image. The contents of the derived images are computed by
repeated, filtered reduction of the levelbase+1 image. For one- and
two-dimensional array and cube map array textures, each layer is
filtered independently."

Does not seem to exclude the use of resampling.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-09 Thread Erik Faye-Lund

On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson  wrote:
> If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
> and specifies a generic unsized GL_RGB internal format, match that to a
> texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
>
> Noticed while playing with mesa-demos/teximage:
>
>   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
>   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
>   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
>
> becomes
>
>   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
>   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
>   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
>
> on a puny Baytrail which is still far from what it is capable of. The
> reason for the disparity is that the teximage demo uses a busy texture
> which is performs an accelerated pixel conversion from the user's B5G6R5
> into the native X8B8G8R8. After the patch, no conversion is required
> allowing use of the blitter and memcpy fast paths.
>
> Signed-off-by: Chris Wilson 
> ---
>  src/mesa/main/texformat.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
> index fd9f335..866c7b3 100644
> --- a/src/mesa/main/texformat.c
> +++ b/src/mesa/main/texformat.c
> @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum 
> target,
> case GL_RGB:
>if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
>   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
> +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
> + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);

Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for
using BGR above, is the _REV suffix on the type...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-09 Thread Chris Wilson

On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote:
> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson  wrote:
> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
> > and specifies a generic unsized GL_RGB internal format, match that to a
> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
> >
> > Noticed while playing with mesa-demos/teximage:
> >
> >   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
> >   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
> >   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
> >
> > becomes
> >
> >   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
> >   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
> >   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
> >
> > on a puny Baytrail which is still far from what it is capable of. The
> > reason for the disparity is that the teximage demo uses a busy texture
> > which is performs an accelerated pixel conversion from the user's B5G6R5
> > into the native X8B8G8R8. After the patch, no conversion is required
> > allowing use of the blitter and memcpy fast paths.
> >
> > Signed-off-by: Chris Wilson 
> > ---
> >  src/mesa/main/texformat.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
> > index fd9f335..866c7b3 100644
> > --- a/src/mesa/main/texformat.c
> > +++ b/src/mesa/main/texformat.c
> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum 
> > target,
> > case GL_RGB:
> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
> >   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
> > +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);
> 
> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for
> using BGR above, is the _REV suffix on the type...

No, it's the first line that's "wrong" since the B10G10R10A2 matches
GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the
incoming data only has 3 channels (no alpha at all).

i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could
also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-09 Thread Erik Faye-Lund

On Wed, Sep 9, 2015 at 11:25 AM, Chris Wilson  wrote:
> On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote:
>> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson  
>> wrote:
>> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
>> > and specifies a generic unsized GL_RGB internal format, match that to a
>> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
>> >
>> > Noticed while playing with mesa-demos/teximage:
>> >
>> >   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
>> >   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
>> >   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
>> >
>> > becomes
>> >
>> >   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
>> >   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
>> >   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
>> >
>> > on a puny Baytrail which is still far from what it is capable of. The
>> > reason for the disparity is that the teximage demo uses a busy texture
>> > which is performs an accelerated pixel conversion from the user's B5G6R5
>> > into the native X8B8G8R8. After the patch, no conversion is required
>> > allowing use of the blitter and memcpy fast paths.
>> >
>> > Signed-off-by: Chris Wilson 
>> > ---
>> >  src/mesa/main/texformat.c | 2 ++
>> >  1 file changed, 2 insertions(+)
>> >
>> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
>> > index fd9f335..866c7b3 100644
>> > --- a/src/mesa/main/texformat.c
>> > +++ b/src/mesa/main/texformat.c
>> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum 
>> > target,
>> > case GL_RGB:
>> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
>> >   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
>> > +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
>> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);
>>
>> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for
>> using BGR above, is the _REV suffix on the type...
>
> No, it's the first line that's "wrong" since the B10G10R10A2 matches
> GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the
> incoming data only has 3 channels (no alpha at all).

Good point about the alpha channel. It sounds like it should be
changed to MESA_FORMAT_B10G10R10X2_UNORM instead.

But according to src/mesa/main/format_pack.c's
pack_ubyte_b10g10r10a2_unorm(), mesa's B10G10R10A2 corresponds to
OpenGL's UNSIGNED_INT_2_10_10_10_REV. So I think it matches GL_RGBA,
not GL_BGRA. The latter would mean another swizzle, AFAICT.

> i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could
> also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);

I think intel-specific hacks (like preferring B5G6R5 over R5G6B5)
shouldn't leak into _mesa_choose_tex_format(), so I think it'd be a
good move to add "RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);"
before the latter return.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 (part2) 23/59] glsl: add std430 interface packing support to ssbo store and unsized array length

2015-09-09 Thread Samuel Iglesias Gonsálvez



On 08/09/15 18:52, Jordan Justen wrote:
> On 2015-08-05 01:30:20, Iago Toral Quiroga wrote:
>> From: Samuel Iglesias Gonsalvez 
>>
>> Signed-off-by: Samuel Iglesias Gonsalvez 
>> ---
>>  src/glsl/lower_ubo_reference.cpp | 65 
>> ++--
>>  1 file changed, 49 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/glsl/lower_ubo_reference.cpp 
>> b/src/glsl/lower_ubo_reference.cpp
>> index 4f42abb..495ffcf 100644
>> --- a/src/glsl/lower_ubo_reference.cpp
>> +++ b/src/glsl/lower_ubo_reference.cpp
>> @@ -164,7 +164,7 @@ public:
>> void emit_access(bool is_write, ir_dereference *deref,
>>  ir_variable *base_offset, unsigned int deref_offset,
>>  bool row_major, int matrix_columns,
>> -unsigned write_mask);
>> +bool is_std430, unsigned write_mask);
>>  
>> ir_visitor_status visit_enter(class ir_expression *);
>> void check_ssbo_unsized_array_length_expression(class ir_expression *);
>> @@ -343,8 +343,15 @@ 
>> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
>>  const bool array_row_major =
>> is_dereferenced_thing_row_major(deref_array);
>>  
>> -array_stride = deref_array->type->std140_size(array_row_major);
>> -array_stride = glsl_align(array_stride, 16);
>> +/* The array type will give the correct interface packing
>> + * information
>> + */
>> +if (deref_array->array->type->interface_packing == 
>> GLSL_INTERFACE_PACKING_STD430) {
>> +   array_stride = 
>> deref_array->type->std430_size(array_row_major);
>> +} else {
>> +   array_stride = 
>> deref_array->type->std140_size(array_row_major);
>> +   array_stride = glsl_align(array_stride, 16);
>> +}
>>   }
>>  
>>   ir_rvalue *array_index = deref_array->array_index;
>> @@ -380,7 +387,12 @@ 
>> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
>>  
>>  ralloc_free(field_deref);
>>  
>> -unsigned field_align = 
>> type->std140_base_alignment(field_row_major);
>> +unsigned field_align = 0;
>> +
>> +if (struct_type->interface_packing == 
>> GLSL_INTERFACE_PACKING_STD430)
>> +   field_align = type->std430_base_alignment(field_row_major);
>> +else
>> +   field_align = type->std140_base_alignment(field_row_major);
>>  
>>  intra_struct_offset = glsl_align(intra_struct_offset, 
>> field_align);
>>  
>> @@ -388,7 +400,10 @@ 
>> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
>> deref_record->field) == 0)
>> break;
>>  
>> -intra_struct_offset += type->std140_size(field_row_major);
>> +if (struct_type->interface_packing == 
>> GLSL_INTERFACE_PACKING_STD430)
>> +   intra_struct_offset += type->std430_size(field_row_major);
>> +else
>> +   intra_struct_offset += type->std140_size(field_row_major);
>>  
>>  /* If the field just examined was itself a structure, apply rule
>>   * #9:
>> @@ -463,7 +478,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue 
>> **rvalue)
>>  
>> deref = new(mem_ctx) ir_dereference_variable(load_var);
>> emit_access(false, deref, load_offset, const_offset,
>> -   row_major, matrix_columns, 0);
>> +   row_major, matrix_columns, false, 0);
>> *rvalue = deref;
>>  
>> progress = true;
>> @@ -581,6 +596,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
>>   unsigned int deref_offset,
>>   bool row_major,
>>   int matrix_columns,
>> + bool is_std430,
>>   unsigned write_mask)
>>  {
>> if (deref->type->is_record()) {
>> @@ -599,7 +615,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
>>  
>>   emit_access(is_write, field_deref, base_offset,
>>   deref_offset + field_offset,
>> - row_major, 1,
>> + row_major, 1, is_std430,
>>   
>> writemask_for_size(field_deref->type->vector_elements));
>>  
>>   field_offset += field->type->std140_size(row_major);
>> @@ -608,7 +624,8 @@ lower_ubo_reference_visitor::emit_access(bool is_write,
>> }
>>  
>> if (deref->type->is_array()) {
>> -  unsigned array_stride =
>> +  unsigned array_stride = is_std430 ?
>> + deref->type->fields.array->std430_size(row_major) :
>>   glsl_align(deref->type->fields.array->std140_size(row_major), 16);
>>  
>>for (unsigned i = 0; i < deref->type->length; i++) {
>> @@ -618,7 +635,7 @@ lower_ubo_reference_visitor

Re: [Mesa-dev] [PATCH v4 (part2) 20/59] glsl: Add parser/compiler support for std430 interface packing qualifier

2015-09-09 Thread Samuel Iglesias Gonsálvez



On 31/08/15 08:38, Samuel Iglesias Gonsálvez wrote:
> 
> 
> On 29/08/15 02:27, Jordan Justen wrote:
>> On 2015-08-05 01:30:17, Iago Toral Quiroga wrote:
>>> From: Samuel Iglesias Gonsalvez 
>>>
>>> This commit also adds functions to calculate std430 base alignment and sizes
>>>
>>> Signed-off-by: Samuel Iglesias Gonsalvez 
>>> ---
>>>  src/glsl/ast.h   |   1 +
>>>  src/glsl/ast_to_hir.cpp  |  20 +--
>>>  src/glsl/ast_type.cpp|   1 +
>>>  src/glsl/glsl_parser.yy  |   2 +
>>>  src/glsl/glsl_types.cpp  | 117 
>>> +++
>>>  src/glsl/glsl_types.h|  15 -
>>>  src/glsl/link_uniform_blocks.cpp |  15 -
>>>  src/mesa/main/mtypes.h   |   3 +-
>>>  8 files changed, 165 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/glsl/ast.h b/src/glsl/ast.h
>>> index d8c6cea..ac3f80f 100644
>>> --- a/src/glsl/ast.h
>>> +++ b/src/glsl/ast.h
>>> @@ -491,6 +491,7 @@ struct ast_type_qualifier {
>>>  /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */
>>>  /** \{ */
>>>   unsigned std140:1;
>>> + unsigned std430:1;
>>>   unsigned shared:1;
>>>   unsigned packed:1;
>>>   unsigned column_major:1;
>>> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
>>> index 6e63d86..2dadb03 100644
>>> --- a/src/glsl/ast_to_hir.cpp
>>> +++ b/src/glsl/ast_to_hir.cpp
>>> @@ -2886,11 +2886,12 @@ apply_type_qualifier_to_variable(const struct 
>>> ast_type_qualifier *qual,
>>> var->data.depth_layout = ir_depth_layout_none;
>>>  
>>> if (qual->flags.q.std140 ||
>>> +   qual->flags.q.std430 ||
>>> qual->flags.q.packed ||
>>> qual->flags.q.shared) {
>>>_mesa_glsl_error(loc, state,
>>> -   "uniform block layout qualifiers std140, packed, 
>>> and "
>>> -   "shared can only be applied to uniform blocks, not "
>>> +   "uniform block layout qualifiers std140, std430, 
>>> packed, "
>>> +   "and shared can only be applied to uniform blocks, 
>>> not "
>>> "members");
>>> }
>>>  
>>> @@ -5655,12 +5656,14 @@ ast_process_structure_or_interface_block(exec_list 
>>> *instructions,
>>>   const struct ast_type_qualifier *const qual =
>>>  & decl_list->type->qualifier;
>>>   if (qual->flags.q.std140 ||
>>> + qual->flags.q.std430 ||
>>>   qual->flags.q.packed ||
>>>   qual->flags.q.shared) {
>>>  _mesa_glsl_error(&loc, state,
>>>   "uniform/shader storage block layout 
>>> qualifiers "
>>> - "std140, packed, and shared can only be 
>>> applied "
>>> - "to uniform/shader storage blocks, not 
>>> members");
>>> + "std140, std430, packed, and shared can only 
>>> be "
>>> + "applied to uniform/shader storage blocks, 
>>> not "
>>> + "members");
>>>   }
>>>  
>>>   if (qual->flags.q.constant) {
>>> @@ -5872,6 +5875,13 @@ ast_interface_block::hir(exec_list *instructions,
>>> this->block_name);
>>> }
>>>  
>>> +   if (!this->layout.flags.q.buffer &&
>>> +   this->layout.flags.q.std430) {
>>> +  _mesa_glsl_error(&loc, state,
>>> +   "std430 storage block layout qualifier is supported 
>>> "
>>> +   "only for shader storage blocks");
>>> +   }
>>> +
>>> /* The ast_interface_block has a list of ast_declarator_lists.  We
>>>  * need to turn those into ir_variables with an association
>>>  * with this uniform block.
>>> @@ -5881,6 +5891,8 @@ ast_interface_block::hir(exec_list *instructions,
>>>packing = GLSL_INTERFACE_PACKING_SHARED;
>>> } else if (this->layout.flags.q.packed) {
>>>packing = GLSL_INTERFACE_PACKING_PACKED;
>>> +   } else if (this->layout.flags.q.std430) {
>>> +  packing = GLSL_INTERFACE_PACKING_STD430;
>>> } else {
>>>/* The default layout is std140.
>>> */
>>> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
>>> index a4671e2..6e69ba2 100644
>>> --- a/src/glsl/ast_type.cpp
>>> +++ b/src/glsl/ast_type.cpp
>>> @@ -123,6 +123,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
>>> ubo_layout_mask.flags.q.std140 = 1;
>>> ubo_layout_mask.flags.q.packed = 1;
>>> ubo_layout_mask.flags.q.shared = 1;
>>> +   ubo_layout_mask.flags.q.std430 = 1;
>>>  
>>> ast_type_qualifier ubo_binding_mask;
>>> ubo_binding_mask.flags.i = 0;
>>> diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
>>> index 2b0c8bd..e1c0f3d 100644
>>> --- a/src/glsl/glsl_parser.yy
>>> +++ b/src/glsl/glsl_parser.yy
>>> @@ -1197,6 +1197,8 @@ layout_qualifier_id:
>>>  $$.flags.q.std140 = 1;
>>>   } else if (match_layout_

Re: [Mesa-dev] [PATCH v4 (part2) 24/59] glsl: a shader storage buffer must be smaller than the maximum size allowed

2015-09-09 Thread Samuel Iglesias Gonsálvez



On 08/09/15 19:50, Jordan Justen wrote:
> On 2015-08-05 01:30:21, Iago Toral Quiroga wrote:
>> From: Samuel Iglesias Gonsalvez 
>>
>> Otherwise, generate a link time error as per the
>> ARB_shader_storage_buffer_object spec.
>>
>> Signed-off-by: Samuel Iglesias Gonsalvez 
>> ---
>>  src/glsl/glsl_types.cpp  |  9 +++--
>>  src/glsl/link_uniform_blocks.cpp | 17 +
>>  src/glsl/linker.cpp  |  2 +-
>>  src/glsl/linker.h|  1 +
>>  4 files changed, 26 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
>> index 5758029..1b8e2e4 100644
>> --- a/src/glsl/glsl_types.cpp
>> +++ b/src/glsl/glsl_types.cpp
>> @@ -1312,7 +1312,7 @@ glsl_type::std140_size(bool row_major) const
>>  * rounded up to the next multiple of the base alignment of the
>>  * structure.
>>  */
>> -   if (this->is_record()) {
>> +   if (this->is_record() || this->is_interface()) {
>>unsigned size = 0;
>>unsigned max_align = 0;
>>  
>> @@ -1328,7 +1328,12 @@ glsl_type::std140_size(bool row_major) const
>>  
>>  const struct glsl_type *field_type = this->fields.structure[i].type;
>>  unsigned align = field_type->std140_base_alignment(field_row_major);
>> -size = glsl_align(size, align);
>> +
>> + /* Ignore unsized arrays when calculating size */
>> + if (field_type->is_unsized_array())
>> +continue;
>> +
>> + size = glsl_align(size, align);
>>  size += field_type->std140_size(field_row_major);
>>  
>>   max_align = MAX2(align, max_align);
>> diff --git a/src/glsl/link_uniform_blocks.cpp 
>> b/src/glsl/link_uniform_blocks.cpp
>> index c891d03..2cda78d 100644
>> --- a/src/glsl/link_uniform_blocks.cpp
>> +++ b/src/glsl/link_uniform_blocks.cpp
>> @@ -178,6 +178,7 @@ struct block {
>>  
>>  unsigned
>>  link_uniform_blocks(void *mem_ctx,
>> +struct gl_context *ctx,
>>  struct gl_shader_program *prog,
>>  struct gl_shader **shader_list,
>>  unsigned num_shaders,
>> @@ -299,6 +300,14 @@ link_uniform_blocks(void *mem_ctx,
>>  
>>  blocks[i].UniformBufferSize = parcel.buffer_size;
>>  
>> +/* Check SSBO size is lower than maximum supported size for 
>> SSBO */
>> +if (b->is_shader_storage &&
>> +parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
>> +   linker_error(prog, "shader storage block `%s' has size %d > 
>> %d",
>> +   block_type->name,
>> +   parcel.buffer_size,
>> +   ctx->Const.MaxShaderStorageBlockSize);
> 
> This error message (and below) seems confusing. What about "shader
> storage block `%s' has size %d, which is larger than than the maximum
> allowed (%d)"?
> 

Yeah, it is more clear. I will do that change.

> Reviewed-by: Jordan Justen 

Thanks for your reviews!

Sam

> 
>> +}
>>  blocks[i].NumUniforms =
>> (unsigned)(ptrdiff_t)(&variables[parcel.index] - 
>> blocks[i].Uniforms);
>>  
>> @@ -319,6 +328,14 @@ link_uniform_blocks(void *mem_ctx,
>>  
>>   blocks[i].UniformBufferSize = parcel.buffer_size;
>>  
>> + /* Check SSBO size is lower than maximum supported size for SSBO */
>> + if (b->is_shader_storage &&
>> + parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
>> +linker_error(prog, "shader storage block `%s' has size %d > %d",
>> +block_type->name,
>> +parcel.buffer_size,
>> +ctx->Const.MaxShaderStorageBlockSize);
>> + }
>>   blocks[i].NumUniforms =
>>  (unsigned)(ptrdiff_t)(&variables[parcel.index] - 
>> blocks[i].Uniforms);
>>  
>> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
>> index 9aff3ce..7ba685b 100644
>> --- a/src/glsl/linker.cpp
>> +++ b/src/glsl/linker.cpp
>> @@ -2023,7 +2023,7 @@ link_intrastage_shaders(void *mem_ctx,
>>  
>> /* Link up uniform blocks defined within this stage. */
>> const unsigned num_uniform_blocks =
>> -  link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders,
>> +  link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders,
>>&uniform_blocks);
>> if (!prog->LinkStatus)
>>return NULL;
>> diff --git a/src/glsl/linker.h b/src/glsl/linker.h
>> index ce3dc32..ada5a1f 100644
>> --- a/src/glsl/linker.h
>> +++ b/src/glsl/linker.h
>> @@ -56,6 +56,7 @@ link_uniform_blocks_are_compatible(const gl_uniform_block 
>> *a,
>>  
>>  extern unsigned
>>  link_uniform_blocks(void *mem_ctx,
>> +struct gl_context *ctx,
>>  struct gl_shader_program *prog,
>>  struct gl_shader **shader_list,
>>  unsigned num_shaders,
>> -- 
>> 1.9.1
>>
> 
___

[Mesa-dev] [PATCH 0/9] Implement textureQueryLod in softpipe

2015-09-09 Thread Krzesimir Nowak

The following patches do some preparatory refactoring in softpipe's
sp_tex_sample.{c,h} before actually implementing the feature.

There is also a patch that fixes softpipe's textureLod with bias. I
caught this bug when finished the textureQueryLod implementation - one
of four textureQueryLod tests were failing because of it. I sent
patches adding the testcases with non-zero lod-bias value to piglit
mailing list
(http://lists.freedesktop.org/archives/piglit/2015-September/017018.html).

Also, this is my first contribution to mesa, but I hope I didn't make
any obvious mistakes.

Thanks.

Krzesimir Nowak (9):
  tgsi: Remove trailing backslash in comment
  softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value.
  softpipe: Split compute_lambda_lod into two functions
  softpipe: Put mip_filter_func inside a struct
  softpipe: Split code getting a filter into separate function
  softpipe: Split 3D to 2D coords conversion into separate function.
  softpipe: Add functions for computing mipmap level
  tgsi: Add code for handling lodq opcode
  softpipe: Implement and enable textureQueryLod

 src/gallium/auxiliary/tgsi/tgsi_exec.c   |  46 +++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h   |  10 +
 src/gallium/drivers/softpipe/sp_screen.c |   2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 338 +--
 src/gallium/drivers/softpipe/sp_tex_sample.h |  32 ++-
 5 files changed, 345 insertions(+), 83 deletions(-)

-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level

2015-09-09 Thread Krzesimir Nowak

These functions will be used by textureQueryLod.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 100 +--
 src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
 2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cdec984..6e639e0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1937,6 +1937,38 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
 }
 
 static void
+clamp_lod(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float clamped[TGSI_QUAD_SIZE])
+{
+   const float min_lod = sp_samp->base.min_lod;
+   const float max_lod = sp_samp->base.max_lod;
+   const float min_level = sp_sview->base.u.tex.first_level;
+   const float max_level = sp_sview->base.u.tex.last_level;
+   int i;
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  float cl = lod[i];
+
+  cl = CLAMP(cl, min_lod, max_lod);
+  /* XXX: Is min_level ever different from 0?
+   */
+  cl = CLAMP(cl, 0, max_level - min_level);
+  clamped[i] = cl;
+   }
+}
+
+static void
+mip_level_linear(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+   clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
+static void
 mip_filter_linear(struct sp_sampler_view *sp_sview,
   struct sp_sampler *sp_samp,
   img_filter_func min_filter,
@@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
 }
 
 
+static void
+mip_level_nearest(struct sp_sampler_view *sp_sview,
+  struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float level[TGSI_QUAD_SIZE])
+{
+   const int first_level = sp_sview->base.u.tex.first_level;
+   int j;
+
+   clamp_lod(sp_sview, sp_samp, lod, level);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+  /* TODO: It should rather be:
+   * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F;
+   */
+  level[j] = first_level + (int)(level[j] + 0.5F);
+}
+
 /**
  * Compute nearest mipmap level from texcoords.
  * Then sample the texture level for four elements of a quad.
@@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
 
 
 static void
+mip_level_none(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+  level[j] = sp_sview->base.u.tex.first_level;
+   }
+}
+
+static void
 mip_filter_none(struct sp_sampler_view *sp_sview,
 struct sp_sampler *sp_samp,
 img_filter_func min_filter,
@@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
 
 
 static void
+mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+const float lod[TGSI_QUAD_SIZE],
+float level[TGSI_QUAD_SIZE])
+{
+   mip_level_none(sp_sview, sp_samp, lod, level);
+}
+
+static void
 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
  struct sp_sampler *sp_samp,
  img_filter_func min_filter,
@@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
 }
 
 
+static void
+mip_level_linear_aniso(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   mip_level_linear(sp_sview, sp_samp, lod, level);
+}
+
 /**
  * Sample 2D texture using an anisotropic filter.
  */
@@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
}
 }
 
+static void
+mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
+  struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float level[TGSI_QUAD_SIZE])
+{
+   mip_level_linear(sp_sview, sp_samp, lod, level);
+}
 
 /**
  * Specialized version of mip_filter_linear with hard-wired calls to
@@ -2515,12 +2603,12 @@ mip_filter_linear_2d_linear_repeat_POT(
}
 }
 
-static struct sp_mip mip_linear = {mip_filter_linear};
-static struct sp_mip mip_nearest = {mip_filter_nearest};
-static struct sp_mip mip_none = {mip_filter_none};
-static struct sp_mip mip_none_no_filter_select = 
{mip_filter_none_no_filter_select};
-static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso};
-static struct sp_mip mip_linear_2d_linear_repeat_

[Mesa-dev] [PATCH 1/9] tgsi: Remove trailing backslash in comment

2015-09-09 Thread Krzesimir Nowak

It clearly is here by accident.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 75cd0d5..9544623 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
  * execute a texture instruction.
  *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
  * instruction variants like proj, lod, and texture with lod bias.
  * sampler indicates which src register the sampler is contained in.
  */
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions

2015-09-09 Thread Krzesimir Nowak

textureQueryLod returns a vec2 with a mipmap information and a
LOD. The latter needs to be not clamped.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 55 
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 19188b0..38bdc93 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler,
 }
 
 
-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped into lod_min and lod_max.
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod results per-fragment lod.
  */
 static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
-   const float s[TGSI_QUAD_SIZE],
-   const float t[TGSI_QUAD_SIZE],
-   const float p[TGSI_QUAD_SIZE],
-   const float lod_in[TGSI_QUAD_SIZE],
-   enum tgsi_sampler_control control,
-   float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float s[TGSI_QUAD_SIZE],
+   const float t[TGSI_QUAD_SIZE],
+   const float p[TGSI_QUAD_SIZE],
+   const float lod_in[TGSI_QUAD_SIZE],
+   enum tgsi_sampler_control control,
+   float lod[TGSI_QUAD_SIZE])
 {
const struct pipe_sampler_state *sampler = &sp_samp->base;
-   float lod_bias = sampler->lod_bias;
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float lod_bias = sampler->lod_bias;
float lambda;
uint i;
 
@@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
   /* XXX FIXME */
case tgsi_sampler_derivs_explicit:
   lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lambda;
   break;
case tgsi_sampler_lod_bias:
   lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
  lod[i] = lambda + lod_in[i];
- lod[i] = CLAMP(lod[i], min_lod, max_lod);
   }
   break;
case tgsi_sampler_lod_explicit:
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
+ lod[i] = lod_in[i] + lod_bias;
   }
   break;
case tgsi_sampler_lod_zero:
case tgsi_sampler_gather:
   /* this is all static state in the sampler really need clamp here? */
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
   break;
default:
   assert(0);
@@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
 }
 
+/* Calculate level of detail for every fragment.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float s[TGSI_QUAD_SIZE],
+   const float t[TGSI_QUAD_SIZE],
+   const float p[TGSI_QUAD_SIZE],
+   const float lod_in[TGSI_QUAD_SIZE],
+   enum tgsi_sampler_control control,
+   float lod[TGSI_QUAD_SIZE])
+{
+   const struct pipe_sampler_state *sampler = &sp_samp->base;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
+   int i;
+
+   compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, control, 
lod);
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  lod[i] = CLAMP(lod[i], min_lod, max_lod);
+   }
+}
+
 static inline unsigned
 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
 {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode

2015-09-09 Thread Krzesimir Nowak

This introduces new vfunc in tgsi_sampler just for this opcode. I
decided against extending get_samples vfunc to return the mipmap level
and LOD - the function's prototype is already too scary and doing the
sampling for textureQueryLod would be a waste of time.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 
 2 files changed, 54 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9544623..054ad08 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach,
}
 }
 
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+  const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+  FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+  args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+  args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+args[0]->f,
+args[1]->f,
+args[2]->f,
+args[3]->f,
+tgsi_sampler_lod_none,
+r[0].f,
+r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+  store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+  store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
+   }
+}
 
 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -4378,6 +4416,12 @@ exec_instruction(
   exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
   break;
 
+   case TGSI_OPCODE_LODQ:
+  /* src[0] = texcoord */
+  /* src[1] = sampler unit */
+  exec_lodq(mach, inst);
+  break;
+
case TGSI_OPCODE_UP2H:
   assert (0);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5d56aab..556e0af 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,16 @@ struct tgsi_sampler
  const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
  const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS   4096
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function

2015-09-09 Thread Krzesimir Nowak

This function will be later used by textureQueryLod. The
img_filter_func are optional, because textureQueryLod will not need
them.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 53 +++-
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cd4a659..0a3fc20 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
}
 }
 
+/**
+ * Get mip filter, and optionally both img min filter and img mag filter
+ */
+static void
+get_filters(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+enum tgsi_sampler_control control,
+struct sp_mip **mip,
+img_filter_func *min,
+img_filter_func *mag)
+{
+   assert(mip);
+   if (control == tgsi_sampler_gather) {
+  *mip = &mip_nearest;
+  if (min)
+ *min = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+  *mip = &mip_linear_2d_linear_repeat_POT;
+   }
+   else {
+  *mip = &sp_samp->mip;
+  if (min || mag) {
+ img_filter_func tmp_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
+ if (min)
+*min = tmp_filter;
+ if (mag) {
+if (sp_samp->min_mag_equal) {
+   *mag = tmp_filter;
+}
+else {
+   *mag = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->base.mag_img_filter, false);
+}
+ }
+  }
+   }
+}
 
 static void
 sample_mip(struct sp_sampler_view *sp_sview,
@@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview,
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
 
-   if (filt_args->control == tgsi_sampler_gather) {
-  mip = &mip_nearest;
-  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
-   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  mip = &mip_linear_2d_linear_repeat_POT;
-   }
-   else {
-  mip = &sp_samp->mip;
-  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
-  if (sp_samp->min_mag_equal) {
- mag_img_filter = min_img_filter;
-  }
-  else {
- mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->base.mag_img_filter, false);
-  }
-   }
+   get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, 
&mag_img_filter);
 
mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
s, t, p, c0, lod, filt_args, rgba);
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/9] softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value.

2015-09-09 Thread Krzesimir Nowak

The level-of-detail bias wasn't simply added in the explicit LOD case.
This case seems to be tested only in piglit's
fs-texturequerylod-nearest-biased test, which is currently skipped, as
softpipe does not support textureQueryLod at the moment.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 565fca6..19188b0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1892,7 +1892,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
   break;
case tgsi_sampler_lod_explicit:
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
+ lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
   }
   break;
case tgsi_sampler_lod_zero:
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 9/9] softpipe: Implement and enable textureQueryLod

2015-09-09 Thread Krzesimir Nowak

Passes the shader piglit tests and introduces no regressions.

This commit finally makes use of the refactoring in previous
commits.
---
 src/gallium/drivers/softpipe/sp_screen.c |  2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 47 +++-
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 0bfd9c3..7ca8a67 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
   return 4;
case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
   return 1;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
   return 0;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 6e639e0..499c8f9 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3566,6 +3566,51 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
 }
 
+static void
+sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+  const unsigned sview_index,
+  const unsigned sampler_index,
+  const float s[TGSI_QUAD_SIZE],
+  const float t[TGSI_QUAD_SIZE],
+  const float p[TGSI_QUAD_SIZE],
+  const float c0[TGSI_QUAD_SIZE],
+  enum tgsi_sampler_control control,
+  float mipmap[TGSI_QUAD_SIZE],
+  float lod[TGSI_QUAD_SIZE])
+{
+   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
*)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
+   struct sp_mip *mip;
+   int i;
+   float cs[TGSI_QUAD_SIZE];
+   float ct[TGSI_QUAD_SIZE];
+   float cp[TGSI_QUAD_SIZE];
+
+   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   assert(sampler_index < PIPE_MAX_SAMPLERS);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+   /* always have a view here but texture is NULL if no sampler view was set. 
*/
+   if (!sp_sview->base.texture) {
+  for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ mipmap[i] = 0.0f;
+ lod[i] = 0.0f;
+  }
+  return;
+   }
+
+   sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+
+   compute_lambda_lod_not_clamped(sp_sview, sp_samp,
+  cs, ct, cp, lod_in, control, lod);
+   get_filters(sp_sview, sp_samp, control, &mip, NULL, NULL);
+   mip->level(sp_sview, sp_samp, lod, mipmap);
+}
 
 static void
 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
@@ -3602,7 +3647,7 @@ sp_create_tgsi_sampler(void)
samp->base.get_dims = sp_tgsi_get_dims;
samp->base.get_samples = sp_tgsi_get_samples;
samp->base.get_texel = sp_tgsi_get_texel;
+   samp->base.query_lod = sp_tgsi_query_lod;
 
return samp;
 }
-
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.

2015-09-09 Thread Krzesimir Nowak

This is to avoid tying the conversion to sampling - textureQueryLod
will need to do the conversion too, but it does not do any sampling.

So instead of a "sample" vfunc, there is a "convert" vfunc. The
drawback of this approach is that a "noop" convert copies 3 arrays of
floats. Would be nice to avoid it in some clean way.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 73 +---
 src/gallium/drivers/softpipe/sp_tex_sample.h | 20 
 2 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 0a3fc20..cdec984 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview,
 
 }
 
+static void
+convert_noop(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE])
+{
+   const size_t len = sizeof(float) * TGSI_QUAD_SIZE;
+
+   memcpy(, s, len);
+   memcpy(, t, len);
+   memcpy(, p, len);
+}
 
-/**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
- */
 static void
-sample_cube(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
-const float s[TGSI_QUAD_SIZE],
-const float t[TGSI_QUAD_SIZE],
-const float p[TGSI_QUAD_SIZE],
-const float c0[TGSI_QUAD_SIZE],
-const float c1[TGSI_QUAD_SIZE],
-const struct filter_args *filt_args,
-float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE])
 {
unsigned j;
-   float [4], [4];
-
-   /* Not actually used, but the intermediate steps that do the
-* dereferencing don't know it.
-*/
-   static float [4] = { 0, 0, 0, 0 };
 
[0] = c0[0];
[1] = c0[1];
@@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview,
  }
   }
}
-
-   sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba);
 }
 
 
@@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
 
   if (view->target == PIPE_TEXTURE_CUBE ||
   view->target == PIPE_TEXTURE_CUBE_ARRAY)
- sview->get_samples = sample_cube;
+ sview->convert_coords = convert_cube;
   else {
- sview->get_samples = sample_mip;
+ sview->convert_coords = convert_noop;
   }
   sview->pot2d = spr->pot &&
  (view->target == PIPE_TEXTURE_2D ||
@@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 enum tgsi_sampler_control control,
 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
*)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
struct filter_args filt_args;
+   float cs[TGSI_QUAD_SIZE];
+   float ct[TGSI_QUAD_SIZE];
+   float cp[TGSI_QUAD_SIZE];
+
assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
assert(sampler_index < PIPE_MAX_SAMPLERS);
-   assert(sp_samp->sp_sampler[sampler_index]);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
/* always have a view here but texture is NULL if no sampler view was set. 
*/
-   if (!sp_samp->sp_sview[sview_index].base.texture) {
+   if (!sp_sview->base.texture) {
   int i, j;
   for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
  for (i = 0; i < TGSI_QUAD_SIZE; i++) {
@@ -3456,11 +3470,12 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
   return;
}
 
+   sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+
filt_args.control = control;
filt_args.offset = offset;
-   sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
-  
sp_samp->sp_sampler[sampler_index],
-  s, t, p, c0, lod, &filt_args, 
rgba);
+
+   sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
 }

[Mesa-dev] [PATCH 4/9] softpipe: Put mip_filter_func inside a struct

2015-09-09 Thread Krzesimir Nowak

Putting this function pointer into a struct enables grouping of
several related functions in a single place. For now it is just a
single function, but the struct will be later extended with a
mip_level_func for returning mip level.
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 28 +---
 src/gallium/drivers/softpipe/sp_tex_sample.h |  5 -
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 38bdc93..cd4a659 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2515,6 +2515,12 @@ mip_filter_linear_2d_linear_repeat_POT(
}
 }
 
+static struct sp_mip mip_linear = {mip_filter_linear};
+static struct sp_mip mip_nearest = {mip_filter_nearest};
+static struct sp_mip mip_none = {mip_filter_none};
+static struct sp_mip mip_none_no_filter_select = 
{mip_filter_none_no_filter_select};
+static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso};
+static struct sp_mip mip_linear_2d_linear_repeat_POT = 
{mip_filter_linear_2d_linear_repeat_POT};
 
 /**
  * Do shadow/depth comparisons.
@@ -2918,18 +2924,18 @@ sample_mip(struct sp_sampler_view *sp_sview,
const struct filter_args *filt_args,
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
-   mip_filter_func mip_filter;
+   struct sp_mip *mip = NULL;
img_filter_func min_img_filter = NULL;
img_filter_func mag_img_filter = NULL;
 
if (filt_args->control == tgsi_sampler_gather) {
-  mip_filter = mip_filter_nearest;
+  mip = &mip_nearest;
   min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
} else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  mip_filter = mip_filter_linear_2d_linear_repeat_POT;
+  mip = &mip_linear_2d_linear_repeat_POT;
}
else {
-  mip_filter = sp_samp->mip_filter;
+  mip = &sp_samp->mip;
   min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
   if (sp_samp->min_mag_equal) {
  mag_img_filter = min_img_filter;
@@ -2939,8 +2945,8 @@ sample_mip(struct sp_sampler_view *sp_sview,
   }
}
 
-   mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-  s, t, p, c0, lod, filt_args, rgba);
+   mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+   s, t, p, c0, lod, filt_args, rgba);
 
if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
   sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, 
rgba);
@@ -3239,13 +3245,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
switch (sampler->min_mip_filter) {
case PIPE_TEX_MIPFILTER_NONE:
   if (sampler->min_img_filter == sampler->mag_img_filter)
- samp->mip_filter = mip_filter_none_no_filter_select;
+ samp->mip = mip_none_no_filter_select;
   else
- samp->mip_filter = mip_filter_none;
+ samp->mip = mip_none;
   break;
 
case PIPE_TEX_MIPFILTER_NEAREST:
-  samp->mip_filter = mip_filter_nearest;
+  samp->mip = mip_nearest;
   break;
 
case PIPE_TEX_MIPFILTER_LINEAR:
@@ -3257,11 +3263,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
   sampler->max_anisotropy <= 1) {
  samp->min_mag_equal_repeat_linear = TRUE;
   }
-  samp->mip_filter = mip_filter_linear;
+  samp->mip = mip_linear;
 
   /* Anisotropic filtering extension. */
   if (sampler->max_anisotropy > 1) {
- samp->mip_filter = mip_filter_linear_aniso;
+ samp->mip = mip_linear_aniso;
 
  /* Override min_img_filter:
   * min_img_filter needs to be set to NEAREST since we need to access
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h 
b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 7d1aafc..78541e1 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -128,6 +128,9 @@ struct sp_sampler_view
 
 };
 
+struct sp_mip {
+   mip_filter_func filter;
+};
 
 struct sp_sampler {
struct pipe_sampler_state base;
@@ -144,7 +147,7 @@ struct sp_sampler {
wrap_linear_func linear_texcoord_t;
wrap_linear_func linear_texcoord_p;
 
-   mip_filter_func mip_filter;
+   struct sp_mip mip;
 };
 
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB

2015-09-09 Thread Chris Wilson

On Wed, Sep 09, 2015 at 12:09:40PM +0200, Erik Faye-Lund wrote:
> On Wed, Sep 9, 2015 at 11:25 AM, Chris Wilson  
> wrote:
> > On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote:
> >> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson  
> >> wrote:
> >> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5
> >> > and specifies a generic unsized GL_RGB internal format, match that to a
> >> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware.
> >> >
> >> > Noticed while playing with mesa-demos/teximage:
> >> >
> >> >   TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec
> >> >   TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec
> >> >   GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec
> >> >
> >> > becomes
> >> >
> >> >   TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec
> >> >   TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec
> >> >   GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec
> >> >
> >> > on a puny Baytrail which is still far from what it is capable of. The
> >> > reason for the disparity is that the teximage demo uses a busy texture
> >> > which is performs an accelerated pixel conversion from the user's B5G6R5
> >> > into the native X8B8G8R8. After the patch, no conversion is required
> >> > allowing use of the blitter and memcpy fast paths.
> >> >
> >> > Signed-off-by: Chris Wilson 
> >> > ---
> >> >  src/mesa/main/texformat.c | 2 ++
> >> >  1 file changed, 2 insertions(+)
> >> >
> >> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
> >> > index fd9f335..866c7b3 100644
> >> > --- a/src/mesa/main/texformat.c
> >> > +++ b/src/mesa/main/texformat.c
> >> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, 
> >> > GLenum target,
> >> > case GL_RGB:
> >> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) {
> >> >   RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM);
> >> > +  } else if (type == GL_UNSIGNED_SHORT_5_6_5) {
> >> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM);
> >>
> >> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for
> >> using BGR above, is the _REV suffix on the type...
> >
> > No, it's the first line that's "wrong" since the B10G10R10A2 matches
> > GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the
> > incoming data only has 3 channels (no alpha at all).
> 
> Good point about the alpha channel. It sounds like it should be
> changed to MESA_FORMAT_B10G10R10X2_UNORM instead.
> 
> But according to src/mesa/main/format_pack.c's
> pack_ubyte_b10g10r10a2_unorm(), mesa's B10G10R10A2 corresponds to
> OpenGL's UNSIGNED_INT_2_10_10_10_REV. So I think it matches GL_RGBA,
> not GL_BGRA. The latter would mean another swizzle, AFAICT.

The mapping is (_mesa_format_from_format_and_type):

   case GL_UNSIGNED_INT_2_10_10_10_REV:
  if (format == GL_RGB)
 return MESA_FORMAT_R10G10B10X2_UNORM;
  if (format == GL_RGBA)
 return MESA_FORMAT_R10G10B10A2_UNORM;
  else if (format == GL_RGBA_INTEGER)
 return MESA_FORMAT_R10G10B10A2_UINT;
  else if (format == GL_BGRA)
 return MESA_FORMAT_B10G10R10A2_UNORM;
  else if (format == GL_BGRA_INTEGER)
 return MESA_FORMAT_B10G10R10A2_UINT;
  break;

The trick is that the packed formats are written as lsb first (or it may
just be native and my lsb bias is showing).

> > i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could
> > also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);
> 
> I think intel-specific hacks (like preferring B5G6R5 over R5G6B5)
> shouldn't leak into _mesa_choose_tex_format(),

Hah, did you look at _mesa_choose_tex_format()? :)
I sent an another patch to do the hardware agnostic unswizzled conversions.

> So I think it'd be a
> good move to add "RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);"
> before the latter return.

Not quite, because the mapping for 565 is:

   case GL_UNSIGNED_SHORT_5_6_5:
  if (format == GL_RGB)
 return MESA_FORMAT_B5G6R5_UNORM;
  else if (format == GL_BGR)
 return MESA_FORMAT_R5G6B5_UNORM;
  else if (format == GL_RGB_INTEGER)
 return MESA_FORMAT_B5G6R5_UINT;
  break;
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 80821] When LIBGL_ALWAYS_SOFTWARE is set, KHR_create_context is not supported

2015-09-09 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=80821

Emil Velikov  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #6 from Emil Velikov  ---
Props to Boyan, a slightly different version has landed for the DRI based
libEGL.
On the st/egl side, it has been dropped from upstream due lack of interested
maintainer :'(

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Marius Predut

Comparison with a signed expression and unsigned value
is converted to unsigned value, reason for minus value is interpreted
as a big unsigned value. For this case the "for" loop
is going into unexpected behavior.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Signed-off-by: Marius Predut 
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be3954..2ae33c0 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
   LOCAL_VARS;
   GLuint j;
 
+  if(count%4 != 0) return;
+
   INIT(GL_TRIANGLES);
 
   for (j = start; j < count-3; j += 4) {
@@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context 
*ctx,
ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 }
 else {
-   ok = HAVE_TRIANGLES; /* flatshading is ok. */
+   ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */
 }
 break;
   default:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.

2015-09-09 Thread Francisco Jerez

Mark Janes  writes:

> Francisco Jerez  writes:
>
>> Mark Janes  writes:
>>
>>> When I tested this, I saw an intermittent BSW gpu hang.  I haven't been
>>> able to confirm that it is due to the host-mem-barrier test.
>>>
>> It would probably be useful to know if the hang is due to any of the
>> image load/store tests or if it's something unrelated.
>
> Yes, you are right.  I will take some time tomorrow to catch the
> specific test.  Given the low rate of gpu hangs on BSW lately, I expect
> it will be due to image load/store tests.  However I need to confirm it.
>
Anyway, unless the BSW hang is introduced by this patch or caused by the
host-mem-barrier/Indirect/RaW subtest, feel free to open a separate bug
report for it.  It's unlikely to be related to this thread.

> -Mark


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/fs: Fix hang on IVB and VLV with image format mismatch.

2015-09-09 Thread Francisco Jerez

Ian Romanick  writes:

> On 09/03/2015 06:03 AM, Francisco Jerez wrote:
>> IVB and VLV hang sporadically when an untyped surface read or write
>> message is used to access a surface of format other than RAW, as may
>> happen when there is a mismatch between the format qualifier of the
>> image uniform and the format of the actual image bound to the
>> pipeline.  According to the spec this condition gives undefined
>> results but may not lead to program termination (which is one of the
>> possible outcomes of the hang).  Fix it by checking at runtime whether
>> the surface is of the right type.
>> 
>> Fixes the "arb_shader_image_load_store.invalid/format mismatch" piglit
>> subtest.
>> 
>> Reported-by: Mark Janes 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91718
>> CC: mesa-sta...@lists.freedesktop.org
>> ---
>>  .../drivers/dri/i965/brw_fs_surface_builder.cpp| 42 
>> +++---
>>  1 file changed, 38 insertions(+), 4 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> index f60afc9..57ce87f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> @@ -313,12 +313,42 @@ namespace {
>>  
>> namespace image_validity {
>>/**
>> +   * Check whether the bound image is suitable for untyped access.
>> +   */
>> +  brw_predicate
>> +  emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
>> +   brw_predicate pred)
>> +  {
>> + const brw_device_info *devinfo = bld.shader->devinfo;
>> + const fs_reg stride = offset(image, bld, 
>> BRW_IMAGE_PARAM_STRIDE_OFFSET);
>> +
>> + if (devinfo->gen == 7 && !devinfo->is_haswell) {
>> +/* Check whether the first stride component (i.e. the Bpp value)
>> + * is greater than four, what on Gen7 indicates that a surface 
>> of
>> + * type RAW has been bound for untyped access.  Reading or 
>> writing
>> + * to a surface of type other than RAW using untyped surface
>> + * messages causes a hang on IVB and VLV.
>> + */
>> +set_predicate(pred,
>> +  bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
>> +  BRW_CONDITIONAL_G));
>> +
>> +return BRW_PREDICATE_NORMAL;
>> + } else {
>> +/* More recent generations handle the format mismatch
>> + * gracefully.
>> + */
>> +return pred;
>> + }
>> +  }
>> +
>> +  /**
>> * Check whether there is an image bound at the given index and write
>> * the comparison result to f0.0.  Returns an appropriate predication
>> * mode to use on subsequent image operations.
>> */
>>brw_predicate
>> -  emit_surface_check(const fs_builder &bld, const fs_reg &image)
>> +  emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
>
> This change seems spurious (and also reasonable).
>
The problem is that this patch introduces a new kind of surface check
applicable to untyped surface reads and writes only, so it would have
been confusing to keep the other surface check which is applicable to
atomics only with its previous rather unspecific name.

>>{
>>   const brw_device_info *devinfo = bld.shader->devinfo;
>>   const fs_reg size = offset(image, bld, 
>> BRW_IMAGE_PARAM_SIZE_OFFSET);
>> @@ -895,7 +925,9 @@ namespace brw {
>>   * surface read on the result,
>>   */
>>  const brw_predicate pred =
>> -   emit_bounds_check(bld, image, saddr, dims);
>> +   emit_untyped_image_check(bld, image,
>> +emit_bounds_check(bld, image,
>> +  saddr, dims));
>
> These appear to be the only two users of emit_bounds_check... shouldn't
> the bounds still be tested?
>
Yes, they are still.

>>  
>>  /* and they don't know about surface coordinates, we need to
>>   * convert them to a raw memory offset.
>> @@ -1041,7 +1073,9 @@ namespace brw {
>>  * the surface write on the result,
>>  */
>> const brw_predicate pred =
>> -  emit_bounds_check(bld, image, saddr, dims);
>> +  emit_untyped_image_check(bld, image,
>> +   emit_bounds_check(bld, image,
>> + saddr, dims));
>>  
>> /* and, phew, they don't know about surface coordinates, we
>>  * need to convert them to a raw memory offset.
>> @@ -1072,7 +1106,7 @@ namespace brw {
>>   using namespace image_coordinates;
>>   using namespace surface_access;
>>

[Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines

2015-09-09 Thread Marius Predut

On PNV platform, for 1 pixel line thickness or less,
the general anti-aliasing algorithm gives up, and a garbage line is generated.
Setting a Line Width of 0.0 specifies the rasterization
of the "thinnest" (one-pixel-wide), non-antialiased lines.
Lines rendered with zero Line Width are rasterized using
Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 
3D/Media,
7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section.

This patch follow the same rules as patches fixing the
https://bugs.freedesktop.org/show_bug.cgi?id=28832
bug.

v1: Eduardo Lima Mitev:  Wrong indentation inside the if clause.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367

Signed-off-by: Marius Predut 
---
 src/mesa/drivers/dri/i915/i915_state.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i915/i915_state.c 
b/src/mesa/drivers/dri/i915/i915_state.c
index 4c83073..ebb4e9a 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf)

width = (int) (widthf * 2);
width = CLAMP(width, 1, 0xf);
+
+   if (ctx->Line.Width < 1.5 || widthf < 1.5) {
+ /* For 1 pixel line thickness or less, the general
+  * anti-aliasing algorithm gives up, and a garbage line is
+  * generated.  Setting a Line Width of 0.0 specifies the
+  * rasterization of the "thinnest" (one-pixel-wide),
+  * non-antialiased lines.
+  *
+  * Lines rendered with zero Line Width are rasterized using
+  * Grid Intersection Quantization rules as specified by
+  * bspec G45: Volume 2: 3D/Media,
+  * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section
+  */
+  width = 0;
+   }
lis4 |= width << S4_LINE_WIDTH_SHIFT;
 
if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.

2015-09-09 Thread Roland Scheidegger

Am 09.09.2015 um 05:56 schrieb Ian Romanick:
> On 08/15/2015 02:24 AM, Francisco Jerez wrote:
>> Roland Scheidegger  writes:
>>
>>> I guess though you'd need these bits when implementing things like
>>> ARB_fragment_shader_ordering? (Thus stuff actually looks useful but I
>>> don't know if anybody wants to implement it in the near term.)
>>>
>> I believe that could be implemented using the thread dependency
>> mechanism which is independent of the UAV coherency stuff.  BTW I wasn't
>> aware that the extension had become ARB.
> 
> It hasn't.  It's still just an Intel extension.

Ah yes that's right not sure what I was looking at. There's a very
similar NV extension though too (NV_fragment_shader_interlock), so there
may be an ARB extension in the future.

Roland


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Adaptive Vsync

2015-09-09 Thread Alex Deucher

On Sun, Sep 6, 2015 at 4:28 AM, Lauri Kasanen  wrote:
> On Sat, 5 Sep 2015 23:29:05 +
> Albert Freeman  wrote:
>
>> The reply from Eric Anholt made two suggestions that should not be
>> difficult to implement for someone who made the patch in the first
>> place. Why would code be committed when improvements could be easily
>> made? From what I have seen, this kind of thing happens even to
>> experienced mesa developers who know exactly what they are doing.
>>
>> It is much better to arrive at a solution without the issue now than
>> wait a few years (or some other period of time) for it to be replaced
>> by someone who has likely forgotten the details involved with its
>> implementation (or by someone who did not write the code in the first
>> place).
>
> Oh, absolutely - I had no issues with "this needs changing". My issue
> was with the fact it took months to get that. Had I come up with a new
> patch, it would likely have taken a similar time, months again, which
> did not inspire confidence.

You need to drive it.  Developers are busy and can miss patches
especially if the subject is not necessarily an area they are familiar
with.  This is not specific to mesa; the same is true about just about
any open source project.  If you don't get any feedback right away
send a gentle reminder to the list asking if there are any objections
stating your desire to get it committed.  Wait a bit more time and if
there is still no feedback, feel free to commit or ask someone to
commit it.  If you just drive by and dump patches with no follow up,
we have no idea how serious about the patches you are. Were they just
an RFC, did you really want to commit them?  If you don't follow up on
them, developers assume you don't really care about getting them
committed.

Alex

>
> Benjamin, if you want to take it forward, please do.
>
> - Lauri
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Marius Predut

Comparison with a signed expression and unsigned value
is converted to unsigned value, reason for minus value is interpreted
as a big unsigned value. For this case the "for" loop
is going into unexpected behavior.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Signed-off-by: Marius Predut 
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be3954..88ecc78 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
   LOCAL_VARS;
   GLuint j;
 
+  if(count%4 != 0) return;
+
   INIT(GL_TRIANGLES);
 
   for (j = start; j < count-3; j += 4) {
@@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context 
*ctx,
ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 }
 else {
-   ok = HAVE_TRIANGLES; /* flatshading is ok. */
+   ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */
 }
 break;
   default:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Adaptive Vsync

2015-09-09 Thread Lauri Kasanen

On Wed, 9 Sep 2015 09:11:50 -0400
Alex Deucher  wrote:

> > Oh, absolutely - I had no issues with "this needs changing". My issue
> > was with the fact it took months to get that. Had I come up with a new
> > patch, it would likely have taken a similar time, months again, which
> > did not inspire confidence.
> 
> You need to drive it.  Developers are busy and can miss patches
> especially if the subject is not necessarily an area they are familiar
> with.  This is not specific to mesa; the same is true about just about
> any open source project.  If you don't get any feedback right away
> send a gentle reminder to the list asking if there are any objections
> stating your desire to get it committed.  Wait a bit more time and if
> there is still no feedback, feel free to commit or ask someone to
> commit it.  If you just drive by and dump patches with no follow up,
> we have no idea how serious about the patches you are. Were they just
> an RFC, did you really want to commit them?  If you don't follow up on
> them, developers assume you don't really care about getting them
> committed.

I had followed up twice on ML in this particular case, and some times
on IRC.

- Lauri
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] i965: Move resolve from state-update notification to draw

2015-09-09 Thread Chris Wilson

Since _mesa_update_state() may be called several times per draw call,
we wish to be judicious about the amount of work we place inside the
intel_update_state() callback. By moving the current HiZ/color resolves
we need before drawing from out of the notify and into the draw itself,
we can save a few percent of overhead in OglBatch7 on Atom class
devices. The catch is that putting the resolve inside the draw call
itself causes recursion so we need a new vbo entry point so that we do
the resolves prior to sealing the vbo context.

No i965 piglits were harmed.
-Chris

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/7] i965: Move the predraw resolve from _mesa_update_state into brw_draw_prims()

2015-09-09 Thread Chris Wilson

Performing the HiZ/color resolves is a fairly heavyweight operation that
we don't need to perform on every state update, only before use the
unresolved surfaces in a draw call. Since we use meta operations to
perform the resolve, we cannot do so from inside brw_draw_prims() itself
(as that will reenter and corrupt the vbo context). Instead we move the
resolve into a vbo->resolve() hook that is called just before the
primitives are drawn.

Signed-off-by: Chris Wilson 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/drivers/dri/i965/brw_context.c | 24 
 src/mesa/drivers/dri/i965/brw_draw.c| 28 
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 907b2a0..fdeadda 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -160,36 +160,12 @@ static void
 intel_update_state(struct gl_context * ctx, GLuint new_state)
 {
struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_object *tex_obj;
-   struct intel_renderbuffer *depth_irb;
 
if (ctx->swrast_context)
   _swrast_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
 
brw->NewGLState |= new_state;
-
-   _mesa_unlock_context_textures(ctx);
-
-   /* Resolve the depth buffer's HiZ buffer. */
-   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-   if (depth_irb)
-  intel_renderbuffer_resolve_hiz(brw, depth_irb);
-
-   /* Resolve depth buffer and render cache of each enabled texture. */
-   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
-   for (int i = 0; i <= maxEnabledUnit; i++) {
-  if (!ctx->Texture.Unit[i]._Current)
-continue;
-  tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
-  if (!tex_obj || !tex_obj->mt)
-continue;
-  intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
-  intel_miptree_resolve_color(brw, tex_obj->mt);
-  brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
-   }
-
-   _mesa_lock_context_textures(ctx);
 }
 
 #define flushFront(screen)  ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : 
(screen)->dri2.loader->flushFrontBuffer)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index e5de420..d1b6279 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -342,6 +342,33 @@ brw_merge_inputs(struct brw_context *brw,
}
 }
 
+static void
+brw_draw_resolve(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_texture_object *tex_obj;
+   struct intel_renderbuffer *depth_irb;
+
+   /* Resolve the depth buffer's HiZ buffer. */
+   depth_irb = intel_get_renderbuffer(brw->ctx.DrawBuffer, BUFFER_DEPTH);
+   if (depth_irb)
+  intel_renderbuffer_resolve_hiz(brw, depth_irb);
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
+   int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
+   for (int i = 0; i <= maxEnabledUnit; i++) {
+  if (!brw->ctx.Texture.Unit[i]._Current)
+ continue;
+  tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);
+  if (!tex_obj || !tex_obj->mt)
+ continue;
+  intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+  intel_miptree_resolve_color(brw, tex_obj->mt);
+  brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+   }
+}
+
+
 /**
  * \brief Call this after drawing to mark which buffers need resolving
  *
@@ -615,6 +642,7 @@ brw_draw_init(struct brw_context *brw)
 
/* Register our drawing function:
 */
+   vbo->resolve = brw_draw_resolve;
vbo->draw_prims = brw_draw_prims;
 
for (int i = 0; i < VERT_ATTRIB_MAX; i++)
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/7] vbo: Reorder clearing NeedFlush flag with performing the flush

2015-09-09 Thread Chris Wilson

When we come to flush the vertices, we need to clear the NeedFlush
flag first before calling into the backend as that backend may trigger
FLUSH_VERTICES() itself (through use of meta) before dirtying the vertex
state. If the NeedFlush flag is still in place, we end up recursing
into the backend again (ad infinitum or until we hit an assertion).

Signed-off-by: Chris Wilson 
Cc: Brian Paul 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/vbo/vbo_exec_api.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 138cd60..5486104 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -1187,6 +1187,9 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, 
GLuint flags )
   return;
}
 
+   /* Don't try and recursively flush the current VBO */
+   ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags);
+
/* Flush (draw), and make sure VBO is left unmapped when done */
vbo_exec_FlushVertices_internal(exec, GL_TRUE);
 
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/7] i965: Move update_state from brw_meta_resolve_color() to after all resolves

2015-09-09 Thread Chris Wilson

Now that we moved the resolving from out of the _mesa_update_state()
callback we can remove the trick of calling _mesa_update_state() again
from inside the resolver (as we no longer need to adhere to the
_mesa_update_state() contract). However, we still need to make sure that
the context is up to date before proceeding on with the vbo draw.

Signed-off-by: Chris Wilson 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/drivers/dri/i965/brw_draw.c| 7 +++
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 9 -
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index d1b6279..1269b65 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -366,6 +366,13 @@ brw_draw_resolve(struct gl_context *ctx)
   intel_miptree_resolve_color(brw, tex_obj->mt);
   brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
}
+
+   /* As we may have resolved using meta operations and so left the
+* context in a dirty state, we need to complete the invalidations
+* before continuing on with the vbo draw (and its context checks).
+*/
+   if (ctx->NewState)
+  _mesa_update_state(ctx);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c 
b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index f5ecbb5..dd7f148 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -710,13 +710,4 @@ brw_meta_resolve_color(struct brw_context *brw,
_mesa_DeleteFramebuffers(1, &fbo);
 
_mesa_meta_end(ctx);
-
-   /* We're typically called from intel_update_state() and we're supposed to
-* return with the state all updated to what it was before
-* brw_meta_resolve_color() was called.  The meta rendering will have
-* messed up the state and we need to call _mesa_update_state() again to
-* get back to where we were supposed to be when resolve was called.
-*/
-   if (ctx->NewState)
-  _mesa_update_state(ctx);
 }
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/7] i965: Remove redundant update_state from intelReadPixels()

2015-09-09 Thread Chris Wilson

The very first operation performed by _mesa_readpixels() is to call
_mesa_update_state() - we do not need to do so ourselves.

Signed-off-by: Chris Wilson 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/drivers/dri/i965/intel_pixel_read.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c 
b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index eb366cd..16dcc55 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -269,14 +269,6 @@ intelReadPixels(struct gl_context * ctx,
intel_prepare_render(brw);
brw->front_buffer_dirty = dirty;
 
-   /* Update Mesa state before calling _mesa_readpixels().
-* XXX this may not be needed since ReadPixels no longer uses the
-* span code.
-*/
-
-   if (ctx->NewState)
-  _mesa_update_state(ctx);
-
_mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
 
/* There's an intel_prepare_render() call in intelSpanRenderStart(). */
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/7] i965: Only resolve the textures if a _NEW_STATE state change is flagged

2015-09-09 Thread Chris Wilson

Only walk through the set of enabled TextureUnits looking for a texture
that needs to be resolved if the context state flags a new texture.

Note that this will miss if the client is rendering into a texture that
it is reading from, though that needs explicit barriers (and futhermore
no piglits complain).

Signed-off-by: Chris Wilson 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 1269b65..0ffcc24 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -355,16 +355,18 @@ brw_draw_resolve(struct gl_context *ctx)
   intel_renderbuffer_resolve_hiz(brw, depth_irb);
 
/* Resolve depth buffer and render cache of each enabled texture. */
-   int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
-   for (int i = 0; i <= maxEnabledUnit; i++) {
-  if (!brw->ctx.Texture.Unit[i]._Current)
- continue;
-  tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);
-  if (!tex_obj || !tex_obj->mt)
- continue;
-  intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
-  intel_miptree_resolve_color(brw, tex_obj->mt);
-  brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+   if (brw->NewGLState & _NEW_TEXTURE) {
+  int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
+  for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!brw->ctx.Texture.Unit[i]._Current)
+continue;
+ tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+continue;
+ intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
+ intel_miptree_resolve_color(brw, tex_obj->mt);
+ brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+  }
}
 
/* As we may have resolved using meta operations and so left the
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-09 Thread Chris Wilson

A common problem with using HiZ and multisampling is that surfaces need
to resolved prior to use. Currently i965 does this inside its state
update hook, but that is a comparatively heavyweight operation that need
not be performed so frequently. The obvious solution (and therefore
fraught with dragons) is to move the HiZ/color resolves into the
brw_draw_prims() - however, the resolves are performed using meta and
end up re-entering brw_draw_prims() corrupting the context state of the
original call. To avoid the meta recursion, we can add a new callback
(vbo->resolve()) into the vbo pipeline that is called just before
vbo->draw().

Signed-off-by: Chris Wilson 
Cc: Brian Paul 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/vbo/vbo.h|  1 +
 src/mesa/vbo/vbo_context.c| 19 +++
 src/mesa/vbo/vbo_context.h|  1 +
 src/mesa/vbo/vbo_exec_array.c |  1 +
 src/mesa/vbo/vbo_exec_draw.c  |  5 -
 src/mesa/vbo/vbo_save_draw.c  |  2 ++
 6 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 2aaff5d..b64c468 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
  struct _glapi_table *exec);
 
 
+typedef void (*vbo_resolve_func)( struct gl_context *ctx);
 typedef void (*vbo_draw_func)( struct gl_context *ctx,
   const struct _mesa_prim *prims,
   GLuint nr_prims,
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index e3eb286..1f0b46a 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -148,11 +148,30 @@ static void init_mat_currval(struct gl_context *ctx)
 }
 
 
+static void nop_resolve(struct gl_context *ctx)
+{
+}
+
+static void nop_draw(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
+ struct gl_buffer_object *indirect)
+{
+}
+
 GLboolean _vbo_CreateContext( struct gl_context *ctx )
 {
struct vbo_context *vbo = CALLOC_STRUCT(vbo_context);
 
ctx->vbo_context = vbo;
+   vbo->draw_prims = nop_draw;
+   vbo->resolve = nop_resolve;
 
/* Initialize the arrayelt helper
 */
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index a376efe..c4033ee4 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -75,6 +75,7 @@ struct vbo_context {
/* Callback into the driver.  This must always succeed, the driver
 * is responsible for initiating any fallback actions required:
 */
+   vbo_resolve_func resolve;
vbo_draw_func draw_prims;
 };
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 34d2c1d..d592ae4 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -549,6 +549,7 @@ vbo_bind_arrays(struct gl_context *ctx)
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = &vbo->exec;
 
+   vbo->resolve(ctx);
vbo_draw_method(vbo, DRAW_ARRAYS);
 
if (exec->array.recalculate_inputs) {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2bfb0c3..fa5b06b 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -388,11 +388,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, 
GLboolean keepUnmapped)
 
   if (exec->vtx.copied.nr != exec->vtx.vert_count) {
 struct gl_context *ctx = exec->ctx;
+
+ vbo_context(ctx)->resolve( ctx );
 
 /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS
   * from _mesa_set_varying_vp_inputs().
  */
-vbo_exec_bind_arrays( ctx );
+ vbo_draw_method( vbo_context(ctx), DRAW_BEGIN_END);
+ vbo_exec_bind_arrays( ctx );
 
  if (ctx->NewState)
 _mesa_update_state( ctx );
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index b1fd689..2103b8e 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -297,6 +297,8 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void 
*data)
  return;
   }
 
+  vbo_context(ctx)->resolve(ctx);
+
   vbo_bind_vertex_list( ctx, node );
 
   vbo_draw_method(vbo_context(ctx), DRAW_DISPLAY_LIST);
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Nouveau] [PATCH mesa 2/3] nv30: Fix color resolving for nv3x cards

2015-09-09 Thread Hans de Goede


Hi,

On 08-09-15 09:53, Hans de Goede wrote:

Hi,

On 07-09-15 21:55, Ilia Mirkin wrote:

May I ask why you're doing 512x512 instead of 1024x1024? These are
already scaled up coordinates, so 1024x1024 should work no? Or is it
because of the seams on the edges? Do those not also appear with
512x512 or does it sample outside of the box?


This is my bad because of the bug fixed by patch 1/3 I had 512x512 in there
for a while, I've moved back and forth between 512 and 1024 a couple of times.

I've also tried 2048 but the hardware does not like that.

I will retest with 1024 and submit a fixed version.

I've not noticed any seams on the edges, even though I've been actively looking
for them.


Separately, why not use this approach on nv40 as well? I can't imagine
the blitter would be faster... does this result in lower quality?


I've the feeling the sifm bilinear filtering is more "blurry" then the
blitter one. I will do some more objective (ahem) tests on nv4x and get
back to you on this.


Ok I've run some tests comparing the rendering between using the blitter
and the sifm and the results seem identical, so one v2 using 1024x1024
blocks and doing so on all nv3x/nv4x cards coming up.

Regards,

Hans
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/7] i965: Remove redundant test for NULL intel_texture_object

2015-09-09 Thread Chris Wilson

Having checked whether the base class (gl_texture_object) is NULL, we
know that intel_texture_object itself cannot be NULL.

Signed-off-by: Chris Wilson 
Cc: Jordan Justen 
Cc: Jason Ekstrand 
Cc: Kenneth Graunke 
Cc: Francisco Jerez 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 0ffcc24..3cea331 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -346,7 +346,6 @@ static void
 brw_draw_resolve(struct gl_context *ctx)
 {
struct brw_context *brw = brw_context(ctx);
-   struct intel_texture_object *tex_obj;
struct intel_renderbuffer *depth_irb;
 
/* Resolve the depth buffer's HiZ buffer. */
@@ -358,11 +357,15 @@ brw_draw_resolve(struct gl_context *ctx)
if (brw->NewGLState & _NEW_TEXTURE) {
   int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
+ struct intel_texture_object *tex_obj;
+
  if (!brw->ctx.Texture.Unit[i]._Current)
 continue;
+
  tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);
- if (!tex_obj || !tex_obj->mt)
+ if (!tex_obj->mt)
 continue;
+
  intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
  intel_miptree_resolve_color(brw, tex_obj->mt);
  brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.

2015-09-09 Thread Roland Scheidegger

Am 09.09.2015 um 15:04 schrieb Roland Scheidegger:
> Am 09.09.2015 um 05:56 schrieb Ian Romanick:
>> On 08/15/2015 02:24 AM, Francisco Jerez wrote:
>>> Roland Scheidegger  writes:
>>>
 I guess though you'd need these bits when implementing things like
 ARB_fragment_shader_ordering? (Thus stuff actually looks useful but I
 don't know if anybody wants to implement it in the near term.)

>>> I believe that could be implemented using the thread dependency
>>> mechanism which is independent of the UAV coherency stuff.  BTW I wasn't
>>> aware that the extension had become ARB.
>>
>> It hasn't.  It's still just an Intel extension.
> 
> Ah yes that's right not sure what I was looking at. There's a very
> similar NV extension though too (NV_fragment_shader_interlock), so there
> may be an ARB extension in the future.
> 

Actually it already exists, ARB_fragment_shader_interlock. Was just
missing the different name :-).

Roland


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH mesa v2 1/2] nv30: Fix color resolving for nv3x cards

2015-09-09 Thread Hans de Goede

We do not have a generic blitter on nv3x cards, so we must use the
sifm object for color resolving.

This commit divides the sources and dest surfaces in to tiles which
match the constraints of the sifm object, so that color resolving
will work properly on nv3x cards.

Signed-off-by: Hans de Goede 
---
Changes in v2:
-Use 1024x1024 blocks
-Use the sifm on both nv3x and nv4x cards instead of only on nv3x cards
---
 src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 38 -
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 
b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 76bb8b8..735c718 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -149,14 +149,50 @@ static void
 nv30_resource_resolve(struct nv30_context *nv30,
   const struct pipe_blit_info *info)
 {
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
struct nv30_rect src, dst;
+   unsigned x, x0, x1, y, y1, w, h;
 
define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
   info->src.box.y, info->src.box.width, info->src.box.height, &src);
define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
   info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
 
-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+  h = y1 - y;
+  if (h > 1024)
+ h = 1024;
+
+  src.y0 = 0;
+  src.y1 = h;
+  src.h = h;
+
+  dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+  dst.h = h >> src_mt->ms_y;
+
+  for (x = x0; x < x1; x += w) {
+ w = x1 - x;
+ if (w > 1024)
+w = 1024;
+
+ src.offset = y * src.pitch + x * src.cpp;
+ src.x0 = 0;
+ src.x1 = w;
+ src.w = w;
+
+ dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+  (x >> src_mt->ms_x) * dst.cpp;
+ dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+ dst.w = w >> src_mt->ms_x;
+
+ nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+  }
+   }
 }
 
 void
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH mesa v2 2/2] nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA

2015-09-09 Thread Hans de Goede

Some modern apps try to use msaa without keeping in mind the
restrictions on videomem of older cards. Resulting in dmesg saying:

 [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
 [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
 [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12

Because we are running out of video memory, after which the program
using the msaa visual freezes, and eventually the entire system freezes.

To work around this we do not allow msaa visauls by default and allow
the user to override this via NV30_MAX_MSAA.

Signed-off-by: Hans de Goede 
---
Changes in v2:
-Allow re-enabling msaa by setting NV30_MAX_MSAA in the environment
---
 src/gallium/drivers/nouveau/nv30/nv30_screen.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 7aad26b..4b77f43 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -319,8 +319,28 @@ nv30_screen_is_format_supported(struct pipe_screen 
*pscreen,
 unsigned sample_count,
 unsigned bindings)
 {
-   if (sample_count > 4)
+   unsigned int max_sample_count;
+
+   /*
+* Some modern apps try to use msaa without keeping in mind the
+* restrictions on videomem of older cards. Resulting in dmesg saying:
+* [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+* [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+* [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+*
+* Because we are running out of video memory, after which the program
+* using the msaa visual freezes, and eventually the entire system freezes.
+*
+* To work around this we do not allow msaa visauls by default and allow
+* the user to override this via NV30_MAX_MSAA.
+*/
+   max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (max_sample_count > 4)
+  max_sample_count = 4;
+
+   if (sample_count > max_sample_count)
   return false;
+
if (!(0x0017 & (1 << sample_count)))
   return false;
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Brian Paul


On 09/09/2015 08:16 AM, Marius Predut wrote:

Comparison with a signed expression and unsigned value
is converted to unsigned value, reason for minus value is interpreted
as a big unsigned value. For this case the "for" loop
is going into unexpected behavior.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Signed-off-by: Marius Predut 
---
  src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be3954..88ecc78 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
LOCAL_VARS;
GLuint j;

+  if(count%4 != 0) return;


Should be:

   if (count % 4 != 0)
  return

In addition to following our code style, this allows one to put a 
breakpoint on the return statement if one wanted to catch that 
particular condition when debugging.




+
INIT(GL_TRIANGLES);

for (j = start; j < count-3; j += 4) {
@@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context 
*ctx,
ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 }
 else {
-   ok = HAVE_TRIANGLES; /* flatshading is ok. */
+   ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */


We put spaces around operators, so "count % 4" please.



 }
 break;
default:



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines

2015-09-09 Thread Brian Paul


On 09/09/2015 07:59 AM, Marius Predut wrote:

On PNV platform, for 1 pixel line thickness or less,
the general anti-aliasing algorithm gives up, and a garbage line is generated.
Setting a Line Width of 0.0 specifies the rasterization
of the "thinnest" (one-pixel-wide), non-antialiased lines.
Lines rendered with zero Line Width are rasterized using
Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 
3D/Media,
7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section.

This patch follow the same rules as patches fixing the
https://bugs.freedesktop.org/show_bug.cgi?id=28832
bug.

v1: Eduardo Lima Mitev:  Wrong indentation inside the if clause.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367

Signed-off-by: Marius Predut 
---
  src/mesa/drivers/dri/i915/i915_state.c | 15 +++
  1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i915/i915_state.c 
b/src/mesa/drivers/dri/i915/i915_state.c
index 4c83073..ebb4e9a 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf)

 width = (int) (widthf * 2);
 width = CLAMP(width, 1, 0xf);
+
+   if (ctx->Line.Width < 1.5 || widthf < 1.5) {


By using "1.5f" we can avoid some float/double conversion.



+ /* For 1 pixel line thickness or less, the general
+  * anti-aliasing algorithm gives up, and a garbage line is
+  * generated.  Setting a Line Width of 0.0 specifies the
+  * rasterization of the "thinnest" (one-pixel-wide),
+  * non-antialiased lines.
+  *
+  * Lines rendered with zero Line Width are rasterized using
+  * Grid Intersection Quantization rules as specified by
+  * bspec G45: Volume 2: 3D/Media,


bspec?


+  * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section
+  */
+  width = 0;
+   }
 lis4 |= width << S4_LINE_WIDTH_SHIFT;

 if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v6 2/2] drm/i915: Add soft-pinning API for execbuffer

2015-09-09 Thread Chris Wilson

On Wed, Sep 09, 2015 at 04:07:09PM +0200, Michał Winiarski wrote:
> From: Chris Wilson 
> 
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
> 
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by 
> Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
> 
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
> 
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
> 
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48BBADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.
> 
> v6: Rebase on top of current nightly. Dropped any references to
> EXEC_OBJECT_SUPPORTS_48BBADDRESS since those don't exist in upstream
> yet, and are not a dependency.
> 
> Cc: Chris Wilson 
> Cc: Akash Goel 
> Cc: Vinay Belgaumkar 
> Cc: Michal Winiarski 
> Cc: Zou Nanhai 
> Cc: Kristian Høgsberg 
> Signed-off-by: Chris Wilson 
> Signed-off-by: Thomas Daniel 
> Signed-off-by: Michał Winiarski 

Again, the precursors are not included in this series or upstream, so
NAK.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/9] Implement textureQueryLod in softpipe

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

The following patches do some preparatory refactoring in softpipe's
sp_tex_sample.{c,h} before actually implementing the feature.

There is also a patch that fixes softpipe's textureLod with bias. I
caught this bug when finished the textureQueryLod implementation - one
of four textureQueryLod tests were failing because of it. I sent
patches adding the testcases with non-zero lod-bias value to piglit
mailing list
(http://lists.freedesktop.org/archives/piglit/2015-September/017018.html).

Also, this is my first contribution to mesa, but I hope I didn't make
any obvious mistakes.

Thanks.

Krzesimir Nowak (9):
   tgsi: Remove trailing backslash in comment
   softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value.
   softpipe: Split compute_lambda_lod into two functions
   softpipe: Put mip_filter_func inside a struct
   softpipe: Split code getting a filter into separate function
   softpipe: Split 3D to 2D coords conversion into separate function.
   softpipe: Add functions for computing mipmap level
   tgsi: Add code for handling lodq opcode
   softpipe: Implement and enable textureQueryLod

  src/gallium/auxiliary/tgsi/tgsi_exec.c   |  46 +++-
  src/gallium/auxiliary/tgsi/tgsi_exec.h   |  10 +
  src/gallium/drivers/softpipe/sp_screen.c |   2 +-
  src/gallium/drivers/softpipe/sp_tex_sample.c | 338 +--
  src/gallium/drivers/softpipe/sp_tex_sample.h |  32 ++-
  5 files changed, 345 insertions(+), 83 deletions(-)



Overall, this looks really nice.  You did a good job of following the 
coding style and documenting your patches.


I just have some nitpicks on some of the patches that I'll post/reply 
on.  Basically, I think a few more things could be const-qualified and 
more comments on the new functions to explain what's going on would be 
helpful.


If you can clean up those minor things I'll commit the series for you.

Thanks!

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/9] softpipe: Put mip_filter_func inside a struct

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

Putting this function pointer into a struct enables grouping of
several related functions in a single place. For now it is just a
single function, but the struct will be later extended with a
mip_level_func for returning mip level.
---
  src/gallium/drivers/softpipe/sp_tex_sample.c | 28 +---
  src/gallium/drivers/softpipe/sp_tex_sample.h |  5 -
  2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 38bdc93..cd4a659 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2515,6 +2515,12 @@ mip_filter_linear_2d_linear_repeat_POT(
 }
  }

+static struct sp_mip mip_linear = {mip_filter_linear};
+static struct sp_mip mip_nearest = {mip_filter_nearest};
+static struct sp_mip mip_none = {mip_filter_none};
+static struct sp_mip mip_none_no_filter_select = 
{mip_filter_none_no_filter_select};
+static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso};
+static struct sp_mip mip_linear_2d_linear_repeat_POT = 
{mip_filter_linear_2d_linear_repeat_POT};


Can those be const-qualified too?




  /**
   * Do shadow/depth comparisons.
@@ -2918,18 +2924,18 @@ sample_mip(struct sp_sampler_view *sp_sview,
 const struct filter_args *filt_args,
 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
-   mip_filter_func mip_filter;
+   struct sp_mip *mip = NULL;
 img_filter_func min_img_filter = NULL;
 img_filter_func mag_img_filter = NULL;

 if (filt_args->control == tgsi_sampler_gather) {
-  mip_filter = mip_filter_nearest;
+  mip = &mip_nearest;
min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
 } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  mip_filter = mip_filter_linear_2d_linear_repeat_POT;
+  mip = &mip_linear_2d_linear_repeat_POT;
 }
 else {
-  mip_filter = sp_samp->mip_filter;
+  mip = &sp_samp->mip;
min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
if (sp_samp->min_mag_equal) {
   mag_img_filter = min_img_filter;
@@ -2939,8 +2945,8 @@ sample_mip(struct sp_sampler_view *sp_sview,
}
 }

-   mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-  s, t, p, c0, lod, filt_args, rgba);
+   mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
+   s, t, p, c0, lod, filt_args, rgba);

 if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, 
rgba);
@@ -3239,13 +3245,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
 switch (sampler->min_mip_filter) {
 case PIPE_TEX_MIPFILTER_NONE:
if (sampler->min_img_filter == sampler->mag_img_filter)
- samp->mip_filter = mip_filter_none_no_filter_select;
+ samp->mip = mip_none_no_filter_select;
else
- samp->mip_filter = mip_filter_none;
+ samp->mip = mip_none;
break;

 case PIPE_TEX_MIPFILTER_NEAREST:
-  samp->mip_filter = mip_filter_nearest;
+  samp->mip = mip_nearest;
break;

 case PIPE_TEX_MIPFILTER_LINEAR:
@@ -3257,11 +3263,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe,
sampler->max_anisotropy <= 1) {
   samp->min_mag_equal_repeat_linear = TRUE;
}
-  samp->mip_filter = mip_filter_linear;
+  samp->mip = mip_linear;

/* Anisotropic filtering extension. */
if (sampler->max_anisotropy > 1) {
- samp->mip_filter = mip_filter_linear_aniso;
+ samp->mip = mip_linear_aniso;

   /* Override min_img_filter:
* min_img_filter needs to be set to NEAREST since we need to access
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h 
b/src/gallium/drivers/softpipe/sp_tex_sample.h
index 7d1aafc..78541e1 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -128,6 +128,9 @@ struct sp_sampler_view

  };

+struct sp_mip {


I wonder if we could come up with a more descriptive name than "sp_mip". 
 Maybe something like sp_filter_funcs since it contains functions 
related to filtering?




+   mip_filter_func filter;
+};

  struct sp_sampler {
 struct pipe_sampler_state base;
@@ -144,7 +147,7 @@ struct sp_sampler {
 wrap_linear_func linear_texcoord_t;
 wrap_linear_func linear_texcoord_p;

-   mip_filter_func mip_filter;
+   struct sp_mip mip;
  };





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

textureQueryLod returns a vec2 with a mipmap information and a
LOD. The latter needs to be not clamped.
---
  src/gallium/drivers/softpipe/sp_tex_sample.c | 55 
  1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 19188b0..38bdc93 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler,
  }


-/* Calculate level of detail for every fragment.
+/* Calculate level of detail for every fragment. The computed value is not
+ * clamped into lod_min and lod_max.
   * \param lod_in per-fragment lod_bias or explicit_lod.
   * \param lod results per-fragment lod.
   */
  static inline void
-compute_lambda_lod(struct sp_sampler_view *sp_sview,
-   struct sp_sampler *sp_samp,
-   const float s[TGSI_QUAD_SIZE],
-   const float t[TGSI_QUAD_SIZE],
-   const float p[TGSI_QUAD_SIZE],
-   const float lod_in[TGSI_QUAD_SIZE],
-   enum tgsi_sampler_control control,
-   float lod[TGSI_QUAD_SIZE])
+compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview,


Not a big deal, but we use "unclamped" in various other places in Gallium.

Patches 1-3: Reviewed-by: Brian Paul 



+   struct sp_sampler *sp_samp,
+   const float s[TGSI_QUAD_SIZE],
+   const float t[TGSI_QUAD_SIZE],
+   const float p[TGSI_QUAD_SIZE],
+   const float lod_in[TGSI_QUAD_SIZE],
+   enum tgsi_sampler_control control,
+   float lod[TGSI_QUAD_SIZE])
  {
 const struct pipe_sampler_state *sampler = &sp_samp->base;
-   float lod_bias = sampler->lod_bias;
-   float min_lod = sampler->min_lod;
-   float max_lod = sampler->max_lod;
+   const float lod_bias = sampler->lod_bias;
 float lambda;
 uint i;

@@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
/* XXX FIXME */
 case tgsi_sampler_derivs_explicit:
lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lambda;
break;
 case tgsi_sampler_lod_bias:
lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
   lod[i] = lambda + lod_in[i];
- lod[i] = CLAMP(lod[i], min_lod, max_lod);
}
break;
 case tgsi_sampler_lod_explicit:
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
- lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
+ lod[i] = lod_in[i] + lod_bias;
}
break;
 case tgsi_sampler_lod_zero:
 case tgsi_sampler_gather:
/* this is all static state in the sampler really need clamp here? */
-  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
+  lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
break;
 default:
assert(0);
@@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
 }
  }

+/* Calculate level of detail for every fragment.
+ * \param lod_in per-fragment lod_bias or explicit_lod.
+ * \param lod results per-fragment lod.
+ */
+static inline void
+compute_lambda_lod(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float s[TGSI_QUAD_SIZE],
+   const float t[TGSI_QUAD_SIZE],
+   const float p[TGSI_QUAD_SIZE],
+   const float lod_in[TGSI_QUAD_SIZE],
+   enum tgsi_sampler_control control,
+   float lod[TGSI_QUAD_SIZE])
+{
+   const struct pipe_sampler_state *sampler = &sp_samp->base;
+   const float min_lod = sampler->min_lod;
+   const float max_lod = sampler->max_lod;
+   int i;
+
+   compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, control, 
lod);
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  lod[i] = CLAMP(lod[i], min_lod, max_lod);
+   }
+}
+
  static inline unsigned
  get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
  {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

This function will be later used by textureQueryLod. The
img_filter_func are optional, because textureQueryLod will not need
them.
---
  src/gallium/drivers/softpipe/sp_tex_sample.c | 53 +++-
  1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cd4a659..0a3fc20 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
 }
  }

+/**
+ * Get mip filter, and optionally both img min filter and img mag filter
+ */
+static void
+get_filters(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+enum tgsi_sampler_control control,
+struct sp_mip **mip,
+img_filter_func *min,
+img_filter_func *mag)
+{
+   assert(mip);
+   if (control == tgsi_sampler_gather) {
+  *mip = &mip_nearest;
+  if (min)
+ *min = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+  *mip = &mip_linear_2d_linear_repeat_POT;
+   }
+   else {
+  *mip = &sp_samp->mip;
+  if (min || mag) {
+ img_filter_func tmp_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
+ if (min)
+*min = tmp_filter;
+ if (mag) {
+if (sp_samp->min_mag_equal) {
+   *mag = tmp_filter;
+}
+else {
+   *mag = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->base.mag_img_filter, false);
+}
+ }
+  }
+   }
+}

  static void
  sample_mip(struct sp_sampler_view *sp_sview,
@@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview,
 img_filter_func min_img_filter = NULL;
 img_filter_func mag_img_filter = NULL;

-   if (filt_args->control == tgsi_sampler_gather) {
-  mip = &mip_nearest;
-  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
PIPE_TEX_FILTER_LINEAR, true);
-   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
-  mip = &mip_linear_2d_linear_repeat_POT;
-   }
-   else {
-  mip = &sp_samp->mip;
-  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->min_img_filter, false);
-  if (sp_samp->min_mag_equal) {
- mag_img_filter = min_img_filter;
-  }
-  else {
- mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
sp_samp->base.mag_img_filter, false);
-  }
-   }
+   get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, 
&mag_img_filter);

 mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
 s, t, p, c0, lod, filt_args, rgba);



Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

This is to avoid tying the conversion to sampling - textureQueryLod
will need to do the conversion too, but it does not do any sampling.

So instead of a "sample" vfunc, there is a "convert" vfunc. The
drawback of this approach is that a "noop" convert copies 3 arrays of
floats. Would be nice to avoid it in some clean way.
---
  src/gallium/drivers/softpipe/sp_tex_sample.c | 73 +---
  src/gallium/drivers/softpipe/sp_tex_sample.h | 20 
  2 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 0a3fc20..cdec984 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview,

  }

+static void
+convert_noop(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE])
+{
+   const size_t len = sizeof(float) * TGSI_QUAD_SIZE;
+
+   memcpy(, s, len);
+   memcpy(, t, len);
+   memcpy(, p, len);
+}

-/**
- * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
- * Put face info into the sampler faces[] array.
- */
  static void
-sample_cube(struct sp_sampler_view *sp_sview,
-struct sp_sampler *sp_samp,
-const float s[TGSI_QUAD_SIZE],
-const float t[TGSI_QUAD_SIZE],
-const float p[TGSI_QUAD_SIZE],
-const float c0[TGSI_QUAD_SIZE],
-const float c1[TGSI_QUAD_SIZE],
-const struct filter_args *filt_args,
-float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+convert_cube(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE],
+ float [TGSI_QUAD_SIZE])


Could you put some comments on top of these new functions to explain 
what they're doing?


Otherwise,
Reviewed-by: Brian Paul 



  {
 unsigned j;
-   float [4], [4];
-
-   /* Not actually used, but the intermediate steps that do the
-* dereferencing don't know it.
-*/
-   static float [4] = { 0, 0, 0, 0 };

 [0] = c0[0];
 [1] = c0[1];
@@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview,
   }
}
 }
-
-   sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba);
  }


@@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe,

if (view->target == PIPE_TEXTURE_CUBE ||
view->target == PIPE_TEXTURE_CUBE_ARRAY)
- sview->get_samples = sample_cube;
+ sview->convert_coords = convert_cube;
else {
- sview->get_samples = sample_mip;
+ sview->convert_coords = convert_noop;
}
sview->pot2d = spr->pot &&
   (view->target == PIPE_TEXTURE_2D ||
@@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
  enum tgsi_sampler_control control,
  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
-   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
+   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
*)tgsi_sampler;
+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
 struct filter_args filt_args;
+   float cs[TGSI_QUAD_SIZE];
+   float ct[TGSI_QUAD_SIZE];
+   float cp[TGSI_QUAD_SIZE];
+
 assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
 assert(sampler_index < PIPE_MAX_SAMPLERS);
-   assert(sp_samp->sp_sampler[sampler_index]);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
 /* always have a view here but texture is NULL if no sampler view was set. 
*/
-   if (!sp_samp->sp_sview[sview_index].base.texture) {
+   if (!sp_sview->base.texture) {
int i, j;
for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
@@ -3456,11 +3470,12 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
return;
 }

+   sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+
 filt_args.control = control;
 filt_args.offset = offset;
-   sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
-

Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

These functions will be used by textureQueryLod.
---
  src/gallium/drivers/softpipe/sp_tex_sample.c | 100 +--
  src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
  2 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index cdec984..6e639e0 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1937,6 +1937,38 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
  }

  static void
+clamp_lod(const struct sp_sampler_view *sp_sview,
+  const struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float clamped[TGSI_QUAD_SIZE])


Can you add a comment on this function too?  We also have 
compute_lambda_lod() which does lod clamping so the comments could help 
to explain what's different between them.




+{
+   const float min_lod = sp_samp->base.min_lod;
+   const float max_lod = sp_samp->base.max_lod;
+   const float min_level = sp_sview->base.u.tex.first_level;
+   const float max_level = sp_sview->base.u.tex.last_level;
+   int i;
+
+   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+  float cl = lod[i];
+
+  cl = CLAMP(cl, min_lod, max_lod);
+  /* XXX: Is min_level ever different from 0?
+   */
+  cl = CLAMP(cl, 0, max_level - min_level);
+  clamped[i] = cl;
+   }
+}
+
+static void
+mip_level_linear(struct sp_sampler_view *sp_sview,
+ struct sp_sampler *sp_samp,
+ const float lod[TGSI_QUAD_SIZE],
+ float level[TGSI_QUAD_SIZE])
+{
+   clamp_lod(sp_sview, sp_samp, lod, level);
+}
+
+static void
  mip_filter_linear(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
img_filter_func min_filter,
@@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
  }


+static void
+mip_level_nearest(struct sp_sampler_view *sp_sview,
+  struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float level[TGSI_QUAD_SIZE])
+{
+   const int first_level = sp_sview->base.u.tex.first_level;
+   int j;
+
+   clamp_lod(sp_sview, sp_samp, lod, level);
+   for (j = 0; j < TGSI_QUAD_SIZE; j++)
+  /* TODO: It should rather be:
+   * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F;
+   */
+  level[j] = first_level + (int)(level[j] + 0.5F);
+}
+
  /**
   * Compute nearest mipmap level from texcoords.
   * Then sample the texture level for four elements of a quad.
@@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,


  static void
+mip_level_none(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   int j;
+
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+  level[j] = sp_sview->base.u.tex.first_level;
+   }
+}
+
+static void
  mip_filter_none(struct sp_sampler_view *sp_sview,
  struct sp_sampler *sp_samp,
  img_filter_func min_filter,
@@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview,


  static void
+mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
+struct sp_sampler *sp_samp,
+const float lod[TGSI_QUAD_SIZE],
+float level[TGSI_QUAD_SIZE])
+{
+   mip_level_none(sp_sview, sp_samp, lod, level);
+}
+
+static void
  mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
   struct sp_sampler *sp_samp,
   img_filter_func min_filter,
@@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
  }


+static void
+mip_level_linear_aniso(struct sp_sampler_view *sp_sview,
+   struct sp_sampler *sp_samp,
+   const float lod[TGSI_QUAD_SIZE],
+   float level[TGSI_QUAD_SIZE])
+{
+   mip_level_linear(sp_sview, sp_samp, lod, level);
+}
+
  /**
   * Sample 2D texture using an anisotropic filter.
   */
@@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
 }
  }

+static void
+mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
+  struct sp_sampler *sp_samp,
+  const float lod[TGSI_QUAD_SIZE],
+  float level[TGSI_QUAD_SIZE])
+{
+   mip_level_linear(sp_sview, sp_samp, lod, level);
+}

  /**
   * Specialized version of mip_filter_linear with hard-wired calls to
@@ -2515,12 +2603,12 @@ mip_filter_linear_2d_linear_repeat_POT(
 }
  }

-static struct sp_mip mip_linear = {mip_filter_linear};
-static struct sp_mip mip_nearest = {mip_filter_nearest};
-static st

Re: [Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

This introduces new vfunc in tgsi_sampler just for this opcode. I
decided against extending get_samples vfunc to return the mipmap level
and LOD - the function's prototype is already too scary and doing the
sampling for textureQueryLod would be a waste of time.
---
  src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++
  src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 
  2 files changed, 54 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9544623..054ad08 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach,
 }
  }

+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+  const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+  FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+  args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+  args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+args[0]->f,
+args[1]->f,
+args[2]->f,
+args[3]->f,
+tgsi_sampler_lod_none,
+r[0].f,
+r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+  store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+  store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
+   }
+}

  static void
  exec_txd(struct tgsi_exec_machine *mach,
@@ -4378,6 +4416,12 @@ exec_instruction(
exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
break;

+   case TGSI_OPCODE_LODQ:
+  /* src[0] = texcoord */
+  /* src[1] = sampler unit */
+  exec_lodq(mach, inst);
+  break;
+
 case TGSI_OPCODE_UP2H:
assert (0);
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 5d56aab..556e0af 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -138,6 +138,16 @@ struct tgsi_sampler
   const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,


Can tgsi_sampler be const-qualified?



+ const unsigned sview_index,
+ const unsigned sampler_index,
+ const float s[TGSI_QUAD_SIZE],
+ const float t[TGSI_QUAD_SIZE],
+ const float p[TGSI_QUAD_SIZE],
+ const float c0[TGSI_QUAD_SIZE],
+ enum tgsi_sampler_control control,
+ float mipmap[TGSI_QUAD_SIZE],
+ float lod[TGSI_QUAD_SIZE]);
  };

  #define TGSI_EXEC_NUM_TEMPS   4096



Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 9/9] softpipe: Implement and enable textureQueryLod

2015-09-09 Thread Brian Paul


On 09/09/2015 04:35 AM, Krzesimir Nowak wrote:

Passes the shader piglit tests and introduces no regressions.

This commit finally makes use of the refactoring in previous
commits.
---
  src/gallium/drivers/softpipe/sp_screen.c |  2 +-
  src/gallium/drivers/softpipe/sp_tex_sample.c | 47 +++-
  2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 0bfd9c3..7ca8a67 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
 case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
 case PIPE_CAP_TEXTURE_GATHER_SM5:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
return 1;
 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-   case PIPE_CAP_TEXTURE_QUERY_LOD:
 case PIPE_CAP_SAMPLE_SHADING:
 case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
return 0;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 6e639e0..499c8f9 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -3566,6 +3566,51 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
 sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba);
  }

+static void
+sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler,
+  const unsigned sview_index,
+  const unsigned sampler_index,
+  const float s[TGSI_QUAD_SIZE],
+  const float t[TGSI_QUAD_SIZE],
+  const float p[TGSI_QUAD_SIZE],
+  const float c0[TGSI_QUAD_SIZE],
+  enum tgsi_sampler_control control,
+  float mipmap[TGSI_QUAD_SIZE],
+  float lod[TGSI_QUAD_SIZE])
+{
+   static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 };
+
+   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
*)tgsi_sampler;


Can that be const-qualified, and the tgsi_sampler function parameter?

Ideally, we'd also have a cast-wrapper function to use instead of an 
inline cast here and elsewhere.  That could be done as a follow-up.




+   struct sp_sampler_view *sp_sview;
+   struct sp_sampler *sp_samp;
+   struct sp_mip *mip;
+   int i;
+   float cs[TGSI_QUAD_SIZE];
+   float ct[TGSI_QUAD_SIZE];
+   float cp[TGSI_QUAD_SIZE];
+
+   assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+   assert(sampler_index < PIPE_MAX_SAMPLERS);
+   assert(sp_tgsi_samp->sp_sampler[sampler_index]);
+
+   sp_sview = &sp_tgsi_samp->sp_sview[sview_index];
+   sp_samp = sp_tgsi_samp->sp_sampler[sampler_index];
+   /* always have a view here but texture is NULL if no sampler view was set. 
*/
+   if (!sp_sview->base.texture) {
+  for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+ mipmap[i] = 0.0f;
+ lod[i] = 0.0f;
+  }
+  return;
+   }
+
+   sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp);
+
+   compute_lambda_lod_not_clamped(sp_sview, sp_samp,
+  cs, ct, cp, lod_in, control, lod);
+   get_filters(sp_sview, sp_samp, control, &mip, NULL, NULL);
+   mip->level(sp_sview, sp_samp, lod, mipmap);
+}

  static void
  sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
@@ -3602,7 +3647,7 @@ sp_create_tgsi_sampler(void)
 samp->base.get_dims = sp_tgsi_get_dims;
 samp->base.get_samples = sp_tgsi_get_samples;
 samp->base.get_texel = sp_tgsi_get_texel;
+   samp->base.query_lod = sp_tgsi_query_lod;

 return samp;
  }
-



Reviewed-by: Brian Paul 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v6 2/2] drm/i915: Add soft-pinning API for execbuffer

2015-09-09 Thread Michał Winiarski

From: Chris Wilson 

Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson).  Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48BBADDRESS.  User must assume responsibility for any
addressing workarounds.  Updated object2.offset field comment again to clarify
NO_RELOC case (Chris).  checkpatch cleanup.

v6: Rebase on top of current nightly. Dropped any references to
EXEC_OBJECT_SUPPORTS_48BBADDRESS since those don't exist in upstream
yet, and are not a dependency.

Cc: Chris Wilson 
Cc: Akash Goel 
Cc: Vinay Belgaumkar 
Cc: Michal Winiarski 
Cc: Zou Nanhai 
Cc: Kristian Høgsberg 
Signed-off-by: Chris Wilson 
Signed-off-by: Thomas Daniel 
Signed-off-by: Michał Winiarski 
---
 drivers/gpu/drm/i915/i915_dma.c|  3 ++
 drivers/gpu/drm/i915/i915_drv.h|  3 ++
 drivers/gpu/drm/i915/i915_gem.c| 52 ++
 drivers/gpu/drm/i915/i915_gem_evict.c  | 39 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++--
 include/uapi/drm/i915_drm.h| 10 +-
 6 files changed, 106 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 066a0ef..bd619af 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+   case I915_PARAM_HAS_EXEC_SOFTPIN:
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2ef83c5..8eb01e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2815,6 +2815,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 #define PIN_OFFSET_BIAS(1<<3)
 #define PIN_USER   (1<<4)
 #define PIN_UPDATE (1<<5)
+#define PIN_OFFSET_FIXED   (1<<6)
 #define PIN_OFFSET_MASK (~4095)
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3157,6 +3158,8 @@ int __must_check i915_gem_evict_something(struct 
drm_device *dev,
  unsigned long start,
  unsigned long end,
  unsigned flags);
+int __must_check
+i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 int i915_gem_evict_everything(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 41263cd..bb2ff81 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3437,22 +3437,42 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
if (IS_ERR(vma))
goto err_unpin;
 
+   if (flags & PIN_OFFSET_FIXED) {
+   uint64_t offset = flags & PIN_OFFSET_MASK;
+
+   if (offset & (alignment - 1)) {
+   ret = -EINVAL;
+   goto err_free_vma;
+   }
+   vma->node.start = offset;
+   vma->node.size = size;
+   vma->node.color = obj->cache_level;
+   ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+   if (ret) {
+   ret = i915_gem_evict_for_vma(vma);
+   if (ret == 0)
+   ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+   }
+   if (ret)
+   goto err_free_vma;
+   } else {
 search_free:
-   ret = drm_mm_insert_node_in_range_generic(&vm->mm

[Mesa-dev] [RFC libdrm] intel: Add support for softpin

2015-09-09 Thread Michał Winiarski

Softpin allows userspace to take greater control of GPU virtual address
space and eliminates the need of relocations. It can also be used to
mirror addresses between GPU and CPU (shared virtual memory).
Calls to drm_intel_bo_emit_reloc are still required to build the list of
drm_i915_gem_exec_objects at exec time, but no entries in relocs are
created. Self-relocs don't make any sense for softpinned objects and can
indicate a programming errors, thus are forbidden. Softpinned objects
are marked by asterisk in debug dumps.

Cc: Thomas Daniel 
Cc: Kristian Høgsberg 
Cc: Zou Nanhai 
Cc: Michel Thierry 
Cc: Ben Widawsky 
Cc: Chris Wilson 
Signed-off-by: Michał Winiarski 
---
 include/drm/i915_drm.h|   4 +-
 intel/intel_bufmgr.c  |   9 +++
 intel/intel_bufmgr.h  |   1 +
 intel/intel_bufmgr_gem.c  | 176 --
 intel/intel_bufmgr_priv.h |   7 ++
 5 files changed, 173 insertions(+), 24 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index ded43b1..2b99fc6 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_REVISION  32
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
+#define I915_PARAM_HAS_EXEC_SOFTPIN 37
 
 typedef struct drm_i915_getparam {
int param;
@@ -680,7 +681,8 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT  (1<<1)
 #define EXEC_OBJECT_WRITE  (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_PINNED (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
__u64 flags;
 
__u64 rsvd1;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 14ea9f9..bd92335 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -261,6 +261,15 @@ drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * 
tiling_mode,
 }
 
 int
+drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
+{
+   if (bo->bufmgr->bo_set_softpin_offset)
+   return bo->bufmgr->bo_set_softpin_offset(bo, offset);
+
+   return -ENODEV;
+}
+
+int
 drm_intel_bo_disable_reuse(drm_intel_bo *bo)
 {
if (bo->bufmgr->bo_disable_reuse)
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 95eecb8..62ea4a0 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * 
tiling_mode,
 int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
 int drm_intel_bo_busy(drm_intel_bo *bo);
 int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
+int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset);
 
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
 int drm_intel_bo_is_reusable(drm_intel_bo *bo);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 2723e21..615741c 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -184,6 +184,13 @@ struct _drm_intel_bo_gem {
drm_intel_reloc_target *reloc_target_info;
/** Number of entries in relocs */
int reloc_count;
+   /** Array of BOs that are referenced by this buffer and will be 
softpinned */
+   drm_intel_bo **softpin_target;
+   /** Number softpinned BOs that are referenced by this buffer */
+   int softpin_target_count;
+   /** Maximum amount of softpinned BOs that are referenced by this buffer 
*/
+   int softpin_target_size;
+
/** Mapped address for the buffer, saved across map/unmap cycles */
void *mem_virtual;
/** GTT virtual address for the buffer, saved across map/unmap cycles */
@@ -237,6 +244,11 @@ struct _drm_intel_bo_gem {
bool is_userptr;
 
/**
+* Whether this buffer is softpinned at offset specified by the user
+*/
+   bool is_softpin;
+
+   /**
 * Size in bytes of this buffer and its relocation descendents.
 *
 * Used to avoid costly tree walking in
@@ -384,8 +396,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem 
*bufmgr_gem)
drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
 
-   if (bo_gem->relocs == NULL) {
-   DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
+   if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
+   DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
+   bo_gem->is_softpin ? "*" : "",
bo_gem->name);
continue;
}
@@ -395,16 +408,33 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem 
*bufmgr_gem)
drm_intel_bo_gem *target_gem =
(drm_intel_bo_gem *) target_bo;
 
-   DBG("%2d: %d (%s)@0x%08llx ->

[Mesa-dev] [PATCH 1/2] drm/i915/gtt: Allow adventurous users to select enable_ppgtt=3

2015-09-09 Thread Michał Winiarski

While support for 48b ppgtt is here, parameter enabling it is not known
to the sanitize function. Let's update it to allow selecting
full_48bit_ppgtt using module parameter.

Cc: Michel Thierry 
Cc: Mika Kuoppala 
Signed-off-by: Michał Winiarski 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8786281..f598d63 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -104,9 +104,11 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
 {
bool has_aliasing_ppgtt;
bool has_full_ppgtt;
+   bool has_full_48bit_ppgtt;
 
has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
+   has_full_48bit_ppgtt = INTEL_INFO(dev)->gen >= 9 || IS_BROADWELL(dev);
 
if (intel_vgpu_active(dev))
has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +127,9 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, 
int enable_ppgtt)
if (enable_ppgtt == 2 && has_full_ppgtt)
return 2;
 
+   if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
+   return 3;
+
 #ifdef CONFIG_INTEL_IOMMU
/* Disable ppgtt on SNB if VT-d is on. */
if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [RFC libdrm] intel: Softpin support

2015-09-09 Thread Michał Winiarski

The goal of this series is to start a discussion whether the interface and
implementation of softpin support proposed for libdrm is acceptable by all
interested parties.
The i915 patches are added so that it's easy to apply the series on latest
drm-intel-nightly and libdrm master and start using softpin.

Michał Winiarski (1):
  drm/i915/gtt: Allow adventurous users to select enable_ppgtt=3
is not strictly necessary for softpin itself, but it is needed for SVM usecase.

Chris Wilson (1):
  drm/i915: Add soft-pinning API for execbuffer
is just v5 from Thomas Daniel rebased on top of current nightly.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa v2 1/2] nv30: Fix color resolving for nv3x cards

2015-09-09 Thread Ilia Mirkin

On Wed, Sep 9, 2015 at 9:52 AM, Hans de Goede  wrote:
> We do not have a generic blitter on nv3x cards, so we must use the
> sifm object for color resolving.
>
> This commit divides the sources and dest surfaces in to tiles which
> match the constraints of the sifm object, so that color resolving
> will work properly on nv3x cards.
>
> Signed-off-by: Hans de Goede 
> ---
> Changes in v2:
> -Use 1024x1024 blocks
> -Use the sifm on both nv3x and nv4x cards instead of only on nv3x cards

Thanks, pushed.

> ---
>  src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 38 
> -
>  1 file changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 
> b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> index 76bb8b8..735c718 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
> @@ -149,14 +149,50 @@ static void
>  nv30_resource_resolve(struct nv30_context *nv30,
>const struct pipe_blit_info *info)
>  {
> +   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
> struct nv30_rect src, dst;
> +   unsigned x, x0, x1, y, y1, w, h;
>
> define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
>info->src.box.y, info->src.box.width, info->src.box.height, &src);
> define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
>info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
>
> -   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
> +   x0 = src.x0;
> +   x1 = src.x1;
> +   y1 = src.y1;
> +
> +   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
> +   for (y = src.y0; y < y1; y += h) {
> +  h = y1 - y;
> +  if (h > 1024)
> + h = 1024;
> +
> +  src.y0 = 0;
> +  src.y1 = h;
> +  src.h = h;
> +
> +  dst.y1 = dst.y0 + (h >> src_mt->ms_y);
> +  dst.h = h >> src_mt->ms_y;
> +
> +  for (x = x0; x < x1; x += w) {
> + w = x1 - x;
> + if (w > 1024)
> +w = 1024;
> +
> + src.offset = y * src.pitch + x * src.cpp;
> + src.x0 = 0;
> + src.x1 = w;
> + src.w = w;
> +
> + dst.offset = (y >> src_mt->ms_y) * dst.pitch +
> +  (x >> src_mt->ms_x) * dst.cpp;
> + dst.x1 = dst.x0 + (w >> src_mt->ms_x);
> + dst.w = w >> src_mt->ms_x;
> +
> + nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
> +  }
> +   }
>  }
>
>  void
> --
> 2.4.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH mesa v2 2/2] nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA

2015-09-09 Thread Ilia Mirkin

On Wed, Sep 9, 2015 at 9:52 AM, Hans de Goede  wrote:
> Some modern apps try to use msaa without keeping in mind the
> restrictions on videomem of older cards. Resulting in dmesg saying:
>
>  [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
>  [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
>  [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
>
> Because we are running out of video memory, after which the program
> using the msaa visual freezes, and eventually the entire system freezes.
>
> To work around this we do not allow msaa visauls by default and allow
> the user to override this via NV30_MAX_MSAA.
>
> Signed-off-by: Hans de Goede 
> ---
> Changes in v2:
> -Allow re-enabling msaa by setting NV30_MAX_MSAA in the environment

Moved this to screen init and pushed. is_format_supported is called a
lot, not sure how expensive getenv() is, don't want to find out.

> ---
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c | 22 +-
>  1 file changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
> b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> index 7aad26b..4b77f43 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> @@ -319,8 +319,28 @@ nv30_screen_is_format_supported(struct pipe_screen 
> *pscreen,
>  unsigned sample_count,
>  unsigned bindings)
>  {
> -   if (sample_count > 4)
> +   unsigned int max_sample_count;
> +
> +   /*
> +* Some modern apps try to use msaa without keeping in mind the
> +* restrictions on videomem of older cards. Resulting in dmesg saying:
> +* [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
> +* [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
> +* [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
> +*
> +* Because we are running out of video memory, after which the program
> +* using the msaa visual freezes, and eventually the entire system 
> freezes.
> +*
> +* To work around this we do not allow msaa visauls by default and allow
> +* the user to override this via NV30_MAX_MSAA.
> +*/
> +   max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
> +   if (max_sample_count > 4)
> +  max_sample_count = 4;
> +
> +   if (sample_count > max_sample_count)
>return false;
> +
> if (!(0x0017 & (1 << sample_count)))
>return false;
>
> --
> 2.4.3
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: update num_dw in scissor_enable workaround

2015-09-09 Thread Grazvydas Ignotas

"r600g: apply disable workaround on all scissors" forgot to update
num_dw, fix it.

Fixes: fbb423b433 "r600g: apply disable workaround on all scissors"
Reported-and-tested-by: Markus Trippelsdorf 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91921
---
 src/gallium/drivers/r600/r600_state_common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index ae13411..cd70889 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -374,6 +374,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, 
void *state)
rs->scissor_enable != rctx->scissor.enable) {
rctx->scissor.enable = rs->scissor_enable;
rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
+   rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4;
r600_mark_atom_dirty(rctx, &rctx->scissor.atom);
}
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops

2015-09-09 Thread Chris Wilson

On Tue, Sep 08, 2015 at 01:16:11PM -0700, Ian Romanick wrote:
> On 09/06/2015 09:37 AM, Chris Wilson wrote:
> > glCopyTexImage behaves similarly to glReadPixels with respect to the
> > pixel transfer operations. Therefore if any are set we cannot use the
> > simply blit fast paths.
> 
> Do we have any test cases that hit these paths?  If not, we should
> probably con someone into writing some...

It looks like piglit should have coverage of PixelZoom in
tests/fbo/fbo-copypix.c, but only it ever uses a zoom of (1, 1). I could
not spot any of the other transfer operations being set for
glCopyTex*Image.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops

2015-09-09 Thread Ilia Mirkin

On Wed, Sep 9, 2015 at 12:44 PM, Chris Wilson  wrote:
> On Tue, Sep 08, 2015 at 01:16:11PM -0700, Ian Romanick wrote:
>> On 09/06/2015 09:37 AM, Chris Wilson wrote:
>> > glCopyTexImage behaves similarly to glReadPixels with respect to the
>> > pixel transfer operations. Therefore if any are set we cannot use the
>> > simply blit fast paths.
>>
>> Do we have any test cases that hit these paths?  If not, we should
>> probably con someone into writing some...
>
> It looks like piglit should have coverage of PixelZoom in
> tests/fbo/fbo-copypix.c, but only it ever uses a zoom of (1, 1). I could
> not spot any of the other transfer operations being set for
> glCopyTex*Image.

There was someone who filed a bug about pixel zoom not behaving well
when the source data was > max texture size (and a small zoom factor
obviously). I believe that he added patches and tests into some
bugzilla entry... maybe even sent them to the list. This would have
been about a year ago.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Advertise 65536 for GL_MAX_UNIFORM_BLOCK_SIZE.

2015-09-09 Thread Ian Romanick

On 09/08/2015 10:43 PM, Kenneth Graunke wrote:
> On Tuesday, September 08, 2015 08:50:41 PM Ian Romanick wrote:
>> On 09/08/2015 06:04 PM, Kenneth Graunke wrote:
>>> Our old value of 16384 is the minimum value.  DirectX apparently
>>> requires 65536 at a minimum; that's also what nVidia and the Intel
>>> Windows driver advertise.  AMD advertises MAX_INT.
>>>
>>> Ilia Mirkin noticed that "Shadow Warrior" uses UBOs larger than 16k
>>> on Nouveau, which advertises 65536 bytes for this limit.  Traces
>>> captured on Nouveau don't work on i965 because our lower limit causes
>>> the GLSL linker to reject the captured shaders.  While this isn't
>>> important in and of itself, it does suggest that raising the limit
>>> would be beneficial.
>>>
>>> We can read linear buffers up to 2^27 bytes in size, so raising this
>>> should be safe; we could probably even go larger.  For now, matching
>>> nVidia and Intel/Windows seems like a good plan.
>>>
>>> We have to reinitialize MaxCombinedUniformComponents as core Mesa will
>>> have set it based on a stale value for MaxUniformBlockSize.
>>>
>>> Signed-off-by: Kenneth Graunke 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_context.c | 9 +
>>>  1 file changed, 9 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
>>> b/src/mesa/drivers/dri/i965/brw_context.c
>>> index 907b2a0..7c1c133 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_context.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_context.c
>>> @@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context 
>>> *brw)
>>>  
>>> ctx->Const.StripTextureBorder = true;
>>>  
>>> +   ctx->Const.MaxUniformBlockSize = 65536;
>>> +   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
>>> +  struct gl_program_constants *prog = &ctx->Const.Program[i];
>>> +  prog->MaxUniformBlocks = 12;
>>
>> Is this actually necessary?  It looks like this is that value already
>> set in init_program_limits.  Either way, this patch is
>>
>> Reviewed-by: Ian Romanick 
> 
> You're right, setting prog->MaxUniformBlocks = 12 is unnecessary.
> 
> We do need to reset prog->MaxCombinedUniformComponents since
> init_program_limits computed a value using a stale MaxUniformBlockSize.
> 
> I figured since the three values are related, we may as well just
> explicitly initialize them all.  But if you'd prefer I take it out,
> I'm fine with that too.  What do you prefer?

təˈmeɪtoʊ / təˈmɑːtoʊ.  I mostly wanted to be sure I wasn't missing
something.

> Thanks for the quick review!
> 
> --Ken
> 
>>> +  prog->MaxCombinedUniformComponents =
>>> + prog->MaxUniformComponents +
>>> + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
>>> +   }
>>> +
>>> ctx->Const.MaxDualSourceDrawBuffers = 1;
>>> ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
>>> ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 
>>> max_samplers;
>>>
>>




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/fs: Fix hang on IVB and VLV with image format mismatch.

2015-09-09 Thread Ian Romanick

On 09/09/2015 05:30 AM, Francisco Jerez wrote:
> Ian Romanick  writes:
> 
>> On 09/03/2015 06:03 AM, Francisco Jerez wrote:
>>> IVB and VLV hang sporadically when an untyped surface read or write
>>> message is used to access a surface of format other than RAW, as may
>>> happen when there is a mismatch between the format qualifier of the
>>> image uniform and the format of the actual image bound to the
>>> pipeline.  According to the spec this condition gives undefined
>>> results but may not lead to program termination (which is one of the
>>> possible outcomes of the hang).  Fix it by checking at runtime whether
>>> the surface is of the right type.
>>>
>>> Fixes the "arb_shader_image_load_store.invalid/format mismatch" piglit
>>> subtest.
>>>
>>> Reported-by: Mark Janes 
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91718
>>> CC: mesa-sta...@lists.freedesktop.org
>>> ---
>>>  .../drivers/dri/i965/brw_fs_surface_builder.cpp| 42 
>>> +++---
>>>  1 file changed, 38 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>>> index f60afc9..57ce87f 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>>> @@ -313,12 +313,42 @@ namespace {
>>>  
>>> namespace image_validity {
>>>/**
>>> +   * Check whether the bound image is suitable for untyped access.
>>> +   */
>>> +  brw_predicate
>>> +  emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
>>> +   brw_predicate pred)
>>> +  {
>>> + const brw_device_info *devinfo = bld.shader->devinfo;
>>> + const fs_reg stride = offset(image, bld, 
>>> BRW_IMAGE_PARAM_STRIDE_OFFSET);
>>> +
>>> + if (devinfo->gen == 7 && !devinfo->is_haswell) {
>>> +/* Check whether the first stride component (i.e. the Bpp 
>>> value)
>>> + * is greater than four, what on Gen7 indicates that a surface 
>>> of
>>> + * type RAW has been bound for untyped access.  Reading or 
>>> writing
>>> + * to a surface of type other than RAW using untyped surface
>>> + * messages causes a hang on IVB and VLV.
>>> + */
>>> +set_predicate(pred,
>>> +  bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
>>> +  BRW_CONDITIONAL_G));
>>> +
>>> +return BRW_PREDICATE_NORMAL;
>>> + } else {
>>> +/* More recent generations handle the format mismatch
>>> + * gracefully.
>>> + */
>>> +return pred;
>>> + }
>>> +  }
>>> +
>>> +  /**
>>> * Check whether there is an image bound at the given index and write
>>> * the comparison result to f0.0.  Returns an appropriate predication
>>> * mode to use on subsequent image operations.
>>> */
>>>brw_predicate
>>> -  emit_surface_check(const fs_builder &bld, const fs_reg &image)
>>> +  emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
>>
>> This change seems spurious (and also reasonable).
>>
> The problem is that this patch introduces a new kind of surface check
> applicable to untyped surface reads and writes only, so it would have
> been confusing to keep the other surface check which is applicable to
> atomics only with its previous rather unspecific name.
> 
>>>{
>>>   const brw_device_info *devinfo = bld.shader->devinfo;
>>>   const fs_reg size = offset(image, bld, 
>>> BRW_IMAGE_PARAM_SIZE_OFFSET);
>>> @@ -895,7 +925,9 @@ namespace brw {
>>>   * surface read on the result,
>>>   */
>>>  const brw_predicate pred =
>>> -   emit_bounds_check(bld, image, saddr, dims);
>>> +   emit_untyped_image_check(bld, image,
>>> +emit_bounds_check(bld, image,
>>> +  saddr, dims));
>>
>> These appear to be the only two users of emit_bounds_check... shouldn't
>> the bounds still be tested?
>>
> Yes, they are still.

Ah... I completely missed that emit_bounds_check was moved into a
parameter of the call to emit_untyped_image_check.

Reviewed-by: Ian Romanick 

>>>  
>>>  /* and they don't know about surface coordinates, we need to
>>>   * convert them to a raw memory offset.
>>> @@ -1041,7 +1073,9 @@ namespace brw {
>>>  * the surface write on the result,
>>>  */
>>> const brw_predicate pred =
>>> -  emit_bounds_check(bld, image, saddr, dims);
>>> +  emit_untyped_image_check(bld, image,
>>> +   emit_bounds_check(bld, image,
>>> +

Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-09 Thread Kenneth Graunke

On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote:
> A common problem with using HiZ and multisampling is that surfaces need
> to resolved prior to use. Currently i965 does this inside its state
> update hook, but that is a comparatively heavyweight operation that need
> not be performed so frequently. The obvious solution (and therefore
> fraught with dragons) is to move the HiZ/color resolves into the
> brw_draw_prims() - however, the resolves are performed using meta and
> end up re-entering brw_draw_prims() corrupting the context state of the
> original call. To avoid the meta recursion, we can add a new callback
> (vbo->resolve()) into the vbo pipeline that is called just before
> vbo->draw().
> 
> Signed-off-by: Chris Wilson 
> Cc: Brian Paul 
> Cc: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Kenneth Graunke 
> Cc: Francisco Jerez 
> ---
>  src/mesa/vbo/vbo.h|  1 +
>  src/mesa/vbo/vbo_context.c| 19 +++
>  src/mesa/vbo/vbo_context.h|  1 +
>  src/mesa/vbo/vbo_exec_array.c |  1 +
>  src/mesa/vbo/vbo_exec_draw.c  |  5 -
>  src/mesa/vbo/vbo_save_draw.c  |  2 ++
>  6 files changed, 28 insertions(+), 1 deletion(-)

What problem are you trying to solve with this patch series?
Are you trying to fix bugs?  If so, what triggers them?
Are you trying to improve performance?  If so, do you have any data
demonstrating that it benefits some workload?


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC 1/2] nir: Add a new lowering pass nir_lower_vec_and_coalesce

2015-09-09 Thread Jason Ekstrand

On Sep 8, 2015 23:27, "Eduardo Lima Mitev"  wrote:
>
> This pass will propagate the destination components of a vecN
instructions,
> as destination of the instructions that define its sources; if certain
> conditions are met.
>
> If all the components of the destination register in the vecN instruction
> can be propagated, the instruction is removed. Otherwise, a new, reduced
> vecN instruction is emitted with the channels that remained.
>
> This effectively coalesces registers and reduces indirection.
>
> By now, this pass will only propagate to ALU instructions, but it could
> be extended to include other instructions like load_input intrinsic.
>
> It also propagates to instructions within the same block as the vecN
> instruction. But it could be made to work cross-block in the future,
> though there are non-trivial issues with this like considering
> registers that are written in different branches of a conditional.
> More analysis is needed to correctly cover these cases.
>
> This pass works on a NIR shader in final form (after SSA), and is
> expected to run before nir_lower_vec_to_movs().
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/nir.h|   1 +
>  src/glsl/nir/nir_lower_vec_and_coalesce.c | 301
++
>  3 files changed, 303 insertions(+)
>  create mode 100644 src/glsl/nir/nir_lower_vec_and_coalesce.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index c422303..015f242 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -48,6 +48,7 @@ NIR_FILES = \
> nir/nir_lower_vars_to_ssa.c \
> nir/nir_lower_var_copies.c \
> nir/nir_lower_vec_to_movs.c \
> +   nir/nir_lower_vec_and_coalesce.c \
> nir/nir_metadata.c \
> nir/nir_normalize_cubemap_coords.c \
> nir/nir_opt_constant_folding.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 3c375f3..6a89f1d 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1786,6 +1786,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);
>  void nir_remove_dead_variables(nir_shader *shader);
>
>  void nir_lower_vec_to_movs(nir_shader *shader);
> +void nir_lower_vec_and_coalesce(nir_shader *shader);
>  void nir_lower_alu_to_scalar(nir_shader *shader);
>  void nir_lower_load_const_to_scalar(nir_shader *shader);
>
> diff --git a/src/glsl/nir/nir_lower_vec_and_coalesce.c
b/src/glsl/nir/nir_lower_vec_and_coalesce.c
> new file mode 100644
> index 000..2b21ec1
> --- /dev/null
> +++ b/src/glsl/nir/nir_lower_vec_and_coalesce.c
> @@ -0,0 +1,301 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
a
> + * copy of this software and associated documentation files (the
"Software"),
> + * to deal in the Software without restriction, including without
limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
next
> + * paragraph) shall be included in all copies or substantial portions of
the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Eduardo Lima Mitev (el...@igalia.com)
> + *
> + */
> +
> +#include "nir.h"
> +
> +/*
> + * Implements a pass that lowers vecN instructions by propagating the
> + * components of their destinations, as the destination of the
> + * instructions that defines the sources of the vecN instruction.
> + *
> + * This effectively coalesces registers and reduces indirection.
> + *
> + * If all the components of the destination register in the vecN
> + * instruction can be propagated, the instruction is removed. Otherwise,
> + * a new, reduced vecN instruction is emitted with the channels that
> + * remained.
> + *
> + * By now, this pass will only propagate to ALU instructions, but it
could
> + * be extended to include load_const instructions or some intrinsics like
> + * load_input.
> + *
> + * This pass works on a NIR shader in final form (after SSA), and is
> + * expected to run before nir_lower_vec_to_movs().
> + */
> +
> +/**
> + * Clone an ALU instruction and override the destination with the one
given by
> + * new_dest. It copies sources from original ALU to the new one,
adjusting
> + * their swizzles.
> + *
> + * Ret

[Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Marius Predut

Comparison with a signed expression and unsigned value
is converted to unsigned value, reason for minus value is interpreted
as a big unsigned value. For this case the "for" loop
is going into unexpected behavior.

v1:Brian Paul: code style fix.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
Signed-off-by: Marius Predut 
---
 src/mesa/tnl_dd/t_dd_dmatmp.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 7be3954..79de224 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
   LOCAL_VARS;
   GLuint j;
 
+  if(count % 4 != 0)
+  return;
+
   INIT(GL_TRIANGLES);
 
   for (j = start; j < count-3; j += 4) {
@@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct gl_context 
*ctx,
ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
 }
 else {
-   ok = HAVE_TRIANGLES; /* flatshading is ok. */
+   ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */
 }
 break;
   default:
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-09 Thread Ian Romanick

On 09/09/2015 10:10 AM, Kenneth Graunke wrote:
> On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote:
>> A common problem with using HiZ and multisampling is that surfaces need
>> to resolved prior to use. Currently i965 does this inside its state
>> update hook, but that is a comparatively heavyweight operation that need
>> not be performed so frequently. The obvious solution (and therefore
>> fraught with dragons) is to move the HiZ/color resolves into the
>> brw_draw_prims() - however, the resolves are performed using meta and
>> end up re-entering brw_draw_prims() corrupting the context state of the
>> original call. To avoid the meta recursion, we can add a new callback
>> (vbo->resolve()) into the vbo pipeline that is called just before
>> vbo->draw().
>>
>> Signed-off-by: Chris Wilson 
>> Cc: Brian Paul 
>> Cc: Jordan Justen 
>> Cc: Jason Ekstrand 
>> Cc: Kenneth Graunke 
>> Cc: Francisco Jerez 
>> ---
>>  src/mesa/vbo/vbo.h|  1 +
>>  src/mesa/vbo/vbo_context.c| 19 +++
>>  src/mesa/vbo/vbo_context.h|  1 +
>>  src/mesa/vbo/vbo_exec_array.c |  1 +
>>  src/mesa/vbo/vbo_exec_draw.c  |  5 -
>>  src/mesa/vbo/vbo_save_draw.c  |  2 ++
>>  6 files changed, 28 insertions(+), 1 deletion(-)
> 
> What problem are you trying to solve with this patch series?
> Are you trying to fix bugs?  If so, what triggers them?
> Are you trying to improve performance?  If so, do you have any data
> demonstrating that it benefits some workload?

In 0/7 he says, "By moving the current HiZ/color resolves
we need before drawing from out of the notify and into the draw itself,
we can save a few percent of overhead in OglBatch7 on Atom class
devices."  Given the amount of change here, I too would like to see some
actual performance data.

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback

2015-09-09 Thread Kenneth Graunke

On Wednesday, September 09, 2015 10:19:10 AM Ian Romanick wrote:
> On 09/09/2015 10:10 AM, Kenneth Graunke wrote:
> > On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote:
> >> A common problem with using HiZ and multisampling is that surfaces need
> >> to resolved prior to use. Currently i965 does this inside its state
> >> update hook, but that is a comparatively heavyweight operation that need
> >> not be performed so frequently. The obvious solution (and therefore
> >> fraught with dragons) is to move the HiZ/color resolves into the
> >> brw_draw_prims() - however, the resolves are performed using meta and
> >> end up re-entering brw_draw_prims() corrupting the context state of the
> >> original call. To avoid the meta recursion, we can add a new callback
> >> (vbo->resolve()) into the vbo pipeline that is called just before
> >> vbo->draw().
> >>
> >> Signed-off-by: Chris Wilson 
> >> Cc: Brian Paul 
> >> Cc: Jordan Justen 
> >> Cc: Jason Ekstrand 
> >> Cc: Kenneth Graunke 
> >> Cc: Francisco Jerez 
> >> ---
> >>  src/mesa/vbo/vbo.h|  1 +
> >>  src/mesa/vbo/vbo_context.c| 19 +++
> >>  src/mesa/vbo/vbo_context.h|  1 +
> >>  src/mesa/vbo/vbo_exec_array.c |  1 +
> >>  src/mesa/vbo/vbo_exec_draw.c  |  5 -
> >>  src/mesa/vbo/vbo_save_draw.c  |  2 ++
> >>  6 files changed, 28 insertions(+), 1 deletion(-)
> > 
> > What problem are you trying to solve with this patch series?
> > Are you trying to fix bugs?  If so, what triggers them?
> > Are you trying to improve performance?  If so, do you have any data
> > demonstrating that it benefits some workload?
> 
> In 0/7 he says, "By moving the current HiZ/color resolves
> we need before drawing from out of the notify and into the draw itself,
> we can save a few percent of overhead in OglBatch7 on Atom class
> devices."  Given the amount of change here, I too would like to see some
> actual performance data.

Ah, sorry - 1-7 were CC'd to me, but 0 wasn't, so it ended up in a
different mail folder and I missed it.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions

2015-09-09 Thread Roland Scheidegger

Just a minor nitpick.

Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> textureQueryLod returns a vec2 with a mipmap information and a
> LOD. The latter needs to be not clamped.
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 55 
> 
>  1 file changed, 39 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index 19188b0..38bdc93 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler,
>  }
>  
>  
> -/* Calculate level of detail for every fragment.
> +/* Calculate level of detail for every fragment. The computed value is not
> + * clamped into lod_min and lod_max.
Probably should be just "clamped to"?

>   * \param lod_in per-fragment lod_bias or explicit_lod.
>   * \param lod results per-fragment lod.
>   */
>  static inline void
> -compute_lambda_lod(struct sp_sampler_view *sp_sview,
> -   struct sp_sampler *sp_samp,
> -   const float s[TGSI_QUAD_SIZE],
> -   const float t[TGSI_QUAD_SIZE],
> -   const float p[TGSI_QUAD_SIZE],
> -   const float lod_in[TGSI_QUAD_SIZE],
> -   enum tgsi_sampler_control control,
> -   float lod[TGSI_QUAD_SIZE])
> +compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview,
> +   struct sp_sampler *sp_samp,
> +   const float s[TGSI_QUAD_SIZE],
> +   const float t[TGSI_QUAD_SIZE],
> +   const float p[TGSI_QUAD_SIZE],
> +   const float lod_in[TGSI_QUAD_SIZE],
> +   enum tgsi_sampler_control control,
> +   float lod[TGSI_QUAD_SIZE])
>  {
> const struct pipe_sampler_state *sampler = &sp_samp->base;
> -   float lod_bias = sampler->lod_bias;
> -   float min_lod = sampler->min_lod;
> -   float max_lod = sampler->max_lod;
> +   const float lod_bias = sampler->lod_bias;
> float lambda;
> uint i;
>  
> @@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
>/* XXX FIXME */
> case tgsi_sampler_derivs_explicit:
>lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
> -  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
> +  lod[0] = lod[1] = lod[2] = lod[3] = lambda;
>break;
> case tgsi_sampler_lod_bias:
>lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
>for (i = 0; i < TGSI_QUAD_SIZE; i++) {
>   lod[i] = lambda + lod_in[i];
> - lod[i] = CLAMP(lod[i], min_lod, max_lod);
>}
>break;
> case tgsi_sampler_lod_explicit:
>for (i = 0; i < TGSI_QUAD_SIZE; i++) {
> - lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod);
> + lod[i] = lod_in[i] + lod_bias;
>}
>break;
> case tgsi_sampler_lod_zero:
> case tgsi_sampler_gather:
>/* this is all static state in the sampler really need clamp here? */
> -  lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
> +  lod[0] = lod[1] = lod[2] = lod[3] = lod_bias;
>break;
> default:
>assert(0);
> @@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
> }
>  }
>  
> +/* Calculate level of detail for every fragment.
> + * \param lod_in per-fragment lod_bias or explicit_lod.
> + * \param lod results per-fragment lod.
> + */
> +static inline void
> +compute_lambda_lod(struct sp_sampler_view *sp_sview,
> +   struct sp_sampler *sp_samp,
> +   const float s[TGSI_QUAD_SIZE],
> +   const float t[TGSI_QUAD_SIZE],
> +   const float p[TGSI_QUAD_SIZE],
> +   const float lod_in[TGSI_QUAD_SIZE],
> +   enum tgsi_sampler_control control,
> +   float lod[TGSI_QUAD_SIZE])
> +{
> +   const struct pipe_sampler_state *sampler = &sp_samp->base;
> +   const float min_lod = sampler->min_lod;
> +   const float max_lod = sampler->max_lod;
> +   int i;
> +
> +   compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, 
> control, lod);
> +   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
> +  lod[i] = CLAMP(lod[i], min_lod, max_lod);
> +   }
> +}
> +
>  static inline unsigned
>  get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
>  {
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/3] Move libdrm dependencies out of backend compiler

2015-09-09 Thread Kristian Høgsberg Kristensen

In trying to separate the backend compiler from the core driver, I ran
into a couple of inconsistencies with how the compute shader code is
split across files. First patch moves code around to follow our
convention. The last two patches moves fs precompile and perf debug
around to move libdrm dependencies out of the compiler.

Kristian Høgsberg Kristensen (3):
  i965: Move compute shader code around
  i965: Move brw_fs_precompile() to brw_wm.c
  i965: Move perf_debug code to brw_codegen_*_prog()

 src/mesa/drivers/dri/i965/Makefile.sources |   3 +-
 src/mesa/drivers/dri/i965/brw_cs.c | 206 +++
 src/mesa/drivers/dri/i965/brw_cs.cpp   | 535 -
 src/mesa/drivers/dri/i965/brw_cs.h |   9 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 132 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp |  19 -
 src/mesa/drivers/dri/i965/brw_vs.c |  29 +-
 src/mesa/drivers/dri/i965/brw_wm.c |  90 -
 src/mesa/drivers/dri/i965/gen7_cs_state.c  | 269 +++
 9 files changed, 664 insertions(+), 628 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp
 create mode 100644 src/mesa/drivers/dri/i965/gen7_cs_state.c

-- 
2.5.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] i965: Move brw_fs_precompile() to brw_wm.c

2015-09-09 Thread Kristian Høgsberg Kristensen

All other precompile functions live in the brw_.c files, make fs
follow the convention.

Signed-off-by: Kristian Høgsberg Kristensen 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 58 ---
 src/mesa/drivers/dri/i965/brw_wm.c   | 59 
 2 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e994bb3..801f650 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5229,64 +5229,6 @@ brw_wm_fs_emit(struct brw_context *brw,
return g.get_assembly(final_assembly_size);
 }
 
-extern "C" bool
-brw_fs_precompile(struct gl_context *ctx,
-  struct gl_shader_program *shader_prog,
-  struct gl_program *prog)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_wm_prog_key key;
-
-   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
-   struct brw_fragment_program *bfp = brw_fragment_program(fp);
-   bool program_uses_dfdy = fp->UsesDFdy;
-
-   memset(&key, 0, sizeof(key));
-
-   if (brw->gen < 6) {
-  if (fp->UsesKill)
- key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
-
-  if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
- key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
-
-  /* Just assume depth testing. */
-  key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-  key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
-   }
-
-   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
- BRW_FS_VARYING_INPUT_MASK) > 16)
-  key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
-
-   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
-
-   if (fp->Base.InputsRead & VARYING_BIT_POS) {
-  key.drawable_height = ctx->DrawBuffer->Height;
-   }
-
-   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
- ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
- BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
-
-   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
-  key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
-  key.nr_color_regions > 1;
-   }
-
-   key.program_string_id = bfp->id;
-
-   uint32_t old_prog_offset = brw->wm.base.prog_offset;
-   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
-
-   bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key);
-
-   brw->wm.base.prog_offset = old_prog_offset;
-   brw->wm.prog_data = old_prog_data;
-
-   return success;
-}
-
 const unsigned *
 brw_cs_emit(struct brw_context *brw,
 void *mem_ctx,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 6ee9284..8dfa142 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -26,6 +26,7 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_state.h"
+#include "brw_shader.h"
 #include "main/enums.h"
 #include "main/formats.h"
 #include "main/fbobject.h"
@@ -642,3 +643,61 @@ brw_upload_wm_prog(struct brw_context *brw)
}
brw->wm.base.prog_data = &brw->wm.prog_data->base;
 }
+
+bool
+brw_fs_precompile(struct gl_context *ctx,
+  struct gl_shader_program *shader_prog,
+  struct gl_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_wm_prog_key key;
+
+   struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+   struct brw_fragment_program *bfp = brw_fragment_program(fp);
+   bool program_uses_dfdy = fp->UsesDFdy;
+
+   memset(&key, 0, sizeof(key));
+
+   if (brw->gen < 6) {
+  if (fp->UsesKill)
+ key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+  if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+  /* Just assume depth testing. */
+  key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+  key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   }
+
+   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+  key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
+
+   brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base);
+
+   if (fp->Base.InputsRead & VARYING_BIT_POS) {
+  key.drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+ ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+ BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
+
+   if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+  key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) ||
+  key.nr_color_regions > 1;
+   }
+
+   key.program_string_id = bfp->id;
+
+   uint32_t old_prog_offset = brw->wm.base.prog_offset;
+   struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
+
+   bool success

[Mesa-dev] [PATCH 3/3] i965: Move perf_debug code to brw_codegen_*_prog()

2015-09-09 Thread Kristian Høgsberg Kristensen

We're trying to avoid a libdrm dependency in the core compiler, so let's
move the perf_debug code one level up from the brw_*_emit() helpers to
the brw_codegen_*_prog() helpers.

Signed-off-by: Kristian Høgsberg Kristensen 
---
 src/mesa/drivers/dri/i965/brw_cs.c | 31 -
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 41 --
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 
 src/mesa/drivers/dri/i965/brw_vs.c | 29 +++-
 src/mesa/drivers/dri/i965/brw_wm.c | 31 -
 5 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
b/src/mesa/drivers/dri/i965/brw_cs.c
index 24e4b16..8d9d9d2 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -63,8 +63,11 @@ brw_codegen_cs_prog(struct brw_context *brw,
void *mem_ctx = ralloc_context(NULL);
GLuint program_size;
struct brw_cs_prog_data prog_data;
+   bool start_busy = false;
+   double start_time = 0;
 
-   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   struct brw_shader *cs =
+  (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
assert (cs);
 
memset(&prog_data, 0, sizeof(prog_data));
@@ -73,8 +76,8 @@ brw_codegen_cs_prog(struct brw_context *brw,
 * prog_data associated with the compiled program, and which will be freed
 * by the state cache.
 */
-   int param_count = cs->num_uniform_components +
- cs->NumImages * BRW_IMAGE_PARAM_SIZE;
+   int param_count = cs->base.num_uniform_components +
+ cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
 
/* The backend also sometimes adds params for texture size. */
param_count += 2 * 
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -83,9 +86,15 @@ brw_codegen_cs_prog(struct brw_context *brw,
prog_data.base.pull_param =
   rzalloc_array(NULL, const gl_constant_value *, param_count);
prog_data.base.image_param =
-  rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
+  rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages);
prog_data.base.nr_params = param_count;
-   prog_data.base.nr_image_params = cs->NumImages;
+   prog_data.base.nr_image_params = cs->base.NumImages;
+
+   if (unlikely(brw->perf_debug)) {
+  start_busy = (brw->batch.last_bo &&
+drm_intel_bo_busy(brw->batch.last_bo));
+  start_time = get_time();
+   }
 
program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
  &cp->program, prog, &program_size);
@@ -94,6 +103,18 @@ brw_codegen_cs_prog(struct brw_context *brw,
   return false;
}
 
+   if (unlikely(brw->perf_debug) && cs) {
+  if (cs->compiled_once) {
+ _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles");
+  }
+  cs->compiled_once = true;
+
+  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("CS compile took %.03f ms and stalled the GPU\n",
+(get_time() - start_time) * 1000);
+  }
+   }
+
if (prog_data.base.total_scratch) {
   brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo,
  prog_data.base.total_scratch * brw->max_cs_threads);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 801f650..fcd0c25 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5129,15 +5129,6 @@ brw_wm_fs_emit(struct brw_context *brw,
struct gl_shader_program *prog,
unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-  start_busy = (brw->batch.last_bo &&
-drm_intel_bo_busy(brw->batch.last_bo));
-  start_time = get_time();
-   }
-
struct brw_shader *shader = NULL;
if (prog)
   shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
@@ -5215,17 +5206,6 @@ brw_wm_fs_emit(struct brw_context *brw,
if (simd16_cfg)
   prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16);
 
-   if (unlikely(brw->perf_debug) && shader) {
-  if (shader->compiled_once)
- brw_wm_debug_recompile(brw, prog, key);
-  shader->compiled_once = true;
-
-  if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
- perf_debug("FS compile took %.03f ms and stalled the GPU\n",
-(get_time() - start_time) * 1000);
-  }
-   }
-
return g.get_assembly(final_assembly_size);
 }
 
@@ -5238,15 +5218,6 @@ brw_cs_emit(struct brw_context *brw,
 struct gl_shader_program *prog,
 unsigned *final_assembly_size)
 {
-   bool start_busy = false;
-   double start_time = 0;
-
-   if (unlikely(brw->perf_debug)) {
-  start_busy = (brw->batch.last_bo &&
-drm_intel_bo_busy(brw->batch.last_bo));

[Mesa-dev] [PATCH 1/3] i965: Move compute shader code around

2015-09-09 Thread Kristian Høgsberg Kristensen

This moves the compute shader code around in order to make the way the
code is split up more consistent. There should be no functional changes.
Typically we have a few files per stage:

brw_vs.c, brw_wm.c brw_gs.c:

code to drive code generation and implement precompiling and
cache search.

genX__state.c

gen specific implementation of the state emission for the shader
stage.

The brw_*_emit() functions are all in the same files as the visitor
classes they use (with the exception of VS, which may use either vec4 or
fs).

To make compute follow this convention, we move the brw_cs_emit()
function into brw_fs.cpp. We can then rename brw_cs.cpp to brw_cs.c and
do this in C like the other similar files.  Finally, move state setup
and atoms to gen7_cs_state.c.

Signed-off-by: Kristian Høgsberg Kristensen 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   3 +-
 src/mesa/drivers/dri/i965/brw_cs.c | 185 ++
 src/mesa/drivers/dri/i965/brw_cs.cpp   | 535 -
 src/mesa/drivers/dri/i965/brw_cs.h |   9 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 107 ++
 src/mesa/drivers/dri/i965/gen7_cs_state.c  | 269 +++
 6 files changed, 572 insertions(+), 536 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_cs.c
 delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp
 create mode 100644 src/mesa/drivers/dri/i965/gen7_cs_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index dfdad75..b53802c 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -21,7 +21,7 @@ i965_FILES = \
brw_conditional_render.c \
brw_context.c \
brw_context.h \
-   brw_cs.cpp \
+   brw_cs.c \
brw_cs.h \
brw_cubemap_normalize.cpp \
brw_curbe.c \
@@ -163,6 +163,7 @@ i965_FILES = \
gen6_wm_state.c \
gen7_blorp.cpp \
gen7_blorp.h \
+   gen7_cs_state.c \
gen7_disable.c \
gen7_gs_state.c \
gen7_misc_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
b/src/mesa/drivers/dri/i965/brw_cs.c
new file mode 100644
index 000..24e4b16
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2014 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "util/ralloc.h"
+#include "brw_context.h"
+#include "brw_cs.h"
+#include "brw_eu.h"
+#include "brw_shader.h"
+#include "intel_mipmap_tree.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+bool
+brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+{
+   const struct brw_cs_prog_data *a =
+  (const struct brw_cs_prog_data *)in_a;
+   const struct brw_cs_prog_data *b =
+  (const struct brw_cs_prog_data *)in_b;
+
+   /* Compare the base structure. */
+   if (!brw_stage_prog_data_compare(&a->base, &b->base))
+  return false;
+
+   /* Compare the rest of the structure. */
+   const unsigned offset = sizeof(struct brw_stage_prog_data);
+   if (memcmp(((char *) a) + offset, ((char *) b) + offset,
+  sizeof(struct brw_cs_prog_data) - offset))
+  return false;
+
+   return true;
+}
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+struct gl_shader_program *prog,
+struct brw_compute_program *cp,
+struct brw_cs_prog_key *key)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const GLuint *program;
+   void *mem_ctx = ralloc_context(NULL);
+   GLuint program_size;
+   struct brw_cs_prog_data prog_data;
+
+   struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+   assert (cs);
+
+   memset(&prog_data, 0, sizeof(prog_data));
+
+   /* Allocate the references to the uniforms that will end up in the
+* prog_data associated wit

Re: [Mesa-dev] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings

2015-09-09 Thread Ilia Mirkin

On Wed, Sep 9, 2015 at 3:17 AM, Ilia Mirkin  wrote:
> CB updates to bound buffers need to go through the CB_DATA endpoints,
> otherwise the shader may not notice that the updates happened.
> Furthermore, these updates have to go in to the same address as the
> bound buffer, otherwise, again, the shader may not notice updates.
>
> So we keep track of all the places where a constbuf is bound, and
> iterate over all of them when updating data. If a binding is found that
> encompasses the region to be updated, then we use the settings of that
> binding for the upload. Otherwise we upload as a regular data update.
>
> This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well
> as blurriness in Witcher2.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890
> Signed-off-by: Ilia Mirkin 
> Cc: "11.0" 
> ---
>  src/gallium/drivers/nouveau/nouveau_buffer.c   |  4 +-
>  src/gallium/drivers/nouveau/nouveau_buffer.h   |  2 +
>  src/gallium/drivers/nouveau/nouveau_context.h  |  5 ++-
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  8 ++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |  2 +
>  .../drivers/nouveau/nvc0/nvc0_state_validate.c |  3 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   | 46 
> --
>  7 files changed, 58 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
> b/src/gallium/drivers/nouveau/nouveau_buffer.c
> index 912b778..4937dae 100644
> --- a/src/gallium/drivers/nouveau/nouveau_buffer.c
> +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
> @@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct 
> nouveau_transfer *tx,
>nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
>  tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
> else
> -   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
> -  nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
> +   if (nv->push_cb && can_cb)
> +  nv->push_cb(nv, buf,
>base, size / 4, (const uint32_t *)data);
> else
>nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, 
> data);
> diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h 
> b/src/gallium/drivers/nouveau/nouveau_buffer.h
> index 7e6a6cc..d45bf7a 100644
> --- a/src/gallium/drivers/nouveau/nouveau_buffer.h
> +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
> @@ -41,6 +41,8 @@ struct nv04_resource {
> uint8_t status;
> uint8_t domain;
>
> +   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
> +
> struct nouveau_fence *fence;
> struct nouveau_fence *fence_wr;
>
> diff --git a/src/gallium/drivers/nouveau/nouveau_context.h 
> b/src/gallium/drivers/nouveau/nouveau_context.h
> index 24deb7e..decb271 100644
> --- a/src/gallium/drivers/nouveau/nouveau_context.h
> +++ b/src/gallium/drivers/nouveau/nouveau_context.h
> @@ -6,6 +6,8 @@
>
>  #define NOUVEAU_MAX_SCRATCH_BUFS 4
>
> +struct nv04_resource;
> +
>  struct nouveau_context {
> struct pipe_context pipe;
> struct nouveau_screen *screen;
> @@ -23,8 +25,7 @@ struct nouveau_context {
>   unsigned, const void *);
> /* base, size refer to the whole constant buffer */
> void (*push_cb)(struct nouveau_context *,
> -   struct nouveau_bo *, unsigned domain,
> -   unsigned base, unsigned size,
> +   struct nv04_resource *,
> unsigned offset, unsigned words, const uint32_t *);
>
> /* @return: @ref reduced by nr of references found in context */
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 6ed79cf..30bee3a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
>struct nouveau_bo *dst, unsigned offset, unsigned 
> domain,
>unsigned size, const void *data);
>  void
> -nvc0_cb_push(struct nouveau_context *,
> - struct nouveau_bo *bo, unsigned domain,
> - unsigned base, unsigned size,
> - unsigned offset, unsigned words, const uint32_t *data);
> +nvc0_cb_bo_push(struct nouveau_context *,
> +struct nouveau_bo *bo, unsigned domain,
> +unsigned base, unsigned size,
> +unsigned offset, unsigned words, const uint32_t *data);
>
>  /* nvc0_vbo.c */
>  void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index ee29912..c5bfd03 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct

Re: [Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function

2015-09-09 Thread Roland Scheidegger

Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> This function will be later used by textureQueryLod. The
> img_filter_func are optional, because textureQueryLod will not need
> them.
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 53 
> +++-
>  1 file changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index cd4a659..0a3fc20 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
> }
>  }
>  
> +/**
> + * Get mip filter, and optionally both img min filter and img mag filter
> + */
> +static void
> +get_filters(struct sp_sampler_view *sp_sview,
> +struct sp_sampler *sp_samp,
> +enum tgsi_sampler_control control,
> +struct sp_mip **mip,
> +img_filter_func *min,
> +img_filter_func *mag)
> +{
> +   assert(mip);
> +   if (control == tgsi_sampler_gather) {
> +  *mip = &mip_nearest;
> +  if (min)
> + *min = get_img_filter(sp_sview, &sp_samp->base, 
> PIPE_TEX_FILTER_LINEAR, true);
> +   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
> +  *mip = &mip_linear_2d_linear_repeat_POT;
> +   }
> +   else {
> +  *mip = &sp_samp->mip;
> +  if (min || mag) {
I think this (and the following conditions) could be simplified a bit
since a caller should only either request both or none of min/mag.
So something like
if (min) {
   assert(mag);
   ...

Roland


> + img_filter_func tmp_filter = get_img_filter(sp_sview, 
> &sp_samp->base, sp_samp->min_img_filter, false);
> + if (min)
> +*min = tmp_filter;
> + if (mag) {
> +if (sp_samp->min_mag_equal) {
> +   *mag = tmp_filter;
> +}
> +else {
> +   *mag = get_img_filter(sp_sview, &sp_samp->base, 
> sp_samp->base.mag_img_filter, false);
> +}
> + }
> +  }
> +   }
> +}
>  
>  static void
>  sample_mip(struct sp_sampler_view *sp_sview,
> @@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview,
> img_filter_func min_img_filter = NULL;
> img_filter_func mag_img_filter = NULL;
>  
> -   if (filt_args->control == tgsi_sampler_gather) {
> -  mip = &mip_nearest;
> -  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
> PIPE_TEX_FILTER_LINEAR, true);
> -   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
> -  mip = &mip_linear_2d_linear_repeat_POT;
> -   }
> -   else {
> -  mip = &sp_samp->mip;
> -  min_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
> sp_samp->min_img_filter, false);
> -  if (sp_samp->min_mag_equal) {
> - mag_img_filter = min_img_filter;
> -  }
> -  else {
> - mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, 
> sp_samp->base.mag_img_filter, false);
> -  }
> -   }
> +   get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, 
> &mag_img_filter);
>  
> mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
> s, t, p, c0, lod, filt_args, rgba);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/2] egl/dri2: Close file descriptor on error.

2015-09-09 Thread Emil Velikov

On 9 September 2015 at 01:43, Ian Romanick  wrote:
> On 09/07/2015 01:58 AM, Emil Velikov wrote:
>> From: Matt Turner 
>>
>> v2: [Emil Velikov]
>> Rework the error path to a common goto, close only if we own the fd.
>>
>> Signed-off-by: Emil Velikov 
>> ---
>>  src/egl/drivers/dri2/platform_drm.c | 27 ++-
>>  1 file changed, 14 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/egl/drivers/dri2/platform_drm.c 
>> b/src/egl/drivers/dri2/platform_drm.c
>> index eda5087..e8fe7ea 100644
>> --- a/src/egl/drivers/dri2/platform_drm.c
>> +++ b/src/egl/drivers/dri2/platform_drm.c
>> @@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
>>dri2_dpy->own_device = 1;
>>gbm = gbm_create_device(fd);
>>if (gbm == NULL)
>> - return EGL_FALSE;
>> + goto cleanup;
>> }
>>
>> -   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
>> -  free(dri2_dpy);
>> -  return EGL_FALSE;
>> -   }
>> +   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
>> +  goto cleanup;
>>
>> dri2_dpy->gbm_dri = gbm_dri_device(gbm);
>> -   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
>> -  free(dri2_dpy);
>> -  return EGL_FALSE;
>> -   }
>> +   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
>> +  goto cleanup;
>>
>> if (fd < 0) {
>>fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
>> -  if (fd < 0) {
>> - free(dri2_dpy);
>> - return EGL_FALSE;
>> -  }
>> +  if (fd < 0)
>> + goto cleanup;
>
> Shouldn't this fd also get closed eventually?  If we decide to catch
> other failures (e.g., memory allocation failures) later in this function?
>
By 'this fd' I assume you mean the dup'd fd as opposed to the
device_fd ? Yes we should, Matt's patch was doing the correct thing as
I misread the DUPFD part in F_DUPFD_CLOEXEC :-\

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/7] i965: Remove redundant update_state from intelReadPixels()

2015-09-09 Thread Ian Romanick

Regardless of what happens with the rest of this series, I think this is
a reasonable cleanup.  This patch is

Reviewed-by: Ian Romanick 

On 09/09/2015 06:38 AM, Chris Wilson wrote:
> The very first operation performed by _mesa_readpixels() is to call
> _mesa_update_state() - we do not need to do so ourselves.
> 
> Signed-off-by: Chris Wilson 
> Cc: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Kenneth Graunke 
> Cc: Francisco Jerez 
> ---
>  src/mesa/drivers/dri/i965/intel_pixel_read.c | 8 
>  1 file changed, 8 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c 
> b/src/mesa/drivers/dri/i965/intel_pixel_read.c
> index eb366cd..16dcc55 100644
> --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
> +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
> @@ -269,14 +269,6 @@ intelReadPixels(struct gl_context * ctx,
> intel_prepare_render(brw);
> brw->front_buffer_dirty = dirty;
>  
> -   /* Update Mesa state before calling _mesa_readpixels().
> -* XXX this may not be needed since ReadPixels no longer uses the
> -* span code.
> -*/
> -
> -   if (ctx->NewState)
> -  _mesa_update_state(ctx);
> -
> _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
>  
> /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 91793] the "bin" directory is not created

2015-09-09 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=91793

--- Comment #4 from Emil Velikov  ---
Just had a closer look and it seems that the wgl tests are only build with
cmake. Situation seems slightly confusing atm, but if you feel like helping out
that'll be appreciated :)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/7] i965: Remove redundant test for NULL intel_texture_object

2015-09-09 Thread Ian Romanick

On 09/09/2015 06:39 AM, Chris Wilson wrote:
> Having checked whether the base class (gl_texture_object) is NULL, we
> know that intel_texture_object itself cannot be NULL.
> 
> Signed-off-by: Chris Wilson 
> Cc: Jordan Justen 
> Cc: Jason Ekstrand 
> Cc: Kenneth Graunke 
> Cc: Francisco Jerez 
> ---
>  src/mesa/drivers/dri/i965/brw_draw.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 0ffcc24..3cea331 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -346,7 +346,6 @@ static void
>  brw_draw_resolve(struct gl_context *ctx)
>  {
> struct brw_context *brw = brw_context(ctx);
> -   struct intel_texture_object *tex_obj;
> struct intel_renderbuffer *depth_irb;
>  
> /* Resolve the depth buffer's HiZ buffer. */
> @@ -358,11 +357,15 @@ brw_draw_resolve(struct gl_context *ctx)
> if (brw->NewGLState & _NEW_TEXTURE) {
>int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit;
>for (int i = 0; i <= maxEnabledUnit; i++) {
> + struct intel_texture_object *tex_obj;
> +
>   if (!brw->ctx.Texture.Unit[i]._Current)
>  continue;
> +
>   tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current);

I'd be in favor of some form of combining the assignment and the
declaration.  Either move the declaration down here, or move the
assignment above.  If you choose the latter, the if-statement below
could remain unchanged while the if-statement above is just removed...
since intel_texture_object is just a cast.

> - if (!tex_obj || !tex_obj->mt)
> + if (!tex_obj->mt)
>  continue;
> +
>   intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
>   intel_miptree_resolve_color(brw, tex_obj->mt);
>   brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.

2015-09-09 Thread Roland Scheidegger

Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> This is to avoid tying the conversion to sampling - textureQueryLod
> will need to do the conversion too, but it does not do any sampling.
> 
> So instead of a "sample" vfunc, there is a "convert" vfunc. The
> drawback of this approach is that a "noop" convert copies 3 arrays of
> floats. Would be nice to avoid it in some clean way.

Well yes I'm not sure it's such a great idea. These functions were there
initially to make things faster (by skipping work not necessary without
conditions basically), but it looks to me like that's not really
happening here (because the common case, non cube map, does extra work
for nothing - surely a perfectly predictable condition would be faster
than copying stuff around).
So, one idea to avoid this would be to just forget about those function
pointers there. Instead of get_samples (or now convert_coords) in sview,
just set needs_cube_convert or something like that in sview, then use a
condition based on needs_cube_convert to either invoke sample_mip
directly or cube coord conversion + sample mip in the appropriate place(s).
Either way though looks acceptable to me.

Roland

> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 73 
> +---
>  src/gallium/drivers/softpipe/sp_tex_sample.h | 20 
>  2 files changed, 54 insertions(+), 39 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index 0a3fc20..cdec984 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview,
>  
>  }
>  
> +static void
> +convert_noop(struct sp_sampler_view *sp_sview,
> + struct sp_sampler *sp_samp,
> + const float s[TGSI_QUAD_SIZE],
> + const float t[TGSI_QUAD_SIZE],
> + const float p[TGSI_QUAD_SIZE],
> + const float c0[TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE])
> +{
> +   const size_t len = sizeof(float) * TGSI_QUAD_SIZE;
> +
> +   memcpy(, s, len);
> +   memcpy(, t, len);
> +   memcpy(, p, len);
> +}
>  
> -/**
> - * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
> - * Put face info into the sampler faces[] array.
> - */
>  static void
> -sample_cube(struct sp_sampler_view *sp_sview,
> -struct sp_sampler *sp_samp,
> -const float s[TGSI_QUAD_SIZE],
> -const float t[TGSI_QUAD_SIZE],
> -const float p[TGSI_QUAD_SIZE],
> -const float c0[TGSI_QUAD_SIZE],
> -const float c1[TGSI_QUAD_SIZE],
> -const struct filter_args *filt_args,
> -float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
> +convert_cube(struct sp_sampler_view *sp_sview,
> + struct sp_sampler *sp_samp,
> + const float s[TGSI_QUAD_SIZE],
> + const float t[TGSI_QUAD_SIZE],
> + const float p[TGSI_QUAD_SIZE],
> + const float c0[TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE],
> + float [TGSI_QUAD_SIZE])
>  {
> unsigned j;
> -   float [4], [4];
> -
> -   /* Not actually used, but the intermediate steps that do the
> -* dereferencing don't know it.
> -*/
> -   static float [4] = { 0, 0, 0, 0 };
>  
> [0] = c0[0];
> [1] = c0[1];
> @@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview,
>   }
>}
> }
> -
> -   sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba);
>  }
>  
>  
> @@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
>  
>if (view->target == PIPE_TEXTURE_CUBE ||
>view->target == PIPE_TEXTURE_CUBE_ARRAY)
> - sview->get_samples = sample_cube;
> + sview->convert_coords = convert_cube;
>else {
> - sview->get_samples = sample_mip;
> + sview->convert_coords = convert_noop;
>}
>sview->pot2d = spr->pot &&
>   (view->target == PIPE_TEXTURE_2D ||
> @@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
>  enum tgsi_sampler_control control,
>  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
>  {
> -   struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
> +   struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler 
> *)tgsi_sampler;
> +   struct sp_sampler_view *sp_sview;
> +   struct sp_sampler *sp_samp;
> struct filter_args filt_args;
> +   float cs[TGSI_QUAD_SIZE];
> +   float ct[TGSI_QUAD_SIZE];
> +   float cp[TGSI_QUAD_SIZE];
> +
> assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
> assert(sampler_index < PIPE_MAX_SAMPLERS);
> -   assert

[Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation

2015-09-09 Thread Ian Romanick

From: Ian Romanick 

Previously the result of the complicated clamp() expression just dropped
on the floor: clamp does not modify any of its parameters.  Looking at
the surrounding code, I believe this is supposed to modify the value of
tex_coord.

This change (along with a change to avoid the use of
brw_blorp_framebuffer) does not affect any existing piglit tests.  I'm
not sure what this clamp is trying to accomplish, so I'm not sure how to
write a test to exercise this path.

I also noticed another bug in this code.  There is no way the array
texture case could possibly work.  This will generate code for the
TEXEL_FETCH macro like:

#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), 
sample_map[int(2 * fract(coord.x))]);

Since the coord parameter of this macro is a vec2 at all invocations, no
expansion of this macro will even compile.

Signed-off-by: Ian Romanick 
Cc: Anuj Phogat 
Cc: Topi Pohjolainen 
Cc: Jordan Justen 
---
 src/mesa/drivers/common/meta_blit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/common/meta_blit.c 
b/src/mesa/drivers/common/meta_blit.c
index 71d18de..a41fe42 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
"   vec2 tex_coord = texCoords - s_0_offset;\n"
"\n"
"   tex_coord *= scale;\n"
-   "   clamp(tex_coord.x, 0.0f, scale.x * 
src_width - 1.0f);\n"
-   "   clamp(tex_coord.y, 0.0f, scale.y * 
src_height - 1.0f);\n"
+   "   tex_coord.x = clamp(tex_coord.x, 0.0f, 
scale.x * src_width - 1.0f);\n"
+   "   tex_coord.y = clamp(tex_coord.y, 0.0f, 
scale.y * src_height - 1.0f);\n"
"   interp = fract(tex_coord);\n"
"   tex_coord = ivec2(tex_coord) * scale_inv;\n"
"\n"
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation

2015-09-09 Thread Ian Romanick

On 09/09/2015 11:13 AM, Ian Romanick wrote:
> From: Ian Romanick 
> 
> Previously the result of the complicated clamp() expression just dropped
> on the floor: clamp does not modify any of its parameters.  Looking at
> the surrounding code, I believe this is supposed to modify the value of
> tex_coord.
> 
> This change (along with a change to avoid the use of
> brw_blorp_framebuffer) does not affect any existing piglit tests.  I'm
> not sure what this clamp is trying to accomplish, so I'm not sure how to
> write a test to exercise this path.

I just noticed that the piglit test contains the same incorrect shader
as meta for the "reference" implementation.

> I also noticed another bug in this code.  There is no way the array
> texture case could possibly work.  This will generate code for the
> TEXEL_FETCH macro like:
> 
> #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), 
> sample_map[int(2 * fract(coord.x))]);
> 
> Since the coord parameter of this macro is a vec2 at all invocations, no
> expansion of this macro will even compile.
> 
> Signed-off-by: Ian Romanick 
> Cc: Anuj Phogat 
> Cc: Topi Pohjolainen 
> Cc: Jordan Justen 
> ---
>  src/mesa/drivers/common/meta_blit.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/common/meta_blit.c 
> b/src/mesa/drivers/common/meta_blit.c
> index 71d18de..a41fe42 100644
> --- a/src/mesa/drivers/common/meta_blit.c
> +++ b/src/mesa/drivers/common/meta_blit.c
> @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
> "   vec2 tex_coord = texCoords - 
> s_0_offset;\n"
> "\n"
> "   tex_coord *= scale;\n"
> -   "   clamp(tex_coord.x, 0.0f, scale.x * 
> src_width - 1.0f);\n"
> -   "   clamp(tex_coord.y, 0.0f, scale.y * 
> src_height - 1.0f);\n"
> +   "   tex_coord.x = clamp(tex_coord.x, 0.0f, 
> scale.x * src_width - 1.0f);\n"
> +   "   tex_coord.y = clamp(tex_coord.y, 0.0f, 
> scale.y * src_height - 1.0f);\n"
> "   interp = fract(tex_coord);\n"
> "   tex_coord = ivec2(tex_coord) * 
> scale_inv;\n"
> "\n"

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level

2015-09-09 Thread Roland Scheidegger

Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> These functions will be used by textureQueryLod.
> ---
>  src/gallium/drivers/softpipe/sp_tex_sample.c | 100 
> +--
>  src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
>  2 files changed, 101 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
> b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index cdec984..6e639e0 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -1937,6 +1937,38 @@ get_gather_component(const float 
> lod_in[TGSI_QUAD_SIZE])
>  }
>  
>  static void
> +clamp_lod(const struct sp_sampler_view *sp_sview,
> +  const struct sp_sampler *sp_samp,
> +  const float lod[TGSI_QUAD_SIZE],
> +  float clamped[TGSI_QUAD_SIZE])
> +{
> +   const float min_lod = sp_samp->base.min_lod;
> +   const float max_lod = sp_samp->base.max_lod;
> +   const float min_level = sp_sview->base.u.tex.first_level;
> +   const float max_level = sp_sview->base.u.tex.last_level;
> +   int i;
> +
> +   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
> +  float cl = lod[i];
> +
> +  cl = CLAMP(cl, min_lod, max_lod);
> +  /* XXX: Is min_level ever different from 0?
I think the comment is bogus. min_level can easily be different from 0,
at least when using ARB_texture_view afaik (playing around with base
level might also cause this to be non-zero, though I'm not too familiar
with what the state tracker does for those legacy GL weirdness things).

> +   */
> +  cl = CLAMP(cl, 0, max_level - min_level);
> +  clamped[i] = cl;
> +   }
> +}
> +
> +static void
> +mip_level_linear(struct sp_sampler_view *sp_sview,
> + struct sp_sampler *sp_samp,
> + const float lod[TGSI_QUAD_SIZE],
> + float level[TGSI_QUAD_SIZE])
> +{
> +   clamp_lod(sp_sview, sp_samp, lod, level);
> +}
> +
> +static void
>  mip_filter_linear(struct sp_sampler_view *sp_sview,
>struct sp_sampler *sp_samp,
>img_filter_func min_filter,
> @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
>  }
>  
>  
> +static void
> +mip_level_nearest(struct sp_sampler_view *sp_sview,
> +  struct sp_sampler *sp_samp,
> +  const float lod[TGSI_QUAD_SIZE],
> +  float level[TGSI_QUAD_SIZE])
> +{
> +   const int first_level = sp_sview->base.u.tex.first_level;
> +   int j;
> +
> +   clamp_lod(sp_sview, sp_samp, lod, level);
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++)
> +  /* TODO: It should rather be:
> +   * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F;
> +   */
> +  level[j] = first_level + (int)(level[j] + 0.5F);
> +}
> +
>  /**
>   * Compute nearest mipmap level from texcoords.
>   * Then sample the texture level for four elements of a quad.
> @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
>  
>  
>  static void
> +mip_level_none(struct sp_sampler_view *sp_sview,
> +   struct sp_sampler *sp_samp,
> +   const float lod[TGSI_QUAD_SIZE],
> +   float level[TGSI_QUAD_SIZE])
> +{
> +   int j;
> +
> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +  level[j] = sp_sview->base.u.tex.first_level;
> +   }
> +}
> +
> +static void
>  mip_filter_none(struct sp_sampler_view *sp_sview,
>  struct sp_sampler *sp_samp,
>  img_filter_func min_filter,
> @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
>  
>  
>  static void
> +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
> +struct sp_sampler *sp_samp,
> +const float lod[TGSI_QUAD_SIZE],
> +float level[TGSI_QUAD_SIZE])
> +{
> +   mip_level_none(sp_sview, sp_samp, lod, level);
> +}
> +
> +static void
>  mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
>   struct sp_sampler *sp_samp,
>   img_filter_func min_filter,
> @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
>  }
>  
>  
> +static void
> +mip_level_linear_aniso(struct sp_sampler_view *sp_sview,
> +   struct sp_sampler *sp_samp,
> +   const float lod[TGSI_QUAD_SIZE],
> +   float level[TGSI_QUAD_SIZE])
> +{
> +   mip_level_linear(sp_sview, sp_samp, lod, level);
> +}
> +
>  /**
>   * Sample 2D texture using an anisotropic filter.
>   */
> @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view 
> *sp_sview,
> }
>  }
>  
> +static void
> +mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
> +  struct sp_sampler *sp_samp,
> +  const float lod[TGSI_QUAD_SIZE],
> +

Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level

2015-09-09 Thread Ilia Mirkin

On Wed, Sep 9, 2015 at 2:17 PM, Roland Scheidegger  wrote:
> Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
>> These functions will be used by textureQueryLod.
>> ---
>>  src/gallium/drivers/softpipe/sp_tex_sample.c | 100 
>> +--
>>  src/gallium/drivers/softpipe/sp_tex_sample.h |   7 ++
>>  2 files changed, 101 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c 
>> b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> index cdec984..6e639e0 100644
>> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
>> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
>> @@ -1937,6 +1937,38 @@ get_gather_component(const float 
>> lod_in[TGSI_QUAD_SIZE])
>>  }
>>
>>  static void
>> +clamp_lod(const struct sp_sampler_view *sp_sview,
>> +  const struct sp_sampler *sp_samp,
>> +  const float lod[TGSI_QUAD_SIZE],
>> +  float clamped[TGSI_QUAD_SIZE])
>> +{
>> +   const float min_lod = sp_samp->base.min_lod;
>> +   const float max_lod = sp_samp->base.max_lod;
>> +   const float min_level = sp_sview->base.u.tex.first_level;
>> +   const float max_level = sp_sview->base.u.tex.last_level;
>> +   int i;
>> +
>> +   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
>> +  float cl = lod[i];
>> +
>> +  cl = CLAMP(cl, min_lod, max_lod);
>> +  /* XXX: Is min_level ever different from 0?
> I think the comment is bogus. min_level can easily be different from 0,
> at least when using ARB_texture_view afaik (playing around with base
> level might also cause this to be non-zero, though I'm not too familiar
> with what the state tracker does for those legacy GL weirdness things).

Either setting the base level or a texture view (or both!) can cause
the min level to be 0.

>
>> +   */
>> +  cl = CLAMP(cl, 0, max_level - min_level);
>> +  clamped[i] = cl;
>> +   }
>> +}
>> +
>> +static void
>> +mip_level_linear(struct sp_sampler_view *sp_sview,
>> + struct sp_sampler *sp_samp,
>> + const float lod[TGSI_QUAD_SIZE],
>> + float level[TGSI_QUAD_SIZE])
>> +{
>> +   clamp_lod(sp_sview, sp_samp, lod, level);
>> +}
>> +
>> +static void
>>  mip_filter_linear(struct sp_sampler_view *sp_sview,
>>struct sp_sampler *sp_samp,
>>img_filter_func min_filter,
>> @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
>>  }
>>
>>
>> +static void
>> +mip_level_nearest(struct sp_sampler_view *sp_sview,
>> +  struct sp_sampler *sp_samp,
>> +  const float lod[TGSI_QUAD_SIZE],
>> +  float level[TGSI_QUAD_SIZE])
>> +{
>> +   const int first_level = sp_sview->base.u.tex.first_level;
>> +   int j;
>> +
>> +   clamp_lod(sp_sview, sp_samp, lod, level);
>> +   for (j = 0; j < TGSI_QUAD_SIZE; j++)
>> +  /* TODO: It should rather be:
>> +   * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F;
>> +   */
>> +  level[j] = first_level + (int)(level[j] + 0.5F);
>> +}
>> +
>>  /**
>>   * Compute nearest mipmap level from texcoords.
>>   * Then sample the texture level for four elements of a quad.
>> @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
>>
>>
>>  static void
>> +mip_level_none(struct sp_sampler_view *sp_sview,
>> +   struct sp_sampler *sp_samp,
>> +   const float lod[TGSI_QUAD_SIZE],
>> +   float level[TGSI_QUAD_SIZE])
>> +{
>> +   int j;
>> +
>> +   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
>> +  level[j] = sp_sview->base.u.tex.first_level;
>> +   }
>> +}
>> +
>> +static void
>>  mip_filter_none(struct sp_sampler_view *sp_sview,
>>  struct sp_sampler *sp_samp,
>>  img_filter_func min_filter,
>> @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
>>
>>
>>  static void
>> +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview,
>> +struct sp_sampler *sp_samp,
>> +const float lod[TGSI_QUAD_SIZE],
>> +float level[TGSI_QUAD_SIZE])
>> +{
>> +   mip_level_none(sp_sview, sp_samp, lod, level);
>> +}
>> +
>> +static void
>>  mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
>>   struct sp_sampler *sp_samp,
>>   img_filter_func min_filter,
>> @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
>>  }
>>
>>
>> +static void
>> +mip_level_linear_aniso(struct sp_sampler_view *sp_sview,
>> +   struct sp_sampler *sp_samp,
>> +   const float lod[TGSI_QUAD_SIZE],
>> +   float level[TGSI_QUAD_SIZE])
>> +{
>> +   mip_level_linear(sp_sview, sp_samp, lod, level);
>> +}
>> +
>>  /**
>>   * Sample 2D texture using an anisotropic filter.
>>   */
>> @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view 
>> *sp_sview,
>> }
>>  }

Re: [Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode

2015-09-09 Thread Roland Scheidegger

Yes, using a new function is definitely the right way to go imho.
Apart from the things mentioned by Brian, the series looks good to me.

Roland

Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak:
> This introduces new vfunc in tgsi_sampler just for this opcode. I
> decided against extending get_samples vfunc to return the mipmap level
> and LOD - the function's prototype is already too scary and doing the
> sampling for textureQueryLod would be a waste of time.
> ---
>  src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 
> ++
>  src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 
>  2 files changed, 54 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
> b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 9544623..054ad08 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach,
> }
>  }
>  
> +static void
> +exec_lodq(struct tgsi_exec_machine *mach,
> +  const struct tgsi_full_instruction *inst)
> +{
> +   uint unit;
> +   int dim;
> +   int i;
> +   union tgsi_exec_channel coords[4];
> +   const union tgsi_exec_channel *args[Elements(coords)];
> +   union tgsi_exec_channel r[2];
> +
> +   unit = fetch_sampler_unit(mach, inst, 1);
> +   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
> +   assert(dim <= Elements(coords));
> +   /* fetch coordinates */
> +   for (i = 0; i < dim; i++) {
> +  FETCH(&coords[i], 0, TGSI_CHAN_X + i);
> +  args[i] = &coords[i];
> +   }
> +   for (i = dim; i < Elements(coords); i++) {
> +  args[i] = &ZeroVec;
> +   }
> +   mach->Sampler->query_lod(mach->Sampler, unit, unit,
> +args[0]->f,
> +args[1]->f,
> +args[2]->f,
> +args[3]->f,
> +tgsi_sampler_lod_none,
> +r[0].f,
> +r[1].f);
> +
> +   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
> +  store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 
> TGSI_EXEC_DATA_FLOAT);
> +   }
> +   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
> +  store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 
> TGSI_EXEC_DATA_FLOAT);
> +   }
> +}
>  
>  static void
>  exec_txd(struct tgsi_exec_machine *mach,
> @@ -4378,6 +4416,12 @@ exec_instruction(
>exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
>break;
>  
> +   case TGSI_OPCODE_LODQ:
> +  /* src[0] = texcoord */
> +  /* src[1] = sampler unit */
> +  exec_lodq(mach, inst);
> +  break;
> +
> case TGSI_OPCODE_UP2H:
>assert (0);
>break;
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h 
> b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> index 5d56aab..556e0af 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
> @@ -138,6 +138,16 @@ struct tgsi_sampler
>   const int j[TGSI_QUAD_SIZE], const int 
> k[TGSI_QUAD_SIZE],
>   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
>   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
> +   void (*query_lod)(struct tgsi_sampler *tgsi_sampler,
> + const unsigned sview_index,
> + const unsigned sampler_index,
> + const float s[TGSI_QUAD_SIZE],
> + const float t[TGSI_QUAD_SIZE],
> + const float p[TGSI_QUAD_SIZE],
> + const float c0[TGSI_QUAD_SIZE],
> + enum tgsi_sampler_control control,
> + float mipmap[TGSI_QUAD_SIZE],
> + float lod[TGSI_QUAD_SIZE]);
>  };
>  
>  #define TGSI_EXEC_NUM_TEMPS   4096
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation

2015-09-09 Thread Ian Romanick

I'm pretty sure our implementation of this extension is complete
rubbish.  I have attached an image from the blit-scaled test.  This
result cannot be useful to anyone.

On 09/09/2015 11:16 AM, Ian Romanick wrote:
> On 09/09/2015 11:13 AM, Ian Romanick wrote:
>> From: Ian Romanick 
>>
>> Previously the result of the complicated clamp() expression just dropped
>> on the floor: clamp does not modify any of its parameters.  Looking at
>> the surrounding code, I believe this is supposed to modify the value of
>> tex_coord.
>>
>> This change (along with a change to avoid the use of
>> brw_blorp_framebuffer) does not affect any existing piglit tests.  I'm
>> not sure what this clamp is trying to accomplish, so I'm not sure how to
>> write a test to exercise this path.
> 
> I just noticed that the piglit test contains the same incorrect shader
> as meta for the "reference" implementation.
> 
>> I also noticed another bug in this code.  There is no way the array
>> texture case could possibly work.  This will generate code for the
>> TEXEL_FETCH macro like:
>>
>> #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), 
>> sample_map[int(2 * fract(coord.x))]);
>>
>> Since the coord parameter of this macro is a vec2 at all invocations, no
>> expansion of this macro will even compile.
>>
>> Signed-off-by: Ian Romanick 
>> Cc: Anuj Phogat 
>> Cc: Topi Pohjolainen 
>> Cc: Jordan Justen 
>> ---
>>  src/mesa/drivers/common/meta_blit.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/common/meta_blit.c 
>> b/src/mesa/drivers/common/meta_blit.c
>> index 71d18de..a41fe42 100644
>> --- a/src/mesa/drivers/common/meta_blit.c
>> +++ b/src/mesa/drivers/common/meta_blit.c
>> @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context 
>> *ctx,
>> "   vec2 tex_coord = texCoords - 
>> s_0_offset;\n"
>> "\n"
>> "   tex_coord *= scale;\n"
>> -   "   clamp(tex_coord.x, 0.0f, scale.x * 
>> src_width - 1.0f);\n"
>> -   "   clamp(tex_coord.y, 0.0f, scale.y * 
>> src_height - 1.0f);\n"
>> +   "   tex_coord.x = clamp(tex_coord.x, 0.0f, 
>> scale.x * src_width - 1.0f);\n"
>> +   "   tex_coord.y = clamp(tex_coord.y, 0.0f, 
>> scale.y * src_height - 1.0f);\n"
>> "   interp = fract(tex_coord);\n"
>> "   tex_coord = ivec2(tex_coord) * 
>> scale_inv;\n"
>> "\n"
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation

2015-09-09 Thread Ilia Mirkin

In case it's of any interest, nouveau fails many of the blit-scaled
tests. However nouveau's MS blits are notoriously bad. [I think I know
what's wrong, just don't have the energy to fix it.]

OTOH the NVIDIA driver also fails at every scale factor (for 2x and
8x, passes for 4x).

On Wed, Sep 9, 2015 at 2:30 PM, Ian Romanick  wrote:
> I'm pretty sure our implementation of this extension is complete
> rubbish.  I have attached an image from the blit-scaled test.  This
> result cannot be useful to anyone.
>
> On 09/09/2015 11:16 AM, Ian Romanick wrote:
>> On 09/09/2015 11:13 AM, Ian Romanick wrote:
>>> From: Ian Romanick 
>>>
>>> Previously the result of the complicated clamp() expression just dropped
>>> on the floor: clamp does not modify any of its parameters.  Looking at
>>> the surrounding code, I believe this is supposed to modify the value of
>>> tex_coord.
>>>
>>> This change (along with a change to avoid the use of
>>> brw_blorp_framebuffer) does not affect any existing piglit tests.  I'm
>>> not sure what this clamp is trying to accomplish, so I'm not sure how to
>>> write a test to exercise this path.
>>
>> I just noticed that the piglit test contains the same incorrect shader
>> as meta for the "reference" implementation.
>>
>>> I also noticed another bug in this code.  There is no way the array
>>> texture case could possibly work.  This will generate code for the
>>> TEXEL_FETCH macro like:
>>>
>>> #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), 
>>> sample_map[int(2 * fract(coord.x))]);
>>>
>>> Since the coord parameter of this macro is a vec2 at all invocations, no
>>> expansion of this macro will even compile.
>>>
>>> Signed-off-by: Ian Romanick 
>>> Cc: Anuj Phogat 
>>> Cc: Topi Pohjolainen 
>>> Cc: Jordan Justen 
>>> ---
>>>  src/mesa/drivers/common/meta_blit.c | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/common/meta_blit.c 
>>> b/src/mesa/drivers/common/meta_blit.c
>>> index 71d18de..a41fe42 100644
>>> --- a/src/mesa/drivers/common/meta_blit.c
>>> +++ b/src/mesa/drivers/common/meta_blit.c
>>> @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context 
>>> *ctx,
>>> "   vec2 tex_coord = texCoords - 
>>> s_0_offset;\n"
>>> "\n"
>>> "   tex_coord *= scale;\n"
>>> -   "   clamp(tex_coord.x, 0.0f, scale.x * 
>>> src_width - 1.0f);\n"
>>> -   "   clamp(tex_coord.y, 0.0f, scale.y * 
>>> src_height - 1.0f);\n"
>>> +   "   tex_coord.x = clamp(tex_coord.x, 0.0f, 
>>> scale.x * src_width - 1.0f);\n"
>>> +   "   tex_coord.y = clamp(tex_coord.y, 0.0f, 
>>> scale.y * src_height - 1.0f);\n"
>>> "   interp = fract(tex_coord);\n"
>>> "   tex_coord = ivec2(tex_coord) * 
>>> scale_inv;\n"
>>> "\n"
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/2] egl/dri2: Close file descriptor on error.

2015-09-09 Thread Ian Romanick

On 09/09/2015 10:59 AM, Emil Velikov wrote:
> On 9 September 2015 at 01:43, Ian Romanick  wrote:
>> On 09/07/2015 01:58 AM, Emil Velikov wrote:
>>> From: Matt Turner 
>>>
>>> v2: [Emil Velikov]
>>> Rework the error path to a common goto, close only if we own the fd.
>>>
>>> Signed-off-by: Emil Velikov 
>>> ---
>>>  src/egl/drivers/dri2/platform_drm.c | 27 ++-
>>>  1 file changed, 14 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/egl/drivers/dri2/platform_drm.c 
>>> b/src/egl/drivers/dri2/platform_drm.c
>>> index eda5087..e8fe7ea 100644
>>> --- a/src/egl/drivers/dri2/platform_drm.c
>>> +++ b/src/egl/drivers/dri2/platform_drm.c
>>> @@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay 
>>> *disp)
>>>dri2_dpy->own_device = 1;
>>>gbm = gbm_create_device(fd);
>>>if (gbm == NULL)
>>> - return EGL_FALSE;
>>> + goto cleanup;
>>> }
>>>
>>> -   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
>>> -  free(dri2_dpy);
>>> -  return EGL_FALSE;
>>> -   }
>>> +   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
>>> +  goto cleanup;
>>>
>>> dri2_dpy->gbm_dri = gbm_dri_device(gbm);
>>> -   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
>>> -  free(dri2_dpy);
>>> -  return EGL_FALSE;
>>> -   }
>>> +   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
>>> +  goto cleanup;
>>>
>>> if (fd < 0) {
>>>fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
>>> -  if (fd < 0) {
>>> - free(dri2_dpy);
>>> - return EGL_FALSE;
>>> -  }
>>> +  if (fd < 0)
>>> + goto cleanup;
>>
>> Shouldn't this fd also get closed eventually?  If we decide to catch
>> other failures (e.g., memory allocation failures) later in this function?
>>
> By 'this fd' I assume you mean the dup'd fd as opposed to the
> device_fd ? Yes we should, Matt's patch was doing the correct thing as
> I misread the DUPFD part in F_DUPFD_CLOEXEC :-\

Yeah, that's what I meant.

In spite of that, I like the 'goto cleanup' change.  I was going to
suggest that to Matt... but you went and did it before I could suggest
it. :)  Maybe blend the strengths?

> Thanks
> Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] i965: Abort tiled_memcpy path for ReadPixels in case of transfer operations

2015-09-09 Thread Anuj Phogat

On Tue, Sep 1, 2015 at 6:58 AM, Emil Velikov  wrote:
> Hi all
>
> On 21 August 2015 at 23:04, Anuj Phogat  wrote:
>> We have a similar check in meta pbo path.
>>
>> Cc: 
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/intel_pixel_read.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c 
>> b/src/mesa/drivers/dri/i965/intel_pixel_read.c
>> index 3fe506e..55f6852 100644
>> --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
>> +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
>> @@ -81,6 +81,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
>> if (rb == NULL)
>>return false;
>>
>> +   if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
>> + type, GL_FALSE))
>> +  return false;
>> +
>> struct intel_renderbuffer *irb = intel_renderbuffer(rb);
>> int dst_pitch;
>>
> Can we get a pair of eyes looking this way (same goes for the rest of
> the series).
> They all seem like pretty trivial fixes.
>
> Anuj, can you amend the cc tag before pushing to include 10.6 please.
>
Yes, I'll amend the cc tag.
> Thanks
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Ian Romanick

On 09/09/2015 11:16 AM, Marius Predut wrote:
> Comparison with a signed expression and unsigned value
> is converted to unsigned value, reason for minus value is interpreted
> as a big unsigned value. For this case the "for" loop
> is going into unexpected behavior.
> 
> v1:Brian Paul: code style fix.

I don't think you really did...

> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
> Signed-off-by: Marius Predut 
> ---
>  src/mesa/tnl_dd/t_dd_dmatmp.h | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
> index 7be3954..79de224 100644
> --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
> +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
> @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context 
> *ctx,
>LOCAL_VARS;
>GLuint j;
>  
> +  if(count % 4 != 0)
   ^
...because I'm quite sure Brian's code had a space here, per Mesa's
coding standards.

Also, this change is incorrect.  If an application does
glDrawArrays(GL_QUADS, 0, (n * 4) + 3), this change will cause zero
quads to be drawn when n quads should be drawn.

Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says:

"The total number of vertices between Begin and End is 4n + k,
where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are
ignored."

We probably don't have a piglit test for this scenario, so one should be
added.  You can CC me on that patch. :)

I think the correct change is to trim count such that (count % 4) == 0.
 If the modified value of count is zero, bail out.  With that change,
the other hunk (below) is unnecessary.

> +  return;
> +
>INIT(GL_TRIANGLES);
>  
>for (j = start; j < count-3; j += 4) {
> @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct 
> gl_context *ctx,
>   ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
>}
>else {
> - ok = HAVE_TRIANGLES; /* flatshading is ok. */
> + ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */
>}
>break;
>default:
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Ian Romanick

On 09/09/2015 11:53 AM, Ian Romanick wrote:
> On 09/09/2015 11:16 AM, Marius Predut wrote:
>> Comparison with a signed expression and unsigned value
>> is converted to unsigned value, reason for minus value is interpreted
>> as a big unsigned value. For this case the "for" loop
>> is going into unexpected behavior.
>>
>> v1:Brian Paul: code style fix.
> 
> I don't think you really did...
> 
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
>> Signed-off-by: Marius Predut 
>> ---
>>  src/mesa/tnl_dd/t_dd_dmatmp.h | 5 -
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
>> index 7be3954..79de224 100644
>> --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
>> +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
>> @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context 
>> *ctx,
>>LOCAL_VARS;
>>GLuint j;
>>  
>> +  if(count % 4 != 0)
>^
> ...because I'm quite sure Brian's code had a space here, per Mesa's
> coding standards.
> 
> Also, this change is incorrect.  If an application does
> glDrawArrays(GL_QUADS, 0, (n * 4) + 3), this change will cause zero
> quads to be drawn when n quads should be drawn.
> 
> Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says:
> 
> "The total number of vertices between Begin and End is 4n + k,
> where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are
> ignored."
> 
> We probably don't have a piglit test for this scenario, so one should be
> added.  You can CC me on that patch. :)
> 
> I think the correct change is to trim count such that (count % 4) == 0.

Also... wherever you add this trim code, you should include the spec
quotation that I mentioned above.

>  If the modified value of count is zero, bail out.  With that change,
> the other hunk (below) is unnecessary.
> 
>> +  return;
>> +
>>INIT(GL_TRIANGLES);
>>  
>>for (j = start; j < count-3; j += 4) {
>> @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct 
>> gl_context *ctx,
>>  ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
>>   }
>>   else {
>> -ok = HAVE_TRIANGLES; /* flatshading is ok. */
>> +ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */
>>   }
>>   break;
>>default:
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines

2015-09-09 Thread Ian Romanick

On 09/09/2015 06:59 AM, Marius Predut wrote:
> On PNV platform, for 1 pixel line thickness or less,
> the general anti-aliasing algorithm gives up, and a garbage line is generated.
> Setting a Line Width of 0.0 specifies the rasterization
> of the "thinnest" (one-pixel-wide), non-antialiased lines.
> Lines rendered with zero Line Width are rasterized using
> Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 
> 3D/Media,
> 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section.
> 
> This patch follow the same rules as patches fixing the
> https://bugs.freedesktop.org/show_bug.cgi?id=28832
> bug.
> 
> v1: Eduardo Lima Mitev:  Wrong indentation inside the if clause.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367
> 
> Signed-off-by: Marius Predut 
> ---
>  src/mesa/drivers/dri/i915/i915_state.c | 15 +++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i915/i915_state.c 
> b/src/mesa/drivers/dri/i915/i915_state.c
> index 4c83073..ebb4e9a 100644
> --- a/src/mesa/drivers/dri/i915/i915_state.c
> +++ b/src/mesa/drivers/dri/i915/i915_state.c
> @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf)
> 
> width = (int) (widthf * 2);
> width = CLAMP(width, 1, 0xf);
> +
> +   if (ctx->Line.Width < 1.5 || widthf < 1.5) {
> + /* For 1 pixel line thickness or less, the general
> +  * anti-aliasing algorithm gives up, and a garbage line is
> +  * generated.  Setting a Line Width of 0.0 specifies the
> +  * rasterization of the "thinnest" (one-pixel-wide),
> +  * non-antialiased lines.
> +  *
> +  * Lines rendered with zero Line Width are rasterized using
> +  * Grid Intersection Quantization rules as specified by
> +  * bspec G45: Volume 2: 3D/Media,

Eh... Isn't G45 actually i965-class hardware?  GEN4.5, right?  I don't
see how a GEN4 bspec reference is useful (or correct?) in the GEN3
driver.  I think you want "2.8.4.1 Zero-Width (Cosmetic) Line
Rasterization" from volume 1f of the GEN3 docs.

I looked at section 2.8.4.3 Anti-aliased Line Rasterization of volume 1f
of the GEN3 docs that I have.  I don't see any mention of this
restriction.  Is it documented anywhere?

It also seems like this will affect non-antialised wide lines.  Is that
correct?

> +  * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section
> +  */
> +  width = 0;
> +   }
> lis4 |= width << S4_LINE_WIDTH_SHIFT;
>  
> if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-dev, 1/5] i965: Support CS in update_stage_texture_surfaces

2015-09-09 Thread Kristian Høgsberg

Jordan Justen  writes:

> Signed-off-by: Jordan Justen 
>
> ---
> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 72aad96..0eb5361 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -804,7 +804,7 @@ update_stage_texture_surfaces(struct brw_context *brw,
>struct brw_stage_state *stage_state,
>bool for_gather)
>  {
> -   if (!prog)
> +   if (!prog || !stage_state->prog_data)
>return;

Why do we need to test for stage_state->prog_data being NULL now? This
atom always run after brw_upload_cs_prog(), which sets

  brw->cs.base.prog_data = &brw->cs.prog_data->base;

which shouldn't change after that.

Other that that,

Reviewed-by: Kristian Høgsberg 

> struct gl_context *ctx = &brw->ctx;
> @@ -848,10 +848,14 @@ brw_update_texture_surfaces(struct brw_context *brw)
> /* BRW_NEW_FRAGMENT_PROGRAM */
> struct gl_program *fs = (struct gl_program *) brw->fragment_program;
>  
> +   /* BRW_NEW_COMPUTE_PROGRAM */
> +   struct gl_program *cs = (struct gl_program *) brw->compute_program;
> +
> /* _NEW_TEXTURE */
> update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
> update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
> update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
> +   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
>  
> /* emit alternate set of surface state for gather. this
>  * allows the surface format to be overriden for only the
> @@ -863,6 +867,8 @@ brw_update_texture_surfaces(struct brw_context *brw)
>   update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
>if (fs && fs->UsesGather)
>   update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
> +  if (cs && cs->UsesGather)
> + update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
> }
>  
> brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
> @@ -872,6 +878,7 @@ const struct brw_tracked_state brw_texture_surfaces = {
> .dirty = {
>.mesa = _NEW_TEXTURE,
>.brw = BRW_NEW_BATCH |
> + BRW_NEW_COMPUTE_PROGRAM |
>   BRW_NEW_FRAGMENT_PROGRAM |
>   BRW_NEW_FS_PROG_DATA |
>   BRW_NEW_GEOMETRY_PROGRAM |
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted

2015-09-09 Thread Eirik Byrkjeflot Anonsen

Marius Predut  writes:

> Comparison with a signed expression and unsigned value
> is converted to unsigned value, reason for minus value is interpreted
> as a big unsigned value. For this case the "for" loop
> is going into unexpected behavior.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109
> Signed-off-by: Marius Predut 
> ---
>  src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
> index 7be3954..88ecc78 100644
> --- a/src/mesa/tnl_dd/t_dd_dmatmp.h
> +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
> @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context 
> *ctx,
>LOCAL_VARS;
>GLuint j;
>  
> +  if(count%4 != 0) return;
> +

Seems to me that does a bit more than just fixing the crash. I would
think

if (count < 4)

would fix the crash only. (And then quite possibly you won't need the
next hunk?) But I have no idea whether the side effect is desired.

(Actually, what if count is 0? Or is that impossible?)

eirik

>INIT(GL_TRIANGLES);
>  
>for (j = start; j < count-3; j += 4) {
> @@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct 
> gl_context *ctx,
>   ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS();
>}
>else {
> - ok = HAVE_TRIANGLES; /* flatshading is ok. */
> + ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */
>}
>break;
>default:
> -- 
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Making ext_framebuffer_multisample-accuracy run on non-i965 drivers

2015-09-09 Thread Ilia Mirkin

Both st/mesa and the NVIDIA fail framebuffer completeness on this
command sequence:

270 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
= 0, internalformat = GL_RGBA, width = 256, height = 256)
272 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
= GL_COLOR_ATTACHMENT0, renderbuffertarget = GL_RENDERBUFFER,
renderbuffer = 1)
273 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 10)
274 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
= 0, internalformat = GL_STENCIL_INDEX8, width = 256, height = 256)
275 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
= GL_STENCIL_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER,
renderbuffer = 10)
276 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 9)
277 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
= 0, internalformat = GL_DEPTH_COMPONENT24, width = 256, height = 256)
278 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
= GL_DEPTH_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER,
renderbuffer = 9)
279 glCheckFramebufferStatus(target = GL_DRAW_FRAMEBUFFER) =
GL_FRAMEBUFFER_UNSUPPORTED

This is because (in the case of st/mesa), the depth and stencil
backing textures are different, which is not something that the
gallium API allows (nor does NVIDIA hardware handle).

For textures this is handled by merging attachment points when the
textures are identical, but this is harder to do with renderbuffers
since they're allocated implicitly, and can be attached/detached
individually.

I'm not sure that there's a clean way to fix this. I think that the
piglit should be fixed instead to not do this. However I know these
sorts of suggestions are often frowned upon, so I'm asking here.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] i915: use EmitNoIndirectSampler

2015-09-09 Thread Ian Romanick

Reviewed-by: Ian Romanick 

On 06/29/2015 12:35 AM, Tapani Pälli wrote:
> Signed-off-by: Tapani Pälli 
> ---
>  src/mesa/drivers/dri/i915/i915_context.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i915/i915_context.c 
> b/src/mesa/drivers/dri/i915/i915_context.c
> index 42ea54e..57b033c 100644
> --- a/src/mesa/drivers/dri/i915/i915_context.c
> +++ b/src/mesa/drivers/dri/i915/i915_context.c
> @@ -255,6 +255,8 @@ i915CreateContext(int api,
>  * FINISHME: vertex shaders?
>  */
> ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true;
> +   
> ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler =
> +  true;
>  
> struct gl_shader_compiler_options *const fs_options =
>& ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
> @@ -266,6 +268,7 @@ i915CreateContext(int api,
> fs_options->EmitNoIndirectOutput = true;
> fs_options->EmitNoIndirectUniform = true;
> fs_options->EmitNoIndirectTemp = true;
> +   fs_options->EmitNoIndirectSampler = true;
>  
> ctx->Const.MaxDrawBuffers = 1;
> ctx->Const.QueryCounterBits.SamplesPassed = 0;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-dev, 2/5] i965: Run vector splitting for CS in brw_link_shader

2015-09-09 Thread Kristian Høgsberg

Jordan Justen  writes:

> Signed-off-by: Jordan Justen 

I'd change the subject to something like

  i965: Teach is_scalar_shader_stage() about compute shaders

since that's the change in the patch and it affects many other
lowering/optimization  decisions.

With that,

Reviewed-by: Kristian Høgsberg 

> ---
> src/mesa/drivers/dri/i965/brw_shader.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 5653d6b..309f495 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -185,6 +185,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage)
>  {
> switch (stage) {
> case MESA_SHADER_FRAGMENT:
> +   case MESA_SHADER_COMPUTE:
>return true;
> case MESA_SHADER_VERTEX:
>return brw->intelScreen->compiler->scalar_vs;
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] meta: Always bind the texture

2015-09-09 Thread Ian Romanick

From: Ian Romanick 

We may have been called from glGenerateTextureMipmap with CurrentUnit
still set to 0, so we don't know when we can skip binding the texture.
Assume that _mesa_BindTexture will be fast if we're rebinding the same
texture.

Signed-off-by: Ian Romanick 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91847
Cc: "11.0" 
---
This patch seems to resolve the problems seen in the apitrace attached
to the bug.  I'm working on a piglit test.

 src/mesa/drivers/common/meta_generate_mipmap.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c 
b/src/mesa/drivers/common/meta_generate_mipmap.c
index 0655f05..6731478 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -202,8 +202,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum 
target,
samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
   ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
 
-   if (currentTexUnitSave != 0)
-  _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+* still set to 0, so we don't know when we can skip binding the texture.
+* Assume that _mesa_BindTexture will be fast if we're rebinding the same
+* texture.
+*/
+   _mesa_BindTexture(target, texObj->Name);
 
if (!mipmap->Sampler) {
   _mesa_GenSamplers(1, &mipmap->Sampler);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Making ext_framebuffer_multisample-accuracy run on non-i965 drivers

2015-09-09 Thread Ilia Mirkin

Aha, apparently there's a 'depthstencil' option that uses a combined
RB. Nevermind! Didn't realize that was the case.

On Wed, Sep 9, 2015 at 3:24 PM, Ilia Mirkin  wrote:
> Both st/mesa and the NVIDIA fail framebuffer completeness on this
> command sequence:
>
> 270 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
> = 0, internalformat = GL_RGBA, width = 256, height = 256)
> 272 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
> = GL_COLOR_ATTACHMENT0, renderbuffertarget = GL_RENDERBUFFER,
> renderbuffer = 1)
> 273 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 10)
> 274 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
> = 0, internalformat = GL_STENCIL_INDEX8, width = 256, height = 256)
> 275 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
> = GL_STENCIL_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER,
> renderbuffer = 10)
> 276 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 9)
> 277 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples
> = 0, internalformat = GL_DEPTH_COMPONENT24, width = 256, height = 256)
> 278 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment
> = GL_DEPTH_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER,
> renderbuffer = 9)
> 279 glCheckFramebufferStatus(target = GL_DRAW_FRAMEBUFFER) =
> GL_FRAMEBUFFER_UNSUPPORTED
>
> This is because (in the case of st/mesa), the depth and stencil
> backing textures are different, which is not something that the
> gallium API allows (nor does NVIDIA hardware handle).
>
> For textures this is handled by merging attachment points when the
> textures are identical, but this is harder to do with renderbuffers
> since they're allocated implicitly, and can be attached/detached
> individually.
>
> I'm not sure that there's a clean way to fix this. I think that the
> piglit should be fixed instead to not do this. However I know these
> sorts of suggestions are often frowned upon, so I'm asking here.
>
>   -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 >

1 - 100 of 144 matches

Mail list logo