[Mesa-dev] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings
CB updates to bound buffers need to go through the CB_DATA endpoints, otherwise the shader may not notice that the updates happened. Furthermore, these updates have to go in to the same address as the bound buffer, otherwise, again, the shader may not notice updates. So we keep track of all the places where a constbuf is bound, and iterate over all of them when updating data. If a binding is found that encompasses the region to be updated, then we use the settings of that binding for the upload. Otherwise we upload as a regular data update. This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well as blurriness in Witcher2. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890 Signed-off-by: Ilia Mirkin Cc: "11.0" --- src/gallium/drivers/nouveau/nouveau_buffer.c | 4 +- src/gallium/drivers/nouveau/nouveau_buffer.h | 2 + src/gallium/drivers/nouveau/nouveau_context.h | 5 ++- src/gallium/drivers/nouveau/nvc0/nvc0_context.h| 8 ++-- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + .../drivers/nouveau/nvc0/nvc0_state_validate.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 46 -- 7 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 912b778..4937dae 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx, nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain, tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size); else - if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb) - nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0, + if (nv->push_cb && can_cb) + nv->push_cb(nv, buf, base, size / 4, (const uint32_t *)data); else nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data); diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h index 7e6a6cc..d45bf7a 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.h +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h @@ -41,6 +41,8 @@ struct nv04_resource { uint8_t status; uint8_t domain; + uint16_t cb_bindings[6]; /* per-shader per-slot bindings */ + struct nouveau_fence *fence; struct nouveau_fence *fence_wr; diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 24deb7e..decb271 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -6,6 +6,8 @@ #define NOUVEAU_MAX_SCRATCH_BUFS 4 +struct nv04_resource; + struct nouveau_context { struct pipe_context pipe; struct nouveau_screen *screen; @@ -23,8 +25,7 @@ struct nouveau_context { unsigned, const void *); /* base, size refer to the whole constant buffer */ void (*push_cb)(struct nouveau_context *, - struct nouveau_bo *, unsigned domain, - unsigned base, unsigned size, + struct nv04_resource *, unsigned offset, unsigned words, const uint32_t *); /* @return: @ref reduced by nr of references found in context */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 6ed79cf..30bee3a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, const void *data); void -nvc0_cb_push(struct nouveau_context *, - struct nouveau_bo *bo, unsigned domain, - unsigned base, unsigned size, - unsigned offset, unsigned words, const uint32_t *data); +nvc0_cb_bo_push(struct nouveau_context *, +struct nouveau_bo *bo, unsigned domain, +unsigned base, unsigned size, +unsigned offset, unsigned words, const uint32_t *data); /* nvc0_vbo.c */ void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index ee29912..c5bfd03 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, } nvc0->constbuf_dirty[s] |= 1 << i; + if (nvc0->constbuf[s][i].u.buf) + nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i); pipe_resource_reference(&nvc0->constbuf[s][i].
Re: [Mesa-dev] [PATCH 1/3] gallium/radeon: add separate stencil level dirty flags
On 07.09.2015 07:17, Marek Olšák wrote: > From: Marek Olšák > > We will only do depth-only or stencil-only decompress blits, whichever is > needed by textures, instead of always doing both. > --- > src/gallium/drivers/r600/evergreen_state.c| 4 ++-- > src/gallium/drivers/r600/r600_state_common.c | 3 +++ > src/gallium/drivers/radeon/r600_pipe_common.h | 1 + > src/gallium/drivers/radeonsi/cik_sdma.c | 4 ++-- > src/gallium/drivers/radeonsi/si_dma.c | 4 ++-- > src/gallium/drivers/radeonsi/si_state_draw.c | 3 +++ > 6 files changed, 13 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 6f4cb55..8ecc498 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -3372,11 +3372,11 @@ static void evergreen_dma_copy(struct pipe_context > *ctx, > } > > if (src->format != dst->format || src_box->depth > 1 || > - rdst->dirty_level_mask != 0) { > + (rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << > dst_level)) { > goto fallback; > } > > - if (rsrc->dirty_level_mask) { > + if ((rsrc->dirty_level_mask | rsrc->stencil_dirty_level_mask) & (1 << > src_level)) { > ctx->flush_resource(ctx, src); > } AFAICT ctx->flush_resource only decompresses colour resources, so is the second change really necessary? Same in si_dma_copy(). With that fixed, or if it's correct as is, Reviewed-by: Michel Dänzer -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] radeonsi: only do depth-only or stencil-only in-place decompression
On 07.09.2015 07:17, Marek Olšák wrote: > From: Marek Olšák > > instead of always doing both. > Usually, only depth is needed, so stencil decompression is useless. [...] > @@ -247,6 +256,7 @@ void si_flush_depth_textures(struct si_context *sctx, > assert(tex->is_depth && !tex->is_flushing_texture); > > si_blit_decompress_depth_in_place(sctx, tex, > + ((struct > si_sampler_view*)view)->is_stencil_sampler, This cast is a bit ugly, please use a separate local variable of type struct si_sampler_view* for this. With that fixed, Reviewed-by: Michel Dänzer Correspondingly for patch 3. -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB
On Thu, Sep 03, 2015 at 11:08:11AM -0700, Ian Romanick wrote: > On 09/03/2015 09:05 AM, Chris Wilson wrote: > > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5 > > and specifies a generic unsized GL_RGB internal format, match that to a > > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware. > > > > Noticed while playing with mesa-demos/teximage: > > > > TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec > > TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec > > GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec > > > > becomes > > > > TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec > > TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec > > GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec > > > > on a puny Baytrail which is still far from what it is capable of. The > > reason for the disparity is that the teximage demo uses a busy texture > > which is performs an accelerated pixel conversion from the user's B5G6R5 > > into the native X8B8G8R8. After the patch, no conversion is required > > allowing use of the blitter and memcpy fast paths. > > > > Signed-off-by: Chris Wilson > > --- > > src/mesa/main/texformat.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c > > index fd9f335..866c7b3 100644 > > --- a/src/mesa/main/texformat.c > > +++ b/src/mesa/main/texformat.c > > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum > > target, > > case GL_RGB: > >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { > > RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM); > > + } else if (type == GL_UNSIGNED_SHORT_5_6_5) { > > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM); > > Oh man... I hate this function. It's such a big pile of hacks. I think > this change is good for now, but I think you're other idea is in the > right direction. This function really should check, "If internalFormat > is a generic base format that matches , is there supported > format that matches exactly?" An issue that cropped in testing was glGenerateMipmap(). As this switches to using B5G6R5 internally we also use that format for storing the intermediate layers when generating the mipmap (thus incurring quantisation errors at each step). What I have in mind to remedy this is to convert the texture to floating point, generate the mipmap, then convert back to the internal format. (However, I expect this also to change test results!) Does that seem reasonable? "The internal formats of the derived mipmap images all match those of the levelbase image. The contents of the derived images are computed by repeated, filtered reduction of the levelbase+1 image. For one- and two-dimensional array and cube map array textures, each layer is filtered independently." Does not seem to exclude the use of resampling. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB
On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson wrote: > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5 > and specifies a generic unsized GL_RGB internal format, match that to a > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware. > > Noticed while playing with mesa-demos/teximage: > > TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec > TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec > GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec > > becomes > > TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec > TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec > GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec > > on a puny Baytrail which is still far from what it is capable of. The > reason for the disparity is that the teximage demo uses a busy texture > which is performs an accelerated pixel conversion from the user's B5G6R5 > into the native X8B8G8R8. After the patch, no conversion is required > allowing use of the blitter and memcpy fast paths. > > Signed-off-by: Chris Wilson > --- > src/mesa/main/texformat.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c > index fd9f335..866c7b3 100644 > --- a/src/mesa/main/texformat.c > +++ b/src/mesa/main/texformat.c > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum > target, > case GL_RGB: >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { > RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM); > + } else if (type == GL_UNSIGNED_SHORT_5_6_5) { > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM); Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for using BGR above, is the _REV suffix on the type... ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB
On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote: > On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson wrote: > > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5 > > and specifies a generic unsized GL_RGB internal format, match that to a > > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware. > > > > Noticed while playing with mesa-demos/teximage: > > > > TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec > > TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec > > GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec > > > > becomes > > > > TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec > > TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec > > GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec > > > > on a puny Baytrail which is still far from what it is capable of. The > > reason for the disparity is that the teximage demo uses a busy texture > > which is performs an accelerated pixel conversion from the user's B5G6R5 > > into the native X8B8G8R8. After the patch, no conversion is required > > allowing use of the blitter and memcpy fast paths. > > > > Signed-off-by: Chris Wilson > > --- > > src/mesa/main/texformat.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c > > index fd9f335..866c7b3 100644 > > --- a/src/mesa/main/texformat.c > > +++ b/src/mesa/main/texformat.c > > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum > > target, > > case GL_RGB: > >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { > > RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM); > > + } else if (type == GL_UNSIGNED_SHORT_5_6_5) { > > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM); > > Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for > using BGR above, is the _REV suffix on the type... No, it's the first line that's "wrong" since the B10G10R10A2 matches GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the incoming data only has 3 channels (no alpha at all). i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM); -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB
On Wed, Sep 9, 2015 at 11:25 AM, Chris Wilson wrote: > On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote: >> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson >> wrote: >> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5 >> > and specifies a generic unsized GL_RGB internal format, match that to a >> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware. >> > >> > Noticed while playing with mesa-demos/teximage: >> > >> > TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec >> > TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec >> > GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec >> > >> > becomes >> > >> > TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec >> > TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec >> > GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec >> > >> > on a puny Baytrail which is still far from what it is capable of. The >> > reason for the disparity is that the teximage demo uses a busy texture >> > which is performs an accelerated pixel conversion from the user's B5G6R5 >> > into the native X8B8G8R8. After the patch, no conversion is required >> > allowing use of the blitter and memcpy fast paths. >> > >> > Signed-off-by: Chris Wilson >> > --- >> > src/mesa/main/texformat.c | 2 ++ >> > 1 file changed, 2 insertions(+) >> > >> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c >> > index fd9f335..866c7b3 100644 >> > --- a/src/mesa/main/texformat.c >> > +++ b/src/mesa/main/texformat.c >> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum >> > target, >> > case GL_RGB: >> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { >> > RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM); >> > + } else if (type == GL_UNSIGNED_SHORT_5_6_5) { >> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM); >> >> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for >> using BGR above, is the _REV suffix on the type... > > No, it's the first line that's "wrong" since the B10G10R10A2 matches > GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the > incoming data only has 3 channels (no alpha at all). Good point about the alpha channel. It sounds like it should be changed to MESA_FORMAT_B10G10R10X2_UNORM instead. But according to src/mesa/main/format_pack.c's pack_ubyte_b10g10r10a2_unorm(), mesa's B10G10R10A2 corresponds to OpenGL's UNSIGNED_INT_2_10_10_10_REV. So I think it matches GL_RGBA, not GL_BGRA. The latter would mean another swizzle, AFAICT. > i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could > also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM); I think intel-specific hacks (like preferring B5G6R5 over R5G6B5) shouldn't leak into _mesa_choose_tex_format(), so I think it'd be a good move to add "RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);" before the latter return. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v4 (part2) 23/59] glsl: add std430 interface packing support to ssbo store and unsized array length
On 08/09/15 18:52, Jordan Justen wrote: > On 2015-08-05 01:30:20, Iago Toral Quiroga wrote: >> From: Samuel Iglesias Gonsalvez >> >> Signed-off-by: Samuel Iglesias Gonsalvez >> --- >> src/glsl/lower_ubo_reference.cpp | 65 >> ++-- >> 1 file changed, 49 insertions(+), 16 deletions(-) >> >> diff --git a/src/glsl/lower_ubo_reference.cpp >> b/src/glsl/lower_ubo_reference.cpp >> index 4f42abb..495ffcf 100644 >> --- a/src/glsl/lower_ubo_reference.cpp >> +++ b/src/glsl/lower_ubo_reference.cpp >> @@ -164,7 +164,7 @@ public: >> void emit_access(bool is_write, ir_dereference *deref, >> ir_variable *base_offset, unsigned int deref_offset, >> bool row_major, int matrix_columns, >> -unsigned write_mask); >> +bool is_std430, unsigned write_mask); >> >> ir_visitor_status visit_enter(class ir_expression *); >> void check_ssbo_unsized_array_length_expression(class ir_expression *); >> @@ -343,8 +343,15 @@ >> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, >> const bool array_row_major = >> is_dereferenced_thing_row_major(deref_array); >> >> -array_stride = deref_array->type->std140_size(array_row_major); >> -array_stride = glsl_align(array_stride, 16); >> +/* The array type will give the correct interface packing >> + * information >> + */ >> +if (deref_array->array->type->interface_packing == >> GLSL_INTERFACE_PACKING_STD430) { >> + array_stride = >> deref_array->type->std430_size(array_row_major); >> +} else { >> + array_stride = >> deref_array->type->std140_size(array_row_major); >> + array_stride = glsl_align(array_stride, 16); >> +} >> } >> >> ir_rvalue *array_index = deref_array->array_index; >> @@ -380,7 +387,12 @@ >> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, >> >> ralloc_free(field_deref); >> >> -unsigned field_align = >> type->std140_base_alignment(field_row_major); >> +unsigned field_align = 0; >> + >> +if (struct_type->interface_packing == >> GLSL_INTERFACE_PACKING_STD430) >> + field_align = type->std430_base_alignment(field_row_major); >> +else >> + field_align = type->std140_base_alignment(field_row_major); >> >> intra_struct_offset = glsl_align(intra_struct_offset, >> field_align); >> >> @@ -388,7 +400,10 @@ >> lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, >> deref_record->field) == 0) >> break; >> >> -intra_struct_offset += type->std140_size(field_row_major); >> +if (struct_type->interface_packing == >> GLSL_INTERFACE_PACKING_STD430) >> + intra_struct_offset += type->std430_size(field_row_major); >> +else >> + intra_struct_offset += type->std140_size(field_row_major); >> >> /* If the field just examined was itself a structure, apply rule >> * #9: >> @@ -463,7 +478,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> >> deref = new(mem_ctx) ir_dereference_variable(load_var); >> emit_access(false, deref, load_offset, const_offset, >> - row_major, matrix_columns, 0); >> + row_major, matrix_columns, false, 0); >> *rvalue = deref; >> >> progress = true; >> @@ -581,6 +596,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write, >> unsigned int deref_offset, >> bool row_major, >> int matrix_columns, >> + bool is_std430, >> unsigned write_mask) >> { >> if (deref->type->is_record()) { >> @@ -599,7 +615,7 @@ lower_ubo_reference_visitor::emit_access(bool is_write, >> >> emit_access(is_write, field_deref, base_offset, >> deref_offset + field_offset, >> - row_major, 1, >> + row_major, 1, is_std430, >> >> writemask_for_size(field_deref->type->vector_elements)); >> >> field_offset += field->type->std140_size(row_major); >> @@ -608,7 +624,8 @@ lower_ubo_reference_visitor::emit_access(bool is_write, >> } >> >> if (deref->type->is_array()) { >> - unsigned array_stride = >> + unsigned array_stride = is_std430 ? >> + deref->type->fields.array->std430_size(row_major) : >> glsl_align(deref->type->fields.array->std140_size(row_major), 16); >> >>for (unsigned i = 0; i < deref->type->length; i++) { >> @@ -618,7 +635,7 @@ lower_ubo_reference_visitor
Re: [Mesa-dev] [PATCH v4 (part2) 20/59] glsl: Add parser/compiler support for std430 interface packing qualifier
On 31/08/15 08:38, Samuel Iglesias Gonsálvez wrote: > > > On 29/08/15 02:27, Jordan Justen wrote: >> On 2015-08-05 01:30:17, Iago Toral Quiroga wrote: >>> From: Samuel Iglesias Gonsalvez >>> >>> This commit also adds functions to calculate std430 base alignment and sizes >>> >>> Signed-off-by: Samuel Iglesias Gonsalvez >>> --- >>> src/glsl/ast.h | 1 + >>> src/glsl/ast_to_hir.cpp | 20 +-- >>> src/glsl/ast_type.cpp| 1 + >>> src/glsl/glsl_parser.yy | 2 + >>> src/glsl/glsl_types.cpp | 117 >>> +++ >>> src/glsl/glsl_types.h| 15 - >>> src/glsl/link_uniform_blocks.cpp | 15 - >>> src/mesa/main/mtypes.h | 3 +- >>> 8 files changed, 165 insertions(+), 9 deletions(-) >>> >>> diff --git a/src/glsl/ast.h b/src/glsl/ast.h >>> index d8c6cea..ac3f80f 100644 >>> --- a/src/glsl/ast.h >>> +++ b/src/glsl/ast.h >>> @@ -491,6 +491,7 @@ struct ast_type_qualifier { >>> /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */ >>> /** \{ */ >>> unsigned std140:1; >>> + unsigned std430:1; >>> unsigned shared:1; >>> unsigned packed:1; >>> unsigned column_major:1; >>> diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp >>> index 6e63d86..2dadb03 100644 >>> --- a/src/glsl/ast_to_hir.cpp >>> +++ b/src/glsl/ast_to_hir.cpp >>> @@ -2886,11 +2886,12 @@ apply_type_qualifier_to_variable(const struct >>> ast_type_qualifier *qual, >>> var->data.depth_layout = ir_depth_layout_none; >>> >>> if (qual->flags.q.std140 || >>> + qual->flags.q.std430 || >>> qual->flags.q.packed || >>> qual->flags.q.shared) { >>>_mesa_glsl_error(loc, state, >>> - "uniform block layout qualifiers std140, packed, >>> and " >>> - "shared can only be applied to uniform blocks, not " >>> + "uniform block layout qualifiers std140, std430, >>> packed, " >>> + "and shared can only be applied to uniform blocks, >>> not " >>> "members"); >>> } >>> >>> @@ -5655,12 +5656,14 @@ ast_process_structure_or_interface_block(exec_list >>> *instructions, >>> const struct ast_type_qualifier *const qual = >>> & decl_list->type->qualifier; >>> if (qual->flags.q.std140 || >>> + qual->flags.q.std430 || >>> qual->flags.q.packed || >>> qual->flags.q.shared) { >>> _mesa_glsl_error(&loc, state, >>> "uniform/shader storage block layout >>> qualifiers " >>> - "std140, packed, and shared can only be >>> applied " >>> - "to uniform/shader storage blocks, not >>> members"); >>> + "std140, std430, packed, and shared can only >>> be " >>> + "applied to uniform/shader storage blocks, >>> not " >>> + "members"); >>> } >>> >>> if (qual->flags.q.constant) { >>> @@ -5872,6 +5875,13 @@ ast_interface_block::hir(exec_list *instructions, >>> this->block_name); >>> } >>> >>> + if (!this->layout.flags.q.buffer && >>> + this->layout.flags.q.std430) { >>> + _mesa_glsl_error(&loc, state, >>> + "std430 storage block layout qualifier is supported >>> " >>> + "only for shader storage blocks"); >>> + } >>> + >>> /* The ast_interface_block has a list of ast_declarator_lists. We >>> * need to turn those into ir_variables with an association >>> * with this uniform block. >>> @@ -5881,6 +5891,8 @@ ast_interface_block::hir(exec_list *instructions, >>>packing = GLSL_INTERFACE_PACKING_SHARED; >>> } else if (this->layout.flags.q.packed) { >>>packing = GLSL_INTERFACE_PACKING_PACKED; >>> + } else if (this->layout.flags.q.std430) { >>> + packing = GLSL_INTERFACE_PACKING_STD430; >>> } else { >>>/* The default layout is std140. >>> */ >>> diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp >>> index a4671e2..6e69ba2 100644 >>> --- a/src/glsl/ast_type.cpp >>> +++ b/src/glsl/ast_type.cpp >>> @@ -123,6 +123,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, >>> ubo_layout_mask.flags.q.std140 = 1; >>> ubo_layout_mask.flags.q.packed = 1; >>> ubo_layout_mask.flags.q.shared = 1; >>> + ubo_layout_mask.flags.q.std430 = 1; >>> >>> ast_type_qualifier ubo_binding_mask; >>> ubo_binding_mask.flags.i = 0; >>> diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy >>> index 2b0c8bd..e1c0f3d 100644 >>> --- a/src/glsl/glsl_parser.yy >>> +++ b/src/glsl/glsl_parser.yy >>> @@ -1197,6 +1197,8 @@ layout_qualifier_id: >>> $$.flags.q.std140 = 1; >>> } else if (match_layout_
Re: [Mesa-dev] [PATCH v4 (part2) 24/59] glsl: a shader storage buffer must be smaller than the maximum size allowed
On 08/09/15 19:50, Jordan Justen wrote: > On 2015-08-05 01:30:21, Iago Toral Quiroga wrote: >> From: Samuel Iglesias Gonsalvez >> >> Otherwise, generate a link time error as per the >> ARB_shader_storage_buffer_object spec. >> >> Signed-off-by: Samuel Iglesias Gonsalvez >> --- >> src/glsl/glsl_types.cpp | 9 +++-- >> src/glsl/link_uniform_blocks.cpp | 17 + >> src/glsl/linker.cpp | 2 +- >> src/glsl/linker.h| 1 + >> 4 files changed, 26 insertions(+), 3 deletions(-) >> >> diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp >> index 5758029..1b8e2e4 100644 >> --- a/src/glsl/glsl_types.cpp >> +++ b/src/glsl/glsl_types.cpp >> @@ -1312,7 +1312,7 @@ glsl_type::std140_size(bool row_major) const >> * rounded up to the next multiple of the base alignment of the >> * structure. >> */ >> - if (this->is_record()) { >> + if (this->is_record() || this->is_interface()) { >>unsigned size = 0; >>unsigned max_align = 0; >> >> @@ -1328,7 +1328,12 @@ glsl_type::std140_size(bool row_major) const >> >> const struct glsl_type *field_type = this->fields.structure[i].type; >> unsigned align = field_type->std140_base_alignment(field_row_major); >> -size = glsl_align(size, align); >> + >> + /* Ignore unsized arrays when calculating size */ >> + if (field_type->is_unsized_array()) >> +continue; >> + >> + size = glsl_align(size, align); >> size += field_type->std140_size(field_row_major); >> >> max_align = MAX2(align, max_align); >> diff --git a/src/glsl/link_uniform_blocks.cpp >> b/src/glsl/link_uniform_blocks.cpp >> index c891d03..2cda78d 100644 >> --- a/src/glsl/link_uniform_blocks.cpp >> +++ b/src/glsl/link_uniform_blocks.cpp >> @@ -178,6 +178,7 @@ struct block { >> >> unsigned >> link_uniform_blocks(void *mem_ctx, >> +struct gl_context *ctx, >> struct gl_shader_program *prog, >> struct gl_shader **shader_list, >> unsigned num_shaders, >> @@ -299,6 +300,14 @@ link_uniform_blocks(void *mem_ctx, >> >> blocks[i].UniformBufferSize = parcel.buffer_size; >> >> +/* Check SSBO size is lower than maximum supported size for >> SSBO */ >> +if (b->is_shader_storage && >> +parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { >> + linker_error(prog, "shader storage block `%s' has size %d > >> %d", >> + block_type->name, >> + parcel.buffer_size, >> + ctx->Const.MaxShaderStorageBlockSize); > > This error message (and below) seems confusing. What about "shader > storage block `%s' has size %d, which is larger than than the maximum > allowed (%d)"? > Yeah, it is more clear. I will do that change. > Reviewed-by: Jordan Justen Thanks for your reviews! Sam > >> +} >> blocks[i].NumUniforms = >> (unsigned)(ptrdiff_t)(&variables[parcel.index] - >> blocks[i].Uniforms); >> >> @@ -319,6 +328,14 @@ link_uniform_blocks(void *mem_ctx, >> >> blocks[i].UniformBufferSize = parcel.buffer_size; >> >> + /* Check SSBO size is lower than maximum supported size for SSBO */ >> + if (b->is_shader_storage && >> + parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { >> +linker_error(prog, "shader storage block `%s' has size %d > %d", >> +block_type->name, >> +parcel.buffer_size, >> +ctx->Const.MaxShaderStorageBlockSize); >> + } >> blocks[i].NumUniforms = >> (unsigned)(ptrdiff_t)(&variables[parcel.index] - >> blocks[i].Uniforms); >> >> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp >> index 9aff3ce..7ba685b 100644 >> --- a/src/glsl/linker.cpp >> +++ b/src/glsl/linker.cpp >> @@ -2023,7 +2023,7 @@ link_intrastage_shaders(void *mem_ctx, >> >> /* Link up uniform blocks defined within this stage. */ >> const unsigned num_uniform_blocks = >> - link_uniform_blocks(mem_ctx, prog, shader_list, num_shaders, >> + link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, >>&uniform_blocks); >> if (!prog->LinkStatus) >>return NULL; >> diff --git a/src/glsl/linker.h b/src/glsl/linker.h >> index ce3dc32..ada5a1f 100644 >> --- a/src/glsl/linker.h >> +++ b/src/glsl/linker.h >> @@ -56,6 +56,7 @@ link_uniform_blocks_are_compatible(const gl_uniform_block >> *a, >> >> extern unsigned >> link_uniform_blocks(void *mem_ctx, >> +struct gl_context *ctx, >> struct gl_shader_program *prog, >> struct gl_shader **shader_list, >> unsigned num_shaders, >> -- >> 1.9.1 >> > ___
[Mesa-dev] [PATCH 0/9] Implement textureQueryLod in softpipe
The following patches do some preparatory refactoring in softpipe's sp_tex_sample.{c,h} before actually implementing the feature. There is also a patch that fixes softpipe's textureLod with bias. I caught this bug when finished the textureQueryLod implementation - one of four textureQueryLod tests were failing because of it. I sent patches adding the testcases with non-zero lod-bias value to piglit mailing list (http://lists.freedesktop.org/archives/piglit/2015-September/017018.html). Also, this is my first contribution to mesa, but I hope I didn't make any obvious mistakes. Thanks. Krzesimir Nowak (9): tgsi: Remove trailing backslash in comment softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value. softpipe: Split compute_lambda_lod into two functions softpipe: Put mip_filter_func inside a struct softpipe: Split code getting a filter into separate function softpipe: Split 3D to 2D coords conversion into separate function. softpipe: Add functions for computing mipmap level tgsi: Add code for handling lodq opcode softpipe: Implement and enable textureQueryLod src/gallium/auxiliary/tgsi/tgsi_exec.c | 46 +++- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 + src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 338 +-- src/gallium/drivers/softpipe/sp_tex_sample.h | 32 ++- 5 files changed, 345 insertions(+), 83 deletions(-) -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level
These functions will be used by textureQueryLod. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 100 +-- src/gallium/drivers/softpipe/sp_tex_sample.h | 7 ++ 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index cdec984..6e639e0 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1937,6 +1937,38 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) } static void +clamp_lod(const struct sp_sampler_view *sp_sview, + const struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float clamped[TGSI_QUAD_SIZE]) +{ + const float min_lod = sp_samp->base.min_lod; + const float max_lod = sp_samp->base.max_lod; + const float min_level = sp_sview->base.u.tex.first_level; + const float max_level = sp_sview->base.u.tex.last_level; + int i; + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + float cl = lod[i]; + + cl = CLAMP(cl, min_lod, max_lod); + /* XXX: Is min_level ever different from 0? + */ + cl = CLAMP(cl, 0, max_level - min_level); + clamped[i] = cl; + } +} + +static void +mip_level_linear(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + clamp_lod(sp_sview, sp_samp, lod, level); +} + +static void mip_filter_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, } +static void +mip_level_nearest(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + const int first_level = sp_sview->base.u.tex.first_level; + int j; + + clamp_lod(sp_sview, sp_samp, lod, level); + for (j = 0; j < TGSI_QUAD_SIZE; j++) + /* TODO: It should rather be: + * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F; + */ + level[j] = first_level + (int)(level[j] + 0.5F); +} + /** * Compute nearest mipmap level from texcoords. * Then sample the texture level for four elements of a quad. @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, static void +mip_level_none(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + int j; + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + level[j] = sp_sview->base.u.tex.first_level; + } +} + +static void mip_filter_none(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview, static void +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview, +struct sp_sampler *sp_samp, +const float lod[TGSI_QUAD_SIZE], +float level[TGSI_QUAD_SIZE]) +{ + mip_level_none(sp_sview, sp_samp, lod, level); +} + +static void mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, } +static void +mip_level_linear_aniso(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + mip_level_linear(sp_sview, sp_samp, lod, level); +} + /** * Sample 2D texture using an anisotropic filter. */ @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, } } +static void +mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + mip_level_linear(sp_sview, sp_samp, lod, level); +} /** * Specialized version of mip_filter_linear with hard-wired calls to @@ -2515,12 +2603,12 @@ mip_filter_linear_2d_linear_repeat_POT( } } -static struct sp_mip mip_linear = {mip_filter_linear}; -static struct sp_mip mip_nearest = {mip_filter_nearest}; -static struct sp_mip mip_none = {mip_filter_none}; -static struct sp_mip mip_none_no_filter_select = {mip_filter_none_no_filter_select}; -static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso}; -static struct sp_mip mip_linear_2d_linear_repeat_
[Mesa-dev] [PATCH 1/9] tgsi: Remove trailing backslash in comment
It clearly is here by accident. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 75cd0d5..9544623 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach, /* * execute a texture instruction. * - * modifier is used to control the channel routing for the\ + * modifier is used to control the channel routing for the * instruction variants like proj, lod, and texture with lod bias. * sampler indicates which src register the sampler is contained in. */ -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions
textureQueryLod returns a vec2 with a mipmap information and a LOD. The latter needs to be not clamped. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 55 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 19188b0..38bdc93 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler, } -/* Calculate level of detail for every fragment. +/* Calculate level of detail for every fragment. The computed value is not + * clamped into lod_min and lod_max. * \param lod_in per-fragment lod_bias or explicit_lod. * \param lod results per-fragment lod. */ static inline void -compute_lambda_lod(struct sp_sampler_view *sp_sview, - struct sp_sampler *sp_samp, - const float s[TGSI_QUAD_SIZE], - const float t[TGSI_QUAD_SIZE], - const float p[TGSI_QUAD_SIZE], - const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, - float lod[TGSI_QUAD_SIZE]) +compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float lod_in[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float lod[TGSI_QUAD_SIZE]) { const struct pipe_sampler_state *sampler = &sp_samp->base; - float lod_bias = sampler->lod_bias; - float min_lod = sampler->min_lod; - float max_lod = sampler->max_lod; + const float lod_bias = sampler->lod_bias; float lambda; uint i; @@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, /* XXX FIXME */ case tgsi_sampler_derivs_explicit: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod); + lod[0] = lod[1] = lod[2] = lod[3] = lambda; break; case tgsi_sampler_lod_bias: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; for (i = 0; i < TGSI_QUAD_SIZE; i++) { lod[i] = lambda + lod_in[i]; - lod[i] = CLAMP(lod[i], min_lod, max_lod); } break; case tgsi_sampler_lod_explicit: for (i = 0; i < TGSI_QUAD_SIZE; i++) { - lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod); + lod[i] = lod_in[i] + lod_bias; } break; case tgsi_sampler_lod_zero: case tgsi_sampler_gather: /* this is all static state in the sampler really need clamp here? */ - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod); + lod[0] = lod[1] = lod[2] = lod[3] = lod_bias; break; default: assert(0); @@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, } } +/* Calculate level of detail for every fragment. + * \param lod_in per-fragment lod_bias or explicit_lod. + * \param lod results per-fragment lod. + */ +static inline void +compute_lambda_lod(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float lod_in[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float lod[TGSI_QUAD_SIZE]) +{ + const struct pipe_sampler_state *sampler = &sp_samp->base; + const float min_lod = sampler->min_lod; + const float max_lod = sampler->max_lod; + int i; + + compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + lod[i] = CLAMP(lod[i], min_lod, max_lod); + } +} + static inline unsigned get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) { -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode
This introduces new vfunc in tgsi_sampler just for this opcode. I decided against extending get_samples vfunc to return the mipmap level and LOD - the function's prototype is already too scary and doing the sampling for textureQueryLod would be a waste of time. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++ src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 2 files changed, 54 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9544623..054ad08 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_lodq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint unit; + int dim; + int i; + union tgsi_exec_channel coords[4]; + const union tgsi_exec_channel *args[Elements(coords)]; + union tgsi_exec_channel r[2]; + + unit = fetch_sampler_unit(mach, inst, 1); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL); + assert(dim <= Elements(coords)); + /* fetch coordinates */ + for (i = 0; i < dim; i++) { + FETCH(&coords[i], 0, TGSI_CHAN_X + i); + args[i] = &coords[i]; + } + for (i = dim; i < Elements(coords); i++) { + args[i] = &ZeroVec; + } + mach->Sampler->query_lod(mach->Sampler, unit, unit, +args[0]->f, +args[1]->f, +args[2]->f, +args[3]->f, +tgsi_sampler_lod_none, +r[0].f, +r[1].f); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } +} static void exec_txd(struct tgsi_exec_machine *mach, @@ -4378,6 +4416,12 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); break; + case TGSI_OPCODE_LODQ: + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_lodq(mach, inst); + break; + case TGSI_OPCODE_UP2H: assert (0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 5d56aab..556e0af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -138,6 +138,16 @@ struct tgsi_sampler const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE], const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + void (*query_lod)(struct tgsi_sampler *tgsi_sampler, + const unsigned sview_index, + const unsigned sampler_index, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float mipmap[TGSI_QUAD_SIZE], + float lod[TGSI_QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 4096 -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function
This function will be later used by textureQueryLod. The img_filter_func are optional, because textureQueryLod will not need them. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 53 +++- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index cd4a659..0a3fc20 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview, } } +/** + * Get mip filter, and optionally both img min filter and img mag filter + */ +static void +get_filters(struct sp_sampler_view *sp_sview, +struct sp_sampler *sp_samp, +enum tgsi_sampler_control control, +struct sp_mip **mip, +img_filter_func *min, +img_filter_func *mag) +{ + assert(mip); + if (control == tgsi_sampler_gather) { + *mip = &mip_nearest; + if (min) + *min = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); + } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { + *mip = &mip_linear_2d_linear_repeat_POT; + } + else { + *mip = &sp_samp->mip; + if (min || mag) { + img_filter_func tmp_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); + if (min) +*min = tmp_filter; + if (mag) { +if (sp_samp->min_mag_equal) { + *mag = tmp_filter; +} +else { + *mag = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false); +} + } + } + } +} static void sample_mip(struct sp_sampler_view *sp_sview, @@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview, img_filter_func min_img_filter = NULL; img_filter_func mag_img_filter = NULL; - if (filt_args->control == tgsi_sampler_gather) { - mip = &mip_nearest; - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); - } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { - mip = &mip_linear_2d_linear_repeat_POT; - } - else { - mip = &sp_samp->mip; - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); - if (sp_samp->min_mag_equal) { - mag_img_filter = min_img_filter; - } - else { - mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false); - } - } + get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, &mag_img_filter); mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, s, t, p, c0, lod, filt_args, rgba); -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/9] softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value.
The level-of-detail bias wasn't simply added in the explicit LOD case. This case seems to be tested only in piglit's fs-texturequerylod-nearest-biased test, which is currently skipped, as softpipe does not support textureQueryLod at the moment. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 565fca6..19188b0 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1892,7 +1892,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, break; case tgsi_sampler_lod_explicit: for (i = 0; i < TGSI_QUAD_SIZE; i++) { - lod[i] = CLAMP(lod_in[i], min_lod, max_lod); + lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod); } break; case tgsi_sampler_lod_zero: -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 9/9] softpipe: Implement and enable textureQueryLod
Passes the shader piglit tests and introduces no regressions. This commit finally makes use of the refactoring in previous commits. --- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 47 +++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 0bfd9c3..7ca8a67 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return 4; case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_TEXTURE_QUERY_LOD: return 1; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: return 0; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 6e639e0..499c8f9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -3566,6 +3566,51 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba); } +static void +sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler, + const unsigned sview_index, + const unsigned sampler_index, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float mipmap[TGSI_QUAD_SIZE], + float lod[TGSI_QUAD_SIZE]) +{ + static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 }; + + struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler; + struct sp_sampler_view *sp_sview; + struct sp_sampler *sp_samp; + struct sp_mip *mip; + int i; + float cs[TGSI_QUAD_SIZE]; + float ct[TGSI_QUAD_SIZE]; + float cp[TGSI_QUAD_SIZE]; + + assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + assert(sampler_index < PIPE_MAX_SAMPLERS); + assert(sp_tgsi_samp->sp_sampler[sampler_index]); + + sp_sview = &sp_tgsi_samp->sp_sview[sview_index]; + sp_samp = sp_tgsi_samp->sp_sampler[sampler_index]; + /* always have a view here but texture is NULL if no sampler view was set. */ + if (!sp_sview->base.texture) { + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + mipmap[i] = 0.0f; + lod[i] = 0.0f; + } + return; + } + + sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp); + + compute_lambda_lod_not_clamped(sp_sview, sp_samp, + cs, ct, cp, lod_in, control, lod); + get_filters(sp_sview, sp_samp, control, &mip, NULL, NULL); + mip->level(sp_sview, sp_samp, lod, mipmap); +} static void sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler, @@ -3602,7 +3647,7 @@ sp_create_tgsi_sampler(void) samp->base.get_dims = sp_tgsi_get_dims; samp->base.get_samples = sp_tgsi_get_samples; samp->base.get_texel = sp_tgsi_get_texel; + samp->base.query_lod = sp_tgsi_query_lod; return samp; } - -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.
This is to avoid tying the conversion to sampling - textureQueryLod will need to do the conversion too, but it does not do any sampling. So instead of a "sample" vfunc, there is a "convert" vfunc. The drawback of this approach is that a "noop" convert copies 3 arrays of floats. Would be nice to avoid it in some clean way. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 73 +--- src/gallium/drivers/softpipe/sp_tex_sample.h | 20 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 0a3fc20..cdec984 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview, } +static void +convert_noop(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE]) +{ + const size_t len = sizeof(float) * TGSI_QUAD_SIZE; + + memcpy(, s, len); + memcpy(, t, len); + memcpy(, p, len); +} -/** - * Use 3D texcoords to choose a cube face, then sample the 2D cube faces. - * Put face info into the sampler faces[] array. - */ static void -sample_cube(struct sp_sampler_view *sp_sview, -struct sp_sampler *sp_samp, -const float s[TGSI_QUAD_SIZE], -const float t[TGSI_QUAD_SIZE], -const float p[TGSI_QUAD_SIZE], -const float c0[TGSI_QUAD_SIZE], -const float c1[TGSI_QUAD_SIZE], -const struct filter_args *filt_args, -float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) +convert_cube(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE]) { unsigned j; - float [4], [4]; - - /* Not actually used, but the intermediate steps that do the -* dereferencing don't know it. -*/ - static float [4] = { 0, 0, 0, 0 }; [0] = c0[0]; [1] = c0[1]; @@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview, } } } - - sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba); } @@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe, if (view->target == PIPE_TEXTURE_CUBE || view->target == PIPE_TEXTURE_CUBE_ARRAY) - sview->get_samples = sample_cube; + sview->convert_coords = convert_cube; else { - sview->get_samples = sample_mip; + sview->convert_coords = convert_noop; } sview->pot2d = spr->pot && (view->target == PIPE_TEXTURE_2D || @@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { - struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler; + struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler; + struct sp_sampler_view *sp_sview; + struct sp_sampler *sp_samp; struct filter_args filt_args; + float cs[TGSI_QUAD_SIZE]; + float ct[TGSI_QUAD_SIZE]; + float cp[TGSI_QUAD_SIZE]; + assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); assert(sampler_index < PIPE_MAX_SAMPLERS); - assert(sp_samp->sp_sampler[sampler_index]); + assert(sp_tgsi_samp->sp_sampler[sampler_index]); + + sp_sview = &sp_tgsi_samp->sp_sview[sview_index]; + sp_samp = sp_tgsi_samp->sp_sampler[sampler_index]; /* always have a view here but texture is NULL if no sampler view was set. */ - if (!sp_samp->sp_sview[sview_index].base.texture) { + if (!sp_sview->base.texture) { int i, j; for (j = 0; j < TGSI_NUM_CHANNELS; j++) { for (i = 0; i < TGSI_QUAD_SIZE; i++) { @@ -3456,11 +3470,12 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, return; } + sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp); + filt_args.control = control; filt_args.offset = offset; - sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index], - sp_samp->sp_sampler[sampler_index], - s, t, p, c0, lod, &filt_args, rgba); + + sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba); }
[Mesa-dev] [PATCH 4/9] softpipe: Put mip_filter_func inside a struct
Putting this function pointer into a struct enables grouping of several related functions in a single place. For now it is just a single function, but the struct will be later extended with a mip_level_func for returning mip level. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 28 +--- src/gallium/drivers/softpipe/sp_tex_sample.h | 5 - 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 38bdc93..cd4a659 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2515,6 +2515,12 @@ mip_filter_linear_2d_linear_repeat_POT( } } +static struct sp_mip mip_linear = {mip_filter_linear}; +static struct sp_mip mip_nearest = {mip_filter_nearest}; +static struct sp_mip mip_none = {mip_filter_none}; +static struct sp_mip mip_none_no_filter_select = {mip_filter_none_no_filter_select}; +static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso}; +static struct sp_mip mip_linear_2d_linear_repeat_POT = {mip_filter_linear_2d_linear_repeat_POT}; /** * Do shadow/depth comparisons. @@ -2918,18 +2924,18 @@ sample_mip(struct sp_sampler_view *sp_sview, const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { - mip_filter_func mip_filter; + struct sp_mip *mip = NULL; img_filter_func min_img_filter = NULL; img_filter_func mag_img_filter = NULL; if (filt_args->control == tgsi_sampler_gather) { - mip_filter = mip_filter_nearest; + mip = &mip_nearest; min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { - mip_filter = mip_filter_linear_2d_linear_repeat_POT; + mip = &mip_linear_2d_linear_repeat_POT; } else { - mip_filter = sp_samp->mip_filter; + mip = &sp_samp->mip; min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); if (sp_samp->min_mag_equal) { mag_img_filter = min_img_filter; @@ -2939,8 +2945,8 @@ sample_mip(struct sp_sampler_view *sp_sview, } } - mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, - s, t, p, c0, lod, filt_args, rgba); + mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, + s, t, p, c0, lod, filt_args, rgba); if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) { sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba); @@ -3239,13 +3245,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe, switch (sampler->min_mip_filter) { case PIPE_TEX_MIPFILTER_NONE: if (sampler->min_img_filter == sampler->mag_img_filter) - samp->mip_filter = mip_filter_none_no_filter_select; + samp->mip = mip_none_no_filter_select; else - samp->mip_filter = mip_filter_none; + samp->mip = mip_none; break; case PIPE_TEX_MIPFILTER_NEAREST: - samp->mip_filter = mip_filter_nearest; + samp->mip = mip_nearest; break; case PIPE_TEX_MIPFILTER_LINEAR: @@ -3257,11 +3263,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe, sampler->max_anisotropy <= 1) { samp->min_mag_equal_repeat_linear = TRUE; } - samp->mip_filter = mip_filter_linear; + samp->mip = mip_linear; /* Anisotropic filtering extension. */ if (sampler->max_anisotropy > 1) { - samp->mip_filter = mip_filter_linear_aniso; + samp->mip = mip_linear_aniso; /* Override min_img_filter: * min_img_filter needs to be set to NEAREST since we need to access diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 7d1aafc..78541e1 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -128,6 +128,9 @@ struct sp_sampler_view }; +struct sp_mip { + mip_filter_func filter; +}; struct sp_sampler { struct pipe_sampler_state base; @@ -144,7 +147,7 @@ struct sp_sampler { wrap_linear_func linear_texcoord_t; wrap_linear_func linear_texcoord_p; - mip_filter_func mip_filter; + struct sp_mip mip; }; -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Match MESA_FORMAT_B5G6R5 for a shallow pixel format of GL_RGB
On Wed, Sep 09, 2015 at 12:09:40PM +0200, Erik Faye-Lund wrote: > On Wed, Sep 9, 2015 at 11:25 AM, Chris Wilson > wrote: > > On Wed, Sep 09, 2015 at 11:11:59AM +0200, Erik Faye-Lund wrote: > >> On Thu, Sep 3, 2015 at 6:05 PM, Chris Wilson > >> wrote: > >> > If the user supplies a pixel format of GL_RGB + GL_UNSIGNED_SHORT_5_6_5 > >> > and specifies a generic unsized GL_RGB internal format, match that to a > >> > texture format of MESA_FORMAT_B5G6R5 if supported by the hardware. > >> > > >> > Noticed while playing with mesa-demos/teximage: > >> > > >> > TexImage(RGB/565 256 x 256): 79.8 images/sec, 10.0 MB/sec > >> > TexSubImage(RGB/565 256 x 256): 3804.9 images/sec, 475.6 MB/sec > >> > GetTexImage(RGB/565 256 x 256): 99.5 images/sec, 12.4 MB/sec > >> > > >> > becomes > >> > > >> > TexImage(RGB/565 256 x 256): 3439.1 images/sec, 429.9 MB/sec > >> > TexSubImage(RGB/565 256 x 256): 3744.1 images/sec, 468.0 MB/sec > >> > GetTexImage(RGB/565 256 x 256): 4713.5 images/sec, 589.2 MB/sec > >> > > >> > on a puny Baytrail which is still far from what it is capable of. The > >> > reason for the disparity is that the teximage demo uses a busy texture > >> > which is performs an accelerated pixel conversion from the user's B5G6R5 > >> > into the native X8B8G8R8. After the patch, no conversion is required > >> > allowing use of the blitter and memcpy fast paths. > >> > > >> > Signed-off-by: Chris Wilson > >> > --- > >> > src/mesa/main/texformat.c | 2 ++ > >> > 1 file changed, 2 insertions(+) > >> > > >> > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c > >> > index fd9f335..866c7b3 100644 > >> > --- a/src/mesa/main/texformat.c > >> > +++ b/src/mesa/main/texformat.c > >> > @@ -114,6 +114,8 @@ _mesa_choose_tex_format(struct gl_context *ctx, > >> > GLenum target, > >> > case GL_RGB: > >> >if (type == GL_UNSIGNED_INT_2_10_10_10_REV) { > >> > RETURN_IF_SUPPORTED(MESA_FORMAT_B10G10R10A2_UNORM); > >> > + } else if (type == GL_UNSIGNED_SHORT_5_6_5) { > >> > + RETURN_IF_SUPPORTED(MESA_FORMAT_B5G6R5_UNORM); > >> > >> Shouldn't this be MESA_FORMAT_R5G6B5_UNORM? AFAICT, the reason for > >> using BGR above, is the _REV suffix on the type... > > > > No, it's the first line that's "wrong" since the B10G10R10A2 matches > > GL_BGRA + GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB implies that the > > incoming data only has 3 channels (no alpha at all). > > Good point about the alpha channel. It sounds like it should be > changed to MESA_FORMAT_B10G10R10X2_UNORM instead. > > But according to src/mesa/main/format_pack.c's > pack_ubyte_b10g10r10a2_unorm(), mesa's B10G10R10A2 corresponds to > OpenGL's UNSIGNED_INT_2_10_10_10_REV. So I think it matches GL_RGBA, > not GL_BGRA. The latter would mean another swizzle, AFAICT. The mapping is (_mesa_format_from_format_and_type): case GL_UNSIGNED_INT_2_10_10_10_REV: if (format == GL_RGB) return MESA_FORMAT_R10G10B10X2_UNORM; if (format == GL_RGBA) return MESA_FORMAT_R10G10B10A2_UNORM; else if (format == GL_RGBA_INTEGER) return MESA_FORMAT_R10G10B10A2_UINT; else if (format == GL_BGRA) return MESA_FORMAT_B10G10R10A2_UNORM; else if (format == GL_BGRA_INTEGER) return MESA_FORMAT_B10G10R10A2_UINT; break; The trick is that the packed formats are written as lsb first (or it may just be native and my lsb bias is showing). > > i965 know how to do B5G6R5 and not R5G6B5, but for completeness we could > > also add RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM); > > I think intel-specific hacks (like preferring B5G6R5 over R5G6B5) > shouldn't leak into _mesa_choose_tex_format(), Hah, did you look at _mesa_choose_tex_format()? :) I sent an another patch to do the hardware agnostic unswizzled conversions. > So I think it'd be a > good move to add "RETURN_IF_SUPPORTED(MESA_FORMAT_R5G6B5_UNORM);" > before the latter return. Not quite, because the mapping for 565 is: case GL_UNSIGNED_SHORT_5_6_5: if (format == GL_RGB) return MESA_FORMAT_B5G6R5_UNORM; else if (format == GL_BGR) return MESA_FORMAT_R5G6B5_UNORM; else if (format == GL_RGB_INTEGER) return MESA_FORMAT_B5G6R5_UINT; break; -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80821] When LIBGL_ALWAYS_SOFTWARE is set, KHR_create_context is not supported
https://bugs.freedesktop.org/show_bug.cgi?id=80821 Emil Velikov changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #6 from Emil Velikov --- Props to Boyan, a slightly different version has landed for the DRI based libEGL. On the st/egl side, it has been dropped from upstream due lack of interested maintainer :'( -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i915: fixing driver crashes if too few vertices are submitted
Comparison with a signed expression and unsigned value is converted to unsigned value, reason for minus value is interpreted as a big unsigned value. For this case the "for" loop is going into unexpected behavior. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 Signed-off-by: Marius Predut --- src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7be3954..2ae33c0 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, LOCAL_VARS; GLuint j; + if(count%4 != 0) return; + INIT(GL_TRIANGLES); for (j = start; j < count-3; j += 4) { @@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx, ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); } else { - ok = HAVE_TRIANGLES; /* flatshading is ok. */ + ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */ } break; default: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.
Mark Janes writes: > Francisco Jerez writes: > >> Mark Janes writes: >> >>> When I tested this, I saw an intermittent BSW gpu hang. I haven't been >>> able to confirm that it is due to the host-mem-barrier test. >>> >> It would probably be useful to know if the hang is due to any of the >> image load/store tests or if it's something unrelated. > > Yes, you are right. I will take some time tomorrow to catch the > specific test. Given the low rate of gpu hangs on BSW lately, I expect > it will be due to image load/store tests. However I need to confirm it. > Anyway, unless the BSW hang is introduced by this patch or caused by the host-mem-barrier/Indirect/RaW subtest, feel free to open a separate bug report for it. It's unlikely to be related to this thread. > -Mark signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/fs: Fix hang on IVB and VLV with image format mismatch.
Ian Romanick writes: > On 09/03/2015 06:03 AM, Francisco Jerez wrote: >> IVB and VLV hang sporadically when an untyped surface read or write >> message is used to access a surface of format other than RAW, as may >> happen when there is a mismatch between the format qualifier of the >> image uniform and the format of the actual image bound to the >> pipeline. According to the spec this condition gives undefined >> results but may not lead to program termination (which is one of the >> possible outcomes of the hang). Fix it by checking at runtime whether >> the surface is of the right type. >> >> Fixes the "arb_shader_image_load_store.invalid/format mismatch" piglit >> subtest. >> >> Reported-by: Mark Janes >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91718 >> CC: mesa-sta...@lists.freedesktop.org >> --- >> .../drivers/dri/i965/brw_fs_surface_builder.cpp| 42 >> +++--- >> 1 file changed, 38 insertions(+), 4 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> index f60afc9..57ce87f 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >> @@ -313,12 +313,42 @@ namespace { >> >> namespace image_validity { >>/** >> + * Check whether the bound image is suitable for untyped access. >> + */ >> + brw_predicate >> + emit_untyped_image_check(const fs_builder &bld, const fs_reg &image, >> + brw_predicate pred) >> + { >> + const brw_device_info *devinfo = bld.shader->devinfo; >> + const fs_reg stride = offset(image, bld, >> BRW_IMAGE_PARAM_STRIDE_OFFSET); >> + >> + if (devinfo->gen == 7 && !devinfo->is_haswell) { >> +/* Check whether the first stride component (i.e. the Bpp value) >> + * is greater than four, what on Gen7 indicates that a surface >> of >> + * type RAW has been bound for untyped access. Reading or >> writing >> + * to a surface of type other than RAW using untyped surface >> + * messages causes a hang on IVB and VLV. >> + */ >> +set_predicate(pred, >> + bld.CMP(bld.null_reg_ud(), stride, fs_reg(4), >> + BRW_CONDITIONAL_G)); >> + >> +return BRW_PREDICATE_NORMAL; >> + } else { >> +/* More recent generations handle the format mismatch >> + * gracefully. >> + */ >> +return pred; >> + } >> + } >> + >> + /** >> * Check whether there is an image bound at the given index and write >> * the comparison result to f0.0. Returns an appropriate predication >> * mode to use on subsequent image operations. >> */ >>brw_predicate >> - emit_surface_check(const fs_builder &bld, const fs_reg &image) >> + emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image) > > This change seems spurious (and also reasonable). > The problem is that this patch introduces a new kind of surface check applicable to untyped surface reads and writes only, so it would have been confusing to keep the other surface check which is applicable to atomics only with its previous rather unspecific name. >>{ >> const brw_device_info *devinfo = bld.shader->devinfo; >> const fs_reg size = offset(image, bld, >> BRW_IMAGE_PARAM_SIZE_OFFSET); >> @@ -895,7 +925,9 @@ namespace brw { >> * surface read on the result, >> */ >> const brw_predicate pred = >> - emit_bounds_check(bld, image, saddr, dims); >> + emit_untyped_image_check(bld, image, >> +emit_bounds_check(bld, image, >> + saddr, dims)); > > These appear to be the only two users of emit_bounds_check... shouldn't > the bounds still be tested? > Yes, they are still. >> >> /* and they don't know about surface coordinates, we need to >> * convert them to a raw memory offset. >> @@ -1041,7 +1073,9 @@ namespace brw { >> * the surface write on the result, >> */ >> const brw_predicate pred = >> - emit_bounds_check(bld, image, saddr, dims); >> + emit_untyped_image_check(bld, image, >> + emit_bounds_check(bld, image, >> + saddr, dims)); >> >> /* and, phew, they don't know about surface coordinates, we >> * need to convert them to a raw memory offset. >> @@ -1072,7 +1106,7 @@ namespace brw { >> using namespace image_coordinates; >> using namespace surface_access; >>
[Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines
On PNV platform, for 1 pixel line thickness or less, the general anti-aliasing algorithm gives up, and a garbage line is generated. Setting a Line Width of 0.0 specifies the rasterization of the "thinnest" (one-pixel-wide), non-antialiased lines. Lines rendered with zero Line Width are rasterized using Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 3D/Media, 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section. This patch follow the same rules as patches fixing the https://bugs.freedesktop.org/show_bug.cgi?id=28832 bug. v1: Eduardo Lima Mitev: Wrong indentation inside the if clause. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367 Signed-off-by: Marius Predut --- src/mesa/drivers/dri/i915/i915_state.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 4c83073..ebb4e9a 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) width = (int) (widthf * 2); width = CLAMP(width, 1, 0xf); + + if (ctx->Line.Width < 1.5 || widthf < 1.5) { + /* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the "thinnest" (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec G45: Volume 2: 3D/Media, + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section + */ + width = 0; + } lis4 |= width << S4_LINE_WIDTH_SHIFT; if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) { -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.
Am 09.09.2015 um 05:56 schrieb Ian Romanick: > On 08/15/2015 02:24 AM, Francisco Jerez wrote: >> Roland Scheidegger writes: >> >>> I guess though you'd need these bits when implementing things like >>> ARB_fragment_shader_ordering? (Thus stuff actually looks useful but I >>> don't know if anybody wants to implement it in the near term.) >>> >> I believe that could be implemented using the thread dependency >> mechanism which is independent of the UAV coherency stuff. BTW I wasn't >> aware that the extension had become ARB. > > It hasn't. It's still just an Intel extension. Ah yes that's right not sure what I was looking at. There's a very similar NV extension though too (NV_fragment_shader_interlock), so there may be an ARB extension in the future. Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Adaptive Vsync
On Sun, Sep 6, 2015 at 4:28 AM, Lauri Kasanen wrote: > On Sat, 5 Sep 2015 23:29:05 + > Albert Freeman wrote: > >> The reply from Eric Anholt made two suggestions that should not be >> difficult to implement for someone who made the patch in the first >> place. Why would code be committed when improvements could be easily >> made? From what I have seen, this kind of thing happens even to >> experienced mesa developers who know exactly what they are doing. >> >> It is much better to arrive at a solution without the issue now than >> wait a few years (or some other period of time) for it to be replaced >> by someone who has likely forgotten the details involved with its >> implementation (or by someone who did not write the code in the first >> place). > > Oh, absolutely - I had no issues with "this needs changing". My issue > was with the fact it took months to get that. Had I come up with a new > patch, it would likely have taken a similar time, months again, which > did not inspire confidence. You need to drive it. Developers are busy and can miss patches especially if the subject is not necessarily an area they are familiar with. This is not specific to mesa; the same is true about just about any open source project. If you don't get any feedback right away send a gentle reminder to the list asking if there are any objections stating your desire to get it committed. Wait a bit more time and if there is still no feedback, feel free to commit or ask someone to commit it. If you just drive by and dump patches with no follow up, we have no idea how serious about the patches you are. Were they just an RFC, did you really want to commit them? If you don't follow up on them, developers assume you don't really care about getting them committed. Alex > > Benjamin, if you want to take it forward, please do. > > - Lauri > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted
Comparison with a signed expression and unsigned value is converted to unsigned value, reason for minus value is interpreted as a big unsigned value. For this case the "for" loop is going into unexpected behavior. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 Signed-off-by: Marius Predut --- src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7be3954..88ecc78 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, LOCAL_VARS; GLuint j; + if(count%4 != 0) return; + INIT(GL_TRIANGLES); for (j = start; j < count-3; j += 4) { @@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx, ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); } else { - ok = HAVE_TRIANGLES; /* flatshading is ok. */ + ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */ } break; default: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Adaptive Vsync
On Wed, 9 Sep 2015 09:11:50 -0400 Alex Deucher wrote: > > Oh, absolutely - I had no issues with "this needs changing". My issue > > was with the fact it took months to get that. Had I come up with a new > > patch, it would likely have taken a similar time, months again, which > > did not inspire confidence. > > You need to drive it. Developers are busy and can miss patches > especially if the subject is not necessarily an area they are familiar > with. This is not specific to mesa; the same is true about just about > any open source project. If you don't get any feedback right away > send a gentle reminder to the list asking if there are any objections > stating your desire to get it committed. Wait a bit more time and if > there is still no feedback, feel free to commit or ask someone to > commit it. If you just drive by and dump patches with no follow up, > we have no idea how serious about the patches you are. Were they just > an RFC, did you really want to commit them? If you don't follow up on > them, developers assume you don't really care about getting them > committed. I had followed up twice on ML in this particular case, and some times on IRC. - Lauri ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] i965: Move resolve from state-update notification to draw
Since _mesa_update_state() may be called several times per draw call, we wish to be judicious about the amount of work we place inside the intel_update_state() callback. By moving the current HiZ/color resolves we need before drawing from out of the notify and into the draw itself, we can save a few percent of overhead in OglBatch7 on Atom class devices. The catch is that putting the resolve inside the draw call itself causes recursion so we need a new vbo entry point so that we do the resolves prior to sealing the vbo context. No i965 piglits were harmed. -Chris ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] i965: Move the predraw resolve from _mesa_update_state into brw_draw_prims()
Performing the HiZ/color resolves is a fairly heavyweight operation that we don't need to perform on every state update, only before use the unresolved surfaces in a draw call. Since we use meta operations to perform the resolve, we cannot do so from inside brw_draw_prims() itself (as that will reenter and corrupt the vbo context). Instead we move the resolve into a vbo->resolve() hook that is called just before the primitives are drawn. Signed-off-by: Chris Wilson Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_context.c | 24 src/mesa/drivers/dri/i965/brw_draw.c| 28 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 907b2a0..fdeadda 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -160,36 +160,12 @@ static void intel_update_state(struct gl_context * ctx, GLuint new_state) { struct brw_context *brw = brw_context(ctx); - struct intel_texture_object *tex_obj; - struct intel_renderbuffer *depth_irb; if (ctx->swrast_context) _swrast_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); brw->NewGLState |= new_state; - - _mesa_unlock_context_textures(ctx); - - /* Resolve the depth buffer's HiZ buffer. */ - depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (depth_irb) - intel_renderbuffer_resolve_hiz(brw, depth_irb); - - /* Resolve depth buffer and render cache of each enabled texture. */ - int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!ctx->Texture.Unit[i]._Current) -continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) -continue; - intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); - brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); - } - - _mesa_lock_context_textures(ctx); } #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index e5de420..d1b6279 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -342,6 +342,33 @@ brw_merge_inputs(struct brw_context *brw, } } +static void +brw_draw_resolve(struct gl_context *ctx) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_texture_object *tex_obj; + struct intel_renderbuffer *depth_irb; + + /* Resolve the depth buffer's HiZ buffer. */ + depth_irb = intel_get_renderbuffer(brw->ctx.DrawBuffer, BUFFER_DEPTH); + if (depth_irb) + intel_renderbuffer_resolve_hiz(brw, depth_irb); + + /* Resolve depth buffer and render cache of each enabled texture. */ + int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!brw->ctx.Texture.Unit[i]._Current) + continue; + tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); + intel_miptree_resolve_color(brw, tex_obj->mt); + brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + } +} + + /** * \brief Call this after drawing to mark which buffers need resolving * @@ -615,6 +642,7 @@ brw_draw_init(struct brw_context *brw) /* Register our drawing function: */ + vbo->resolve = brw_draw_resolve; vbo->draw_prims = brw_draw_prims; for (int i = 0; i < VERT_ATTRIB_MAX; i++) -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] vbo: Reorder clearing NeedFlush flag with performing the flush
When we come to flush the vertices, we need to clear the NeedFlush flag first before calling into the backend as that backend may trigger FLUSH_VERTICES() itself (through use of meta) before dirtying the vertex state. If the NeedFlush flag is still in place, we end up recursing into the backend again (ad infinitum or until we hit an assertion). Signed-off-by: Chris Wilson Cc: Brian Paul Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/vbo/vbo_exec_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 138cd60..5486104 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -1187,6 +1187,9 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) return; } + /* Don't try and recursively flush the current VBO */ + ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags); + /* Flush (draw), and make sure VBO is left unmapped when done */ vbo_exec_FlushVertices_internal(exec, GL_TRUE); -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] i965: Move update_state from brw_meta_resolve_color() to after all resolves
Now that we moved the resolving from out of the _mesa_update_state() callback we can remove the trick of calling _mesa_update_state() again from inside the resolver (as we no longer need to adhere to the _mesa_update_state() contract). However, we still need to make sure that the context is up to date before proceeding on with the vbo draw. Signed-off-by: Chris Wilson Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_draw.c| 7 +++ src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 9 - 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d1b6279..1269b65 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -366,6 +366,13 @@ brw_draw_resolve(struct gl_context *ctx) intel_miptree_resolve_color(brw, tex_obj->mt); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); } + + /* As we may have resolved using meta operations and so left the +* context in a dirty state, we need to complete the invalidations +* before continuing on with the vbo draw (and its context checks). +*/ + if (ctx->NewState) + _mesa_update_state(ctx); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index f5ecbb5..dd7f148 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -710,13 +710,4 @@ brw_meta_resolve_color(struct brw_context *brw, _mesa_DeleteFramebuffers(1, &fbo); _mesa_meta_end(ctx); - - /* We're typically called from intel_update_state() and we're supposed to -* return with the state all updated to what it was before -* brw_meta_resolve_color() was called. The meta rendering will have -* messed up the state and we need to call _mesa_update_state() again to -* get back to where we were supposed to be when resolve was called. -*/ - if (ctx->NewState) - _mesa_update_state(ctx); } -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] i965: Remove redundant update_state from intelReadPixels()
The very first operation performed by _mesa_readpixels() is to call _mesa_update_state() - we do not need to do so ourselves. Signed-off-by: Chris Wilson Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/drivers/dri/i965/intel_pixel_read.c | 8 1 file changed, 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index eb366cd..16dcc55 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -269,14 +269,6 @@ intelReadPixels(struct gl_context * ctx, intel_prepare_render(brw); brw->front_buffer_dirty = dirty; - /* Update Mesa state before calling _mesa_readpixels(). -* XXX this may not be needed since ReadPixels no longer uses the -* span code. -*/ - - if (ctx->NewState) - _mesa_update_state(ctx); - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); /* There's an intel_prepare_render() call in intelSpanRenderStart(). */ -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] i965: Only resolve the textures if a _NEW_STATE state change is flagged
Only walk through the set of enabled TextureUnits looking for a texture that needs to be resolved if the context state flags a new texture. Note that this will miss if the client is rendering into a texture that it is reading from, though that needs explicit barriers (and futhermore no piglits complain). Signed-off-by: Chris Wilson Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_draw.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 1269b65..0ffcc24 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -355,16 +355,18 @@ brw_draw_resolve(struct gl_context *ctx) intel_renderbuffer_resolve_hiz(brw, depth_irb); /* Resolve depth buffer and render cache of each enabled texture. */ - int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; - for (int i = 0; i <= maxEnabledUnit; i++) { - if (!brw->ctx.Texture.Unit[i]._Current) - continue; - tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); - brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + if (brw->NewGLState & _NEW_TEXTURE) { + int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!brw->ctx.Texture.Unit[i]._Current) +continue; + tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) +continue; + intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); + intel_miptree_resolve_color(brw, tex_obj->mt); + brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); + } } /* As we may have resolved using meta operations and so left the -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback
A common problem with using HiZ and multisampling is that surfaces need to resolved prior to use. Currently i965 does this inside its state update hook, but that is a comparatively heavyweight operation that need not be performed so frequently. The obvious solution (and therefore fraught with dragons) is to move the HiZ/color resolves into the brw_draw_prims() - however, the resolves are performed using meta and end up re-entering brw_draw_prims() corrupting the context state of the original call. To avoid the meta recursion, we can add a new callback (vbo->resolve()) into the vbo pipeline that is called just before vbo->draw(). Signed-off-by: Chris Wilson Cc: Brian Paul Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/vbo/vbo.h| 1 + src/mesa/vbo/vbo_context.c| 19 +++ src/mesa/vbo/vbo_context.h| 1 + src/mesa/vbo/vbo_exec_array.c | 1 + src/mesa/vbo/vbo_exec_draw.c | 5 - src/mesa/vbo/vbo_save_draw.c | 2 ++ 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 2aaff5d..b64c468 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -89,6 +89,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx, struct _glapi_table *exec); +typedef void (*vbo_resolve_func)( struct gl_context *ctx); typedef void (*vbo_draw_func)( struct gl_context *ctx, const struct _mesa_prim *prims, GLuint nr_prims, diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index e3eb286..1f0b46a 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -148,11 +148,30 @@ static void init_mat_currval(struct gl_context *ctx) } +static void nop_resolve(struct gl_context *ctx) +{ +} + +static void nop_draw(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *tfb_vertcount, + unsigned stream, + struct gl_buffer_object *indirect) +{ +} + GLboolean _vbo_CreateContext( struct gl_context *ctx ) { struct vbo_context *vbo = CALLOC_STRUCT(vbo_context); ctx->vbo_context = vbo; + vbo->draw_prims = nop_draw; + vbo->resolve = nop_resolve; /* Initialize the arrayelt helper */ diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index a376efe..c4033ee4 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -75,6 +75,7 @@ struct vbo_context { /* Callback into the driver. This must always succeed, the driver * is responsible for initiating any fallback actions required: */ + vbo_resolve_func resolve; vbo_draw_func draw_prims; }; diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 34d2c1d..d592ae4 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -549,6 +549,7 @@ vbo_bind_arrays(struct gl_context *ctx) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; + vbo->resolve(ctx); vbo_draw_method(vbo, DRAW_ARRAYS); if (exec->array.recalculate_inputs) { diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 2bfb0c3..fa5b06b 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -388,11 +388,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) if (exec->vtx.copied.nr != exec->vtx.vert_count) { struct gl_context *ctx = exec->ctx; + + vbo_context(ctx)->resolve( ctx ); /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS * from _mesa_set_varying_vp_inputs(). */ -vbo_exec_bind_arrays( ctx ); + vbo_draw_method( vbo_context(ctx), DRAW_BEGIN_END); + vbo_exec_bind_arrays( ctx ); if (ctx->NewState) _mesa_update_state( ctx ); diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index b1fd689..2103b8e 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -297,6 +297,8 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) return; } + vbo_context(ctx)->resolve(ctx); + vbo_bind_vertex_list( ctx, node ); vbo_draw_method(vbo_context(ctx), DRAW_DISPLAY_LIST); -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [PATCH mesa 2/3] nv30: Fix color resolving for nv3x cards
Hi, On 08-09-15 09:53, Hans de Goede wrote: Hi, On 07-09-15 21:55, Ilia Mirkin wrote: May I ask why you're doing 512x512 instead of 1024x1024? These are already scaled up coordinates, so 1024x1024 should work no? Or is it because of the seams on the edges? Do those not also appear with 512x512 or does it sample outside of the box? This is my bad because of the bug fixed by patch 1/3 I had 512x512 in there for a while, I've moved back and forth between 512 and 1024 a couple of times. I've also tried 2048 but the hardware does not like that. I will retest with 1024 and submit a fixed version. I've not noticed any seams on the edges, even though I've been actively looking for them. Separately, why not use this approach on nv40 as well? I can't imagine the blitter would be faster... does this result in lower quality? I've the feeling the sifm bilinear filtering is more "blurry" then the blitter one. I will do some more objective (ahem) tests on nv4x and get back to you on this. Ok I've run some tests comparing the rendering between using the blitter and the sifm and the results seem identical, so one v2 using 1024x1024 blocks and doing so on all nv3x/nv4x cards coming up. Regards, Hans ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 7/7] i965: Remove redundant test for NULL intel_texture_object
Having checked whether the base class (gl_texture_object) is NULL, we know that intel_texture_object itself cannot be NULL. Signed-off-by: Chris Wilson Cc: Jordan Justen Cc: Jason Ekstrand Cc: Kenneth Graunke Cc: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_draw.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 0ffcc24..3cea331 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -346,7 +346,6 @@ static void brw_draw_resolve(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); - struct intel_texture_object *tex_obj; struct intel_renderbuffer *depth_irb; /* Resolve the depth buffer's HiZ buffer. */ @@ -358,11 +357,15 @@ brw_draw_resolve(struct gl_context *ctx) if (brw->NewGLState & _NEW_TEXTURE) { int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; for (int i = 0; i <= maxEnabledUnit; i++) { + struct intel_texture_object *tex_obj; + if (!brw->ctx.Texture.Unit[i]._Current) continue; + tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) + if (!tex_obj->mt) continue; + intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); intel_miptree_resolve_color(brw, tex_obj->mt); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't tell the hardware about our UAV access.
Am 09.09.2015 um 15:04 schrieb Roland Scheidegger: > Am 09.09.2015 um 05:56 schrieb Ian Romanick: >> On 08/15/2015 02:24 AM, Francisco Jerez wrote: >>> Roland Scheidegger writes: >>> I guess though you'd need these bits when implementing things like ARB_fragment_shader_ordering? (Thus stuff actually looks useful but I don't know if anybody wants to implement it in the near term.) >>> I believe that could be implemented using the thread dependency >>> mechanism which is independent of the UAV coherency stuff. BTW I wasn't >>> aware that the extension had become ARB. >> >> It hasn't. It's still just an Intel extension. > > Ah yes that's right not sure what I was looking at. There's a very > similar NV extension though too (NV_fragment_shader_interlock), so there > may be an ARB extension in the future. > Actually it already exists, ARB_fragment_shader_interlock. Was just missing the different name :-). Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa v2 1/2] nv30: Fix color resolving for nv3x cards
We do not have a generic blitter on nv3x cards, so we must use the sifm object for color resolving. This commit divides the sources and dest surfaces in to tiles which match the constraints of the sifm object, so that color resolving will work properly on nv3x cards. Signed-off-by: Hans de Goede --- Changes in v2: -Use 1024x1024 blocks -Use the sifm on both nv3x and nv4x cards instead of only on nv3x cards --- src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 38 - 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c index 76bb8b8..735c718 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c @@ -149,14 +149,50 @@ static void nv30_resource_resolve(struct nv30_context *nv30, const struct pipe_blit_info *info) { + struct nv30_miptree *src_mt = nv30_miptree(info->src.resource); struct nv30_rect src, dst; + unsigned x, x0, x1, y, y1, w, h; define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x, info->src.box.y, info->src.box.width, info->src.box.height, &src); define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x, info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst); - nv30_transfer_rect(nv30, BILINEAR, &src, &dst); + x0 = src.x0; + x1 = src.x1; + y1 = src.y1; + + /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */ + for (y = src.y0; y < y1; y += h) { + h = y1 - y; + if (h > 1024) + h = 1024; + + src.y0 = 0; + src.y1 = h; + src.h = h; + + dst.y1 = dst.y0 + (h >> src_mt->ms_y); + dst.h = h >> src_mt->ms_y; + + for (x = x0; x < x1; x += w) { + w = x1 - x; + if (w > 1024) +w = 1024; + + src.offset = y * src.pitch + x * src.cpp; + src.x0 = 0; + src.x1 = w; + src.w = w; + + dst.offset = (y >> src_mt->ms_y) * dst.pitch + + (x >> src_mt->ms_x) * dst.cpp; + dst.x1 = dst.x0 + (w >> src_mt->ms_x); + dst.w = w >> src_mt->ms_x; + + nv30_transfer_rect(nv30, BILINEAR, &src, &dst); + } + } } void -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH mesa v2 2/2] nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA
Some modern apps try to use msaa without keeping in mind the restrictions on videomem of older cards. Resulting in dmesg saying: [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12 Because we are running out of video memory, after which the program using the msaa visual freezes, and eventually the entire system freezes. To work around this we do not allow msaa visauls by default and allow the user to override this via NV30_MAX_MSAA. Signed-off-by: Hans de Goede --- Changes in v2: -Allow re-enabling msaa by setting NV30_MAX_MSAA in the environment --- src/gallium/drivers/nouveau/nv30/nv30_screen.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 7aad26b..4b77f43 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -319,8 +319,28 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen, unsigned sample_count, unsigned bindings) { - if (sample_count > 4) + unsigned int max_sample_count; + + /* +* Some modern apps try to use msaa without keeping in mind the +* restrictions on videomem of older cards. Resulting in dmesg saying: +* [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate +* [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list +* [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12 +* +* Because we are running out of video memory, after which the program +* using the msaa visual freezes, and eventually the entire system freezes. +* +* To work around this we do not allow msaa visauls by default and allow +* the user to override this via NV30_MAX_MSAA. +*/ + max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0); + if (max_sample_count > 4) + max_sample_count = 4; + + if (sample_count > max_sample_count) return false; + if (!(0x0017 & (1 << sample_count))) return false; -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted
On 09/09/2015 08:16 AM, Marius Predut wrote: Comparison with a signed expression and unsigned value is converted to unsigned value, reason for minus value is interpreted as a big unsigned value. For this case the "for" loop is going into unexpected behavior. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 Signed-off-by: Marius Predut --- src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7be3954..88ecc78 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, LOCAL_VARS; GLuint j; + if(count%4 != 0) return; Should be: if (count % 4 != 0) return In addition to following our code style, this allows one to put a breakpoint on the return statement if one wanted to catch that particular condition when debugging. + INIT(GL_TRIANGLES); for (j = start; j < count-3; j += 4) { @@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx, ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); } else { - ok = HAVE_TRIANGLES; /* flatshading is ok. */ + ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */ We put spaces around operators, so "count % 4" please. } break; default: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines
On 09/09/2015 07:59 AM, Marius Predut wrote: On PNV platform, for 1 pixel line thickness or less, the general anti-aliasing algorithm gives up, and a garbage line is generated. Setting a Line Width of 0.0 specifies the rasterization of the "thinnest" (one-pixel-wide), non-antialiased lines. Lines rendered with zero Line Width are rasterized using Grid Intersection Quantization rules as specified by bspec G45: Volume 2: 3D/Media, 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section. This patch follow the same rules as patches fixing the https://bugs.freedesktop.org/show_bug.cgi?id=28832 bug. v1: Eduardo Lima Mitev: Wrong indentation inside the if clause. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367 Signed-off-by: Marius Predut --- src/mesa/drivers/dri/i915/i915_state.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 4c83073..ebb4e9a 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) width = (int) (widthf * 2); width = CLAMP(width, 1, 0xf); + + if (ctx->Line.Width < 1.5 || widthf < 1.5) { By using "1.5f" we can avoid some float/double conversion. + /* For 1 pixel line thickness or less, the general + * anti-aliasing algorithm gives up, and a garbage line is + * generated. Setting a Line Width of 0.0 specifies the + * rasterization of the "thinnest" (one-pixel-wide), + * non-antialiased lines. + * + * Lines rendered with zero Line Width are rasterized using + * Grid Intersection Quantization rules as specified by + * bspec G45: Volume 2: 3D/Media, bspec? + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section + */ + width = 0; + } lis4 |= width << S4_LINE_WIDTH_SHIFT; if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v6 2/2] drm/i915: Add soft-pinning API for execbuffer
On Wed, Sep 09, 2015 at 04:07:09PM +0200, Michał Winiarski wrote: > From: Chris Wilson > > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by > Chris > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > (Not published externally) > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > to allow eviction of soft-pinned objects when another soft-pinned object used > by a subsequent execbuffer overlaps reported by Michal Winiarski. > (Not published externally) > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > pinned first after an address conflict happens to avoid repeated conflicts in > rare cases (Suggested by Chris Wilson). Expanded comment on > drm_i915_gem_exec_object2.offset to cover this new API. > > v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability > (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure > buffers are not considered misplaced without the user specifying > EXEC_OBJECT_SUPPORTS_48BBADDRESS. User must assume responsibility for any > addressing workarounds. Updated object2.offset field comment again to clarify > NO_RELOC case (Chris). checkpatch cleanup. > > v6: Rebase on top of current nightly. Dropped any references to > EXEC_OBJECT_SUPPORTS_48BBADDRESS since those don't exist in upstream > yet, and are not a dependency. > > Cc: Chris Wilson > Cc: Akash Goel > Cc: Vinay Belgaumkar > Cc: Michal Winiarski > Cc: Zou Nanhai > Cc: Kristian Høgsberg > Signed-off-by: Chris Wilson > Signed-off-by: Thomas Daniel > Signed-off-by: Michał Winiarski Again, the precursors are not included in this series or upstream, so NAK. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/9] Implement textureQueryLod in softpipe
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: The following patches do some preparatory refactoring in softpipe's sp_tex_sample.{c,h} before actually implementing the feature. There is also a patch that fixes softpipe's textureLod with bias. I caught this bug when finished the textureQueryLod implementation - one of four textureQueryLod tests were failing because of it. I sent patches adding the testcases with non-zero lod-bias value to piglit mailing list (http://lists.freedesktop.org/archives/piglit/2015-September/017018.html). Also, this is my first contribution to mesa, but I hope I didn't make any obvious mistakes. Thanks. Krzesimir Nowak (9): tgsi: Remove trailing backslash in comment softpipe: Fix textureLod with nonzero GL_TEXTURE_LOD_BIAS value. softpipe: Split compute_lambda_lod into two functions softpipe: Put mip_filter_func inside a struct softpipe: Split code getting a filter into separate function softpipe: Split 3D to 2D coords conversion into separate function. softpipe: Add functions for computing mipmap level tgsi: Add code for handling lodq opcode softpipe: Implement and enable textureQueryLod src/gallium/auxiliary/tgsi/tgsi_exec.c | 46 +++- src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 + src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 338 +-- src/gallium/drivers/softpipe/sp_tex_sample.h | 32 ++- 5 files changed, 345 insertions(+), 83 deletions(-) Overall, this looks really nice. You did a good job of following the coding style and documenting your patches. I just have some nitpicks on some of the patches that I'll post/reply on. Basically, I think a few more things could be const-qualified and more comments on the new functions to explain what's going on would be helpful. If you can clean up those minor things I'll commit the series for you. Thanks! -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/9] softpipe: Put mip_filter_func inside a struct
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: Putting this function pointer into a struct enables grouping of several related functions in a single place. For now it is just a single function, but the struct will be later extended with a mip_level_func for returning mip level. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 28 +--- src/gallium/drivers/softpipe/sp_tex_sample.h | 5 - 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 38bdc93..cd4a659 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2515,6 +2515,12 @@ mip_filter_linear_2d_linear_repeat_POT( } } +static struct sp_mip mip_linear = {mip_filter_linear}; +static struct sp_mip mip_nearest = {mip_filter_nearest}; +static struct sp_mip mip_none = {mip_filter_none}; +static struct sp_mip mip_none_no_filter_select = {mip_filter_none_no_filter_select}; +static struct sp_mip mip_linear_aniso = {mip_filter_linear_aniso}; +static struct sp_mip mip_linear_2d_linear_repeat_POT = {mip_filter_linear_2d_linear_repeat_POT}; Can those be const-qualified too? /** * Do shadow/depth comparisons. @@ -2918,18 +2924,18 @@ sample_mip(struct sp_sampler_view *sp_sview, const struct filter_args *filt_args, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { - mip_filter_func mip_filter; + struct sp_mip *mip = NULL; img_filter_func min_img_filter = NULL; img_filter_func mag_img_filter = NULL; if (filt_args->control == tgsi_sampler_gather) { - mip_filter = mip_filter_nearest; + mip = &mip_nearest; min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { - mip_filter = mip_filter_linear_2d_linear_repeat_POT; + mip = &mip_linear_2d_linear_repeat_POT; } else { - mip_filter = sp_samp->mip_filter; + mip = &sp_samp->mip; min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); if (sp_samp->min_mag_equal) { mag_img_filter = min_img_filter; @@ -2939,8 +2945,8 @@ sample_mip(struct sp_sampler_view *sp_sview, } } - mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, - s, t, p, c0, lod, filt_args, rgba); + mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, + s, t, p, c0, lod, filt_args, rgba); if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) { sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba); @@ -3239,13 +3245,13 @@ softpipe_create_sampler_state(struct pipe_context *pipe, switch (sampler->min_mip_filter) { case PIPE_TEX_MIPFILTER_NONE: if (sampler->min_img_filter == sampler->mag_img_filter) - samp->mip_filter = mip_filter_none_no_filter_select; + samp->mip = mip_none_no_filter_select; else - samp->mip_filter = mip_filter_none; + samp->mip = mip_none; break; case PIPE_TEX_MIPFILTER_NEAREST: - samp->mip_filter = mip_filter_nearest; + samp->mip = mip_nearest; break; case PIPE_TEX_MIPFILTER_LINEAR: @@ -3257,11 +3263,11 @@ softpipe_create_sampler_state(struct pipe_context *pipe, sampler->max_anisotropy <= 1) { samp->min_mag_equal_repeat_linear = TRUE; } - samp->mip_filter = mip_filter_linear; + samp->mip = mip_linear; /* Anisotropic filtering extension. */ if (sampler->max_anisotropy > 1) { - samp->mip_filter = mip_filter_linear_aniso; + samp->mip = mip_linear_aniso; /* Override min_img_filter: * min_img_filter needs to be set to NEAREST since we need to access diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 7d1aafc..78541e1 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -128,6 +128,9 @@ struct sp_sampler_view }; +struct sp_mip { I wonder if we could come up with a more descriptive name than "sp_mip". Maybe something like sp_filter_funcs since it contains functions related to filtering? + mip_filter_func filter; +}; struct sp_sampler { struct pipe_sampler_state base; @@ -144,7 +147,7 @@ struct sp_sampler { wrap_linear_func linear_texcoord_t; wrap_linear_func linear_texcoord_p; - mip_filter_func mip_filter; + struct sp_mip mip; }; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: textureQueryLod returns a vec2 with a mipmap information and a LOD. The latter needs to be not clamped. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 55 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 19188b0..38bdc93 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler, } -/* Calculate level of detail for every fragment. +/* Calculate level of detail for every fragment. The computed value is not + * clamped into lod_min and lod_max. * \param lod_in per-fragment lod_bias or explicit_lod. * \param lod results per-fragment lod. */ static inline void -compute_lambda_lod(struct sp_sampler_view *sp_sview, - struct sp_sampler *sp_samp, - const float s[TGSI_QUAD_SIZE], - const float t[TGSI_QUAD_SIZE], - const float p[TGSI_QUAD_SIZE], - const float lod_in[TGSI_QUAD_SIZE], - enum tgsi_sampler_control control, - float lod[TGSI_QUAD_SIZE]) +compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview, Not a big deal, but we use "unclamped" in various other places in Gallium. Patches 1-3: Reviewed-by: Brian Paul + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float lod_in[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float lod[TGSI_QUAD_SIZE]) { const struct pipe_sampler_state *sampler = &sp_samp->base; - float lod_bias = sampler->lod_bias; - float min_lod = sampler->min_lod; - float max_lod = sampler->max_lod; + const float lod_bias = sampler->lod_bias; float lambda; uint i; @@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, /* XXX FIXME */ case tgsi_sampler_derivs_explicit: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod); + lod[0] = lod[1] = lod[2] = lod[3] = lambda; break; case tgsi_sampler_lod_bias: lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; for (i = 0; i < TGSI_QUAD_SIZE; i++) { lod[i] = lambda + lod_in[i]; - lod[i] = CLAMP(lod[i], min_lod, max_lod); } break; case tgsi_sampler_lod_explicit: for (i = 0; i < TGSI_QUAD_SIZE; i++) { - lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod); + lod[i] = lod_in[i] + lod_bias; } break; case tgsi_sampler_lod_zero: case tgsi_sampler_gather: /* this is all static state in the sampler really need clamp here? */ - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod); + lod[0] = lod[1] = lod[2] = lod[3] = lod_bias; break; default: assert(0); @@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, } } +/* Calculate level of detail for every fragment. + * \param lod_in per-fragment lod_bias or explicit_lod. + * \param lod results per-fragment lod. + */ +static inline void +compute_lambda_lod(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float lod_in[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float lod[TGSI_QUAD_SIZE]) +{ + const struct pipe_sampler_state *sampler = &sp_samp->base; + const float min_lod = sampler->min_lod; + const float max_lod = sampler->max_lod; + int i; + + compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, control, lod); + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + lod[i] = CLAMP(lod[i], min_lod, max_lod); + } +} + static inline unsigned get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: This function will be later used by textureQueryLod. The img_filter_func are optional, because textureQueryLod will not need them. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 53 +++- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index cd4a659..0a3fc20 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview, } } +/** + * Get mip filter, and optionally both img min filter and img mag filter + */ +static void +get_filters(struct sp_sampler_view *sp_sview, +struct sp_sampler *sp_samp, +enum tgsi_sampler_control control, +struct sp_mip **mip, +img_filter_func *min, +img_filter_func *mag) +{ + assert(mip); + if (control == tgsi_sampler_gather) { + *mip = &mip_nearest; + if (min) + *min = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); + } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { + *mip = &mip_linear_2d_linear_repeat_POT; + } + else { + *mip = &sp_samp->mip; + if (min || mag) { + img_filter_func tmp_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); + if (min) +*min = tmp_filter; + if (mag) { +if (sp_samp->min_mag_equal) { + *mag = tmp_filter; +} +else { + *mag = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false); +} + } + } + } +} static void sample_mip(struct sp_sampler_view *sp_sview, @@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview, img_filter_func min_img_filter = NULL; img_filter_func mag_img_filter = NULL; - if (filt_args->control == tgsi_sampler_gather) { - mip = &mip_nearest; - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true); - } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { - mip = &mip_linear_2d_linear_repeat_POT; - } - else { - mip = &sp_samp->mip; - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false); - if (sp_samp->min_mag_equal) { - mag_img_filter = min_img_filter; - } - else { - mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false); - } - } + get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, &mag_img_filter); mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, s, t, p, c0, lod, filt_args, rgba); Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: This is to avoid tying the conversion to sampling - textureQueryLod will need to do the conversion too, but it does not do any sampling. So instead of a "sample" vfunc, there is a "convert" vfunc. The drawback of this approach is that a "noop" convert copies 3 arrays of floats. Would be nice to avoid it in some clean way. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 73 +--- src/gallium/drivers/softpipe/sp_tex_sample.h | 20 2 files changed, 54 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 0a3fc20..cdec984 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview, } +static void +convert_noop(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE]) +{ + const size_t len = sizeof(float) * TGSI_QUAD_SIZE; + + memcpy(, s, len); + memcpy(, t, len); + memcpy(, p, len); +} -/** - * Use 3D texcoords to choose a cube face, then sample the 2D cube faces. - * Put face info into the sampler faces[] array. - */ static void -sample_cube(struct sp_sampler_view *sp_sview, -struct sp_sampler *sp_samp, -const float s[TGSI_QUAD_SIZE], -const float t[TGSI_QUAD_SIZE], -const float p[TGSI_QUAD_SIZE], -const float c0[TGSI_QUAD_SIZE], -const float c1[TGSI_QUAD_SIZE], -const struct filter_args *filt_args, -float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) +convert_cube(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE], + float [TGSI_QUAD_SIZE]) Could you put some comments on top of these new functions to explain what they're doing? Otherwise, Reviewed-by: Brian Paul { unsigned j; - float [4], [4]; - - /* Not actually used, but the intermediate steps that do the -* dereferencing don't know it. -*/ - static float [4] = { 0, 0, 0, 0 }; [0] = c0[0]; [1] = c0[1]; @@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview, } } } - - sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba); } @@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe, if (view->target == PIPE_TEXTURE_CUBE || view->target == PIPE_TEXTURE_CUBE_ARRAY) - sview->get_samples = sample_cube; + sview->convert_coords = convert_cube; else { - sview->get_samples = sample_mip; + sview->convert_coords = convert_noop; } sview->pot2d = spr->pot && (view->target == PIPE_TEXTURE_2D || @@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, enum tgsi_sampler_control control, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { - struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler; + struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler; + struct sp_sampler_view *sp_sview; + struct sp_sampler *sp_samp; struct filter_args filt_args; + float cs[TGSI_QUAD_SIZE]; + float ct[TGSI_QUAD_SIZE]; + float cp[TGSI_QUAD_SIZE]; + assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); assert(sampler_index < PIPE_MAX_SAMPLERS); - assert(sp_samp->sp_sampler[sampler_index]); + assert(sp_tgsi_samp->sp_sampler[sampler_index]); + + sp_sview = &sp_tgsi_samp->sp_sview[sview_index]; + sp_samp = sp_tgsi_samp->sp_sampler[sampler_index]; /* always have a view here but texture is NULL if no sampler view was set. */ - if (!sp_samp->sp_sview[sview_index].base.texture) { + if (!sp_sview->base.texture) { int i, j; for (j = 0; j < TGSI_NUM_CHANNELS; j++) { for (i = 0; i < TGSI_QUAD_SIZE; i++) { @@ -3456,11 +3470,12 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, return; } + sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp); + filt_args.control = control; filt_args.offset = offset; - sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index], -
Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: These functions will be used by textureQueryLod. --- src/gallium/drivers/softpipe/sp_tex_sample.c | 100 +-- src/gallium/drivers/softpipe/sp_tex_sample.h | 7 ++ 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index cdec984..6e639e0 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1937,6 +1937,38 @@ get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) } static void +clamp_lod(const struct sp_sampler_view *sp_sview, + const struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float clamped[TGSI_QUAD_SIZE]) Can you add a comment on this function too? We also have compute_lambda_lod() which does lod clamping so the comments could help to explain what's different between them. +{ + const float min_lod = sp_samp->base.min_lod; + const float max_lod = sp_samp->base.max_lod; + const float min_level = sp_sview->base.u.tex.first_level; + const float max_level = sp_sview->base.u.tex.last_level; + int i; + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + float cl = lod[i]; + + cl = CLAMP(cl, min_lod, max_lod); + /* XXX: Is min_level ever different from 0? + */ + cl = CLAMP(cl, 0, max_level - min_level); + clamped[i] = cl; + } +} + +static void +mip_level_linear(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + clamp_lod(sp_sview, sp_samp, lod, level); +} + +static void mip_filter_linear(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, } +static void +mip_level_nearest(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + const int first_level = sp_sview->base.u.tex.first_level; + int j; + + clamp_lod(sp_sview, sp_samp, lod, level); + for (j = 0; j < TGSI_QUAD_SIZE; j++) + /* TODO: It should rather be: + * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F; + */ + level[j] = first_level + (int)(level[j] + 0.5F); +} + /** * Compute nearest mipmap level from texcoords. * Then sample the texture level for four elements of a quad. @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, static void +mip_level_none(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + int j; + + for (j = 0; j < TGSI_QUAD_SIZE; j++) { + level[j] = sp_sview->base.u.tex.first_level; + } +} + +static void mip_filter_none(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview, static void +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview, +struct sp_sampler *sp_samp, +const float lod[TGSI_QUAD_SIZE], +float level[TGSI_QUAD_SIZE]) +{ + mip_level_none(sp_sview, sp_samp, lod, level); +} + +static void mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, struct sp_sampler *sp_samp, img_filter_func min_filter, @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, } +static void +mip_level_linear_aniso(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + mip_level_linear(sp_sview, sp_samp, lod, level); +} + /** * Sample 2D texture using an anisotropic filter. */ @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview, } } +static void +mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, + struct sp_sampler *sp_samp, + const float lod[TGSI_QUAD_SIZE], + float level[TGSI_QUAD_SIZE]) +{ + mip_level_linear(sp_sview, sp_samp, lod, level); +} /** * Specialized version of mip_filter_linear with hard-wired calls to @@ -2515,12 +2603,12 @@ mip_filter_linear_2d_linear_repeat_POT( } } -static struct sp_mip mip_linear = {mip_filter_linear}; -static struct sp_mip mip_nearest = {mip_filter_nearest}; -static st
Re: [Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: This introduces new vfunc in tgsi_sampler just for this opcode. I decided against extending get_samples vfunc to return the mipmap level and LOD - the function's prototype is already too scary and doing the sampling for textureQueryLod would be a waste of time. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++ src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 2 files changed, 54 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9544623..054ad08 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach, } } +static void +exec_lodq(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint unit; + int dim; + int i; + union tgsi_exec_channel coords[4]; + const union tgsi_exec_channel *args[Elements(coords)]; + union tgsi_exec_channel r[2]; + + unit = fetch_sampler_unit(mach, inst, 1); + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL); + assert(dim <= Elements(coords)); + /* fetch coordinates */ + for (i = 0; i < dim; i++) { + FETCH(&coords[i], 0, TGSI_CHAN_X + i); + args[i] = &coords[i]; + } + for (i = dim; i < Elements(coords); i++) { + args[i] = &ZeroVec; + } + mach->Sampler->query_lod(mach->Sampler, unit, unit, +args[0]->f, +args[1]->f, +args[2]->f, +args[3]->f, +tgsi_sampler_lod_none, +r[0].f, +r[1].f); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } +} static void exec_txd(struct tgsi_exec_machine *mach, @@ -4378,6 +4416,12 @@ exec_instruction( exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); break; + case TGSI_OPCODE_LODQ: + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_lodq(mach, inst); + break; + case TGSI_OPCODE_UP2H: assert (0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 5d56aab..556e0af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -138,6 +138,16 @@ struct tgsi_sampler const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE], const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + void (*query_lod)(struct tgsi_sampler *tgsi_sampler, Can tgsi_sampler be const-qualified? + const unsigned sview_index, + const unsigned sampler_index, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float mipmap[TGSI_QUAD_SIZE], + float lod[TGSI_QUAD_SIZE]); }; #define TGSI_EXEC_NUM_TEMPS 4096 Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 9/9] softpipe: Implement and enable textureQueryLod
On 09/09/2015 04:35 AM, Krzesimir Nowak wrote: Passes the shader piglit tests and introduces no regressions. This commit finally makes use of the refactoring in previous commits. --- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 47 +++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 0bfd9c3..7ca8a67 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -193,9 +193,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return 4; case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_TEXTURE_QUERY_LOD: return 1; case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: - case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: return 0; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 6e639e0..499c8f9 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -3566,6 +3566,51 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, sample_mip(sp_sview, sp_samp, cs, ct, cp, c0, lod, &filt_args, rgba); } +static void +sp_tgsi_query_lod(struct tgsi_sampler *tgsi_sampler, + const unsigned sview_index, + const unsigned sampler_index, + const float s[TGSI_QUAD_SIZE], + const float t[TGSI_QUAD_SIZE], + const float p[TGSI_QUAD_SIZE], + const float c0[TGSI_QUAD_SIZE], + enum tgsi_sampler_control control, + float mipmap[TGSI_QUAD_SIZE], + float lod[TGSI_QUAD_SIZE]) +{ + static const float lod_in[TGSI_QUAD_SIZE] = { 0.0, 0.0, 0.0, 0.0 }; + + struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler *)tgsi_sampler; Can that be const-qualified, and the tgsi_sampler function parameter? Ideally, we'd also have a cast-wrapper function to use instead of an inline cast here and elsewhere. That could be done as a follow-up. + struct sp_sampler_view *sp_sview; + struct sp_sampler *sp_samp; + struct sp_mip *mip; + int i; + float cs[TGSI_QUAD_SIZE]; + float ct[TGSI_QUAD_SIZE]; + float cp[TGSI_QUAD_SIZE]; + + assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + assert(sampler_index < PIPE_MAX_SAMPLERS); + assert(sp_tgsi_samp->sp_sampler[sampler_index]); + + sp_sview = &sp_tgsi_samp->sp_sview[sview_index]; + sp_samp = sp_tgsi_samp->sp_sampler[sampler_index]; + /* always have a view here but texture is NULL if no sampler view was set. */ + if (!sp_sview->base.texture) { + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + mipmap[i] = 0.0f; + lod[i] = 0.0f; + } + return; + } + + sp_sview->convert_coords(sp_sview, sp_samp, s, t, p, c0, cs, ct, cp); + + compute_lambda_lod_not_clamped(sp_sview, sp_samp, + cs, ct, cp, lod_in, control, lod); + get_filters(sp_sview, sp_samp, control, &mip, NULL, NULL); + mip->level(sp_sview, sp_samp, lod, mipmap); +} static void sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler, @@ -3602,7 +3647,7 @@ sp_create_tgsi_sampler(void) samp->base.get_dims = sp_tgsi_get_dims; samp->base.get_samples = sp_tgsi_get_samples; samp->base.get_texel = sp_tgsi_get_texel; + samp->base.query_lod = sp_tgsi_query_lod; return samp; } - Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v6 2/2] drm/i915: Add soft-pinning API for execbuffer
From: Chris Wilson Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48BBADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Rebase on top of current nightly. Dropped any references to EXEC_OBJECT_SUPPORTS_48BBADDRESS since those don't exist in upstream yet, and are not a dependency. Cc: Chris Wilson Cc: Akash Goel Cc: Vinay Belgaumkar Cc: Michal Winiarski Cc: Zou Nanhai Cc: Kristian Høgsberg Signed-off-by: Chris Wilson Signed-off-by: Thomas Daniel Signed-off-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_dma.c| 3 ++ drivers/gpu/drm/i915/i915_drv.h| 3 ++ drivers/gpu/drm/i915/i915_gem.c| 52 ++ drivers/gpu/drm/i915/i915_gem_evict.c | 39 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++-- include/uapi/drm/i915_drm.h| 10 +- 6 files changed, 106 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 066a0ef..bd619af 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_HAS_EXEC_SOFTPIN: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2ef83c5..8eb01e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2815,6 +2815,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_OFFSET_BIAS(1<<3) #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) +#define PIN_OFFSET_FIXED (1<<6) #define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, @@ -3157,6 +3158,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, unsigned long start, unsigned long end, unsigned flags); +int __must_check +i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); int i915_gem_evict_everything(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 41263cd..bb2ff81 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3437,22 +3437,42 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { + uint64_t offset = flags & PIN_OFFSET_MASK; + + if (offset & (alignment - 1)) { + ret = -EINVAL; + goto err_free_vma; + } + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + } + if (ret) + goto err_free_vma; + } else { search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm
[Mesa-dev] [RFC libdrm] intel: Add support for softpin
Softpin allows userspace to take greater control of GPU virtual address space and eliminates the need of relocations. It can also be used to mirror addresses between GPU and CPU (shared virtual memory). Calls to drm_intel_bo_emit_reloc are still required to build the list of drm_i915_gem_exec_objects at exec time, but no entries in relocs are created. Self-relocs don't make any sense for softpinned objects and can indicate a programming errors, thus are forbidden. Softpinned objects are marked by asterisk in debug dumps. Cc: Thomas Daniel Cc: Kristian Høgsberg Cc: Zou Nanhai Cc: Michel Thierry Cc: Ben Widawsky Cc: Chris Wilson Signed-off-by: Michał Winiarski --- include/drm/i915_drm.h| 4 +- intel/intel_bufmgr.c | 9 +++ intel/intel_bufmgr.h | 1 + intel/intel_bufmgr_gem.c | 176 -- intel/intel_bufmgr_priv.h | 7 ++ 5 files changed, 173 insertions(+), 24 deletions(-) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index ded43b1..2b99fc6 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_REVISION 32 #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 typedef struct drm_i915_getparam { int param; @@ -680,7 +681,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_PINNED (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) __u64 flags; __u64 rsvd1; diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c index 14ea9f9..bd92335 100644 --- a/intel/intel_bufmgr.c +++ b/intel/intel_bufmgr.c @@ -261,6 +261,15 @@ drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, } int +drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + if (bo->bufmgr->bo_set_softpin_offset) + return bo->bufmgr->bo_set_softpin_offset(bo, offset); + + return -ENODEV; +} + +int drm_intel_bo_disable_reuse(drm_intel_bo *bo) { if (bo->bufmgr->bo_disable_reuse) diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 95eecb8..62ea4a0 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); int drm_intel_bo_busy(drm_intel_bo *bo); int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); +int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset); int drm_intel_bo_disable_reuse(drm_intel_bo *bo); int drm_intel_bo_is_reusable(drm_intel_bo *bo); diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 2723e21..615741c 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -184,6 +184,13 @@ struct _drm_intel_bo_gem { drm_intel_reloc_target *reloc_target_info; /** Number of entries in relocs */ int reloc_count; + /** Array of BOs that are referenced by this buffer and will be softpinned */ + drm_intel_bo **softpin_target; + /** Number softpinned BOs that are referenced by this buffer */ + int softpin_target_count; + /** Maximum amount of softpinned BOs that are referenced by this buffer */ + int softpin_target_size; + /** Mapped address for the buffer, saved across map/unmap cycles */ void *mem_virtual; /** GTT virtual address for the buffer, saved across map/unmap cycles */ @@ -237,6 +244,11 @@ struct _drm_intel_bo_gem { bool is_userptr; /** +* Whether this buffer is softpinned at offset specified by the user +*/ + bool is_softpin; + + /** * Size in bytes of this buffer and its relocation descendents. * * Used to avoid costly tree walking in @@ -384,8 +396,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - if (bo_gem->relocs == NULL) { - DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { + DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, + bo_gem->is_softpin ? "*" : "", bo_gem->name); continue; } @@ -395,16 +408,33 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *) target_bo; - DBG("%2d: %d (%s)@0x%08llx ->
[Mesa-dev] [PATCH 1/2] drm/i915/gtt: Allow adventurous users to select enable_ppgtt=3
While support for 48b ppgtt is here, parameter enabling it is not known to the sanitize function. Let's update it to allow selecting full_48bit_ppgtt using module parameter. Cc: Michel Thierry Cc: Mika Kuoppala Signed-off-by: Michał Winiarski --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8786281..f598d63 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -104,9 +104,11 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) { bool has_aliasing_ppgtt; bool has_full_ppgtt; + bool has_full_48bit_ppgtt; has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6; has_full_ppgtt = INTEL_INFO(dev)->gen >= 7; + has_full_48bit_ppgtt = INTEL_INFO(dev)->gen >= 9 || IS_BROADWELL(dev); if (intel_vgpu_active(dev)) has_full_ppgtt = false; /* emulation is too hard */ @@ -125,6 +127,9 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) if (enable_ppgtt == 2 && has_full_ppgtt) return 2; + if (enable_ppgtt == 3 && has_full_48bit_ppgtt) + return 3; + #ifdef CONFIG_INTEL_IOMMU /* Disable ppgtt on SNB if VT-d is on. */ if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { -- 2.4.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC libdrm] intel: Softpin support
The goal of this series is to start a discussion whether the interface and implementation of softpin support proposed for libdrm is acceptable by all interested parties. The i915 patches are added so that it's easy to apply the series on latest drm-intel-nightly and libdrm master and start using softpin. Michał Winiarski (1): drm/i915/gtt: Allow adventurous users to select enable_ppgtt=3 is not strictly necessary for softpin itself, but it is needed for SVM usecase. Chris Wilson (1): drm/i915: Add soft-pinning API for execbuffer is just v5 from Thomas Daniel rebased on top of current nightly. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa v2 1/2] nv30: Fix color resolving for nv3x cards
On Wed, Sep 9, 2015 at 9:52 AM, Hans de Goede wrote: > We do not have a generic blitter on nv3x cards, so we must use the > sifm object for color resolving. > > This commit divides the sources and dest surfaces in to tiles which > match the constraints of the sifm object, so that color resolving > will work properly on nv3x cards. > > Signed-off-by: Hans de Goede > --- > Changes in v2: > -Use 1024x1024 blocks > -Use the sifm on both nv3x and nv4x cards instead of only on nv3x cards Thanks, pushed. > --- > src/gallium/drivers/nouveau/nv30/nv30_miptree.c | 38 > - > 1 file changed, 37 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c > b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c > index 76bb8b8..735c718 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c > @@ -149,14 +149,50 @@ static void > nv30_resource_resolve(struct nv30_context *nv30, >const struct pipe_blit_info *info) > { > + struct nv30_miptree *src_mt = nv30_miptree(info->src.resource); > struct nv30_rect src, dst; > + unsigned x, x0, x1, y, y1, w, h; > > define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x, >info->src.box.y, info->src.box.width, info->src.box.height, &src); > define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x, >info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst); > > - nv30_transfer_rect(nv30, BILINEAR, &src, &dst); > + x0 = src.x0; > + x1 = src.x1; > + y1 = src.y1; > + > + /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */ > + for (y = src.y0; y < y1; y += h) { > + h = y1 - y; > + if (h > 1024) > + h = 1024; > + > + src.y0 = 0; > + src.y1 = h; > + src.h = h; > + > + dst.y1 = dst.y0 + (h >> src_mt->ms_y); > + dst.h = h >> src_mt->ms_y; > + > + for (x = x0; x < x1; x += w) { > + w = x1 - x; > + if (w > 1024) > +w = 1024; > + > + src.offset = y * src.pitch + x * src.cpp; > + src.x0 = 0; > + src.x1 = w; > + src.w = w; > + > + dst.offset = (y >> src_mt->ms_y) * dst.pitch + > + (x >> src_mt->ms_x) * dst.cpp; > + dst.x1 = dst.x0 + (w >> src_mt->ms_x); > + dst.w = w >> src_mt->ms_x; > + > + nv30_transfer_rect(nv30, BILINEAR, &src, &dst); > + } > + } > } > > void > -- > 2.4.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH mesa v2 2/2] nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA
On Wed, Sep 9, 2015 at 9:52 AM, Hans de Goede wrote: > Some modern apps try to use msaa without keeping in mind the > restrictions on videomem of older cards. Resulting in dmesg saying: > > [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate > [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list > [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12 > > Because we are running out of video memory, after which the program > using the msaa visual freezes, and eventually the entire system freezes. > > To work around this we do not allow msaa visauls by default and allow > the user to override this via NV30_MAX_MSAA. > > Signed-off-by: Hans de Goede > --- > Changes in v2: > -Allow re-enabling msaa by setting NV30_MAX_MSAA in the environment Moved this to screen init and pushed. is_format_supported is called a lot, not sure how expensive getenv() is, don't want to find out. > --- > src/gallium/drivers/nouveau/nv30/nv30_screen.c | 22 +- > 1 file changed, 21 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > index 7aad26b..4b77f43 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > @@ -319,8 +319,28 @@ nv30_screen_is_format_supported(struct pipe_screen > *pscreen, > unsigned sample_count, > unsigned bindings) > { > - if (sample_count > 4) > + unsigned int max_sample_count; > + > + /* > +* Some modern apps try to use msaa without keeping in mind the > +* restrictions on videomem of older cards. Resulting in dmesg saying: > +* [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate > +* [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list > +* [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12 > +* > +* Because we are running out of video memory, after which the program > +* using the msaa visual freezes, and eventually the entire system > freezes. > +* > +* To work around this we do not allow msaa visauls by default and allow > +* the user to override this via NV30_MAX_MSAA. > +*/ > + max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0); > + if (max_sample_count > 4) > + max_sample_count = 4; > + > + if (sample_count > max_sample_count) >return false; > + > if (!(0x0017 & (1 << sample_count))) >return false; > > -- > 2.4.3 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] r600g: update num_dw in scissor_enable workaround
"r600g: apply disable workaround on all scissors" forgot to update num_dw, fix it. Fixes: fbb423b433 "r600g: apply disable workaround on all scissors" Reported-and-tested-by: Markus Trippelsdorf Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91921 --- src/gallium/drivers/r600/r600_state_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index ae13411..cd70889 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -374,6 +374,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) rs->scissor_enable != rctx->scissor.enable) { rctx->scissor.enable = rs->scissor_enable; rctx->scissor.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; + rctx->scissor.atom.num_dw = R600_MAX_VIEWPORTS * 4; r600_mark_atom_dirty(rctx, &rctx->scissor.atom); } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops
On Tue, Sep 08, 2015 at 01:16:11PM -0700, Ian Romanick wrote: > On 09/06/2015 09:37 AM, Chris Wilson wrote: > > glCopyTexImage behaves similarly to glReadPixels with respect to the > > pixel transfer operations. Therefore if any are set we cannot use the > > simply blit fast paths. > > Do we have any test cases that hit these paths? If not, we should > probably con someone into writing some... It looks like piglit should have coverage of PixelZoom in tests/fbo/fbo-copypix.c, but only it ever uses a zoom of (1, 1). I could not spot any of the other transfer operations being set for glCopyTex*Image. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops
On Wed, Sep 9, 2015 at 12:44 PM, Chris Wilson wrote: > On Tue, Sep 08, 2015 at 01:16:11PM -0700, Ian Romanick wrote: >> On 09/06/2015 09:37 AM, Chris Wilson wrote: >> > glCopyTexImage behaves similarly to glReadPixels with respect to the >> > pixel transfer operations. Therefore if any are set we cannot use the >> > simply blit fast paths. >> >> Do we have any test cases that hit these paths? If not, we should >> probably con someone into writing some... > > It looks like piglit should have coverage of PixelZoom in > tests/fbo/fbo-copypix.c, but only it ever uses a zoom of (1, 1). I could > not spot any of the other transfer operations being set for > glCopyTex*Image. There was someone who filed a bug about pixel zoom not behaving well when the source data was > max texture size (and a small zoom factor obviously). I believe that he added patches and tests into some bugzilla entry... maybe even sent them to the list. This would have been about a year ago. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Advertise 65536 for GL_MAX_UNIFORM_BLOCK_SIZE.
On 09/08/2015 10:43 PM, Kenneth Graunke wrote: > On Tuesday, September 08, 2015 08:50:41 PM Ian Romanick wrote: >> On 09/08/2015 06:04 PM, Kenneth Graunke wrote: >>> Our old value of 16384 is the minimum value. DirectX apparently >>> requires 65536 at a minimum; that's also what nVidia and the Intel >>> Windows driver advertise. AMD advertises MAX_INT. >>> >>> Ilia Mirkin noticed that "Shadow Warrior" uses UBOs larger than 16k >>> on Nouveau, which advertises 65536 bytes for this limit. Traces >>> captured on Nouveau don't work on i965 because our lower limit causes >>> the GLSL linker to reject the captured shaders. While this isn't >>> important in and of itself, it does suggest that raising the limit >>> would be beneficial. >>> >>> We can read linear buffers up to 2^27 bytes in size, so raising this >>> should be safe; we could probably even go larger. For now, matching >>> nVidia and Intel/Windows seems like a good plan. >>> >>> We have to reinitialize MaxCombinedUniformComponents as core Mesa will >>> have set it based on a stale value for MaxUniformBlockSize. >>> >>> Signed-off-by: Kenneth Graunke >>> --- >>> src/mesa/drivers/dri/i965/brw_context.c | 9 + >>> 1 file changed, 9 insertions(+) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c >>> b/src/mesa/drivers/dri/i965/brw_context.c >>> index 907b2a0..7c1c133 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_context.c >>> +++ b/src/mesa/drivers/dri/i965/brw_context.c >>> @@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context >>> *brw) >>> >>> ctx->Const.StripTextureBorder = true; >>> >>> + ctx->Const.MaxUniformBlockSize = 65536; >>> + for (int i = 0; i < MESA_SHADER_STAGES; i++) { >>> + struct gl_program_constants *prog = &ctx->Const.Program[i]; >>> + prog->MaxUniformBlocks = 12; >> >> Is this actually necessary? It looks like this is that value already >> set in init_program_limits. Either way, this patch is >> >> Reviewed-by: Ian Romanick > > You're right, setting prog->MaxUniformBlocks = 12 is unnecessary. > > We do need to reset prog->MaxCombinedUniformComponents since > init_program_limits computed a value using a stale MaxUniformBlockSize. > > I figured since the three values are related, we may as well just > explicitly initialize them all. But if you'd prefer I take it out, > I'm fine with that too. What do you prefer? təˈmeɪtoʊ / təˈmɑːtoʊ. I mostly wanted to be sure I wasn't missing something. > Thanks for the quick review! > > --Ken > >>> + prog->MaxCombinedUniformComponents = >>> + prog->MaxUniformComponents + >>> + ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; >>> + } >>> + >>> ctx->Const.MaxDualSourceDrawBuffers = 1; >>> ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; >>> ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = >>> max_samplers; >>> >> signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/fs: Fix hang on IVB and VLV with image format mismatch.
On 09/09/2015 05:30 AM, Francisco Jerez wrote: > Ian Romanick writes: > >> On 09/03/2015 06:03 AM, Francisco Jerez wrote: >>> IVB and VLV hang sporadically when an untyped surface read or write >>> message is used to access a surface of format other than RAW, as may >>> happen when there is a mismatch between the format qualifier of the >>> image uniform and the format of the actual image bound to the >>> pipeline. According to the spec this condition gives undefined >>> results but may not lead to program termination (which is one of the >>> possible outcomes of the hang). Fix it by checking at runtime whether >>> the surface is of the right type. >>> >>> Fixes the "arb_shader_image_load_store.invalid/format mismatch" piglit >>> subtest. >>> >>> Reported-by: Mark Janes >>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91718 >>> CC: mesa-sta...@lists.freedesktop.org >>> --- >>> .../drivers/dri/i965/brw_fs_surface_builder.cpp| 42 >>> +++--- >>> 1 file changed, 38 insertions(+), 4 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >>> index f60afc9..57ce87f 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp >>> @@ -313,12 +313,42 @@ namespace { >>> >>> namespace image_validity { >>>/** >>> + * Check whether the bound image is suitable for untyped access. >>> + */ >>> + brw_predicate >>> + emit_untyped_image_check(const fs_builder &bld, const fs_reg &image, >>> + brw_predicate pred) >>> + { >>> + const brw_device_info *devinfo = bld.shader->devinfo; >>> + const fs_reg stride = offset(image, bld, >>> BRW_IMAGE_PARAM_STRIDE_OFFSET); >>> + >>> + if (devinfo->gen == 7 && !devinfo->is_haswell) { >>> +/* Check whether the first stride component (i.e. the Bpp >>> value) >>> + * is greater than four, what on Gen7 indicates that a surface >>> of >>> + * type RAW has been bound for untyped access. Reading or >>> writing >>> + * to a surface of type other than RAW using untyped surface >>> + * messages causes a hang on IVB and VLV. >>> + */ >>> +set_predicate(pred, >>> + bld.CMP(bld.null_reg_ud(), stride, fs_reg(4), >>> + BRW_CONDITIONAL_G)); >>> + >>> +return BRW_PREDICATE_NORMAL; >>> + } else { >>> +/* More recent generations handle the format mismatch >>> + * gracefully. >>> + */ >>> +return pred; >>> + } >>> + } >>> + >>> + /** >>> * Check whether there is an image bound at the given index and write >>> * the comparison result to f0.0. Returns an appropriate predication >>> * mode to use on subsequent image operations. >>> */ >>>brw_predicate >>> - emit_surface_check(const fs_builder &bld, const fs_reg &image) >>> + emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image) >> >> This change seems spurious (and also reasonable). >> > The problem is that this patch introduces a new kind of surface check > applicable to untyped surface reads and writes only, so it would have > been confusing to keep the other surface check which is applicable to > atomics only with its previous rather unspecific name. > >>>{ >>> const brw_device_info *devinfo = bld.shader->devinfo; >>> const fs_reg size = offset(image, bld, >>> BRW_IMAGE_PARAM_SIZE_OFFSET); >>> @@ -895,7 +925,9 @@ namespace brw { >>> * surface read on the result, >>> */ >>> const brw_predicate pred = >>> - emit_bounds_check(bld, image, saddr, dims); >>> + emit_untyped_image_check(bld, image, >>> +emit_bounds_check(bld, image, >>> + saddr, dims)); >> >> These appear to be the only two users of emit_bounds_check... shouldn't >> the bounds still be tested? >> > Yes, they are still. Ah... I completely missed that emit_bounds_check was moved into a parameter of the call to emit_untyped_image_check. Reviewed-by: Ian Romanick >>> >>> /* and they don't know about surface coordinates, we need to >>> * convert them to a raw memory offset. >>> @@ -1041,7 +1073,9 @@ namespace brw { >>> * the surface write on the result, >>> */ >>> const brw_predicate pred = >>> - emit_bounds_check(bld, image, saddr, dims); >>> + emit_untyped_image_check(bld, image, >>> + emit_bounds_check(bld, image, >>> +
Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback
On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote: > A common problem with using HiZ and multisampling is that surfaces need > to resolved prior to use. Currently i965 does this inside its state > update hook, but that is a comparatively heavyweight operation that need > not be performed so frequently. The obvious solution (and therefore > fraught with dragons) is to move the HiZ/color resolves into the > brw_draw_prims() - however, the resolves are performed using meta and > end up re-entering brw_draw_prims() corrupting the context state of the > original call. To avoid the meta recursion, we can add a new callback > (vbo->resolve()) into the vbo pipeline that is called just before > vbo->draw(). > > Signed-off-by: Chris Wilson > Cc: Brian Paul > Cc: Jordan Justen > Cc: Jason Ekstrand > Cc: Kenneth Graunke > Cc: Francisco Jerez > --- > src/mesa/vbo/vbo.h| 1 + > src/mesa/vbo/vbo_context.c| 19 +++ > src/mesa/vbo/vbo_context.h| 1 + > src/mesa/vbo/vbo_exec_array.c | 1 + > src/mesa/vbo/vbo_exec_draw.c | 5 - > src/mesa/vbo/vbo_save_draw.c | 2 ++ > 6 files changed, 28 insertions(+), 1 deletion(-) What problem are you trying to solve with this patch series? Are you trying to fix bugs? If so, what triggers them? Are you trying to improve performance? If so, do you have any data demonstrating that it benefits some workload? signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [RFC 1/2] nir: Add a new lowering pass nir_lower_vec_and_coalesce
On Sep 8, 2015 23:27, "Eduardo Lima Mitev" wrote: > > This pass will propagate the destination components of a vecN instructions, > as destination of the instructions that define its sources; if certain > conditions are met. > > If all the components of the destination register in the vecN instruction > can be propagated, the instruction is removed. Otherwise, a new, reduced > vecN instruction is emitted with the channels that remained. > > This effectively coalesces registers and reduces indirection. > > By now, this pass will only propagate to ALU instructions, but it could > be extended to include other instructions like load_input intrinsic. > > It also propagates to instructions within the same block as the vecN > instruction. But it could be made to work cross-block in the future, > though there are non-trivial issues with this like considering > registers that are written in different branches of a conditional. > More analysis is needed to correctly cover these cases. > > This pass works on a NIR shader in final form (after SSA), and is > expected to run before nir_lower_vec_to_movs(). > --- > src/glsl/Makefile.sources | 1 + > src/glsl/nir/nir.h| 1 + > src/glsl/nir/nir_lower_vec_and_coalesce.c | 301 ++ > 3 files changed, 303 insertions(+) > create mode 100644 src/glsl/nir/nir_lower_vec_and_coalesce.c > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index c422303..015f242 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -48,6 +48,7 @@ NIR_FILES = \ > nir/nir_lower_vars_to_ssa.c \ > nir/nir_lower_var_copies.c \ > nir/nir_lower_vec_to_movs.c \ > + nir/nir_lower_vec_and_coalesce.c \ > nir/nir_metadata.c \ > nir/nir_normalize_cubemap_coords.c \ > nir/nir_opt_constant_folding.c \ > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 3c375f3..6a89f1d 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1786,6 +1786,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader); > void nir_remove_dead_variables(nir_shader *shader); > > void nir_lower_vec_to_movs(nir_shader *shader); > +void nir_lower_vec_and_coalesce(nir_shader *shader); > void nir_lower_alu_to_scalar(nir_shader *shader); > void nir_lower_load_const_to_scalar(nir_shader *shader); > > diff --git a/src/glsl/nir/nir_lower_vec_and_coalesce.c b/src/glsl/nir/nir_lower_vec_and_coalesce.c > new file mode 100644 > index 000..2b21ec1 > --- /dev/null > +++ b/src/glsl/nir/nir_lower_vec_and_coalesce.c > @@ -0,0 +1,301 @@ > +/* > + * Copyright © 2015 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + *Eduardo Lima Mitev (el...@igalia.com) > + * > + */ > + > +#include "nir.h" > + > +/* > + * Implements a pass that lowers vecN instructions by propagating the > + * components of their destinations, as the destination of the > + * instructions that defines the sources of the vecN instruction. > + * > + * This effectively coalesces registers and reduces indirection. > + * > + * If all the components of the destination register in the vecN > + * instruction can be propagated, the instruction is removed. Otherwise, > + * a new, reduced vecN instruction is emitted with the channels that > + * remained. > + * > + * By now, this pass will only propagate to ALU instructions, but it could > + * be extended to include load_const instructions or some intrinsics like > + * load_input. > + * > + * This pass works on a NIR shader in final form (after SSA), and is > + * expected to run before nir_lower_vec_to_movs(). > + */ > + > +/** > + * Clone an ALU instruction and override the destination with the one given by > + * new_dest. It copies sources from original ALU to the new one, adjusting > + * their swizzles. > + * > + * Ret
[Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted
Comparison with a signed expression and unsigned value is converted to unsigned value, reason for minus value is interpreted as a big unsigned value. For this case the "for" loop is going into unexpected behavior. v1:Brian Paul: code style fix. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 Signed-off-by: Marius Predut --- src/mesa/tnl_dd/t_dd_dmatmp.h | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 7be3954..79de224 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, LOCAL_VARS; GLuint j; + if(count % 4 != 0) + return; + INIT(GL_TRIANGLES); for (j = start; j < count-3; j += 4) { @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx, ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); } else { - ok = HAVE_TRIANGLES; /* flatshading is ok. */ + ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */ } break; default: -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback
On 09/09/2015 10:10 AM, Kenneth Graunke wrote: > On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote: >> A common problem with using HiZ and multisampling is that surfaces need >> to resolved prior to use. Currently i965 does this inside its state >> update hook, but that is a comparatively heavyweight operation that need >> not be performed so frequently. The obvious solution (and therefore >> fraught with dragons) is to move the HiZ/color resolves into the >> brw_draw_prims() - however, the resolves are performed using meta and >> end up re-entering brw_draw_prims() corrupting the context state of the >> original call. To avoid the meta recursion, we can add a new callback >> (vbo->resolve()) into the vbo pipeline that is called just before >> vbo->draw(). >> >> Signed-off-by: Chris Wilson >> Cc: Brian Paul >> Cc: Jordan Justen >> Cc: Jason Ekstrand >> Cc: Kenneth Graunke >> Cc: Francisco Jerez >> --- >> src/mesa/vbo/vbo.h| 1 + >> src/mesa/vbo/vbo_context.c| 19 +++ >> src/mesa/vbo/vbo_context.h| 1 + >> src/mesa/vbo/vbo_exec_array.c | 1 + >> src/mesa/vbo/vbo_exec_draw.c | 5 - >> src/mesa/vbo/vbo_save_draw.c | 2 ++ >> 6 files changed, 28 insertions(+), 1 deletion(-) > > What problem are you trying to solve with this patch series? > Are you trying to fix bugs? If so, what triggers them? > Are you trying to improve performance? If so, do you have any data > demonstrating that it benefits some workload? In 0/7 he says, "By moving the current HiZ/color resolves we need before drawing from out of the notify and into the draw itself, we can save a few percent of overhead in OglBatch7 on Atom class devices." Given the amount of change here, I too would like to see some actual performance data. > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] vbo: Add a predraw resolve callback
On Wednesday, September 09, 2015 10:19:10 AM Ian Romanick wrote: > On 09/09/2015 10:10 AM, Kenneth Graunke wrote: > > On Wednesday, September 09, 2015 02:38:56 PM Chris Wilson wrote: > >> A common problem with using HiZ and multisampling is that surfaces need > >> to resolved prior to use. Currently i965 does this inside its state > >> update hook, but that is a comparatively heavyweight operation that need > >> not be performed so frequently. The obvious solution (and therefore > >> fraught with dragons) is to move the HiZ/color resolves into the > >> brw_draw_prims() - however, the resolves are performed using meta and > >> end up re-entering brw_draw_prims() corrupting the context state of the > >> original call. To avoid the meta recursion, we can add a new callback > >> (vbo->resolve()) into the vbo pipeline that is called just before > >> vbo->draw(). > >> > >> Signed-off-by: Chris Wilson > >> Cc: Brian Paul > >> Cc: Jordan Justen > >> Cc: Jason Ekstrand > >> Cc: Kenneth Graunke > >> Cc: Francisco Jerez > >> --- > >> src/mesa/vbo/vbo.h| 1 + > >> src/mesa/vbo/vbo_context.c| 19 +++ > >> src/mesa/vbo/vbo_context.h| 1 + > >> src/mesa/vbo/vbo_exec_array.c | 1 + > >> src/mesa/vbo/vbo_exec_draw.c | 5 - > >> src/mesa/vbo/vbo_save_draw.c | 2 ++ > >> 6 files changed, 28 insertions(+), 1 deletion(-) > > > > What problem are you trying to solve with this patch series? > > Are you trying to fix bugs? If so, what triggers them? > > Are you trying to improve performance? If so, do you have any data > > demonstrating that it benefits some workload? > > In 0/7 he says, "By moving the current HiZ/color resolves > we need before drawing from out of the notify and into the draw itself, > we can save a few percent of overhead in OglBatch7 on Atom class > devices." Given the amount of change here, I too would like to see some > actual performance data. Ah, sorry - 1-7 were CC'd to me, but 0 wasn't, so it ended up in a different mail folder and I missed it. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/9] softpipe: Split compute_lambda_lod into two functions
Just a minor nitpick. Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: > textureQueryLod returns a vec2 with a mipmap information and a > LOD. The latter needs to be not clamped. > --- > src/gallium/drivers/softpipe/sp_tex_sample.c | 55 > > 1 file changed, 39 insertions(+), 16 deletions(-) > > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c > b/src/gallium/drivers/softpipe/sp_tex_sample.c > index 19188b0..38bdc93 100644 > --- a/src/gallium/drivers/softpipe/sp_tex_sample.c > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c > @@ -1855,24 +1855,23 @@ compute_lod(const struct pipe_sampler_state *sampler, > } > > > -/* Calculate level of detail for every fragment. > +/* Calculate level of detail for every fragment. The computed value is not > + * clamped into lod_min and lod_max. Probably should be just "clamped to"? > * \param lod_in per-fragment lod_bias or explicit_lod. > * \param lod results per-fragment lod. > */ > static inline void > -compute_lambda_lod(struct sp_sampler_view *sp_sview, > - struct sp_sampler *sp_samp, > - const float s[TGSI_QUAD_SIZE], > - const float t[TGSI_QUAD_SIZE], > - const float p[TGSI_QUAD_SIZE], > - const float lod_in[TGSI_QUAD_SIZE], > - enum tgsi_sampler_control control, > - float lod[TGSI_QUAD_SIZE]) > +compute_lambda_lod_not_clamped(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float s[TGSI_QUAD_SIZE], > + const float t[TGSI_QUAD_SIZE], > + const float p[TGSI_QUAD_SIZE], > + const float lod_in[TGSI_QUAD_SIZE], > + enum tgsi_sampler_control control, > + float lod[TGSI_QUAD_SIZE]) > { > const struct pipe_sampler_state *sampler = &sp_samp->base; > - float lod_bias = sampler->lod_bias; > - float min_lod = sampler->min_lod; > - float max_lod = sampler->max_lod; > + const float lod_bias = sampler->lod_bias; > float lambda; > uint i; > > @@ -1881,24 +1880,23 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, >/* XXX FIXME */ > case tgsi_sampler_derivs_explicit: >lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; > - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod); > + lod[0] = lod[1] = lod[2] = lod[3] = lambda; >break; > case tgsi_sampler_lod_bias: >lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias; >for (i = 0; i < TGSI_QUAD_SIZE; i++) { > lod[i] = lambda + lod_in[i]; > - lod[i] = CLAMP(lod[i], min_lod, max_lod); >} >break; > case tgsi_sampler_lod_explicit: >for (i = 0; i < TGSI_QUAD_SIZE; i++) { > - lod[i] = CLAMP(lod_in[i] + lod_bias, min_lod, max_lod); > + lod[i] = lod_in[i] + lod_bias; >} >break; > case tgsi_sampler_lod_zero: > case tgsi_sampler_gather: >/* this is all static state in the sampler really need clamp here? */ > - lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod); > + lod[0] = lod[1] = lod[2] = lod[3] = lod_bias; >break; > default: >assert(0); > @@ -1906,6 +1904,31 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview, > } > } > > +/* Calculate level of detail for every fragment. > + * \param lod_in per-fragment lod_bias or explicit_lod. > + * \param lod results per-fragment lod. > + */ > +static inline void > +compute_lambda_lod(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float s[TGSI_QUAD_SIZE], > + const float t[TGSI_QUAD_SIZE], > + const float p[TGSI_QUAD_SIZE], > + const float lod_in[TGSI_QUAD_SIZE], > + enum tgsi_sampler_control control, > + float lod[TGSI_QUAD_SIZE]) > +{ > + const struct pipe_sampler_state *sampler = &sp_samp->base; > + const float min_lod = sampler->min_lod; > + const float max_lod = sampler->max_lod; > + int i; > + > + compute_lambda_lod_not_clamped(sp_sview, sp_samp, s, t, p, lod_in, > control, lod); > + for (i = 0; i < TGSI_QUAD_SIZE; i++) { > + lod[i] = CLAMP(lod[i], min_lod, max_lod); > + } > +} > + > static inline unsigned > get_gather_component(const float lod_in[TGSI_QUAD_SIZE]) > { > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/3] Move libdrm dependencies out of backend compiler
In trying to separate the backend compiler from the core driver, I ran into a couple of inconsistencies with how the compute shader code is split across files. First patch moves code around to follow our convention. The last two patches moves fs precompile and perf debug around to move libdrm dependencies out of the compiler. Kristian Høgsberg Kristensen (3): i965: Move compute shader code around i965: Move brw_fs_precompile() to brw_wm.c i965: Move perf_debug code to brw_codegen_*_prog() src/mesa/drivers/dri/i965/Makefile.sources | 3 +- src/mesa/drivers/dri/i965/brw_cs.c | 206 +++ src/mesa/drivers/dri/i965/brw_cs.cpp | 535 - src/mesa/drivers/dri/i965/brw_cs.h | 9 + src/mesa/drivers/dri/i965/brw_fs.cpp | 132 +++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 - src/mesa/drivers/dri/i965/brw_vs.c | 29 +- src/mesa/drivers/dri/i965/brw_wm.c | 90 - src/mesa/drivers/dri/i965/gen7_cs_state.c | 269 +++ 9 files changed, 664 insertions(+), 628 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_cs.c delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp create mode 100644 src/mesa/drivers/dri/i965/gen7_cs_state.c -- 2.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] i965: Move brw_fs_precompile() to brw_wm.c
All other precompile functions live in the brw_.c files, make fs follow the convention. Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 58 --- src/mesa/drivers/dri/i965/brw_wm.c | 59 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e994bb3..801f650 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5229,64 +5229,6 @@ brw_wm_fs_emit(struct brw_context *brw, return g.get_assembly(final_assembly_size); } -extern "C" bool -brw_fs_precompile(struct gl_context *ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_wm_prog_key key; - - struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; - struct brw_fragment_program *bfp = brw_fragment_program(fp); - bool program_uses_dfdy = fp->UsesDFdy; - - memset(&key, 0, sizeof(key)); - - if (brw->gen < 6) { - if (fp->UsesKill) - key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) - key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; - - /* Just assume depth testing. */ - key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; - } - - if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & - BRW_FS_VARYING_INPUT_MASK) > 16) - key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; - - brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base); - - if (fp->Base.InputsRead & VARYING_BIT_POS) { - key.drawable_height = ctx->DrawBuffer->Height; - } - - key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten & - ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | - BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); - - if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { - key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) || - key.nr_color_regions > 1; - } - - key.program_string_id = bfp->id; - - uint32_t old_prog_offset = brw->wm.base.prog_offset; - struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; - - bool success = brw_codegen_wm_prog(brw, shader_prog, bfp, &key); - - brw->wm.base.prog_offset = old_prog_offset; - brw->wm.prog_data = old_prog_data; - - return success; -} - const unsigned * brw_cs_emit(struct brw_context *brw, void *mem_ctx, diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6ee9284..8dfa142 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -26,6 +26,7 @@ #include "brw_context.h" #include "brw_wm.h" #include "brw_state.h" +#include "brw_shader.h" #include "main/enums.h" #include "main/formats.h" #include "main/fbobject.h" @@ -642,3 +643,61 @@ brw_upload_wm_prog(struct brw_context *brw) } brw->wm.base.prog_data = &brw->wm.prog_data->base; } + +bool +brw_fs_precompile(struct gl_context *ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_prog_key key; + + struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + bool program_uses_dfdy = fp->UsesDFdy; + + memset(&key, 0, sizeof(key)); + + if (brw->gen < 6) { + if (fp->UsesKill) + key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* Just assume depth testing. */ + key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + } + + if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; + + brw_setup_tex_for_precompile(brw, &key.tex, &fp->Base); + + if (fp->Base.InputsRead & VARYING_BIT_POS) { + key.drawable_height = ctx->DrawBuffer->Height; + } + + key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten & + ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); + + if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer) || + key.nr_color_regions > 1; + } + + key.program_string_id = bfp->id; + + uint32_t old_prog_offset = brw->wm.base.prog_offset; + struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; + + bool success
[Mesa-dev] [PATCH 3/3] i965: Move perf_debug code to brw_codegen_*_prog()
We're trying to avoid a libdrm dependency in the core compiler, so let's move the perf_debug code one level up from the brw_*_emit() helpers to the brw_codegen_*_prog() helpers. Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/brw_cs.c | 31 - src/mesa/drivers/dri/i965/brw_fs.cpp | 41 -- src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 src/mesa/drivers/dri/i965/brw_vs.c | 29 +++- src/mesa/drivers/dri/i965/brw_wm.c | 31 - 5 files changed, 75 insertions(+), 76 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 24e4b16..8d9d9d2 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -63,8 +63,11 @@ brw_codegen_cs_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); GLuint program_size; struct brw_cs_prog_data prog_data; + bool start_busy = false; + double start_time = 0; - struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + struct brw_shader *cs = + (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE]; assert (cs); memset(&prog_data, 0, sizeof(prog_data)); @@ -73,8 +76,8 @@ brw_codegen_cs_prog(struct brw_context *brw, * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count = cs->num_uniform_components + - cs->NumImages * BRW_IMAGE_PARAM_SIZE; + int param_count = cs->base.num_uniform_components + + cs->base.NumImages * BRW_IMAGE_PARAM_SIZE; /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; @@ -83,9 +86,15 @@ brw_codegen_cs_prog(struct brw_context *brw, prog_data.base.pull_param = rzalloc_array(NULL, const gl_constant_value *, param_count); prog_data.base.image_param = - rzalloc_array(NULL, struct brw_image_param, cs->NumImages); + rzalloc_array(NULL, struct brw_image_param, cs->base.NumImages); prog_data.base.nr_params = param_count; - prog_data.base.nr_image_params = cs->NumImages; + prog_data.base.nr_image_params = cs->base.NumImages; + + if (unlikely(brw->perf_debug)) { + start_busy = (brw->batch.last_bo && +drm_intel_bo_busy(brw->batch.last_bo)); + start_time = get_time(); + } program = brw_cs_emit(brw, mem_ctx, key, &prog_data, &cp->program, prog, &program_size); @@ -94,6 +103,18 @@ brw_codegen_cs_prog(struct brw_context *brw, return false; } + if (unlikely(brw->perf_debug) && cs) { + if (cs->compiled_once) { + _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles"); + } + cs->compiled_once = true; + + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("CS compile took %.03f ms and stalled the GPU\n", +(get_time() - start_time) * 1000); + } + } + if (prog_data.base.total_scratch) { brw_get_scratch_bo(brw, &brw->cs.base.scratch_bo, prog_data.base.total_scratch * brw->max_cs_threads); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 801f650..fcd0c25 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5129,15 +5129,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - bool start_busy = false; - double start_time = 0; - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && -drm_intel_bo_busy(brw->batch.last_bo)); - start_time = get_time(); - } - struct brw_shader *shader = NULL; if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; @@ -5215,17 +5206,6 @@ brw_wm_fs_emit(struct brw_context *brw, if (simd16_cfg) prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16); - if (unlikely(brw->perf_debug) && shader) { - if (shader->compiled_once) - brw_wm_debug_recompile(brw, prog, key); - shader->compiled_once = true; - - if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { - perf_debug("FS compile took %.03f ms and stalled the GPU\n", -(get_time() - start_time) * 1000); - } - } - return g.get_assembly(final_assembly_size); } @@ -5238,15 +5218,6 @@ brw_cs_emit(struct brw_context *brw, struct gl_shader_program *prog, unsigned *final_assembly_size) { - bool start_busy = false; - double start_time = 0; - - if (unlikely(brw->perf_debug)) { - start_busy = (brw->batch.last_bo && -drm_intel_bo_busy(brw->batch.last_bo));
[Mesa-dev] [PATCH 1/3] i965: Move compute shader code around
This moves the compute shader code around in order to make the way the code is split up more consistent. There should be no functional changes. Typically we have a few files per stage: brw_vs.c, brw_wm.c brw_gs.c: code to drive code generation and implement precompiling and cache search. genX__state.c gen specific implementation of the state emission for the shader stage. The brw_*_emit() functions are all in the same files as the visitor classes they use (with the exception of VS, which may use either vec4 or fs). To make compute follow this convention, we move the brw_cs_emit() function into brw_fs.cpp. We can then rename brw_cs.cpp to brw_cs.c and do this in C like the other similar files. Finally, move state setup and atoms to gen7_cs_state.c. Signed-off-by: Kristian Høgsberg Kristensen --- src/mesa/drivers/dri/i965/Makefile.sources | 3 +- src/mesa/drivers/dri/i965/brw_cs.c | 185 ++ src/mesa/drivers/dri/i965/brw_cs.cpp | 535 - src/mesa/drivers/dri/i965/brw_cs.h | 9 + src/mesa/drivers/dri/i965/brw_fs.cpp | 107 ++ src/mesa/drivers/dri/i965/gen7_cs_state.c | 269 +++ 6 files changed, 572 insertions(+), 536 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_cs.c delete mode 100644 src/mesa/drivers/dri/i965/brw_cs.cpp create mode 100644 src/mesa/drivers/dri/i965/gen7_cs_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index dfdad75..b53802c 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -21,7 +21,7 @@ i965_FILES = \ brw_conditional_render.c \ brw_context.c \ brw_context.h \ - brw_cs.cpp \ + brw_cs.c \ brw_cs.h \ brw_cubemap_normalize.cpp \ brw_curbe.c \ @@ -163,6 +163,7 @@ i965_FILES = \ gen6_wm_state.c \ gen7_blorp.cpp \ gen7_blorp.h \ + gen7_cs_state.c \ gen7_disable.c \ gen7_gs_state.c \ gen7_misc_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c new file mode 100644 index 000..24e4b16 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2014 - 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include "util/ralloc.h" +#include "brw_context.h" +#include "brw_cs.h" +#include "brw_eu.h" +#include "brw_shader.h" +#include "intel_mipmap_tree.h" +#include "brw_state.h" +#include "intel_batchbuffer.h" + +bool +brw_cs_prog_data_compare(const void *in_a, const void *in_b) +{ + const struct brw_cs_prog_data *a = + (const struct brw_cs_prog_data *)in_a; + const struct brw_cs_prog_data *b = + (const struct brw_cs_prog_data *)in_b; + + /* Compare the base structure. */ + if (!brw_stage_prog_data_compare(&a->base, &b->base)) + return false; + + /* Compare the rest of the structure. */ + const unsigned offset = sizeof(struct brw_stage_prog_data); + if (memcmp(((char *) a) + offset, ((char *) b) + offset, + sizeof(struct brw_cs_prog_data) - offset)) + return false; + + return true; +} + +static bool +brw_codegen_cs_prog(struct brw_context *brw, +struct gl_shader_program *prog, +struct brw_compute_program *cp, +struct brw_cs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(&prog_data, 0, sizeof(prog_data)); + + /* Allocate the references to the uniforms that will end up in the +* prog_data associated wit
Re: [Mesa-dev] [PATCH] nvc0: keep track of cb bindings per buffer, use for upload settings
On Wed, Sep 9, 2015 at 3:17 AM, Ilia Mirkin wrote: > CB updates to bound buffers need to go through the CB_DATA endpoints, > otherwise the shader may not notice that the updates happened. > Furthermore, these updates have to go in to the same address as the > bound buffer, otherwise, again, the shader may not notice updates. > > So we keep track of all the places where a constbuf is bound, and > iterate over all of them when updating data. If a binding is found that > encompasses the region to be updated, then we use the settings of that > binding for the upload. Otherwise we upload as a regular data update. > > This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well > as blurriness in Witcher2. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890 > Signed-off-by: Ilia Mirkin > Cc: "11.0" > --- > src/gallium/drivers/nouveau/nouveau_buffer.c | 4 +- > src/gallium/drivers/nouveau/nouveau_buffer.h | 2 + > src/gallium/drivers/nouveau/nouveau_context.h | 5 ++- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h| 8 ++-- > src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 + > .../drivers/nouveau/nvc0/nvc0_state_validate.c | 3 +- > src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 46 > -- > 7 files changed, 58 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c > b/src/gallium/drivers/nouveau/nouveau_buffer.c > index 912b778..4937dae 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.c > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c > @@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct > nouveau_transfer *tx, >nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain, > tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size); > else > - if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb) > - nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0, > + if (nv->push_cb && can_cb) > + nv->push_cb(nv, buf, >base, size / 4, (const uint32_t *)data); > else >nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, > data); > diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h > b/src/gallium/drivers/nouveau/nouveau_buffer.h > index 7e6a6cc..d45bf7a 100644 > --- a/src/gallium/drivers/nouveau/nouveau_buffer.h > +++ b/src/gallium/drivers/nouveau/nouveau_buffer.h > @@ -41,6 +41,8 @@ struct nv04_resource { > uint8_t status; > uint8_t domain; > > + uint16_t cb_bindings[6]; /* per-shader per-slot bindings */ > + > struct nouveau_fence *fence; > struct nouveau_fence *fence_wr; > > diff --git a/src/gallium/drivers/nouveau/nouveau_context.h > b/src/gallium/drivers/nouveau/nouveau_context.h > index 24deb7e..decb271 100644 > --- a/src/gallium/drivers/nouveau/nouveau_context.h > +++ b/src/gallium/drivers/nouveau/nouveau_context.h > @@ -6,6 +6,8 @@ > > #define NOUVEAU_MAX_SCRATCH_BUFS 4 > > +struct nv04_resource; > + > struct nouveau_context { > struct pipe_context pipe; > struct nouveau_screen *screen; > @@ -23,8 +25,7 @@ struct nouveau_context { > unsigned, const void *); > /* base, size refer to the whole constant buffer */ > void (*push_cb)(struct nouveau_context *, > - struct nouveau_bo *, unsigned domain, > - unsigned base, unsigned size, > + struct nv04_resource *, > unsigned offset, unsigned words, const uint32_t *); > > /* @return: @ref reduced by nr of references found in context */ > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 6ed79cf..30bee3a 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, >struct nouveau_bo *dst, unsigned offset, unsigned > domain, >unsigned size, const void *data); > void > -nvc0_cb_push(struct nouveau_context *, > - struct nouveau_bo *bo, unsigned domain, > - unsigned base, unsigned size, > - unsigned offset, unsigned words, const uint32_t *data); > +nvc0_cb_bo_push(struct nouveau_context *, > +struct nouveau_bo *bo, unsigned domain, > +unsigned base, unsigned size, > +unsigned offset, unsigned words, const uint32_t *data); > > /* nvc0_vbo.c */ > void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > index ee29912..c5bfd03 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c > @@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct
Re: [Mesa-dev] [PATCH 5/9] softpipe: Split code getting a filter into separate function
Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: > This function will be later used by textureQueryLod. The > img_filter_func are optional, because textureQueryLod will not need > them. > --- > src/gallium/drivers/softpipe/sp_tex_sample.c | 53 > +++- > 1 file changed, 37 insertions(+), 16 deletions(-) > > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c > b/src/gallium/drivers/softpipe/sp_tex_sample.c > index cd4a659..0a3fc20 100644 > --- a/src/gallium/drivers/softpipe/sp_tex_sample.c > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c > @@ -2912,6 +2912,42 @@ get_img_filter(const struct sp_sampler_view *sp_sview, > } > } > > +/** > + * Get mip filter, and optionally both img min filter and img mag filter > + */ > +static void > +get_filters(struct sp_sampler_view *sp_sview, > +struct sp_sampler *sp_samp, > +enum tgsi_sampler_control control, > +struct sp_mip **mip, > +img_filter_func *min, > +img_filter_func *mag) > +{ > + assert(mip); > + if (control == tgsi_sampler_gather) { > + *mip = &mip_nearest; > + if (min) > + *min = get_img_filter(sp_sview, &sp_samp->base, > PIPE_TEX_FILTER_LINEAR, true); > + } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { > + *mip = &mip_linear_2d_linear_repeat_POT; > + } > + else { > + *mip = &sp_samp->mip; > + if (min || mag) { I think this (and the following conditions) could be simplified a bit since a caller should only either request both or none of min/mag. So something like if (min) { assert(mag); ... Roland > + img_filter_func tmp_filter = get_img_filter(sp_sview, > &sp_samp->base, sp_samp->min_img_filter, false); > + if (min) > +*min = tmp_filter; > + if (mag) { > +if (sp_samp->min_mag_equal) { > + *mag = tmp_filter; > +} > +else { > + *mag = get_img_filter(sp_sview, &sp_samp->base, > sp_samp->base.mag_img_filter, false); > +} > + } > + } > + } > +} > > static void > sample_mip(struct sp_sampler_view *sp_sview, > @@ -2928,22 +2964,7 @@ sample_mip(struct sp_sampler_view *sp_sview, > img_filter_func min_img_filter = NULL; > img_filter_func mag_img_filter = NULL; > > - if (filt_args->control == tgsi_sampler_gather) { > - mip = &mip_nearest; > - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, > PIPE_TEX_FILTER_LINEAR, true); > - } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) { > - mip = &mip_linear_2d_linear_repeat_POT; > - } > - else { > - mip = &sp_samp->mip; > - min_img_filter = get_img_filter(sp_sview, &sp_samp->base, > sp_samp->min_img_filter, false); > - if (sp_samp->min_mag_equal) { > - mag_img_filter = min_img_filter; > - } > - else { > - mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, > sp_samp->base.mag_img_filter, false); > - } > - } > + get_filters(sp_sview, sp_samp, filt_args->control, &mip, &min_img_filter, > &mag_img_filter); > > mip->filter(sp_sview, sp_samp, min_img_filter, mag_img_filter, > s, t, p, c0, lod, filt_args, rgba); > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] egl/dri2: Close file descriptor on error.
On 9 September 2015 at 01:43, Ian Romanick wrote: > On 09/07/2015 01:58 AM, Emil Velikov wrote: >> From: Matt Turner >> >> v2: [Emil Velikov] >> Rework the error path to a common goto, close only if we own the fd. >> >> Signed-off-by: Emil Velikov >> --- >> src/egl/drivers/dri2/platform_drm.c | 27 ++- >> 1 file changed, 14 insertions(+), 13 deletions(-) >> >> diff --git a/src/egl/drivers/dri2/platform_drm.c >> b/src/egl/drivers/dri2/platform_drm.c >> index eda5087..e8fe7ea 100644 >> --- a/src/egl/drivers/dri2/platform_drm.c >> +++ b/src/egl/drivers/dri2/platform_drm.c >> @@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) >>dri2_dpy->own_device = 1; >>gbm = gbm_create_device(fd); >>if (gbm == NULL) >> - return EGL_FALSE; >> + goto cleanup; >> } >> >> - if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { >> - free(dri2_dpy); >> - return EGL_FALSE; >> - } >> + if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) >> + goto cleanup; >> >> dri2_dpy->gbm_dri = gbm_dri_device(gbm); >> - if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) { >> - free(dri2_dpy); >> - return EGL_FALSE; >> - } >> + if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) >> + goto cleanup; >> >> if (fd < 0) { >>fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3); >> - if (fd < 0) { >> - free(dri2_dpy); >> - return EGL_FALSE; >> - } >> + if (fd < 0) >> + goto cleanup; > > Shouldn't this fd also get closed eventually? If we decide to catch > other failures (e.g., memory allocation failures) later in this function? > By 'this fd' I assume you mean the dup'd fd as opposed to the device_fd ? Yes we should, Matt's patch was doing the correct thing as I misread the DUPFD part in F_DUPFD_CLOEXEC :-\ Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: Remove redundant update_state from intelReadPixels()
Regardless of what happens with the rest of this series, I think this is a reasonable cleanup. This patch is Reviewed-by: Ian Romanick On 09/09/2015 06:38 AM, Chris Wilson wrote: > The very first operation performed by _mesa_readpixels() is to call > _mesa_update_state() - we do not need to do so ourselves. > > Signed-off-by: Chris Wilson > Cc: Jordan Justen > Cc: Jason Ekstrand > Cc: Kenneth Graunke > Cc: Francisco Jerez > --- > src/mesa/drivers/dri/i965/intel_pixel_read.c | 8 > 1 file changed, 8 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c > b/src/mesa/drivers/dri/i965/intel_pixel_read.c > index eb366cd..16dcc55 100644 > --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c > +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c > @@ -269,14 +269,6 @@ intelReadPixels(struct gl_context * ctx, > intel_prepare_render(brw); > brw->front_buffer_dirty = dirty; > > - /* Update Mesa state before calling _mesa_readpixels(). > -* XXX this may not be needed since ReadPixels no longer uses the > -* span code. > -*/ > - > - if (ctx->NewState) > - _mesa_update_state(ctx); > - > _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); > > /* There's an intel_prepare_render() call in intelSpanRenderStart(). */ > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 91793] the "bin" directory is not created
https://bugs.freedesktop.org/show_bug.cgi?id=91793 --- Comment #4 from Emil Velikov --- Just had a closer look and it seems that the wgl tests are only build with cmake. Situation seems slightly confusing atm, but if you feel like helping out that'll be appreciated :) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/7] i965: Remove redundant test for NULL intel_texture_object
On 09/09/2015 06:39 AM, Chris Wilson wrote: > Having checked whether the base class (gl_texture_object) is NULL, we > know that intel_texture_object itself cannot be NULL. > > Signed-off-by: Chris Wilson > Cc: Jordan Justen > Cc: Jason Ekstrand > Cc: Kenneth Graunke > Cc: Francisco Jerez > --- > src/mesa/drivers/dri/i965/brw_draw.c | 7 +-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 0ffcc24..3cea331 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -346,7 +346,6 @@ static void > brw_draw_resolve(struct gl_context *ctx) > { > struct brw_context *brw = brw_context(ctx); > - struct intel_texture_object *tex_obj; > struct intel_renderbuffer *depth_irb; > > /* Resolve the depth buffer's HiZ buffer. */ > @@ -358,11 +357,15 @@ brw_draw_resolve(struct gl_context *ctx) > if (brw->NewGLState & _NEW_TEXTURE) { >int maxEnabledUnit = brw->ctx.Texture._MaxEnabledTexImageUnit; >for (int i = 0; i <= maxEnabledUnit; i++) { > + struct intel_texture_object *tex_obj; > + > if (!brw->ctx.Texture.Unit[i]._Current) > continue; > + > tex_obj = intel_texture_object(brw->ctx.Texture.Unit[i]._Current); I'd be in favor of some form of combining the assignment and the declaration. Either move the declaration down here, or move the assignment above. If you choose the latter, the if-statement below could remain unchanged while the if-statement above is just removed... since intel_texture_object is just a cast. > - if (!tex_obj || !tex_obj->mt) > + if (!tex_obj->mt) > continue; > + > intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); > intel_miptree_resolve_color(brw, tex_obj->mt); > brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/9] softpipe: Split 3D to 2D coords conversion into separate function.
Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: > This is to avoid tying the conversion to sampling - textureQueryLod > will need to do the conversion too, but it does not do any sampling. > > So instead of a "sample" vfunc, there is a "convert" vfunc. The > drawback of this approach is that a "noop" convert copies 3 arrays of > floats. Would be nice to avoid it in some clean way. Well yes I'm not sure it's such a great idea. These functions were there initially to make things faster (by skipping work not necessary without conditions basically), but it looks to me like that's not really happening here (because the common case, non cube map, does extra work for nothing - surely a perfectly predictable condition would be faster than copying stuff around). So, one idea to avoid this would be to just forget about those function pointers there. Instead of get_samples (or now convert_coords) in sview, just set needs_cube_convert or something like that in sview, then use a condition based on needs_cube_convert to either invoke sample_mip directly or cube coord conversion + sample mip in the appropriate place(s). Either way though looks acceptable to me. Roland > --- > src/gallium/drivers/softpipe/sp_tex_sample.c | 73 > +--- > src/gallium/drivers/softpipe/sp_tex_sample.h | 20 > 2 files changed, 54 insertions(+), 39 deletions(-) > > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c > b/src/gallium/drivers/softpipe/sp_tex_sample.c > index 0a3fc20..cdec984 100644 > --- a/src/gallium/drivers/softpipe/sp_tex_sample.c > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c > @@ -2981,29 +2981,36 @@ sample_mip(struct sp_sampler_view *sp_sview, > > } > > +static void > +convert_noop(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float s[TGSI_QUAD_SIZE], > + const float t[TGSI_QUAD_SIZE], > + const float p[TGSI_QUAD_SIZE], > + const float c0[TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE]) > +{ > + const size_t len = sizeof(float) * TGSI_QUAD_SIZE; > + > + memcpy(, s, len); > + memcpy(, t, len); > + memcpy(, p, len); > +} > > -/** > - * Use 3D texcoords to choose a cube face, then sample the 2D cube faces. > - * Put face info into the sampler faces[] array. > - */ > static void > -sample_cube(struct sp_sampler_view *sp_sview, > -struct sp_sampler *sp_samp, > -const float s[TGSI_QUAD_SIZE], > -const float t[TGSI_QUAD_SIZE], > -const float p[TGSI_QUAD_SIZE], > -const float c0[TGSI_QUAD_SIZE], > -const float c1[TGSI_QUAD_SIZE], > -const struct filter_args *filt_args, > -float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) > +convert_cube(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float s[TGSI_QUAD_SIZE], > + const float t[TGSI_QUAD_SIZE], > + const float p[TGSI_QUAD_SIZE], > + const float c0[TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE], > + float [TGSI_QUAD_SIZE]) > { > unsigned j; > - float [4], [4]; > - > - /* Not actually used, but the intermediate steps that do the > -* dereferencing don't know it. > -*/ > - static float [4] = { 0, 0, 0, 0 }; > > [0] = c0[0]; > [1] = c0[1]; > @@ -3071,8 +3078,6 @@ sample_cube(struct sp_sampler_view *sp_sview, > } >} > } > - > - sample_mip(sp_sview, sp_samp, , , , c0, c1, filt_args, rgba); > } > > > @@ -3393,9 +3398,9 @@ softpipe_create_sampler_view(struct pipe_context *pipe, > >if (view->target == PIPE_TEXTURE_CUBE || >view->target == PIPE_TEXTURE_CUBE_ARRAY) > - sview->get_samples = sample_cube; > + sview->convert_coords = convert_cube; >else { > - sview->get_samples = sample_mip; > + sview->convert_coords = convert_noop; >} >sview->pot2d = spr->pot && > (view->target == PIPE_TEXTURE_2D || > @@ -3440,13 +3445,22 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler, > enum tgsi_sampler_control control, > float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) > { > - struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler; > + struct sp_tgsi_sampler *sp_tgsi_samp = (struct sp_tgsi_sampler > *)tgsi_sampler; > + struct sp_sampler_view *sp_sview; > + struct sp_sampler *sp_samp; > struct filter_args filt_args; > + float cs[TGSI_QUAD_SIZE]; > + float ct[TGSI_QUAD_SIZE]; > + float cp[TGSI_QUAD_SIZE]; > + > assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); > assert(sampler_index < PIPE_MAX_SAMPLERS); > - assert
[Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation
From: Ian Romanick Previously the result of the complicated clamp() expression just dropped on the floor: clamp does not modify any of its parameters. Looking at the surrounding code, I believe this is supposed to modify the value of tex_coord. This change (along with a change to avoid the use of brw_blorp_framebuffer) does not affect any existing piglit tests. I'm not sure what this clamp is trying to accomplish, so I'm not sure how to write a test to exercise this path. I also noticed another bug in this code. There is no way the array texture case could possibly work. This will generate code for the TEXEL_FETCH macro like: #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), sample_map[int(2 * fract(coord.x))]); Since the coord parameter of this macro is a vec2 at all invocations, no expansion of this macro will even compile. Signed-off-by: Ian Romanick Cc: Anuj Phogat Cc: Topi Pohjolainen Cc: Jordan Justen --- src/mesa/drivers/common/meta_blit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index 71d18de..a41fe42 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, " vec2 tex_coord = texCoords - s_0_offset;\n" "\n" " tex_coord *= scale;\n" - " clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n" - " clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n" + " tex_coord.x = clamp(tex_coord.x, 0.0f, scale.x * src_width - 1.0f);\n" + " tex_coord.y = clamp(tex_coord.y, 0.0f, scale.y * src_height - 1.0f);\n" " interp = fract(tex_coord);\n" " tex_coord = ivec2(tex_coord) * scale_inv;\n" "\n" -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation
On 09/09/2015 11:13 AM, Ian Romanick wrote: > From: Ian Romanick > > Previously the result of the complicated clamp() expression just dropped > on the floor: clamp does not modify any of its parameters. Looking at > the surrounding code, I believe this is supposed to modify the value of > tex_coord. > > This change (along with a change to avoid the use of > brw_blorp_framebuffer) does not affect any existing piglit tests. I'm > not sure what this clamp is trying to accomplish, so I'm not sure how to > write a test to exercise this path. I just noticed that the piglit test contains the same incorrect shader as meta for the "reference" implementation. > I also noticed another bug in this code. There is no way the array > texture case could possibly work. This will generate code for the > TEXEL_FETCH macro like: > > #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), > sample_map[int(2 * fract(coord.x))]); > > Since the coord parameter of this macro is a vec2 at all invocations, no > expansion of this macro will even compile. > > Signed-off-by: Ian Romanick > Cc: Anuj Phogat > Cc: Topi Pohjolainen > Cc: Jordan Justen > --- > src/mesa/drivers/common/meta_blit.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/common/meta_blit.c > b/src/mesa/drivers/common/meta_blit.c > index 71d18de..a41fe42 100644 > --- a/src/mesa/drivers/common/meta_blit.c > +++ b/src/mesa/drivers/common/meta_blit.c > @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, > " vec2 tex_coord = texCoords - > s_0_offset;\n" > "\n" > " tex_coord *= scale;\n" > - " clamp(tex_coord.x, 0.0f, scale.x * > src_width - 1.0f);\n" > - " clamp(tex_coord.y, 0.0f, scale.y * > src_height - 1.0f);\n" > + " tex_coord.x = clamp(tex_coord.x, 0.0f, > scale.x * src_width - 1.0f);\n" > + " tex_coord.y = clamp(tex_coord.y, 0.0f, > scale.y * src_height - 1.0f);\n" > " interp = fract(tex_coord);\n" > " tex_coord = ivec2(tex_coord) * > scale_inv;\n" > "\n" ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level
Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: > These functions will be used by textureQueryLod. > --- > src/gallium/drivers/softpipe/sp_tex_sample.c | 100 > +-- > src/gallium/drivers/softpipe/sp_tex_sample.h | 7 ++ > 2 files changed, 101 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c > b/src/gallium/drivers/softpipe/sp_tex_sample.c > index cdec984..6e639e0 100644 > --- a/src/gallium/drivers/softpipe/sp_tex_sample.c > +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c > @@ -1937,6 +1937,38 @@ get_gather_component(const float > lod_in[TGSI_QUAD_SIZE]) > } > > static void > +clamp_lod(const struct sp_sampler_view *sp_sview, > + const struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > + float clamped[TGSI_QUAD_SIZE]) > +{ > + const float min_lod = sp_samp->base.min_lod; > + const float max_lod = sp_samp->base.max_lod; > + const float min_level = sp_sview->base.u.tex.first_level; > + const float max_level = sp_sview->base.u.tex.last_level; > + int i; > + > + for (i = 0; i < TGSI_QUAD_SIZE; i++) { > + float cl = lod[i]; > + > + cl = CLAMP(cl, min_lod, max_lod); > + /* XXX: Is min_level ever different from 0? I think the comment is bogus. min_level can easily be different from 0, at least when using ARB_texture_view afaik (playing around with base level might also cause this to be non-zero, though I'm not too familiar with what the state tracker does for those legacy GL weirdness things). > + */ > + cl = CLAMP(cl, 0, max_level - min_level); > + clamped[i] = cl; > + } > +} > + > +static void > +mip_level_linear(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > + float level[TGSI_QUAD_SIZE]) > +{ > + clamp_lod(sp_sview, sp_samp, lod, level); > +} > + > +static void > mip_filter_linear(struct sp_sampler_view *sp_sview, >struct sp_sampler *sp_samp, >img_filter_func min_filter, > @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, > } > > > +static void > +mip_level_nearest(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > + float level[TGSI_QUAD_SIZE]) > +{ > + const int first_level = sp_sview->base.u.tex.first_level; > + int j; > + > + clamp_lod(sp_sview, sp_samp, lod, level); > + for (j = 0; j < TGSI_QUAD_SIZE; j++) > + /* TODO: It should rather be: > + * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F; > + */ > + level[j] = first_level + (int)(level[j] + 0.5F); > +} > + > /** > * Compute nearest mipmap level from texcoords. > * Then sample the texture level for four elements of a quad. > @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, > > > static void > +mip_level_none(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > + float level[TGSI_QUAD_SIZE]) > +{ > + int j; > + > + for (j = 0; j < TGSI_QUAD_SIZE; j++) { > + level[j] = sp_sview->base.u.tex.first_level; > + } > +} > + > +static void > mip_filter_none(struct sp_sampler_view *sp_sview, > struct sp_sampler *sp_samp, > img_filter_func min_filter, > @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview, > > > static void > +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview, > +struct sp_sampler *sp_samp, > +const float lod[TGSI_QUAD_SIZE], > +float level[TGSI_QUAD_SIZE]) > +{ > + mip_level_none(sp_sview, sp_samp, lod, level); > +} > + > +static void > mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, > struct sp_sampler *sp_samp, > img_filter_func min_filter, > @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, > } > > > +static void > +mip_level_linear_aniso(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > + float level[TGSI_QUAD_SIZE]) > +{ > + mip_level_linear(sp_sview, sp_samp, lod, level); > +} > + > /** > * Sample 2D texture using an anisotropic filter. > */ > @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view > *sp_sview, > } > } > > +static void > +mip_level_linear_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview, > + struct sp_sampler *sp_samp, > + const float lod[TGSI_QUAD_SIZE], > +
Re: [Mesa-dev] [PATCH 7/9] softpipe: Add functions for computing mipmap level
On Wed, Sep 9, 2015 at 2:17 PM, Roland Scheidegger wrote: > Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: >> These functions will be used by textureQueryLod. >> --- >> src/gallium/drivers/softpipe/sp_tex_sample.c | 100 >> +-- >> src/gallium/drivers/softpipe/sp_tex_sample.h | 7 ++ >> 2 files changed, 101 insertions(+), 6 deletions(-) >> >> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c >> b/src/gallium/drivers/softpipe/sp_tex_sample.c >> index cdec984..6e639e0 100644 >> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c >> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c >> @@ -1937,6 +1937,38 @@ get_gather_component(const float >> lod_in[TGSI_QUAD_SIZE]) >> } >> >> static void >> +clamp_lod(const struct sp_sampler_view *sp_sview, >> + const struct sp_sampler *sp_samp, >> + const float lod[TGSI_QUAD_SIZE], >> + float clamped[TGSI_QUAD_SIZE]) >> +{ >> + const float min_lod = sp_samp->base.min_lod; >> + const float max_lod = sp_samp->base.max_lod; >> + const float min_level = sp_sview->base.u.tex.first_level; >> + const float max_level = sp_sview->base.u.tex.last_level; >> + int i; >> + >> + for (i = 0; i < TGSI_QUAD_SIZE; i++) { >> + float cl = lod[i]; >> + >> + cl = CLAMP(cl, min_lod, max_lod); >> + /* XXX: Is min_level ever different from 0? > I think the comment is bogus. min_level can easily be different from 0, > at least when using ARB_texture_view afaik (playing around with base > level might also cause this to be non-zero, though I'm not too familiar > with what the state tracker does for those legacy GL weirdness things). Either setting the base level or a texture view (or both!) can cause the min level to be 0. > >> + */ >> + cl = CLAMP(cl, 0, max_level - min_level); >> + clamped[i] = cl; >> + } >> +} >> + >> +static void >> +mip_level_linear(struct sp_sampler_view *sp_sview, >> + struct sp_sampler *sp_samp, >> + const float lod[TGSI_QUAD_SIZE], >> + float level[TGSI_QUAD_SIZE]) >> +{ >> + clamp_lod(sp_sview, sp_samp, lod, level); >> +} >> + >> +static void >> mip_filter_linear(struct sp_sampler_view *sp_sview, >>struct sp_sampler *sp_samp, >>img_filter_func min_filter, >> @@ -1998,6 +2030,23 @@ mip_filter_linear(struct sp_sampler_view *sp_sview, >> } >> >> >> +static void >> +mip_level_nearest(struct sp_sampler_view *sp_sview, >> + struct sp_sampler *sp_samp, >> + const float lod[TGSI_QUAD_SIZE], >> + float level[TGSI_QUAD_SIZE]) >> +{ >> + const int first_level = sp_sview->base.u.tex.first_level; >> + int j; >> + >> + clamp_lod(sp_sview, sp_samp, lod, level); >> + for (j = 0; j < TGSI_QUAD_SIZE; j++) >> + /* TODO: It should rather be: >> + * level[j] = first_level + ceil(level[j] + 0.5F) - 1.0F; >> + */ >> + level[j] = first_level + (int)(level[j] + 0.5F); >> +} >> + >> /** >> * Compute nearest mipmap level from texcoords. >> * Then sample the texture level for four elements of a quad. >> @@ -2050,6 +2099,19 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview, >> >> >> static void >> +mip_level_none(struct sp_sampler_view *sp_sview, >> + struct sp_sampler *sp_samp, >> + const float lod[TGSI_QUAD_SIZE], >> + float level[TGSI_QUAD_SIZE]) >> +{ >> + int j; >> + >> + for (j = 0; j < TGSI_QUAD_SIZE; j++) { >> + level[j] = sp_sview->base.u.tex.first_level; >> + } >> +} >> + >> +static void >> mip_filter_none(struct sp_sampler_view *sp_sview, >> struct sp_sampler *sp_samp, >> img_filter_func min_filter, >> @@ -2088,6 +2150,15 @@ mip_filter_none(struct sp_sampler_view *sp_sview, >> >> >> static void >> +mip_level_none_no_filter_select(struct sp_sampler_view *sp_sview, >> +struct sp_sampler *sp_samp, >> +const float lod[TGSI_QUAD_SIZE], >> +float level[TGSI_QUAD_SIZE]) >> +{ >> + mip_level_none(sp_sview, sp_samp, lod, level); >> +} >> + >> +static void >> mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview, >> struct sp_sampler *sp_samp, >> img_filter_func min_filter, >> @@ -2339,6 +2410,15 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview, >> } >> >> >> +static void >> +mip_level_linear_aniso(struct sp_sampler_view *sp_sview, >> + struct sp_sampler *sp_samp, >> + const float lod[TGSI_QUAD_SIZE], >> + float level[TGSI_QUAD_SIZE]) >> +{ >> + mip_level_linear(sp_sview, sp_samp, lod, level); >> +} >> + >> /** >> * Sample 2D texture using an anisotropic filter. >> */ >> @@ -2450,6 +2530,14 @@ mip_filter_linear_aniso(struct sp_sampler_view >> *sp_sview, >> } >> }
Re: [Mesa-dev] [PATCH 8/9] tgsi: Add code for handling lodq opcode
Yes, using a new function is definitely the right way to go imho. Apart from the things mentioned by Brian, the series looks good to me. Roland Am 09.09.2015 um 12:35 schrieb Krzesimir Nowak: > This introduces new vfunc in tgsi_sampler just for this opcode. I > decided against extending get_samples vfunc to return the mipmap level > and LOD - the function's prototype is already too scary and doing the > sampling for textureQueryLod would be a waste of time. > --- > src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 > ++ > src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 > 2 files changed, 54 insertions(+) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c > b/src/gallium/auxiliary/tgsi/tgsi_exec.c > index 9544623..054ad08 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c > @@ -2132,6 +2132,44 @@ exec_tex(struct tgsi_exec_machine *mach, > } > } > > +static void > +exec_lodq(struct tgsi_exec_machine *mach, > + const struct tgsi_full_instruction *inst) > +{ > + uint unit; > + int dim; > + int i; > + union tgsi_exec_channel coords[4]; > + const union tgsi_exec_channel *args[Elements(coords)]; > + union tgsi_exec_channel r[2]; > + > + unit = fetch_sampler_unit(mach, inst, 1); > + dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL); > + assert(dim <= Elements(coords)); > + /* fetch coordinates */ > + for (i = 0; i < dim; i++) { > + FETCH(&coords[i], 0, TGSI_CHAN_X + i); > + args[i] = &coords[i]; > + } > + for (i = dim; i < Elements(coords); i++) { > + args[i] = &ZeroVec; > + } > + mach->Sampler->query_lod(mach->Sampler, unit, unit, > +args[0]->f, > +args[1]->f, > +args[2]->f, > +args[3]->f, > +tgsi_sampler_lod_none, > +r[0].f, > +r[1].f); > + > + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { > + store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, > TGSI_EXEC_DATA_FLOAT); > + } > + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { > + store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, > TGSI_EXEC_DATA_FLOAT); > + } > +} > > static void > exec_txd(struct tgsi_exec_machine *mach, > @@ -4378,6 +4416,12 @@ exec_instruction( >exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); >break; > > + case TGSI_OPCODE_LODQ: > + /* src[0] = texcoord */ > + /* src[1] = sampler unit */ > + exec_lodq(mach, inst); > + break; > + > case TGSI_OPCODE_UP2H: >assert (0); >break; > diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h > b/src/gallium/auxiliary/tgsi/tgsi_exec.h > index 5d56aab..556e0af 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h > +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h > @@ -138,6 +138,16 @@ struct tgsi_sampler > const int j[TGSI_QUAD_SIZE], const int > k[TGSI_QUAD_SIZE], > const int lod[TGSI_QUAD_SIZE], const int8_t offset[3], > float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); > + void (*query_lod)(struct tgsi_sampler *tgsi_sampler, > + const unsigned sview_index, > + const unsigned sampler_index, > + const float s[TGSI_QUAD_SIZE], > + const float t[TGSI_QUAD_SIZE], > + const float p[TGSI_QUAD_SIZE], > + const float c0[TGSI_QUAD_SIZE], > + enum tgsi_sampler_control control, > + float mipmap[TGSI_QUAD_SIZE], > + float lod[TGSI_QUAD_SIZE]); > }; > > #define TGSI_EXEC_NUM_TEMPS 4096 > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation
I'm pretty sure our implementation of this extension is complete rubbish. I have attached an image from the blit-scaled test. This result cannot be useful to anyone. On 09/09/2015 11:16 AM, Ian Romanick wrote: > On 09/09/2015 11:13 AM, Ian Romanick wrote: >> From: Ian Romanick >> >> Previously the result of the complicated clamp() expression just dropped >> on the floor: clamp does not modify any of its parameters. Looking at >> the surrounding code, I believe this is supposed to modify the value of >> tex_coord. >> >> This change (along with a change to avoid the use of >> brw_blorp_framebuffer) does not affect any existing piglit tests. I'm >> not sure what this clamp is trying to accomplish, so I'm not sure how to >> write a test to exercise this path. > > I just noticed that the piglit test contains the same incorrect shader > as meta for the "reference" implementation. > >> I also noticed another bug in this code. There is no way the array >> texture case could possibly work. This will generate code for the >> TEXEL_FETCH macro like: >> >> #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), >> sample_map[int(2 * fract(coord.x))]); >> >> Since the coord parameter of this macro is a vec2 at all invocations, no >> expansion of this macro will even compile. >> >> Signed-off-by: Ian Romanick >> Cc: Anuj Phogat >> Cc: Topi Pohjolainen >> Cc: Jordan Justen >> --- >> src/mesa/drivers/common/meta_blit.c | 4 ++-- >> 1 file changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/src/mesa/drivers/common/meta_blit.c >> b/src/mesa/drivers/common/meta_blit.c >> index 71d18de..a41fe42 100644 >> --- a/src/mesa/drivers/common/meta_blit.c >> +++ b/src/mesa/drivers/common/meta_blit.c >> @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context >> *ctx, >> " vec2 tex_coord = texCoords - >> s_0_offset;\n" >> "\n" >> " tex_coord *= scale;\n" >> - " clamp(tex_coord.x, 0.0f, scale.x * >> src_width - 1.0f);\n" >> - " clamp(tex_coord.y, 0.0f, scale.y * >> src_height - 1.0f);\n" >> + " tex_coord.x = clamp(tex_coord.x, 0.0f, >> scale.x * src_width - 1.0f);\n" >> + " tex_coord.y = clamp(tex_coord.y, 0.0f, >> scale.y * src_height - 1.0f);\n" >> " interp = fract(tex_coord);\n" >> " tex_coord = ivec2(tex_coord) * >> scale_inv;\n" >> "\n" > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] meta: Use result of texture coordinate clamping operation
In case it's of any interest, nouveau fails many of the blit-scaled tests. However nouveau's MS blits are notoriously bad. [I think I know what's wrong, just don't have the energy to fix it.] OTOH the NVIDIA driver also fails at every scale factor (for 2x and 8x, passes for 4x). On Wed, Sep 9, 2015 at 2:30 PM, Ian Romanick wrote: > I'm pretty sure our implementation of this extension is complete > rubbish. I have attached an image from the blit-scaled test. This > result cannot be useful to anyone. > > On 09/09/2015 11:16 AM, Ian Romanick wrote: >> On 09/09/2015 11:13 AM, Ian Romanick wrote: >>> From: Ian Romanick >>> >>> Previously the result of the complicated clamp() expression just dropped >>> on the floor: clamp does not modify any of its parameters. Looking at >>> the surrounding code, I believe this is supposed to modify the value of >>> tex_coord. >>> >>> This change (along with a change to avoid the use of >>> brw_blorp_framebuffer) does not affect any existing piglit tests. I'm >>> not sure what this clamp is trying to accomplish, so I'm not sure how to >>> write a test to exercise this path. >> >> I just noticed that the piglit test contains the same incorrect shader >> as meta for the "reference" implementation. >> >>> I also noticed another bug in this code. There is no way the array >>> texture case could possibly work. This will generate code for the >>> TEXEL_FETCH macro like: >>> >>> #define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord), >>> sample_map[int(2 * fract(coord.x))]); >>> >>> Since the coord parameter of this macro is a vec2 at all invocations, no >>> expansion of this macro will even compile. >>> >>> Signed-off-by: Ian Romanick >>> Cc: Anuj Phogat >>> Cc: Topi Pohjolainen >>> Cc: Jordan Justen >>> --- >>> src/mesa/drivers/common/meta_blit.c | 4 ++-- >>> 1 file changed, 2 insertions(+), 2 deletions(-) >>> >>> diff --git a/src/mesa/drivers/common/meta_blit.c >>> b/src/mesa/drivers/common/meta_blit.c >>> index 71d18de..a41fe42 100644 >>> --- a/src/mesa/drivers/common/meta_blit.c >>> +++ b/src/mesa/drivers/common/meta_blit.c >>> @@ -187,8 +187,8 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context >>> *ctx, >>> " vec2 tex_coord = texCoords - >>> s_0_offset;\n" >>> "\n" >>> " tex_coord *= scale;\n" >>> - " clamp(tex_coord.x, 0.0f, scale.x * >>> src_width - 1.0f);\n" >>> - " clamp(tex_coord.y, 0.0f, scale.y * >>> src_height - 1.0f);\n" >>> + " tex_coord.x = clamp(tex_coord.x, 0.0f, >>> scale.x * src_width - 1.0f);\n" >>> + " tex_coord.y = clamp(tex_coord.y, 0.0f, >>> scale.y * src_height - 1.0f);\n" >>> " interp = fract(tex_coord);\n" >>> " tex_coord = ivec2(tex_coord) * >>> scale_inv;\n" >>> "\n" >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/2] egl/dri2: Close file descriptor on error.
On 09/09/2015 10:59 AM, Emil Velikov wrote: > On 9 September 2015 at 01:43, Ian Romanick wrote: >> On 09/07/2015 01:58 AM, Emil Velikov wrote: >>> From: Matt Turner >>> >>> v2: [Emil Velikov] >>> Rework the error path to a common goto, close only if we own the fd. >>> >>> Signed-off-by: Emil Velikov >>> --- >>> src/egl/drivers/dri2/platform_drm.c | 27 ++- >>> 1 file changed, 14 insertions(+), 13 deletions(-) >>> >>> diff --git a/src/egl/drivers/dri2/platform_drm.c >>> b/src/egl/drivers/dri2/platform_drm.c >>> index eda5087..e8fe7ea 100644 >>> --- a/src/egl/drivers/dri2/platform_drm.c >>> +++ b/src/egl/drivers/dri2/platform_drm.c >>> @@ -623,26 +623,20 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay >>> *disp) >>>dri2_dpy->own_device = 1; >>>gbm = gbm_create_device(fd); >>>if (gbm == NULL) >>> - return EGL_FALSE; >>> + goto cleanup; >>> } >>> >>> - if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { >>> - free(dri2_dpy); >>> - return EGL_FALSE; >>> - } >>> + if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) >>> + goto cleanup; >>> >>> dri2_dpy->gbm_dri = gbm_dri_device(gbm); >>> - if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) { >>> - free(dri2_dpy); >>> - return EGL_FALSE; >>> - } >>> + if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) >>> + goto cleanup; >>> >>> if (fd < 0) { >>>fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3); >>> - if (fd < 0) { >>> - free(dri2_dpy); >>> - return EGL_FALSE; >>> - } >>> + if (fd < 0) >>> + goto cleanup; >> >> Shouldn't this fd also get closed eventually? If we decide to catch >> other failures (e.g., memory allocation failures) later in this function? >> > By 'this fd' I assume you mean the dup'd fd as opposed to the > device_fd ? Yes we should, Matt's patch was doing the correct thing as > I misread the DUPFD part in F_DUPFD_CLOEXEC :-\ Yeah, that's what I meant. In spite of that, I like the 'goto cleanup' change. I was going to suggest that to Matt... but you went and did it before I could suggest it. :) Maybe blend the strengths? > Thanks > Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] i965: Abort tiled_memcpy path for ReadPixels in case of transfer operations
On Tue, Sep 1, 2015 at 6:58 AM, Emil Velikov wrote: > Hi all > > On 21 August 2015 at 23:04, Anuj Phogat wrote: >> We have a similar check in meta pbo path. >> >> Cc: >> Signed-off-by: Anuj Phogat >> --- >> src/mesa/drivers/dri/i965/intel_pixel_read.c | 4 >> 1 file changed, 4 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c >> b/src/mesa/drivers/dri/i965/intel_pixel_read.c >> index 3fe506e..55f6852 100644 >> --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c >> +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c >> @@ -81,6 +81,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, >> if (rb == NULL) >>return false; >> >> + if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, >> + type, GL_FALSE)) >> + return false; >> + >> struct intel_renderbuffer *irb = intel_renderbuffer(rb); >> int dst_pitch; >> > Can we get a pair of eyes looking this way (same goes for the rest of > the series). > They all seem like pretty trivial fixes. > > Anuj, can you amend the cc tag before pushing to include 10.6 please. > Yes, I'll amend the cc tag. > Thanks > Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted
On 09/09/2015 11:16 AM, Marius Predut wrote: > Comparison with a signed expression and unsigned value > is converted to unsigned value, reason for minus value is interpreted > as a big unsigned value. For this case the "for" loop > is going into unexpected behavior. > > v1:Brian Paul: code style fix. I don't think you really did... > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 > Signed-off-by: Marius Predut > --- > src/mesa/tnl_dd/t_dd_dmatmp.h | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h > index 7be3954..79de224 100644 > --- a/src/mesa/tnl_dd/t_dd_dmatmp.h > +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h > @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context > *ctx, >LOCAL_VARS; >GLuint j; > > + if(count % 4 != 0) ^ ...because I'm quite sure Brian's code had a space here, per Mesa's coding standards. Also, this change is incorrect. If an application does glDrawArrays(GL_QUADS, 0, (n * 4) + 3), this change will cause zero quads to be drawn when n quads should be drawn. Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says: "The total number of vertices between Begin and End is 4n + k, where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are ignored." We probably don't have a piglit test for this scenario, so one should be added. You can CC me on that patch. :) I think the correct change is to trim count such that (count % 4) == 0. If the modified value of count is zero, bail out. With that change, the other hunk (below) is unnecessary. > + return; > + >INIT(GL_TRIANGLES); > >for (j = start; j < count-3; j += 4) { > @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct > gl_context *ctx, > ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); >} >else { > - ok = HAVE_TRIANGLES; /* flatshading is ok. */ > + ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */ >} >break; >default: > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i915: fixing driver crashes if too few vertices are submitted
On 09/09/2015 11:53 AM, Ian Romanick wrote: > On 09/09/2015 11:16 AM, Marius Predut wrote: >> Comparison with a signed expression and unsigned value >> is converted to unsigned value, reason for minus value is interpreted >> as a big unsigned value. For this case the "for" loop >> is going into unexpected behavior. >> >> v1:Brian Paul: code style fix. > > I don't think you really did... > >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 >> Signed-off-by: Marius Predut >> --- >> src/mesa/tnl_dd/t_dd_dmatmp.h | 5 - >> 1 file changed, 4 insertions(+), 1 deletion(-) >> >> diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h >> index 7be3954..79de224 100644 >> --- a/src/mesa/tnl_dd/t_dd_dmatmp.h >> +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h >> @@ -627,6 +627,9 @@ static void TAG(render_quads_verts)( struct gl_context >> *ctx, >>LOCAL_VARS; >>GLuint j; >> >> + if(count % 4 != 0) >^ > ...because I'm quite sure Brian's code had a space here, per Mesa's > coding standards. > > Also, this change is incorrect. If an application does > glDrawArrays(GL_QUADS, 0, (n * 4) + 3), this change will cause zero > quads to be drawn when n quads should be drawn. > > Page 18 (page 32 of the PDF) of the OpenGL 2.1 spec says: > > "The total number of vertices between Begin and End is 4n + k, > where 0 ≤ k ≤ 3; if k is not zero, the final k vertices are > ignored." > > We probably don't have a piglit test for this scenario, so one should be > added. You can CC me on that patch. :) > > I think the correct change is to trim count such that (count % 4) == 0. Also... wherever you add this trim code, you should include the spec quotation that I mentioned above. > If the modified value of count is zero, bail out. With that change, > the other hunk (below) is unnecessary. > >> + return; >> + >>INIT(GL_TRIANGLES); >> >>for (j = start; j < count-3; j += 4) { >> @@ -1248,7 +1251,7 @@ static GLboolean TAG(validate_render)( struct >> gl_context *ctx, >> ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); >> } >> else { >> -ok = HAVE_TRIANGLES; /* flatshading is ok. */ >> +ok = HAVE_TRIANGLES && (count % 4 == 0); /* flatshading is ok. */ >> } >> break; >>default: > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] i915/aa: fixing anti-aliasing bug for thinnest width lines
On 09/09/2015 06:59 AM, Marius Predut wrote: > On PNV platform, for 1 pixel line thickness or less, > the general anti-aliasing algorithm gives up, and a garbage line is generated. > Setting a Line Width of 0.0 specifies the rasterization > of the "thinnest" (one-pixel-wide), non-antialiased lines. > Lines rendered with zero Line Width are rasterized using > Grid Intersection Quantization rules as specified by bspec G45: Volume 2: > 3D/Media, > 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section. > > This patch follow the same rules as patches fixing the > https://bugs.freedesktop.org/show_bug.cgi?id=28832 > bug. > > v1: Eduardo Lima Mitev: Wrong indentation inside the if clause. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90367 > > Signed-off-by: Marius Predut > --- > src/mesa/drivers/dri/i915/i915_state.c | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/src/mesa/drivers/dri/i915/i915_state.c > b/src/mesa/drivers/dri/i915/i915_state.c > index 4c83073..ebb4e9a 100644 > --- a/src/mesa/drivers/dri/i915/i915_state.c > +++ b/src/mesa/drivers/dri/i915/i915_state.c > @@ -599,6 +599,21 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) > > width = (int) (widthf * 2); > width = CLAMP(width, 1, 0xf); > + > + if (ctx->Line.Width < 1.5 || widthf < 1.5) { > + /* For 1 pixel line thickness or less, the general > + * anti-aliasing algorithm gives up, and a garbage line is > + * generated. Setting a Line Width of 0.0 specifies the > + * rasterization of the "thinnest" (one-pixel-wide), > + * non-antialiased lines. > + * > + * Lines rendered with zero Line Width are rasterized using > + * Grid Intersection Quantization rules as specified by > + * bspec G45: Volume 2: 3D/Media, Eh... Isn't G45 actually i965-class hardware? GEN4.5, right? I don't see how a GEN4 bspec reference is useful (or correct?) in the GEN3 driver. I think you want "2.8.4.1 Zero-Width (Cosmetic) Line Rasterization" from volume 1f of the GEN3 docs. I looked at section 2.8.4.3 Anti-aliased Line Rasterization of volume 1f of the GEN3 docs that I have. I don't see any mention of this restriction. Is it documented anywhere? It also seems like this will affect non-antialised wide lines. Is that correct? > + * 7.3.13.1 Zero-Width (Cosmetic) Line Rasterization section > + */ > + width = 0; > + } > lis4 |= width << S4_LINE_WIDTH_SHIFT; > > if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-dev, 1/5] i965: Support CS in update_stage_texture_surfaces
Jordan Justen writes: > Signed-off-by: Jordan Justen > > --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 9 - > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 72aad96..0eb5361 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -804,7 +804,7 @@ update_stage_texture_surfaces(struct brw_context *brw, >struct brw_stage_state *stage_state, >bool for_gather) > { > - if (!prog) > + if (!prog || !stage_state->prog_data) >return; Why do we need to test for stage_state->prog_data being NULL now? This atom always run after brw_upload_cs_prog(), which sets brw->cs.base.prog_data = &brw->cs.prog_data->base; which shouldn't change after that. Other that that, Reviewed-by: Kristian Høgsberg > struct gl_context *ctx = &brw->ctx; > @@ -848,10 +848,14 @@ brw_update_texture_surfaces(struct brw_context *brw) > /* BRW_NEW_FRAGMENT_PROGRAM */ > struct gl_program *fs = (struct gl_program *) brw->fragment_program; > > + /* BRW_NEW_COMPUTE_PROGRAM */ > + struct gl_program *cs = (struct gl_program *) brw->compute_program; > + > /* _NEW_TEXTURE */ > update_stage_texture_surfaces(brw, vs, &brw->vs.base, false); > update_stage_texture_surfaces(brw, gs, &brw->gs.base, false); > update_stage_texture_surfaces(brw, fs, &brw->wm.base, false); > + update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); > > /* emit alternate set of surface state for gather. this > * allows the surface format to be overriden for only the > @@ -863,6 +867,8 @@ brw_update_texture_surfaces(struct brw_context *brw) > update_stage_texture_surfaces(brw, gs, &brw->gs.base, true); >if (fs && fs->UsesGather) > update_stage_texture_surfaces(brw, fs, &brw->wm.base, true); > + if (cs && cs->UsesGather) > + update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); > } > > brw->ctx.NewDriverState |= BRW_NEW_SURFACES; > @@ -872,6 +878,7 @@ const struct brw_tracked_state brw_texture_surfaces = { > .dirty = { >.mesa = _NEW_TEXTURE, >.brw = BRW_NEW_BATCH | > + BRW_NEW_COMPUTE_PROGRAM | > BRW_NEW_FRAGMENT_PROGRAM | > BRW_NEW_FS_PROG_DATA | > BRW_NEW_GEOMETRY_PROGRAM | ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v1] i915: fixing driver crashes if too few vertices are submitted
Marius Predut writes: > Comparison with a signed expression and unsigned value > is converted to unsigned value, reason for minus value is interpreted > as a big unsigned value. For this case the "for" loop > is going into unexpected behavior. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38109 > Signed-off-by: Marius Predut > --- > src/mesa/tnl_dd/t_dd_dmatmp.h | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h > index 7be3954..88ecc78 100644 > --- a/src/mesa/tnl_dd/t_dd_dmatmp.h > +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h > @@ -627,6 +627,8 @@ static void TAG(render_quads_verts)( struct gl_context > *ctx, >LOCAL_VARS; >GLuint j; > > + if(count%4 != 0) return; > + Seems to me that does a bit more than just fixing the crash. I would think if (count < 4) would fix the crash only. (And then quite possibly you won't need the next hunk?) But I have no idea whether the side effect is desired. (Actually, what if count is 0? Or is that impossible?) eirik >INIT(GL_TRIANGLES); > >for (j = start; j < count-3; j += 4) { > @@ -1248,7 +1250,7 @@ static GLboolean TAG(validate_render)( struct > gl_context *ctx, > ok = (GLint) count < GET_SUBSEQUENT_VB_MAX_ELTS(); >} >else { > - ok = HAVE_TRIANGLES; /* flatshading is ok. */ > + ok = HAVE_TRIANGLES && (count%4 == 0); /* flatshading is ok. */ >} >break; >default: > -- > 1.9.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Making ext_framebuffer_multisample-accuracy run on non-i965 drivers
Both st/mesa and the NVIDIA fail framebuffer completeness on this command sequence: 270 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples = 0, internalformat = GL_RGBA, width = 256, height = 256) 272 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment = GL_COLOR_ATTACHMENT0, renderbuffertarget = GL_RENDERBUFFER, renderbuffer = 1) 273 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 10) 274 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples = 0, internalformat = GL_STENCIL_INDEX8, width = 256, height = 256) 275 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment = GL_STENCIL_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER, renderbuffer = 10) 276 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 9) 277 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples = 0, internalformat = GL_DEPTH_COMPONENT24, width = 256, height = 256) 278 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment = GL_DEPTH_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER, renderbuffer = 9) 279 glCheckFramebufferStatus(target = GL_DRAW_FRAMEBUFFER) = GL_FRAMEBUFFER_UNSUPPORTED This is because (in the case of st/mesa), the depth and stencil backing textures are different, which is not something that the gallium API allows (nor does NVIDIA hardware handle). For textures this is handled by merging attachment points when the textures are identical, but this is harder to do with renderbuffers since they're allocated implicitly, and can be attached/detached individually. I'm not sure that there's a clean way to fix this. I think that the piglit should be fixed instead to not do this. However I know these sorts of suggestions are often frowned upon, so I'm asking here. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] i915: use EmitNoIndirectSampler
Reviewed-by: Ian Romanick On 06/29/2015 12:35 AM, Tapani Pälli wrote: > Signed-off-by: Tapani Pälli > --- > src/mesa/drivers/dri/i915/i915_context.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/src/mesa/drivers/dri/i915/i915_context.c > b/src/mesa/drivers/dri/i915/i915_context.c > index 42ea54e..57b033c 100644 > --- a/src/mesa/drivers/dri/i915/i915_context.c > +++ b/src/mesa/drivers/dri/i915/i915_context.c > @@ -255,6 +255,8 @@ i915CreateContext(int api, > * FINISHME: vertex shaders? > */ > ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true; > + > ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler = > + true; > > struct gl_shader_compiler_options *const fs_options = >& ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT]; > @@ -266,6 +268,7 @@ i915CreateContext(int api, > fs_options->EmitNoIndirectOutput = true; > fs_options->EmitNoIndirectUniform = true; > fs_options->EmitNoIndirectTemp = true; > + fs_options->EmitNoIndirectSampler = true; > > ctx->Const.MaxDrawBuffers = 1; > ctx->Const.QueryCounterBits.SamplesPassed = 0; > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-dev, 2/5] i965: Run vector splitting for CS in brw_link_shader
Jordan Justen writes: > Signed-off-by: Jordan Justen I'd change the subject to something like i965: Teach is_scalar_shader_stage() about compute shaders since that's the change in the patch and it affects many other lowering/optimization decisions. With that, Reviewed-by: Kristian Høgsberg > --- > src/mesa/drivers/dri/i965/brw_shader.cpp | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp > b/src/mesa/drivers/dri/i965/brw_shader.cpp > index 5653d6b..309f495 100644 > --- a/src/mesa/drivers/dri/i965/brw_shader.cpp > +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp > @@ -185,6 +185,7 @@ is_scalar_shader_stage(struct brw_context *brw, int stage) > { > switch (stage) { > case MESA_SHADER_FRAGMENT: > + case MESA_SHADER_COMPUTE: >return true; > case MESA_SHADER_VERTEX: >return brw->intelScreen->compiler->scalar_vs; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] meta: Always bind the texture
From: Ian Romanick We may have been called from glGenerateTextureMipmap with CurrentUnit still set to 0, so we don't know when we can skip binding the texture. Assume that _mesa_BindTexture will be fast if we're rebinding the same texture. Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91847 Cc: "11.0" --- This patch seems to resolve the problems seen in the apitrace attached to the bug. I'm working on a piglit test. src/mesa/drivers/common/meta_generate_mipmap.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index 0655f05..6731478 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -202,8 +202,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ? ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0; - if (currentTexUnitSave != 0) - _mesa_BindTexture(target, texObj->Name); + /* We may have been called from glGenerateTextureMipmap with CurrentUnit +* still set to 0, so we don't know when we can skip binding the texture. +* Assume that _mesa_BindTexture will be fast if we're rebinding the same +* texture. +*/ + _mesa_BindTexture(target, texObj->Name); if (!mipmap->Sampler) { _mesa_GenSamplers(1, &mipmap->Sampler); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Making ext_framebuffer_multisample-accuracy run on non-i965 drivers
Aha, apparently there's a 'depthstencil' option that uses a combined RB. Nevermind! Didn't realize that was the case. On Wed, Sep 9, 2015 at 3:24 PM, Ilia Mirkin wrote: > Both st/mesa and the NVIDIA fail framebuffer completeness on this > command sequence: > > 270 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples > = 0, internalformat = GL_RGBA, width = 256, height = 256) > 272 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment > = GL_COLOR_ATTACHMENT0, renderbuffertarget = GL_RENDERBUFFER, > renderbuffer = 1) > 273 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 10) > 274 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples > = 0, internalformat = GL_STENCIL_INDEX8, width = 256, height = 256) > 275 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment > = GL_STENCIL_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER, > renderbuffer = 10) > 276 glBindRenderbuffer(target = GL_RENDERBUFFER, renderbuffer = 9) > 277 glRenderbufferStorageMultisample(target = GL_RENDERBUFFER, samples > = 0, internalformat = GL_DEPTH_COMPONENT24, width = 256, height = 256) > 278 glFramebufferRenderbuffer(target = GL_DRAW_FRAMEBUFFER, attachment > = GL_DEPTH_ATTACHMENT, renderbuffertarget = GL_RENDERBUFFER, > renderbuffer = 9) > 279 glCheckFramebufferStatus(target = GL_DRAW_FRAMEBUFFER) = > GL_FRAMEBUFFER_UNSUPPORTED > > This is because (in the case of st/mesa), the depth and stencil > backing textures are different, which is not something that the > gallium API allows (nor does NVIDIA hardware handle). > > For textures this is handled by merging attachment points when the > textures are identical, but this is harder to do with renderbuffers > since they're allocated implicitly, and can be attached/detached > individually. > > I'm not sure that there's a clean way to fix this. I think that the > piglit should be fixed instead to not do this. However I know these > sorts of suggestions are often frowned upon, so I'm asking here. > > -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev