[Mesa-dev] [PATCH 3/7] Complete implementation for ARB_framebuffer_no_attachment in Mesa core: implement GetFramebufferParameteriv, FramebufferParameteri and changes to _mesa_test_framebuffer_completeness
From: Kevin Rogovin Complete the implementation of ARB_framebuffer_no_attachment in Mesa core: 1. Implement _mesa_GetFramebufferParameteriv() and _mesa_FramebufferParameteri() 2. Additions to _mesa_test_framebuffer_completeness() --- src/mesa/main/fbobject.c | 153 --- 1 file changed, 144 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 5c78c40..6c0c2b0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1113,14 +1113,49 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, } else if (att_layer_count > max_layer_count) { max_layer_count = att_layer_count; } + + /** + * The extension GL_ARB_framebuffer_no_attachments places the additional + * requirement on each attachment that + * + * "The width and height of image are greater than zero and less than or equal + * to the values of the implementation-dependent limits MAX_FRAMEBUFFER_- + * WIDTH and MAX_FRAMEBUFFER_HEIGHT , respectively. " + * + * "If is a three-dimensional texture or a one- or two-dimensional + * array texture and the attachment is layered, the depth or layer count of + * the texture is less than or equal to the implementation-dependent limit + * MAX_FRAMEBUFFER_LAYERS." + * + * "If image has multiple samples, its sample count is less than or equal to + * the value of the implementation-dependent limit MAX_FRAMEBUFFER_- + * SAMPLES ." + * + * The same requirements are also in place for GL 4.5, + * Section 9.4.1 "Framebuffer Attachment Completeness", pg 310-311 + * + * However, this is a tighter restriction than previous version of GL. + * In interest of better compatibility, we will not enforce these + * restrictions. 
+ */ } fb->MaxNumLayers = max_layer_count; if (numImages == 0) { - fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; - fbo_incomplete(ctx, "no attachments", -1); - return; + fb->_HasAttachments = GL_FALSE; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments", -1); + return; + } + + if (fb->DefaultGeometry.Width == 0 || fb->DefaultGeometry.Height == 0) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments and default width or height is 0", -1); + return; + } } if (_mesa_is_desktop_gl(ctx) && !ctx->Extensions.ARB_ES2_compatibility) { @@ -1185,8 +1220,10 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, * renderbuffers/textures are different sizes, the framebuffer * width/height will be set to the smallest width/height. */ - fb->Width = minWidth; - fb->Height = minHeight; + if (numImages != 0) { + fb->Width = minWidth; + fb->Height = minHeight; + } /* finally, update the visual info for the framebuffer */ _mesa_update_framebuffer_visual(ctx, fb); @@ -1292,16 +1329,114 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } -extern void GLAPIENTRY +static void +framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint param, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + if (param < 0 || param > ctx->Const.MaxFramebufferWidth) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Width = param; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + if (param < 0 || param > ctx->Const.MaxFramebufferHeight) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Height = param; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + if (param < 0 || param > ctx->Const.MaxFramebufferLayers) +_mesa_error(ctx, GL_INVALID_VALUE, 
"%s", func); + else + fb->DefaultGeometry.Layers = param; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + if (param < 0 || param > ctx->Const.MaxFramebufferSamples) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else +fb->DefaultGeometry.NumSamples = param; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + fb->DefaultGeometry.FixedSampleLocations = param; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "%s(pname=0x%x)", func, pname); + } +} + +void GLAPIENTRY _mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param) { - /* to be implemented */ + GET_CURRENT_CONTEXT(ctx); + struct gl_framebuffer *fb; + + fb = get_framebuf
[Mesa-dev] [PATCH 7/7] i965: enable ARB_framebuffer_no_attachment extension
From: Kevin Rogovin Enable extension GL_ARB_framebuffer_no_attachment in i965 for Gen7 and higher. Lower gens are left disabled because those generations do not support fragment shaders with side effects. --- src/mesa/drivers/dri/i965/brw_context.c | 6 ++ src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 2 files changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9d90360..5c9577f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -608,6 +608,12 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_gpu_shader5 */ if (brw->gen >= 7) ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); + + /* ARB_framebuffer_no_attachments */ + ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth; + ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight; + ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; + ctx->Const.MaxFramebufferSamples = max_samples; } /** diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 48064e1..65cf2bb 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -304,6 +304,7 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_texture_view = true; + ctx->Extensions.ARB_framebuffer_no_attachments = true; if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] helper convenience functions for drivers to implement ARB_framebuffer_no_attachment
From: Kevin Rogovin To assist drivers to implement ARB_framebuffer_no_attachment, provide a set of convenience functions that check for gl_framebuffer::_HasAttachments that return the geometry of the gl_framebuffer. --- src/mesa/main/framebuffer.c | 49 ++--- src/mesa/main/framebuffer.h | 29 +++ src/mesa/main/mtypes.h | 21 ++- 3 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 4e4d896..7d8921b 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -357,30 +357,20 @@ update_framebuffer_size(struct gl_context *ctx, struct gl_framebuffer *fb) } + /** - * Calculate the inclusive bounding box for the scissor of a specific viewport + * Given a bounding box, intersect the bounding box with the scirros of + * a specified vieport. * * \param ctx GL context. - * \param buffer Framebuffer to be checked against * \param idx Index of the desired viewport * \param bboxBounding box for the scissored viewport. Stored as xmin, *xmax, ymin, ymax. - * - * \warning This function assumes that the framebuffer dimensions are up to - * date (e.g., update_framebuffer_size has been recently called on \c buffer). - * - * \sa _mesa_clip_to_region */ -void -_mesa_scissor_bounding_box(const struct gl_context *ctx, - const struct gl_framebuffer *buffer, - unsigned idx, int *bbox) +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox) { - bbox[0] = 0; - bbox[2] = 0; - bbox[1] = buffer->Width; - bbox[3] = buffer->Height; - if (ctx->Scissor.EnableFlags & (1u << idx)) { if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) { bbox[0] = ctx->Scissor.ScissorArray[idx].X; @@ -402,6 +392,33 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, bbox[2] = bbox[3]; } } +} + +/** + * Calculate the inclusive bounding box for the scissor of a specific viewport + * + * \param ctx GL context. 
+ * \param buffer Framebuffer to be checked against + * \param idx Index of the desired viewport + * \param bboxBounding box for the scissored viewport. Stored as xmin, + *xmax, ymin, ymax. + * + * \warning This function assumes that the framebuffer dimensions are up to + * date (e.g., update_framebuffer_size has been recently called on \c buffer). + * + * \sa _mesa_clip_to_region + */ +void +_mesa_scissor_bounding_box(const struct gl_context *ctx, + const struct gl_framebuffer *buffer, + unsigned idx, int *bbox) +{ + bbox[0] = 0; + bbox[2] = 0; + bbox[1] = buffer->Width; + bbox[3] = buffer->Height; + + _mesa_intersect_scissor_bounding_box(ctx, idx, bbox); assert(bbox[0] <= bbox[1]); assert(bbox[2] <= bbox[3]); diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index a427421..8b84d26 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -76,6 +76,35 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, const struct gl_framebuffer *buffer, unsigned idx, int *bbox); +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox); + +static inline GLuint +_mesa_geometric_width(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Width : buffer->DefaultGeometry.Width; +} + + +static inline GLuint +_mesa_geometric_height(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Height : buffer->DefaultGeometry.Height; +} + +static inline GLuint +_mesa_geometric_samples(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Visual.samples : buffer->DefaultGeometry.NumSamples; +} + +static inline GLuint +_mesa_geometric_layers(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? 
buffer->MaxNumLayers : buffer->DefaultGeometry.Layers; +} + extern void _mesa_update_draw_buffer_bounds(struct gl_context *ctx); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 38a3817..ac7cdb6 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3134,13 +3134,13 @@ struct gl_framebuffer struct gl_config Visual; /** -* size of frame buffer in pixels, -* no attachments has these values as 0 +* size of frame buffer in pixels, +* no attachments has these values as 0 */ - GLuint Width, Height; + GLuint Width, Height; /** -* In the case that the framebuffer has no attachment (i.e. +* In the case that the framebuffer has no attachment (i.e. * GL_ARB_framebuffer_no_attachmen
[Mesa-dev] [PATCH 5/7] i965: use _mesa_geometric_width/height/layers/samples for programming geometry of framebuffer to GEN
From: Kevin Rogovin To prepare for i965 to support ARB_framebuffer_no_attachment, use the convenience functions mesa_geometry_width/height/layers/samples to specify the geometry of the render target surfaces to the GPU. --- src/mesa/drivers/dri/i965/brw_clip_state.c | 9 - src/mesa/drivers/dri/i965/brw_misc_state.c | 12 -- src/mesa/drivers/dri/i965/brw_sf_state.c | 46 -- src/mesa/drivers/dri/i965/brw_state_upload.c | 7 +++- src/mesa/drivers/dri/i965/brw_wm.c | 7 ++-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 -- src/mesa/drivers/dri/i965/gen6_clip_state.c| 11 -- src/mesa/drivers/dri/i965/gen6_multisample_state.c | 3 +- src/mesa/drivers/dri/i965/gen6_scissor_state.c | 14 +-- src/mesa/drivers/dri/i965/gen6_sf_state.c | 3 +- src/mesa/drivers/dri/i965/gen6_viewport_state.c| 3 +- src/mesa/drivers/dri/i965/gen6_wm_state.c | 3 +- src/mesa/drivers/dri/i965/gen7_sf_state.c | 3 +- src/mesa/drivers/dri/i965/gen7_viewport_state.c| 3 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 3 +- src/mesa/drivers/dri/i965/gen8_viewport_state.c| 9 +++-- 16 files changed, 108 insertions(+), 41 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 3223834..3aa679f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -32,6 +32,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" static void upload_clip_vp(struct brw_context *brw) @@ -60,6 +61,10 @@ brw_upload_clip_unit(struct brw_context *brw) /* _NEW_BUFFERS */ struct gl_framebuffer *fb = ctx->DrawBuffer; + GLint fb_width, fb_height; + + fb_width = _mesa_geometric_width(fb); + fb_height = _mesa_geometric_height(fb); upload_clip_vp(brw); @@ -127,8 +132,8 @@ brw_upload_clip_unit(struct brw_context *brw) /* enable guardband clipping if we can */ if (ctx->ViewportArray[0].X == 0 && ctx->ViewportArray[0].Y == 0 && - ctx->ViewportArray[0].Width == (float) fb->Width 
&& - ctx->ViewportArray[0].Height == (float) fb->Height) + ctx->ViewportArray[0].Width == (float) fb_width && + ctx->ViewportArray[0].Height == (float) fb_height) { clip->clip5.guard_band_enable = 1; clip->clip6.clipper_viewport_state_ptr = diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 78a46cb..ef94a6e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -39,6 +39,7 @@ #include "brw_state.h" #include "brw_defines.h" +#include "main/framebuffer.h" #include "main/fbobject.h" #include "main/glformats.h" @@ -46,12 +47,17 @@ static void upload_drawing_rect(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + GLint fb_width, fb_height; + struct gl_framebuffer *fb = ctx->DrawBuffer; + + fb_width = _mesa_geometric_width(fb); + fb_height = _mesa_geometric_height(fb); BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0x) | - ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(((fb_width - 1) & 0x) | + ((fb_height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -767,7 +773,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * works just fine, and there's no window system to worry about. 
*/ if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) - OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); + OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31); else OUT_BATCH(0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 014b434..1fa3d44 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -35,6 +35,7 @@ #include "main/macros.h" #include "main/fbobject.h" #include "main/viewport.h" +#include "main/framebuffer.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -47,18 +48,42 @@ static void upload_sf_vp(struct brw_context *brw) GLfloat y_scale, y_bias; double scale[3], translate[3]; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + GLint fb_width, fb_height, xmin, xmax, ymin, ymax; sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); + if (ctx->DrawBuffer->_HasAttachments) { + fb_width = ctx->DrawBuffer->Width; + fb_height = ctx->DrawBuffer->Height; + xmin = ctx->DrawBuffer->_Xmin; + xmax = ctx->DrawBuffer->_Xmax; + ymin
[Mesa-dev] Please ignore posting: [PATCH] i965:Fix intel_mipmap_copy_teximage for GL_TEXTURE_1D_ARRAY
Please ignore the posting: [PATCH] i965:Fix intel_mipmap_copy_teximage for GL_TEXTURE_1D_ARRAY. Sigh. I gave git send-email the wrong directory. My big apologies for the idiot-noise. However, please do NOT ignore the patch series for ARB_framebuffer_no_attachment. Best Regards, -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension
The subject should be prefixed with "mesa:" On Thu, Apr 23, 2015 at 11:59 PM, wrote: > From: Kevin Rogovin > > Define enumerations, functions and associated glGet's for > extension ARB_framebuffer_no_attachment. > > --- > .../glapi/gen/ARB_framebuffer_no_attachments.xml | 33 ++ > src/mapi/glapi/gen/Makefile.am | 1 + > src/mapi/glapi/gen/gl_API.xml | 1 + > src/mesa/main/fbobject.c | 12 +++ > src/mesa/main/fbobject.h | 7 > src/mesa/main/get.c| 3 ++ > src/mesa/main/get_hash_params.py | 40 > ++ > 7 files changed, 97 insertions(+) > create mode 100644 src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml > > diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml > b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml > new file mode 100644 > index 000..60e40d0 > --- /dev/null > +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml > @@ -0,0 +1,33 @@ > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > + > diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am > index 1c4b86a..9a0e944 100644 > --- a/src/mapi/glapi/gen/Makefile.am > +++ b/src/mapi/glapi/gen/Makefile.am > @@ -130,6 +130,7 @@ API_XML = \ > ARB_ES2_compatibility.xml \ > ARB_ES3_compatibility.xml \ > ARB_framebuffer_object.xml \ > + ARB_framebuffer_no_attachments.xml \ This is an alphabetized list. Please keep it so. 
> ARB_geometry_shader4.xml \ > ARB_get_program_binary.xml \ > ARB_gpu_shader_fp64.xml \ > diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml > index a8a6db6..4eea396 100644 > --- a/src/mapi/glapi/gen/gl_API.xml > +++ b/src/mapi/glapi/gen/gl_API.xml > @@ -8325,6 +8325,7 @@ > > > > + xmlns:xi="http://www.w3.org/2001/XInclude"/> > > > > diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c > index eabbb96..5c78c40 100644 > --- a/src/mesa/main/fbobject.c > +++ b/src/mesa/main/fbobject.c > @@ -1292,6 +1292,18 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint > renderbuffer) > bind_renderbuffer(target, renderbuffer, true); > } > > +extern void GLAPIENTRY > +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param) > +{ > + /* to be implemented */ > +} > + > +extern void GLAPIENTRY > +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params) > +{ > + /* to be implemented */ > +} > + > > /** > * Remove the specified renderbuffer or texture from any attachment point in > diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h > index 61aa1f5..76adb92 100644 > --- a/src/mesa/main/fbobject.h > +++ b/src/mesa/main/fbobject.h > @@ -211,4 +211,11 @@ extern void GLAPIENTRY > _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, > const GLenum *attachments); > > + > +extern void GLAPIENTRY > +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param); > + > +extern void GLAPIENTRY > +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params); > + > #endif /* FBOBJECT_H */ > diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c > index a881bc5..ca9d13c 100644 > --- a/src/mesa/main/get.c > +++ b/src/mesa/main/get.c > @@ -393,6 +393,7 @@ EXTRA_EXT(INTEL_performance_query); > EXTRA_EXT(ARB_explicit_uniform_location); > EXTRA_EXT(ARB_clip_control); > EXTRA_EXT(EXT_polygon_offset_clamp); > +EXTRA_EXT(ARB_framebuffer_no_attachments); > > static const int > 
extra_ARB_color_buffer_float_or_glcore[] = { > @@ -466,6 +467,8 @@ static const int > extra_core_ARB_color_buffer_float_and_new_buffers[] = { > * remaining combinations. To look up the enums valid in a given API > * we will use a hash table specific to that API. These tables are in > * turn generated at build time and included through get_hash.h. > + * To add an entry (i.e. a new enumeration for glGet that is taken > + * from the table) add an entry to get_hash_params.py. > */ > > #include "get_hash.h" > diff --git a/src/mesa/main/get_hash_params.py > b/src/mesa/main/get_hash_params.py > index 41cb2c1..8712c46 100644 > --- a/src/mesa/main/get_hash_params.py > +++ b/src/mesa/main/get_hash_params.py > @@ -1,3 +1,35 @@ > +# glGet pnames possible, organized first by GL version > +# and then organized by GL extension. > +# There is a list for different combinations of GL_ version > +# that combination is specified by the field "apis". > +# The field "params" is a list of entries describing Here, and the previous hunk... I like additional documentation, but it needs to be in a separate patch. > +# how glGet* operates when passed a pname. > +# To add a pname for glGet* to handle, the entry is of the form > +# a) [ "PnameX", "
Re: [Mesa-dev] [PATCH 1/7] Define extension ARB_framebuffer_no_attachments and additions to gl_framebuffer for extension ARB_framebuffer_no_attachments
There's trailing white space errors in quite a few lines. I noticed that patch 4 corrects some of these but you should really fix them in this commit, patch 4 should not touch them. On 04/24/2015 09:59 AM, kevin.rogo...@intel.com wrote: From: Kevin Rogovin Add extension flag and constant values for ARB_framebuffer_no_attachments. --- src/mesa/main/extensions.c | 1 + src/mesa/main/fbobject.c| 1 + src/mesa/main/framebuffer.c | 1 + src/mesa/main/mtypes.h | 52 - 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 3d4965c..43b5c0b 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -118,6 +118,7 @@ static const struct extension extension_table[] = { { "GL_ARB_fragment_program_shadow", o(ARB_fragment_program_shadow), GLL,2003 }, { "GL_ARB_fragment_shader", o(ARB_fragment_shader), GL, 2002 }, { "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 }, + { "GL_ARB_framebuffer_no_attachments", o(ARB_framebuffer_no_attachments), GL, 2012 }, { "GL_ARB_framebuffer_sRGB",o(EXT_framebuffer_sRGB), GL, 1998 }, { "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 }, { "GL_ARB_gpu_shader5", o(ARB_gpu_shader5), GLC,2010 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 27cf97f..eabbb96 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -914,6 +914,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, fb->Height = 0; fb->_AllColorBuffersFixedPoint = GL_TRUE; fb->_HasSNormOrFloatColorBuffer = GL_FALSE; + fb->_HasAttachments = GL_TRUE; /* Start at -2 to more easily loop over all attachment points. 
* -2: depth buffer diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 4f7736a..4e4d896 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -157,6 +157,7 @@ _mesa_initialize_window_framebuffer(struct gl_framebuffer *fb, fb->_Status = GL_FRAMEBUFFER_COMPLETE_EXT; fb->_AllColorBuffersFixedPoint = !visual->floatMode; fb->_HasSNormOrFloatColorBuffer = visual->floatMode; + fb->_HasAttachments = GL_TRUE; compute_depth_max(fb); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index fb41430..38a3817 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3133,12 +3133,29 @@ struct gl_framebuffer */ struct gl_config Visual; - GLuint Width, Height; /**< size of frame buffer in pixels */ + /** +* size of frame buffer in pixels, +* no attachments has these values as 0 +*/ + GLuint Width, Height; + + /** +* In the case that the framebuffer has no attachment (i.e. +* GL_ARB_framebuffer_no_attachments) then the geometry of +* the framebuffer is specified by the default values. 
+*/ + struct { + GLuint Width, Height, Layers, NumSamples; + GLboolean FixedSampleLocations; + } DefaultGeometry; - /** \name Drawing bounds (Intersection of buffer size and scissor box) */ + /** \name Drawing bounds (Intersection of buffer size and scissor box) +* The drawing region is given by [_Xmin, _Xmax) x [_Ymin, _Ymax), +* (inclusive for _Xmin and _Ymin while exclusive for _Xmax and _Ymax) +*/ /*@{*/ - GLint _Xmin, _Xmax; /**< inclusive */ - GLint _Ymin, _Ymax; /**< exclusive */ + GLint _Xmin, _Xmax; + GLint _Ymin, _Ymax; /*@}*/ /** \name Derived Z buffer stuff */ @@ -3151,6 +3168,20 @@ struct gl_framebuffer /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ GLenum _Status; + /** True if both of the conditions: +* - one of Attachment has gl_renderbuffer_attachment::Type != GL_NONE +* - _Status is GL_FRAMEBUFFER_COMPLETE_EXT +* NOTE: the values for Width and Height are set to 0 in the +* case of no attachments, a backend driver supporting +* GL_ARB_framebuffer_no_attachments must check for +* the flag _HasAttachments and if GL_FALSE, must then +* use the values in DefaultGeometry to initialize its +* viewport, scissor and so on (in particular _Xmin, _Xmax, +* _Ymin and _Ymax do NOT take into account _HasAttachments +* being false IMO the documentation here would look better if you would use a bit longer lines and overall more consistent line length. +*/ + GLboolean _HasAttachments; + /** Integer color values */ GLboolean _IntegerColor; @@ -3161,7 +3192,9 @@ struct gl_framebuffer /** * The maximum number of layers in the framebuf
Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension
On 04/24/2015 09:59 AM, kevin.rogo...@intel.com wrote: From: Kevin Rogovin Define enumerations, functions and associated glGet's for extension ARB_framebuffer_no_attachment. --- .../glapi/gen/ARB_framebuffer_no_attachments.xml | 33 ++ src/mapi/glapi/gen/Makefile.am | 1 + src/mapi/glapi/gen/gl_API.xml | 1 + src/mesa/main/fbobject.c | 12 +++ src/mesa/main/fbobject.h | 7 src/mesa/main/get.c| 3 ++ src/mesa/main/get_hash_params.py | 40 ++ 7 files changed, 97 insertions(+) create mode 100644 src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml new file mode 100644 index 000..60e40d0 --- /dev/null +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + For the function parameters that are written to (param, params here) add output="true" to the xml entries. + + + + + diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am index 1c4b86a..9a0e944 100644 --- a/src/mapi/glapi/gen/Makefile.am +++ b/src/mapi/glapi/gen/Makefile.am @@ -130,6 +130,7 @@ API_XML = \ ARB_ES2_compatibility.xml \ ARB_ES3_compatibility.xml \ ARB_framebuffer_object.xml \ + ARB_framebuffer_no_attachments.xml \ ARB_geometry_shader4.xml \ ARB_get_program_binary.xml \ ARB_gpu_shader_fp64.xml \ diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index a8a6db6..4eea396 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8325,6 +8325,7 @@ +http://www.w3.org/2001/XInclude"/> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index eabbb96..5c78c40 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1292,6 +1292,18 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } +extern void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, 
GLint param) +{ + /* to be implemented */ +} + +extern void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params) +{ + /* to be implemented */ +} + /** * Remove the specified renderbuffer or texture from any attachment point in diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index 61aa1f5..76adb92 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -211,4 +211,11 @@ extern void GLAPIENTRY _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments, const GLenum *attachments); + +extern void GLAPIENTRY +_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param); + +extern void GLAPIENTRY +_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params); + #endif /* FBOBJECT_H */ diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index a881bc5..ca9d13c 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -393,6 +393,7 @@ EXTRA_EXT(INTEL_performance_query); EXTRA_EXT(ARB_explicit_uniform_location); EXTRA_EXT(ARB_clip_control); EXTRA_EXT(EXT_polygon_offset_clamp); +EXTRA_EXT(ARB_framebuffer_no_attachments); static const int extra_ARB_color_buffer_float_or_glcore[] = { @@ -466,6 +467,8 @@ static const int extra_core_ARB_color_buffer_float_and_new_buffers[] = { * remaining combinations. To look up the enums valid in a given API * we will use a hash table specific to that API. These tables are in * turn generated at build time and included through get_hash.h. + * To add an entry (i.e. a new enumeration for glGet that is taken + * from the table) add an entry to get_hash_params.py. */ #include "get_hash.h" diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 41cb2c1..8712c46 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -1,3 +1,35 @@ +# glGet pnames possible, organized first by GL version +# and then organized by GL extension. 
+# There is a list for different combinations of GL_ version +# that combination is specified by the field "apis". +# The field "params" is a list of entries describing +# how glGet* operates when passed a pname. +# To add a pname for glGet* to handle, the entry is of the form +# a) [ "PnameX", "LOC_CUSTOM, TypeX, ExtraRequirementX" ] +# OR +# b) [ "PnameX", "OffsetX, ExtraRequirement" ] +# +# - PnameX is the name of the pname passed to glGet without the GL_ prefix +# - ExtraRequirement is extra requirment declared in get.c listing +# GL requirements (such as extension requirements). If no extra +# requirement is needed, then it is NO_EX
Re: [Mesa-dev] [PATCH v2] mesa: add support for exposing up to GL4.2
On 04/23/2015 07:28 PM, Ilia Mirkin wrote: Add the 4.0/4.1/4.2 extensions lists to compute_version. A coule of extensions aren't in mesa yet, so those are marked with 0 until they become supported. Signed-off-by: Ilia Mirkin coule -> couple. -- Petri Latvala ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/7] Complete implementation for ARB_framebuffer_no_attachment in Mesa core: implement GetFramebufferParameteriv, FramebufferParameteri and changes to _mesa_test_framebuffer_comp
Functionally everything looks correct to me, as a coding style fix, don't exceed 80 characters per line, there are few lines that do this. With that fixed; Reviewed-by: Tapani Pälli On 04/24/2015 09:59 AM, kevin.rogo...@intel.com wrote: From: Kevin Rogovin Complete the implementation of ARB_framebuffer_no_attachment in Mesa core: 1. Implement _mesa_GetFramebufferParameteriv() and _mesa_FramebufferParameteri() 2. Additions to _mesa_test_framebuffer_completeness() --- src/mesa/main/fbobject.c | 153 --- 1 file changed, 144 insertions(+), 9 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 5c78c40..6c0c2b0 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1113,14 +1113,49 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, } else if (att_layer_count > max_layer_count) { max_layer_count = att_layer_count; } + + /** + * The extension GL_ARB_framebuffer_no_attachments places the additional + * requirement on each attachment that + * + * "The width and height of image are greater than zero and less than or equal + * to the values of the implementation-dependent limits MAX_FRAMEBUFFER_- + * WIDTH and MAX_FRAMEBUFFER_HEIGHT , respectively. " + * + * "If is a three-dimensional texture or a one- or two-dimensional + * array texture and the attachment is layered, the depth or layer count of + * the texture is less than or equal to the implementation-dependent limit + * MAX_FRAMEBUFFER_LAYERS." + * + * "If image has multiple samples, its sample count is less than or equal to + * the value of the implementation-dependent limit MAX_FRAMEBUFFER_- + * SAMPLES ." + * + * The same requirements are also in place for GL 4.5, + * Section 9.4.1 "Framebuffer Attachment Completeness", pg 310-311 + * + * However, this is a tighter restriction than previous version of GL. + * In interest of better compatibility, we will not enforce these + * restrictions. 
+ */ } fb->MaxNumLayers = max_layer_count; if (numImages == 0) { - fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; - fbo_incomplete(ctx, "no attachments", -1); - return; + fb->_HasAttachments = GL_FALSE; + + if (!ctx->Extensions.ARB_framebuffer_no_attachments) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments", -1); + return; + } + + if (fb->DefaultGeometry.Width == 0 || fb->DefaultGeometry.Height == 0) { + fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT; + fbo_incomplete(ctx, "no attachments and default width or height is 0", -1); + return; + } } if (_mesa_is_desktop_gl(ctx) && !ctx->Extensions.ARB_ES2_compatibility) { @@ -1185,8 +1220,10 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, * renderbuffers/textures are different sizes, the framebuffer * width/height will be set to the smallest width/height. */ - fb->Width = minWidth; - fb->Height = minHeight; + if (numImages != 0) { + fb->Width = minWidth; + fb->Height = minHeight; + } /* finally, update the visual info for the framebuffer */ _mesa_update_framebuffer_visual(ctx, fb); @@ -1292,16 +1329,114 @@ _mesa_BindRenderbufferEXT(GLenum target, GLuint renderbuffer) bind_renderbuffer(target, renderbuffer, true); } -extern void GLAPIENTRY +static void +framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum pname, GLint param, const char *func) +{ + switch (pname) { + case GL_FRAMEBUFFER_DEFAULT_WIDTH: + if (param < 0 || param > ctx->Const.MaxFramebufferWidth) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Width = param; + break; + case GL_FRAMEBUFFER_DEFAULT_HEIGHT: + if (param < 0 || param > ctx->Const.MaxFramebufferHeight) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else + fb->DefaultGeometry.Height = param; + break; + case GL_FRAMEBUFFER_DEFAULT_LAYERS: + if (param < 0 || param > ctx->Const.MaxFramebufferLayers) +_mesa_error(ctx, GL_INVALID_VALUE, 
"%s", func); + else + fb->DefaultGeometry.Layers = param; + break; + case GL_FRAMEBUFFER_DEFAULT_SAMPLES: + if (param < 0 || param > ctx->Const.MaxFramebufferSamples) +_mesa_error(ctx, GL_INVALID_VALUE, "%s", func); + else +fb->DefaultGeometry.NumSamples = param; + break; + case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS: + fb->DefaultGeometry.FixedSampleLocations = param; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, +
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
Reviewed-by: Tapani Pälli On 04/24/2015 09:59 AM, kevin.rogo...@intel.com wrote: From: Kevin Rogovin Ensure that the GPU spawns the fragment shader thread for those fragment shaders with atomic buffer access. --- src/mesa/drivers/dri/i965/gen7_wm_state.c | 7 +++ src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 2 files changed, 11 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 82e116c..fa04221 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -77,6 +77,13 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_KILL_ENABLE; } + /* pixel shader must run if it has side-effects +*/ + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) { + dw1 |= GEN7_WM_DISPATCH_ENABLE; + } + /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || dw1 & GEN7_WM_KILL_ENABLE) { diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 5f39e12..614bc9b 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -62,6 +62,10 @@ upload_ps_extra(struct brw_context *brw) if (prog_data->uses_omask) dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) + dw1 |= GEN8_PSX_SHADER_HAS_UAV; + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Initial amdgpu driver release
On Fri, Apr 24, 2015 at 9:43 AM, Zhou, Jammy wrote: > Hi Alex, > > For the core driver patch: > > +config DRM_AMDGPU > + tristate "AMD GPU" > + depends on DRM && PCI > + select FB_CFB_FILLRECT > + select FB_CFB_COPYAREA > + select FB_CFB_IMAGEBLIT > + select FW_LOADER > +select DRM_KMS_HELPER > + select DRM_KMS_FB_HELPER > +select DRM_TTM > + select POWER_SUPPLY > + select HWMON > + select BACKLIGHT_CLASS_DEVICE > + select DRM_AMD_GNB_BUS > + select INTERVAL_TREE > > I think DRM_AMD_GNB_BUS is not used, we can probably remove it now. > > +/* TODO: Here are things that needs to be done : > + * - surface allocator & initializer : (bit like scratch reg) should > + * initialize HDP_ stuff on RS600, R600, R700 hw, well anythings > + * related to surface > + * - WB : write back stuff (do it bit like scratch reg things) > + * - Vblank : look at Jesse's rework and what we should do > + * - r600/r700: gart & cp > + * - cs : clean cs ioctl use bitmap & things like that. > + * - power management stuff > + * - Barrier in gart code > + * - Unmappabled vram ? > + * - TESTING, TESTING, TESTING > + */ > + > +/* Initialization path: > + * We expect that acceleration initialization might fail for various > + * reasons even thought we work hard to make it works on most > + * configurations. In order to still have a working userspace in such > + * situation the init path must succeed up to the memory controller > + * initialization point. Failure before this point are considered as > + * fatal error. 
Here is the init callchain : > + * amdgpu_device_init perform common structure, mutex initialization > + * asic_init setup the GPU memory layout and perform all > + * one time initialization (failure in this > + * function are considered fatal) > + * asic_startupsetup the GPU acceleration, in order to > + * follow guideline the first thing this > + * function should do is setting the GPU > + * memory controller (only MC setup failure > + * are considered as fatal) > + */ > + > These should be outdated, and I think they can be removed now. > > For the uapi header patch: > > +#define AMDGPU_TILING_MACRO0x1 > +#define AMDGPU_TILING_MICRO0x2 > +#define AMDGPU_TILING_SWAP_16BIT 0x4 > +#define AMDGPU_TILING_R600_NO_SCANOUT > AMDGPU_TILING_SWAP_16BIT > +#define AMDGPU_TILING_SWAP_32BIT 0x8 > +/* this object requires a surface when mapped - i.e. front buffer */ > +#define AMDGPU_TILING_SURFACE 0x10 > +#define AMDGPU_TILING_MICRO_SQUARE 0x20 > +#define AMDGPU_TILING_EG_BANKW_SHIFT 8 > +#define AMDGPU_TILING_EG_BANKW_MASK0xf > +#define AMDGPU_TILING_EG_BANKH_SHIFT 12 > +#define AMDGPU_TILING_EG_BANKH_MASK0xf > +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT 16 > +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK0xf > +#define AMDGPU_TILING_EG_TILE_SPLIT_SHIFT 24 > +#define AMDGPU_TILING_EG_TILE_SPLIT_MASK 0xf > +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT 28 > +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf > .. 
> +#define SI_TILE_MODE_COLOR_LINEAR_ALIGNED 8 > +#define SI_TILE_MODE_COLOR_1D 13 > +#define SI_TILE_MODE_COLOR_1D_SCANOUT 9 > +#define SI_TILE_MODE_COLOR_2D_8BPP 14 > +#define SI_TILE_MODE_COLOR_2D_16BPP15 > +#define SI_TILE_MODE_COLOR_2D_32BPP16 > +#define SI_TILE_MODE_COLOR_2D_64BPP17 > +#define SI_TILE_MODE_COLOR_2D_SCANOUT_16BPP11 > +#define SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP12 > +#define SI_TILE_MODE_DEPTH_STENCIL_1D 4 > +#define SI_TILE_MODE_DEPTH_STENCIL_2D 0 > +#define SI_TILE_MODE_DEPTH_STENCIL_2D_2AA 3 > +#define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA 3 > +#define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2 > + > +#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5 > > It looks these definitions are not used by libdrm_amdgpu anymore (and even by > the kernel driver). Maybe we can remove the unused definitions, and move the > used ones to amdgpu.h instead. Besides, I think we'd better remove > 'R600/EG/SI/CIK' from the naming. The AMDGPU_TILING definitions are used by UMDs to set amdgpu_bo_metadata::tiling_info. I plan to rework them, because they are not sufficient for VI and contain obsolete stuff. The SI/CIK_TILE_MODE definitions can be removed indeed. Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/m
Re: [Mesa-dev] [PATCH 23/23] st/mesa: enable shader subroutine
I'd enable this if at least GLSL 1.30 was supported. I think drivers like r300, nv30, i915, and svga shouldn't get this. Marek On Fri, Apr 24, 2015 at 3:42 AM, Dave Airlie wrote: > From: Dave Airlie > > I'm not sure if we shouldn't enable this everywhere > and rip out the API checks, > > discuss, > > Signed-off-by: Dave Airlie > --- > src/mesa/state_tracker/st_extensions.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/mesa/state_tracker/st_extensions.c > b/src/mesa/state_tracker/st_extensions.c > index 25932dd..d22fc4f 100644 > --- a/src/mesa/state_tracker/st_extensions.c > +++ b/src/mesa/state_tracker/st_extensions.c > @@ -598,6 +598,7 @@ void st_init_extensions(struct pipe_screen *screen, > extensions->ARB_half_float_vertex = GL_TRUE; > extensions->ARB_internalformat_query = GL_TRUE; > extensions->ARB_map_buffer_range = GL_TRUE; > + extensions->ARB_shader_subroutine = GL_TRUE; > extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */ > extensions->ARB_texture_cube_map = GL_TRUE; > extensions->ARB_texture_env_combine = GL_TRUE; > -- > 2.1.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7] helper-convenience functions for drivers to implement ARB_framebuffer_no_attachment
Functionally correct, style nitpicks below, with those fixed; Reviewed-by: Tapani Pälli On 04/24/2015 09:59 AM, kevin.rogo...@intel.com wrote: From: Kevin Rogovin To assist drivers to implement ARB_framebuffer_no_attachment, provide a set of convenience functions that check for gl_framebuffer::_HasAttachments that return the geometry of the gl_framebuffer. --- src/mesa/main/framebuffer.c | 49 ++--- src/mesa/main/framebuffer.h | 29 +++ src/mesa/main/mtypes.h | 21 ++- 3 files changed, 74 insertions(+), 25 deletions(-) diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 4e4d896..7d8921b 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -357,30 +357,20 @@ update_framebuffer_size(struct gl_context *ctx, struct gl_framebuffer *fb) } + /** - * Calculate the inclusive bounding box for the scissor of a specific viewport + * Given a bounding box, intersect the bounding box with the scirros of scirros -> scissor + * a specified vieport. * * \param ctx GL context. - * \param buffer Framebuffer to be checked against * \param idx Index of the desired viewport * \param bboxBounding box for the scissored viewport. Stored as xmin, *xmax, ymin, ymax. - * - * \warning This function assumes that the framebuffer dimensions are up to - * date (e.g., update_framebuffer_size has been recently called on \c buffer). 
- * - * \sa _mesa_clip_to_region */ -void -_mesa_scissor_bounding_box(const struct gl_context *ctx, - const struct gl_framebuffer *buffer, - unsigned idx, int *bbox) +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox) { - bbox[0] = 0; - bbox[2] = 0; - bbox[1] = buffer->Width; - bbox[3] = buffer->Height; - if (ctx->Scissor.EnableFlags & (1u << idx)) { if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) { bbox[0] = ctx->Scissor.ScissorArray[idx].X; @@ -402,6 +392,33 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, bbox[2] = bbox[3]; } } +} + +/** + * Calculate the inclusive bounding box for the scissor of a specific viewport + * + * \param ctx GL context. + * \param buffer Framebuffer to be checked against + * \param idx Index of the desired viewport + * \param bboxBounding box for the scissored viewport. Stored as xmin, + *xmax, ymin, ymax. + * + * \warning This function assumes that the framebuffer dimensions are up to + * date (e.g., update_framebuffer_size has been recently called on \c buffer). + * + * \sa _mesa_clip_to_region + */ +void +_mesa_scissor_bounding_box(const struct gl_context *ctx, + const struct gl_framebuffer *buffer, + unsigned idx, int *bbox) +{ + bbox[0] = 0; + bbox[2] = 0; + bbox[1] = buffer->Width; + bbox[3] = buffer->Height; + + _mesa_intersect_scissor_bounding_box(ctx, idx, bbox); assert(bbox[0] <= bbox[1]); assert(bbox[2] <= bbox[3]); diff --git a/src/mesa/main/framebuffer.h b/src/mesa/main/framebuffer.h index a427421..8b84d26 100644 --- a/src/mesa/main/framebuffer.h +++ b/src/mesa/main/framebuffer.h @@ -76,6 +76,35 @@ _mesa_scissor_bounding_box(const struct gl_context *ctx, const struct gl_framebuffer *buffer, unsigned idx, int *bbox); +extern void +_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx, + unsigned idx, int *bbox); + These functions should not exceed 80-char limit. 
+static inline GLuint +_mesa_geometric_width(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Width : buffer->DefaultGeometry.Width; +} + + +static inline GLuint +_mesa_geometric_height(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Height : buffer->DefaultGeometry.Height; +} + +static inline GLuint +_mesa_geometric_samples(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->Visual.samples : buffer->DefaultGeometry.NumSamples; +} + +static inline GLuint +_mesa_geometric_layers(const struct gl_framebuffer *buffer) +{ + return buffer->_HasAttachments ? buffer->MaxNumLayers : buffer->DefaultGeometry.Layers; +} + extern void _mesa_update_draw_buffer_bounds(struct gl_context *ctx); As mentioned, lines below belong to other patch. diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 38a3817..ac7cdb6 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3134,13 +3134,13 @@ struct gl_framebuffer struct gl_config Visual; /** -* size of frame buffer in pixels, -* no attachments has these values as 0 +* size of frame buffer in p
[Mesa-dev] [PATCH 2/4] clover: remove compat::string
--- src/gallium/state_trackers/clover/api/program.cpp | 2 +- .../state_trackers/clover/core/compiler.hpp| 14 +-- src/gallium/state_trackers/clover/core/error.hpp | 4 +- src/gallium/state_trackers/clover/core/program.cpp | 2 +- .../state_trackers/clover/llvm/invocation.cpp | 22 ++--- .../state_trackers/clover/tgsi/compiler.cpp| 7 +- src/gallium/state_trackers/clover/util/compat.hpp | 104 - 7 files changed, 26 insertions(+), 129 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index c985690..b3be2b8 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -216,7 +216,7 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, throw error(CL_INVALID_OPERATION); if (!any_of(key_equals(name), headers)) -headers.push_back(std::pair( +headers.push_back(std::pair( name, header.source())); }, range(header_names, num_headers), diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index bec8aac..62c0f47 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -29,17 +29,17 @@ #include "pipe/p_defines.h" namespace clover { - typedef compat::vector > header_map; + typedef compat::vector > header_map; - module compile_program_llvm(const compat::string &source, + module compile_program_llvm(const std::string &source, const header_map &headers, pipe_shader_ir ir, - const compat::string &target, - const compat::string &opts, - compat::string &r_log); + const std::string &target, + const std::string &opts, + std::string &r_log); - module compile_program_tgsi(const compat::string &source); + module compile_program_tgsi(const std::string &source); } #endif diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 45a38c1..805a0ec 100644 --- 
a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -54,7 +54,7 @@ namespace clover { /// class error : public std::runtime_error { public: - error(cl_int code, compat::string what = "") : + error(cl_int code, std::string what = "") : std::runtime_error(what), code(code) { } @@ -68,7 +68,7 @@ namespace clover { class build_error : public error { public: - build_error(const compat::string &what = "") : + build_error(const std::string &what = "") : error(CL_COMPILE_PROGRAM_FAILURE, what) { } }; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index c07548c..50ac01b 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -52,7 +52,7 @@ program::build(const ref_vector &devs, const char *opts, _opts.insert({ &dev, opts }); - compat::string log; + std::string log; try { auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? 
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index e07d95b..2157909 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -136,7 +136,7 @@ namespace { const std::string &name, const std::string &triple, const std::string &processor, const std::string &opts, clang::LangAS::Map& address_spaces, unsigned &optimization_level, -compat::string &r_log) { +std::string &r_log) { clang::CompilerInstance c; clang::EmitLLVMOnlyAction act(&llvm_ctx); @@ -470,7 +470,7 @@ namespace { emit_code(LLVMTargetMachineRef tm, LLVMModuleRef mod, LLVMCodeGenFileType file_type, LLVMMemoryBufferRef *out_buffer, - compat::string &r_log) { + std::string &r_log) { LLVMBool err; char *err_message = NULL; @@ -491,7 +491,7 @@ namespace { std::vector compile_native(const llvm::Module *mod, const std::string &triple, const std::string &processor, unsigned dump_asm, - compat::string &r_log) { + std::string &r_log) { std::string log; LLVMTargetRef target; @@ -545,7 +545,7 @@ namespace { std::map get_kernel_offsets(std::vector
[Mesa-dev] [PATCH 1/4] clover: remove compat classes that match std one
--- src/gallium/state_trackers/clover/Makefile.sources | 1 - src/gallium/state_trackers/clover/api/program.cpp | 2 +- .../state_trackers/clover/core/compiler.hpp| 2 +- src/gallium/state_trackers/clover/core/error.hpp | 6 ++-- src/gallium/state_trackers/clover/util/compat.cpp | 38 -- src/gallium/state_trackers/clover/util/compat.hpp | 27 --- 6 files changed, 6 insertions(+), 70 deletions(-) delete mode 100644 src/gallium/state_trackers/clover/util/compat.cpp diff --git a/src/gallium/state_trackers/clover/Makefile.sources b/src/gallium/state_trackers/clover/Makefile.sources index 5b3344c..03eb754 100644 --- a/src/gallium/state_trackers/clover/Makefile.sources +++ b/src/gallium/state_trackers/clover/Makefile.sources @@ -45,7 +45,6 @@ CPP_SOURCES := \ util/adaptor.hpp \ util/algebra.hpp \ util/algorithm.hpp \ - util/compat.cpp \ util/compat.hpp \ util/factor.hpp \ util/functional.hpp \ diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index 60184ed..c985690 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -216,7 +216,7 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, throw error(CL_INVALID_OPERATION); if (!any_of(key_equals(name), headers)) -headers.push_back(compat::pair( +headers.push_back(std::pair( name, header.source())); }, range(header_names, num_headers), diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 7210d1e..bec8aac 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -29,7 +29,7 @@ #include "pipe/p_defines.h" namespace clover { - typedef compat::vector > header_map; module compile_program_llvm(const compat::string &source, diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 7b010f1..45a38c1 100644 --- 
a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -25,6 +25,8 @@ #include "CL/cl.h" +#include + #include "util/compat.hpp" namespace clover { @@ -50,10 +52,10 @@ namespace clover { /// Class that represents an error that can be converted to an /// OpenCL status code. /// - class error : public compat::runtime_error { + class error : public std::runtime_error { public: error(cl_int code, compat::string what = "") : - compat::runtime_error(what), code(code) { + std::runtime_error(what), code(code) { } cl_int get() const { diff --git a/src/gallium/state_trackers/clover/util/compat.cpp b/src/gallium/state_trackers/clover/util/compat.cpp deleted file mode 100644 index 80d5b3e..000 --- a/src/gallium/state_trackers/clover/util/compat.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright 2013 Francisco Jerez -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. 
-// - -#include "util/compat.hpp" - -using namespace clover::compat; - -exception::~exception() { -} - -const char * -exception::what() const { - return ""; -} - -const char * -runtime_error::what() const { - return _what.c_str(); -} diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index 735994f..ea7d3a0 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -411,33 +411,6 @@ namespace clover { private: mutable vector v; }; - - template - struct pair { - pair(T first, S second) : -first(first), second(second) {} - - T first; -
[Mesa-dev] [PATCH 0/4] clover: this series removes util/compat.*
Since clover should now be compiled with -std=c++11, the compat classes are no longer necessary EdB (4): clover: remove compat classes that match std one clover: remove compat::string clover: make module::symbol::name a string clover: remove util/compat src/gallium/state_trackers/clover/Makefile.sources | 2 - src/gallium/state_trackers/clover/api/program.cpp | 19 +- .../state_trackers/clover/core/compiler.hpp| 14 +- src/gallium/state_trackers/clover/core/error.hpp | 10 +- src/gallium/state_trackers/clover/core/kernel.cpp | 2 +- src/gallium/state_trackers/clover/core/module.cpp | 56 ++- src/gallium/state_trackers/clover/core/module.hpp | 23 +- src/gallium/state_trackers/clover/core/program.cpp | 4 +- src/gallium/state_trackers/clover/core/program.hpp | 2 +- .../state_trackers/clover/llvm/invocation.cpp | 42 +- .../state_trackers/clover/tgsi/compiler.cpp| 12 +- src/gallium/state_trackers/clover/util/compat.cpp | 38 -- src/gallium/state_trackers/clover/util/compat.hpp | 444 - 13 files changed, 105 insertions(+), 563 deletions(-) delete mode 100644 src/gallium/state_trackers/clover/util/compat.cpp delete mode 100644 src/gallium/state_trackers/clover/util/compat.hpp -- 2.3.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] clover: make module::symbol::name a string
--- src/gallium/state_trackers/clover/api/program.cpp | 3 +-- src/gallium/state_trackers/clover/core/module.cpp | 21 + src/gallium/state_trackers/clover/core/module.hpp | 4 ++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index b3be2b8..913d195 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -293,8 +293,7 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param, case CL_PROGRAM_KERNEL_NAMES: buf.as_string() = fold([](const std::string &a, const module::symbol &s) { -return ((a.empty() ? "" : a + ";") + -std::string(s.name.begin(), s.name.size())); +return ((a.empty() ? "" : a + ";") + s.name); }, std::string(), prog.symbols()); break; diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index be10e35..f098b05 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -133,6 +133,27 @@ namespace { } }; + /// (De)serialize a string. + template<> + struct _serializer { + static void + proc(compat::ostream &os, const std::string &s) { + _proc(os, s.size()); + os.write(&s[0], s.size() * sizeof(std::string::value_type)); + } + + static void + proc(compat::istream &is, std::string &s) { + s.resize(_proc(is)); + is.read(&s[0], s.size() * sizeof(std::string::value_type)); + } + + static void + proc(module::size_t &sz, const std::string &s) { + sz += sizeof(uint32_t) + sizeof(std::string::value_type) * s.size(); + } + }; + /// (De)serialize a module::section. 
template<> struct _serializer { diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp index ee6caf9..46112a3 100644 --- a/src/gallium/state_trackers/clover/core/module.hpp +++ b/src/gallium/state_trackers/clover/core/module.hpp @@ -100,12 +100,12 @@ namespace clover { }; struct symbol { - symbol(const compat::vector &name, resource_id section, + symbol(const std::string &name, resource_id section, size_t offset, const compat::vector &args) : name(name), section(section), offset(offset), args(args) { } symbol() : name(), section(0), offset(0), args() { } - compat::vector name; + std::string name; resource_id section; size_t offset; compat::vector args; -- 2.3.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] EGL Sync extensions
Ping for the non-OpenCL stuff. Thanks, Marek On Wed, Apr 15, 2015 at 12:19 AM, Marek Olšák wrote: > Hi, > > This patch series implements: > - EGL_KHR_fence_sync > - EGL_KHR_wait_sync > - EGL_KHR_cl_event2 > > These extensions are required by EGL 1.5. > > EGL_KHR_fence_sync has piglit tests. The other two don't. EGL_KHR_wait_sync > doesn't do anything useful, because no Mesa driver implements the equivalent > function from GL_ARB_sync anyway, though at least the driver interface is > added. > > EGL_KHR_cl_event2 is tricky one. I can't do anything about the fact it's in > EGL 1.5. This series implements the extension for interop with clover, though > there is also an interface if some drivers want to do it differently. When > the first cl_event is imported to EGL, st/dri calls dlsym(RTLD_DEFAULT, ..) > to get entrypoints to the OpenCL driver. Not sure if that's the best way. > > Please review. > > Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Initial amdgpu driver release
Hi Alex, For the core driver patch: +config DRM_AMDGPU + tristate "AMD GPU" + depends on DRM && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select FW_LOADER +select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER +select DRM_TTM + select POWER_SUPPLY + select HWMON + select BACKLIGHT_CLASS_DEVICE + select DRM_AMD_GNB_BUS + select INTERVAL_TREE I think DRM_AMD_GNB_BUS is not used, we can probably remove it now. +/* TODO: Here are things that needs to be done : + * - surface allocator & initializer : (bit like scratch reg) should + * initialize HDP_ stuff on RS600, R600, R700 hw, well anythings + * related to surface + * - WB : write back stuff (do it bit like scratch reg things) + * - Vblank : look at Jesse's rework and what we should do + * - r600/r700: gart & cp + * - cs : clean cs ioctl use bitmap & things like that. + * - power management stuff + * - Barrier in gart code + * - Unmappabled vram ? + * - TESTING, TESTING, TESTING + */ + +/* Initialization path: + * We expect that acceleration initialization might fail for various + * reasons even thought we work hard to make it works on most + * configurations. In order to still have a working userspace in such + * situation the init path must succeed up to the memory controller + * initialization point. Failure before this point are considered as + * fatal error. Here is the init callchain : + * amdgpu_device_init perform common structure, mutex initialization + * asic_init setup the GPU memory layout and perform all + * one time initialization (failure in this + * function are considered fatal) + * asic_startupsetup the GPU acceleration, in order to + * follow guideline the first thing this + * function should do is setting the GPU + * memory controller (only MC setup failure + * are considered as fatal) + */ + These should be outdated, and I think they can be removed now. 
For the uapi header patch: +#define AMDGPU_TILING_MACRO0x1 +#define AMDGPU_TILING_MICRO0x2 +#define AMDGPU_TILING_SWAP_16BIT 0x4 +#define AMDGPU_TILING_R600_NO_SCANOUT AMDGPU_TILING_SWAP_16BIT +#define AMDGPU_TILING_SWAP_32BIT 0x8 +/* this object requires a surface when mapped - i.e. front buffer */ +#define AMDGPU_TILING_SURFACE 0x10 +#define AMDGPU_TILING_MICRO_SQUARE 0x20 +#define AMDGPU_TILING_EG_BANKW_SHIFT 8 +#define AMDGPU_TILING_EG_BANKW_MASK0xf +#define AMDGPU_TILING_EG_BANKH_SHIFT 12 +#define AMDGPU_TILING_EG_BANKH_MASK0xf +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_SHIFT 16 +#define AMDGPU_TILING_EG_MACRO_TILE_ASPECT_MASK0xf +#define AMDGPU_TILING_EG_TILE_SPLIT_SHIFT 24 +#define AMDGPU_TILING_EG_TILE_SPLIT_MASK 0xf +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_SHIFT 28 +#define AMDGPU_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf .. +#define SI_TILE_MODE_COLOR_LINEAR_ALIGNED 8 +#define SI_TILE_MODE_COLOR_1D 13 +#define SI_TILE_MODE_COLOR_1D_SCANOUT 9 +#define SI_TILE_MODE_COLOR_2D_8BPP 14 +#define SI_TILE_MODE_COLOR_2D_16BPP15 +#define SI_TILE_MODE_COLOR_2D_32BPP16 +#define SI_TILE_MODE_COLOR_2D_64BPP17 +#define SI_TILE_MODE_COLOR_2D_SCANOUT_16BPP11 +#define SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP12 +#define SI_TILE_MODE_DEPTH_STENCIL_1D 4 +#define SI_TILE_MODE_DEPTH_STENCIL_2D 0 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_2AA 3 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA 3 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2 + +#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5 It looks these definitions are not used by libdrm_amdgpu anymore (and even by the kernel driver). Maybe we can remove the unused definitions, and move the used ones to amdgpu.h instead. Besides, I think we'd better remove 'R600/EG/SI/CIK' from the naming. 
Other than the comments above, the kernel series are Acked-by: Jammy Zhou Regards, Jammy -Original Message- From: dri-devel [mailto:dri-devel-boun...@lists.freedesktop.org] On Behalf Of Alex Deucher Sent: Tuesday, April 21, 2015 6:34 AM To: Maling list - DRI developers; mesa-dev@lists.freedesktop.org Cc: Deucher, Alexander Subject: Initial amdgpu driver release I'm pleased to announce the initial release of the new amdgpu driver. This is a partial replacement for the radeon driver for newer AMD asics. A number of components are still shared. Here is a comparison of the radeon and amdgpu stack
[Mesa-dev] [Bug 90162] glGetFramebufferAttachmentParameteriv failing if certain FB attributes are zero
https://bugs.freedesktop.org/show_bug.cgi?id=90162 Bug ID: 90162 Summary: glGetFramebufferAttachmentParameteriv failing if certain FB attributes are zero Product: Mesa Version: 10.1 Hardware: Other OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: GLX Assignee: mesa-dev@lists.freedesktop.org Reporter: martina.kollar...@intel.com QA Contact: mesa-dev@lists.freedesktop.org Created attachment 115302 --> https://bugs.freedesktop.org/attachment.cgi?id=115302&action=edit reproducer.output The command glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_DEPTH, GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE, &depth_bits) fails with GL_INVALID_OPERATION if the attribute GLX_DEPTH_SIZE is set to 0 (or not explicitly set) in the FB config. The same happens with the command glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_STENCIL, GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE, &stencil_bits) if GLX_STENCIL_SIZE is set to 0 or not explicitly set. You can reproduce those two cases with https://github.com/mkollaro/opengl_snippets/blob/getattachments/glx/main.cpp (after building, run with `./build/bin/glx`). My output is in the attachments as `reproducer.output`. Additionally, if GLX_DOUBLEBUFFER is set to false, the command glGetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, GL_BACK_LEFT, GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE, &alpha_bits); also fails with GL_INVALID_OPERATION, but not if set to true. This error happened to me in the Chromium project, but I cannot reproduce it in the above mentioned code, because I somehow cannot get a context with GLX_DOUBLEBUFFER set to false (this is probably just my mistake). I would expect this command to return zero if e.g. the depth size is set to zero, or at least use some more informative error message, since the command is correct and only the framebuffer configuration is wrong. My system is Linux Mint 17.1 Rebecca, more info in the glxinfo.output attachment. 
lspci -vnn|grep VGA 00:02.0 VGA compatible controller [0300]: Intel Corporation 4th Gen Core Processor Integrated Graphics Controller [8086:0416] (rev 06) (prog-if 00 [VGA controller]) 01:00.0 VGA compatible controller [0300]: NVIDIA Corporation Device [10de:13d8] (rev a1) (prog-if 00 [VGA controller]) -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 90162] glGetFramebufferAttachmentParameteriv failing if certain FB attributes are zero
https://bugs.freedesktop.org/show_bug.cgi?id=90162 --- Comment #1 from Martina Kollarova --- Created attachment 115303 --> https://bugs.freedesktop.org/attachment.cgi?id=115303&action=edit glxinfo.output -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 23/23] st/mesa: enable shader subroutine
Yes, I second that. -Brian On 04/24/2015 04:29 AM, Marek Olšák wrote: I'd enable this if at least GLSL 1.30 was supported. I think drivers like r300, nv30, i915, and svga shouldn't get this. Marek On Fri, Apr 24, 2015 at 3:42 AM, Dave Airlie wrote: From: Dave Airlie I'm not sure if we shouldn't enable this everywhere and rip out the API checks, discuss, Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 25932dd..d22fc4f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -598,6 +598,7 @@ void st_init_extensions(struct pipe_screen *screen, extensions->ARB_half_float_vertex = GL_TRUE; extensions->ARB_internalformat_query = GL_TRUE; extensions->ARB_map_buffer_range = GL_TRUE; + extensions->ARB_shader_subroutine = GL_TRUE; extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */ extensions->ARB_texture_cube_map = GL_TRUE; extensions->ARB_texture_env_combine = GL_TRUE; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/7] Complete implementation for ARB_framebuffer_no_attachment in Mesa core: implement GetFramebufferParameteriv, FramebufferParameteri and changes to _mesa_test_framebuffer_comp
Normally we have a brief title on the subject line followed by a more elaborate description of the changes. You had: Subject: [Mesa-dev] [PATCH 3/7] Complete implementation for ARB_framebuffer_no_attachment in Mesa core: implement GetFramebufferParameteriv, FramebufferParameteri and changes to _mesa_test_framebuffer_completeness Better, for example: Subject: [Mesa-dev] [PATCH 3/7] mesa: Implement ARB_framebuffer_no_attachment Implementation for GetFramebufferParameteriv, FramebufferParameteri and changes to _mesa_test_framebuffer_completeness. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 23/23] st/mesa: enable shader subroutine
The extension is currently marked as GLC, so that equates to dummy_true right? On Fri, Apr 24, 2015 at 9:00 AM, Brian Paul wrote: > Yes, I second that. > > -Brian > > > On 04/24/2015 04:29 AM, Marek Olšák wrote: >> >> I'd enable this if at least GLSL 1.30 was supported. I think drivers >> like r300, nv30, i915, and svga shouldn't get this. >> >> Marek >> >> On Fri, Apr 24, 2015 at 3:42 AM, Dave Airlie wrote: >>> >>> From: Dave Airlie >>> >>> I'm not sure if we shouldn't enable this everywhere >>> and rip out the API checks, >>> >>> discuss, >>> >>> Signed-off-by: Dave Airlie >>> --- >>> src/mesa/state_tracker/st_extensions.c | 1 + >>> 1 file changed, 1 insertion(+) >>> >>> diff --git a/src/mesa/state_tracker/st_extensions.c >>> b/src/mesa/state_tracker/st_extensions.c >>> index 25932dd..d22fc4f 100644 >>> --- a/src/mesa/state_tracker/st_extensions.c >>> +++ b/src/mesa/state_tracker/st_extensions.c >>> @@ -598,6 +598,7 @@ void st_init_extensions(struct pipe_screen *screen, >>> extensions->ARB_half_float_vertex = GL_TRUE; >>> extensions->ARB_internalformat_query = GL_TRUE; >>> extensions->ARB_map_buffer_range = GL_TRUE; >>> + extensions->ARB_shader_subroutine = GL_TRUE; >>> extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */ >>> extensions->ARB_texture_cube_map = GL_TRUE; >>> extensions->ARB_texture_env_combine = GL_TRUE; >>> -- >>> 2.1.0 >>> >>> ___ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> >>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> >> 
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
On Fri, Apr 24, 2015 at 09:59:08AM +0300, kevin.rogo...@intel.com wrote: > From: Kevin Rogovin > > To prepare for i965 to support ARB_framebuffer_no_attachment, use > the convenience functions mesa_geometry_width/height/layers/samples > to specify the geometry of the render target surfaces to the GPU. > > > --- > src/mesa/drivers/dri/i965/brw_clip_state.c | 9 - > src/mesa/drivers/dri/i965/brw_misc_state.c | 12 -- > src/mesa/drivers/dri/i965/brw_sf_state.c | 46 > -- > src/mesa/drivers/dri/i965/brw_state_upload.c | 7 +++- > src/mesa/drivers/dri/i965/brw_wm.c | 7 ++-- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 -- > src/mesa/drivers/dri/i965/gen6_clip_state.c| 11 -- > src/mesa/drivers/dri/i965/gen6_multisample_state.c | 3 +- > src/mesa/drivers/dri/i965/gen6_scissor_state.c | 14 +-- > src/mesa/drivers/dri/i965/gen6_sf_state.c | 3 +- > src/mesa/drivers/dri/i965/gen6_viewport_state.c| 3 +- > src/mesa/drivers/dri/i965/gen6_wm_state.c | 3 +- > src/mesa/drivers/dri/i965/gen7_sf_state.c | 3 +- > src/mesa/drivers/dri/i965/gen7_viewport_state.c| 3 +- > src/mesa/drivers/dri/i965/gen7_wm_state.c | 3 +- > src/mesa/drivers/dri/i965/gen8_viewport_state.c| 9 +++-- > 16 files changed, 108 insertions(+), 41 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c > b/src/mesa/drivers/dri/i965/brw_clip_state.c > index 3223834..3aa679f 100644 > --- a/src/mesa/drivers/dri/i965/brw_clip_state.c > +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c > @@ -32,6 +32,7 @@ > #include "brw_context.h" > #include "brw_state.h" > #include "brw_defines.h" > +#include "main/framebuffer.h" > > static void > upload_clip_vp(struct brw_context *brw) > @@ -60,6 +61,10 @@ brw_upload_clip_unit(struct brw_context *brw) > > /* _NEW_BUFFERS */ > struct gl_framebuffer *fb = ctx->DrawBuffer; > + GLint fb_width, fb_height; > + > + fb_width = _mesa_geometric_width(fb); > + fb_height = _mesa_geometric_height(fb); You defined _mesa_geometric_width() and _mesa_geometric_height() to return 
unsigned, in principle we should use unsigned here also. But you actually need them converted to floats so why not convert already the returned value. Internally in the driver we also try to avoid using gl-types. There is also no need to separate the declaration and definition of the variables. I would write this as follows dropping the cast when using them. const float fb_width = (float)_mesa_geometric_width(fb); const float fb_height = (float)_mesa_geometric_height(fb); Same applies to the rest of the patch. > > upload_clip_vp(brw); > > @@ -127,8 +132,8 @@ brw_upload_clip_unit(struct brw_context *brw) > /* enable guardband clipping if we can */ > if (ctx->ViewportArray[0].X == 0 && > ctx->ViewportArray[0].Y == 0 && > - ctx->ViewportArray[0].Width == (float) fb->Width && > - ctx->ViewportArray[0].Height == (float) fb->Height) > + ctx->ViewportArray[0].Width == (float) fb_width && > + ctx->ViewportArray[0].Height == (float) fb_height) > { >clip->clip5.guard_band_enable = 1; >clip->clip6.clipper_viewport_state_ptr = > diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c > b/src/mesa/drivers/dri/i965/brw_misc_state.c > index 78a46cb..ef94a6e 100644 > --- a/src/mesa/drivers/dri/i965/brw_misc_state.c > +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c > @@ -39,6 +39,7 @@ > #include "brw_state.h" > #include "brw_defines.h" > > +#include "main/framebuffer.h" > #include "main/fbobject.h" > #include "main/glformats.h" > > @@ -46,12 +47,17 @@ > static void upload_drawing_rect(struct brw_context *brw) > { > struct gl_context *ctx = &brw->ctx; > + GLint fb_width, fb_height; > + struct gl_framebuffer *fb = ctx->DrawBuffer; Use 'const', you are only reading. 
> + > + fb_width = _mesa_geometric_width(fb); > + fb_height = _mesa_geometric_height(fb); > > BEGIN_BATCH(4); > OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); > OUT_BATCH(0); /* xmin, ymin */ > - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0x) | > - ((ctx->DrawBuffer->Height - 1) << 16)); > + OUT_BATCH(((fb_width - 1) & 0x) | > + ((fb_height - 1) << 16)); > OUT_BATCH(0); > ADVANCE_BATCH(); > } > @@ -767,7 +773,7 @@ static void upload_polygon_stipple_offset(struct > brw_context *brw) > * works just fine, and there's no window system to worry about. > */ > if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) > - OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); > + OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31); > else >OUT_BATCH(0); > ADVANCE_BATCH(); > diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c > b/src/mesa/drivers/dri/i965/brw_sf_state.c > index 014b434..1fa3d44 100644
Re: [Mesa-dev] [PATCH 23/23] st/mesa: enable shader subroutine
Yes, as long as it's core-only, dummy_true is fine. Marek On Fri, Apr 24, 2015 at 3:14 PM, Ilia Mirkin wrote: > The extension is currently marked as GLC, so that equates to dummy_true right? > > On Fri, Apr 24, 2015 at 9:00 AM, Brian Paul wrote: >> Yes, I second that. >> >> -Brian >> >> >> On 04/24/2015 04:29 AM, Marek Olšák wrote: >>> >>> I'd enable this if at least GLSL 1.30 was supported. I think drivers >>> like r300, nv30, i915, and svga shouldn't get this. >>> >>> Marek >>> >>> On Fri, Apr 24, 2015 at 3:42 AM, Dave Airlie wrote: From: Dave Airlie I'm not sure if we shouldn't enable this everywhere and rip out the API checks, discuss, Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 25932dd..d22fc4f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -598,6 +598,7 @@ void st_init_extensions(struct pipe_screen *screen, extensions->ARB_half_float_vertex = GL_TRUE; extensions->ARB_internalformat_query = GL_TRUE; extensions->ARB_map_buffer_range = GL_TRUE; + extensions->ARB_shader_subroutine = GL_TRUE; extensions->ARB_texture_border_clamp = GL_TRUE; /* XXX temp */ extensions->ARB_texture_cube_map = GL_TRUE; extensions->ARB_texture_env_combine = GL_TRUE; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= >>> >>> ___ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> >>> 
https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=T0t4QG7chq2ZwJo6wilkFznRSFy-8uDKartPGbomVj8&m=XRYVFi3aEE45uP7Dd2aDjVFxFlW0ISsAIkGKCqEdgBg&s=l-01_HHXxTEZG4GOKVmF6iExtIt4HWaU7JFFrj5YaZ4&e= >>> >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension
This change will make the dispatch_sanity test fail. On Fri, Apr 24, 2015 at 3:05 AM, Matt Turner wrote: > The subject should be prefixed with "mesa:" > > On Thu, Apr 23, 2015 at 11:59 PM, wrote: >> From: Kevin Rogovin >> >> Define enumerations, functions and associated glGet's for >> extension ARB_framebuffer_no_attachment. >> >> --- >> .../glapi/gen/ARB_framebuffer_no_attachments.xml | 33 ++ >> src/mapi/glapi/gen/Makefile.am | 1 + >> src/mapi/glapi/gen/gl_API.xml | 1 + >> src/mesa/main/fbobject.c | 12 +++ >> src/mesa/main/fbobject.h | 7 >> src/mesa/main/get.c| 3 ++ >> src/mesa/main/get_hash_params.py | 40 >> ++ >> 7 files changed, 97 insertions(+) >> create mode 100644 src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> >> diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> new file mode 100644 >> index 000..60e40d0 >> --- /dev/null >> +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> @@ -0,0 +1,33 @@ >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am >> index 1c4b86a..9a0e944 100644 >> --- a/src/mapi/glapi/gen/Makefile.am >> +++ b/src/mapi/glapi/gen/Makefile.am >> @@ -130,6 +130,7 @@ API_XML = \ >> ARB_ES2_compatibility.xml \ >> ARB_ES3_compatibility.xml \ >> ARB_framebuffer_object.xml \ >> + ARB_framebuffer_no_attachments.xml \ > > This is an alphabetized list. Please keep it so. 
> >> ARB_geometry_shader4.xml \ >> ARB_get_program_binary.xml \ >> ARB_gpu_shader_fp64.xml \ >> diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml >> index a8a6db6..4eea396 100644 >> --- a/src/mapi/glapi/gen/gl_API.xml >> +++ b/src/mapi/glapi/gen/gl_API.xml >> @@ -8325,6 +8325,7 @@ >> >> >> >> +> xmlns:xi="http://www.w3.org/2001/XInclude"/> This is extension #130, so it should go above the comment, and the comment should be changed to just say . -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/fs: Strip trailing constant zeroes in sample messages
If a send message is emitted with a message length that is less than required for the message then the remaining parameters default to zero. We can take advantage of this to save a register when a shader passes constant zeroes as the final coordinates to the sample function. I think this might be useful for GLES applications that are using 2D textures to simulate 1D textures. On Skylake it will be useful for shaders that do texelFetch(tex,something,0) which I think is fairly common. This helps more on Skylake because in that case the order of the instruction operands are u,v,lod,r which is good for 2D textures whereas before they were u,lod,v,r which is only good for 1D textures. On Haswell: total instructions in shared programs: 8538662 -> 8537377 (-0.02%) instructions in affected programs: 193546 -> 192261 (-0.66%) helped:1032 On Skylake: total instructions in shared programs: 10336216 -> 10332976 (-0.03%) instructions in affected programs: 243118 -> 239878 (-1.33%) helped:1066 --- src/mesa/drivers/dri/i965/brw_fs.cpp | 43 src/mesa/drivers/dri/i965/brw_fs.h | 1 + 2 files changed, 44 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 61ee056..87a15b3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2536,6 +2536,48 @@ fs_visitor::opt_algebraic() } /** + * Optimize sample messages that have constant zero values for the trailing + * texture coordinates. We can just reduce the message length for these + * instructions instead of reserving a register for it. Trailing parameters + * that aren't sent default to zero anyway. This will cause the dead code + * eliminator to remove the MOV instruction that would otherwise be emitted to + * set up the zero value. 
+ */ +bool +fs_visitor::opt_zero_samples() +{ + bool progress = false; + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if ((inst->opcode == SHADER_OPCODE_TEX || + inst->opcode == SHADER_OPCODE_TXF) && + !inst->shadow_compare) { + fs_inst *load_payload = (fs_inst *) inst->prev; + + if (load_payload->is_head_sentinel() || + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) +continue; + + /* We don't want to remove the message header. Removing all of the + * parameters is avoided because it seems to cause a GPU hang but I + * can't find any documentation indicating that this is expected. + */ + while (inst->mlen > inst->header_present + dispatch_width / 8 && +load_payload->src[(inst->mlen - inst->header_present) / + (dispatch_width / 8) - 1].is_zero()) { +inst->mlen -= dispatch_width / 8; +progress = true; + } + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +/** * Optimize sample messages which are followed by the final RT write. * * CHV, and GEN9+ can mark a texturing SEND instruction with EOT to have its @@ -3824,6 +3866,7 @@ fs_visitor::optimize() pass_num = 0; + OPT(opt_zero_samples); OPT(opt_sampler_eot); if (OPT(lower_load_payload)) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4e17d44..6200deb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -324,6 +324,7 @@ public: bool opt_peephole_predicated_break(); bool opt_saturate_propagation(); bool opt_cmod_propagation(); + bool opt_zero_samples(); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]); void emit_if_gen6(ir_if *ir); -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension
Hi, I agree with the comments about the code (and when the last element of the series is reviewed I will submit the series with review comments taken into use), but when I applied just Patch 1 and Patch 2, and ran src/mesa/main/tests/main-test (after a git clean -dfx and all that cleaning) all test pass, in particular the 4 DispatchSanity_test's: DispatchSanity_test.GL31_CORE , DispatchSanity_test.GLES11, DispatchSanity_test.GLES2 and DispatchSanity_test.GLES3. In addition, make check passes all test as well. If you are referring to another test, what test is that? -Kevin -Original Message- From: ibmir...@gmail.com [mailto:ibmir...@gmail.com] On Behalf Of Ilia Mirkin Sent: Friday, April 24, 2015 4:36 PM To: Matt Turner Cc: Rogovin, Kevin; mesa-...@freedesktop.org Subject: Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension This change will make the dispatch_sanity test fail. On Fri, Apr 24, 2015 at 3:05 AM, Matt Turner wrote: > The subject should be prefixed with "mesa:" > > On Thu, Apr 23, 2015 at 11:59 PM, wrote: >> From: Kevin Rogovin >> >> Define enumerations, functions and associated glGet's for extension >> ARB_framebuffer_no_attachment. 
>> >> --- >> .../glapi/gen/ARB_framebuffer_no_attachments.xml | 33 ++ >> src/mapi/glapi/gen/Makefile.am | 1 + >> src/mapi/glapi/gen/gl_API.xml | 1 + >> src/mesa/main/fbobject.c | 12 +++ >> src/mesa/main/fbobject.h | 7 >> src/mesa/main/get.c| 3 ++ >> src/mesa/main/get_hash_params.py | 40 >> ++ >> 7 files changed, 97 insertions(+) >> create mode 100644 >> src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> >> diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> new file mode 100644 >> index 000..60e40d0 >> --- /dev/null >> +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >> @@ -0,0 +1,33 @@ >> + >> + >> + >> + >> + >> + >> + >> + > +name="FRAMEBUFFER_DEFAULT_HEIGHT" value="0x9311" /> > +name="FRAMEBUFFER_DEFAULT_LAYERS" value="0x9312" /> > +name="FRAMEBUFFER_DEFAULT_SAMPLES" value="0x9313" /> > +name="FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS" value="0x9314" /> >> + > +name="MAX_FRAMEBUFFER_HEIGHT" value="0x9316" /> > +name="MAX_FRAMEBUFFER_LAYERS" value="0x9317" /> > +name="MAX_FRAMEBUFFER_SAMPLES" value="0x9318" /> >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> + >> diff --git a/src/mapi/glapi/gen/Makefile.am >> b/src/mapi/glapi/gen/Makefile.am index 1c4b86a..9a0e944 100644 >> --- a/src/mapi/glapi/gen/Makefile.am >> +++ b/src/mapi/glapi/gen/Makefile.am >> @@ -130,6 +130,7 @@ API_XML = \ >> ARB_ES2_compatibility.xml \ >> ARB_ES3_compatibility.xml \ >> ARB_framebuffer_object.xml \ >> + ARB_framebuffer_no_attachments.xml \ > > This is an alphabetized list. Please keep it so. 
> >> ARB_geometry_shader4.xml \ >> ARB_get_program_binary.xml \ >> ARB_gpu_shader_fp64.xml \ >> diff --git a/src/mapi/glapi/gen/gl_API.xml >> b/src/mapi/glapi/gen/gl_API.xml index a8a6db6..4eea396 100644 >> --- a/src/mapi/glapi/gen/gl_API.xml >> +++ b/src/mapi/glapi/gen/gl_API.xml >> @@ -8325,6 +8325,7 @@ >> >> >> >> +> +xmlns:xi="http://www.w3.org/2001/XInclude"/> This is extension #130, so it should go above the comment, and the comment should be changed to just say . -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for ARB_framebuffer_no_attachment extension
H that's surprising. src/mesa/main/tests/dispatch_sanity.cpp:// { "glFramebufferParameteri", 43, -1 }, // XXX: Add to xml I thought it should detect that there's a new API and complain loudly. At least that's how I remembered it working, but that doesn't seem to be the case? Are you sure you had a clean build? Either way, those should probably get uncommented, and there are probably interactions with ARB_dsa as well, should probably figure out if you or Laura should add support for that (or perhaps you had it in your patches already). -ilia On Fri, Apr 24, 2015 at 11:06 AM, Rogovin, Kevin wrote: > Hi, > > I agree with the comments about the code (and when the last element of the > series is reviewed I will submit the series with review comments taken into > use), but when I applied just Patch 1 and Patch 2, and ran > src/mesa/main/tests/main-test (after a git clean -dfx and all that cleaning) > all test pass, in particular the 4 DispatchSanity_test's: > DispatchSanity_test.GL31_CORE , DispatchSanity_test.GLES11, > DispatchSanity_test.GLES2 and DispatchSanity_test.GLES3. In addition, make > check passes all test as well. If you are referring to another test, what > test is that? > > -Kevin > > -Original Message- > From: ibmir...@gmail.com [mailto:ibmir...@gmail.com] On Behalf Of Ilia Mirkin > Sent: Friday, April 24, 2015 4:36 PM > To: Matt Turner > Cc: Rogovin, Kevin; mesa-...@freedesktop.org > Subject: Re: [Mesa-dev] [PATCH 2/7] Define constants and functions for > ARB_framebuffer_no_attachment extension > > This change will make the dispatch_sanity test fail. > > On Fri, Apr 24, 2015 at 3:05 AM, Matt Turner wrote: >> The subject should be prefixed with "mesa:" >> >> On Thu, Apr 23, 2015 at 11:59 PM, wrote: >>> From: Kevin Rogovin >>> >>> Define enumerations, functions and associated glGet's for extension >>> ARB_framebuffer_no_attachment. 
>>> >>> --- >>> .../glapi/gen/ARB_framebuffer_no_attachments.xml | 33 ++ >>> src/mapi/glapi/gen/Makefile.am | 1 + >>> src/mapi/glapi/gen/gl_API.xml | 1 + >>> src/mesa/main/fbobject.c | 12 +++ >>> src/mesa/main/fbobject.h | 7 >>> src/mesa/main/get.c| 3 ++ >>> src/mesa/main/get_hash_params.py | 40 >>> ++ >>> 7 files changed, 97 insertions(+) >>> create mode 100644 >>> src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >>> >>> diff --git a/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >>> b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >>> new file mode 100644 >>> index 000..60e40d0 >>> --- /dev/null >>> +++ b/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml >>> @@ -0,0 +1,33 @@ >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >> +name="FRAMEBUFFER_DEFAULT_HEIGHT" value="0x9311" /> >> +name="FRAMEBUFFER_DEFAULT_LAYERS" value="0x9312" /> >> +name="FRAMEBUFFER_DEFAULT_SAMPLES" value="0x9313" /> >> +name="FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS" value="0x9314" /> >>> + >> +name="MAX_FRAMEBUFFER_HEIGHT" value="0x9316" /> >> +name="MAX_FRAMEBUFFER_LAYERS" value="0x9317" /> >> +name="MAX_FRAMEBUFFER_SAMPLES" value="0x9318" /> >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> + >>> diff --git a/src/mapi/glapi/gen/Makefile.am >>> b/src/mapi/glapi/gen/Makefile.am index 1c4b86a..9a0e944 100644 >>> --- a/src/mapi/glapi/gen/Makefile.am >>> +++ b/src/mapi/glapi/gen/Makefile.am >>> @@ -130,6 +130,7 @@ API_XML = \ >>> ARB_ES2_compatibility.xml \ >>> ARB_ES3_compatibility.xml \ >>> ARB_framebuffer_object.xml \ >>> + ARB_framebuffer_no_attachments.xml \ >> >> This is an alphabetized list. Please keep it so. 
>> >>> ARB_geometry_shader4.xml \ >>> ARB_get_program_binary.xml \ >>> ARB_gpu_shader_fp64.xml \ >>> diff --git a/src/mapi/glapi/gen/gl_API.xml >>> b/src/mapi/glapi/gen/gl_API.xml index a8a6db6..4eea396 100644 >>> --- a/src/mapi/glapi/gen/gl_API.xml >>> +++ b/src/mapi/glapi/gen/gl_API.xml >>> @@ -8325,6 +8325,7 @@ >>> >>> >>> >>> +>> +xmlns:xi="http://www.w3.org/2001/XInclude"/> > > This is extension #130, so it should go above the comment, and the comment > should be changed to just say . > > -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages
On Fri, Apr 24, 2015 at 8:02 AM, Neil Roberts wrote: > If a send message is emitted with a message length that is less than > required for the message then the remaining parameters default to > zero. We can take advantage of this to save a register when a shader > passes constant zeroes as the final coordinates to the sample > function. > > I think this might be useful for GLES applications that are using 2D > textures to simulate 1D textures. > > On Skylake it will be useful for shaders that do > texelFetch(tex,something,0) which I think is fairly common. This helps > more on Skylake because in that case the order of the instruction > operands are u,v,lod,r which is good for 2D textures whereas before > they were u,lod,v,r which is only good for 1D textures. > > On Haswell: > total instructions in shared programs: 8538662 -> 8537377 (-0.02%) > instructions in affected programs: 193546 -> 192261 (-0.66%) > helped:1032 > > On Skylake: > total instructions in shared programs: 10336216 -> 10332976 (-0.03%) > instructions in affected programs: 243118 -> 239878 (-1.33%) > helped:1066 Neat! I never thought to try this. I have some vague memory that there are times when we purposefully emit a MOV 0 because of supposed hardware bugs, but looking at brw_fs_visitor.cpp, that's seems to only be the case on Gen4. So I think this is safe. > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 43 > > src/mesa/drivers/dri/i965/brw_fs.h | 1 + > 2 files changed, 44 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 61ee056..87a15b3 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2536,6 +2536,48 @@ fs_visitor::opt_algebraic() > } > > /** > + * Optimize sample messages that have constant zero values for the trailing > + * texture coordinates. We can just reduce the message length for these > + * instructions instead of reserving a register for it. 
Trailing parameters > + * that aren't sent default to zero anyway. This will cause the dead code > + * eliminator to remove the MOV instruction that would otherwise be emitted > to > + * set up the zero value. > + */ > +bool > +fs_visitor::opt_zero_samples() > +{ > + bool progress = false; > + > + foreach_block_and_inst(block, fs_inst, inst, cfg) { > + if ((inst->opcode == SHADER_OPCODE_TEX || > + inst->opcode == SHADER_OPCODE_TXF) && > + !inst->shadow_compare) { > + fs_inst *load_payload = (fs_inst *) inst->prev; > + > + if (load_payload->is_head_sentinel() || > + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) > +continue; We can't guarantee that the load_payload isn't used by another texture later in the program, and since you need to change the texture operation's mlen, I think you need to check that the load_payload isn't used after this texture operation. To do that, (1) add an ip variable and initialize it to -1, (2) add ip++ as the first statement in the foreach_block_and_inst loop, (3) add some code to this check similar to in brw_fs_saturate_propagation.cpp using this->live_intervals. > + > + /* We don't want to remove the message header. Removing all of the > + * parameters is avoided because it seems to cause a GPU hang but I > + * can't find any documentation indicating that this is expected. > + */ > + while (inst->mlen > inst->header_present + dispatch_width / 8 && > +load_payload->src[(inst->mlen - inst->header_present) / > + (dispatch_width / 8) - 1].is_zero()) { > +inst->mlen -= dispatch_width / 8; > +progress = true; > + } > + } > + } > + > + if (progress) > + invalidate_live_intervals(); > + > + return progress; > +} > + > +/** > * Optimize sample messages which are followed by the final RT write. 
> * > * CHV, and GEN9+ can mark a texturing SEND instruction with EOT to have its > @@ -3824,6 +3866,7 @@ fs_visitor::optimize() > > pass_num = 0; > > + OPT(opt_zero_samples); I think you're probably right that this can be done after the optimization loop. I guess it's possible that we might trim a texture payload down and it'll then be the same as an existing payload and we can then CSE them. I'd be interested to see if putting it inside the optimization loop improves anything. > OPT(opt_sampler_eot); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
On Fri, Apr 24, 2015 at 04:19:17PM +0300, Pohjolainen, Topi wrote: > On Fri, Apr 24, 2015 at 09:59:08AM +0300, kevin.rogo...@intel.com wrote: > > From: Kevin Rogovin > > > > To prepare for i965 to support ARB_framebuffer_no_attachment, use > > the convenience functions mesa_geometry_width/height/layers/samples > > to specify the geometry of the render target surfaces to the GPU. > > > > > > --- > > src/mesa/drivers/dri/i965/brw_clip_state.c | 9 - > > src/mesa/drivers/dri/i965/brw_misc_state.c | 12 -- > > src/mesa/drivers/dri/i965/brw_sf_state.c | 46 > > -- > > src/mesa/drivers/dri/i965/brw_state_upload.c | 7 +++- > > src/mesa/drivers/dri/i965/brw_wm.c | 7 ++-- > > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 -- > > src/mesa/drivers/dri/i965/gen6_clip_state.c| 11 -- > > src/mesa/drivers/dri/i965/gen6_multisample_state.c | 3 +- > > src/mesa/drivers/dri/i965/gen6_scissor_state.c | 14 +-- > > src/mesa/drivers/dri/i965/gen6_sf_state.c | 3 +- > > src/mesa/drivers/dri/i965/gen6_viewport_state.c| 3 +- > > src/mesa/drivers/dri/i965/gen6_wm_state.c | 3 +- > > src/mesa/drivers/dri/i965/gen7_sf_state.c | 3 +- > > src/mesa/drivers/dri/i965/gen7_viewport_state.c| 3 +- > > src/mesa/drivers/dri/i965/gen7_wm_state.c | 3 +- > > src/mesa/drivers/dri/i965/gen8_viewport_state.c| 9 +++-- > > 16 files changed, 108 insertions(+), 41 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c > > b/src/mesa/drivers/dri/i965/brw_clip_state.c > > index 3223834..3aa679f 100644 > > --- a/src/mesa/drivers/dri/i965/brw_clip_state.c > > +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c > > @@ -32,6 +32,7 @@ > > #include "brw_context.h" > > #include "brw_state.h" > > #include "brw_defines.h" > > +#include "main/framebuffer.h" > > > > static void > > upload_clip_vp(struct brw_context *brw) > > @@ -60,6 +61,10 @@ brw_upload_clip_unit(struct brw_context *brw) > > > > /* _NEW_BUFFERS */ > > struct gl_framebuffer *fb = ctx->DrawBuffer; > > + GLint fb_width, fb_height; > > + > > 
+ fb_width = _mesa_geometric_width(fb); > > + fb_height = _mesa_geometric_height(fb); > > You defined _mesa_geometric_width() and _mesa_geometric_height() to return > unsigned, in principle we should use unsigned here also. But you actually > need them converted to floats so why not convert already the returned > value. Internally in the driver we also try to avoid using gl-types. There > is also no need to separate the declaration and definition of the > variables. I would write this as follows dropping the cast when using them. > > const float fb_width = (float)_mesa_geometric_width(fb); > const float fb_height = (float)_mesa_geometric_height(fb); > > Same applies to the rest of the patch. Actually I realized that you add quite a bit of support to gen4-6 logic that _isn't_ used for gen7 and higher. In the last patch of the series you claim to enable this only for gen7 and higher - I'm confused. > > > > > upload_clip_vp(brw); > > > > @@ -127,8 +132,8 @@ brw_upload_clip_unit(struct brw_context *brw) > > /* enable guardband clipping if we can */ > > if (ctx->ViewportArray[0].X == 0 && > > ctx->ViewportArray[0].Y == 0 && > > - ctx->ViewportArray[0].Width == (float) fb->Width && > > - ctx->ViewportArray[0].Height == (float) fb->Height) > > + ctx->ViewportArray[0].Width == (float) fb_width && > > + ctx->ViewportArray[0].Height == (float) fb_height) > > { > >clip->clip5.guard_band_enable = 1; > >clip->clip6.clipper_viewport_state_ptr = > > diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c > > b/src/mesa/drivers/dri/i965/brw_misc_state.c > > index 78a46cb..ef94a6e 100644 > > --- a/src/mesa/drivers/dri/i965/brw_misc_state.c > > +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c > > @@ -39,6 +39,7 @@ > > #include "brw_state.h" > > #include "brw_defines.h" > > > > +#include "main/framebuffer.h" > > #include "main/fbobject.h" > > #include "main/glformats.h" > > > > @@ -46,12 +47,17 @@ > > static void upload_drawing_rect(struct brw_context *brw) > > { > > struct 
gl_context *ctx = &brw->ctx; > > + GLint fb_width, fb_height; > > + struct gl_framebuffer *fb = ctx->DrawBuffer; > > Use 'const', you are only reading. > > > + > > + fb_width = _mesa_geometric_width(fb); > > + fb_height = _mesa_geometric_height(fb); > > > > BEGIN_BATCH(4); > > OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); > > OUT_BATCH(0); /* xmin, ymin */ > > - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0x) | > > - ((ctx->DrawBuffer->Height - 1) << 16)); > > + OUT_BATCH(((fb_width - 1) & 0x) | > > + ((fb_height - 1) << 16)); > > OUT_BATCH(0); > > ADVANCE_BATCH(); > > } > > @@ -767,7 +773,7 @@ static void upload_polygon_stipple
Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages
In the title s/contant/constant/. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
> Actually I realized that you add quite a bit of support to gen4-6 logic that > _isn't_ used for gen7 and higher. In the last patch of the series you claim > to enable this only for gen7 and higher - I'm confused. There are two reasons: 1. Because atoms get reused all the time across generations, it is just easier to use the _mesa_geometric_* functions in any batch buffer builder that is concerned about the geometry of the render target. It keeps the code consistent and is much easier than checking what functions and atoms are directly or indirectly used by different Gens. However, blorp, blitting and a few others are left untouched since they want to talk about the buffer, not really 3D pipeline rasterization things. 2. At first I was going to support pre Gen7 hardware with the series. However, I do not have hardware on which to test it. In truth I want this to also run on pre-Gen7, but without testing on device, I cannot vouch for the patches. I believe it should just work for pre Gen7 (by just tweaking the last patch to enable it on pre Gen7), but I would rather be careful in this case. I also confess, it is a silly extension for pre Gen7 anyways... -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
On Fri, Apr 24, 2015 at 09:59:09AM +0300, kevin.rogo...@intel.com wrote: > From: Kevin Rogovin > > Ensure that the GPU spawns the fragment shader thread for those > fragment shaders with atomic buffer access. > > --- > src/mesa/drivers/dri/i965/gen7_wm_state.c | 7 +++ > src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 > 2 files changed, 11 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c > b/src/mesa/drivers/dri/i965/gen7_wm_state.c > index 82e116c..fa04221 100644 > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c > @@ -77,6 +77,13 @@ upload_wm_state(struct brw_context *brw) >dw1 |= GEN7_WM_KILL_ENABLE; > } > > + /* pixel shader must run if it has side-effects > +*/ One line comment style: /* pixel shader must run if it has side-effects */ > + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && Add spaces around != > + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) { > + dw1 |= GEN7_WM_DISPATCH_ENABLE; > + } > + > /* _NEW_BUFFERS | _NEW_COLOR */ > if (brw_color_buffer_write_enabled(brw) || writes_depth || > dw1 & GEN7_WM_KILL_ENABLE) { > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c > b/src/mesa/drivers/dri/i965/gen8_ps_state.c > index 5f39e12..614bc9b 100644 > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c > @@ -62,6 +62,10 @@ upload_ps_extra(struct brw_context *brw) > if (prog_data->uses_omask) >dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; > > + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && Same here. I'll check with Jordan and others. I have a faint recollection that compute shaders have similar needs. I think your change is fine though, I just want to understand the bigger picture first. 
> + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) > + dw1 |= GEN8_PSX_SHADER_HAS_UAV; > + > BEGIN_BATCH(2); > OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); > OUT_BATCH(dw1); > -- > 1.9.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
On Fri, Apr 24, 2015 at 07:02:18PM +0300, Rogovin, Kevin wrote: > > > Actually I realized that you add quite a bit of support to gen4-6 logic > > that > > _isn't_ used for gen7 and higher. In the last patch of the series you claim > > to enable this only for gen7 and higher - I'm confused. > > There are two reasons: > 1. Because atoms get reused all the time across generations, it is just > easier to use > the _mesa_geomety_* functions in any batch buffer builder that is concerned > about the geometry of the render target. It keeps the code consistent and > much > easier than checking what functions and atoms are directly or indirectly used > by > different Gens. However, blorp, blitting and a few others are left untouched > since > they want to talk about the buffer, not really 3D pipeline rasterization > things. > My point specifically was that you are also updating atoms that _are not_ re-used. And as those changes are not really needed, I wouldn't take the risk of changing something in vain. I would introduce them only when you have patches to really enable older generations. > 2. At first I was going to support pre Gen7 hardware with the series. However, > I do not have hardware on which to test it. In truth I want this to also run > on > pre-Gen7, but without testing on device, I cannot vouch for the patches. > I believe it should just work for pre Gen7 (by just tweaking the last patch > to > enable it on pre Gen7), but I would rather be careful than in this case. I > also > confess, it is a silly extension for pre Gen7 anyways... > > -Kevin > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
> My point specifically was that you are also updating atoms that _are not_ > re-used. > And as those changes are not really needed, I wouldn't take the risk of > changing > something in vain. I would introduce them only when you have patches to > really enable older generations. My take is the following: 1. Tracking (and guaranteeing) that the functions left unchanged are exactly those used only before Gen7 is a pain. Much easier, and more reliable to hit them all instead. A significant number of functions in i965 are not emit functions of any atom but emit functions of atoms map to them. Again, more reliable and -safer- to change them all, than just the bare minimum. 2. The change is benign. If _HasAttachments is true, then the function substitution gives the same value. For Gens not supporting the extension there is no effect. 3. Lastly, as stated: for later it leaves the option to enable it for Gen6 and below, it is just a trivial change, but it needs testing on hardware. When I was writing this work, I originally had it for all Gens, but changed to support only Gen7 and higher because that is all on which I can test it. -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
> I'll check with Jordan and others. I have a faint recollection that compute > shaders have similar > needs. I think your change is fine though, I just want to understand the > bigger picture first. I do not think compute shaders have similar needs. These flags are for making sure the rasterizer-wm thingy in Gen spawns the fragment shader threads. Compute kernels are not (I believe) spawned by the raster-wm thing, as they do not actually use the pipeline (rather they use L3, samplers and EU's only essentially). -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] i965: Add viewport extents (gen8) to batch decode
On Thu, Apr 23, 2015 at 04:49:58PM -0700, Ben Widawsky wrote: > 0x7da0: 0xc1da740e: SF_CLIP VP: guardband xmin = -27.306667 > 0x7da4: 0x41da740e: SF_CLIP VP: guardband xmax = 27.306667 > 0x7da4: 0x41da740e: SF_CLIP VP: guardband ymin = -23.405714 > 0x7da8: 0xc1bb3ee7: SF_CLIP VP: guardband ymax = 23.405714 > 0x7db0: 0x: SF_CLIP VP: Min extents: 0.00x0.00 > 0x7db8: 0x: SF_CLIP VP: Max extents: 299.00x349.00 > > While here, fix the wrong offsets for the guardband (I didn't check if it used > to be valid on GEN4). Confirmed from PRM, dwords 6 and 7 are reserved. > > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_defines.h| 1 + > src/mesa/drivers/dri/i965/brw_state_dump.c | 15 +++ > 2 files changed, 12 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index bd3218a..e37d2e0 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -38,6 +38,7 @@ >fieldval & field ## _MASK;\ > }) > > +#define GET_BITS(data, high, low) ((data & INTEL_MASK(high, low)) >> low) Leftover for something else, at least not used anywhere here? And if you need it, guard the arguments with () (even though 'low' and 'high' are likely to be compile time constants. 
#define GET_BITS(data, high, low) \ (((data) & INTEL_MASK((high), (low))) >> (low)) Otherwise: Reviewed-by: Topi Pohjolainen > #define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## > _SHIFT) > > #ifndef BRW_DEFINES_H > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index a688ba3..89c1a29 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -532,10 +532,17 @@ static void dump_sf_clip_viewport_state(struct > brw_context *brw, > batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); > batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); > batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); > - batch_out(brw, name, offset, 6, "guardband xmin = %f\n", > vp->guardband.xmin); > - batch_out(brw, name, offset, 7, "guardband xmax = %f\n", > vp->guardband.xmax); > - batch_out(brw, name, offset, 8, "guardband ymin = %f\n", > vp->guardband.ymin); > - batch_out(brw, name, offset, 9, "guardband ymax = %f\n", > vp->guardband.ymax); > + batch_out(brw, name, offset, 8, "guardband xmin = %f\n", > vp->guardband.xmin); > + batch_out(brw, name, offset, 9, "guardband xmax = %f\n", > vp->guardband.xmax); > + batch_out(brw, name, offset, 9, "guardband ymin = %f\n", > vp->guardband.ymin); > + batch_out(brw, name, offset, 10, "guardband ymax = %f\n", > vp->guardband.ymax); > + if (brw->gen >= 8) { > + float *cc_vp = brw->batch.bo->virtual + offset; > + batch_out(brw, name, offset, 12, "Min extents: %.2fx%.2f\n", > +cc_vp[12], cc_vp[14]); > + batch_out(brw, name, offset, 14, "Max extents: %.2fx%.2f\n", > +cc_vp[13], cc_vp[15]); > + } > } > > > -- > 2.3.6 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 03/22] i965: Move intel_miptree_choose_tiling() to brw_tex_layout.c
On Thu, Apr 23, 2015 at 11:38 AM, Pohjolainen, Topi wrote: > On Fri, Apr 17, 2015 at 04:51:24PM -0700, Anuj Phogat wrote: >> Patch continues code refactoring. >> >> Signed-off-by: Anuj Phogat >> --- >> src/mesa/drivers/dri/i965/brw_tex_layout.c| 105 >> ++ >> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 104 >> - >> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 8 -- >> 3 files changed, 105 insertions(+), 112 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c >> b/src/mesa/drivers/dri/i965/brw_tex_layout.c >> index b8408d3..08ef7a6 100644 >> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c >> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c >> @@ -377,6 +377,111 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, >> align_cube(mt); >> } >> >> +/** >> + * \brief Helper function for intel_miptree_create(). >> + */ >> +static uint32_t >> +intel_miptree_choose_tiling(struct brw_context *brw, > > All the other functions in this file use "brw_miptree"-prefix, perhaps > this should be as well? > I'll add the prefix. Many static functions in the file don't use brw_miptree prefix. e.g: intel_horizontal_texture_alignment_unit intel_vertical_texture_alignment_unit gen9_miptree_layout_1d use_linear_1d_layout It might be a good idea to add this prefix to all of them. I'll to do this in a separate series. >> +mesa_format format, >> +uint32_t width0, >> +uint32_t num_samples, >> +enum intel_miptree_tiling_mode requested, >> +struct intel_mipmap_tree *mt) > > You could change both 'brw' and 'mt' to constant pointers, they are only > used for reading. > Consider fixed. > With that: > > Reviewed-by: Topi Pohjolainen > >> +{ >> + if (format == MESA_FORMAT_S_UINT8) { >> + /* The stencil buffer is W tiled. However, we request from the kernel >> a >> + * non-tiled buffer because the GTT is incapable of W fencing. 
>> + */ >> + return I915_TILING_NONE; >> + } >> + >> + /* Some usages may want only one type of tiling, like depth miptrees (Y >> +* tiled), or temporary BOs for uploading data once (linear). >> +*/ >> + switch (requested) { >> + case INTEL_MIPTREE_TILING_ANY: >> + break; >> + case INTEL_MIPTREE_TILING_Y: >> + return I915_TILING_Y; >> + case INTEL_MIPTREE_TILING_NONE: >> + return I915_TILING_NONE; >> + } >> + >> + if (num_samples > 1) { >> + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled >> + * Surface"): >> + * >> + * [DevSNB+]: For multi-sample render targets, this field must be >> + * 1. MSRTs can only be tiled. >> + * >> + * Our usual reason for preferring X tiling (fast blits using the >> + * blitting engine) doesn't apply to MSAA, since we'll generally be >> + * downsampling or upsampling when blitting between the MSAA buffer >> + * and another buffer, and the blitting engine doesn't support that. >> + * So use Y tiling, since it makes better use of the cache. >> + */ >> + return I915_TILING_Y; >> + } >> + >> + GLenum base_format = _mesa_get_format_base_format(format); >> + if (base_format == GL_DEPTH_COMPONENT || >> + base_format == GL_DEPTH_STENCIL_EXT) >> + return I915_TILING_Y; >> + >> + /* 1D textures (and 1D array textures) don't get any benefit from tiling, >> +* in fact it leads to a less efficient use of memory space and bandwidth >> +* due to tile alignment. >> +*/ >> + if (mt->logical_height0 == 1) >> + return I915_TILING_NONE; >> + >> + int minimum_pitch = mt->total_width * mt->cpp; >> + >> + /* If the width is much smaller than a tile, don't bother tiling. 
*/ >> + if (minimum_pitch < 64) >> + return I915_TILING_NONE; >> + >> + if (ALIGN(minimum_pitch, 512) >= 32768 || >> + mt->total_width >= 32768 || mt->total_height >= 32768) { >> + perf_debug("%dx%d miptree too large to blit, falling back to untiled", >> + mt->total_width, mt->total_height); >> + return I915_TILING_NONE; >> + } >> + >> + /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ >> + if (brw->gen < 6) >> + return I915_TILING_X; >> + >> + /* From the Sandybridge PRM, Volume 1, Part 2, page 32: >> +* "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either >> TileX >> +* or Linear." >> +* 128 bits per pixel translates to 16 bytes per pixel. This is necessary >> +* all the way back to 965, but is permitted on Gen7+. >> +*/ >> + if (brw->gen < 7 && mt->cpp >= 16) >> + return I915_TILING_X; >> + >> + /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most >> +* messages), on p64, under the heading "Surface Vertical Alignment": >> +* >> +* This field must b
Re: [Mesa-dev] [PATCH 3/7] i965: Add gen7+ sampler state to batch debug
On Thu, Apr 23, 2015 at 04:49:59PM -0700, Ben Widawsky wrote: > OLD: > 0x7e00: 0x1000: WM SAMP0: filtering > 0x7e04: 0x000d: WM SAMP0: wrapping, lod > 0x7e08: 0x: WM SAMP0: default color pointer > 0x7e0c: 0x0090: WM SAMP0: chroma key, aniso > > NEW: > 0x7e00: 0x1000: SAMPLER_STATE 0: Disabled = no, Base Mip: 0.0, > Mip/Mag/Min Filter: NONE/NEAREST/NEAREST, LOD Bias: 0.0 > 0x7e04: 0x000d: SAMPLER_STATE 0: Min LOD: 0.0, Max LOD: 13.0 > 0x7e08: 0x: SAMPLER_STATE 0: Border Color > 0x7e0c: 0x0090: SAMPLER_STATE 0: Max aniso: RATIO 2:1, TC[XYZ] > Address Control: CLAMP|CLAMP|WRAP > > Signed-off-by: Ben Widawsky Reviewed-by: Topi Pohjolainen > --- > src/mesa/drivers/dri/i965/brw_state_dump.c | 71 > +- > 1 file changed, 70 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index 89c1a29..21a3d8f 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -32,6 +32,33 @@ > #include "brw_defines.h" > #include "brw_eu.h" > > +static const char *sampler_mip_filter[] = { > + "NONE", > + "NEAREST", > + "RSVD", > + "LINEAR" > +}; > + > +static const char *sampler_mag_filter[] = { > + "NEAREST", > + "LINEAR", > + "ANISOTROPIC", > + "FLEXIBLE (GEN8+)", > + "RSVD", "RSVD", > + "MONO", > + "RSVD" > +}; > + > +static const char *sampler_addr_mode[] = { > + "WRAP", > + "MIRROR", > + "CLAMP", > + "CUBE", > + "CLAMP_BORDER", > + "MIRROR_ONCE", > + "HALF_BORDER" > +}; > + > static void > batch_out(struct brw_context *brw, const char *name, uint32_t offset, > int index, char *fmt, ...) 
PRINTFLIKE(5, 6); > @@ -483,6 +510,45 @@ static void dump_sampler_state(struct brw_context *brw, > } > } > > +static void gen7_dump_sampler_state(struct brw_context *brw, > +uint32_t offset, uint32_t size) > +{ > + uint32_t *samp = brw->batch.bo->virtual + offset; > + char name[20]; > + > + for (int i = 0; i < size / 16; i++) { > + sprintf(name, "SAMPLER_STATE %d", i); > + batch_out(brw, name, offset, i, > +"Disabled = %s, Base Mip: %u.%u, Mip/Mag/Min Filter: > %s/%s/%s, LOD Bias: %d.%d\n", > +GET_BITS(samp[0], 31, 31) ? "yes" : "no", > +GET_BITS(samp[0], 26, 23), > +GET_BITS(samp[0], 22, 22), > +sampler_mip_filter[GET_FIELD(samp[0], > BRW_SAMPLER_MIP_FILTER)], > +sampler_mag_filter[GET_FIELD(samp[0], > BRW_SAMPLER_MAG_FILTER)], > +/* min filter defs are the same as mag */ > +sampler_mag_filter[GET_FIELD(samp[0], > BRW_SAMPLER_MIN_FILTER)], > +GET_BITS(samp[0], 13, 10), > +GET_BITS(samp[0], 9, 1) > + ); > + batch_out(brw, name, offset, i+1, "Min LOD: %u.%u, Max LOD: %u.%u\n", > +GET_BITS(samp[1], 31, 28), > +GET_BITS(samp[1], 27, 20), > +GET_BITS(samp[1], 19, 16), > +GET_BITS(samp[1], 15, 8) > + ); > + batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: > gen8+ */ > + batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] > Address Control: %s|%s|%s\n", > +(GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2, > +sampler_addr_mode[GET_FIELD(samp[3], > BRW_SAMPLER_TCX_WRAP_MODE)], > +sampler_addr_mode[GET_FIELD(samp[3], > BRW_SAMPLER_TCY_WRAP_MODE)], > +sampler_addr_mode[GET_FIELD(samp[3], > BRW_SAMPLER_TCZ_WRAP_MODE)] > + ); > + > + samp += 4; > + offset += 4 * sizeof(uint32_t); > + } > +} > + > static void dump_sf_viewport_state(struct brw_context *brw, > uint32_t offset) > { > @@ -784,7 +850,10 @@ dump_state_batch(struct brw_context *brw) >} >break; >case AUB_TRACE_SAMPLER_STATE: > - dump_sampler_state(brw, offset, size); > + if (brw->gen >= 7) > +gen7_dump_sampler_state(brw, offset, size); > + else > +dump_sampler_state(brw, offset, 
size); >break; >case AUB_TRACE_SAMPLER_DEFAULT_COLOR: >dump_sdc(brw, offset); > -- > 2.3.6 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Strip trailing contant zeroes in sample messages
On Friday, April 24, 2015 08:02:58 AM Neil Roberts wrote: > If a send message is emitted with a message length that is less than > required for the message then the remaining parameters default to > zero. We can take advantage of this to save a register when a shader > passes constant zeroes as the final coordinates to the sample > function. > > I think this might be useful for GLES applications that are using 2D > textures to simulate 1D textures. > > On Skylake it will be useful for shaders that do > texelFetch(tex,something,0) which I think is fairly common. This helps > more on Skylake because in that case the order of the instruction > operands are u,v,lod,r which is good for 2D textures whereas before > they were u,lod,v,r which is only good for 1D textures. > > On Haswell: > total instructions in shared programs: 8538662 -> 8537377 (-0.02%) > instructions in affected programs: 193546 -> 192261 (-0.66%) > helped:1032 > > On Skylake: > total instructions in shared programs: 10336216 -> 10332976 (-0.03%) > instructions in affected programs: 243118 -> 239878 (-1.33%) > helped:1066 > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 43 > > src/mesa/drivers/dri/i965/brw_fs.h | 1 + > 2 files changed, 44 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 61ee056..87a15b3 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2536,6 +2536,48 @@ fs_visitor::opt_algebraic() > } > > /** > + * Optimize sample messages that have constant zero values for the trailing > + * texture coordinates. We can just reduce the message length for these > + * instructions instead of reserving a register for it. Trailing parameters > + * that aren't sent default to zero anyway. This will cause the dead code > + * eliminator to remove the MOV instruction that would otherwise be emitted > to > + * set up the zero value. 
> + */ > +bool > +fs_visitor::opt_zero_samples() > +{ > + bool progress = false; > + > + foreach_block_and_inst(block, fs_inst, inst, cfg) { > + if ((inst->opcode == SHADER_OPCODE_TEX || > + inst->opcode == SHADER_OPCODE_TXF) && > + !inst->shadow_compare) { I like this idea! We definitely need to skip this optimization on Gen4, since the Gen4/G45 sampler infers the texturing opcode based on the message length. But for Gen5+, it should be no problem. Matt mentioned that we have to emit zero in some cases due to hardware bugs. IIRC, we used to skip some parameters in the middle - i.e. if the message took "u, v, r, lod"...and we were using a 2D texture...we'd omit 'r', since it shouldn't matter. But it did matter - and had to be zeroed. I think skipping ones at the end and reducing mlen should be fine. Why not do this for all texture messages, though? Or for that matter, all messages? inst->is_tex() or inst->mlen > 0 might make sense. > + fs_inst *load_payload = (fs_inst *) inst->prev; > + > + if (load_payload->is_head_sentinel() || > + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) > +continue; > + > + /* We don't want to remove the message header. Removing all of the > + * parameters is avoided because it seems to cause a GPU hang but I > + * can't find any documentation indicating that this is expected. > + */ > + while (inst->mlen > inst->header_present + dispatch_width / 8 && > +load_payload->src[(inst->mlen - inst->header_present) / > + (dispatch_width / 8) - 1].is_zero()) { > +inst->mlen -= dispatch_width / 8; > +progress = true; > + } Another idea...you could just create a new LOAD_PAYLOAD for what you want, and leave the old one in place just in case it's used (with the assumption that it's probably not, and dead code elimination will make it go away). Just a suggestion. > + } > + } > + > + if (progress) > + invalidate_live_intervals(); > + > + return progress; > +} > + > +/** > * Optimize sample messages which are followed by the final RT write. 
> * > * CHV, and GEN9+ can mark a texturing SEND instruction with EOT to have its > @@ -3824,6 +3866,7 @@ fs_visitor::optimize() > > pass_num = 0; > > + OPT(opt_zero_samples); > OPT(opt_sampler_eot); > > if (OPT(lower_load_payload)) { > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 4e17d44..6200deb 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -324,6 +324,7 @@ public: > bool opt_peephole_predicated_break(); > bool opt_saturate_propagation(); > bool opt_cmod_propagation(); > + bool opt_zero_samples(); > void emit_bool_to_cond_code(ir_rvalue *condition); > void emit_
Re: [Mesa-dev] [PATCH 3/7] i965: Add gen7+ sampler state to batch debug
On Fri, Apr 24, 2015 at 08:20:49PM +0300, Pohjolainen, Topi wrote: > On Thu, Apr 23, 2015 at 04:49:59PM -0700, Ben Widawsky wrote: > > OLD: > > 0x7e00: 0x1000: WM SAMP0: filtering > > 0x7e04: 0x000d: WM SAMP0: wrapping, lod > > 0x7e08: 0x: WM SAMP0: default color pointer > > 0x7e0c: 0x0090: WM SAMP0: chroma key, aniso > > > > NEW: > > 0x7e00: 0x1000: SAMPLER_STATE 0: Disabled = no, Base Mip: 0.0, > > Mip/Mag/Min Filter: NONE/NEAREST/NEAREST, LOD Bias: 0.0 > > 0x7e04: 0x000d: SAMPLER_STATE 0: Min LOD: 0.0, Max LOD: 13.0 > > 0x7e08: 0x: SAMPLER_STATE 0: Border Color > > 0x7e0c: 0x0090: SAMPLER_STATE 0: Max aniso: RATIO 2:1, TC[XYZ] > > Address Control: CLAMP|CLAMP|WRAP > > > > Signed-off-by: Ben Widawsky > > Reviewed-by: Topi Pohjolainen > > > --- > > src/mesa/drivers/dri/i965/brw_state_dump.c | 71 > > +- > > 1 file changed, 70 insertions(+), 1 deletion(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > > b/src/mesa/drivers/dri/i965/brw_state_dump.c > > index 89c1a29..21a3d8f 100644 > > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > > @@ -32,6 +32,33 @@ > > #include "brw_defines.h" > > #include "brw_eu.h" > > > > +static const char *sampler_mip_filter[] = { > > + "NONE", > > + "NEAREST", > > + "RSVD", > > + "LINEAR" > > +}; > > + > > +static const char *sampler_mag_filter[] = { > > + "NEAREST", > > + "LINEAR", > > + "ANISOTROPIC", > > + "FLEXIBLE (GEN8+)", > > + "RSVD", "RSVD", > > + "MONO", > > + "RSVD" > > +}; > > + > > +static const char *sampler_addr_mode[] = { > > + "WRAP", > > + "MIRROR", > > + "CLAMP", > > + "CUBE", > > + "CLAMP_BORDER", > > + "MIRROR_ONCE", > > + "HALF_BORDER" > > +}; > > + > > static void > > batch_out(struct brw_context *brw, const char *name, uint32_t offset, > > int index, char *fmt, ...) 
PRINTFLIKE(5, 6); > > @@ -483,6 +510,45 @@ static void dump_sampler_state(struct brw_context *brw, > > } > > } > > > > +static void gen7_dump_sampler_state(struct brw_context *brw, > > +uint32_t offset, uint32_t size) > > +{ > > + uint32_t *samp = brw->batch.bo->virtual + offset; Sorry, small nit, use const here, we are only reading. > > + char name[20]; > > + > > + for (int i = 0; i < size / 16; i++) { > > + sprintf(name, "SAMPLER_STATE %d", i); > > + batch_out(brw, name, offset, i, > > +"Disabled = %s, Base Mip: %u.%u, Mip/Mag/Min Filter: > > %s/%s/%s, LOD Bias: %d.%d\n", > > +GET_BITS(samp[0], 31, 31) ? "yes" : "no", > > +GET_BITS(samp[0], 26, 23), > > +GET_BITS(samp[0], 22, 22), > > +sampler_mip_filter[GET_FIELD(samp[0], > > BRW_SAMPLER_MIP_FILTER)], > > +sampler_mag_filter[GET_FIELD(samp[0], > > BRW_SAMPLER_MAG_FILTER)], > > +/* min filter defs are the same as mag */ > > +sampler_mag_filter[GET_FIELD(samp[0], > > BRW_SAMPLER_MIN_FILTER)], > > +GET_BITS(samp[0], 13, 10), > > +GET_BITS(samp[0], 9, 1) > > + ); > > + batch_out(brw, name, offset, i+1, "Min LOD: %u.%u, Max LOD: %u.%u\n", > > +GET_BITS(samp[1], 31, 28), > > +GET_BITS(samp[1], 27, 20), > > +GET_BITS(samp[1], 19, 16), > > +GET_BITS(samp[1], 15, 8) > > + ); > > + batch_out(brw, name, offset, i+2, "Border Color\n"); /* FINISHME: > > gen8+ */ > > + batch_out(brw, name, offset, i+3, "Max aniso: RATIO %d:1, TC[XYZ] > > Address Control: %s|%s|%s\n", > > +(GET_FIELD(samp[3], BRW_SAMPLER_MAX_ANISOTROPY) + 1) * 2, > > +sampler_addr_mode[GET_FIELD(samp[3], > > BRW_SAMPLER_TCX_WRAP_MODE)], > > +sampler_addr_mode[GET_FIELD(samp[3], > > BRW_SAMPLER_TCY_WRAP_MODE)], > > +sampler_addr_mode[GET_FIELD(samp[3], > > BRW_SAMPLER_TCZ_WRAP_MODE)] > > + ); > > + > > + samp += 4; > > + offset += 4 * sizeof(uint32_t); > > + } > > +} > > + > > static void dump_sf_viewport_state(struct brw_context *brw, > >uint32_t offset) > > { > > @@ -784,7 +850,10 @@ dump_state_batch(struct brw_context *brw) > > } > > break; > >case 
AUB_TRACE_SAMPLER_STATE: > > - dump_sampler_state(brw, offset, size); > > + if (brw->gen >= 7) > > +gen7_dump_sampler_state(brw, offset, size); > > + else > > +dump_sampler_state(brw, offset, size); > > break; > >case AUB_TRACE_SAMPLER_DEFAULT_COLOR: > > dump_sdc(brw, offset); > > -- > > 2.3.6 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
On Friday, April 24, 2015 09:59:09 AM kevin.rogo...@intel.com wrote: > From: Kevin Rogovin > > Ensure that the GPU spawns the fragment shader thread for those > fragment shaders with atomic buffer access. > > --- > src/mesa/drivers/dri/i965/gen7_wm_state.c | 7 +++ > src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 > 2 files changed, 11 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c > b/src/mesa/drivers/dri/i965/gen7_wm_state.c > index 82e116c..fa04221 100644 > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c > @@ -77,6 +77,13 @@ upload_wm_state(struct brw_context *brw) >dw1 |= GEN7_WM_KILL_ENABLE; > } > > + /* pixel shader must run if it has side-effects > +*/ > + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && > + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) { > + dw1 |= GEN7_WM_DISPATCH_ENABLE; > + } > + Hi Kevin, Checking brw->ctx.Shader._CurrentFragmentProgram != NULL is unnecessary. There is always a valid pixel shader. (If the application is using fixed-function, we supply a fragment shader for them.) Please drop that check. Also, this patch conflicts with Curro's ARB_image_load_store series - he was also setting the UAV bits. We'll have to sort out which should land first. Yours is smaller, but I think he did this in a more complete manner... 
> /* _NEW_BUFFERS | _NEW_COLOR */ > if (brw_color_buffer_write_enabled(brw) || writes_depth || > dw1 & GEN7_WM_KILL_ENABLE) { > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c > b/src/mesa/drivers/dri/i965/gen8_ps_state.c > index 5f39e12..614bc9b 100644 > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c > @@ -62,6 +62,10 @@ upload_ps_extra(struct brw_context *brw) > if (prog_data->uses_omask) >dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; > > + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && > + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) > + dw1 |= GEN8_PSX_SHADER_HAS_UAV; > + I thought that UAVs were essentially for Images...I'm not clear why this is needed. Perhaps Curro can confirm one way or another. > BEGIN_BATCH(2); > OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); > OUT_BATCH(dw1); > signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/7] i965: Add gen8 surface state debug info
On Thu, Apr 23, 2015 at 04:50:00PM -0700, Ben Widawsky wrote: > AFAICT, none of the old data was wrong (the gen7 decoder), but it wa smissing > a > bunch of stuff. > > Adds a tick (') to denote the beginning of the surface state for easier > reading. > This will be replaced later with some better, but more risky code. > > OLD: > 0x7980: 0x23016000: SURF: 2D BRW_SURFACEFORMAT_B8G8R8A8_UNORM > 0x7984: 0x1800: SURF: offset > 0x7988: 0x00ff00ff: SURF: 256x256 size, 0 mips, 1 slices > 0x798c: 0x03ff: SURF: pitch 1024, tiled > 0x7990: 0x: SURF: min array element 0, array extent 1 > 0x7994: 0x: SURF: mip base 0 > 0x7998: 0x: SURF: x,y offset: 0,0 > 0x799c: 0x0977: SURF: > 0x7940: 0x231d7000: SURF: 2D BRW_SURFACEFORMAT_R8G8B8A8_UNORM > 0x7944: 0x7800: SURF: offset > 0x7948: 0x001f001f: SURF: 32x32 size, 0 mips, 1 slices > 0x794c: 0x007f: SURF: pitch 128, tiled > 0x7950: 0x: SURF: min array element 0, array extent 1 > 0x7954: 0x: SURF: mip base 0 > 0x7958: 0x: SURF: x,y offset: 0,0 > 0x795c: 0x0977: SURF: > > NEW: > 0x7980: 0x23016000:SURF': 2D B8G8R8A8_UNORM VALIGN4 HALIGN4 > X-tiled > 0x7984: 0x1800: SURF: MOCS: 0x18 Base MIP: 0.0 (0 mips) > Surface QPitch: 0 > 0x7988: 0x00ff00ff: SURF: 256x256 [AUX_NONE] > 0x798c: 0x03ff: SURF: 1 slices (depth), pitch: 1024 > 0x7990: 0x: SURF: min array element: 0, array extent 1, > MULTISAMPLE_1 > 0x7994: 0x: SURF: x,y offset: 0,0, min LOD: 0 > 0x7998: 0x: SURF: AUX pitch: 0 qpitch: 0 > 0x799c: 0x0977: SURF: Clear color: > 0x7940: 0x231d7000:SURF': 2D R8G8B8A8_UNORM VALIGN4 HALIGN4 > Y-tiled > 0x7944: 0x7800: SURF: MOCS: 0x78 Base MIP: 0 (0 mips) > Surface QPitch: ff > 0x7948: 0x001f001f: SURF: 32x32 [AUX_NONE] > 0x794c: 0x007f: SURF: 1 slices (depth), pitch: 128 > 0x7950: 0x: SURF: min array element: 0, array extent 1, > MULTISAMPLE_1 > 0x7954: 0x: SURF: x,y offset: 0,0, min LOD: 0 > 0x7958: 0x: SURF: AUX pitch: 0 qpitch: 0 > 0x795c: 0x0977: SURF: Clear color: > 0x7920: 0x7980:BIND0: surface state address > 0x7924: 0x7940:BIND1: 
surface state address > > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_defines.h| 4 +- > src/mesa/drivers/dri/i965/brw_state_dump.c | 86 > -- > 2 files changed, 85 insertions(+), 5 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index e37d2e0..b9aae29 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -529,9 +529,11 @@ > #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) > #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) > > -/* Surface state DW0 */ > +/* Surface state DW1 */ > #define GEN8_SURFACE_MOCS_SHIFT 24 > #define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) > +#define GEN8_SURFACE_QPITCH_SHIFT 0 > +#define GEN8_SURFACE_QPITCH_MASKINTEL_MASK(14, 0) > > /* Surface state DW2 */ > #define BRW_SURFACE_HEIGHT_SHIFT 19 > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index 21a3d8f..642bdc8 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -59,6 +59,22 @@ static const char *sampler_addr_mode[] = { > "HALF_BORDER" > }; > > +static const char *surface_tiling[] = { > + "LINEAR", > + "W-tiled", > + "X-tiled", > + "Y-tiled" > +}; > + > +static const char *surface_aux_mode[] = { > + "AUX_NONE", > + "AUX_MCS", > + "AUX_APPEND", > + "AUX_HIZ", > + "RSVD", > + "RSVD" > +}; > + > static void > batch_out(struct brw_context *brw, const char *name, uint32_t offset, > int index, char *fmt, ...) PRINTFLIKE(5, 6); > @@ -461,6 +477,66 @@ static void dump_gen7_surface_state(struct brw_context > *brw, uint32_t offset) > batch_out(brw, name, offset, 7, "\n"); > } > > +static float q_to_float(uint32_t data, int integer_end, int integer_start, > +int fractional_end, int fractional_start) > +{ > + /* Convert the number to floating point. 
*/ > + float n = GET_BITS(data, integer_start, fractional_end); > + > + /* Multiple by 2^-n */ > + return n * pow(2, -(fractional_end - fractional_start + 1)); > +} > + > +static void dump_gen8_surface_state(struct brw_context *brw, uint32_t offset) > +{ > + const char *name = "SURF"; > + uint32_t *surf = brw->ba
Re: [Mesa-dev] [PATCH 4/7] i965: Add gen8 surface state debug info
On Fri, Apr 24, 2015 at 08:47:41PM +0300, Pohjolainen, Topi wrote: > On Thu, Apr 23, 2015 at 04:50:00PM -0700, Ben Widawsky wrote: > > AFAICT, none of the old data was wrong (the gen7 decoder), but it wa > > smissing a > > bunch of stuff. > > > > Adds a tick (') to denote the beginning of the surface state for easier > > reading. > > This will be replaced later with some better, but more risky code. > > > > OLD: > > 0x7980: 0x23016000: SURF: 2D BRW_SURFACEFORMAT_B8G8R8A8_UNORM > > 0x7984: 0x1800: SURF: offset > > 0x7988: 0x00ff00ff: SURF: 256x256 size, 0 mips, 1 slices > > 0x798c: 0x03ff: SURF: pitch 1024, tiled > > 0x7990: 0x: SURF: min array element 0, array extent 1 > > 0x7994: 0x: SURF: mip base 0 > > 0x7998: 0x: SURF: x,y offset: 0,0 > > 0x799c: 0x0977: SURF: > > 0x7940: 0x231d7000: SURF: 2D BRW_SURFACEFORMAT_R8G8B8A8_UNORM > > 0x7944: 0x7800: SURF: offset > > 0x7948: 0x001f001f: SURF: 32x32 size, 0 mips, 1 slices > > 0x794c: 0x007f: SURF: pitch 128, tiled > > 0x7950: 0x: SURF: min array element 0, array extent 1 > > 0x7954: 0x: SURF: mip base 0 > > 0x7958: 0x: SURF: x,y offset: 0,0 > > 0x795c: 0x0977: SURF: > > > > NEW: > > 0x7980: 0x23016000:SURF': 2D B8G8R8A8_UNORM VALIGN4 HALIGN4 > > X-tiled > > 0x7984: 0x1800: SURF: MOCS: 0x18 Base MIP: 0.0 (0 mips) > > Surface QPitch: 0 > > 0x7988: 0x00ff00ff: SURF: 256x256 [AUX_NONE] > > 0x798c: 0x03ff: SURF: 1 slices (depth), pitch: 1024 > > 0x7990: 0x: SURF: min array element: 0, array extent > > 1, MULTISAMPLE_1 > > 0x7994: 0x: SURF: x,y offset: 0,0, min LOD: 0 > > 0x7998: 0x: SURF: AUX pitch: 0 qpitch: 0 > > 0x799c: 0x0977: SURF: Clear color: > > 0x7940: 0x231d7000:SURF': 2D R8G8B8A8_UNORM VALIGN4 HALIGN4 > > Y-tiled > > 0x7944: 0x7800: SURF: MOCS: 0x78 Base MIP: 0 (0 mips) > > Surface QPitch: ff > > 0x7948: 0x001f001f: SURF: 32x32 [AUX_NONE] > > 0x794c: 0x007f: SURF: 1 slices (depth), pitch: 128 > > 0x7950: 0x: SURF: min array element: 0, array extent > > 1, MULTISAMPLE_1 > > 0x7954: 0x: SURF: x,y offset: 0,0, min 
LOD: 0 > > 0x7958: 0x: SURF: AUX pitch: 0 qpitch: 0 > > 0x795c: 0x0977: SURF: Clear color: > > 0x7920: 0x7980:BIND0: surface state address > > 0x7924: 0x7940:BIND1: surface state address > > > > Signed-off-by: Ben Widawsky > > --- > > src/mesa/drivers/dri/i965/brw_defines.h| 4 +- > > src/mesa/drivers/dri/i965/brw_state_dump.c | 86 > > -- > > 2 files changed, 85 insertions(+), 5 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > > b/src/mesa/drivers/dri/i965/brw_defines.h > > index e37d2e0..b9aae29 100644 > > --- a/src/mesa/drivers/dri/i965/brw_defines.h > > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > > @@ -529,9 +529,11 @@ > > #define GEN7_SURFACE_ARYSPC_FULL (0 << 10) > > #define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) > > > > -/* Surface state DW0 */ > > +/* Surface state DW1 */ > > #define GEN8_SURFACE_MOCS_SHIFT 24 > > #define GEN8_SURFACE_MOCS_MASK INTEL_MASK(30, 24) > > +#define GEN8_SURFACE_QPITCH_SHIFT 0 > > +#define GEN8_SURFACE_QPITCH_MASKINTEL_MASK(14, 0) > > > > /* Surface state DW2 */ > > #define BRW_SURFACE_HEIGHT_SHIFT 19 > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > > b/src/mesa/drivers/dri/i965/brw_state_dump.c > > index 21a3d8f..642bdc8 100644 > > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > > @@ -59,6 +59,22 @@ static const char *sampler_addr_mode[] = { > > "HALF_BORDER" > > }; > > > > +static const char *surface_tiling[] = { > > + "LINEAR", > > + "W-tiled", > > + "X-tiled", > > + "Y-tiled" > > +}; > > + > > +static const char *surface_aux_mode[] = { > > + "AUX_NONE", > > + "AUX_MCS", > > + "AUX_APPEND", > > + "AUX_HIZ", > > + "RSVD", > > + "RSVD" > > +}; > > + > > static void > > batch_out(struct brw_context *brw, const char *name, uint32_t offset, > > int index, char *fmt, ...) 
PRINTFLIKE(5, 6); > > @@ -461,6 +477,66 @@ static void dump_gen7_surface_state(struct brw_context > > *brw, uint32_t offset) > > batch_out(brw, name, offset, 7, "\n"); > > } > > > > +static float q_to_float(uint32_t data, int integer_end, int integer_start, > > +int fractional_end, int fractional_start) > > +{ > > + /* Convert the number to floating point. */ > > + float n = GET_BITS(data,
Re: [Mesa-dev] [PATCH 1/7] i965: Add all surface types to the batch decode
On Thursday, April 23, 2015 04:49:57 PM Ben Widawsky wrote: > It's true that not all surfaces apply for every gen, but for the most part > this > is what we want. (The unfortunate case is when we use an valid surface, but > not > for the specific GEN). > > This was automated with a vim macro. > > v2: Shortened common forms such as R8G8B8A8->RGBA8. Note that this makes some > of > the sample output in subsequent commits slightly incorrect. > > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_state_dump.c | 226 > - > 1 file changed, 219 insertions(+), 7 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index 5cf70eb..a688ba3 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -67,13 +67,225 @@ get_965_surfacetype(unsigned int surfacetype) > static const char * > get_965_surface_format(unsigned int surface_format) > { > -switch (surface_format) { > -case 0x000: return "r32g32b32a32_float"; > -case 0x0c1: return "b8g8r8a8_unorm"; > -case 0x100: return "b5g6r5_unorm"; > -case 0x102: return "b5g5r5a1_unorm"; > -case 0x104: return "b4g4r4a4_unorm"; > -default: return "unknown"; > + switch (surface_format) { > + case BRW_SURFACEFORMAT_R32G32B32A32_FLOAT: return "RGBA32_FLOAT"; > + case BRW_SURFACEFORMAT_R32G32B32A32_SINT: return "RGBA32_SINT"; > + case BRW_SURFACEFORMAT_R32G32B32A32_UINT: return "RGBA32_UINT"; > + case BRW_SURFACEFORMAT_R32G32B32A32_UNORM: return "RGBA32_UNORM"; > + case BRW_SURFACEFORMAT_R32G32B32A32_SNORM: return "RGBA32_SNORM"; I'd prefer to decode these exactly as they are..."R32G32B32A32_FLOAT". 
I think a better plan would be to edit the table in brw_surface_formats.c - add a const char *name field, and change the macro to: #define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ [BRW_SURFACEFORMAT_##sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color, #sf }, then remove BRW_SURFACEFORMAT_* from the entries in the table. With that in place, you can just look it up in the table, and adding new formats will be easy - all in one place. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 02/22] i965: Choose tiling in brw_miptree_layout() function
On Thu, Apr 23, 2015 at 4:38 PM, Pohjolainen, Topi wrote: > On Fri, Apr 17, 2015 at 04:51:23PM -0700, Anuj Phogat wrote: >> This refactoring is required by later patches in this series. >> >> Signed-off-by: Anuj Phogat >> --- >> src/mesa/drivers/dri/i965/brw_tex_layout.c| 19 +++- >> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 31 >> ++- >> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 19 ++-- >> 3 files changed, 51 insertions(+), 18 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c >> b/src/mesa/drivers/dri/i965/brw_tex_layout.c >> index 7a1e09d..b8408d3 100644 >> --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c >> +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c >> @@ -378,7 +378,13 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, >> } >> >> void >> -brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) >> +brw_miptree_layout(struct brw_context *brw, >> + mesa_format format, >> + uint32_t width0, >> + uint32_t num_samples, >> + bool for_bo, >> + enum intel_miptree_tiling_mode requested, >> + struct intel_mipmap_tree *mt) >> { >> bool multisampled = mt->num_samples > 1; > > Doesn't given "num_samples" equal to "mt->num_samples", "width0" equal to > "mt->logical_width0" and "format" equal to "mt->format" at this point? > Right. I'll remove them from parameters list. 
>> bool gen6_hiz_or_stencil = false; >> @@ -461,5 +467,16 @@ brw_miptree_layout(struct brw_context *brw, struct >> intel_mipmap_tree *mt) >> } >> DBG("%s: %dx%dx%d\n", __FUNCTION__, >> mt->total_width, mt->total_height, mt->cpp); >> + >> +/* pitch == 0 || height == 0 indicates the null texture */ >> + if (!mt || !mt->total_width || !mt->total_height) { >> + intel_miptree_release(&mt); >> + return; >> + } >> + >> + if (!for_bo) >> + mt->tiling = intel_miptree_choose_tiling(brw, format, width0, >> + num_samples, >> + requested, mt); >> } >> >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> index eb226d5..7a64282 100644 >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> @@ -232,6 +232,7 @@ intel_miptree_create_layout(struct brw_context *brw, >> GLuint depth0, >> bool for_bo, >> GLuint num_samples, >> +enum intel_miptree_tiling_mode requested, >> bool force_all_slices_at_each_lod) >> { >> struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); >> @@ -432,7 +433,7 @@ intel_miptree_create_layout(struct brw_context *brw, >> if (force_all_slices_at_each_lod) >>mt->array_layout = ALL_SLICES_AT_EACH_LOD; >> >> - brw_miptree_layout(brw, mt); >> + brw_miptree_layout(brw, format, width0, num_samples, for_bo, requested, >> mt); >> >> return mt; >> } >> @@ -440,7 +441,7 @@ intel_miptree_create_layout(struct brw_context *brw, >> /** >> * \brief Helper function for intel_miptree_create(). >> */ >> -static uint32_t >> +uint32_t >> intel_miptree_choose_tiling(struct brw_context *brw, >> mesa_format format, >> uint32_t width0, >> @@ -609,14 +610,11 @@ intel_miptree_create(struct brw_context *brw, >> first_level, last_level, width0, >> height0, depth0, >> false, num_samples, >> +requested_tiling, > > Just as Matt instructed me, you could fix tabs here in the argument list. > OK. 
>> force_all_slices_at_each_lod); >> - /* >> -* pitch == 0 || height == 0 indicates the null texture >> -*/ >> - if (!mt || !mt->total_width || !mt->total_height) { >> - intel_miptree_release(&mt); >> + >> + if (!mt) >>return NULL; >> - } >> >> total_width = mt->total_width; >> total_height = mt->total_height; >> @@ -627,16 +625,11 @@ intel_miptree_create(struct brw_context *brw, >>total_height = ALIGN(total_height, 64); >> } >> >> - uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0, >> - num_samples, >> requested_tiling, >> - mt); >> bool y_or_x = false; >> >> - if (tiling == (I915_TILING_Y | I915_TILING_X)) { >> + if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) { >>y_or_x = true; >>mt->tiling = I915_TILING_Y; >> - } else { >> - mt->tiling = tiling; >> } >> >> unsigned long pitch; >> @
Re: [Mesa-dev] [PATCH 5/7] i965: Add Gen9 surface state decoding
On Thu, Apr 23, 2015 at 04:50:02PM -0700, Ben Widawsky wrote: > Gen9 surface state is very similar to the previous generation. The important > changes here are aux mode, and the way clear colors work. > > NOTE: There are some things intentionally left out of this decoding. > > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_state_dump.c | 36 > -- > 1 file changed, 29 insertions(+), 7 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index 642bdc8..60e6b05 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -491,6 +491,22 @@ static void dump_gen8_surface_state(struct brw_context > *brw, uint32_t offset) > { > const char *name = "SURF"; > uint32_t *surf = brw->batch.bo->virtual + offset; > + int aux_mode = surf[7] & INTEL_MASK(2, 0); Same question as in the previous patch, surf[6] ? > + const char *aux_str; > + > + if (brw->gen >= 9) { > + bool msrt = GET_BITS(surf[4], 5, 3) > 0; > + switch (aux_mode) { > + case 5: > + aux_str = msrt ? "AUX_CCS_E [MCS]" : "AUX_CCS_D [MCS]"; The way I read the spec, I would have written this as: aux_str = msrt ? "AUX_CCS_E [MCS]" : "AUX_CCS_E [CCS]"; And the one below: aux_str = msrt ? "AUX_CCS_D [MCS]" : "AUX_CCS_D [CCS]"; But maybe I'm misreading. > + break; > + case 1: > + aux_str = msrt ? "AUX_CCS_E [CCS]" : "AUX_CCS_D [MCS]"; Missing break? 
> + default: > + aux_str = surface_aux_mode[aux_mode]; > + } > + } else > + aux_str = surface_aux_mode[aux_mode]; > > batch_out(brw, "SURF'", offset, 0, "%s %s %s VALIGN%d HALIGN%d %s\n", > get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), > @@ -509,7 +525,7 @@ static void dump_gen8_surface_state(struct brw_context > *brw, uint32_t offset) > batch_out(brw, name, offset, 2, "%dx%d [%s]\n", > GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, > GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, > - surface_aux_mode[surf[7] & INTEL_MASK(2, 0)] > + aux_str > ); > batch_out(brw, name, offset, 3, "%d slices (depth), pitch: %d\n", > GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1, > @@ -529,12 +545,18 @@ static void dump_gen8_surface_state(struct brw_context > *brw, uint32_t offset) > GET_FIELD(surf[6], GEN8_SURFACE_AUX_QPITCH) << 2, > GET_FIELD(surf[6], GEN8_SURFACE_AUX_PITCH) << 2 > ); > - batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", > - GET_BITS(surf[7], 31, 31) ? 'R' : '-', > - GET_BITS(surf[7], 30, 30) ? 'G' : '-', > - GET_BITS(surf[7], 29, 29) ? 'B' : '-', > - GET_BITS(surf[7], 28, 28) ? 'A' : '-' > -); > + if (brw->gen >= 9) { > + batch_out(brw, name, offset, 7, "Clear color: R(%x)G(%x)B(%x)A(%x)\n", > +surf[12], surf[13], surf[14], surf[15] > + ); > + } else { > + batch_out(brw, name, offset, 7, "Clear color: %c%c%c%c\n", > +GET_BITS(surf[7], 31, 31) ? 'R' : '-', > +GET_BITS(surf[7], 30, 30) ? 'G' : '-', > +GET_BITS(surf[7], 29, 29) ? 'B' : '-', > +GET_BITS(surf[7], 28, 28) ? 'A' : '-' > + ); > + } > } > > static void > -- > 2.3.6 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/4] i965: Enable ARB_gpu_shader5 on Gen8+.
--- docs/relnotes/10.6.0.html| 1 + src/mesa/drivers/dri/i965/intel_extensions.c | 8 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 48f76f9..dbf1229 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -55,6 +55,7 @@ Note: some of the new features are only available with certain drivers. GL_ARB_clip_control on i965 GL_ARB_program_interface_query (all drivers) GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe +GL_ARB_gpu_shader5 on i965/gen8+ Bug fixes diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 48064e1..c28c171 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -303,6 +303,8 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; + ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_texture_view = true; if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_transform_feedback2 = true; @@ -342,12 +344,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ANGLE_texture_compression_dxt = true; - if (brw->gen >= 7) - ctx->Extensions.ARB_shader_atomic_counters = true; - - if (brw->gen == 7) - ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.OES_texture_float = true; ctx->Extensions.OES_texture_float_linear = true; ctx->Extensions.OES_texture_half_float = true; -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] Revert "i965/fs: Allow SIMD16 borrow/carry/64-bit multiply on Gen > 7."
This reverts commit 9f5e5bd34d8ba48c851b442fb88f742b1ba6a571. I have no idea what made me believe these didn't apply to Gen > 7. They do, and without them we generate bad code that causes failures on Gen 8. --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 11f38c2..cf59570 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -892,7 +892,7 @@ fs_visitor::visit(ir_expression *ir) } break; case ir_binop_imul_high: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), @@ -929,7 +929,7 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_carry: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), @@ -940,7 +940,7 @@ fs_visitor::visit(ir_expression *ir) break; } case ir_binop_borrow: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] i965/fs: Fix code emission for imul_high in NIR.
Copy over from brw_fs_visitor.cpp. --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9564764..523e56d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -827,8 +827,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - emit(MUL(acc, op[0], op[1])); + fs_inst *mul = emit(MUL(acc, op[0], op[1])); emit(MACH(result, op[0], op[1])); + + /* Until Gen8, integer multiplies read 32-bits from one source, and + * 16-bits from the other, and relying on the MACH instruction to + * generate the high bits of the result. + * + * On Gen8, the multiply instruction does a full 32x32-bit multiply, + * but in order to do a 64x64-bit multiply we have to simulate the + * previous behavior and then use a MACH instruction. + * + * FINISHME: Don't use source modifiers on src1. + */ + if (devinfo->gen >= 8) { + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || +mul->src[1].type == BRW_REGISTER_TYPE_UD); + if (mul->src[1].type == BRW_REGISTER_TYPE_D) { +mul->src[1].type = BRW_REGISTER_TYPE_W; +mul->src[1].stride = 2; + } else { +mul->src[1].type = BRW_REGISTER_TYPE_UW; +mul->src[1].stride = 2; + } + } break; } -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] i965/fs: Fix stride for multiply in macro.
We have to use W/UW type for src1 of the multiply in the MUL/MACH macro, but in order to read the low 16-bits of each 32-bit integer, we need to set the appropriate stride. --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cf59570..f37fdea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -916,8 +916,10 @@ fs_visitor::visit(ir_expression *ir) mul->src[1].type == BRW_REGISTER_TYPE_UD); if (mul->src[1].type == BRW_REGISTER_TYPE_D) { mul->src[1].type = BRW_REGISTER_TYPE_W; +mul->src[1].stride = 2; } else { mul->src[1].type = BRW_REGISTER_TYPE_UW; +mul->src[1].stride = 2; } } -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 7/7] i965: Add gen8 blend state
On Thu, Apr 23, 2015 at 04:50:04PM -0700, Ben Widawsky wrote: > OLD: > 0x7340: 0x0080:BLEND: > 0x7344: 0x84202100:BLEND: > > NEW: > 0x7340: 0x0080:BLEND: Alpha blend/test > 0x7344: 0x000b84202100: BLEND_ENTRY00: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x734c: 0x000b84202100: BLEND_ENTRY01: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x7354: 0x000b84202100: BLEND_ENTRY02: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x735c: 0x000b84202100: BLEND_ENTRY03: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x7364: 0x000b84202100: BLEND_ENTRY04: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x736c: 0x000b84202100: BLEND_ENTRY05: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x7374: 0x000b84202100: BLEND_ENTRY06: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > 0x737c: 0x000b84202100: BLEND_ENTRY07: > Color Buffer Blend factor ONE,ONE,ONE,ONE > (src,dst,src alpha, dst alpha) > function ADD,ADD (color, alpha), Disables: > > Signed-off-by: Ben Widawsky > --- > src/mesa/drivers/dri/i965/brw_state_dump.c | 105 > - > 1 file changed, 103 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c > b/src/mesa/drivers/dri/i965/brw_state_dump.c > index 7217141..a98cef7 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_dump.c > +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c > @@ -1,5 +1,5 @@ > /* > - * Copyright © 2007 Intel Corporation > + * Copyright © 2007-2015 Intel Corporation > * > * Permission is hereby 
granted, free of charge, to any person obtaining a > * copy of this software and associated documentation files (the "Software"), > @@ -93,6 +93,25 @@ batch_out(struct brw_context *brw, const char *name, > uint32_t offset, > va_end(va); > } > > +static void > +batch_out64(struct brw_context *brw, const char *name, uint32_t offset, > +int index, char *fmt, ...) > +{ > + uint32_t *tmp = brw->batch.bo->virtual + offset; const > + > + /* Swap the dwords since we want to handle this as a 64b value, but the > data > +* is typically emitted as dwords. > +*/ > + uint64_t data = ((uint64_t)tmp[index + 1]) << 32 | tmp[index]; > + va_list va; > + > + fprintf(stderr, "0x%08x: 0x%016" PRIx64 ": %8s: ", > + offset + index * 4, data, name); > + va_start(va, fmt); > + vfprintf(stderr, fmt, va); > + va_end(va); > +} > + > static const char * > get_965_surfacetype(unsigned int surfacetype) > { > @@ -784,6 +803,85 @@ static void dump_blend_state(struct brw_context *brw, > uint32_t offset) > } > > static void > +gen8_dump_blend_state(struct brw_context *brw, uint32_t offset, uint32_t > size) > +{ > + uint32_t *blend = brw->batch.bo->virtual + offset; const > + const char *logicop[] = > + { > +"LOGICOP_CLEAR (BLACK)", > +"LOGICOP_NOR", > +"LOGICOP_AND_INVERTED", > +"LOGICOP_COPY_INVERTED", > +"LOGICOP_AND_REVERSE", > +"LOGICOP_INVERT", > +"LOGICOP_XOR", > +"LOGICOP_NAND", > +"LOGICOP_AND", > +"LOGICOP_EQUIV", > +"LOGICOP_NOOP", > +"LOGICOP_OR_INVERTED", > +"LOGICOP_COPY", > +"LOGICOP_OR_REVERSE", > +"LOGICOP_OR", > +"LOGICOP_SET (WHITE)" > + }; > + > + const char *blend_function[] = > + { "ADD", "SUBTRACT", "REVERSE_SUBTRACT", "MIN", "MAX};" }; > + > + const char *blend_factor[0x1b] = > + { > + "RSVD", > + "ONE", > + "SRC_COLOR", "SRC_ALPHA", > + "DST_ALPHA", "DST_COLOR", > + "SRC_ALPHA_SATURATE", > + "CONST_COLOR", "CONST_ALPHA", > + "SRC1_COLOR", "SRC1_ALPHA", > + "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", "RSVD", > + "ZERO", > + "INV_SRC_COLOR", "INV_SRC_ALPHA", > + "INV_DST_ALPHA", 
"INV_DST_COLOR", > + "RSVD", > + "INV_CONST_COLOR", "INV_CONST_ALPHA", > + "INV_SRC1_COLO
[Mesa-dev] [PATCH] mesa: put more info in glTexImage GL_OUT_OF_MEMORY error message
Give the user some idea about the size of the texture which caused the GL_OUT_OF_MEMORY error. --- src/mesa/main/teximage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index d07263c..7bc1da7 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -3320,7 +3320,9 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims, if (!sizeOK) { _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glTexImage%uD(image too large)", dims); + "glTexImage%uD(image too large: %d x %d x %d, %s format)", + dims, width, height, depth, + _mesa_lookup_enum_by_nr(internalFormat)); return; } -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/7] surface state decode improvements (gen8+)
On Thu, Apr 23, 2015 at 04:49:56PM -0700, Ben Widawsky wrote: > While trying to debug Skylake fast color clears, I noticed that the surface > state generated by our decoder was woefully inadequate. Much of the dumped > state > was so stale as to be useless. > > Just to be clear, the code is not auto generated from a spec, and so I'm > certain > there are bugs in the decoder. There are the typo kind of bugs, and then some > bugs where I feel the hardware spec is a bit vague, and I took a guess as to > how > things should work (I am looking at you Base mips). I guarantee that even with > whatever bugs present, the output with INTEL_DEBUG=batch is significantly > improved on all gen8+ platforms. Therefore, I think it makes a lot of sense to > not worry too much about any minor issues, and fix them up in later patches... > but it's your call. > > I made all of the character strings global since really any new code added to > the file would likely benefit from having it there. (Also potentially we get > slightly less bad performance when using INTEL_DEBUG=batch) > > I didn't intend to leave any major state changes out, so if you're aware of > one > I've missed, please don't hesitate to have me add it. I guess we can add things little by little later on. Many thanks for doing this, such thing would have been helpful to me before. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: put more info in glTexImage GL_OUT_OF_MEMORY error message
Good idea. Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 15/22] i965/gen9: Use _mesa_meta_pbo_TexSubImage to write to YF/YS surfaces
On Fri, Apr 17, 2015 at 04:51:36PM -0700, Anuj Phogat wrote: > No other path currently supports uploading data to these surfaces. > > Signed-off-by: Anuj Phogat > --- > src/mesa/drivers/dri/i965/intel_tex_image.c| 24 ++-- > src/mesa/drivers/dri/i965/intel_tex_subimage.c | 23 +-- > 2 files changed, 43 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c > b/src/mesa/drivers/dri/i965/intel_tex_image.c > index 31cbabe..03db100 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex_image.c > +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c > @@ -93,8 +93,10 @@ intelTexImage(struct gl_context * ctx, >const struct gl_pixelstore_attrib *unpack) > { > struct intel_texture_image *intelImage = intel_texture_image(texImage); > + struct brw_context *brw = brw_context(ctx); > bool ok; > - > + bool create_pbo = false; > + uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE; > bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo); > > DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", > @@ -111,15 +113,33 @@ intelTexImage(struct gl_context * ctx, > > assert(intelImage->mt); > > + if (brw->gen >= 9) { > + tr_mode = intelImage->mt->tr_mode; > + > + /* Set create_pbo = true for surfaces with > INTEL_MIPTREE_TRMODE_{YF/YS}. > + * _mesa_meta_pbo_TexSubImage() is the only working path to upload data > + * to such surfaces. > + */ > + create_pbo = tex_busy || (intelImage->mt && > + intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE); > + } else { > + create_pbo = tex_busy; > + } > + What would you think about a helper? static bool need_to_create_pbo(const struct intel_texture_image *img) { if (!img->mt) return false; const bool tex_busy = drm_intel_bo_busy(img->mt->bo); if (tex_busy || brw->gen < 9) return tex_busy; /* Set create_pbo = true for surfaces with INTEL_MIPTREE_TRMODE_{YF/YS}. * _mesa_meta_pbo_TexSubImage() is the only working path to upload data * to such surfaces. 
*/ return img->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE; } > ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage, 0, 0, 0, > texImage->Width, texImage->Height, > texImage->Depth, > format, type, pixels, > false /*allocate_storage*/, > - tex_busy, unpack); > + create_pbo, unpack); > if (ok) >return; > > + /* Currently there are no fallback paths to upload data to surfaces with > +* tr_mode != INTEL_MIPTREE_TRMODE_NONE. > +*/ > + assert(tr_mode == INTEL_MIPTREE_TRMODE_NONE); And I would put this assertion into _mesa_meta_pbo_TexSubImage() instead of duplicating it for both callers. What do you think? > + > ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, > 0, 0, 0, /*x,y,z offsets*/ > texImage->Width, > diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > index 909ff25..a7ad10e 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > @@ -200,8 +200,10 @@ intelTexSubImage(struct gl_context * ctx, > const struct gl_pixelstore_attrib *packing) > { > struct intel_texture_image *intelImage = intel_texture_image(texImage); > + struct brw_context *brw = brw_context(ctx); > bool ok; > - > + bool create_pbo = false; > + uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE; > bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo); > > DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", > @@ -210,13 +212,30 @@ intelTexSubImage(struct gl_context * ctx, > _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), > texImage->Level, texImage->Width, texImage->Height, texImage->Depth); > > + if (brw->gen >= 9) { > + tr_mode = intelImage->mt->tr_mode; > + /* Set create_pbo = true for surfaces with > INTEL_MIPTREE_TRMODE_{YF/YS}. > + * _mesa_meta_pbo_TexSubImage() is the only working path to upload data > + * to such surfaces. 
> + */ > + create_pbo = tex_busy || (intelImage->mt && > + intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE); > + } else { > + create_pbo = tex_busy; > + } > + > ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage, > xoffset, yoffset, zoffset, > width, height, depth, format, type, > - pixels, false, tex_busy, packing); > + pixel
[Mesa-dev] [Bug 89599] symbol 'x86_64_entry_start' is already defined when building with LLVM/clang
https://bugs.freedesktop.org/show_bug.cgi?id=89599 --- Comment #3 from yunl...@chromium.org --- Can we land this? -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH V2 16/22] i965/gen9: Use _mesa_meta_pbo_GetTexSubImage() to read YF/YS surfaces
On Fri, Apr 17, 2015 at 04:51:37PM -0700, Anuj Phogat wrote: > Currently, that's the only path that supports reading data from these buffers. > > Signed-off-by: Anuj Phogat > --- > src/mesa/drivers/dri/i965/intel_pixel_read.c | 22 -- > src/mesa/drivers/dri/i965/intel_tex_image.c | 28 > > 2 files changed, 40 insertions(+), 10 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c > b/src/mesa/drivers/dri/i965/intel_pixel_read.c > index 9ab5ed1..a7eefb3 100644 > --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c > +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c > @@ -221,15 +221,33 @@ intelReadPixels(struct gl_context * ctx, > > struct brw_context *brw = brw_context(ctx); > bool dirty; > + bool create_pbo = false; > + uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE; > > DBG("%s\n", __FUNCTION__); > > + if (brw->gen >= 9) { > + struct gl_renderbuffer_attachment *readAtt = const > + > &ctx->ReadBuffer->Attachment[ctx->ReadBuffer->_ColorReadBufferIndex]; > + struct intel_renderbuffer *irb = const > + intel_renderbuffer(readAtt->Renderbuffer); > + if (irb && irb->mt) { > + tr_mode = irb->mt->tr_mode; > + create_pbo = irb->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE; > + } > + } > + > if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, > height, 1, format, type, pixels, > - false /*create_pbo*/, > - true /*for_readpixels*/, pack)) > + create_pbo, true /*for_readpixels*/, > + pack)) >return; > > + /* Currently there are no fallback paths to read data from surfaces with > +* tr_mode != INTEL_MIPTREE_TRMODE_NONE. > +*/ > + assert(tr_mode == INTEL_MIPTREE_TRMODE_NONE); > + Similar suggestion as in the previous patch, put this into _mesa_meta_pbo_GetTexSubImage() in order to avoid both callers of having the same assertion. 
> if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) >perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); > > diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c > b/src/mesa/drivers/dri/i965/intel_tex_image.c > index 03db100..8e845c6 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex_image.c > +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c > @@ -498,19 +498,31 @@ intel_get_tex_image(struct gl_context *ctx, > struct gl_texture_image *texImage) { > struct brw_context *brw = brw_context(ctx); > bool ok; > + bool create_pbo = false; > + uint32_t tr_mode = INTEL_MIPTREE_TRMODE_NONE; > > DBG("%s\n", __FUNCTION__); > > - if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { > - if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, > -texImage->Width, texImage->Height, > -texImage->Depth, format, type, > -pixels, false /* create_pbo */, > -false /*for_readpixels*/, > &ctx->Pack)) > - return; > + if (brw->gen >= 9) { > + struct intel_texture_image *intelImage = intel_texture_image(texImage); > + tr_mode = intelImage->mt->tr_mode; > + create_pbo = intelImage->mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE; > + } And here you could take advantage of the helper I suggested in the previous patch, I think. > + > + if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, > + texImage->Width, texImage->Height, > + texImage->Depth, format, type, > + pixels, create_pbo, > + false /*for_readpixels*/, &ctx->Pack)) > + return; > + > + /* Currently there are no fallback paths to read data from surfaces with > +* tr_mode != INTEL_MIPTREE_TRMODE_NONE. 
> +*/ > + assert(tr_mode == INTEL_MIPTREE_TRMODE_NONE); > > + if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) >perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); > - } > > ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0, >texImage->Width, texImage->Height, > -- > 2.3.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
Kenneth Graunke writes: > On Friday, April 24, 2015 09:59:09 AM kevin.rogo...@intel.com wrote: >> From: Kevin Rogovin >> >> Ensure that the GPU spawns the fragment shader thread for those >> fragment shaders with atomic buffer access. >> >> --- >> src/mesa/drivers/dri/i965/gen7_wm_state.c | 7 +++ >> src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 >> 2 files changed, 11 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c >> b/src/mesa/drivers/dri/i965/gen7_wm_state.c >> index 82e116c..fa04221 100644 >> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c >> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c >> @@ -77,6 +77,13 @@ upload_wm_state(struct brw_context *brw) >>dw1 |= GEN7_WM_KILL_ENABLE; >> } >> >> + /* pixel shader must run if it has side-effects >> +*/ >> + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && >> + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) { >> + dw1 |= GEN7_WM_DISPATCH_ENABLE; >> + } >> + > > Hi Kevin, > > Checking brw->ctx.Shader._CurrentFragmentProgram != NULL is unnecessary. > There is always a valid pixel shader. (If the application is using > fixed-function, we supply a fragment shader for them.) Please drop > that check. > > Also, this patch conflicts with Curro's ARB_image_load_store series - he > was also setting the UAV bits. We'll have to sort out which should land > first. Yours is smaller, but I think he did this in a more complete > manner... > Meh. I'm OK with resolving the conflicts if this patch lands first. I haven't merged my patch yet (even though it has your R-b) because it depends on some other patches in the same series that haven't been reviewed yet. 
>> /* _NEW_BUFFERS | _NEW_COLOR */ >> if (brw_color_buffer_write_enabled(brw) || writes_depth || >> dw1 & GEN7_WM_KILL_ENABLE) { >> diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c >> b/src/mesa/drivers/dri/i965/gen8_ps_state.c >> index 5f39e12..614bc9b 100644 >> --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c >> +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c >> @@ -62,6 +62,10 @@ upload_ps_extra(struct brw_context *brw) >> if (prog_data->uses_omask) >>dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET; >> >> + if (brw->ctx.Shader._CurrentFragmentProgram!=NULL && >> + brw->ctx.Shader._CurrentFragmentProgram->NumAtomicBuffers > 0) >> + dw1 |= GEN8_PSX_SHADER_HAS_UAV; >> + > > I thought that UAVs were essentially for Images...I'm not clear why this > is needed. Perhaps Curro can confirm one way or another. > Yeah. I told him to enable this bit because it influences the calculation of the WM_INT::ThreadDispatchEnable signal on BDW [it also influences the cross-draw UAV coherency stuff but we don't currently need that]. Technically atomic counters are implemented using the hardware support for UAVs so it seems reasonable to me to set the bit. Another possibility would be to enable Force Thread Dispatch in 3DSTATE_WM which according to the B-Spec "must always be set to Normal, except for driver debug" -- Sounds like it may not have been properly validated? >> BEGIN_BATCH(2); >> OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); >> OUT_BATCH(dw1); >> signature.asc Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/4] Revert "i965/fs: Allow SIMD16 borrow/carry/64-bit multiply on Gen > 7."
On Friday, April 24, 2015 11:28:03 AM Matt Turner wrote: > This reverts commit 9f5e5bd34d8ba48c851b442fb88f742b1ba6a571. > > I have no idea what made me believe these didn't apply to Gen > 7. They > do, and without them we generate bad code that causes failures on Gen 8. > --- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 11f38c2..cf59570 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -892,7 +892,7 @@ fs_visitor::visit(ir_expression *ir) >} >break; > case ir_binop_imul_high: { > - if (devinfo->gen == 7) > + if (devinfo->gen >= 7) > no16("SIMD16 explicit accumulator operands unsupported\n"); > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > @@ -929,7 +929,7 @@ fs_visitor::visit(ir_expression *ir) >emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); >break; > case ir_binop_carry: { > - if (devinfo->gen == 7) > + if (devinfo->gen >= 7) > no16("SIMD16 explicit accumulator operands unsupported\n"); > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > @@ -940,7 +940,7 @@ fs_visitor::visit(ir_expression *ir) >break; > } > case ir_binop_borrow: { > - if (devinfo->gen == 7) > + if (devinfo->gen >= 7) > no16("SIMD16 explicit accumulator operands unsupported\n"); > >struct brw_reg acc = retype(brw_acc_reg(dispatch_width), > Series is: Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
> Checking brw->ctx.Shader._CurrentFragmentProgram != NULL is unnecessary. > There is always a valid pixel shader. (If the application is using > fixed-function, we supply a fragment shader for them.) Please drop that > check. Without this check (in the Gen7 function/code), about 30 crashes are induced on piglit tests for Gen7; the tests are all using the GL fixed-function pipeline. I have not run piglit without this check on Gen8 though. > I thought that UAVs were essentially for Images...I'm not clear why this is > needed. Perhaps Curro can confirm one way or another. The essential reason is to guarantee that the pixel shader gets invoked by Gen even when all render target surfaces are NULL surfaces. There are other flags one can use, but the UAV seems (to me) the most natural. -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/15] GL_AMD_performance_monitor
On Tuesday, March 31, 2015 01:56:32 PM Alex Deucher wrote: > I would prefer to keep support for AMD_performance_monitor in mesa. > We may implement more extensive support for this extension in our > radeon open source drivers and it would be nice to be compatible with > our closed source drivers on both Linux and other OSes. > > Alex I would prefer that as well. FWIW, the core Mesa code I wrote for GL_AMD_performance_monitor may be pretty poor for Radeon - it uses bitsets for a lot of things, which made sense on i965 since I had < 64 counters to expose. The last time I looked at the closed source AMD driver, it exposed over 6,000 counters. So you may need some better data structures... --Ken signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] clover: Call clBuildProgram() notification function when build completes
On 23/04/15 12:15, Francisco Jerez wrote: > Emil Velikov writes: > >> Humble ping. >> > This patch is obsolete. IIRC Tom sent a v2 to which I replied with some > (mostly trivial) suggestions. There's no v3 yet AFAIK. > I noticed the other email, although I got v1 and v2 swapped in my queue :-) Thanks for the correction. -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN
I want to add one more comment on why to replace with the _mesa_geometry_ functions, which I had thought was so obvious I neglected to mention it: With this series the meaning of gl_framebuffer::Width, Height, and so on have a different meaning. They give the intersection of the backing stores of the render targets. In contrast, the _mesa_geometry_* functions give the geometry to feed a rasterizer/windower. By using _mesa_geometry_* functions the code communicates clearly it wants the geometry to feed windower/rasterizer and not the geometry of the intersection of the (potentially empty) set of backing stores. Moreover, it is better to be consistent as well, as later someone will likely wonder: "why in Gen7 and higher are those _mesa_geometry functions used but not before?" That question has no good answer because it does not make sense to not use those functions when programming the rasterizer/windower thingies. -Kevin -Original Message- From: Rogovin, Kevin Sent: Friday, April 24, 2015 7:43 PM To: Pohjolainen, Topi Cc: mesa-...@freedesktop.org Subject: RE: [Mesa-dev] [PATCH 5/7] i965: use _mesa_geometry_width/height/layers/samples for programming geometry of framebuffer to GEN > My point specifically was that you are also updating atoms that _are not_ > re-used. > And as those changes are not really needed, I wouldn't take the risk > of changing something in vain. I would introduce them only when you have > patches to really enable older generations. My take is the following: 1. Tracking (and guaranteeing) that those function left unchanged as is are exactly just those for before Gen7 is a pain. Much easier, and more reliable to hit them all instead. A significant number of functions in i965 are not emit functions of any atom but emit functions of atoms map to them. Again, more reliable and -safer- to change them all, then just the bare minimum. 2. The change is benign. If _HasAttachments is true, then the function substitution gives the same value. 
For Gens not supporting the extension there is no effect. 3. Lastly, as stated: for later it leaves the option to enable it for Gen6 and below, it is just a trivial change, but it needs testing on hardware. When I was writing this work, I originally had it for all Gens, but changed to support only Gen7 and higher because that is all on which I can test it. -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/16] st/nine: Fix update_vertex_elements bad rebase
This code was supposed to be removed, but a rebase seems to have made it stay. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 6e01955..034e666 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -225,9 +225,6 @@ update_vertex_elements(struct NineDevice9 *device) memset(used_streams, 0, device->caps.MaxStreams); vs = device->state.vs ? device->state.vs : device->ff.vs; -if (!vdecl) /* no inputs */ -return; - if (vdecl) { for (n = 0; n < vs->num_inputs; ++n) { DBG("looking up input %u (usage %u) from vdecl(%p)\n", -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/16] st/nine: Fix comment in update_viewport
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_state.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 6c7eab3..27800c6 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -138,10 +138,9 @@ update_viewport(struct NineDevice9 *device) const D3DVIEWPORT9 *vport = &device->state.viewport; struct pipe_viewport_state pvport; -/* XXX: - * I hope D3D clip coordinates are still +/* D3D coordinates are: * -1 .. +1 for X,Y and - * 0 .. +1 for Z (use pipe_rasterizer_state.clip_halfz) + * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) */ pvport.scale[0] = (float)vport->Width * 0.5f; pvport.scale[1] = (float)vport->Height * -0.5f; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/16] st/nine: Change x86 FPU Control word on device creation as on wined3d and windows
From: Tiziano Bacocco Signed-off-by: Tiziano Bacocco --- src/gallium/state_trackers/nine/device9.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 1a776a7..4ef02bb 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -53,6 +53,18 @@ #define DBG_CHANNEL DBG_DEVICE +static void nine_setup_fpu(void) +{ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +WORD cw; +__asm__ volatile ("fnstcw %0" : "=m" (cw)); +cw = (cw & ~0xf3f) | 0x3f; +__asm__ volatile ("fldcw %0" : : "m" (cw)); +#else +WARN_ONCE("FPU setup not supported on non-x86 platforms\n"); +#endif +} + static void NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) { @@ -168,6 +180,9 @@ NineDevice9_ctor( struct NineDevice9 *This, IDirect3D9_AddRef(This->d3d9); ID3DPresentGroup_AddRef(This->present); +if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE)) +nine_setup_fpu(); + This->pipe = This->screen->context_create(This->screen, NULL); if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/11] st/nine: util_gen_mipmap doesn't need we reset states.
util_gen_mipmap uses pipe->blit, and thus we don't need to restore all states after using it. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index c9e82f3..7315e78 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -404,8 +404,6 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) first_layer, last_layer, filter); This->dirty_mip = FALSE; - -NineDevice9_RestoreNonCSOState(This->base.base.device, ~0x3); } HRESULT -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/11] st/nine: Fix NineBaseTexture9_PreLoad
It wasn't uploading the texture when the lod had changed. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 330827a..c9e82f3 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -573,7 +573,7 @@ NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); -if (This->managed.dirty && This->base.pool == D3DPOOL_MANAGED) +if (This->base.pool == D3DPOOL_MANAGED) NineBaseTexture9_UploadSelf(This); } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/16] st/nine: Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is specified
From: Xavier Bouchoux This behaviour matches windows drivers. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux --- src/gallium/state_trackers/nine/adapter9.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index 1d75155..9d6d659 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -311,7 +311,8 @@ NineAdapter9_CheckDeviceFormat( struct NineAdapter9 *This, if (CheckFormat == D3DFMT_ATOC && RType == D3DRTYPE_SURFACE) return D3D_OK; -if (Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) +if ((Usage & D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING) && +(Usage & D3DUSAGE_RENDERTARGET)) bind |= PIPE_BIND_BLENDABLE; if (Usage & D3DUSAGE_DMAP) { -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/16] st/nine: Fix D3DQUERYTYPE_TIMESTAMPFREQ query
From: Xavier Bouchoux D3DQUERYTYPE_TIMESTAMPFREQ is supposed to give the frequency at which the clock of D3DQUERYTYPE_TIMESTAMP runs. PIPE_QUERY_TIMESTAMP returns a value in ns, thus the corresponding frequency is 1000000000. PIPE_QUERY_TIMESTAMP_DISJOINT returns the frequency at which PIPE_QUERY_TIMESTAMP value is updated. It isn't always 1000000000. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux --- src/gallium/state_trackers/nine/query9.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/query9.c b/src/gallium/state_trackers/nine/query9.c index 466b4ba..df49340 100644 --- a/src/gallium/state_trackers/nine/query9.c +++ b/src/gallium/state_trackers/nine/query9.c @@ -254,7 +254,15 @@ NineQuery9_GetData( struct NineQuery9 *This, nresult.b = presult.timestamp_disjoint.disjoint; break; case D3DQUERYTYPE_TIMESTAMPFREQ: -nresult.u64 = presult.timestamp_disjoint.frequency; +/* Applications use it to convert the TIMESTAMP value to time. + AMD drivers on win seem to return the actual hardware clock + resolution and corresponding values in TIMESTAMP. + However, this behaviour is not easy to replicate here. + So instead we do what wine and opengl do, and use + nanoseconds TIMESTAMPs. + (Which is also the unit used by PIPE_QUERY_TIMESTAMP.) +*/ +nresult.u64 = 1000000000; break; case D3DQUERYTYPE_VERTEXSTATS: nresult.vertexstats.NumRenderedTriangles = -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/11] st/nine: Remove Managed texture hack.
Previously binding an uninitialized managed texture was causing a crash, and a workaround was added to prevent the crash. This patch removes this workaround and instead sets the initial state of managed textures as dirty, so that when the texture is bound for the first time, it is always initialized. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index b868019..17a8f44 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -69,6 +69,10 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, D3DTEXF_LINEAR : D3DTEXF_NONE; This->managed.lod = 0; This->managed.lod_resident = -1; +/* Mark the texture as dirty to trigger first upload when we need the texture, + * even if it wasn't set by the application */ +if (Pool == D3DPOOL_MANAGED) +This->managed.dirty = TRUE; /* When a depth buffer is sampled, it is for shadow mapping, except for * D3DFMT_INTZ, D3DFMT_DF16 and D3DFMT_DF24. * In addition D3DFMT_INTZ can be used for both texturing and depth buffering @@ -496,9 +500,6 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This, if (unlikely(This->format == D3DFMT_NULL)) return D3D_OK; NineBaseTexture9_Dump(This); -/* hack due to incorrect POOL_MANAGED handling */ -NineBaseTexture9_GenerateMipSubLevels(This); -resource = This->base.resource; } assert(resource); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/16] st/nine: Fix wrong assert in nine_shader
The sampler src index was wrong for texldl and texldd Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_shader.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 0fd3d37..1f1f7c8 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2493,8 +2493,8 @@ DECL_SPECIAL(TEXLDD) tx_src_param(tx, &tx->insn.src[2]), tx_src_param(tx, &tx->insn.src[3]) }; -assert(tx->insn.src[3].idx >= 0 && - tx->insn.src[3].idx < Elements(tx->sampler_targets)); +assert(tx->insn.src[1].idx >= 0 && + tx->insn.src[1].idx < Elements(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); @@ -2509,8 +2509,8 @@ DECL_SPECIAL(TEXLDL) tx_src_param(tx, &tx->insn.src[0]), tx_src_param(tx, &tx->insn.src[1]) }; -assert(tx->insn.src[3].idx >= 0 && - tx->insn.src[3].idx < Elements(tx->sampler_targets)); +assert(tx->insn.src[1].idx >= 0 && + tx->insn.src[1].idx < Elements(tx->sampler_targets)); target = tx->sampler_targets[tx->insn.src[1].idx]; ureg_TXL(tx->ureg, dst, target, src[0], src[1]); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/16] st/nine: Improve D3DQUERYTYPE_TIMESTAMP
From: Xavier Bouchoux Avoid blocking when retrieving D3DQUERYTYPE_TIMESTAMP result with NineQuery9_GetData(), when D3DGETDATA_FLUSH is not specified. This mimics Win behaviour and gives slightly better performance for some games. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux --- src/gallium/state_trackers/nine/query9.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/nine/query9.c b/src/gallium/state_trackers/nine/query9.c index df49340..04f4aad 100644 --- a/src/gallium/state_trackers/nine/query9.c +++ b/src/gallium/state_trackers/nine/query9.c @@ -227,8 +227,13 @@ NineQuery9_GetData( struct NineQuery9 *This, wait_query_result = TRUE; } -/* Wine tests: D3DQUERYTYPE_TIMESTAMP always succeeds */ -wait_query_result |= This->type == D3DQUERYTYPE_TIMESTAMP; +/* The documentation mentions no special case for D3DQUERYTYPE_TIMESTAMP. + * However Windows tests show that the query always succeeds when + * D3DGETDATA_FLUSH is specified. */ +if (This->type == D3DQUERYTYPE_TIMESTAMP && +(dwGetDataFlags & D3DGETDATA_FLUSH)) +wait_query_result = TRUE; + /* Note: We ignore dwGetDataFlags, because get_query_result will * flush automatically if needed */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/16] st/nine: Fix computation of const_used_size
From: Xavier Bouchoux Was sometimes too large for PS. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux --- src/gallium/state_trackers/nine/nine_shader.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 1f1f7c8..7beb1ab 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -3082,6 +3082,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) HRESULT hr = D3D_OK; const unsigned processor = tgsi_processor_from_type(info->type); unsigned s, slot_max; +unsigned max_const_f; user_assert(processor != ~0, D3DERR_INVALIDCALL); @@ -3221,11 +3222,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) if (tx->indirect_const_access) /* vs only */ info->const_float_slots = device->max_vs_const_f; +max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; slot_max = info->const_bool_slots > 0 ? - device->max_vs_const_f + NINE_MAX_CONST_I - + info->const_bool_slots : + max_const_f + NINE_MAX_CONST_I + + (info->const_bool_slots+3)/4 : info->const_int_slots > 0 ? - device->max_vs_const_f + info->const_int_slots : + max_const_f + info->const_int_slots : info->const_float_slots; info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/16] Some Gallium Nine fixes
Here are 16 various fixes for Gallium Nine. They should be independent from each other. Next is a series of 11 patches to rework the handling of the MANAGED pool for textures. This is a pool for which all textures have a ram backing, which is uploaded when needed to GPU memory. They have several restrictions on their use. These patches clean up the implementation (more comments, remove handling of things that were forbidden by spec, some fixes). This series solves issues we had with these textures on some games (black textures, crash without the 'hack' we had pushed some time ago). This series should be harder to review, while the 16 independent patches should be easier to review. Axel Davy (9): st/nine: Handle special LIT case st/nine: Fix wrong assert in nine_shader st/nine: Workaround barycentrics issue on some cards st/nine: Fix comment in update_viewport st/nine: Do not advertise D3DDEVCAPS_TEXTURESYSTEMMEMORY st/nine: Rework update_vertex_buffers st/nine: Add debug warning when application uses sw processing st/nine: Fix update_vertex_elements bad rebase st/nine: Rework texture data allocation Patrick Rudolph (1): st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers Tiziano Bacocco (1): st/nine: Change x86 FPU Control word on device creation as on wined3d and windows Xavier Bouchoux (5): st/nine: Clear struct pipe_blit_info before use.
st/nine: Fix D3DQUERYTYPE_TIMESTAMPFREQ query st/nine: Improve D3DQUERYTYPE_TIMESTAMP st/nine: Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is specified st/nine: Fix computation of const_used_size src/gallium/state_trackers/nine/adapter9.c | 5 ++- src/gallium/state_trackers/nine/cubetexture9.c | 45 +++- src/gallium/state_trackers/nine/cubetexture9.h | 1 + src/gallium/state_trackers/nine/device9.c| 53 ++-- src/gallium/state_trackers/nine/device9.h| 4 ++ src/gallium/state_trackers/nine/indexbuffer9.c | 14 --- src/gallium/state_trackers/nine/nine_pipe.h | 34 +++ src/gallium/state_trackers/nine/nine_shader.c| 35 src/gallium/state_trackers/nine/nine_state.c | 47 + src/gallium/state_trackers/nine/query9.c | 19 +++-- src/gallium/state_trackers/nine/surface9.c | 50 +++--- src/gallium/state_trackers/nine/surface9.h | 1 - src/gallium/state_trackers/nine/swapchain9.c | 2 + src/gallium/state_trackers/nine/texture9.c | 23 +- src/gallium/state_trackers/nine/texture9.h | 1 + src/gallium/state_trackers/nine/vertexbuffer9.c | 4 +- src/gallium/state_trackers/nine/volumetexture9.c | 3 ++ 17 files changed, 254 insertions(+), 87 deletions(-) -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/16] st/nine: Clear struct pipe_blit_info before use.
From: Xavier Bouchoux render_condition_enable was uninitialized. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux --- src/gallium/state_trackers/nine/device9.c| 2 ++ src/gallium/state_trackers/nine/swapchain9.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 1430ca5..43eb7e6 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -1342,6 +1342,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This, (pSourceRect->left <= pSourceRect->right && pSourceRect->top <= pSourceRect->bottom), D3DERR_INVALIDCALL); +memset(&blit, 0, sizeof(blit)); blit.dst.resource = dst_res; blit.dst.level = dst->level; blit.dst.box.z = dst->layer; @@ -2130,6 +2131,7 @@ NineDevice9_ResolveZ( struct NineDevice9 *This ) desc = util_format_description(dst->format); user_assert(desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS, D3DERR_INVALIDCALL); +memset(&blit, 0, sizeof(blit)); blit.src.resource = src; blit.src.level = 0; blit.src.format = src->format; diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index 296d6dc..f91b3be 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -554,6 +554,7 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r struct pipe_blit_info blit; if (device->cursor.software && device->cursor.visible && device->cursor.w) { +memset(&blit, 0, sizeof(blit)); blit.src.resource = device->cursor.image; blit.src.level = 0; blit.src.format = device->cursor.image->format; @@ -677,6 +678,7 @@ present( struct NineSwapChain9 *This, handle_draw_cursor_and_hud(This, resource); if (This->present_buffers) { +memset(&blit, 0, sizeof(blit)); blit.src.resource = resource; blit.src.level = 0; blit.src.format = resource->format; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 13/16] st/nine: Rework update_vertex_buffers
The previous code tried to batch set_vertex_buffers calls over contiguous ranges of buffers, in order to make as few calls as possible. However, in practice doing so isn't faster (drivers implement set_vertex_buffers as a loop over the buffers we want to bind). When we wanted to unbind a buffer, we were calling set_vertex_buffers on a buffer with vtxbuf->buffer = NULL. It works on some drivers, but not on all of them, because it isn't in the Gallium spec. This patch fixes that. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_state.c | 15 --- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 27800c6..6e01955 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -631,7 +631,6 @@ update_vertex_buffers(struct NineDevice9 *device) uint32_t mask = state->changed.vtxbuf; unsigned i; unsigned start; -unsigned count = 0; DBG("mask=%x\n", mask); @@ -650,18 +649,12 @@ update_vertex_buffers(struct NineDevice9 *device) for (i = 0; mask; mask >>= 1, ++i) { if (mask & 1) { -if (!count) -start = i; -++count; -} else { -if (count) -pipe->set_vertex_buffers(pipe, start, count, - &state->vtxbuf[start]); -count = 0; +if (state->vtxbuf[i].buffer) +pipe->set_vertex_buffers(pipe, i, 1, &state->vtxbuf[i]); +else +pipe->set_vertex_buffers(pipe, i, 1, NULL); } } -if (count) -pipe->set_vertex_buffers(pipe, start, count, &state->vtxbuf[start]); state->changed.vtxbuf = 0; } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/16] st/nine: Handle special LIT case
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/nine_shader.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 2ba625e..0fd3d37 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2040,6 +2040,23 @@ DECL_SPECIAL(LOG) return D3D_OK; } +DECL_SPECIAL(LIT) +{ +struct ureg_program *ureg = tx->ureg; +struct ureg_dst tmp = tx_scratch(tx); +struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); +struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); +ureg_LIT(ureg, tmp, src); +/* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 + * states that dst.z is 0 when src.y <= 0. Gallium definition can assign + * it 0^0 if src.w=0, which value is driver dependent. */ +ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), + ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); +ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); +return D3D_OK; +} + DECL_SPECIAL(NRM) { struct ureg_program *ureg = tx->ureg; @@ -2543,7 +2560,7 @@ struct sm1_op_info inst_table[] = _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ -_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */ +_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/11] st/nine: Some D3DUSAGE_AUTOGENMIPMAP fixes
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/device9.c | 14 +- src/gallium/state_trackers/nine/surface9.c | 2 +- src/gallium/state_trackers/nine/surface9.h | 3 +++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 698c4e3..2bb1398 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -1198,6 +1198,13 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) { /* Only the first level is updated, the others regenerated. */ last_level = 0; +/* if the source has D3DUSAGE_AUTOGENMIPMAP, we have to ignore + * the sublevels, thus level 0 has to match */ +user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP) || +(srcb->base.info.width0 == dstb->base.info.width0 && + srcb->base.info.height0 == dstb->base.info.height0 && + srcb->base.info.depth0 == dstb->base.info.depth0), +D3DERR_INVALIDCALL); } else { user_assert(!(srcb->base.usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL); } @@ -1260,8 +1267,10 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, assert(!"invalid texture type"); } -if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) +if (dstb->base.usage & D3DUSAGE_AUTOGENMIPMAP) { +dstb->dirty_mip = TRUE; NineBaseTexture9_GenerateMipSubLevels(dstb); +} return D3D_OK; } @@ -1493,6 +1502,9 @@ NineDevice9_StretchRect( struct NineDevice9 *This, &blit.src.box); } +/* Communicate the container it needs to update sublevels - if apply */ +NineSurface9_MarkContainerDirty(dst); + return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 2706efa..de81e0d 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -231,7 +231,7 @@ NineSurface9_GetContainer( struct NineSurface9 *This, return hr; } -static INLINE void +void 
NineSurface9_MarkContainerDirty( struct NineSurface9 *This ) { if (This->texture) { diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h index aa586f3..0e527fe 100644 --- a/src/gallium/state_trackers/nine/surface9.h +++ b/src/gallium/state_trackers/nine/surface9.h @@ -83,6 +83,9 @@ NineSurface9_dtor( struct NineSurface9 *This ); /*** Nine private ***/ +void +NineSurface9_MarkContainerDirty( struct NineSurface9 *This ); + struct pipe_surface * NineSurface9_CreatePipeSurface( struct NineSurface9 *This, const int sRGB ); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/11] st/nine: D3DUSAGE_AUTOGENMIPMAP is forbidden for volumes
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/volume9.c| 3 --- src/gallium/state_trackers/nine/volumetexture9.c | 14 +- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 8c9f148..b34ee07 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -169,9 +169,6 @@ NineVolume9_MarkContainerDirty( struct NineVolume9 *This ) assert(tex); if (This->desc.Pool == D3DPOOL_MANAGED) tex->managed.dirty = TRUE; -else -if (This->desc.Usage & D3DUSAGE_AUTOGENMIPMAP) -tex->dirty_mip = TRUE; BASETEX_REGISTER_UPDATE(tex); } diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index 8f1de13..1193e12 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -51,14 +51,10 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, /* An IDirect3DVolume9 cannot be bound as a render target can it ? 
*/ user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)), D3DERR_INVALIDCALL); -user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP) || -(Pool != D3DPOOL_SYSTEMMEM && Levels <= 1), D3DERR_INVALIDCALL); +user_assert(!(Usage & D3DUSAGE_AUTOGENMIPMAP), D3DERR_INVALIDCALL); user_assert(!pSharedHandle, D3DERR_INVALIDCALL); /* TODO */ -if (Usage & D3DUSAGE_AUTOGENMIPMAP) -Levels = 0; - pf = d3d9_to_pipe_format_checked(screen, Format, PIPE_TEXTURE_3D, 0, PIPE_BIND_SAMPLER_VIEW, FALSE); if (pf == PIPE_FORMAT_NONE) @@ -145,8 +141,6 @@ NineVolumeTexture9_GetLevelDesc( struct NineVolumeTexture9 *This, D3DVOLUME_DESC *pDesc ) { user_assert(Level <= This->base.base.info.last_level, D3DERR_INVALIDCALL); -user_assert(Level == 0 || !(This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP), -D3DERR_INVALIDCALL); *pDesc = This->volumes[Level]->desc; @@ -159,8 +153,6 @@ NineVolumeTexture9_GetVolumeLevel( struct NineVolumeTexture9 *This, IDirect3DVolume9 **ppVolumeLevel ) { user_assert(Level <= This->base.base.info.last_level, D3DERR_INVALIDCALL); -user_assert(Level == 0 || !(This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP), -D3DERR_INVALIDCALL); NineUnknown_AddRef(NineUnknown(This->volumes[Level])); *ppVolumeLevel = (IDirect3DVolume9 *)This->volumes[Level]; @@ -179,8 +171,6 @@ NineVolumeTexture9_LockBox( struct NineVolumeTexture9 *This, This, Level, pLockedVolume, pBox, Flags); user_assert(Level <= This->base.base.info.last_level, D3DERR_INVALIDCALL); -user_assert(Level == 0 || !(This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP), -D3DERR_INVALIDCALL); return NineVolume9_LockBox(This->volumes[Level], pLockedVolume, pBox, Flags); @@ -204,8 +194,6 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, DBG("This=%p pDirtybox=%p\n", This, pDirtyBox); if (This->base.base.pool != D3DPOOL_MANAGED) { -if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) -This->base.dirty_mip = TRUE; return D3D_OK; } This->base.managed.dirty = TRUE; -- 2.1.0 ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/16] st/nine: Workaround barycentrics issue on some cards
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/device9.c| 4 +++- src/gallium/state_trackers/nine/device9.h| 4 src/gallium/state_trackers/nine/nine_state.c | 24 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 43eb7e6..9ca1bb9 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -310,8 +310,10 @@ NineDevice9_ctor( struct NineDevice9 *This, return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || -strstr(pScreen->get_name(pScreen), "ATI")) +strstr(pScreen->get_name(pScreen), "ATI")) { This->prefer_user_constbuf = TRUE; +This->driver_bugs.buggy_barycentrics = TRUE; +} tmpl.target = PIPE_BUFFER; tmpl.format = PIPE_FORMAT_R8_UNORM; diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index f412088..d662f83 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -118,6 +118,10 @@ struct NineDevice9 boolean ps_integer; } driver_caps; +struct { +boolean buggy_barycentrics; +} driver_bugs; + struct u_upload_mgr *upload; struct nine_range_pool range_pool; diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 495cc86..6c7eab3 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -150,6 +150,30 @@ update_viewport(struct NineDevice9 *device) pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; pvport.translate[2] = vport->MinZ; +/* We found R600 and SI cards have some imprecision + * on the barycentric coordinates used for interpolation. + * Some shaders rely on having something precise. + * We found that the proprietary driver has the imprecision issue, + * except when the render target width and height are powers of two. 
+ * It is using some sort of workaround for these cases + * which covers likely all the cases the applications rely + * on something precise. + * We haven't found the workaround, but it seems like it's better + * for applications if the imprecision is biased towards infinity + * instead of -infinity (which is what measured). So shift slightly + * the viewport: not enough to change rasterization result (in particular + * for multisampling), but enough to make the imprecision biased + * towards infinity. We do this shift only if render target width and + * height are powers of two. + * Solves 'red shadows' bug on UE3 games. + */ +if (device->driver_bugs.buggy_barycentrics && +((vport->Width & (vport->Width-1)) == 0) && +((vport->Height & (vport->Height-1)) == 0)) { +pvport.translate[0] -= 1.0f / 128.0f; +pvport.translate[1] -= 1.0f / 128.0f; +} + pipe->set_viewport_states(pipe, 0, 1, &pvport); } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/16] st/nine: Do not advertise D3DDEVCAPS_TEXTURESYSTEMMEMORY
No major vendor advertises it, and we weren't supporting it. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/adapter9.c | 2 +- src/gallium/state_trackers/nine/device9.c | 10 ++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index 069cc03..1d75155 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -545,7 +545,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, /*D3DDEVCAPS_RTPATCHHANDLEZERO |*/ /*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/ /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/ - D3DDEVCAPS_TEXTURESYSTEMMEMORY | + /* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/ D3DDEVCAPS_TEXTUREVIDEOMEMORY | D3DDEVCAPS_TLVERTEXSYSTEMMEMORY | D3DDEVCAPS_TLVERTEXVIDEOMEMORY; diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 9ca1bb9..1a776a7 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -2406,14 +2406,8 @@ NineDevice9_SetTexture( struct NineDevice9 *This, Stage == D3DDMAPSAMPLER || (Stage >= D3DVERTEXTEXTURESAMPLER0 && Stage <= D3DVERTEXTEXTURESAMPLER3), D3DERR_INVALIDCALL); -user_assert(!tex || tex->base.pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL); - -if (unlikely(tex && tex->base.pool == D3DPOOL_SYSTEMMEM)) { -/* TODO: Currently not implemented. Better return error - * with message telling what's wrong */ -ERR("This=%p D3DPOOL_SYSTEMMEM not implemented for SetTexture\n", This); -user_assert(tex->base.pool != D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL); -} +user_assert(!tex || (tex->base.pool != D3DPOOL_SCRATCH && +tex->base.pool != D3DPOOL_SYSTEMMEM), D3DERR_INVALIDCALL); if (Stage >= D3DDMAPSAMPLER) Stage = Stage - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/11] st/nine: Enforce LOD 0 for D3DUSAGE_AUTOGENMIPMAP
For D3DUSAGE_AUTOGENMIPMAP textures, applications can only lock/copy from/get surface descriptor for/etc the first level. Thus it makes sense to restrict the LOD to 0, and use only the first level to generate the sublevels. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 8 +--- src/gallium/state_trackers/nine/texture9.c | 5 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 7315e78..b868019 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -104,12 +104,15 @@ NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This, DWORD LODNew ) { DWORD old = This->managed.lod; +DWORD max_level; DBG("This=%p LODNew=%d\n", This, LODNew); user_assert(This->base.pool == D3DPOOL_MANAGED, 0); -This->managed.lod = MIN2(LODNew, This->base.info.last_level); +max_level = (This->base.usage & D3DUSAGE_AUTOGENMIPMAP) ? +0 : This->base.info.last_level; +This->managed.lod = MIN2(LODNew, max_level); if (This->managed.lod != old && This->bind_count && LIST_IS_EMPTY(&This->list)) list_add(&This->list, &This->base.base.device->update_textures); @@ -172,7 +175,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) assert(This->base.pool == D3DPOOL_MANAGED); if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP) -last_level = 0; /* TODO: What if level 0 is not resident ? 
*/ +last_level = 0; update_lod = This->managed.lod_resident != This->managed.lod; if (!update_lod && !This->managed.dirty) @@ -366,7 +369,6 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP) This->dirty_mip = TRUE; -/* TODO: if dirty only because of lod change, only generate added levels */ DBG("DONE, generate mip maps = %i\n", This->dirty_mip); return D3D_OK; diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index e0aec3c..4ce8248 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -139,6 +139,11 @@ NineTexture9_ctor( struct NineTexture9 *This, if (pSharedHandle && *pSharedHandle) { /* Pool == D3DPOOL_SYSTEMMEM */ user_buffer = (void *)*pSharedHandle; } else if (Pool != D3DPOOL_DEFAULT) { +/* TODO: For D3DUSAGE_AUTOGENMIPMAP, it is likely we only have to + * allocate only for the first level, since it is the only lockable + * level. Check apps don't crash if we allocate smaller buffer (some + * apps access sublevels of texture even if they locked only first + * level) */ user_buffer = MALLOC(nine_format_get_alloc_size(pf, Width, Height, info->last_level)); This->managed_buffer = user_buffer; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/11] st/nine: Remove impossible cases with Managed textures
Copying to/from a Managed texture is forbidden. Rendering to a Managed texture is forbidden. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/surface9.c | 23 +-- src/gallium/state_trackers/nine/volume9.c | 19 +++ 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index ec3719e..e934941 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -165,8 +165,7 @@ NineSurface9_CreatePipeSurface( struct NineSurface9 *This, const int sRGB ) struct pipe_surface templ; enum pipe_format srgb_format; -assert(This->desc.Pool == D3DPOOL_DEFAULT || - This->desc.Pool == D3DPOOL_MANAGED); +assert(This->desc.Pool == D3DPOOL_DEFAULT); assert(resource); srgb_format = util_format_srgb(resource->format); @@ -503,6 +502,9 @@ NineSurface9_CopySurface( struct NineSurface9 *This, DBG("This=%p From=%p pDestPoint=%p pSourceRect=%p\n", This, From, pDestPoint, pSourceRect); +assert(This->base.pool != D3DPOOL_MANAGED && + From->base.pool != D3DPOOL_MANAGED); + user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); dst_box.x = pDestPoint ? pDestPoint->x : 0; @@ -543,20 +545,6 @@ NineSurface9_CopySurface( struct NineSurface9 *This, dst_box.width = src_box.width; dst_box.height = src_box.height; -/* Don't copy to device memory of managed resources. - * We don't want to download it back again later. - */ -if (This->base.pool == D3DPOOL_MANAGED) -r_dst = NULL; - -/* Don't copy from stale device memory of managed resources. - * Also, don't copy between system and device if we don't have to. 
- */ -if (From->base.pool == D3DPOOL_MANAGED) { -if (!r_dst || NineSurface9_IsDirty(From)) -r_src = NULL; -} - /* check source block align for compressed textures */ if (util_format_is_compressed(From->base.info.format) && ((src_box.width != From->desc.Width) || @@ -622,8 +610,7 @@ NineSurface9_CopySurface( struct NineSurface9 *This, From->stride, src_box.x, src_box.y); } -if (This->base.pool == D3DPOOL_DEFAULT || -This->base.pool == D3DPOOL_MANAGED) +if (This->base.pool == D3DPOOL_DEFAULT) NineSurface9_MarkContainerDirty(This); if (!r_dst && This->base.resource) NineSurface9_AddDirtyRect(This, &dst_box); diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 24d5d53..8c9f148 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -355,6 +355,8 @@ NineVolume9_CopyVolume( struct NineVolume9 *This, DBG("This=%p From=%p dstx=%u dsty=%u dstz=%u pSrcBox=%p\n", This, From, dstx, dsty, dstz, pSrcBox); +assert(This->desc.Pool != D3DPOOL_MANAGED && + From->desc.Pool != D3DPOOL_MANAGED); user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); dst_box.x = dstx; @@ -392,20 +394,6 @@ NineVolume9_CopyVolume( struct NineVolume9 *This, dst_box.height = src_box.height; dst_box.depth = src_box.depth; -/* Don't copy to device memory of managed resources. - * We don't want to download it back again later. - */ -if (This->desc.Pool == D3DPOOL_MANAGED) -r_dst = NULL; - -/* Don't copy from stale device memory of managed resources. - * Also, don't copy between system and device if we don't have to. 
- */ -if (From->desc.Pool == D3DPOOL_MANAGED) { -if (!r_dst || NineVolume9_IsDirty(From)) -r_src = NULL; -} - if (r_dst && r_src) { pipe->resource_copy_region(pipe, r_dst, This->level, @@ -452,8 +440,7 @@ NineVolume9_CopyVolume( struct NineVolume9 *This, src_box.x, src_box.y, src_box.z); } -if (This->desc.Pool == D3DPOOL_DEFAULT || -This->desc.Pool == D3DPOOL_MANAGED) +if (This->desc.Pool == D3DPOOL_DEFAULT) NineVolume9_MarkContainerDirty(This); if (!r_dst && This->resource) NineVolume9_AddDirtyRegion(This, &dst_box); -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/11] st/nine: Bound the dirty regions to resource size
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/cubetexture9.c | 4 src/gallium/state_trackers/nine/texture9.c | 3 +++ src/gallium/state_trackers/nine/volumetexture9.c | 9 + 3 files changed, 16 insertions(+) diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index d501d29..9c99b7c 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -272,6 +272,10 @@ NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, rect_to_pipe_box_clamp(&box, pDirtyRect); u_box_union_2d(&This->dirty_rect[FaceType], &This->dirty_rect[FaceType], &box); +(void) u_box_clip_2d(&This->dirty_rect[FaceType], + &This->dirty_rect[FaceType], + This->base.base.info.width0, + This->base.base.info.height0); } return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index bee8095..e0aec3c 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -301,6 +301,9 @@ NineTexture9_AddDirtyRect( struct NineTexture9 *This, struct pipe_box box; rect_to_pipe_box_clamp(&box, pDirtyRect); u_box_union_2d(&This->dirty_rect, &This->dirty_rect, &box); +(void) u_box_clip_2d(&This->dirty_rect, &This->dirty_rect, + This->base.base.info.width0, + This->base.base.info.height0); } return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index b566173..8f1de13 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -223,6 +223,15 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, struct pipe_box box; d3dbox_to_pipe_box(&box, pDirtyBox); u_box_union_3d(&This->dirty_box, &This->dirty_box, &box); +This->dirty_box.x = MAX2(This->dirty_box.x, 0); +This->dirty_box.y = MAX2(This->dirty_box.y, 0); +This->dirty_box.z = 
MAX2(This->dirty_box.z, 0); +This->dirty_box.width = MIN2(This->dirty_box.width, + This->base.base.info.width0 - This->dirty_box.x); +This->dirty_box.height = MIN2(This->dirty_box.height, + This->base.base.info.height0 - This->dirty_box.y); +This->dirty_box.depth = MIN2(This->dirty_box.depth, + This->base.base.info.depth0 - This->dirty_box.z); } return D3D_OK; } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/11] st/nine: Rewrite Managed texture uploads
That part of the code was quite obscure. This new implementation tries to make it clearer by separating the differents parts, and commenting more. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 221 ++--- 1 file changed, 125 insertions(+), 96 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 9b7976c..330827a 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -163,7 +163,8 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) { HRESULT hr; unsigned last_level = This->base.info.last_level; -unsigned l; +unsigned l, min_level_dirty = This->managed.lod; +BOOL update_lod; DBG("This=%p dirty=%i type=%s\n", This, This->managed.dirty, nine_D3DRTYPE_to_str(This->base.type)); @@ -173,7 +174,14 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP) last_level = 0; /* TODO: What if level 0 is not resident ? */ -if (This->managed.lod_resident != This->managed.lod) { +update_lod = This->managed.lod_resident != This->managed.lod; +if (!update_lod && !This->managed.dirty) +return D3D_OK; + +/* Allocate a new resource with the correct number of levels, + * Mark states for update, and tell the nine surfaces/volumes + * their new resource. 
*/ +if (update_lod) { struct pipe_resource *res; DBG("updating LOD from %u to %u ...\n", This->managed.lod_resident, This->managed.lod); @@ -192,148 +200,169 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) state->changed.group |= NINE_STATE_TEXTURE; } +/* Allocate a new resource */ hr = NineBaseTexture9_CreatePipeResource(This, This->managed.lod_resident != -1); if (FAILED(hr)) return hr; res = This->base.resource; -if (This->managed.lod_resident == -1) /* no levels were resident */ +if (This->managed.lod_resident == -1) {/* no levels were resident */ +This->managed.dirty = FALSE; /* We are going to upload everything. */ This->managed.lod_resident = This->base.info.last_level + 1; +} + +if (This->base.type == D3DRTYPE_TEXTURE) { +struct NineTexture9 *tex = NineTexture9(This); + +/* last content (if apply) has been copied to the new resource. + * Note: We cannot render to surfaces of managed textures. + * Note2: the level argument passed is to get the level offset + * right when the texture is uploaded (the texture first level + * corresponds to This->managed.lod). + * Note3: We don't care about the value passed for the surfaces + * before This->managed.lod, negative with this implementation. 
*/ +for (l = 0; l <= This->base.info.last_level; ++l) +NineSurface9_SetResource(tex->surfaces[l], res, l - This->managed.lod); +} else +if (This->base.type == D3DRTYPE_CUBETEXTURE) { +struct NineCubeTexture9 *tex = NineCubeTexture9(This); +unsigned z; + +for (l = 0; l <= This->base.info.last_level; ++l) { +for (z = 0; z < 6; ++z) +NineSurface9_SetResource(tex->surfaces[l * 6 + z], + res, l - This->managed.lod); +} +} else +if (This->base.type == D3DRTYPE_VOLUMETEXTURE) { +struct NineVolumeTexture9 *tex = NineVolumeTexture9(This); + +for (l = 0; l <= This->base.info.last_level; ++l) +NineVolume9_SetResource(tex->volumes[l], res, l - This->managed.lod); +} else { +assert(!"invalid texture type"); +} + +/* We are going to fully upload the new levels, + * no need to update dirty parts of the texture for these */ +min_level_dirty = MAX2(This->managed.lod, This->managed.lod_resident); +} + +/* Update dirty parts of the texture */ +if (This->managed.dirty) { +if (This->base.type == D3DRTYPE_TEXTURE) { +struct NineTexture9 *tex = NineTexture9(This); +struct pipe_box box; +box.z = 0; +box.depth = 1; + +DBG("TEXTURE: dirty rect=(%u,%u) (%ux%u)\n", +tex->dirty_rect.x, tex->dirty_rect.y, +tex->dirty_rect.width, tex->dirty_rect.height); + +/* Note: for l < min_level_dirty, the resource is + * either non-existing (and thus will be entirely re-uploaded + * if the lod changes) or going to have a full upload */ +if (tex->dirty_rect.width) { +for (l = min_level_dirty; l <= last_level; ++l) { +u_box_minify_2d(&box, &tex->dirty_rect, l);
[Mesa-dev] [PATCH 01/11] st/nine: Encapsulate variables for MANAGED resource
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 82 src/gallium/state_trackers/nine/basetexture9.h | 19 +++--- src/gallium/state_trackers/nine/cubetexture9.c | 2 +- src/gallium/state_trackers/nine/device9.c| 2 +- src/gallium/state_trackers/nine/nine_state.c | 2 +- src/gallium/state_trackers/nine/stateblock9.c| 2 +- src/gallium/state_trackers/nine/surface9.c | 2 +- src/gallium/state_trackers/nine/texture9.c | 2 +- src/gallium/state_trackers/nine/volume9.c| 2 +- src/gallium/state_trackers/nine/volumetexture9.c | 2 +- 10 files changed, 60 insertions(+), 57 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index f2ca35b..75a305f 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -67,8 +67,8 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, This->pipe = pParams->device->pipe; This->mipfilter = (Usage & D3DUSAGE_AUTOGENMIPMAP) ? D3DTEXF_LINEAR : D3DTEXF_NONE; -This->lod = 0; -This->lod_resident = -1; +This->managed.lod = 0; +This->managed.lod_resident = -1; /* When a depth buffer is sampled, it is for shadow mapping, except for * D3DFMT_INTZ, D3DFMT_DF16 and D3DFMT_DF24. 
* In addition D3DFMT_INTZ can be used for both texturing and depth buffering @@ -103,15 +103,15 @@ DWORD WINAPI NineBaseTexture9_SetLOD( struct NineBaseTexture9 *This, DWORD LODNew ) { -DWORD old = This->lod; +DWORD old = This->managed.lod; DBG("This=%p LODNew=%d\n", This, LODNew); user_assert(This->base.pool == D3DPOOL_MANAGED, 0); -This->lod = MIN2(LODNew, This->base.info.last_level); +This->managed.lod = MIN2(LODNew, This->base.info.last_level); -if (This->lod != old && This->bind_count && LIST_IS_EMPTY(&This->list)) +if (This->managed.lod != old && This->bind_count && LIST_IS_EMPTY(&This->list)) list_add(&This->list, &This->base.base.device->update_textures); return old; @@ -122,7 +122,7 @@ NineBaseTexture9_GetLOD( struct NineBaseTexture9 *This ) { DBG("This=%p\n", This); -return This->lod; +return This->managed.lod; } DWORD WINAPI @@ -165,7 +165,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) unsigned last_level = This->base.info.last_level; unsigned l; -DBG("This=%p dirty=%i type=%s\n", This, This->dirty, +DBG("This=%p dirty=%i type=%s\n", This, This->managed.dirty, nine_D3DRTYPE_to_str(This->base.type)); assert(This->base.pool == D3DPOOL_MANAGED); @@ -173,10 +173,10 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) if (This->base.usage & D3DUSAGE_AUTOGENMIPMAP) last_level = 0; /* TODO: What if level 0 is not resident ? 
*/ -if (This->lod_resident != This->lod) { +if (This->managed.lod_resident != This->managed.lod) { struct pipe_resource *res; -DBG("updating LOD from %u to %u ...\n", This->lod_resident, This->lod); +DBG("updating LOD from %u to %u ...\n", This->managed.lod_resident, This->managed.lod); pipe_sampler_view_reference(&This->view[0], NULL); pipe_sampler_view_reference(&This->view[1], NULL); @@ -192,13 +192,13 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) state->changed.group |= NINE_STATE_TEXTURE; } -hr = NineBaseTexture9_CreatePipeResource(This, This->lod_resident != -1); +hr = NineBaseTexture9_CreatePipeResource(This, This->managed.lod_resident != -1); if (FAILED(hr)) return hr; res = This->base.resource; -if (This->lod_resident == -1) /* no levels were resident */ -This->lod_resident = This->base.info.last_level + 1; +if (This->managed.lod_resident == -1) /* no levels were resident */ +This->managed.lod_resident = This->base.info.last_level + 1; if (This->base.type == D3DRTYPE_TEXTURE) { struct NineTexture9 *tex = NineTexture9(This); @@ -207,15 +207,15 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) /* Mark uninitialized levels as dirty. */ box.x = box.y = box.z = 0; box.depth = 1; -for (l = This->lod; l < This->lod_resident; ++l) { +for (l = This->managed.lod; l < This->managed.lod_resident; ++l) { box.width = u_minify(This->base.info.width0, l); box.height = u_minify(This->base.info.height0, l); NineSurface9_AddDirtyRect(tex->surfaces[l], &box); } -for (l = 0; l < This->lod; ++l) +for (l = 0; l < This->managed.lod; ++l) NineSurface9_SetResource(tex->surfaces[l], NULL, -1); for (; l <= This->base.info.last_level; ++l) -
[Mesa-dev] [PATCH 03/11] st/nine: Simplify Surface9 Managed resources implementation
Remove the Surface9 code for dirty rects, used only for Managed resources. Instead convey the information to the parent texture. According to documentation, this seems to be the expected behaviour, and if documentation is wrong there, that's not a problem since it can only leads to more texture updates in corner cases. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/basetexture9.c | 15 ++-- src/gallium/state_trackers/nine/nine_pipe.h| 11 --- src/gallium/state_trackers/nine/surface9.c | 108 ++--- src/gallium/state_trackers/nine/surface9.h | 12 +-- 4 files changed, 53 insertions(+), 93 deletions(-) diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 75a305f..9b7976c 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -279,16 +279,17 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) tex->dirty_rect.x, tex->dirty_rect.y, tex->dirty_rect.width, tex->dirty_rect.height); +/* Note: for l < This->managed.lod, the resource is + * non-existing, and thus will be entirely re-uploaded + * if This->managed.lod changes */ if (tex->dirty_rect.width) { -for (l = 0; l <= last_level; ++l) { +for (l = This->managed.lod; l <= last_level; ++l) { u_box_minify_2d(&box, &tex->dirty_rect, l); -NineSurface9_AddDirtyRect(tex->surfaces[l], &box); +NineSurface9_UploadSelf(tex->surfaces[l], &box); } memset(&tex->dirty_rect, 0, sizeof(tex->dirty_rect)); tex->dirty_rect.depth = 1; } -for (l = This->managed.lod; l <= last_level; ++l) -NineSurface9_UploadSelf(tex->surfaces[l]); } else if (This->base.type == D3DRTYPE_CUBETEXTURE) { struct NineCubeTexture9 *tex = NineCubeTexture9(This); @@ -303,15 +304,13 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) tex->dirty_rect[z].width, tex->dirty_rect[z].height); if (tex->dirty_rect[z].width) { -for (l = 0; l <= last_level; ++l) { +for (l = This->managed.lod; l <= last_level; ++l) { 
u_box_minify_2d(&box, &tex->dirty_rect[z], l); -NineSurface9_AddDirtyRect(tex->surfaces[l * 6 + z], &box); +NineSurface9_UploadSelf(tex->surfaces[l * 6 + z], &box); } memset(&tex->dirty_rect[z], 0, sizeof(tex->dirty_rect[z])); tex->dirty_rect[z].depth = 1; } -for (l = This->managed.lod; l <= last_level; ++l) -NineSurface9_UploadSelf(tex->surfaces[l * 6 + z]); } } else if (This->base.type == D3DRTYPE_VOLUMETEXTURE) { diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h index 20916b7..d9c643c 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.h +++ b/src/gallium/state_trackers/nine/nine_pipe.h @@ -108,17 +108,6 @@ rect_to_pipe_box_flip(struct pipe_box *dst, const RECT *src) } static INLINE void -nine_u_rect_to_pipe_box(struct pipe_box *dst, const struct u_rect *rect, int z) -{ -dst->x = rect->x0; -dst->y = rect->y0; -dst->z = z; -dst->width = rect->x1 - rect->x0; -dst->height = rect->y1 - rect->y0; -dst->depth = 1; -} - -static INLINE void rect_to_pipe_box_xy_only(struct pipe_box *dst, const RECT *src) { user_warn(src->left > src->right || src->top > src->bottom); diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index e934941..2706efa 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -22,7 +22,11 @@ #include "surface9.h" #include "device9.h" -#include "basetexture9.h" /* for marking dirty */ + +/* for marking dirty */ +#include "basetexture9.h" +#include "texture9.h" +#include "cubetexture9.h" #include "nine_helpers.h" #include "nine_pipe.h" @@ -145,7 +149,6 @@ NineSurface9_dtor( struct NineSurface9 *This ) { if (This->transfer) NineSurface9_UnlockRect(This); -NineSurface9_ClearDirtyRects(This); pipe_surface_reference(&This->surface[0], NULL); pipe_surface_reference(&This->surface[1], NULL); @@ -256,55 +259,38 @@ NineSurface9_GetDesc( struct NineSurface9 *This, return D3D_OK; } -/* Wine just keeps a single 
directy rect and expands it to cover all - * the dirty rects ever added. - * We'll keep 2, and expand the one that fits better, just for fun. - */ +/* Add the dirty rects to the source texture */ INLINE void NineSurface9_AddDirtyRect( struct NineSurface9 *This, const struct pipe_box *box ) { -float area[2]; -struct u_rect rect, cover_a, cover_b; +RECT dirty_rect; DBG("
[Mesa-dev] [PATCH 14/16] st/nine: Add debug warning when application uses sw processing
Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/cubetexture9.c | 3 +++ src/gallium/state_trackers/nine/device9.c| 5 + src/gallium/state_trackers/nine/indexbuffer9.c | 14 -- src/gallium/state_trackers/nine/texture9.c | 5 + src/gallium/state_trackers/nine/vertexbuffer9.c | 4 +++- src/gallium/state_trackers/nine/volumetexture9.c | 3 +++ 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index e250315..d81cc70 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -93,6 +93,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE; } +if (Usage & D3DUSAGE_SOFTWAREPROCESSING) +DBG("Application asked for Software Vertex Processing, " +"but this is unimplemented\n"); This->surfaces = CALLOC(6 * (info->last_level + 1), sizeof(*This->surfaces)); if (!This->surfaces) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 4ef02bb..924f755 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -183,6 +183,11 @@ NineDevice9_ctor( struct NineDevice9 *This, if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE)) nine_setup_fpu(); +if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) +DBG("Application asked full Software Vertex Processing. Ignoring.\n"); +if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) +DBG("Application asked mixed Software Vertex Processing. 
Ignoring.\n"); + This->pipe = This->screen->context_create(This->screen, NULL); if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ diff --git a/src/gallium/state_trackers/nine/indexbuffer9.c b/src/gallium/state_trackers/nine/indexbuffer9.c index 48553fd..860313b 100644 --- a/src/gallium/state_trackers/nine/indexbuffer9.c +++ b/src/gallium/state_trackers/nine/indexbuffer9.c @@ -63,12 +63,14 @@ NineIndexBuffer9_ctor( struct NineIndexBuffer9 *This, if (pDesc->Pool == D3DPOOL_SYSTEMMEM) info->usage = PIPE_USAGE_STAGING; -/* if (This->desc.Usage & D3DUSAGE_DONOTCLIP) { } */ -/* if (This->desc.Usage & D3DUSAGE_NONSECURE) { } */ -/* if (This->desc.Usage & D3DUSAGE_NPATCHES) { } */ -/* if (This->desc.Usage & D3DUSAGE_POINTS) { } */ -/* if (This->desc.Usage & D3DUSAGE_RTPATCHES) { } */ -/* if (This->desc.Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */ +/* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ +/* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ +/* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ +/* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ +/* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ +if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) +DBG("Application asked for Software Vertex Processing, " +"but this is unimplemented\n"); info->height0 = 1; info->depth0 = 1; diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index 0dbdd37..78a632f 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -125,6 +125,11 @@ NineTexture9_ctor( struct NineTexture9 *This, PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE; } + +if (Usage & D3DUSAGE_SOFTWAREPROCESSING) +DBG("Application asked for Software Vertex Processing, " +"but this is unimplemented\n"); + if (pSharedHandle) info->bind |= PIPE_BIND_SHARED; diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.c b/src/gallium/state_trackers/nine/vertexbuffer9.c index 11cc60f..8e2eaaf 100644 --- 
a/src/gallium/state_trackers/nine/vertexbuffer9.c +++ b/src/gallium/state_trackers/nine/vertexbuffer9.c @@ -76,7 +76,9 @@ NineVertexBuffer9_ctor( struct NineVertexBuffer9 *This, /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ -/* if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) { } */ +if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING) +DBG("Application asked for Software Vertex Processing, " +"but this is unimplemented\n"); /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */ info->height0 = 1; diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index 6c0073f..08fdd3d 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -90,6 +90,9 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE;
[Mesa-dev] [PATCH 16/16] st/nine: Rework texture data allocation
Some applications assume the memory for multilevel textures is allocated per continuous blocks. This patch implements that behaviour. Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/cubetexture9.c | 42 -- src/gallium/state_trackers/nine/cubetexture9.h | 1 + src/gallium/state_trackers/nine/nine_pipe.h| 34 ++ src/gallium/state_trackers/nine/surface9.c | 50 +- src/gallium/state_trackers/nine/surface9.h | 1 - src/gallium/state_trackers/nine/texture9.c | 18 -- src/gallium/state_trackers/nine/texture9.h | 1 + 7 files changed, 110 insertions(+), 37 deletions(-) diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index d81cc70..34ef4ec 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -40,8 +40,9 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, struct pipe_resource *info = &This->base.base.info; struct pipe_screen *screen = pParams->device->screen; enum pipe_format pf; -unsigned i; +unsigned i, l, f, offset, face_size = 0; D3DSURFACE_DESC sfdesc; +void *p; HRESULT hr; DBG("This=%p pParams=%p EdgeLength=%u Levels=%u Usage=%d " @@ -97,6 +98,14 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, DBG("Application asked for Software Vertex Processing, " "but this is unimplemented\n"); +if (Pool != D3DPOOL_DEFAULT) { +face_size = nine_format_get_alloc_size(pf, EdgeLength, EdgeLength, + info->last_level); +This->managed_buffer = MALLOC(6 * face_size); +if (!This->managed_buffer) +return E_OUTOFMEMORY; +} + This->surfaces = CALLOC(6 * (info->last_level + 1), sizeof(*This->surfaces)); if (!This->surfaces) return E_OUTOFMEMORY; @@ -117,16 +126,26 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, sfdesc.Pool = Pool; sfdesc.MultiSampleType = D3DMULTISAMPLE_NONE; sfdesc.MultiSampleQuality = 0; -for (i = 0; i < (info->last_level + 1) * 6; ++i) { -sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, i / 6); - -hr = 
NineSurface9_new(This->base.base.base.device, NineUnknown(This), - This->base.base.resource, NULL, D3DRTYPE_CUBETEXTURE, - i / 6, i % 6, - &sfdesc, &This->surfaces[i]); -if (FAILED(hr)) -return hr; +/* We allocate the memory for the surfaces as continous blocks. + * This is the expected behaviour, however we haven't tested for + * cube textures in which order the faces/levels should be in memory + */ +for (f = 0; f < 6; f++) { +offset = f * face_size; +for (l = 0; l < info->last_level + 1; l++) { +sfdesc.Width = sfdesc.Height = u_minify(EdgeLength, l); +p = This->managed_buffer ? This->managed_buffer + offset + +nine_format_get_p_offset(pf, EdgeLength, EdgeLength, l) : +NULL; + +hr = NineSurface9_new(This->base.base.base.device, NineUnknown(This), + This->base.base.resource, p, D3DRTYPE_CUBETEXTURE, + l, f, &sfdesc, &This->surfaces[f + 6 * l]); +if (FAILED(hr)) +return hr; +} } + for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */ This->dirty_rect[i].depth = 1; @@ -146,6 +165,9 @@ NineCubeTexture9_dtor( struct NineCubeTexture9 *This ) FREE(This->surfaces); } +if (This->managed_buffer) +FREE(This->managed_buffer); + NineBaseTexture9_dtor(&This->base); } diff --git a/src/gallium/state_trackers/nine/cubetexture9.h b/src/gallium/state_trackers/nine/cubetexture9.h index e8594d3..ee7e275 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.h +++ b/src/gallium/state_trackers/nine/cubetexture9.h @@ -31,6 +31,7 @@ struct NineCubeTexture9 struct NineBaseTexture9 base; struct NineSurface9 **surfaces; struct pipe_box dirty_rect[6]; /* covers all mip levels */ +uint8_t *managed_buffer; }; static INLINE struct NineCubeTexture9 * NineCubeTexture9( void *data ) diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h index b8e728e..20916b7 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.h +++ b/src/gallium/state_trackers/nine/nine_pipe.h @@ -673,4 +673,38 @@ 
d3dtexturefiltertype_to_pipe_tex_mipfilter(D3DTEXTUREFILTERTYPE filter) } } +static INLINE unsigned nine_format_get_stride(enum pipe_format format, + unsigned width) +{ +unsigned stride = util_format_get_stride(format, width); + +return align(stride, 4); +} + +static INLINE unsigned nine_format_
[Mesa-dev] [PATCH 03/16] st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers
From: Patrick Rudolph This adds an additional check to make sure the bound depth-buffer doesn't exceed the rendertarget size when clearing depth and color buffer at once. D3D9 clears only a rectangle with the same dimensions as the viewport, leaving other parts of the depth-buffer intact. This fixes failing WINE test visual.c:depth_buffer_test() Signed-off-by: Patrick Rudolph Signed-off-by: Axel Davy --- src/gallium/state_trackers/nine/device9.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 78e148b..1430ca5 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -1756,12 +1756,21 @@ NineDevice9_Clear( struct NineDevice9 *This, rt_mask |= 1 << i; } +/* fast path, clears everything at once */ if (!Count && (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == This->state.rt_mask)) && -rect.x1 == 0 && rect.x2 >= This->state.fb.width && -rect.y1 == 0 && rect.y2 >= This->state.fb.height) { -/* fast path, clears everything at once */ -DBG("fast path\n"); +rect.x1 == 0 && rect.y1 == 0 && +/* Case we clear only render target. Check clear region vs rt. */ +((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && + rect.x2 >= This->state.fb.width && + rect.y2 >= This->state.fb.height) || +/* Case we clear depth buffer (and eventually rt too). + * depth buffer size is always >= rt size. Compare to clear region */ +((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && + This->state.fb.zsbuf != NULL && + rect.x2 >= zsbuf_surf->desc.Width && + rect.y2 >= zsbuf_surf->desc.Height))) { +DBG("Clear fast path\n"); pipe->clear(pipe, bufs, &rgba, Z, Stencil); return D3D_OK; } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/4] clover: this serie remove util/compat.*
On Fri, Apr 24, 2015 at 12:59:53PM +0200, EdB wrote: > Since clover should compile use -std=c++11, > compat classes are no longer necessary > Thank you for working on this, it is a nice improvement. I will try to review/test these early next week. Thanks, Tom > EdB (4): > clover: remove compat class that matche std one > clover: remove compat::string > clover: make module::symbol::name a string > clover: remove util/compat > > src/gallium/state_trackers/clover/Makefile.sources | 2 - > src/gallium/state_trackers/clover/api/program.cpp | 19 +- > .../state_trackers/clover/core/compiler.hpp| 14 +- > src/gallium/state_trackers/clover/core/error.hpp | 10 +- > src/gallium/state_trackers/clover/core/kernel.cpp | 2 +- > src/gallium/state_trackers/clover/core/module.cpp | 56 ++- > src/gallium/state_trackers/clover/core/module.hpp | 23 +- > src/gallium/state_trackers/clover/core/program.cpp | 4 +- > src/gallium/state_trackers/clover/core/program.hpp | 2 +- > .../state_trackers/clover/llvm/invocation.cpp | 42 +- > .../state_trackers/clover/tgsi/compiler.cpp| 12 +- > src/gallium/state_trackers/clover/util/compat.cpp | 38 -- > src/gallium/state_trackers/clover/util/compat.hpp | 444 > - > 13 files changed, 105 insertions(+), 563 deletions(-) > delete mode 100644 src/gallium/state_trackers/clover/util/compat.cpp > delete mode 100644 src/gallium/state_trackers/clover/util/compat.hpp > > -- > 2.3.6 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/16] st/nine: Change x86 FPU Control word on device creation as on wined3d and windows
Can you use the fe* class of functions for this? Not entirely sure what this is setting, but you should be able to do it with fegetenv/fesetenv, or the more specialized functions. On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy wrote: > From: Tiziano Bacocco > > Signed-off-by: Tiziano Bacocco > --- > src/gallium/state_trackers/nine/device9.c | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/src/gallium/state_trackers/nine/device9.c > b/src/gallium/state_trackers/nine/device9.c > index 1a776a7..4ef02bb 100644 > --- a/src/gallium/state_trackers/nine/device9.c > +++ b/src/gallium/state_trackers/nine/device9.c > @@ -53,6 +53,18 @@ > > #define DBG_CHANNEL DBG_DEVICE > > +static void nine_setup_fpu(void) > +{ > +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) > +WORD cw; > +__asm__ volatile ("fnstcw %0" : "=m" (cw)); > +cw = (cw & ~0xf3f) | 0x3f; > +__asm__ volatile ("fldcw %0" : : "m" (cw)); > +#else > +WARN_ONCE("FPU setup not supported on non-x86 platforms\n"); > +#endif > +} > + > static void > NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) > { > @@ -168,6 +180,9 @@ NineDevice9_ctor( struct NineDevice9 *This, > IDirect3D9_AddRef(This->d3d9); > ID3DPresentGroup_AddRef(This->present); > > +if (!(This->params.BehaviorFlags & D3DCREATE_FPU_PRESERVE)) > +nine_setup_fpu(); > + > This->pipe = This->screen->context_create(This->screen, NULL); > if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ > > -- > 2.1.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access
One more comment that I neglected to add: there are other checks for _CurrentFragmentProgram being non-NULL; indeed, the function brw_upload_wm_abo_surface() [file brw_wm_surface_state.c] also has a check for it being non-NULL. That function is the emit for the atom brw_wm_abo_surfaces which is present in both gen7_atoms and gen8_atoms. I would argue that _CurrentFragmentProgram can be NULL, given that other places check for it and that without the check piglit gets about 30 more crashes. Sorry for not posting this in the first reply. -Kevin -Original Message- From: Rogovin, Kevin Sent: Friday, April 24, 2015 11:22 PM To: 'Kenneth Graunke'; mesa-dev@lists.freedesktop.org Cc: mesa-...@freedesktop.org; curroje...@riseup.net Subject: RE: [Mesa-dev] [PATCH 6/7] i965: ensure execution of fragment shader when fragment shader has atomic buffer access > Checking brw->ctx.Shader._CurrentFragmentProgram != NULL is unnecessary. > There is always a valid pixel shader. (If the application is using > fixed-function, we supply a fragment shader for them.) Please drop that > check. Without this check (in the Gen7 function/code), about 30 crashes are induced on piglit tests for Gen7; the tests are all using GL fixed function pipeline. I have not run piglit without this check on Gen8 though. > I thought that UAVs were essentially for Images...I'm not clear why this is > needed. Perhaps Curro can confirm one way or another. The essential reason is to guarantee that the pixel shader gets invoked by Gen even when all render target surfaces are NULL surfaces. There are other flags one can use, but the UAV seems (to me) the most natural. -Kevin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/16] st/nine: Handle special LIT case
On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy wrote: > Signed-off-by: Axel Davy > --- > src/gallium/state_trackers/nine/nine_shader.c | 19 ++- > 1 file changed, 18 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/nine/nine_shader.c > b/src/gallium/state_trackers/nine/nine_shader.c > index 2ba625e..0fd3d37 100644 > --- a/src/gallium/state_trackers/nine/nine_shader.c > +++ b/src/gallium/state_trackers/nine/nine_shader.c > @@ -2040,6 +2040,23 @@ DECL_SPECIAL(LOG) > return D3D_OK; > } > > +DECL_SPECIAL(LIT) > +{ > +struct ureg_program *ureg = tx->ureg; > +struct ureg_dst tmp = tx_scratch(tx); > +struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); > +struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); > +ureg_LIT(ureg, tmp, src); > +/* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 > + * states that dst.z is 0 when src.y <= 0. Gallium definition can assign > + * it 0^0 if src.w=0, which value is driver dependent. */ > +ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), > + ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), > + ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); > +ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); Why use a separate temp? Couldn't you just ureg_LIT(ureg, dst, src); ureg_CMP(ureg, writemask(dst, Z), -src.y, ureg_src(dst), ureg_imm1f(0.0)) ? 
> +return D3D_OK; > +} > + > DECL_SPECIAL(NRM) > { > struct ureg_program *ureg = tx->ureg; > @@ -2543,7 +2560,7 @@ struct sm1_op_info inst_table[] = > _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ > _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ > _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* > 15 */ > -_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), /* 16 */ > +_OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* > 16 */ > _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ > _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ > _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ > -- > 2.1.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 02/16] st/nine: Fix wrong assert in nine_shader
Reviewed-by: Ilia Mirkin On Fri, Apr 24, 2015 at 4:09 PM, Axel Davy wrote: > The sampler src index was wrong for texldl and texldd > > Signed-off-by: Axel Davy > --- > src/gallium/state_trackers/nine/nine_shader.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/src/gallium/state_trackers/nine/nine_shader.c > b/src/gallium/state_trackers/nine/nine_shader.c > index 0fd3d37..1f1f7c8 100644 > --- a/src/gallium/state_trackers/nine/nine_shader.c > +++ b/src/gallium/state_trackers/nine/nine_shader.c > @@ -2493,8 +2493,8 @@ DECL_SPECIAL(TEXLDD) > tx_src_param(tx, &tx->insn.src[2]), > tx_src_param(tx, &tx->insn.src[3]) > }; > -assert(tx->insn.src[3].idx >= 0 && > - tx->insn.src[3].idx < Elements(tx->sampler_targets)); > +assert(tx->insn.src[1].idx >= 0 && > + tx->insn.src[1].idx < Elements(tx->sampler_targets)); > target = tx->sampler_targets[tx->insn.src[1].idx]; > > ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); > @@ -2509,8 +2509,8 @@ DECL_SPECIAL(TEXLDL) > tx_src_param(tx, &tx->insn.src[0]), > tx_src_param(tx, &tx->insn.src[1]) > }; > -assert(tx->insn.src[3].idx >= 0 && > - tx->insn.src[3].idx < Elements(tx->sampler_targets)); > +assert(tx->insn.src[1].idx >= 0 && > + tx->insn.src[1].idx < Elements(tx->sampler_targets)); > target = tx->sampler_targets[tx->insn.src[1].idx]; > > ureg_TXL(tx->ureg, dst, target, src[0], src[1]); > -- > 2.1.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev