[Mesa-dev] [v3] egl: dri2: support for creating images out of dma buffers
v2: - upon success close the given file descriptors Signed-off-by: Topi Pohjolainen --- src/egl/drivers/dri2/egl_dri2.c | 276 1 file changed, 276 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 10fdcef..bbdf888 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1170,6 +1171,279 @@ dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx, return dri2_create_image(disp, dri_image); } +static EGLBoolean +dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs) +{ + unsigned i; + + /** + * The spec says: + * + * "Required attributes and their values are as follows: + * + * * EGL_WIDTH & EGL_HEIGHT: The logical dimensions of the buffer in pixels + * + * * EGL_LINUX_DRM_FOURCC_EXT: The pixel format of the buffer, as specified + *by drm_fourcc.h and used as the pixel_format parameter of the + *drm_mode_fb_cmd2 ioctl." + * + * * EGL_DMA_BUF_PLANE0_FD_EXT: The dma_buf file descriptor of plane 0 of + *the image. + * + * * EGL_DMA_BUF_PLANE0_OFFSET_EXT: The offset from the start of the + *dma_buf of the first sample in plane 0, in bytes. + * + * * EGL_DMA_BUF_PLANE0_PITCH_EXT: The number of bytes between the start of + *subsequent rows of samples in plane 0. May have special meaning for + *non-linear formats." + * + * "* If is EGL_LINUX_DMA_BUF_EXT, and the list of attributes is + *incomplete, EGL_BAD_PARAMETER is generated." 
+ */ + if (attrs->Width <= 0 || attrs->Height <= 0 || + !attrs->DMABufFourCC.IsPresent || + !attrs->DMABufPlaneFds[0].IsPresent || + !attrs->DMABufPlaneOffsets[0].IsPresent || + !attrs->DMABufPlanePitches[0].IsPresent) { + _eglError(EGL_BAD_PARAMETER, "attribute(s) missing"); + return EGL_FALSE; + } + + /** +* Also: +* +* "If is EGL_LINUX_DMA_BUF_EXT and one or more of the values +* specified for a plane's pitch or offset isn't supported by EGL, +* EGL_BAD_ACCESS is generated." +*/ + for (i = 0; i < sizeof(attrs->DMABufPlanePitches) / + sizeof(attrs->DMABufPlanePitches[0]); ++i) { + if (attrs->DMABufPlanePitches[i].IsPresent && + attrs->DMABufPlanePitches[i].Value <= 0) { + _eglError(EGL_BAD_ACCESS, "invalid pitch"); + return EGL_FALSE; + } + } + + return EGL_TRUE; +} + +/* Returns the total number of file descriptors zero indicating an error. */ +static unsigned +dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) +{ + switch (attrs->DMABufFourCC.Value) { + case DRM_FORMAT_RGB332: + case DRM_FORMAT_BGR233: + case DRM_FORMAT_XRGB: + case DRM_FORMAT_XBGR: + case DRM_FORMAT_RGBX: + case DRM_FORMAT_BGRX: + case DRM_FORMAT_ARGB: + case DRM_FORMAT_ABGR: + case DRM_FORMAT_RGBA: + case DRM_FORMAT_BGRA: + case DRM_FORMAT_XRGB1555: + case DRM_FORMAT_XBGR1555: + case DRM_FORMAT_RGBX5551: + case DRM_FORMAT_BGRX5551: + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_ABGR1555: + case DRM_FORMAT_RGBA5551: + case DRM_FORMAT_BGRA5551: + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + case DRM_FORMAT_RGB888: + case DRM_FORMAT_BGR888: + case DRM_FORMAT_XRGB: + case DRM_FORMAT_XBGR: + case DRM_FORMAT_RGBX: + case DRM_FORMAT_BGRX: + case DRM_FORMAT_ARGB: + case DRM_FORMAT_ABGR: + case DRM_FORMAT_RGBA: + case DRM_FORMAT_BGRA: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_RGBX1010102: + case DRM_FORMAT_BGRX1010102: + case DRM_FORMAT_ARGB2101010: + case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_RGBA1010102: + case DRM_FORMAT_BGRA1010102: + case 
DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + case DRM_FORMAT_UYVY: + case DRM_FORMAT_VYUY: + /* There must be one and only one plane present */ + if (attrs->DMABufPlaneFds[0].IsPresent && + attrs->DMABufPlaneOffsets[0].IsPresent && + attrs->DMABufPlanePitches[0].IsPresent && + !attrs->DMABufPlaneFds[1].IsPresent && + !attrs->DMABufPlaneOffsets[1].IsPresent && + !attrs->DMABufPlanePitches[1].IsPresent && + !attrs->DMABufPlaneFds[2].IsPresent && + !attrs->DMABufPlaneOffsets[2].IsPresent && + !attrs->DMABufPlanePitches[2].IsPresent) + return 1; + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV21: + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV61: + /* There must be two and only two planes present */ + if (attrs->DMABufPlaneFds[0].IsPresent && + attrs->DMABufPlaneOffsets[0].IsPresent && + attrs->DMABufPlanePitches[0].IsPresent && + attrs->DMABufPlaneFds[1].IsPresent && +
Re: [Mesa-dev] [PATCH mesa] wayland: Disable prime support on buggy kernels
On 04/29/2013 07:15 PM, Kristian Høgsberg wrote: On Tue, Apr 23, 2013 at 9:54 AM, Ander Conselvan de Oliveira wrote: Linux kernel 3.8 shipped with a bug in the prime fd passing code that makes it unreliable. As of this writing, it seems unlikely that 3.9 will contain the fix for the issue. This patch disable prime support when running on top of those kernels, in order to prevent unexpected behavior when running a Wayland compositor. Commit be8a42ae60addd8b6092535c11b42d099d6470ec in Linus tree introduces the problem, which can be fixed by the patch below (not upstream yet): http://lists.freedesktop.org/archives/dri-devel/2013-April/037716.html What about the idea of using the self-ref test at init time to determine if the kernel is buggy or not instead of the hard-coded version check? The test isn't reliable enough. It depends on the memory allocation for a dma_buf reusing the memory for an old dma_buf. Ander Kristian --- src/egl/drivers/dri2/egl_dri2.c | 36 +++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 06a21d7..13b9107 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "egl_dri2.h" @@ -1557,6 +1558,38 @@ static struct wayland_drm_callbacks wl_drm_callbacks = { }; static EGLBoolean +kernel_prime_support_not_buggy() +{ + struct utsname un; + + /* Linux kernel 3.8 shipped with a bug in the prime fd passing code that +* makes it unreliable. As of this writing, it seems unlikely that 3.9 +* will contain the fix for the issue. Disable prime support when running +* on top of those kernels, in order to prevent unexpected behavior when +* running a Wayland compositor. 
+* +* Commit be8a42ae60addd8b6092535c11b42d099d6470ec in Linus tree introduces +* the problem, which can be fixed by the patch below (not upstream yet): +* http://lists.freedesktop.org/archives/dri-devel/2013-April/037716.html +*/ + if (uname(&un) == -1) { + _eglLog(_EGL_INFO, + "DRI2: wayland prime support disabled: unknown kernel version"); + return EGL_FALSE; + } + + if (strncmp(un.sysname, "Linux", strlen("Linux")) == 0 && + (strncmp(un.release, "3.8.", strlen("3.8.")) == 0 || +strncmp(un.release, "3.9.", strlen("3.9.")) == 0)) { + _eglLog(_EGL_INFO, + "DRI2: wayland prime support disabled: buggy kernel"); + return EGL_FALSE; + } + + return EGL_TRUE; +} + +static EGLBoolean dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *wl_dpy) { @@ -1575,7 +1608,8 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp, ret = drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap); if (ret == 0 && cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) && dri2_dpy->image->base.version >= 7 && - dri2_dpy->image->createImageFromFds != NULL) + dri2_dpy->image->createImageFromFds != NULL && + kernel_prime_support_not_buggy()) flags |= WAYLAND_DRM_PRIME; dri2_dpy->wl_server_drm = -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev - Intel Finland Oy Registered Address: PL 281, 00181 Helsinki Business Identity Code: 0357606 - 4 Domiciled in Helsinki This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.
Consider the following shader: vec4 f(vec4 v) { return v; } vec4 f(vec4 v); The prototype exactly matches the signature of the earlier definition, so there's absolutely no point in it. However, it doesn't appear to be illegal. The GLSL 4.30 specification offers two relevant quotes: "If a function name is declared twice with the same parameter types, then the return types and all qualifiers must also match, and it is the same function being declared." "User-defined functions can have multiple declarations, but only one definition." In this case the same function was declared twice, and there's only one definition, which fits both pieces of text. There doesn't appear to be any text saying late prototypes are illegal, so presumably it's valid. Unfortunately, it currently triggers an assertion failure: ir_dereference_variable @ specifies undeclared variable `v' @ When we process the second line, we look for an existing exact match so we can enforce the one-definition rule. We then leave sig set to that existing function, and hit sig->replace_parameters(&hir_parameters), unfortunately nuking our existing definition's parameters (which have actual dereferences) with the prototype's bogus unused parameters. Simply bailing out and ignoring such late prototypes is the safest thing to do. Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android. NOTE: This is a candidate for stable branches. 
Cc: Tapani Pälli Cc: Ian Romanick Signed-off-by: Kenneth Graunke --- src/glsl/ast_to_hir.cpp | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2638411..e595110 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions, "match prototype", name); } -if (is_definition && sig->is_defined) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "function `%s' redefined", name); + if (sig->is_defined) { +if (is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' redefined", name); +} else { + /* We just encountered a prototype that exactly matches a +* function that's already been defined. This is redundant, +* and we should ignore it. +*/ + return NULL; +} } } } else { -- 1.8.2.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.
On 04/30/2013 11:52 AM, Kenneth Graunke wrote: Consider the following shader: vec4 f(vec4 v) { return v; } vec4 f(vec4 v); The prototype exactly matches the signature of the earlier definition, so there's absolutely no point in it. However, it doesn't appear to be illegal. The GLSL 4.30 specification offers two relevant quotes: "If a function name is declared twice with the same parameter types, then the return types and all qualifiers must also match, and it is the same function being declared." "User-defined functions can have multiple declarations, but only one definition." In this case the same function was declared twice, and there's only one definition, which fits both pieces of text. There doesn't appear to be any text saying late prototypes are illegal, so presumably it's valid. Unfortunately, it currently triggers an assertion failure: ir_dereference_variable @ specifies undeclared variable `v' @ When we process the second line, we look for an existing exact match so we can enforce the one-definition rule. We then leave sig set to that existing function, and hit sig->replace_parameters(&hir_parameters), unfortunately nuking our existing definition's parameters (which have actual dereferences) with the prototype's bogus unused parameters. Simply bailing out and ignoring such late prototypes is the safest thing to do. Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android. NOTE: This is a candidate for stable branches. 
Cc: Tapani Pälli Cc: Ian Romanick Signed-off-by: Kenneth Graunke --- src/glsl/ast_to_hir.cpp | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2638411..e595110 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions, "match prototype", name); } - if (is_definition && sig->is_defined) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "function `%s' redefined", name); + if (sig->is_defined) { +if (is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' redefined", name); +} else { + /* We just encountered a prototype that exactly matches a +* function that's already been defined. This is redundant, +* and we should ignore it. +*/ + return NULL; +} } } } else { Works for me, now I can remove my rather awful hack getting around this :) Tested-by: Tapani Pälli // Tapani ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64084] New: Requesting git commit access to mesa
https://bugs.freedesktop.org/show_bug.cgi?id=64084 Priority: medium Bug ID: 64084 Assignee: mesa-dev@lists.freedesktop.org Summary: Requesting git commit access to mesa Severity: normal Classification: Unclassified OS: All Reporter: rob...@sixbynine.org Hardware: Other Status: NEW Version: unspecified Component: Other Product: Mesa I'd like to request for my freedesktop.org account (rib) to have git commit access to the mesa repo please. I'm working with mesa quite closely on the Cogl project and have made a few small patches including some recent EGL_EXT_swap_buffers_with_damage patches which could be nice to land soon, now that they have been reviewed. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything
On 04/27/2013 06:57 AM, Zack Rusin wrote: Technically it's legal for geometry shader to not emit any vertices. It's silly, but perfectly legal, so lets make draw stop crashing if it happens. Signed-off-by: Zack Rusin --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9 + .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9 + 2 files changed, 18 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 7b871c2..8d66c88 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, } } } + if (prim_info->count == 0) { + debug_printf("GS/IA didn't emit any vertices!\n"); What is "IA"? -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Google summer code
Dear Mr Ian Romanick, I am a third-year undergraduate from the National University of Singapore, School of Computing. I am interested in this year's Google Summer of Code program. This is my first time applying, and also my first time seeing most of the accepted organisations for this year's program. After much browsing, I handpicked a few projects which I think I might be able to do. To be honest, as this is my first time applying, the two main reasons I am interested in X.org's project idea are the difficulty level of the project idea, "Improved application of GLSL compiler optimizations", and the part stating that X.org treats GSoC as an opportunity to teach new developers. A brief summary of my software skills: I have learnt C++ and Java. I am more confident with media software such as Adobe Photoshop, Illustrator, Premiere Pro, After Effects, Maya, Blender and Unity. I would like to know more about the details and my chances of being selected for the project. Thank you for your time. Regards, Yew Chong ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything
On Tue, Apr 30, 2013 at 9:40 AM, Brian Paul wrote: > On 04/27/2013 06:57 AM, Zack Rusin wrote: >> >> Technically it's legal for geometry shader to not emit any >> vertices. It's silly, but perfectly legal, so lets make draw >> stop crashing if it happens. >> >> Signed-off-by: Zack Rusin >> --- >> src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9 >> + >> .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9 >> + >> 2 files changed, 18 insertions(+) >> >> diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c >> b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c >> index 7b871c2..8d66c88 100644 >> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c >> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c >> @@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct >> draw_pt_middle_end *middle, >>} >> } >> } >> + if (prim_info->count == 0) { >> + debug_printf("GS/IA didn't emit any vertices!\n"); > > > What is "IA"? Input Assembler I assume. First part of the DX11 pipeline. Alex ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything
- Original Message - > On 04/27/2013 06:57 AM, Zack Rusin wrote: > > Technically it's legal for geometry shader to not emit any > > vertices. It's silly, but perfectly legal, so lets make draw > > stop crashing if it happens. > > > > Signed-off-by: Zack Rusin > > --- > > src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |9 > > + > > .../auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c|9 > > + > > 2 files changed, 18 insertions(+) > > > > diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > > b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > > index 7b871c2..8d66c88 100644 > > --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > > +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c > > @@ -299,6 +299,15 @@ static void fetch_pipeline_generic( struct > > draw_pt_middle_end *middle, > >} > > } > > } > > + if (prim_info->count == 0) { > > + debug_printf("GS/IA didn't emit any vertices!\n"); > > What is "IA"? "Input Assembly", a D3D10 term that roughly matches pipe_vertex_elements / pipe_vertex_buffer state. BTW, I think that Chris Forbes makes a good point -- a GS might choose to not emit any vertices (e.g., that does fancy culling) -- so maybe this debugging message should be silent by default. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] r600g/llvm: Undefines unrequired texture coord values
This is a port of "r600g:mask unused source components for SAMPLE" patch from Vadim Girlin. --- src/gallium/drivers/r600/r600_llvm.c | 25 - 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 83d7340..a94faf2 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -429,9 +429,32 @@ static void llvm_emit_tex( } } + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX) { + LLVMValueRef Vector[4] = { + LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), + LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), + LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), + LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), + }; + switch (emit_data->inst->Texture.Texture) { + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); + break; + case TGSI_TEXTURE_1D: + Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); + break; + default: + break; + } + args[0] = lp_build_gather_values(gallivm, Vector, 4); + } else { + args[0] = emit_data->args[0]; + } + assert(emit_data->arg_count + 2 <= Elements(args)); - for (c = 0; c < emit_data->arg_count; ++c) + for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; -- 1.8.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/17] swrast: Factor out texture slice counting.
On 04/22/2013 10:14 AM, Eric Anholt wrote: This function going to get used a lot more in upcoming patches. --- src/mesa/swrast/s_texture.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c index 51048be..36a90dd 100644 --- a/src/mesa/swrast/s_texture.c +++ b/src/mesa/swrast/s_texture.c @@ -58,6 +58,14 @@ _swrast_delete_texture_image(struct gl_context *ctx, _mesa_delete_texture_image(ctx, texImage); } +static unsigned int +texture_slices(struct gl_texture_image *texImage) +{ + if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) + return texImage->Height; + else + return texImage->Depth; +} /** * Called via ctx->Driver.AllocTextureImageBuffer() @@ -83,11 +91,11 @@ _swrast_alloc_texture_image_buffer(struct gl_context *ctx, * We allocate the array for 1D/2D textures too in order to avoid special- * case code in the texstore routines. */ - swImg->ImageOffsets = malloc(texImage->Depth * sizeof(GLuint)); + swImg->ImageOffsets = malloc(texture_slices(texImage) * sizeof(GLuint)); if (!swImg->ImageOffsets) return GL_FALSE; - for (i = 0; i< texImage->Depth; i++) { + for (i = 0; i< texture_slices(texImage); i++) { swImg->ImageOffsets[i] = i * texImage->Width * texImage->Height; } Maybe save the result of texture_slices(texImage) in a local var so it doesn't get called for each loop iteration. Not a big deal though. 
@@ -209,20 +217,20 @@ _swrast_map_teximage(struct gl_context *ctx, map = swImage->Buffer; + assert(slice< texture_slices(texImage)); + if (texImage->TexObject->Target == GL_TEXTURE_3D || texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY) { GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat, texImage->Width, texImage->Height, 1); - assert(slice< texImage->Depth); map += slice * sliceSize; } else if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) { GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat, texImage->Width, 1, 1); - assert(slice< texImage->Height); map += slice * sliceSize; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 04/17] swrast: Clean up and explain the mapping process.
On 04/22/2013 10:14 AM, Eric Anholt wrote: --- src/mesa/swrast/s_texture.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/mesa/swrast/s_texture.c b/src/mesa/swrast/s_texture.c index 36a90dd..b6dd8cb 100644 --- a/src/mesa/swrast/s_texture.c +++ b/src/mesa/swrast/s_texture.c @@ -218,18 +218,17 @@ _swrast_map_teximage(struct gl_context *ctx, map = swImage->Buffer; assert(slice< texture_slices(texImage)); + if (slice != 0) { + int sliceHeight = texImage->Height; + /* For 1D array textures, the slices are all 1 pixel high, and Height is + * the number of slices. + */ + if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) + sliceHeight = 1; Maybe move this logic into a slice_height() helper function as you did for texture_slices(). - if (texImage->TexObject->Target == GL_TEXTURE_3D || - texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY) { GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat, texImage->Width, - texImage->Height, - 1); - map += slice * sliceSize; - } else if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) { - GLuint sliceSize = _mesa_format_image_size(texImage->TexFormat, - texImage->Width, - 1, + sliceHeight, 1); map += slice * sliceSize; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/17] swrast: Replace ImageOffsets with an ImageSlices pointer.
On 04/22/2013 10:14 AM, Eric Anholt wrote: This is a step toward allowing drivers to use their normal mapping paths, instead of requiring that all slice mappings come from an aligned offset from the first slice's map. This incidentally fixes missing slice handling in FXT1 swrast. --- src/mesa/drivers/dri/intel/intel_tex_validate.c | 37 src/mesa/drivers/dri/radeon/radeon_texture.c| 13 ++--- src/mesa/main/texcompress.c |2 +- src/mesa/main/texcompress.h |3 +- src/mesa/main/texcompress_etc.c | 51 +++-- src/mesa/main/texcompress_fxt1.c|8 +-- src/mesa/main/texcompress_rgtc.c| 70 +-- src/mesa/main/texcompress_s3tc.c| 56 -- src/mesa/swrast/s_context.h |2 +- src/mesa/swrast/s_texfetch.c|5 +- src/mesa/swrast/s_texfetch_tmp.h|4 +- src/mesa/swrast/s_texrender.c | 14 + src/mesa/swrast/s_texture.c | 54 + 13 files changed, 127 insertions(+), 192 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index c880bce..6068733 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -163,34 +163,19 @@ intel_tex_map_image_for_swrast(struct intel_context *intel, for (int i = 0; i< mt->level[level].depth; i++) intel_miptree_slice_resolve_depth(intel, mt, level, i); - if (mt->target == GL_TEXTURE_3D || - mt->target == GL_TEXTURE_2D_ARRAY || - mt->target == GL_TEXTURE_1D_ARRAY) { - int i; - - /* ImageOffsets[] is only used for swrast's fetch_texel_3d, so we can't - * share code with the normal path. 
- */ - for (i = 0; i< mt->level[level].depth; i++) { -intel_miptree_get_image_offset(mt, level, i,&x,&y); -intel_image->base.ImageOffsets[i] = x + y * (mt->region->pitch / - mt->region->cpp); - } - - DBG("%s \n", __FUNCTION__); - - intel_image->base.Map = intel_miptree_map_raw(intel, mt); - } else { - assert(intel_image->base.Base.Depth == 1); - intel_miptree_get_image_offset(mt, level, face,&x,&y); - - DBG("%s: (%d,%d) -> (%d, %d)/%d\n", - __FUNCTION__, face, level, x, y, mt->region->pitch); - - intel_image->base.Map = intel_miptree_map_raw(intel, mt) + -x * mt->cpp + y * mt->region->pitch; + void *map = intel_miptree_map_raw(intel, mt); + + for (int i = 0; i< mt->level[level].depth; i++) { + intel_miptree_get_image_offset(mt, level, i,&x,&y); + intel_image->base.ImageSlices[i] = (map + + y * mt->region->pitch + + x * mt->cpp); + DBG("%s: (%d,%d,%d) -> (%d, %d)/%d\n", + __FUNCTION__, face, level, i, x, y, mt->region->pitch); } + intel_image->base.Map = intel_image->base.ImageSlices[0]; + assert(mt->region->pitch % mt->region->cpp == 0); intel_image->base.RowStride = mt->region->pitch / mt->region->cpp; } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 23942cb..aa2f734 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -638,7 +638,6 @@ radeon_swrast_map_image(radeonContextPtr rmesa, radeon_mipmap_tree *mt; GLuint texel_size; radeon_mipmap_level *lvl; - int rs; if (!image || !image->mt) return; @@ -650,18 +649,16 @@ radeon_swrast_map_image(radeonContextPtr rmesa, lvl =&image->mt->levels[level]; - rs = lvl->rowstride / texel_size; - radeon_bo_map(mt->bo, 1); image->base.Map = mt->bo->ptr + lvl->faces[face].offset; - if (mt->target == GL_TEXTURE_3D) { - int i; - for (i = 0; i< mt->levels[level].depth; i++) - image->base.ImageOffsets[i] = rs * lvl->height * i; + for (int i = 0; i< mt->levels[level].depth; i++) { + image->base.ImageSlices[i] = + 
image->base.Map + (lvl->rowstride * lvl->height * i); } - image->base.RowStride = rs; + + image->base.RowStride = lvl->rowstride / texel_size; } static void diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index f74ac5d..1afd51c 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -587,7 +587,7 @@ _mesa_decompress_image(gl_format format, GLuint width, GLuint height, for (j = 0; j< height; j++) { for (i = 0; i< width; i++) { - fetch(src, NULL, stride, i, j, 0, dest); + fetch(src, stride, i, j, dest);
Re: [Mesa-dev] [PATCH 12/17] swrast: Always use MapTextureImage for mapping textures for swrast.
On 04/22/2013 10:14 AM, Eric Anholt wrote: Now that everything goes through ImageSlices[], we can rely on the driver's existing texture mapping function. A big block of code goes away on Radeon that looks like it was to deal with the validate that happened at SpanRenderStart, which no longer occurs since we don't need validation for the MapTextureImage hook. --- src/mesa/drivers/dri/i915/intel_tris.c |2 - src/mesa/drivers/dri/intel/intel_span.c | 80 ++- src/mesa/drivers/dri/intel/intel_span.h |2 - src/mesa/drivers/dri/intel/intel_tex.h |6 -- src/mesa/drivers/dri/intel/intel_tex_validate.c | 90 -- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 21 - src/mesa/drivers/dri/radeon/radeon_span.c| 18 + src/mesa/drivers/dri/radeon/radeon_texture.c | 74 -- src/mesa/drivers/dri/radeon/radeon_texture.h |3 - src/mesa/swrast/s_texture.c | 61 --- 10 files changed, 59 insertions(+), 298 deletions(-) diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 4516db6..30eb6ac 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1096,11 +1096,9 @@ intelRunPipeline(struct gl_context * ctx) intel->NewGLState = 0; } - intel_map_vertex_shader_textures(ctx); intel->tnl_pipeline_running = true; _tnl_run_pipeline(ctx); intel->tnl_pipeline_running = false; - intel_unmap_vertex_shader_textures(ctx); _mesa_unlock_context_textures(ctx); } diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index d7eaa41..e74398d 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -105,31 +105,8 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) } /** - * Map the regions needed by intelSpanRenderStart(). 
- */ -static void -intel_span_map_buffers(struct intel_context *intel) -{ - struct gl_context *ctx =&intel->ctx; - struct intel_texture_object *tex_obj; - - for (int i = 0; i< ctx->Const.MaxTextureImageUnits; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) -continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - intel_finalize_mipmap_tree(intel, i); - intel_tex_map_images(intel, tex_obj, - GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); - } - - _swrast_map_renderbuffers(ctx); -} - -/** * Prepare for software rendering. Map current read/draw framebuffers' - * renderbuffes and all currently bound texture objects. - * - * Old note: Moved locking out to get reasonable span performance. + * renderbuffers and all currently bound texture objects. */ void intelSpanRenderStart(struct gl_context * ctx) @@ -139,7 +116,9 @@ intelSpanRenderStart(struct gl_context * ctx) intel_flush(ctx); intel_prepare_render(intel); intel_flush(ctx); - intel_span_map_buffers(intel); + + _swrast_map_textures(ctx); + _swrast_map_renderbuffers(ctx); } /** @@ -149,18 +128,8 @@ intelSpanRenderStart(struct gl_context * ctx) void intelSpanRenderFinish(struct gl_context * ctx) { - struct intel_context *intel = intel_context(ctx); - GLuint i; - _swrast_flush(ctx); - - for (i = 0; i< ctx->Const.MaxTextureImageUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; - intel_tex_unmap_images(intel, intel_texture_object(texObj)); - } - } - + _swrast_unmap_textures(ctx); _swrast_unmap_renderbuffers(ctx); } @@ -174,42 +143,3 @@ intelInitSpanFuncs(struct gl_context * ctx) swdd->SpanRenderFinish = intelSpanRenderFinish; } } - -void -intel_map_vertex_shader_textures(struct gl_context *ctx) -{ - struct intel_context *intel = intel_context(ctx); - int i; - - if (ctx->VertexProgram._Current == NULL) - return; - - for (i = 0; i< ctx->Const.MaxTextureImageUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled&& - 
ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { - struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; - - intel_tex_map_images(intel, intel_texture_object(texObj), - GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); - } - } -} - -void -intel_unmap_vertex_shader_textures(struct gl_context *ctx) -{ - struct intel_context *intel = intel_context(ctx); - int i; - - if (ctx->VertexProgram._Current == NULL) - return; - - for (i = 0; i< ctx->Const.MaxTextureImageUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled&& - ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) { - struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; - - intel_tex_unmap_images(
Re: [Mesa-dev] swrast MapTextureImage fetches
On 04/22/2013 10:14 AM, Eric Anholt wrote: 34 files changed, 311 insertions(+), 889 deletions(-) Also, swrast_dri.so now passes two FXT1 tests that failed before, and i915's vertex shader texturing works. I haven't tested the radeon/nouveau code, nor have I been as invasive as I could be, because I don't have any AGP systems left. nouveau's got a giant pile of code to be deleted if anyone gets around to doing AllocTextureImageBuffer for it. The way swrast renderbuffers are linked to swrast textures is still pretty hokey, but it's the best I could do at the moment. I think we should have MapRenderbuffer in general call MapTextureImage for texture renderbuffers, without drivers having to manually do so. To do that we'd want the gl_renderbuffer to have a pointer to its TextureImage/slice instead of that being in the gl_renderbuffer_attachment. Branch is at swrast-texture-mapping of my tree. I just had a handful of small comments. Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] vbo code and flush explicit mapping
- Original Message - > On Mon, Apr 29, 2013 at 2:52 PM, Dave Airlie wrote: > > I've been playing with a gallium driver, and started looking at some > > weird gears behaviour, > > > > The vbo code maps the buffer with GL_MAP_FLUSH_EXPLICIT_BIT which to > > me requires that at some point > > we call ctx->Driver.FlushMappedBufferRange. Now the code attempts to > > call it in vbo_exec_vtx_unmap > > but from what I can see the length is always 0 in there and so we > > never get called. > > > > GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * > > sizeof(float); > > > > but I can not see buffer_ptr != buffer_map when we hit this code, I'm > > a bit lost at this point, maybe someone else might know more. > > Okay maybe I'm missing something, I'm just tracing gears with softpipe > and realised the map/unmap > pairs occur due to the Material calls but they don't actually seem to > modify the VBOs. That's weird. I thought that material changes would just translate to constant buffer updates. It looks like we replace the glBegin .. glEnd with VBOs, but we still translate Material calls literally. Now that no Mesa driver handles fixed function directly, we could probably simplify some of these things. Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] vbo code and flush explicit mapping
On 04/30/2013 09:03 AM, Jose Fonseca wrote: - Original Message - On Mon, Apr 29, 2013 at 2:52 PM, Dave Airlie wrote: I've been playing with a gallium driver, and started looking at some weird gears behaviour, The vbo code maps the buffer with GL_MAP_FLUSH_EXPLICIT_BIT which to me requires that at some point we call ctx->Driver.FlushMappedBufferRange. Now the code attempts to call it in vbo_exec_vtx_unmap but from what I can see the length is always 0 in there and so we never get called. GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); but I can not see buffer_ptr != buffer_map when we hit this code, I'm a bit lost at this point, maybe someone else might know more. Okay maybe I'm missing something, I'm just tracing gears with softpipe and realised the map/unmap pairs occur due to the Material calls but they don't actually seem to modify the VBOs. That's weird. I thought that material changes would just translate to constant buffer updates. It looks like we replace the glBegin .. glEnd with VBOs, but we still translate Material calls literally. Now that no Mesa driver handles fixed function directly, we could probably simplify some of these things. glMaterial is complicated. It's handled differently depending on whether it's called inside or outside glBegin/End or in a display list. Even though I've been working in the VBO code lately, it would take me a while to re-learn how glMaterial is handled. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] draw: don't crash if GS doesn't emit anything
> > What is "IA"? > > "Input Assembly", a D3D10 term that roughly matches pipe_vertex_elements / > pipe_vertex_buffer state. > > BTW, I think that Chris Forbes makes a good point -- a GS might choose to > not emit any vertices (e.g., that does fancy culling) -- so maybe this > debugging message should be silent by default. That's still a silly thing to do, because you're running a high cost operation on the slowest part of the pipeline. Ideally we'd connect to arb_debug_output for this stuff, but currently the most frequent cause of gs/ia not emitting any primitives is that something broke (e.g. one of our instructions is not fully compliant causing vs or gs errors in outputs) and the debug message is quite helpful in figuring that out. Of course I don't have a strong sentimental attachment to a debugging output but this particular one does make my job easier. z ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/7] i965/vs: Do round-robin register allocation on gen6+ like we do in the FS.
This will free instruction scheduling to make better choices. No statistically significant performance difference on GLB2.7 (n=93). --- src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp |4 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index ac3d401..7149d46 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -102,6 +102,8 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw, int class_count, int base_reg_count) { + struct intel_context *intel = &brw->intel; + /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; for (int i = 0; i < class_count; i++) { @@ -112,6 +114,8 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw, brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); ralloc_free(brw->vs.regs); brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count); + if (intel->gen >= 6) + ra_set_allocate_round_robin(brw->vs.regs); ralloc_free(brw->vs.classes); brw->vs.classes = ralloc_array(brw, int, class_count + 1); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] i965: Share the register file enum between the two backends.
I need this so I can look at vec4 and fs registers' files from the same .cpp file without namespaces. As far as I can tell we never rely on the particular numerical values of the files, though I thought it sounded like a good idea when doing the VS (it turns out having 0 be BAD_FILE is nicer). --- src/mesa/drivers/dri/i965/brw_fs.cpp | 12 ++-- src/mesa/drivers/dri/i965/brw_fs.h | 10 -- src/mesa/drivers/dri/i965/brw_fs_emit.cpp |2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |4 ++-- src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp |8 src/mesa/drivers/dri/i965/brw_shader.h | 11 +++ src/mesa/drivers/dri/i965/brw_vec4.h | 11 --- 7 files changed, 24 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9a76408..8411675 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -218,7 +218,7 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition) */ if (intel->gen == 4) { dst.type = src0.type; - if (dst.file == FIXED_HW_REG) + if (dst.file == HW_REG) dst.fixed_hw_reg.type = dst.type; } @@ -405,7 +405,7 @@ fs_reg::fs_reg(uint32_t u) fs_reg::fs_reg(struct brw_reg fixed_hw_reg) { init(); - this->file = FIXED_HW_REG; + this->file = HW_REG; this->fixed_hw_reg = fixed_hw_reg; this->type = fixed_hw_reg.type; } @@ -1212,7 +1212,7 @@ fs_visitor::assign_curb_setup() constant_nr / 8, constant_nr % 8); - inst->src[i].file = FIXED_HW_REG; + inst->src[i].file = HW_REG; inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); } } @@ -1280,12 +1280,12 @@ fs_visitor::assign_urb_setup() fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { -assert(inst->src[2].file == FIXED_HW_REG); +assert(inst->src[2].file == HW_REG); inst->src[2].fixed_hw_reg.nr += urb_start; } if (inst->opcode == FS_OPCODE_CINTERP) { -assert(inst->src[0].file == FIXED_HW_REG); +assert(inst->src[0].file == HW_REG); inst->src[0].fixed_hw_reg.nr += 
urb_start; } } @@ -2402,7 +2402,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps, int grf; if (inst->src[i].file == GRF) { grf = inst->src[i].reg; - } else if (inst->src[i].file == FIXED_HW_REG && + } else if (inst->src[i].file == HW_REG && inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { grf = inst->src[i].fixed_hw_reg.nr; } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index bf76357..436a97a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -55,16 +55,6 @@ namespace { struct acp_entry; } -enum register_file { - BAD_FILE, - ARF, - GRF, - MRF, - IMM, - FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[reg] */ -}; - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 0f6b715..5a5044e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -854,7 +854,7 @@ brw_reg_from_fs_reg(fs_reg *reg) break; } break; - case FIXED_HW_REG: + case HW_REG: brw_reg = reg->fixed_hw_reg; break; case BAD_FILE: diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b9b0303..fa1a938 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -258,7 +258,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * the start (see interp_reg()). 
*/ for (int i = 0; i < 3; i++) { - if (inst->src[i].file == FIXED_HW_REG && + if (inst->src[i].file == HW_REG && inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { int node_nr = inst->src[i].fixed_hw_reg.nr / reg_width; if (node_nr >= payload_node_count) @@ -288,7 +288,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, */ if (intel->gen >= 6) { int delta_x_arg = 0; -if (inst->src[delta_x_arg].file == FIXED_HW_REG && +if (inst->src[delta_x_arg].file == HW_REG && inst->src[delta_x_arg].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { int secha
[Mesa-dev] [PATCH 2/7] i965: Make dump_instructions be a virtual method of the visitor.
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 15 +++ src/mesa/drivers/dri/i965/brw_fs.h |3 +-- src/mesa/drivers/dri/i965/brw_shader.cpp | 11 +++ src/mesa/drivers/dri/i965/brw_shader.h |3 +++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 15 +++ src/mesa/drivers/dri/i965/brw_vec4.h |3 +-- 6 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a8610ee..9a76408 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2665,8 +2665,10 @@ fs_visitor::lower_uniform_pull_constant_loads() } void -fs_visitor::dump_instruction(fs_inst *inst) +fs_visitor::dump_instruction(backend_instruction *be_inst) { + fs_inst *inst = (fs_inst *)be_inst; + if (inst->predicate) { printf("(%cf0.%d) ", inst->predicate_inverse ? '-' : '+', @@ -2769,17 +2771,6 @@ fs_visitor::dump_instruction(fs_inst *inst) printf("\n"); } -void -fs_visitor::dump_instructions() -{ - int ip = 0; - foreach_list(node, &this->instructions) { - fs_inst *inst = (fs_inst *)node; - printf("%d: ", ip++); - dump_instruction(inst); - } -} - /** * Possibly returns an instruction that set up @param reg. 
* diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index c9c9856..bf76357 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -422,8 +422,7 @@ public: void setup_builtin_uniform_values(ir_variable *ir); int implied_mrf_writes(fs_inst *inst); - void dump_instructions(); - void dump_instruction(fs_inst *inst); + void dump_instruction(backend_instruction *inst); struct gl_fragment_program *fp; struct brw_wm_compile *c; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index a820952..9968ee5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -553,3 +553,14 @@ backend_instruction::is_control_flow() return false; } } + +void +backend_visitor::dump_instructions() +{ + int ip = 0; + foreach_list(node, &this->instructions) { + backend_instruction *inst = (backend_instruction *)node; + printf("%d: ", ip++); + dump_instruction(inst); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 5189fdc..4b2b399 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -56,6 +56,9 @@ public: * backend_instruction) */ exec_list instructions; + + virtual void dump_instruction(backend_instruction *inst) = 0; + void dump_instructions(); }; int brw_type_for_base_type(const struct glsl_type *type); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ab4668f..a3ae4a1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1064,8 +1064,10 @@ vec4_visitor::split_virtual_grfs() } void -vec4_visitor::dump_instruction(vec4_instruction *inst) +vec4_visitor::dump_instruction(backend_instruction *be_inst) { + vec4_instruction *inst = (vec4_instruction *)be_inst; + printf("%s ", brw_instruction_name(inst->opcode)); switch (inst->dst.file) { @@ 
-1146,17 +1148,6 @@ vec4_visitor::dump_instruction(vec4_instruction *inst) printf("\n"); } -void -vec4_visitor::dump_instructions() -{ - int ip = 0; - foreach_list_safe(node, &this->instructions) { - vec4_instruction *inst = (vec4_instruction *)node; - printf("%d: ", ip++); - dump_instruction(inst); - } -} - /** * Replace each register of type ATTR in this->instructions with a reference * to a fixed HW register. diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index a4fca2d..cb97a86 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -477,8 +477,7 @@ public: bool process_move_condition(ir_rvalue *ir); - void dump_instruction(vec4_instruction *inst); - void dump_instructions(); + void dump_instruction(backend_instruction *inst); protected: void emit_vertex(); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/7] i965: Pull a couple of FS scheduling functions out to methods.
These will get virtualized as we add VS scheduling support. --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 132 1 file changed, 77 insertions(+), 55 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 5affedf..af8af1d 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -364,6 +364,17 @@ public: void calculate_deps(); void schedule_instructions(fs_inst *next_block_header); + schedule_node *choose_instruction_to_schedule(); + + /** +* Returns how many cycles it takes the instruction to issue. +* +* Instructions in gen hardware are handled one simd4 vector at a time, +* with 1 cycle per vector dispatched. Thus 8-wide pixel shaders take 2 +* cycles to dispatch and 16-wide (compressed) instructions take 4. +*/ + int issue_time(fs_inst *inst); + bool is_compressed(fs_inst *inst); void *mem_ctx; @@ -709,6 +720,67 @@ instruction_scheduler::calculate_deps() } } +schedule_node * +instruction_scheduler::choose_instruction_to_schedule() +{ + schedule_node *chosen = NULL; + + if (post_reg_alloc) { + int chosen_time = 0; + + /* Of the instructions closest ready to execute or the closest to + * being ready, choose the oldest one. + */ + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; + + if (!chosen || n->unblocked_time < chosen_time) { +chosen = n; +chosen_time = n->unblocked_time; + } + } + } else { + /* Before register allocation, we don't care about the latencies of + * instructions. All we care about is reducing live intervals of + * variables so that we can avoid register spilling, or get 16-wide + * shaders which naturally do a better job of hiding instruction + * latency. 
+ * + * To do so, schedule our instructions in a roughly LIFO/depth-first + * order: when new instructions become available as a result of + * scheduling something, choose those first so that our result + * hopefully is consumed quickly. + * + * The exception is messages that generate more than one result + * register (AKA texturing). In those cases, the LIFO search would + * normally tend to choose them quickly (because scheduling the + * previous message not only unblocked the children using its result, + * but also the MRF setup for the next sampler message, which in turn + * unblocks the next sampler message). + */ + for (schedule_node *node = (schedule_node *)instructions.get_tail(); + node != instructions.get_head()->prev; + node = (schedule_node *)node->prev) { + schedule_node *n = (schedule_node *)node; + + chosen = n; + if (chosen->inst->regs_written <= 1) +break; + } + } + + return chosen; +} + +int +instruction_scheduler::issue_time(fs_inst *inst) +{ + if (is_compressed(inst)) + return 4; + else + return 2; +} + void instruction_scheduler::schedule_instructions(fs_inst *next_block_header) { @@ -722,52 +794,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) } while (!instructions.is_empty()) { - schedule_node *chosen = NULL; - int chosen_time = 0; - - if (post_reg_alloc) { - /* Of the instructions closest ready to execute or the closest to - * being ready, choose the oldest one. - */ - foreach_list(node, &instructions) { -schedule_node *n = (schedule_node *)node; - -if (!chosen || n->unblocked_time < chosen_time) { - chosen = n; - chosen_time = n->unblocked_time; -} - } - } else { - /* Before register allocation, we don't care about the latencies of - * instructions. All we care about is reducing live intervals of - * variables so that we can avoid register spilling, or get 16-wide - * shaders which naturally do a better job of hiding instruction - * latency. 
- * - * To do so, schedule our instructions in a roughly LIFO/depth-first - * order: when new instructions become available as a result of - * scheduling something, choose those first so that our result - * hopefully is consumed quickly. - * - * The exception is messages that generate more than one result - * register (AKA texturing). In those cases, the LIFO search would - * normally tend to choose them quickly (because scheduling the - * previous message not only unblocked the children using its result, - * but also the MRF setup for the next sampler message, which in turn - * unblocks the next
[Mesa-dev] [PATCH 7/7] i965/vs: Add instruction scheduling.
While this is ignorant of dependency control, it's still good for a 0.39% +/- 0.08% performance improvement on GLBenchmark 2.7 (n=548) v2: Rewrite as a subclass of the base class for the FS instruction scheduler, inheriting the same latency information. --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 219 src/mesa/drivers/dri/i965/brw_vec4.cpp |9 + src/mesa/drivers/dri/i965/brw_vec4.h |1 + 3 files changed, 229 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 94fdf3e..6a52754 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -26,10 +26,13 @@ */ #include "brw_fs.h" +#include "brw_vec4.h" #include "glsl/glsl_types.h" #include "glsl/ir_optimization.h" #include "glsl/ir_print_visitor.h" +using namespace brw; + /** @file brw_fs_schedule_instructions.cpp * * List scheduling of FS instructions. @@ -297,6 +300,7 @@ schedule_node::set_latency_gen7(bool is_haswell) case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case VS_OPCODE_PULL_CONSTANT_LOAD: /* testing using varying-index pull constants: * * 16 cycles: @@ -405,6 +409,23 @@ fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, { } +class vec4_instruction_scheduler : public instruction_scheduler +{ +public: + vec4_instruction_scheduler(vec4_visitor *v, int grf_count); + void calculate_deps(); + schedule_node *choose_instruction_to_schedule(); + int issue_time(backend_instruction *inst); + vec4_visitor *v; +}; + +vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, + int grf_count) + : instruction_scheduler(v, grf_count, true), + v(v) +{ +} + void instruction_scheduler::add_inst(backend_instruction *inst) { @@ -739,6 +760,163 @@ fs_instruction_scheduler::calculate_deps() } } +void +vec4_instruction_scheduler::calculate_deps() +{ + schedule_node 
*last_grf_write[grf_count]; + schedule_node *last_mrf_write[BRW_MAX_MRF]; + schedule_node *last_conditional_mod = NULL; + /* Fixed HW registers are assumed to be separate from the virtual +* GRFs, so they can be tracked separately. We don't really write +* to fixed GRFs much, so don't bother tracking them on a more +* granular level. +*/ + schedule_node *last_fixed_grf_write = NULL; + + /* The last instruction always needs to still be the last instruction. +* Either it's flow control (IF, ELSE, ENDIF, DO, WHILE) and scheduling +* other things after it would disturb the basic block, or it's the EOT +* URB_WRITE and we should do a better job at dead code eliminating +* anything that could have been scheduled after it. +*/ + schedule_node *last = (schedule_node *)instructions.get_tail(); + add_barrier_deps(last); + + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + + /* top-to-bottom dependencies: RAW and WAW. */ + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; + vec4_instruction *inst = (vec4_instruction *)n->inst; + + /* read-after-write deps. */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { +add_dep(last_grf_write[inst->src[i].reg], n); + } else if (inst->src[i].file == HW_REG && +(inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { +add_dep(last_fixed_grf_write, n); + } else if (inst->src[i].file != BAD_FILE && +inst->src[i].file != IMM && +inst->src[i].file != UNIFORM) { +/* No reads from MRF, and ATTR is already translated away */ +assert(inst->src[i].file != MRF && + inst->src[i].file != ATTR); +add_barrier_deps(n); + } + } + + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. 
+ */ + add_dep(last_mrf_write[inst->base_mrf + i], n); + } + + if (inst->predicate) { + assert(last_conditional_mod); + add_dep(last_conditional_mod, n); + } + + /* write-after-write deps. */ + if (inst->dst.file == GRF) { + add_dep(last_grf_write[inst->dst.reg], n); + last_grf_write[inst->dst.reg] = n; + } else if (inst->dst.file == MRF) { + add_dep(last_mrf_write[inst->dst.reg], n); + last_mrf_write[inst->dst.reg] = n; + } else if (inst->dst.file == HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_
[Mesa-dev] [PATCH 6/7] i965: Move most of the FS instruction scheduler code to a general class.
About half of this is shareable with the VS code. --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 124 1 file changed, 75 insertions(+), 49 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index af8af1d..94fdf3e 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -57,7 +57,7 @@ static bool debug = false; class schedule_node : public exec_node { public: - schedule_node(fs_inst *inst, const struct intel_context *intel) + schedule_node(backend_instruction *inst, const struct intel_context *intel) { this->inst = inst; this->child_array_size = 0; @@ -79,7 +79,7 @@ public: void set_latency_gen4(); void set_latency_gen7(bool is_haswell); - fs_inst *inst; + backend_instruction *inst; schedule_node **children; int *child_latency; int child_count; @@ -341,15 +341,15 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: - instruction_scheduler(fs_visitor *v, void *mem_ctx, int grf_count, - bool post_reg_alloc) + instruction_scheduler(backend_visitor *v, int grf_count, bool post_reg_alloc) { - this->v = v; - this->mem_ctx = ralloc_context(mem_ctx); + this->bv = v; + this->mem_ctx = ralloc_context(v->mem_ctx); this->grf_count = grf_count; this->instructions.make_empty(); this->instructions_to_schedule = 0; this->post_reg_alloc = post_reg_alloc; + this->time = 0; } ~instruction_scheduler() @@ -360,11 +360,10 @@ public: void add_dep(schedule_node *before, schedule_node *after, int latency); void add_dep(schedule_node *before, schedule_node *after); - void add_inst(fs_inst *inst); - void calculate_deps(); - void schedule_instructions(fs_inst *next_block_header); - - schedule_node *choose_instruction_to_schedule(); + void run(exec_list *instructions); + void add_inst(backend_instruction *inst); + virtual void calculate_deps() = 0; + virtual schedule_node 
*choose_instruction_to_schedule() = 0; /** * Returns how many cycles it takes the instruction to issue. @@ -373,23 +372,43 @@ public: * with 1 cycle per vector dispatched. Thus 8-wide pixel shaders take 2 * cycles to dispatch and 16-wide (compressed) instructions take 4. */ - int issue_time(fs_inst *inst); + virtual int issue_time(backend_instruction *inst) = 0; - bool is_compressed(fs_inst *inst); + void schedule_instructions(backend_instruction *next_block_header); void *mem_ctx; bool post_reg_alloc; int instructions_to_schedule; int grf_count; + int time; exec_list instructions; + backend_visitor *bv; +}; + +class fs_instruction_scheduler : public instruction_scheduler +{ +public: + fs_instruction_scheduler(fs_visitor *v, int grf_count, bool post_reg_alloc); + void calculate_deps(); + bool is_compressed(fs_inst *inst); + schedule_node *choose_instruction_to_schedule(); + int issue_time(backend_instruction *inst); fs_visitor *v; }; +fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, + int grf_count, + bool post_reg_alloc) + : instruction_scheduler(v, grf_count, post_reg_alloc), + v(v) +{ +} + void -instruction_scheduler::add_inst(fs_inst *inst) +instruction_scheduler::add_inst(backend_instruction *inst) { - schedule_node *n = new(mem_ctx) schedule_node(inst, v->intel); + schedule_node *n = new(mem_ctx) schedule_node(inst, bv->intel); assert(!inst->is_head_sentinel()); assert(!inst->is_tail_sentinel()); @@ -480,7 +499,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) * actually writes 2 MRFs. 
*/ bool -instruction_scheduler::is_compressed(fs_inst *inst) +fs_instruction_scheduler::is_compressed(fs_inst *inst) { return (v->dispatch_width == 16 && !inst->force_uncompressed && @@ -488,7 +507,7 @@ instruction_scheduler::is_compressed(fs_inst *inst) } void -instruction_scheduler::calculate_deps() +fs_instruction_scheduler::calculate_deps() { /* Pre-register-allocation, this tracks the last write per VGRF (so * different reg_offsets within it can interfere when they shouldn't). @@ -521,7 +540,7 @@ instruction_scheduler::calculate_deps() /* top-to-bottom dependencies: RAW and WAW. */ foreach_list(node, &instructions) { schedule_node *n = (schedule_node *)node; - fs_inst *inst = n->inst; + fs_inst *inst = (fs_inst *)n->inst; if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT) add_barrier_deps(n); @@ -629,7 +648,7 @@ instruction_scheduler::calculate_deps() !node->is_head_sentinel(); node = prev, prev = node->prev) { schedule_node *n = (schedule_no
Re: [Mesa-dev] [PATCH 2/2] mesa/program: Don't copy propagate from swizzles.
On 04/24/2013 04:30 PM, Fabian Bieler wrote: Do not propagate a copy if source and destination are identical. Otherwise code like MOV TEMP[0].xyzw, TEMP[0].wzyx MOV TEMP[1].xyzw, TEMP[0].xyzw is changed to MOV TEMP[0].xyzw, TEMP[0].wzyx MOV TEMP[1].xyzw, TEMP[0].wzyx This fixes Piglit test shaders/glsl-copy-propagation-self-2 for drivers that use Mesa IR. D'oh. Good catch. Please add NOTE: This is a candidate for stable branches. to the commit message. Reviewed-by: Ian Romanick --- src/mesa/program/ir_to_mesa.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 14cf5ba..dff1762 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2757,6 +2757,8 @@ ir_to_mesa_visitor::copy_propagate(void) /* If this is a copy, add it to the ACP. */ if (inst->op == OPCODE_MOV && inst->dst.file == PROGRAM_TEMPORARY && + !(inst->dst.file == inst->src[0].file && + inst->dst.index == inst->src[0].index) && !inst->dst.reladdr && !inst->saturate && !inst->src[0].reladdr && ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3 v2] mesa: Add a script to generate the list of fixed bugs
On 04/18/2013 12:38 AM, Andreas Boll wrote: This list appears in the fixed bugs section of the release notes. v2: Add usage examples NOTE: This is a candidate for the stable branches. The series is Reviewed-by: Ian Romanick --- bin/bugzilla_mesa.sh | 52 ++ 1 file changed, 52 insertions(+) create mode 100755 bin/bugzilla_mesa.sh diff --git a/bin/bugzilla_mesa.sh b/bin/bugzilla_mesa.sh new file mode 100755 index 000..491ca0e --- /dev/null +++ b/bin/bugzilla_mesa.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# This script is used to generate the list of fixed bugs that +# appears in the release notes files, with HTML formatting. +# +# Note: This script could take a while until all details have +# been fetched from bugzilla. +# +# Usage examples: +# +# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 +# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 > bugfixes +# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | tee bugfixes +# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 +# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | wc -l + + +# regex pattern: trim before url +trim_before='s/.*\(http\)/\1/' + +# regex pattern: trim after url +trim_after='s/\(show_bug.cgi?id=[0-9]*\).*/\1/' + +# regex pattern: always use https +use_https='s/http:/https:/' + +# extract fdo urls from commit log +urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before -e $trim_after -e $use_https | sort | uniq) + +# if DRYRUN is set to "yes", simply print the URLs and don't fetch the +# details from fdo bugzilla. +#DRYRUN=yes + +if [ "x$DRYRUN" = xyes ]; then + for i in $urls + do + echo $i + done +else + echo "" + echo "" + + for i in $urls + do + id=$(echo $i | cut -d'=' -f2) + summary=$(wget --quiet -O - $i | grep -e '.*' | sed -e 's/ *Bug [0-9]\+ – \(.*\)<\/title>/\1/') + echo "Bug $id - $summary" + echo "" + done + + echo "" +fi ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [GSoC 13] Interested in the idea: Improved application of GLSL compiler optimizations
On 04/19/2013 06:04 AM, pkucoin wrote: Hi, My name is Sida Li and I am a senior student from Peking University in China. I am interested in the idea of improved application of GLSL compiler optimizations. I have downloaded the source code and read some parts of it. First let me talk about my understanding of the problem. The loop we try to improve should be in the main.cpp: do { progress = do_common_optimization(shader->ir, false, false, 32); } while (progress); In the function do_common_optimization, all optimization passes are called in a certain order. After the loop has run a few times, some optimization passes do not contribute to optimizing the code since the original unoptimized code has been optimized and no more code that can be optimized by these optimization passes is generated by other optimization passes. So time spent on these optimization passes is wasted. The goal is to find a static ordering, with possible repeats, of optimization passes that does not compromise the quality of the generated code. I find it difficult to start because a single optimization pass or a combo of optimization passes may generate some code that can be optimized by other optimization passes, which cannot be determined in advance. Since I suggested the project, I'll offer some opinions. There are a few ways to attack this problem, I think. Certain optimizations can generate opportunities for other optimizations to do work. For example, copy propagation can "create" dead code. Using this sort of analysis, it should be possible to come up with an initial ordering of passes that still uses the "while progress" loop. Once that is done, it should be possible to instrument the code to see which optimization passes make progress on each pass. This will let us know, for example, if do_algebraic never does anything after the first pass. Then it's a matter of running the compiler on a large set of shaders and seeing what happens. Can you give me some advice? 
Best regards, Sida Li ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] st/vdpau: fix background handling in the mixer
From: Christian König Signed-off-by: Christian König --- src/gallium/state_trackers/vdpau/mixer.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index 1d2ae49..26db5c8 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -221,7 +221,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, vlVdpVideoMixer *vmixer; vlVdpSurface *surf; - vlVdpOutputSurface *dst; + vlVdpOutputSurface *dst, *bg = NULL; struct vl_compositor *compositor; @@ -250,20 +250,21 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, if (!dst) return VDP_STATUS_INVALID_HANDLE; - pipe_mutex_lock(vmixer->device->mutex); - vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL); if (background_surface != VDP_INVALID_HANDLE) { - vlVdpOutputSurface *bg = vlGetDataHTAB(background_surface); - if (!bg) { - pipe_mutex_unlock(vmixer->device->mutex); + bg = vlGetDataHTAB(background_surface); + if (!bg) return VDP_STATUS_INVALID_HANDLE; - } - vl_compositor_set_rgba_layer(&vmixer->cstate, compositor, layer++, bg->sampler_view, - RectToPipe(background_source_rect, &rect), NULL, NULL); } + pipe_mutex_lock(vmixer->device->mutex); + vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL); + vl_compositor_clear_layers(&vmixer->cstate); + if (bg) + vl_compositor_set_rgba_layer(&vmixer->cstate, compositor, layer++, bg->sampler_view, + RectToPipe(background_source_rect, &rect), NULL, NULL); + switch (current_picture_structure) { case VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD: deinterlace = VL_COMPOSITOR_BOB_TOP; -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] vl/buffer: use 2D_ARRAY instead of 3D textures
From: Christian König Signed-off-by: Christian König --- src/gallium/auxiliary/vl/vl_compositor.c| 12 ++-- src/gallium/auxiliary/vl/vl_video_buffer.c | 23 --- src/gallium/auxiliary/vl/vl_video_buffer.h |7 --- src/gallium/drivers/r600/r600_uvd.c | 14 +++--- src/gallium/drivers/radeonsi/radeonsi_uvd.c | 14 +++--- src/gallium/state_trackers/vdpau/surface.c |8 6 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 1c393a9..62f593a 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -152,7 +152,7 @@ create_frag_shader_video_buffer(struct vl_compositor *c) * fragment = csc * texel */ for (i = 0; i < 3; ++i) - ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc, sampler[i]); + ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D_ARRAY, tc, sampler[i]); ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); @@ -207,7 +207,7 @@ create_frag_shader_weave(struct vl_compositor *c) i_tc[i], ureg_imm1f(shader, 0.5f)); ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i])); ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), - ureg_imm1f(shader, i ? -0.25f : 0.25f)); + ureg_imm1f(shader, i ? 1.0f : 0.0f)); ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y), @@ -227,7 +227,7 @@ create_frag_shader_weave(struct vl_compositor *c) TGSI_SWIZZLE_X, j ? 
TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j), - TGSI_TEXTURE_3D, src, sampler[j]); + TGSI_TEXTURE_2D_ARRAY, src, sampler[j]); } /* calculate linear interpolation factor @@ -558,7 +558,7 @@ static INLINE struct u_rect default_rect(struct vl_compositor_layer *layer) { struct pipe_resource *res = layer->sampler_views[0]->texture; - struct u_rect rect = { 0, res->width0, 0, res->height0 * res->depth0 }; + struct u_rect rect = { 0, res->width0, 0, res->height0 * res->array_size }; return rect; } @@ -902,14 +902,14 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, break; case VL_COMPOSITOR_BOB_TOP: - s->layers[layer].zw.x = 0.25f; + s->layers[layer].zw.x = 0.0f; s->layers[layer].src.tl.y += half_a_line; s->layers[layer].src.br.y += half_a_line; s->layers[layer].fs = c->fs_video_buffer; break; case VL_COMPOSITOR_BOB_BOTTOM: - s->layers[layer].zw.x = 0.75f; + s->layers[layer].zw.x = 1.0f; s->layers[layer].src.tl.y -= half_a_line; s->layers[layer].src.br.y -= half_a_line; s->layers[layer].fs = c->fs_video_buffer; diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c index d61dab2..220c3ea 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.c +++ b/src/gallium/auxiliary/vl/vl_video_buffer.c @@ -216,15 +216,16 @@ void vl_vide_buffer_template(struct pipe_resource *templ, const struct pipe_video_buffer *tmpl, enum pipe_format resource_format, -unsigned depth, unsigned usage, unsigned plane) +unsigned array_size, unsigned usage, +unsigned plane) { memset(templ, 0, sizeof(*templ)); - templ->target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D; + templ->target = array_size > 1 ? 
PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D; templ->format = resource_format; templ->width0 = tmpl->width; templ->height0 = tmpl->height; - templ->depth0 = depth; - templ->array_size = 1; + templ->depth0 = 1; + templ->array_size = array_size; templ->bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; templ->usage = usage; @@ -349,15 +350,15 @@ vl_video_buffer_surfaces(struct pipe_video_buffer *buffer) struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer; struct pipe_surface surf_templ; struct pipe_context *pipe; - unsigned i, j, depth, surf; + unsigned i, j, array_size, surf; assert(buf); pipe = buf->base.context; - depth = buffer->interlaced ? 2 : 1; + array_size = buffer->interlaced ? 2 : 1; for (i = 0, surf = 0; i < VL_NUM_COMPONENTS; ++i) { - for (j = 0; j < depth; ++j, ++surf) { + for (j = 0; j < array_size; ++j, ++surf) { assert(surf < (VL_NUM_COMPONENTS * 2)); if (!buf->resources[i]) { @@ -433,7 +434,7 @@ st
[Mesa-dev] [PATCH 1/3] vl/compositor: cleanup background clearing
From: Christian König Add an extra parameter to specify if we should clear the render target. Signed-off-by: Christian König --- src/gallium/auxiliary/vl/vl_compositor.c|7 --- src/gallium/auxiliary/vl/vl_compositor.h|3 ++- src/gallium/state_trackers/vdpau/device.c |2 +- src/gallium/state_trackers/vdpau/mixer.c|2 +- src/gallium/state_trackers/vdpau/output.c | 12 src/gallium/state_trackers/vdpau/presentation.c |2 +- src/gallium/state_trackers/xvmc/surface.c |2 +- 7 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 0df2b57..1c393a9 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -986,7 +986,8 @@ void vl_compositor_render(struct vl_compositor_state *s, struct vl_compositor *c, struct pipe_surface*dst_surface, - struct u_rect *dirty_area) + struct u_rect *dirty_area, + boolclear_dirty) { assert(c); assert(dst_surface); @@ -1004,8 +1005,8 @@ vl_compositor_render(struct vl_compositor_state *s, gen_vertex_data(c, s, dirty_area); - if (dirty_area && (dirty_area->x0 < dirty_area->x1 || - dirty_area->y0 < dirty_area->y1)) { + if (clear_dirty && dirty_area && + (dirty_area->x0 < dirty_area->x1 || dirty_area->y0 < dirty_area->y1)) { c->pipe->clear_render_target(c->pipe, dst_surface, &s->clear_color, 0, 0, dst_surface->width, dst_surface->height); diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 6de6ca0..2a1f66c 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -224,7 +224,8 @@ void vl_compositor_render(struct vl_compositor_state *state, struct vl_compositor *compositor, struct pipe_surface*dst_surface, - struct u_rect *dirty_area); + struct u_rect *dirty_area, + boolclear_dirty); /** * destroy this compositor diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c index 
dd586f5..c530f43 100644 --- a/src/gallium/state_trackers/vdpau/device.c +++ b/src/gallium/state_trackers/vdpau/device.c @@ -279,7 +279,7 @@ vlVdpResolveDelayedRendering(vlVdpDevice *dev, struct pipe_surface *surface, str dirty_area = &vlsurface->dirty_area; } - vl_compositor_render(cstate, &dev->compositor, surface, dirty_area); + vl_compositor_render(cstate, &dev->compositor, surface, dirty_area, true); dev->delayed_rendering.surface = VDP_INVALID_HANDLE; dev->delayed_rendering.cstate = NULL; diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c index 81a5c29..1d2ae49 100644 --- a/src/gallium/state_trackers/vdpau/mixer.c +++ b/src/gallium/state_trackers/vdpau/mixer.c @@ -312,7 +312,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer, if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter) vlVdpSave4DelayedRendering(vmixer->device, destination_surface, &vmixer->cstate); else { - vl_compositor_render(&vmixer->cstate, compositor, dst->surface, &dst->dirty_area); + vl_compositor_render(&vmixer->cstate, compositor, dst->surface, &dst->dirty_area, true); /* applying the noise reduction after scaling is actually not very clever, but currently we should avoid to copy around the image diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c index df0f458..7266cdb 100644 --- a/src/gallium/state_trackers/vdpau/output.c +++ b/src/gallium/state_trackers/vdpau/output.c @@ -382,8 +382,7 @@ vlVdpOutputSurfacePutBitsIndexed(VdpOutputSurface surface, vl_compositor_clear_layers(cstate); vl_compositor_set_palette_layer(cstate, compositor, 0, sv_idx, sv_tbl, NULL, NULL, false); vl_compositor_set_layer_dst_area(cstate, 0, RectToPipe(destination_rect, &dst_rect)); - vl_compositor_render(cstate, compositor, vlsurface->surface, NULL); - vl_compositor_reset_dirty_area(&vlsurface->dirty_area); + vl_compositor_render(cstate, compositor, vlsurface->surface, &vlsurface->dirty_area, false); 
pipe_sampler_view_reference(&sv_idx, NULL); pipe_sampler_view_reference(&sv_tbl, NULL); @@ -489,8 +488,7 @@ vlVdpOutputSurfacePutBitsYCbCr(VdpOutputSurface surface, vl_compositor_clear_layers(cstate); vl_compositor_set_buffer_layer(cstate, compositor, 0, vbuffer, NULL, NULL, VL_COMPOSITOR_WEAVE); vl_compositor_set_layer_dst_area(cstate, 0, RectToPip
Re: [Mesa-dev] GSOC 2013
On 04/20/2013 10:26 PM, Piyush Tiwari wrote: Hello, I am really interested in doing the GSOC 2013 project "Find common patterns in real GLSL shaders". Implementation: Algorithm: the Max-Miner algorithm, as it uses the same data structure as Apriori, i.e. a hash tree. I've only skimmed the Bayardo paper on Max-Miner, and I think it may be overkill. It is optimized for finding very long patterns in a database. In this context "very long" is likely longer than any GLSL shader our compiler has ever encountered. That's not to say it's a bad idea, it just might be more work to implement than is necessary for this problem. Doing a quick search, I don't see any papers about applying this algorithm to this problem, so, from a pure research perspective, it may be interesting nonetheless. I think the difficulty of this project will be finding a representation of programs that will allow them to be mined. We need to be able to detect that "a + b * c" in one shader is the same pattern as "d + e * f" in another shader. For longer programs with lots of variables, this becomes challenging. The following implementation has been found to be faster than the usual approaches: Max-Miner uses the hash tree to quickly look up all candidate groups whose head appears in the transaction. Then, for each candidate group "g" identified, it traverses down its tail items one by one. (Efficiently mining long patterns from databases). I would like some reviews on my idea. Thanks Piyush ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.
On 04/30/2013 01:52 AM, Kenneth Graunke wrote: Consider the following shader: vec4 f(vec4 v) { return v; } vec4 f(vec4 v); The prototype exactly matches the signature of the earlier definition, so there's absolutely no point in it. However, it doesn't appear to be illegal. The GLSL 4.30 specification offers two relevant quotes: "If a function name is declared twice with the same parameter types, then the return types and all qualifiers must also match, and it is the same function being declared." "User-defined functions can have multiple declarations, but only one definition." In this case the same function was declared twice, and there's only one definition, which fits both pieces of text. There doesn't appear to be any text saying late prototypes are illegal, so presumably it's valid. Unfortunately, it currently triggers an assertion failure: ir_dereference_variable @ specifies undeclared variable `v' @ When we process the second line, we look for an existing exact match so we can enforce the one-definition rule. We then leave sig set to that existing function, and hit sig->replace_parameters(&hir_parameters), unfortunately nuking our existing definition's parameters (which have actual dereferences) with the prototype's bogus unused parameters. Simply bailing out and ignoring such late prototypes is the safest thing to do. Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android. NOTE: This is a candidate for stable branches. Cc: Tapani Pälli Cc: Ian Romanick Signed-off-by: Kenneth Graunke Reviewed-by: Chad Versace ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.
Reviewed-by: Jordan Justen On Tue, Apr 30, 2013 at 1:52 AM, Kenneth Graunke wrote: > Consider the following shader: > > vec4 f(vec4 v) { return v; } > vec4 f(vec4 v); > > The prototype exactly matches the signature of the earlier definition, > so there's absolutely no point in it. However, it doesn't appear to > be illegal. The GLSL 4.30 specification offers two relevant quotes: > > "If a function name is declared twice with the same parameter types, > then the return types and all qualifiers must also match, and it is the > same function being declared." > > "User-defined functions can have multiple declarations, but only one > definition." > > In this case the same function was declared twice, and there's only one > definition, which fits both pieces of text. There doesn't appear to be > any text saying late prototypes are illegal, so presumably it's valid. > > Unfortunately, it currently triggers an assertion failure: > ir_dereference_variable @ specifies undeclared variable `v' @ > > When we process the second line, we look for an existing exact match so > we can enforce the one-definition rule. We then leave sig set to that > existing function, and hit sig->replace_parameters(&hir_parameters), > unfortunately nuking our existing definition's parameters (which have > actual dereferences) with the prototype's bogus unused parameters. > > Simply bailing out and ignoring such late prototypes is the safest > thing to do. > > Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android. > > NOTE: This is a candidate for stable branches. 
> Cc: Tapani Pälli > Cc: Ian Romanick > Signed-off-by: Kenneth Graunke > --- > src/glsl/ast_to_hir.cpp | 15 +++ > 1 file changed, 11 insertions(+), 4 deletions(-) > > diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp > index 2638411..e595110 100644 > --- a/src/glsl/ast_to_hir.cpp > +++ b/src/glsl/ast_to_hir.cpp > @@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions, > "match prototype", name); > } > > -if (is_definition && sig->is_defined) { > - YYLTYPE loc = this->get_location(); > - > - _mesa_glsl_error(& loc, state, "function `%s' redefined", name); > + if (sig->is_defined) { > +if (is_definition) { > + YYLTYPE loc = this->get_location(); > + _mesa_glsl_error(& loc, state, "function `%s' redefined", > name); > +} else { > + /* We just encountered a prototype that exactly matches a > +* function that's already been defined. This is redundant, > +* and we should ignore it. > +*/ > + return NULL; > +} > } >} > } else { > -- > 1.8.2.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] glsl: add AMD_vertex_shader_layer support
On 04/27/2013 04:32 PM, Jordan Justen wrote: This GLSL extension requires that AMD_vertex_shader_layer be enabled by the driver. Most (all?) extensions also add a preprocessor define. Can you poke at AMD's driver to see if GL_AMD_vertex_shader_layer is defined? If so, then we need to add it too. Signed-off-by: Jordan Justen --- src/glsl/builtin_variables.cpp | 31 +++ src/glsl/glsl_parser_extras.cpp |1 + src/glsl/glsl_parser_extras.h |2 ++ 3 files changed, 34 insertions(+) diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index b0c7a20..098c3f1 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -39,6 +39,12 @@ generate_ARB_draw_instanced_variables(exec_list *, struct _mesa_glsl_parse_state *, bool, _mesa_glsl_parser_targets); +static void +generate_AMD_vertex_shader_layer_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool warn, + _mesa_glsl_parser_targets target); + struct builtin_variable { enum ir_variable_mode mode; int slot; @@ -818,6 +824,8 @@ generate_130_vs_variables(exec_list *instructions, "gl_ClipDistance", clip_distance_array_type, ir_var_shader_out, VARYING_SLOT_CLIP_DIST0); + generate_AMD_vertex_shader_layer_variables(instructions, state, false, + vertex_shader); } @@ -1020,6 +1028,29 @@ generate_ARB_draw_instanced_variables(exec_list *instructions, } } +static void +generate_AMD_vertex_shader_layer_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool warn, + _mesa_glsl_parser_targets target) +{ + /* gl_Layer is only available in the vertex shader for the +* AMD_vertex_shader_layer extension. It will also be available in the +* geometry shader when GLSL 1.50 is supported. 
+*/ + if (target != vertex_shader) + return; + + if (state->AMD_vertex_shader_layer_enable) { + ir_variable *inst = + add_variable(instructions, state->symbols, + "gl_Layer", glsl_type::int_type, + ir_var_shader_out, VARYING_SLOT_LAYER); + + if (warn) + inst->warn_extension = "GL_AMD_vertex_shader_layer"; + } +} static void generate_ARB_shader_stencil_export_variables(exec_list *instructions, diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 0992294..e419264 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -468,6 +468,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_shading_language_packing, true, false, true, true, false, ARB_shading_language_packing), EXT(ARB_texture_multisample,true, false, true, true, false, ARB_texture_multisample), EXT(ARB_texture_query_lod, false, false, true, true, false, ARB_texture_query_lod), + EXT(AMD_vertex_shader_layer,true, false, false, true, false, AMD_vertex_shader_layer), }; #undef EXT diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 95891b5..3386365 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -284,6 +284,8 @@ struct _mesa_glsl_parse_state { bool ARB_texture_multisample_warn; bool ARB_texture_query_lod_enable; bool ARB_texture_query_lod_warn; + bool AMD_vertex_shader_layer_enable; + bool AMD_vertex_shader_layer_warn; /*@}*/ /** Extensions supported by the OpenGL implementation. */ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] glsl: add AMD_vertex_shader_layer support
On Tue, Apr 30, 2013 at 9:57 AM, Ian Romanick wrote: > On 04/27/2013 04:32 PM, Jordan Justen wrote: >> >> This GLSL extension requires that AMD_vertex_shader_layer be >> enabled by the driver. > > Most (all?) extensions also add a preprocessor define. Can you poke at > AMD's driver to see if GL_AMD_vertex_shader_layer is defined? If so, then > we need to add it too. Unfortunately, I don't have an AMD card. Anuj tried my piglit test with his AMD card, and found the extension wasn't available. -Jordan >> Signed-off-by: Jordan Justen >> --- >> src/glsl/builtin_variables.cpp | 31 +++ >> src/glsl/glsl_parser_extras.cpp |1 + >> src/glsl/glsl_parser_extras.h |2 ++ >> 3 files changed, 34 insertions(+) >> >> diff --git a/src/glsl/builtin_variables.cpp >> b/src/glsl/builtin_variables.cpp >> index b0c7a20..098c3f1 100644 >> --- a/src/glsl/builtin_variables.cpp >> +++ b/src/glsl/builtin_variables.cpp >> @@ -39,6 +39,12 @@ generate_ARB_draw_instanced_variables(exec_list *, >> struct _mesa_glsl_parse_state *, >> bool, _mesa_glsl_parser_targets); >> >> +static void >> +generate_AMD_vertex_shader_layer_variables(exec_list *instructions, >> + struct _mesa_glsl_parse_state >> *state, >> + bool warn, >> + _mesa_glsl_parser_targets >> target); >> + >> struct builtin_variable { >> enum ir_variable_mode mode; >> int slot; >> @@ -818,6 +824,8 @@ generate_130_vs_variables(exec_list *instructions, >> "gl_ClipDistance", clip_distance_array_type, >> ir_var_shader_out, >> VARYING_SLOT_CLIP_DIST0); >> >> + generate_AMD_vertex_shader_layer_variables(instructions, state, false, >> + vertex_shader); >> } >> >> >> @@ -1020,6 +1028,29 @@ generate_ARB_draw_instanced_variables(exec_list >> *instructions, >> } >> } >> >> +static void >> +generate_AMD_vertex_shader_layer_variables(exec_list *instructions, >> + struct _mesa_glsl_parse_state >> *state, >> + bool warn, >> + _mesa_glsl_parser_targets >> target) >> +{ >> + /* gl_Layer is only available in the vertex shader for the >> +* 
AMD_vertex_shader_layer extension. It will also be available in the >> +* geometry shader when GLSL 1.50 is supported. >> +*/ >> + if (target != vertex_shader) >> + return; >> + >> + if (state->AMD_vertex_shader_layer_enable) { >> + ir_variable *inst = >> + add_variable(instructions, state->symbols, >> + "gl_Layer", glsl_type::int_type, >> + ir_var_shader_out, VARYING_SLOT_LAYER); >> + >> + if (warn) >> + inst->warn_extension = "GL_AMD_vertex_shader_layer"; >> + } >> +} >> >> static void >> generate_ARB_shader_stencil_export_variables(exec_list *instructions, >> diff --git a/src/glsl/glsl_parser_extras.cpp >> b/src/glsl/glsl_parser_extras.cpp >> index 0992294..e419264 100644 >> --- a/src/glsl/glsl_parser_extras.cpp >> +++ b/src/glsl/glsl_parser_extras.cpp >> @@ -468,6 +468,7 @@ static const _mesa_glsl_extension >> _mesa_glsl_supported_extensions[] = { >> EXT(ARB_shading_language_packing, true, false, true, true, >> false, ARB_shading_language_packing), >> EXT(ARB_texture_multisample,true, false, true, true, >> false, ARB_texture_multisample), >> EXT(ARB_texture_query_lod, false, false, true, true, >> false, ARB_texture_query_lod), >> + EXT(AMD_vertex_shader_layer,true, false, false, true, false, >> AMD_vertex_shader_layer), >> }; >> >> #undef EXT >> diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h >> index 95891b5..3386365 100644 >> --- a/src/glsl/glsl_parser_extras.h >> +++ b/src/glsl/glsl_parser_extras.h >> @@ -284,6 +284,8 @@ struct _mesa_glsl_parse_state { >> bool ARB_texture_multisample_warn; >> bool ARB_texture_query_lod_enable; >> bool ARB_texture_query_lod_warn; >> + bool AMD_vertex_shader_layer_enable; >> + bool AMD_vertex_shader_layer_warn; >> /*@}*/ >> >> /** Extensions supported by the OpenGL implementation. 
*/ >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] mesa: implement glFramebufferTexture
On 04/27/2013 04:37 PM, Jordan Justen wrote: Signed-off-by: Jordan Justen --- src/mapi/glapi/gen/GL3x.xml |2 +- src/mesa/main/fbobject.c| 17 + src/mesa/main/fbobject.h|4 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml index 9ca3d47..5078f7b 100644 --- a/src/mapi/glapi/gen/GL3x.xml +++ b/src/mapi/glapi/gen/GL3x.xml @@ -607,7 +607,7 @@ - + diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 419e871..32dcc75 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2402,6 +2402,23 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, void GLAPIENTRY +_mesa_FramebufferTexture(GLenum target, GLenum attachment, + GLuint texture, GLint level) +{ + GET_CURRENT_CONTEXT(ctx); + + if ((_mesa_is_desktop_gl(ctx) && ctx->Version >= 32) || + ctx->Extensions.ARB_geometry_shader4) { This should be if (_mesa_is_desktop_gl(ctx) && (ctx->Version >= 32 || ctx->Extensions.ARB_geometry_shader4)) If a driver sets the ARB_geometry_shader4 bit in an ES context, this function should still generate GL_INVALID_OPERATION. Actually... is the _mesa_is_desktop_gl check even necessary? This function shouldn't get put in the dispatch table for an ES context at all. Right? 
+ framebuffer_texture(ctx, "Layer", target, attachment, 0, texture, + level, 0, GL_TRUE); + } else { + _mesa_error(ctx, GL_INVALID_OPERATION, + "unsupported function (glFramebufferTexture) called"); + } +} + + +void GLAPIENTRY _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbufferTarget, GLuint renderbuffer) diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index ba013fd..2d88001 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -192,6 +192,10 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); extern void GLAPIENTRY +_mesa_FramebufferTexture(GLenum target, GLenum attachment, + GLuint texture, GLint level); + +extern void GLAPIENTRY _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Ignore redundant prototypes after a function's been defined.
On 04/30/2013 01:52 AM, Kenneth Graunke wrote: Consider the following shader: vec4 f(vec4 v) { return v; } vec4 f(vec4 v); The prototype exactly matches the signature of the earlier definition, so there's absolutely no point in it. However, it doesn't appear to be illegal. The GLSL 4.30 specification offers two relevant quotes: "If a function name is declared twice with the same parameter types, then the return types and all qualifiers must also match, and it is the same function being declared." "User-defined functions can have multiple declarations, but only one definition." In this case the same function was declared twice, and there's only one definition, which fits both pieces of text. There doesn't appear to be any text saying late prototypes are illegal, so presumably it's valid. Unfortunately, it currently triggers an assertion failure: ir_dereference_variable @ specifies undeclared variable `v' @ OMG. I wonder if this is the bug with Second Life. https://bugs.freedesktop.org/show_bug.cgi?id=39251 https://bugs.freedesktop.org/show_bug.cgi?id=61773 When we process the second line, we look for an existing exact match so we can enforce the one-definition rule. We then leave sig set to that existing function, and hit sig->replace_parameters(&hir_parameters), unfortunately nuking our existing definition's parameters (which have actual dereferences) with the prototype's bogus unused parameters. Simply bailing out and ignoring such late prototypes is the safest thing to do. Fixes Piglit's late-proto.vert as well as 3DMark/Ice Storm for Android. NOTE: This is a candidate for stable branches. 
Cc: Tapani Pälli Cc: Ian Romanick Reviewed-by: Ian Romanick Signed-off-by: Kenneth Graunke --- src/glsl/ast_to_hir.cpp | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 2638411..e595110 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3205,10 +3205,17 @@ ast_function::hir(exec_list *instructions, "match prototype", name); } -if (is_definition && sig->is_defined) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "function `%s' redefined", name); + if (sig->is_defined) { +if (is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, "function `%s' redefined", name); +} else { + /* We just encountered a prototype that exactly matches a +* function that's already been defined. This is redundant, +* and we should ignore it. +*/ + return NULL; +} } } } else { ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] mesa: implement glFramebufferTexture
On Tue, Apr 30, 2013 at 10:04 AM, Ian Romanick wrote: > On 04/27/2013 04:37 PM, Jordan Justen wrote: >> >> Signed-off-by: Jordan Justen >> --- >> src/mapi/glapi/gen/GL3x.xml |2 +- >> src/mesa/main/fbobject.c| 17 + >> src/mesa/main/fbobject.h|4 >> 3 files changed, 22 insertions(+), 1 deletion(-) >> >> diff --git a/src/mapi/glapi/gen/GL3x.xml b/src/mapi/glapi/gen/GL3x.xml >> index 9ca3d47..5078f7b 100644 >> --- a/src/mapi/glapi/gen/GL3x.xml >> +++ b/src/mapi/glapi/gen/GL3x.xml >> @@ -607,7 +607,7 @@ >> >> >> >> - >> + >> >> >> >> diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c >> index 419e871..32dcc75 100644 >> --- a/src/mesa/main/fbobject.c >> +++ b/src/mesa/main/fbobject.c >> @@ -2402,6 +2402,23 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum >> attachment, >> >> >> void GLAPIENTRY >> +_mesa_FramebufferTexture(GLenum target, GLenum attachment, >> + GLuint texture, GLint level) >> +{ >> + GET_CURRENT_CONTEXT(ctx); >> + >> + if ((_mesa_is_desktop_gl(ctx) && ctx->Version >= 32) || >> + ctx->Extensions.ARB_geometry_shader4) { > > > This should be > > if (_mesa_is_desktop_gl(ctx) && (ctx->Version >= 32 || > ctx->Extensions.ARB_geometry_shader4)) > > If a driver sets the ARB_geometry_shader4 bit in an ES context, this > function should still generate GL_INVALID_OPERATION. > > Actually... is the _mesa_is_desktop_gl check even necessary? This function > shouldn't get put in the dispatch table for an ES context at all. Right? You are right. The _mesa_is_desktop_gl check is not necessary. Hopefully at some point we can update the api_exec.c to handle GL versions / extensions as well. -Jordan ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeon/uvd: fix quant scan order for mpeg2
From: Christian König Signed-off-by: Christian König --- src/gallium/drivers/radeon/radeon_uvd.c |8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 5a7eab6..bcd282e 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -514,6 +514,7 @@ static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buff static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, struct pipe_mpeg12_picture_desc *pic) { + const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; struct ruvd_mpeg2 result; unsigned i; @@ -524,8 +525,11 @@ static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec, result.load_intra_quantiser_matrix = 1; result.load_nonintra_quantiser_matrix = 1; - memcpy(&result.intra_quantiser_matrix, pic->intra_matrix, 64); - memcpy(&result.nonintra_quantiser_matrix, pic->non_intra_matrix, 64); + + for (i = 0; i < 64; ++i) { + result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]]; + result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]]; + } result.profile_and_level_indication = 0; result.chroma_format = 0x1; -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [v2 07/10] dri: propagate extra dma_buf import attributes to the drivers
On 04/29/2013 04:08 AM, Topi Pohjolainen wrote: Signed-off-by: Topi Pohjolainen --- include/GL/internal/dri_interface.h| 23 +++ src/egl/drivers/dri2/egl_dri2.c| 1 + src/mesa/drivers/dri/intel/intel_regions.h | 7 +++ src/mesa/drivers/dri/intel/intel_screen.c | 9 + 4 files changed, 40 insertions(+) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 30ce175..2691893 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1025,6 +1025,25 @@ struct __DRIdri2ExtensionRec { * 7+. Each query will return a * new fd. */ +enum __DRIYUVColorSpace { + __DRI_YUV_COLOR_SPACE_UNDEFINED = 0, + __DRI_YUV_COLOR_SPACE_ITU_REC601 = 0x327F, + __DRI_YUV_COLOR_SPACE_ITU_REC709 = 0x3280, + __DRI_YUV_COLOR_SPACE_ITU_REC2020 = 0x3281 +}; + +enum __DRISampleRange { + __DRI_YUV_RANGE_UNDEFINED = 0, + __DRI_YUV_FULL_RANGE = 0x3282, + __DRI_YUV_NARROW_RANGE = 0x3283 +}; + +enum __DRIChromaSiting { + __DRI_YUV_CHROMA_SITING_UNDEFINED = 0, + __DRI_YUV_CHROMA_SITING_0 = 0x3284, + __DRI_YUV_CHROMA_SITING_0_5 = 0x3285 +}; + /** * \name Reasons that __DRIimageExtensionRec::createImageFromTexture might fail */ @@ -1129,6 +1148,10 @@ struct __DRIimageExtensionRec { int width, int height, int fourcc, int *fds, int num_fds, int *strides, int *offsets, + enum __DRIYUVColorSpace yuv_color_space, + enum __DRISampleRange sample_range, + enum __DRIChromaSiting horizontal_siting, + enum __DRIChromaSiting vertical_siting, void *loaderPrivate); Uh... you absolutely cannot change existing functions. This is ABI! If you run an updated driver against an old libEGL (or vice versa), it will explode. You need to add a new function and bump the version of DRI_IMAGE to 8. egl_dri.c can use the version of the DRI_IMAGE extension from the driver to determine whether to enable EGL_EXT_image_dma_buf_import. 
}; diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 1011f27..10fdcef 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -1523,6 +1523,7 @@ dri2_wl_reference_buffer(void *user_data, uint32_t name, int fd, &fd, 1, buffer->stride, buffer->offset, +0, 0, 0, 0, NULL); if (img == NULL) diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index e610f6b..323f834 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -40,6 +40,7 @@ #include "main/mtypes.h" #include "intel_bufmgr.h" +#include #ifdef __cplusplus extern "C" { @@ -156,6 +157,12 @@ struct __DRIimageRec { GLuint tile_y; bool has_depthstencil; + /* Provided by dma_buf import extension */ + enum __DRIYUVColorSpace yuv_color_space; + enum __DRISampleRange sample_range; + enum __DRIChromaSiting horizontal_siting; + enum __DRIChromaSiting vertical_siting; + void *data; }; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e912bc7..75dbb5a 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -703,6 +703,10 @@ static __DRIimage * intel_create_image_from_fds(__DRIscreen *screen, int width, int height, int fourcc, int *fds, int num_fds, int *strides, int *offsets, +enum __DRIYUVColorSpace yuv_color_space, +enum __DRISampleRange sample_range, +enum __DRIChromaSiting horizontal_siting, +enum __DRIChromaSiting vertical_siting, void *loaderPrivate) { struct intel_screen *intelScreen = screen->driverPrivate; @@ -737,6 +741,11 @@ intel_create_image_from_fds(__DRIscreen *screen, } } + image->yuv_color_space = yuv_color_space; + image->sample_range = sample_range; + image->horizontal_siting = horizontal_siting; + image->vertical_siting = vertical_siting; + return image; } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [v2 08/10] egl: definitions for EXT_image_dma_buf_import
On 04/29/2013 04:08 AM, Topi Pohjolainen wrote: As specified in: http://www.khronos.org/registry/egl/extensions/EXT/EGL_EXT_image_dma_buf_import.txt Checking for the valid fourcc values is left for drivers avoiding dependency to drm header files here. v2: - enforce EGL_NO_CONTEXT Signed-off-by: Topi Pohjolainen --- include/EGL/eglext.h | 33 src/egl/main/eglapi.c | 7 - src/egl/main/egldisplay.h | 1 + src/egl/main/eglimage.c| 76 ++ src/egl/main/eglimage.h| 15 + src/mesa/main/extensions.c | 1 + src/mesa/main/mtypes.h | 1 + 7 files changed, 133 insertions(+), 1 deletion(-) diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h index b2b5a80..9b9be8f 100644 --- a/include/EGL/eglext.h +++ b/include/EGL/eglext.h @@ -532,6 +532,39 @@ typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC)(EGLDisplay dpy, #define EGL_BUFFER_AGE_EXT0x313D #endif +#define EGL_LINUX_DMA_BUF_EXT 0x3270 /* eglCreateImageKHR target */ + +/* Attributes for eglCreateImageKHR. */ +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A +#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B +#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C +#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D +#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT0x327E + +/* Accepted values for EGL_YUV_COLOR_SPACE_HINT_EXT attribute */ +#define EGL_ITU_REC601_EXT 0x327F +#define EGL_ITU_REC709_EXT 0x3280 +#define EGL_ITU_REC2020_EXT 0x3281 + +/* Accepted values for EGL_SAMPLE_RANGE_HINT_EXT attribute */ +#define EGL_YUV_FULL_RANGE_EXT0x3282 +#define EGL_YUV_NARROW_RANGE_EXT 0x3283 + +/* Accepted values for attributes 
EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT + * and EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT + */ +#define EGL_YUV_CHROMA_SITING_0_EXT0x3284 +#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285 + #include #ifdef __cplusplus diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index bcc5465..2355d45 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1310,7 +1310,12 @@ eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target, _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv); if (!disp->Extensions.KHR_image_base) RETURN_EGL_EVAL(disp, EGL_NO_IMAGE_KHR); - if (!context && ctx != EGL_NO_CONTEXT) + + /** +* "If is EGL_LINUX_DMA_BUF_EXT, must be a valid display, +* must be EGL_NO_CONTEXT..." +*/ + if (ctx != EGL_NO_CONTEXT && (!context || target == EGL_LINUX_DMA_BUF_EXT)) RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_NO_IMAGE_KHR); img = drv->API.CreateImageKHR(drv, diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 4b33470..5a21f78 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -115,6 +115,7 @@ struct _egl_extensions EGLBoolean EXT_create_context_robustness; EGLBoolean EXT_buffer_age; + EGLBoolean EXT_image_dma_buf_import; }; diff --git a/src/egl/main/eglimage.c b/src/egl/main/eglimage.c index bfae709..1cede31 100644 --- a/src/egl/main/eglimage.c +++ b/src/egl/main/eglimage.c @@ -93,6 +93,82 @@ _eglParseImageAttribList(_EGLImageAttribs *attrs, _EGLDisplay *dpy, attrs->PlaneWL = val; break; + case EGL_LINUX_DRM_FOURCC_EXT: + attrs->DMABufFourCC.Value = val; + attrs->DMABufFourCC.IsPresent = EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE0_FD_EXT: + attrs->DMABufPlaneFds[0].Value = val; + attrs->DMABufPlaneFds[0].IsPresent = EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE0_OFFSET_EXT: + attrs->DMABufPlaneOffsets[0].Value = val; + attrs->DMABufPlaneOffsets[0].IsPresent = EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE0_PITCH_EXT: + attrs->DMABufPlanePitches[0].Value = val; + attrs->DMABufPlanePitches[0].IsPresent = 
EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE1_FD_EXT: + attrs->DMABufPlaneFds[1].Value = val; + attrs->DMABufPlaneFds[1].IsPresent = EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE1_OFFSET_EXT: + attrs->DMABufPlaneOffsets[1].Value = val; + attrs->DMABufPlaneOffsets[1].IsPresent = EGL_TRUE; + break; + case EGL_DMA_BUF_PLANE1_PITCH_EXT: + attrs->DMABufPlanePitches[1].Value = val; + attrs->DMABufPlan
[Mesa-dev] [Bug 64091] New: piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 Priority: medium Bug ID: 64091 Assignee: mesa-dev@lists.freedesktop.org Summary: piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter Severity: normal Classification: Unclassified OS: Linux (All) Reporter: kleb...@linux.vnet.ibm.com Hardware: PowerPC Status: NEW Version: 9.1 Component: Mesa core Product: Mesa I'm performing the bring-up of a Radeon adapter (AMD FirePro 2270) using Fedora 19 on a ppc64 machine, and running the sanity test profile from piglit the glean/readPixSanity testcase fails. I've also run the test with LIBGL_ALWAYS_SOFTWARE=1 and it fails some of the tests, so it might indicate that the problem is not on the Radeon Mesa driver itself. I will attach to this bug the output from glxinfo and glean/readPixSanity for both Radeon driver and software rasterizer. Steps to reproduce: 1. Install piglit 2. Run: # ./piglit-run.py tests/sanity.tests results/sanity.results -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #1 from Kleber Sacilotto de Souza --- Created attachment 78661 --> https://bugs.freedesktop.org/attachment.cgi?id=78661&action=edit glxinfo using the radeon driver -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #2 from Kleber Sacilotto de Souza --- Created attachment 78662 --> https://bugs.freedesktop.org/attachment.cgi?id=78662&action=edit readPixSanity output using the radeon driver -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #3 from Kleber Sacilotto de Souza --- Created attachment 78663 --> https://bugs.freedesktop.org/attachment.cgi?id=78663&action=edit glxinfo using software rasterizer -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #4 from Kleber Sacilotto de Souza --- Created attachment 78664 --> https://bugs.freedesktop.org/attachment.cgi?id=78664&action=edit readPixSanity output using software rasterizer -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 Brian King changed: What|Removed |Added CC||brk...@linux.vnet.ibm.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] swrast: add casts for ImageSlices pointer arithmetic
MSVC doesn't like pointer arithmetic with void * so use GLubyte *. --- src/mesa/swrast/s_texfetch_tmp.h |8 src/mesa/swrast/s_texfilter.c|2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/swrast/s_texfetch_tmp.h b/src/mesa/swrast/s_texfetch_tmp.h index c9991cd..714226c 100644 --- a/src/mesa/swrast/s_texfetch_tmp.h +++ b/src/mesa/swrast/s_texfetch_tmp.h @@ -44,15 +44,15 @@ #if DIM == 1 #define TEXEL_ADDR( type, image, i, j, k, size ) \ - ((void) (j), (void) (k), ((type *)(image)->ImageSlices[0] + (i) * (size))) + ((void) (j), (void) (k), ((type *)((GLubyte *) (image)->ImageSlices[0]) + (i) * (size))) #define FETCH(x) fetch_texel_1d_##x #elif DIM == 2 #define TEXEL_ADDR( type, image, i, j, k, size ) \ - ((void) (k),\ -((type *)((image)->ImageSlices[0] + (image)->RowStride * (j)) + \ + ((void) (k),\ +((type *)((GLubyte *) (image)->ImageSlices[0] + (image)->RowStride * (j)) + \ (i) * (size))) #define FETCH(x) fetch_texel_2d_##x @@ -60,7 +60,7 @@ #elif DIM == 3 #define TEXEL_ADDR( type, image, i, j, k, size ) \ - ((type *)((image)->ImageSlices[k] + \ +((type *)((GLubyte *) (image)->ImageSlices[k] + \ (image)->RowStride * (j)) + (i) * (size)) #define FETCH(x) fetch_texel_3d_##x diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c index c8ea26a..fba8e6c 100644 --- a/src/mesa/swrast/s_texfilter.c +++ b/src/mesa/swrast/s_texfilter.c @@ -1436,7 +1436,7 @@ opt_sample_rgb_2d(struct gl_context *ctx, GLint i = IFLOOR(texcoords[k][0] * width) & colMask; GLint j = IFLOOR(texcoords[k][1] * height) & rowMask; GLint pos = (j << shift) | i; - GLubyte *texel = swImg->ImageSlices[0] + 3 * pos; + GLubyte *texel = (GLubyte *) swImg->ImageSlices[0] + 3 * pos; rgba[k][RCOMP] = UBYTE_TO_FLOAT(texel[2]); rgba[k][GCOMP] = UBYTE_TO_FLOAT(texel[1]); rgba[k][BCOMP] = UBYTE_TO_FLOAT(texel[0]); -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] i965: Use brw_blorp_blit_miptrees() for CopyTexSubImage().
Now that depth resolves are handled there, we don't need to make the temporary renderbuffer. --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 84 ++-- src/mesa/drivers/dri/intel/intel_fbo.c | 30 -- src/mesa/drivers/dri/intel/intel_fbo.h | 4 -- 3 files changed, 41 insertions(+), 77 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 32da141..c3ef054 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -169,19 +169,9 @@ do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit, intel_renderbuffer_set_needs_downsample(dst_irb); } - static bool -formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb, - struct intel_renderbuffer *dst_irb) +color_formats_match(gl_format src_format, gl_format dst_format) { - /* Note: don't just check gl_renderbuffer::Format, because in some cases -* multiple gl_formats resolve to the same native type in the miptree (for -* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit -* between those formats. -*/ - gl_format src_format = find_miptree(buffer_bit, src_irb)->format; - gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format; - gl_format linear_src_format = _mesa_get_srgb_format_linear(src_format); gl_format linear_dst_format = _mesa_get_srgb_format_linear(dst_format); @@ -197,6 +187,21 @@ formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb, } static bool +formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb, + struct intel_renderbuffer *dst_irb) +{ + /* Note: don't just check gl_renderbuffer::Format, because in some cases +* multiple gl_formats resolve to the same native type in the miptree (for +* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit +* between those formats. 
+*/ + gl_format src_format = find_miptree(buffer_bit, src_irb)->format; + gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format; + + return color_formats_match(src_format, dst_format); +} + +static bool try_blorp_blit(struct intel_context *intel, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, @@ -313,29 +318,21 @@ brw_blorp_copytexsubimage(struct intel_context *intel, { struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb); - struct intel_renderbuffer *dst_irb; + struct intel_texture_image *intel_image = intel_texture_image(dst_image); + + /* Sync up the state of window system buffers. We need to do this before +* we go looking at the src renderbuffer's miptree. +*/ + intel_prepare_render(intel); + + struct intel_mipmap_tree *src_mt = src_irb->mt; + struct intel_mipmap_tree *dst_mt = intel_image->mt; /* BLORP is not supported before Gen6. */ if (intel->gen < 6) return false; - /* Create a fake/wrapper renderbuffer to allow us to use do_blorp_blit(). */ - dst_irb = intel_create_fake_renderbuffer_wrapper(intel, dst_image); - if (!dst_irb) - return false; - - struct gl_renderbuffer *dst_rb = &dst_irb->Base.Base; - - /* Unlike BlitFramebuffer, CopyTexSubImage doesn't have a buffer bit. -* It's only used by find_miptee() to decide whether to dereference the -* separate stencil miptree. In the case of packed depth/stencil, core -* Mesa hands us the depth attachment as src_rb (not stencil), so assume -* non-stencil for now. A buffer bit of 0 works for both color and depth. -*/ - GLbitfield buffer_bit = 0; - - if (!formats_match(buffer_bit, src_irb, dst_irb)) { - dst_rb->Delete(ctx, dst_rb); + if (!color_formats_match(src_mt->format, dst_mt->format)) { return false; } @@ -353,11 +350,6 @@ brw_blorp_copytexsubimage(struct intel_context *intel, int dstX1 = dstX0 + width; int dstY1 = dstY0 + height; - /* Sync up the state of window system buffers. 
We need to do this before -* we go looking for the buffers. -*/ - intel_prepare_render(intel); - /* Account for the fact that in the system framebuffer, the origin is at * the lower left. */ @@ -369,23 +361,29 @@ brw_blorp_copytexsubimage(struct intel_context *intel, mirror_y = true; } - do_blorp_blit(intel, buffer_bit, src_irb, dst_irb, - srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, false, mirror_y); + brw_blorp_blit_miptrees(intel, + src_mt, src_irb->mt_level, src_irb->mt_layer, + dst_mt, dst_image->Level, dst_image->Face, + srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, + false, mirror_y); - /* If we're copying a packed depth stencil texture, the above do_blorp_blit -*
[Mesa-dev] [PATCH 1/6] i965: Move blorp resolve setup into brw_blorp_blit_miptrees().
There was some comment about trying to avoid marking resolves in updownsample, but if the downsample is never actually rendered to, then the required resolve tracked in the downsample will never be executed, so who cares? --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 12 +--- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 11 --- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index d4b1fda..32da141 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -132,6 +132,9 @@ brw_blorp_blit_miptrees(struct intel_context *intel, int dst_x1, int dst_y1, bool mirror_x, bool mirror_y) { + intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_layer); + intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_layer); + brw_blorp_blit_params params(brw_context(&intel->ctx), src_mt, src_level, src_layer, dst_mt, dst_level, dst_layer, @@ -140,6 +143,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel, dst_x1, dst_y1, mirror_x, mirror_y); brw_blorp_exec(intel, &params); + + intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer); } static void @@ -154,12 +159,6 @@ do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit, struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb); - /* Get ready to blit. This includes depth resolving the src and dst -* buffers if necessary. 
-*/ - intel_renderbuffer_resolve_depth(intel, src_irb); - intel_renderbuffer_resolve_depth(intel, dst_irb); - /* Do the blit */ brw_blorp_blit_miptrees(intel, src_mt, src_irb->mt_level, src_irb->mt_layer, @@ -167,7 +166,6 @@ do_blorp_blit(struct intel_context *intel, GLbitfield buffer_bit, srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, mirror_x, mirror_y); - intel_renderbuffer_set_needs_hiz_resolve(dst_irb); intel_renderbuffer_set_needs_downsample(dst_irb); } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 12a4a22..586599e 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -1289,9 +1289,6 @@ intel_miptree_updownsample(struct intel_context *intel, int dst_x0 = 0; int dst_y0 = 0; - intel_miptree_slice_resolve_depth(intel, src, 0, 0); - intel_miptree_slice_resolve_depth(intel, dst, 0, 0); - brw_blorp_blit_miptrees(intel, src, 0 /* level */, 0 /* layer */, dst, 0 /* level */, 0 /* layer */, @@ -1339,13 +1336,6 @@ intel_miptree_downsample(struct intel_context *intel, mt->logical_width0, mt->logical_height0); mt->need_downsample = false; - - /* Strictly speaking, after a downsample on a depth miptree, a hiz -* resolve is needed on the singlesample miptree. However, since the -* singlesample miptree is never rendered to, the hiz resolve will never -* occur. Therefore we do not mark the needed hiz resolve after -* downsampling. -*/ } /** @@ -1365,7 +1355,6 @@ intel_miptree_upsample(struct intel_context *intel, mt->singlesample_mt, mt, mt->logical_width0, mt->logical_height0); - intel_miptree_slice_set_needs_hiz_resolve(mt, 0, 0); } void * -- 1.8.3.rc0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] intel: Simplify renderbuffer-for-texture width setup.
We're looking for the logical width of our level, which is what image->Width2/Height2 is. The previous code relied on MSAA textures being only level 0. --- src/mesa/drivers/dri/intel/intel_fbo.c | 13 ++--- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index f44cb4d..a3817eb 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -493,17 +493,8 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel, rb->InternalFormat = image->InternalFormat; rb->_BaseFormat = image->_BaseFormat; rb->NumSamples = mt->num_samples; - - if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE) { - assert(level == 0); - rb->Width = mt->logical_width0; - rb->Height = mt->logical_height0; - } - else { - rb->Width = mt->level[level].width; - rb->Height = mt->level[level].height; - } - + rb->Width = image->Width2; + rb->Height = image->Height2; rb->Delete = intel_delete_renderbuffer; rb->AllocStorage = intel_nop_alloc_storage; -- 1.8.3.rc0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] mesa: Make core Mesa allocate the texture renderbuffer wrapper.
Every driver did the same thing. --- src/mesa/drivers/dri/intel/intel_fbo.c | 17 +-- src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 10 --- src/mesa/drivers/dri/radeon/radeon_fbo.c | 14 - src/mesa/main/fbobject.c | 48 ++ src/mesa/main/fbobject.h | 5 src/mesa/main/teximage.c | 3 +- src/mesa/state_tracker/st_cb_fbo.c | 22 ++ src/mesa/swrast/s_texrender.c | 38 +++ 8 files changed, 56 insertions(+), 101 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 1d247c7..f44cb4d 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -606,28 +606,13 @@ intel_render_texture(struct gl_context * ctx, /* Fallback on drawing to a texture that doesn't have a miptree * (has a border, width/height 0, etc.) */ - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _swrast_render_texture(ctx, fb, att); return; } - else if (!irb) { - intel_miptree_check_level_layer(mt, att->TextureLevel, layer); - irb = (struct intel_renderbuffer *)intel_new_renderbuffer(ctx, ~0); - - if (irb) { - /* bind the wrapper to the attachment point */ - _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base.Base); - } - else { - /* fallback to software rendering */ - _swrast_render_texture(ctx, fb, att); - return; - } - } + intel_miptree_check_level_layer(mt, att->TextureLevel, layer); if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) { - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _swrast_render_texture(ctx, fb, att); return; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index b487009..adead3d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -270,16 +270,6 @@ nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer *fb, struct gl_texture_image *ti = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; - /* Allocate a renderbuffer object for the 
texture if we -* haven't already done so. */ - if (!rb) { - rb = nouveau_renderbuffer_new(ctx, ~0); - assert(rb); - - rb->AllocStorage = NULL; - _mesa_reference_renderbuffer(&att->Renderbuffer, rb); - } - /* Update the renderbuffer fields from the texture. */ set_renderbuffer_format(rb, get_tex_format(ti)); rb->Width = ti->Width; diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index eb592db..5f996c5 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -835,25 +835,11 @@ radeon_render_texture(struct gl_context * ctx, if (!radeon_image->mt) { /* Fallback on drawing to a texture without a miptree. */ - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _swrast_render_texture(ctx, fb, att); return; } - else if (!rrb) { - rrb = radeon_wrap_texture(ctx, newImage); - if (rrb) { - /* bind the wrapper to the attachment point */ - _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base.Base); - } - else { - /* fallback to software rendering */ - _swrast_render_texture(ctx, fb, att); - return; - } - } if (!radeon_update_wrapper(ctx, rrb, newImage)) { - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _swrast_render_texture(ctx, fb, att); return; } diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 645a8a3..26d1cce 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -341,6 +341,47 @@ _mesa_remove_attachment(struct gl_context *ctx, att->Complete = GL_TRUE; } +/** + * Create a renderbuffer which will be set up by the driver to wrap the + * texture image slice. + * + * By using a gl_renderbuffer (like user-allocated renderbuffers), drivers get + * to share most of their framebuffer rendering code between winsys, + * renderbuffer, and texture attachments. 
+ * + * The allocated renderbuffer uses a non-zero Name so that drivers can check + * it for determining vertical orientation, but we use ~0 to make it fairly + * unambiguous with actual user (non-texture) renderbuffers. + */ +void +_mesa_update_texture_renderbuffer(struct gl_context *ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct gl_texture_image *texImage; + struct gl_renderbuffer *rb; + + texImage = _mesa_get_attachment_teximage(att); +
[Mesa-dev] [PATCH 5/6] mesa: Make Mesa core set up wrapped texture renderbuffer state.
Everyone was doing effectively the same thing, except for some funky code reuse in Intel, and swrast mistakenly recomputing _BaseFormat instead of using the texture's _BaseFormat. swrast's sRGB handling is left in place, though it should be done by using _mesa_get_render_format() at render time instead (as-is, it will miss updates to GL_FRAMEBUFFER_SRGB). --- src/mesa/drivers/dri/intel/intel_fbo.c | 6 -- src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 18 -- src/mesa/main/fbobject.c | 7 +++ src/mesa/state_tracker/st_cb_fbo.c | 5 - src/mesa/swrast/s_texrender.c | 5 - 5 files changed, 7 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index a3817eb..f037445 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -489,12 +489,6 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel, struct intel_mipmap_tree *mt = intel_image->mt; int level = image->Level; - rb->Format = image->TexFormat; - rb->InternalFormat = image->InternalFormat; - rb->_BaseFormat = image->_BaseFormat; - rb->NumSamples = mt->num_samples; - rb->Width = image->Width2; - rb->Height = image->Height2; rb->Delete = intel_delete_renderbuffer; rb->AllocStorage = intel_nop_alloc_storage; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index adead3d..a692051 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -247,21 +247,6 @@ nouveau_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer * context_dirty(ctx, FRAMEBUFFER); } -static GLenum -get_tex_format(struct gl_texture_image *ti) -{ - switch (ti->TexFormat) { - case MESA_FORMAT_ARGB: - return GL_RGBA8; - case MESA_FORMAT_XRGB: - return GL_RGB8; - case MESA_FORMAT_RGB565: - return GL_RGB5; - default: - return GL_NONE; - } -} - static void nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer 
*fb, struct gl_renderbuffer_attachment *att) @@ -271,9 +256,6 @@ nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer *fb, att->Texture->Image[att->CubeMapFace][att->TextureLevel]; /* Update the renderbuffer fields from the texture. */ - set_renderbuffer_format(rb, get_tex_format(ti)); - rb->Width = ti->Width; - rb->Height = ti->Height; nouveau_surface_ref(&to_nouveau_teximage(ti)->surface, &to_nouveau_renderbuffer(rb)->surface); diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 26d1cce..d88c062 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -380,6 +380,13 @@ _mesa_update_texture_renderbuffer(struct gl_context *ctx, rb->AllocStorage = NULL; } + rb->_BaseFormat = texImage->_BaseFormat; + rb->Format = texImage->TexFormat; + rb->InternalFormat = texImage->InternalFormat; + rb->Width = texImage->Width2; + rb->Height = texImage->Height2; + rb->NumSamples = texImage->NumSamples; + ctx->Driver.RenderTexture(ctx, fb, att); } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index affe656..aa245d3 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -414,11 +414,6 @@ st_render_texture(struct gl_context *ctx, strb->rtt_level = att->TextureLevel; strb->rtt_face = att->CubeMapFace; strb->rtt_slice = att->Zoffset; - rb->NumSamples = texImage->NumSamples; - rb->Width = texImage->Width2; - rb->Height = texImage->Height2; - rb->_BaseFormat = texImage->_BaseFormat; - rb->InternalFormat = texImage->InternalFormat; pipe_resource_reference( &strb->texture, pt ); diff --git a/src/mesa/swrast/s_texrender.c b/src/mesa/swrast/s_texrender.c index f56a0d5..00b3ca5 100644 --- a/src/mesa/swrast/s_texrender.c +++ b/src/mesa/swrast/s_texrender.c @@ -50,11 +50,6 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att) zOffset = att->Zoffset; } - rb->Width = swImage->Base.Width; - rb->Height = swImage->Base.Height; - rb->InternalFormat = 
swImage->Base.InternalFormat; - rb->_BaseFormat = _mesa_get_format_base_format(format); - /* Want to store linear values, not sRGB */ rb->Format = _mesa_get_srgb_format_linear(format); -- 1.8.3.rc0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] intel: Remove renderbuffer delete setup from texture wrapping.
This is already set by intel_new_renderbuffer(). --- src/mesa/drivers/dri/intel/intel_fbo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index f037445..45424a9 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -489,7 +489,6 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel, struct intel_mipmap_tree *mt = intel_image->mt; int level = image->Level; - rb->Delete = intel_delete_renderbuffer; rb->AllocStorage = intel_nop_alloc_storage; intel_miptree_check_level_layer(mt, level, layer); -- 1.8.3.rc0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #5 from Jerome Glisse --- For radeon my guess is that depth/stencil readback needs some fiddling with texture format : mesa/src/gallium/drivers/r600/r600_blit.c r600_blit_decompress_depth() surf_tmpl.format = flushed_depth_texture->resource.b.b.format; I would look at what format you got there and then try to play with some other format. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #6 from Alex Deucher --- Gallium is big endian safe at the moment. See these discussion threads for what needs to be done: http://lists.freedesktop.org/archives/mesa-dev/2013-January/033590.html http://lists.freedesktop.org/archives/mesa-dev/2013-February/034047.html -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64091] piglit glean/readPixSanity testcase fails on ppc64 with Radeon adapter
https://bugs.freedesktop.org/show_bug.cgi?id=64091 --- Comment #7 from Alex Deucher --- (In reply to comment #6) > Gallium is big endian safe at the moment. See these discussion threads for > what needs to be done: > http://lists.freedesktop.org/archives/mesa-dev/2013-January/033590.html > http://lists.freedesktop.org/archives/mesa-dev/2013-February/034047.html *Gallium is NOT big endian safe at the moment. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] fixup! glsl: Add a pass to lower bitfield-insert into bfm+bfi.
--- Does this squashed in seem okay, Eric? src/glsl/lower_instructions.cpp | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 6c70a4a..d32ec80 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -321,9 +321,10 @@ lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir) ir_rvalue *base_expr = ir->operands[0]; ir->operation = ir_triop_bfi; - ir->operands[0] = new(ir) ir_expression(ir_binop_bfm, ir->type, - swizzle_(ir->operands[3]), - swizzle_(ir->operands[2])); + ir->operands[0] = new(ir) ir_expression(ir_binop_bfm, + ir->type->get_base_type(), + ir->operands[3], + ir->operands[2]); /* ir->operands[1] is still the value to insert. */ ir->operands[2] = base_expr; ir->operands[3] = NULL; -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Mesa 9.1.2 release
Mesa 9.1.2 has been released. Mesa 9.1.2 is a bug fix release which fixes bugs found since the 9.1.1 release. The tag in the GIT repository for Mesa 9.1.2 is 'mesa-9.1.2'. Mesa 9.1.2 is available for download at ftp://freedesktop.org/pub/mesa/9.1.2/ md5sums: df2aab86ff4a510ce5b0d074caa0a59f MesaLib-9.1.2.tar.bz2 415c2bc3a9eb571aafbfa474ebf5a2e0 MesaLib-9.1.2.tar.gz b1ae5a4d9255953980bc9254f5323420 MesaLib-9.1.2.zip I have verified building from the .tar.bz2 file by doing: tar -xjf MesaLib-9.1.2.tar.bz2 cd Mesa-9.1.2 ./configure --enable-gallium-llvm --with-llvm-shared-libs make -j6 make install I have also verified that I pushed the tag. Better never than late, I always say! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeon/llvm: Use LLVM C API for compiling LLVM IR to ISA.
On Sat, Apr 27, 2013 at 10:33:29AM +0200, Mathias Fröhlich wrote: > > Hi, > > On Thursday, April 25, 2013 10:29:27 Jose Fonseca wrote: > > - There are a bunch of options that need to be set via globals, (see > > lp_set_target_options), so app/drivers could tamper with each other's > > options. > > > > - llvm::cl::ParseCommandLineOptions will complain if called multiple times > > -- I think we no longer need to call it these days though > > > > In short, LLVM was not designed for multiple users in the same process. > Yep. > > Also llvm is still emerging too fast to assume a specific version to be > available. At least with r600 we do currently need a somewhat recent version > and kind of have this assumption. > But due to the api not kept strictly backwards compatible and all the > pitfalls > that happen while emerging fast it's very likely that a potential application > that also tries to make use of the driver modules just brings its own > probably > incompatible llvm version in some way. So shielding this in any way makes > sense ... > > > For the Mesa wrappers: > I have attached a shell script again as a rapid proof that is able to build a > linker script that builds up a wrapper shared library that contains a private > llvm copy. That's again non optimal - it contains just all static libs that I > have in my current test environment... It's just to sketch how this could > work. > > The MesaLLVM-with-prefix.link script can be used with the command > > g++ -shared -o libMesaLLVM.so MesaLLVM-with-prefix.link > > to produce a libMesaLLVM.so that contains all C symbols starting with LLVM > from libLLVMCore.a. All of them get prefixed with Mesa and are the only > exported symbols then. > That's close to Jose's suggestion but with less work to be done in sources.
> > The MesaLLVM-with-version.link script can be used with the command > > g++ -shared -o libMesaLLVM.so MesaLLVM-with-version.link > > to produce a libMesaLLVM.so shared library that uses symbol versioning to > distinguish between the llvm versions. I got this idea yesterday and this > might simplify the problem a lot. > By this variant we do not even need to prefix all the callers by Mesa. What > this does is to explicitly assign a symbol version to all these calls. At > static link symbol resolve time with this libMesaLLVM.so, this symbol version > > (the 'A MesaLLVM_1.0' entry) is then pulled out of this shared object and all > users, in libllvmradeon.so for example, are linked against > LLVMCreateContext@MesaLLVM_1.0 instead of just LLVMCreateContext. So we > should > get a private copy of llvm in libMesaLLVM.so with just the same call names > than usual source code wise. > That's to be tested and verified, but if this works like I think it should, > this is the easiest way to get our own LLVM version on linux at least. > > And sorry for just doing these crude proof of concept stuff ... > Hi Mathias, I took the linker script from your email and took a shot at creating libMesaLLVM.so within Mesa. I've pushed my initial code here: http://cgit.freedesktop.org/~tstellar/mesa/log/?h=libmesallvm I ran into a few minor issues: I had to export all the LLVM symbols in libMesaLLVM.so, because gallivm still uses some C++ functions, and I was unsure how to handle the name mangling in the linker script. Clover still has a number of undefined symbols. I'm still not quite sure what the problem is, but I think the problem has something to do with the LLVM symbols in the clang libraries clover is using. I didn't do much testing yet, but glxgears works for me with r600g and llvmpipe. Also, note that there are 4 new commits in that repo, the first three are just variations from my previous C API patches for drivers/radeon.
The biggest change is that I moved the static initializer that calls the llvm_multithreaded* functions into gallivm/lp_bld_misc.cpp Let me know if you have any questions, concerns or other ideas. Thanks, Tom > /* Mesa llvm linker script */ > EXTERN( > LLVMAddAlias > LLVMAddAttribute > LLVMAddCase > LLVMAddClause > LLVMAddDestination > LLVMAddFunction > LLVMAddFunctionAttr > LLVMAddGlobal > LLVMAddGlobalInAddressSpace > LLVMAddIncoming > LLVMAddInstrAttribute > LLVMAddNamedMetadataOperand > LLVMAddTargetDependentFunctionAttr > LLVMAlignOf > LLVMAppendBasicBlock > LLVMAppendBasicBlockInContext > LLVMArrayType > LLVMBasicBlockAsValue > LLVMBlockAddress > LLVMBuildAdd > LLVMBuildAggregateRet > LLVMBuildAlloca > LLVMBuildAnd > LLVMBuildArrayAlloca > LLVMBuildArrayMalloc > LLVMBuildAShr > LLVMBuildAtomicRMW > LLVMBuildBinOp > LLVMBuildBitCast > LLVMBuildBr > LLVMBuildCall > LLVMBuildCast > LLVMBuildCondBr > LLVMBuildExactSDiv > LLVMBuildExtractElement > LLVMBuildExtractValue > LLVMBuildFAdd > LLVMBuildFCmp > LLVMBuildFDiv > LLVMBuildFMul > LLVMBuildFNeg > LLVMBuildFPCast > LLVMBuildFPExt > LLVMBuildFPToSI > LLVMBuildFPToUI > LLVMBuildFPTrunc > LLVMBuildFree > LLVMBuildFRem > LLVMBuildFSub > LLVMBuildGE
Re: [Mesa-dev] [PATCH] fixup! glsl: Add a pass to lower bitfield-insert into bfm+bfi.
Matt Turner writes: > --- > Does this squashed in seem okay, Eric? Yeah, though it seems like there could be a bit more ir_validation that things stay the way we think -- that bfm's on scalars make a scalar, and bfi's operands[0] is scalar. pgpIR7hwIhr2b.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965/fs: Make virtual grf live intervals actually cover their used range.
Previously, we would sometimes not consider a write to a register to extend the end of the interval, nor would we consider a read before a write to extend the start. This made for a bunch of complicated logic related to how to treat the results when dead code might be present. Instead, just extend the interval and fix dead code elimination to know how to remove it. Interestingly, this actually results in a tiny bit more optimization: total instructions in shared programs: 1391220 -> 1390799 (-0.03%) instructions in affected programs: 14037 -> 13616 (-3.00%) --- src/mesa/drivers/dri/i965/brw_fs.cpp | 21 +++--- src/mesa/drivers/dri/i965/brw_fs.h | 4 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- .../drivers/dri/i965/brw_fs_live_variables.cpp | 76 ++ src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 3 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +- 6 files changed, 38 insertions(+), 72 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a8610ee..0821c05 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1449,8 +1449,8 @@ fs_visitor::compact_virtual_grfs() remap_table[i] = new_index; virtual_grf_sizes[new_index] = virtual_grf_sizes[i]; if (live_intervals_valid) { -virtual_grf_use[new_index] = virtual_grf_use[i]; -virtual_grf_def[new_index] = virtual_grf_def[i]; +virtual_grf_start[new_index] = virtual_grf_start[i]; +virtual_grf_end[new_index] = virtual_grf_end[i]; } ++new_index; } @@ -1764,10 +1764,8 @@ fs_visitor::opt_algebraic() } /** - * Must be called after calculate_live_intervales() to remove unused - * writes to registers -- register allocation will fail otherwise - * because something deffed but not used won't be considered to - * interfere with other regs. + * Removes any instructions writing a VGRF where that VGRF is not used by any + * later instruction. 
*/ bool fs_visitor::dead_code_eliminate() @@ -1780,9 +1778,12 @@ fs_visitor::dead_code_eliminate() foreach_list_safe(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; - if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { -inst->remove(); -progress = true; + if (inst->dst.file == GRF) { + assert(this->virtual_grf_end[inst->dst.reg] >= pc); + if (this->virtual_grf_end[inst->dst.reg] == pc) { +inst->remove(); +progress = true; + } } pc++; @@ -2194,7 +2195,7 @@ fs_visitor::compute_to_mrf() /* Can't compute-to-MRF this GRF if someone else was going to * read it later. */ - if (this->virtual_grf_use[inst->src[0].reg] > ip) + if (this->virtual_grf_end[inst->src[0].reg] > ip) continue; /* Found a move of a GRF to a MRF. Let's see if we can go diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index c9c9856..3df2ce1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -434,8 +434,8 @@ public: int *virtual_grf_sizes; int virtual_grf_count; int virtual_grf_array_size; - int *virtual_grf_def; - int *virtual_grf_use; + int *virtual_grf_start; + int *virtual_grf_end; bool live_intervals_valid; /* This is the map from UNIFORM hw_reg + reg_offset as generated by diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index b5c2200..9b60d9b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -194,7 +194,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) /* Kill any AEB entries using registers that don't get reused any * more -- a sure sign they'll fail operands_match(). 
*/ -if (src_reg->file == GRF && virtual_grf_use[src_reg->reg] < ip) { +if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) { entry->remove(); ralloc_free(entry); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index fdcfac6..dd8923e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -167,16 +167,16 @@ fs_visitor::calculate_live_intervals() if (this->live_intervals_valid) return; - int *def = ralloc_array(mem_ctx, int, num_vars); - int *use = ralloc_array(mem_ctx, int, num_vars); - ralloc_free(this->virtual_grf_def); - ralloc_free(this->virtual_grf_use); - this->virtual_grf_def = def; - this->virtual_grf_use = use; + int *start = ralloc_array(mem_ctx, int, num_vars); + int *end = ralloc_array(me
[Mesa-dev] [PATCH 2/2] i965/vs: Make virtual grf live intervals actually cover their used range.
This is the same change as the previous commit to the FS. A very few VSes are regressed by 1 or 2 instructions, which look recoverable with a bit more dead code elimination. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 11 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 4 +- .../drivers/dri/i965/brw_vec4_live_variables.cpp | 75 ++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +- 4 files changed, 31 insertions(+), 63 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ab4668f..75f446d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -298,9 +298,12 @@ vec4_visitor::dead_code_eliminate() foreach_list_safe(node, &this->instructions) { vec4_instruction *inst = (vec4_instruction *)node; - if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { -inst->remove(); -progress = true; + if (inst->dst.file == GRF) { + assert(this->virtual_grf_end[inst->dst.reg] >= pc); + if (this->virtual_grf_end[inst->dst.reg] == pc) { +inst->remove(); +progress = true; + } } pc++; @@ -825,7 +828,7 @@ vec4_visitor::opt_register_coalesce() /* Can't coalesce this GRF if someone else was going to * read it later. 
*/ - if (this->virtual_grf_use[inst->src[0].reg] > ip) + if (this->virtual_grf_end[inst->src[0].reg] > ip) continue; /* We need to check interference with the final destination between this diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index a4fca2d..6fdeaeb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -249,8 +249,8 @@ public: int virtual_grf_array_size; int first_non_payload_grf; unsigned int max_grf; - int *virtual_grf_def; - int *virtual_grf_use; + int *virtual_grf_start; + int *virtual_grf_end; dst_reg userplane[MAX_CLIP_PLANES]; /** diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index f34111c..db3787b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -183,8 +183,8 @@ vec4_live_variables::~vec4_live_variables() * We could expose per-channel live intervals to the consumer based on the * information we computed in vec4_live_variables, except that our only * current user is virtual_grf_interferes(). So we instead union the - * per-channel ranges into a per-vgrf range for virtual_grf_def[] and - * virtual_grf_use[]. + * per-channel ranges into a per-vgrf range for virtual_grf_start[] and + * virtual_grf_end[]. 
* * We could potentially have virtual_grf_interferes() do the test per-channel, * which would let some interesting register allocation occur (particularly on @@ -200,16 +200,16 @@ vec4_visitor::calculate_live_intervals() if (this->live_intervals_valid) return; - int *def = ralloc_array(mem_ctx, int, this->virtual_grf_count); - int *use = ralloc_array(mem_ctx, int, this->virtual_grf_count); - ralloc_free(this->virtual_grf_def); - ralloc_free(this->virtual_grf_use); - this->virtual_grf_def = def; - this->virtual_grf_use = use; + int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count); + int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count); + ralloc_free(this->virtual_grf_start); + ralloc_free(this->virtual_grf_end); + this->virtual_grf_start = start; + this->virtual_grf_end = end; for (int i = 0; i < this->virtual_grf_count; i++) { - def[i] = MAX_INSTRUCTION; - use[i] = -1; + start[i] = MAX_INSTRUCTION; + end[i] = -1; } /* Start by setting up the intervals with no knowledge of control @@ -223,14 +223,16 @@ vec4_visitor::calculate_live_intervals() if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; - use[reg] = ip; +start[reg] = MIN2(start[reg], ip); +end[reg] = ip; } } if (inst->dst.file == GRF) { int reg = inst->dst.reg; - def[reg] = MIN2(def[reg], ip); + start[reg] = MIN2(start[reg], ip); + end[reg] = ip; } ip++; @@ -247,60 +249,23 @@ vec4_visitor::calculate_live_intervals() for (int b = 0; b < cfg.num_blocks; b++) { for (int i = 0; i < livevars.num_vars; i++) { if (livevars.bd[b].livein[i]) { - def[i / 4] = MIN2(def[i / 4], cfg.blocks[b]->start_ip); - use[i / 4] = MAX2(use[i / 4], cfg.blocks[b]->start_ip); + start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip); + end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip); } if (livevars.bd[b].liveout[i]) { - def[i / 4] = MI