Re: [Mesa-dev] [PATCH 01/15] i965: Add a table of the surface format information from the PRM.
On 11/15/2011 05:55 PM, Eric Anholt wrote: > This will be used to drive chosing formats and determining framebuffer > completeness, instead of the bunch of ad-hoc checks we have had until > now. > --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 204 > ++ > 1 files changed, 204 insertions(+), 0 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 0cc6201..f0df730 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -71,6 +71,210 @@ translate_tex_target(GLenum target) > } > } > > +struct surface_format_info { > + bool exists; > + int sampling; > + int filtering; > + int shadow_compare; > + int chroma_key; > + int render_target; > + int alpha_blend; > + int input_vb; > + int streamed_output_vb; > + int color_processing; > +}; > + > +/* This macro allows us to write the table almost as it appears in the PRM, > + * while restructuring it to turn it into the C code we want. > + */ > +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \ > + [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color }, > + > +#define Y 0 > +#define x 999 > +/** > + * This is the table of support for surface (texture, renderbuffer, and > vertex > + * buffer, but not depthbuffer) formats across the various hardware > generations. > + * > + * The table is formatted to match the documentation, except that the docs > have > + * this ridiculous mapping of Y[*+~^#&] for "supported on DevWhatever". To > put > + * it in our table, here's the mapping: > + * > + * Y*: 45 > + * Y+: 45 (g45/gm45) > + * Y~: 50 (gen5) > + * Y^: 60 (gen6) > + * Y#: 70 (gen7) > + * > + * See page 88 of the Sandybridge PRM VOL4_Part1 PDF. > + */ > +const struct surface_format_info surface_formats[] = { > +/* smpl filt shad CK RT AB VB SO color */ > + SF( Y, 50, x, x, Y, Y, Y, Y, x, > BRW_SURFACEFORMAT_R32G32B32A32_FLOAT) > + SF( Y, x, x, x, Y, x, Y, Y, x, > BRW_SURFACEFORMAT_R32G32B32A32_SINT) > + SF( Y, x, x, x, Y, x, Y, Y, x, > BRW_SURFACEFORMAT_R32G32B32A32_UINT) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32A32_UNORM) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32A32_SNORM) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT) > + SF( Y, 50, x, x, x, x, x, x, x, > BRW_SURFACEFORMAT_R32G32B32X32_FLOAT) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32A32_SSCALED) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32A32_USCALED) > + SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT) > + SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT) > + SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32_SSCALED) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R32G32B32_USCALED) > + SF( Y, Y, x, x, Y, 45, Y, x, 60, > BRW_SURFACEFORMAT_R16G16B16A16_UNORM) > + SF( Y, Y, x, x, Y, 60, Y, x, x, > BRW_SURFACEFORMAT_R16G16B16A16_SNORM) > + SF( Y, x, x, x, Y, x, Y, x, x, > BRW_SURFACEFORMAT_R16G16B16A16_SINT) > + SF( Y, x, x, x, Y, x, Y, x, x, > BRW_SURFACEFORMAT_R16G16B16A16_UINT) > + SF( Y, Y, x, x, Y, Y, Y, x, x, > BRW_SURFACEFORMAT_R16G16B16A16_FLOAT) > + SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT) > + SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT) > + SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT) > + SF( Y, 50, Y, x, x, x, x, x, x, > BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS) > + SF( Y, x, x, x, x, x, x, x, x, > BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT) > + SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM) > + SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT) > + SF( Y, Y, x, x, x, x, x, x, x, > BRW_SURFACEFORMAT_R16G16B16X16_UNORM) > + SF( Y, Y, x, x, x, x, x, x, x, > BRW_SURFACEFORMAT_R16G16B16X16_FLOAT) > + SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT) > + SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT) > + SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R16G16B16A16_SSCALED) > + SF( x, x, x, x, x, x, Y, x, x, > BRW_SURFACEFORMAT_R
[Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
Dear devs, I am getting segmentation faults when running piglit r600.tests with latest mesa build from git: Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init failed for (1431699456, 0x0006) Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] *ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12) Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in r600_dri.so[7fb2716c8000+dba000] --- Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels defau'. Program terminated with signal 11, Segmentation fault. #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 63 ur->vtbl->transfer_unmap(pipe, transfer); (gdb) bt #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 #1 0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, context=0x2441df0) at ../../src/gallium/auxiliary/util/u_inlines.h:398 #2 st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at state_tracker/st_cb_fbo.c:704 #3 0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate (ctx=0x254b720, x=, y=, width=123, height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", dstStride=984) at main/readpix.c:427 #4 0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, pixels=, type=36269, height=, width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469 #5 _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, format=, type=36269, packing=0x255afb8, pixels=) at main/readpix.c:508 #6 0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, type=36269, format=34041, height=123, width=123, y=0, x=0, bufSize=) at main/readpix.c:753 #7 _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, type=36269, pixels=0x7fff653d0870) at main/readpix.c:761 #8 0x0042eb4a in read_32f_8 () #9 0x0042ec0b in compare () #10 0x0042ef73 in test_readpixels () #11 0x0042f7aa in piglit_display () #12 0x00430241 in display () #13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3 #14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3 #15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3 #16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3 #17 0x00430967 in main () --- My glxinfo: OpenGL vendor string: X.Org OpenGL renderer string: Gallium 0.4 on AMD BARTS OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca) OpenGL shading language version string: 1.20 A general question: I am interested in running regular piglit tests on my hardware and would like to help by reporting problems/bugs. What´s the correct way of doing so? Should I post any problems to mesa-dev or should I open a bug. Thanks and keep up the good work! Regards, Ingo ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/15] i965: Mark texture formats as supported using the surface formats table.
On 11/15/2011 05:55 PM, Eric Anholt wrote: > This is currently duplicated with intel_context.c's setup of the > formats table, and sets true for exactly the same set of formats on > gen6. > --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 - > 1 files changed, 12 insertions(+), 1 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index 28a40a2..e29a1dc 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -364,6 +364,7 @@ void > brw_init_surface_formats(struct brw_context *brw) > { > struct intel_context *intel = &brw->intel; > + struct gl_context *ctx = &intel->ctx; > int gen; > gl_format format; > > @@ -373,10 +374,11 @@ brw_init_surface_formats(struct brw_context *brw) > > for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) > { >uint32_t texture, render; > - const struct surface_format_info *rinfo; > + const struct surface_format_info *rinfo, *tinfo; >bool is_integer = _mesa_is_format_integer_color(format); > >render = texture = brw_format_for_mesa_format(format); > + tinfo = &surface_formats[texture]; > >/* The value of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so don't > skip > * it. > @@ -384,6 +386,9 @@ brw_init_surface_formats(struct brw_context *brw) >if (texture == 0 && format != MESA_FORMAT_RGBA_FLOAT32) >continue; > > + if (gen >= tinfo->sampling && (gen >= tinfo->filtering || is_integer)) > + ctx->TextureFormatSupported[format] = true; > + >/* Re-map some render target formats to make them supported when they > * wouldn't be using their format for texturing. > */ > @@ -434,6 +439,12 @@ brw_init_surface_formats(struct brw_context *brw) > brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true; > brw->format_supported_as_render_target[MESA_FORMAT_S8] = true; > brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true; > + > + /* We remap depth formats to a supported texturing format in > +* translate_tex_format(). > +*/ > + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true; > + ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true; > } > > bool I think we can actually support MESA_FORMAT_Z16 as well, no? Though enabling that should definitely be a separate patch, in case it breaks things. (I'm a little wary about it because the Windows team apparently had some issues with Z16 in GLBenchmark, but they weren't sure if it was a hardware or software issue.) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] i965 texture formats rework
On 11/15/2011 05:55 PM, Eric Anholt wrote: > There are a couple of core mesa fixes tucked in this series. I like this! Great work. For the series: Reviewed-by: Kenneth Graunke ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/15] i965: Mark texture formats as supported using the surface formats table.
On 11/16/2011 12:52 AM, Kenneth Graunke wrote: > On 11/15/2011 05:55 PM, Eric Anholt wrote: >> This is currently duplicated with intel_context.c's setup of the >> formats table, and sets true for exactly the same set of formats on >> gen6. >> --- >> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 - >> 1 files changed, 12 insertions(+), 1 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> index 28a40a2..e29a1dc 100644 >> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c >> @@ -364,6 +364,7 @@ void >> brw_init_surface_formats(struct brw_context *brw) >> { >> struct intel_context *intel = &brw->intel; >> + struct gl_context *ctx = &intel->ctx; >> int gen; >> gl_format format; >> >> @@ -373,10 +374,11 @@ brw_init_surface_formats(struct brw_context *brw) >> >> for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; >> format++) { >>uint32_t texture, render; >> - const struct surface_format_info *rinfo; >> + const struct surface_format_info *rinfo, *tinfo; >>bool is_integer = _mesa_is_format_integer_color(format); >> >>render = texture = brw_format_for_mesa_format(format); >> + tinfo = &surface_formats[texture]; >> >>/* The value of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so don't >> skip >> * it. >> @@ -384,6 +386,9 @@ brw_init_surface_formats(struct brw_context *brw) >>if (texture == 0 && format != MESA_FORMAT_RGBA_FLOAT32) >> continue; >> >> + if (gen >= tinfo->sampling && (gen >= tinfo->filtering || is_integer)) >> + ctx->TextureFormatSupported[format] = true; >> + >>/* Re-map some render target formats to make them supported when they >> * wouldn't be using their format for texturing. >> */ >> @@ -434,6 +439,12 @@ brw_init_surface_formats(struct brw_context *brw) >> brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true; >> brw->format_supported_as_render_target[MESA_FORMAT_S8] = true; >> brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true; >> + >> + /* We remap depth formats to a supported texturing format in >> +* translate_tex_format(). >> +*/ >> + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true; >> + ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true; >> } >> >> bool > > I think we can actually support MESA_FORMAT_Z16 as well, no? Though > enabling that should definitely be a separate patch, in case it breaks > things. (I'm a little wary about it because the Windows team apparently > had some issues with Z16 in GLBenchmark, but they weren't sure if it was > a hardware or software issue.) Nevermind. :) I somehow missed the other message: [PATCH 4/4] intel: Support native Z16 depth textures (including as renderbuffer) on i965. Which is especially amusing since I reviewed it. Too much email. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/15] mesa: Add fallback from RGB_FLOAT16 to RGBA_FLOAT16 before RGBA_FLOAT32.
Neither DX9 or DX10 expose equivalent to GL_RGB16F_ARB: - http://msdn.microsoft.com/en-us/library/bb172558.aspx (D3D9) - http://msdn.microsoft.com/en-us/library/bb173059.aspx (D3D10) So probably no hardware ever will support them. Even for software rendering it is a very nasty format to support, because it is not 32bits aligned (48bits). Unfortunately it doesn't look like we can simply not support GL_RGB16F_ARB though. - Original Message - > The i965 driver can't do RGB float16 at all. > --- > src/mesa/main/texformat.c |1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c > index aebe38e..ea42ced 100644 > --- a/src/mesa/main/texformat.c > +++ b/src/mesa/main/texformat.c > @@ -357,6 +357,7 @@ _mesa_choose_tex_format( struct gl_context *ctx, > GLint internalFormat, > case GL_RGB16F_ARB: > RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_FLOAT16); > RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_FLOAT32); > + RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_FLOAT16); RGB_FLOAT32 is 96bits where as RGBA_FLOAT16 is 64bits, so I think that RGBA_FLOAT16 should be tried before RGB_FLOAT32, as it is better from all POVs > RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_FLOAT32); > break; > case GL_RGB32F_ARB: Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] TGSI declarations missing type info
- Original Message - > On 11/14/2011 10:13 PM, Jose Fonseca wrote: > > > > > > - Original Message - > >> On 11/14/2011 09:48 PM, Christoph Bumiller wrote: > >>> On 11/14/2011 09:24 PM, Jose Fonseca wrote: > > > - Original Message - > > On 11/14/2011 08:23 PM, Jose Fonseca wrote: > >> > >> > >> - Original Message - > >>> The next open question I have is whether this warrants > >>> additions > >>> to > >>> the tgsi_opcode_info struct, mainly whether the return value > >>> is > >>> uint/int/float, > >>> and whether then src regs are uint/int/float, I'm not 100% > >>> that > >>> all > >>> opcodes always take all int/uint/float srcs, but my memory is > >>> a > >>> bit > >>> hazy on what the outliers might be. > >> > >> I agree that adding this sort of info to tgsi_opcode_info > >> would > >> be > >> a code typing saver for all drivers. > >> > >> However I never looked at the integer/double opcode set, but > >> from > >> quickly skimming d3d10/11 headers, there are three buckets: > >> - arithmetic: (u)ints -> (u)int > >> - comparison: (u)ints -> mask (and I don't know what's the > >> semantic > >> of these masks: 0/~0, 1.0f/0.0f, 0/1, or what) > >> - conversion: float <-> (u)int > >> > >> > >> And likewise to double, minus the unsigned vs signed split. > >> > >> So, probably we need tgsi_opcode_info to distuinguish the > >> source > >> type vs dest type, but we likely don't need per-source types. > >> > >> > >> > >> A much more intricate question, is how to represent boolean > >> mask > >> in > >> a mixed type world. My guess is one of two: > >> > >> a) is that there are two we should not mandate the bit > >> representation of a boolean (drivers are free to implement > >> booleans as 1.0f/0.0f, 0/~0, etc). The only constrainsts is > >> that > >> the state tracker never tries to write a boolean directly to > >> an > >> output. > >> > > > > > > Booleans can be set by the user (CONST) and thus the state > > tracker > > has > > know what drivers want. > > I forgot about those. > > One thing that's common to all representations is that > 0x > is false on all of them. So we could establish that as false. > > Another approach, make cleaner, would be to disallow CONST > booleans: i.e., the state tracker would use floats (0.0f/1.0f) > or > ints, and then explicitely emit a comparison against the null > element to determine the boolean value: > > CONST[0] // boolean as float > CONST[1] // boolean as integer > CONST[2] // boolean as double > IMM[0] = {0, 1.0f} > > // TEMP[0] == (bool)CONST[0] > EQ TEMP[0], CONST[0], IMM[0]. > > // TEMP[1] == (bool)CONST[1] > IEQ TEMP[1], CONST[1], IMM[0]. > > // TEMP[2] == (bool)CONST[2] > DEQ TEMP[2], CONST[2], IMM[0]. > > And the inverse for outputs. > > // OUT[0] = TEMP[0] ? 1.0f : 0.0f > SELECT OUT[0], TEMP[0], IMM[1], IMM[0] > > We need some way to encode integer/double immediates though. > > >> b) if by some miracle, all hardware implements boolean vectors > >> in > >> the same way, then we could stick to that. (For > >> sse/avx/llvm-2.9, > >> the natural way to represent a boolean vector is a bitmask > >> (0/~0). > >> llvm 3.0 finally added support for bit vectors.) > >> > >> > > > > Currently, classic mesa uses 1/0, gallium uses 1.0f or ~0 > > depending > > on > > whether the driver supports integers. > > > > Also, the float SET opcodes return 1.0f/0.0f so there has to be > > conversion if the driver supports integers, and the integer SET > > opcodes > > are supposed to return ~0. > > This is confusing -- so the semantics of e.g., IF's argument, > depends on whether the pipe driver supports integers or not? > > >>> > >>> No, TGSI_OPCODE_IF only checks for zero/nonzero (thus NaN counts > >>> as > >> > >> I mean to say float -0. Anyway, the example was confusing, I just > >> meant > >> we do an u32 comparison for drivers that support integers. > >> > >> (This is where TGSI_OPCODE_IF may yield different results for > >> float-only > >> cards, but in practice it's never actually used with anything > >> other > >> than > >> the result of a SET operation.) > > > > My other reply was about this particular case. > > > > But I'm not convinced we can ignore this case at all. I'm fine with > > drivers cutting corners. By I can't accept an interface that's > > broken by design for all drivers. > > > > The argument from TGSI_OPCODE_IF may very well come from the > > application/state tr
[Mesa-dev] [PATCH 1/6] pipe_video: Add more mpeg1 fields to pipe_mpeg12_picture_desc
Signed-off-by: Maarten Lankhorst --- src/gallium/include/pipe/p_video_state.h |3 +++ src/gallium/state_trackers/vdpau/decode.c |3 +++ 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 1940bf1..9a70eb7 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -134,6 +134,9 @@ struct pipe_mpeg12_picture_desc unsigned concealment_motion_vectors; unsigned intra_dc_precision; unsigned f_code[2][2]; + unsigned top_field_first; + unsigned full_pel_forward_vector; + unsigned full_pel_backward_vector; unsigned num_slices; }; diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index f135129..47212e3 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -245,6 +245,9 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder, picture.f_code[1][0] = picture_info->f_code[1][0] - 1; picture.f_code[1][1] = picture_info->f_code[1][1] - 1; picture.num_slices = picture_info->slice_count; + picture.top_field_first = picture_info->top_field_first; + picture.full_pel_forward_vector = picture_info->full_pel_forward_vector; + picture.full_pel_backward_vector = picture_info->full_pel_backward_vector; decoder->set_picture_parameters(decoder, &picture.base); -- 1.7.7.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] g3dvl: Remove get_sampler_view_planes, and replace with get/put bits
Doesn't break nouveau_video, apparently a check already causes it to use vl_video_buffer for vdpau. Signed-off-by: Maarten Lankhorst --- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 15 +++-- src/gallium/auxiliary/vl/vl_video_buffer.c | 92 +- src/gallium/auxiliary/vl/vl_video_buffer.h |2 + src/gallium/drivers/nouveau/nouveau_video.c | 37 -- src/gallium/drivers/nouveau/nouveau_video.h |1 - src/gallium/include/pipe/p_video_decoder.h | 17 -- src/gallium/state_trackers/vdpau/surface.c | 55 +--- 7 files changed, 131 insertions(+), 88 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 2442d78..32501e0 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -160,16 +160,18 @@ static bool init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { struct pipe_sampler_view **idct_source_sv, **mc_source_sv; + struct vl_video_buffer *mc_source = (struct vl_video_buffer*)dec->mc_source; + struct vl_video_buffer *idct_source = (struct vl_video_buffer*)dec->idct_source; unsigned i; - assert(dec && buffer); + assert(buffer); - idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source); + idct_source_sv = idct_source->get_sampler_view_planes(&idct_source->base); if (!idct_source_sv) goto error_source_sv; - mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); + mc_source_sv = mc_source->get_sampler_view_planes(&mc_source->base); if (!mc_source_sv) goto error_mc_source_sv; @@ -553,7 +555,8 @@ vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder, assert(num_ref_frames <= VL_MAX_REF_FRAMES); for (i = 0; i < num_ref_frames; ++i) { - sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]); + struct vl_video_buffer *ref = (struct vl_video_buffer *)ref_frames[i]; + sv = ref->get_sampler_view_planes(&ref->base); for (j = 0; j < VL_MAX_PLANES; ++j) pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); } @@ -713,6 +716,7 @@ static void vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; + struct vl_video_buffer *mc_source; struct pipe_sampler_view **mc_source_sv; struct pipe_vertex_buffer vb[3]; struct vl_mpeg12_buffer *buf; @@ -761,7 +765,8 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]); } - mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source); + mc_source = (struct vl_video_buffer *)dec->mc_source; + mc_source_sv = mc_source->get_sampler_view_planes(&mc_source->base); for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) { if (!dec->target_surfaces[i]) continue; diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c index 6d714d4..b982477 100644 --- a/src/gallium/auxiliary/vl/vl_video_buffer.c +++ b/src/gallium/auxiliary/vl/vl_video_buffer.c @@ -35,6 +35,7 @@ #include "util/u_inlines.h" #include "util/u_sampler.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "vl_video_buffer.h" @@ -151,6 +152,93 @@ error: return NULL; } +static int +vl_video_buffer_reformat(struct vl_video_buffer *buf, enum pipe_format format) +{ + struct pipe_resource templ, *newbuf, *newbuf2 = NULL; + struct pipe_screen *screen = buf->base.context->screen; + unsigned i; + templ = *buf->resources[1]; + if (format == PIPE_FORMAT_NV12) + templ.format = PIPE_FORMAT_R8G8_UNORM; + else if (format == PIPE_FORMAT_YV12) + templ.format = PIPE_FORMAT_R8_UNORM; + else + assert(0); + newbuf = screen->resource_create(screen, &templ); + if (!newbuf) + return -1; + if (templ.format == PIPE_FORMAT_R8_UNORM) { + newbuf2 = screen->resource_create(screen, &templ); + if (!newbuf2) { + pipe_resource_reference(&newbuf, NULL); + return -1; + } + buf->num_planes = 3; + } + else + buf->num_planes = 2; + buf->base.buffer_format = format; + pipe_resource_reference(&buf->resources[1], newbuf); + pipe_resource_reference(&buf->resources[2], newbuf2); + for (i = 1; i < VL_MAX_PLANES; ++i) { + pipe_surface_reference(&buf->surfaces[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL); + pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL); + } + return 0; +} + +static int +vl_video_buffer_put_bits(struct pipe_video_buffer *buffer, + enum pipe_format format, + void const *const *source_data, + uint32_t const *source_pitches) +{ + struct vl_video_buffer *buf = (struct vl_video_buffer *)buffe
[Mesa-dev] [PATCH 3/6] pipe_video: Get rid of a few pipe_video_decoder members
You won't be missed, functionality folded into picture_desc and decode.. Signed-off-by: Maarten Lankhorst --- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |4 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 68 --- src/gallium/drivers/nouveau/nouveau_video.c| 26 ++-- src/gallium/include/pipe/p_video_decoder.h | 21 +- src/gallium/include/pipe/p_video_state.h | 15 +--- src/gallium/state_trackers/vdpau/decode.c | 86 ++- src/gallium/state_trackers/xorg/xvmc/surface.c | 19 ++ 7 files changed, 85 insertions(+), 154 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index 936cf2c..b586a00 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -823,7 +823,7 @@ decode_slice(struct vl_mpg12_bs *bs) inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11); if (x != -1) { mb.num_skipped_macroblocks = inc - 1; - bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1); } mb.x = x += inc; @@ -927,7 +927,7 @@ decode_slice(struct vl_mpg12_bs *bs) } while (vl_vlc_bits_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23)); mb.num_skipped_macroblocks = 0; - bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1); + bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1); return true; } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 32501e0..18ba12d 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -507,24 +507,38 @@ vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder, { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; + struct pipe_sampler_view **sv; + struct vl_video_buffer *ref; + int i = 0, j; assert(dec && pic); dec->picture_desc = *pic; -} + assert(2 <= VL_MAX_REF_FRAMES); -static void -vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder, - const struct pipe_quant_matrix *matrix) -{ - struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; - const struct pipe_mpeg12_quant_matrix *m = (const struct pipe_mpeg12_quant_matrix *)matrix; + ref = (struct vl_video_buffer *)pic->ref_forward; + if (ref) { + sv = ref->get_sampler_view_planes(&ref->base); + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); + i++; + } + ref = (struct vl_video_buffer *)pic->ref_backward; + if (ref) { + sv = ref->get_sampler_view_planes(&ref->base); + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); + i++; + } - assert(dec); - assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12); + for (; i < VL_MAX_REF_FRAMES; ++i) + for (j = 0; j < VL_MAX_PLANES; ++j) + pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL); - memcpy(dec->intra_matrix, m->intra_matrix, 64); - memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64); + if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + memcpy(dec->intra_matrix, pic->intra_matrix, 64); + memcpy(dec->non_intra_matrix, pic->non_intra_matrix, 64); + } } static void @@ -543,30 +557,6 @@ vl_mpeg12_set_decode_target(struct pipe_video_decoder *decoder, } static void -vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder, - struct pipe_video_buffer **ref_frames, - unsigned num_ref_frames) -{ - struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; - struct pipe_sampler_view **sv; - unsigned i,j; - - assert(dec); - assert(num_ref_frames <= VL_MAX_REF_FRAMES); - - for (i = 0; i < num_ref_frames; ++i) { - struct vl_video_buffer *ref = (struct vl_video_buffer *)ref_frames[i]; - sv = ref->get_sampler_view_planes(&ref->base); - for (j = 0; j < VL_MAX_PLANES; ++j) - pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]); - } - - for (; i < VL_MAX_REF_FRAMES; ++i) - for (j = 0; j < VL_MAX_PLANES; ++j) - pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL); -} - -static void vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; @@ -626,6 +616,7 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) static void vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, +struct pipe_video_buffer *target, const struct pipe_macroblock *macroblock
[Mesa-dev] [PATCH 4/6] xorg/xvmc: Remove recursion from RecursiveEndFrame
There should be no way for endframe to ever need recursion.. Signed-off-by: Maarten Lankhorst --- src/gallium/state_trackers/xorg/xvmc/surface.c | 23 +-- 1 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c index ffdac3d..3025f0e 100644 --- a/src/gallium/state_trackers/xorg/xvmc/surface.c +++ b/src/gallium/state_trackers/xorg/xvmc/surface.c @@ -122,7 +122,7 @@ SetDecoderStatus(XvMCSurfacePrivate *surface) } static void -RecursiveEndFrame(XvMCSurfacePrivate *surface) +EndFrame(XvMCSurfacePrivate *surface) { XvMCContextPrivate *context_priv; unsigned i; @@ -134,12 +134,11 @@ RecursiveEndFrame(XvMCSurfacePrivate *surface) for ( i = 0; i < 2; ++i ) { if (surface->ref[i]) { XvMCSurface *ref = surface->ref[i]; + XvMCSurfacePrivate *refpriv; - assert(ref); - - surface->ref[i] = NULL; - RecursiveEndFrame(ref->privData); - surface->ref[i] = ref; + assert(ref && ref->privData); + refpriv = ref->privData; + assert(!refpriv->picture_structure); // There's just no sane way this can happen.. } } @@ -260,10 +259,10 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur // call end frame on all referenced frames if (past_surface) - RecursiveEndFrame(past_surface->privData); + EndFrame(past_surface->privData); if (future_surface) - RecursiveEndFrame(future_surface->privData); + EndFrame(future_surface->privData); xvmc_mb = macroblocks->macro_blocks + first_macroblock; @@ -275,7 +274,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur (xvmc_mb->x == 0 && xvmc_mb->y == 0))) { // If they change anyway we must assume that the current frame is ended - RecursiveEndFrame(target_surface_priv); + EndFrame(target_surface_priv); } target_surface_priv->ref[0] = past_surface; @@ -390,7 +389,11 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable, assert(desty + desth - 1 < drawable_surface->height); */ - RecursiveEndFrame(surface_priv); + if (surface_priv->ref[0]) + EndFrame(surface_priv->ref[0]->privData); + if (surface_priv->ref[1]) + EndFrame(surface_priv->ref[1]->privData); + EndFrame(surface_priv); context_priv->decoder->flush(context_priv->decoder); -- 1.7.7.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/6] pipe_video: Get rid of unnecessary state tracking
This requires some special care, vl_mpeg12_decoder was dumping some state in the decode_buffer, and we can no longer control the order in which decode_buffer/decoder is freed. As such, vl_idct cannot be used in cleanup. Fortunately it's not needed. Also, flush will have to be called before destroying a surface in XvMC, otherwise there exists the possibility that the decoder still held a reference to it. Signed-off-by: Maarten Lankhorst --- begin_frame was moved, end_frame was renamed to flush for vl_mpeg12_decoder (with some checks added) src/gallium/auxiliary/vl/vl_idct.c | 14 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 367 ++-- src/gallium/auxiliary/vl/vl_video_buffer.c |4 + src/gallium/auxiliary/vl/vl_video_buffer.h |2 + src/gallium/drivers/nouveau/nouveau_video.c| 12 - src/gallium/include/pipe/p_video_decoder.h | 25 -- src/gallium/state_trackers/vdpau/decode.c | 40 +-- src/gallium/state_trackers/xorg/xvmc/surface.c | 65 + .../state_trackers/xorg/xvmc/xvmc_private.h|1 - 9 files changed, 193 insertions(+), 337 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index a2b3537..8394542 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -614,9 +614,9 @@ init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) } static void -cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer) +cleanup_source(struct vl_idct_buffer *buffer) { - assert(idct && buffer); + assert(buffer); pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); @@ -665,13 +665,13 @@ error_surfaces: } static void -cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer) +cleanup_intermediate(struct vl_idct_buffer *buffer) { unsigned i; - assert(idct && buffer); + assert(buffer); - for(i = 0; i < idct->nr_of_render_targets; ++i) + for(i = 0; i < buffer->fb_state.nr_cbufs; ++i) pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL); pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL); @@ -823,8 +823,8 @@ vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer) { assert(buffer); - cleanup_source(buffer->idct, buffer); - cleanup_intermediate(buffer->idct, buffer); + cleanup_source(buffer); + cleanup_intermediate(buffer); pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL); pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 18ba12d..4eaad16 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -389,7 +389,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder; - assert(decoder); + assert(dec); /* Asserted in softpipe_delete_fs_state() for some reason */ dec->base.context->bind_vs_state(dec->base.context, NULL); @@ -424,13 +424,164 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) FREE(dec); } -static void * -vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder) +static void +vl_mpeg12_begin_frame(struct vl_mpeg12_decoder *dec) +{ + struct vl_mpeg12_buffer *buf; + + struct pipe_resource *tex; + struct pipe_box rect = { 0, 0, 0, 1, 1, 1 }; + + unsigned i; + + assert(dec); + + buf = dec->current_buffer; + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) + dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true); + vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false); + } + + vl_vb_map(&buf->vertex_stream, dec->base.context); + + tex = buf->zscan_source->texture; + rect.width = tex->width0; + rect.height = tex->height0; + + buf->tex_transfer = dec->base.context->get_transfer + ( + dec->base.context, tex, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + + buf->block_num = 0; + buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer); + + for (i = 0; i < VL_MAX_PLANES; ++i) { + buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); + buf->num_ycbcr_blocks[i] = 0; + } + + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) + buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); + + if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { + vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc); + + } else { + + for (i = 0; i < VL_MAX_PLANES; ++i) + vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + } +} + +static void
[Mesa-dev] [PATCH 6/6] pipe_video: Remove set_picture_parameters
Signed-off-by: Maarten Lankhorst --- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |4 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 12 +- src/gallium/drivers/nouveau/nouveau_video.c| 25 +-- src/gallium/include/pipe/p_video_decoder.h |9 +- src/gallium/state_trackers/vdpau/decode.c | 207 src/gallium/state_trackers/xorg/xvmc/surface.c | 38 + 6 files changed, 129 insertions(+), 166 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index b586a00..5e44ede 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -823,7 +823,7 @@ decode_slice(struct vl_mpg12_bs *bs) inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11); if (x != -1) { mb.num_skipped_macroblocks = inc - 1; - bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1); + bs->decoder->decode_macroblock(bs->decoder, NULL, NULL, &mb.base, 1); } mb.x = x += inc; @@ -927,7 +927,7 @@ decode_slice(struct vl_mpg12_bs *bs) } while (vl_vlc_bits_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23)); mb.num_skipped_macroblocks = 0; - bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1); + bs->decoder->decode_macroblock(bs->decoder, NULL, NULL, &mb.base, 1); return true; } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 4eaad16..c61e95c 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -626,11 +626,9 @@ error_vertex_buffer: } static void -vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder, - struct pipe_picture_desc *picture) +vl_mpeg12_set_picture_parameters(struct vl_mpeg12_decoder *dec, + struct pipe_mpeg12_picture_desc *pic) { - struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; - struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture; struct pipe_sampler_view **sv; struct vl_video_buffer *ref; int i = 0, j; @@ -691,6 +689,7 @@ vl_mpeg12_set_decode_target(struct vl_mpeg12_decoder *dec, static void vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, struct pipe_video_buffer *target, +struct pipe_picture_desc *desc, const struct pipe_macroblock *macroblocks, unsigned num_macroblocks) { @@ -703,6 +702,8 @@ vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, assert(dec); assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12); + if (desc) + vl_mpeg12_set_picture_parameters(dec, (struct pipe_mpeg12_picture_desc*)desc); if (target) vl_mpeg12_set_decode_target(dec, (struct vl_video_buffer*)target); buf = dec->current_buffer; @@ -761,6 +762,7 @@ vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder, static void vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder, struct pipe_video_buffer *target, + struct pipe_picture_desc *desc, unsigned num_bytes, const void *data) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; @@ -769,6 +771,7 @@ vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder, unsigned i; assert(dec); + vl_mpeg12_set_picture_parameters(dec, (struct pipe_mpeg12_picture_desc*)desc); vl_mpeg12_set_decode_target(dec, (struct vl_video_buffer*)target); buf = dec->current_buffer; @@ -1047,7 +1050,6 @@ vl_create_mpeg12_decoder(struct pipe_context *context, dec->base.max_references = max_references; dec->base.destroy = vl_mpeg12_destroy; - dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters; dec->base.decode_macroblock = vl_mpeg12_decode_macroblock; dec->base.decode_bitstream = vl_mpeg12_decode_bitstream; dec->base.flush = vl_mpeg12_flush; diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c index bf67d03..d84d9b3 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.c +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -424,32 +424,24 @@ nouveau_decoder_surface_index(struct nouveau_decoder *dec, } static void -nouveau_decoder_set_picture_parameters(struct pipe_video_decoder *decoder, - struct pipe_picture_desc *picture_desc) -{ - struct nouveau_decoder *dec = (struct nouveau_decoder *)decoder; - struct pipe_mpeg12_picture_desc *desc; - desc = (struct pipe_mpeg12_picture_desc *)picture_desc; - dec->picture_structure = desc->picture_structure; - - if (desc->ref_backward) - dec->past = nouveau_decoder_surface_index(dec, desc-
[Mesa-dev] [PATCH 7/6] pipe_video: remove PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED
This is no longer used with the removal of decoder buffers. Signed-off-by: Maarten Lankhorst --- src/gallium/auxiliary/vl/vl_decoder.c| 13 - src/gallium/auxiliary/vl/vl_decoder.h|6 -- src/gallium/drivers/nouveau/nouveau_video.c |2 -- src/gallium/drivers/nvfx/nvfx_screen.c |2 -- src/gallium/drivers/r300/r300_screen.c |2 -- src/gallium/drivers/r600/r600_pipe.c |2 -- src/gallium/drivers/softpipe/sp_screen.c |2 -- src/gallium/include/pipe/p_video_enums.h |3 +-- src/gallium/state_trackers/vdpau/decode.c|6 -- src/gallium/state_trackers/vdpau/vdpau_private.h |3 --- 10 files changed, 1 insertions(+), 40 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c index 383e02d..da905a6 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.c +++ b/src/gallium/auxiliary/vl/vl_decoder.c @@ -44,19 +44,6 @@ vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile } } -unsigned -vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile) -{ - assert(screen); - switch (u_reduce_video_profile(profile)) { - case PIPE_VIDEO_CODEC_MPEG12: - return 4; - - default: - return 1; - } -} - struct pipe_video_decoder * vl_create_decoder(struct pipe_context *pipe, enum pipe_video_profile profile, diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h index a997516..a7abe9c 100644 --- a/src/gallium/auxiliary/vl/vl_decoder.h +++ b/src/gallium/auxiliary/vl/vl_decoder.h @@ -38,12 +38,6 @@ bool vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile); /** - * the desired number of buffers for optimal operation - */ -unsigned -vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile); - -/** * standard implementation of pipe->create_video_decoder */ struct pipe_video_decoder * diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c index d84d9b3..486210d 100644 --- a/src/gallium/drivers/nouveau/nouveau_video.c +++ b/src/gallium/drivers/nouveau/nouveau_video.c @@ -780,8 +780,6 @@ nouveau_screen_get_video_param(struct pipe_screen *pscreen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(pscreen); - case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: - return vl_num_buffers_desired(pscreen, profile); default: debug_printf("unknown video param: %d\n", param); return 0; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 6086d43..8f82829 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -224,8 +224,6 @@ nvfx_screen_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: - return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index ab1cec5..7b06235 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -322,8 +322,6 @@ static int r300_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: - return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 243de14..120f67c 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -530,8 +530,6 @@ static int r600_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: - return vl_num_buffers_desired(screen, profile); default: return 0; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 4851049..623e915 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -190,8 +190,6 @@ softpipe_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_MAX_WIDTH: case PIPE_VIDEO_CAP_MAX_HEIGHT: return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED: - return vl_num_buffers_desired(screen, profile); default: return 0; }
Re: [Mesa-dev] reworking pipe_video_decoder / pipe_video_buffer
On 15.11.2011 17:52, Maarten Lankhorst wrote: Deleted: - begin_frame/end_frame: Was only useful for XvMC, should be folded into flush.. I'm not completely happy with the current interface also, but if you remove the state tracker ability to control how many buffers are used, which in turn lets the VDPAU and XvMC state tracker use the same buffering algorithm. This is quite bad, because for XvMC we are forced to use a buffer for each destination surface, while in the VDPAU/VAAPI case a much simpler method should be used. - set_quant_matrix/set_reference_frames: they should become part of picture_desc, not all codecs deal with it in the same way, and some may not have all of the calls. That is true, but also intended. The idea behind it is that it is not necessary for all codecs to set all buffer types, but instead inform the driver that a specific buffer type has changed. This gives the driver the ability to know which parameters has changed between calls to decode, and so only perform the (probably expensive) update of those parameters on the hardware side. - set_picture_parameters: Can be passed to decode_bitstream/macroblock Same as above, take a look at the enum VABufferType in the VAAPI specification (http://cgit.freedesktop.org/libva/tree/va/va.h). - set_decode_target: Idem - create_decode_buffer/set_decode_buffer/destroy_decode_buffer: Even if a decoder wants it, the state tracker has no business knowing about it. Unfortunately that isn't true, only VDPAU hasn't a concept of giving the application a certain control over the buffers. XvMC has the implicit association of internal decode buffers with destination surfaces, and VAAPI gives the application both an one shot copy and a map/unmap interface to provide buffer content. When we want to avoid unnecessary coping around of buffer content then this interface has to become even more complicated, like new buffer types, using create user buffer, mapping/unmapping of to be decoded data buffers etc. flush is changed to only flush a single pipe_video_buffer, this should reduce the state that needs to be maintained for XvMC otherwise. I can't really see how is that improving the situation, all it does is reducing the interface to what is needed for VDPAU, but totally ignoring requirements for VAAPI for example. Note: internally you can still use those calls, as long as the *public* api would be reduced to this, pipe_video_buffer is specific to each driver, so you can put in a lot of state there if you choose to do so. For example, quant matrix is not used in vc-1, and h264 will need a different way for set_reference_frames, see struct VdpReferenceFrameH264. This is why I want to remove those calls, and put them into picture_desc.. Correct, those calls need to be extended to support more codecs, but just stuffing everything into a single structure also doesn't seems to be a solution that will work for a wider view of different state trackers or hardware. struct pipe_video_buffer I'm less sure about, especially wrt interlacing and alignment it needs more thought. height and width are aligned to 64 on nvidia, and requires a special tiling flag. The compositor will need to become aware of interlacing, to be able to play back interlaced videos. Alignment isn't so much of an issue. Take a look at vl_video_buffer_create, it is already aligning the width/height to a power of two if the hardware needs that. So it is perfectly legal for a video buffer to be larger than the requested size, this is already tested with the current color conversion code and should work fine with both XvMC and VDPAU. I honestly haven't read up on interlacing yet, and haven't found a video to test it with, so that part is just speculative crap, and might change when I find out more about it. Regarding deinterlacing I'm also not really sure what we should do about it, but all interfaces (XvMC/VDPAU/VAAPI/DXVA) seems to treat surfaces as progressive. So I think the output of a decode call should already be in a progressive format (or can be at least handled as one for sampling). Testing interlaced videos that decode correctly with nvidia vdpau would help a lot to figure out what the proper way to handle interlacing would be, so if someone has a bunch that play with nvidia accelerated vdpau& mplayer correctly, could you please link them? ;) I will try to look around some more, but at least for MPEG2 I haven't found any interlaced videos for testing also. I will leave you a note if I find something. /** * output for decoding / input for displaying */ struct pipe_video_buffer { struct pipe_context *context; enum pipe_format buffer_format; // Note: buffer_format may change as a result of put_bits, or call to decode_bitstream // afaict there is no guarantee a buffer filled with put_bits can be used as reference // frame to decode_bitstream Altering the video buffer format as a result t
Re: [Mesa-dev] Problem building current git with osmesa and no gallium
Brian Paul vmware.com> writes: > > On 03/10/2011 08:30 PM, tom fogal wrote: > > Kenneth Graunke whitecape.org> writes: > >> On Thursday, March 10, 2011 01:17:04 PM Alexander Neundorf wrote: > >>> While at it (sorry for newbie questions), do I need gallium (maybe > >>> swrast) when I want only osmesa rendering into a software buffer ? > >> > >> I don't think OSMesa requires Gallium, but I've never used it. > > > > Kenneth's correct. We build OSMesa all the time without gallium. In > > this case OSMesa uses the swrast driver/backend/whatnot. The 'xlib' > > driver also uses this backend. > > > > I believe there is a way to build both of these on top of softpipe -- > > the software/reference gallium implementation -- but my project hasn't > > jumped on that boat yet. Someday. > > OSMesa doesn't work with gallium yet. I'd like to do that sometime > though. llvmpipe would be much faster than swrast in many cases. > > -Brian > Hi, I've been trying to build mesa with the following combination: osmesa llvm egl gles2 (with and without --with-drivers=fbdev,wayland) I am puzzled about the terminology but when I got my build to succeed, using a sha1 from 14th July 2011, I tried testing it and found that eglInitialize was failing. This was apparently because it couldn't find the display. I am using and X-less environment and want to render directly to fb0 or to some shared memory, hence the inclusion of osmesa. Am I on the right track here? The combinations of autoconf options seem to change a lot and I can't find much in the way of documentation to see which combinations and options are valid and correct for the particular sha1 I'm using. Can you point me in the right direction? Best Regards, Matthew Cattell ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] reworking pipe_video_decoder / pipe_video_buffer
Hey, On 11/16/2011 02:47 PM, Christian König wrote: > On 15.11.2011 17:52, Maarten Lankhorst wrote: >> Deleted: >> - begin_frame/end_frame: Was only useful for XvMC, should be folded into >> flush.. > > I'm not completely happy with the current interface also, but if you remove > the state tracker ability to control how many buffers are used, which in turn > lets the VDPAU and XvMC state tracker use the same buffering algorithm. This > is quite bad, because for XvMC we are forced to use a buffer for each > destination surface, while in the VDPAU/VAAPI case a much simpler method > should be used. I wasn't aware of that, the patch could easily be changed for entrypoint <= bitstream. Only create a single decoder buffer, otherwise attach it to the surface. I still think it should be removed from the state tracker though, and it would still be possible.. >> - set_quant_matrix/set_reference_frames: >> they should become part of picture_desc, >> not all codecs deal with it in the same way, >> and some may not have all of the calls. > That is true, but also intended. The idea behind it is that it is not > necessary for all codecs to set all buffer types, but instead inform the > driver that a specific buffer type has changed. This gives the driver the > ability to know which parameters has changed between calls to decode, and so > only perform the (probably expensive) update of those parameters on the > hardware side. This is why I used the flush call for XvMC as a way to signal parameter changes. If you looked at the decode_macroblock for g3dvl, it checks if (dec->current_buffer == target->decode_buffer) in begin_frame, so it doesn't flush state. For VA-API, I haven't had the .. pleasure(?) of playing around with it, and it seems to be only stubs. If the decode_bitstream interface is changed to get all bitstream buffers at the same time, there wouldn't be overhead to doing it like this. For a single picture it's supposed to stay constant, so for vdpau the sane way would be: set picture parameters for hardware (includes EVERYTHING), write all bitstream buffers to a hardware bo, wait until magic is done. Afaict, there isn't even a sane way to only submit partial buffers, so it's just a bunch of overhead for me. nvidia doesn't support va-api, it handles the entire process from picture parameters to a decoded buffer internally so it always convert the picture parameters into something the hardware can understand, every frame. >> - set_picture_parameters: Can be passed to decode_bitstream/macroblock > Same as above, take a look at the enum VABufferType in the VAAPI > specification (http://cgit.freedesktop.org/libva/tree/va/va.h). >> - set_decode_target: Idem >> - create_decode_buffer/set_decode_buffer/destroy_decode_buffer: >> Even if a decoder wants it, the state tracker has no business knowing >> about it. > Unfortunately that isn't true, only VDPAU hasn't a concept of giving the > application a certain control over the buffers. XvMC has the implicit > association of internal decode buffers with destination surfaces, and VAAPI > gives the application both an one shot copy and a map/unmap interface to > provide buffer content. > > When we want to avoid unnecessary coping around of buffer content then this > interface has to become even more complicated, like new buffer types, using > create user buffer, mapping/unmapping of to be decoded data buffers etc. There is no guarantee a user created video buffer can be used as reference frame, I honestly don't even know if it's possible. Furthermore I don't see any code in state_trackers/va except some stubs, so I cant discuss an interface that's not even there to begin with. >> flush is changed to only flush a single pipe_video_buffer, >> this should reduce the state that needs to be maintained for XvMC otherwise. > I can't really see how is that improving the situation, all it does is > reducing the interface to what is needed for VDPAU, but totally ignoring > requirements for VAAPI for example. See above. >> Note: internally you can still use those calls, as long as the *public* api >> would be reduced to this, pipe_video_buffer is specific to each driver, >> so you can put in a lot of state there if you choose to do so. For example, >> quant matrix is not used in vc-1, and h264 will need a different >> way for set_reference_frames, see struct VdpReferenceFrameH264. This is why >> I want to remove those calls, and put them into picture_desc.. > Correct, those calls need to be extended to support more codecs, but just > stuffing everything into a single structure also doesn't seems to be a > solution that will work for a wider view of different state trackers or > hardware. All hardware has its own unique approach with their own api, so it would make sense if there is a separate call for decode_va too, with maybe some bits that tell what changed, so it only has to upload those specific parts.. >> struc
Re: [Mesa-dev] [PATCH] mesa: make sure all lighting tables are updated before the computation
On 11/15/2011 08:18 PM, Yuanhan Liu wrote: On Tue, Nov 15, 2011 at 07:26:52AM -0700, Brian Paul wrote: On 11/15/2011 12:51 AM, Yuanhan Liu wrote: Make sure all lighting tables are updated before using the table to calculate something, say using _SpotExpTable to calculate _VP_inf_spot_attenuation. Signed-off-by: Yuanhan Liu --- src/mesa/main/light.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c index c27cf1d..60daa89 100644 --- a/src/mesa/main/light.c +++ b/src/mesa/main/light.c @@ -1138,6 +1138,9 @@ compute_light_positions( struct gl_context *ctx ) TRANSFORM_NORMAL( ctx->_EyeZDir, eye_z, ctx->ModelviewMatrixStack.Top->m ); } + /* Make sure all the light tables are updated before the computation */ + _mesa_validate_all_lighting_tables(ctx); + foreach (light,&ctx->Light.EnabledList) { if (ctx->_NeedEyeCoords) { Does this fix a specific bug? Nope(well, I didn't search the bugzilla). But it did fix an issue. I wrote a piglit case for this issue. --- From 47796b40c7f300e289e3a82361164b62e9e3411c Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Wed, 16 Nov 2011 11:16:18 +0800 Subject: [PATCH] Add an infinite spot light test case Signed-off-by: Yuanhan Liu --- tests/all.tests |1 + tests/general/CMakeLists.gl.txt |1 + tests/general/infinite-spot-light.c | 99 +++ 3 files changed, 101 insertions(+), 0 deletions(-) create mode 100644 tests/general/infinite-spot-light.c diff --git a/tests/all.tests b/tests/all.tests index 2a85b7a..664a9b4 100644 --- a/tests/all.tests +++ b/tests/all.tests @@ -254,6 +254,7 @@ add_plain_test(general, 'geterror-invalid-enum') add_plain_test(general, 'geterror-inside-begin') add_plain_test(general, 'gl30basic') add_plain_test(general, 'hiz') +add_plain_test(general, 'infinite-spot-light') add_plain_test(general, 'isbufferobj') add_plain_test(general, 'line-aa-width') add_plain_test(general, 'linestipple') diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt index 8ff3771..2cfc7be 100644 --- a/tests/general/CMakeLists.gl.txt +++ b/tests/general/CMakeLists.gl.txt @@ -62,6 +62,7 @@ if (UNIX) target_link_libraries (hiz m) endif (UNIX) add_executable (early-z early-z.c) +add_executable (infinite-spot-light infinite-spot-light.c) add_executable (isbufferobj isbufferobj.c) add_executable (linestipple linestipple.c) add_executable (line-aa-width line-aa-width.c) diff --git a/tests/general/infinite-spot-light.c b/tests/general/infinite-spot-light.c new file mode 100644 index 000..cb8616f --- /dev/null +++ b/tests/general/infinite-spot-light.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Yuanhan Liu + */ + +/** @file infinite-spot-light.c + * + * This is a case that sounds doesn't make sense, but it is allowed by glSpec + * (see section 2.14.1 Lightin of glspec 2.1.pdf). While writing this case, + * it servers as two purposes: + * + * 1. Test if swrast is OK with this case. + * The old mesa code would always compute a zero attenuation, thus always + * get a black lighting color. + * + * 2. Test if hardware rendering(only i965 tested) is OK with this patch. + * The old mesa code would skip the attenuation and spot computation while + * infinite light is met. This is somehow not permitted by glSpec. + */ + +#include "piglit-util.h" + +int piglit_width = 100, piglit_height = 100; +int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE; + +/* Already normalized, and 0.5 would be the expected color */ +static GLfloat dir[3] = {0.866025404, 0.0, 0.5}; +static GLfloat pos[4] = {0.0, 0.0, -1.0, 0.0}; /* infinite */ +static GLfloat light_ambient[3] = {1.0, 0.0, 0.0}; + +enum pigl
[Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
In slow_read_depth_stencil_pixels_separate() we might have separate depth and stencil buffers or a combined buffer. In the later case, don't map the buffer twice. This function is used when the depth scale/bias pixel transfer values are not the defaults. Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963 --- src/mesa/main/readpix.c | 14 +++--- 1 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 8550618..0b41de6 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, GLubyte *depthMap, *stencilMap; int depthStride, stencilStride, j; + /* The depth and stencil buffers might be separate, or a single buffer. +* If one buffer, only map it once. +*/ ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height, GL_MAP_READ_BIT, &depthMap, &depthStride); - ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, - GL_MAP_READ_BIT, &stencilMap, &stencilStride); + if (stencilRb != depthRb) { + ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, + GL_MAP_READ_BIT, &stencilMap, + &stencilStride); + } for (j = 0; j < height; j++) { GLubyte stencilVals[MAX_WIDTH]; @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, } ctx->Driver.UnmapRenderbuffer(ctx, depthRb); - ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); + if (stencilRb != depthRb) { + ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); + } } -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
On 11/16/2011 01:38 AM, Theiss, Ingo wrote: Dear devs, I am getting segmentation faults when running piglit r600.tests with latest mesa build from git: Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init failed for (1431699456, 0x0006) Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] *ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12) Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in r600_dri.so[7fb2716c8000+dba000] --- Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels defau'. Program terminated with signal 11, Segmentation fault. #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 63 ur->vtbl->transfer_unmap(pipe, transfer); (gdb) bt #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 #1 0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, context=0x2441df0) at ../../src/gallium/auxiliary/util/u_inlines.h:398 #2 st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at state_tracker/st_cb_fbo.c:704 #3 0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate (ctx=0x254b720, x=, y=, width=123, height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", dstStride=984) at main/readpix.c:427 #4 0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, pixels=, type=36269, height=, width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469 #5 _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, format=, type=36269, packing=0x255afb8, pixels=) at main/readpix.c:508 #6 0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, type=36269, format=34041, height=123, width=123, y=0, x=0, bufSize=) at main/readpix.c:753 #7 _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, type=36269, pixels=0x7fff653d0870) at main/readpix.c:761 #8 0x0042eb4a in read_32f_8 () #9 0x0042ec0b in compare () #10 0x0042ef73 in test_readpixels () #11 0x0042f7aa in piglit_display () #12 0x00430241 in display () #13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3 #14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3 #15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3 #16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3 #17 0x00430967 in main () --- My glxinfo: OpenGL vendor string: X.Org OpenGL renderer string: Gallium 0.4 on AMD BARTS OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca) OpenGL shading language version string: 1.20 A general question: I am interested in running regular piglit tests on my hardware and would like to help by reporting problems/bugs. What´s the correct way of doing so? Should I post any problems to mesa-dev or should I open a bug. Thanks and keep up the good work! I think that this will be fixed by the patch I just posted (mesa: don't map depth+stencil buffer twice in glReadPixels()) Can you try it? -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.
On 11/15/2011 11:16 AM, vlj wrote: blinking-teapot is an UBO demo. I think you missed a bunch of other comments I made on the first version. Can you go back and re-read my reply? Thanks. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
Looks good Brian. BTW, we should probably be more defensive about map failures on this code. This is not a regression, as st_cb_readpixels.c also did a poor job. Jose - Original Message - > In slow_read_depth_stencil_pixels_separate() we might have separate > depth and stencil buffers or a combined buffer. In the later case, > don't map the buffer twice. This function is used when the depth > scale/bias pixel transfer values are not the defaults. > > Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963 > --- > src/mesa/main/readpix.c | 14 +++--- > 1 files changed, 11 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c > index 8550618..0b41de6 100644 > --- a/src/mesa/main/readpix.c > +++ b/src/mesa/main/readpix.c > @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct > gl_context *ctx, > GLubyte *depthMap, *stencilMap; > int depthStride, stencilStride, j; > > + /* The depth and stencil buffers might be separate, or a single > buffer. > +* If one buffer, only map it once. > +*/ > ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height, > GL_MAP_READ_BIT, &depthMap, &depthStride); > - ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, > -GL_MAP_READ_BIT, &stencilMap, &stencilStride); > + if (stencilRb != depthRb) { > + ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, > height, > + GL_MAP_READ_BIT, &stencilMap, > + &stencilStride); > + } > > for (j = 0; j < height; j++) { >GLubyte stencilVals[MAX_WIDTH]; > @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct > gl_context *ctx, > } > > ctx->Driver.UnmapRenderbuffer(ctx, depthRb); > - ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > + if (stencilRb != depthRb) { > + ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > + } > } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
On 11/16/2011 08:43 AM, Jose Fonseca wrote: Looks good Brian. BTW, we should probably be more defensive about map failures on this code. This is not a regression, as st_cb_readpixels.c also did a poor job. Yeah, it's only a matter of time until we find a case where MapRenderbuffer (or MapTextureImage) fails and we need to be more robust... -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote: > In slow_read_depth_stencil_pixels_separate() we might have separate > depth and stencil buffers or a combined buffer. In the later case, > don't map the buffer twice. This function is used when the depth > scale/bias pixel transfer values are not the defaults. > > Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963 > --- > src/mesa/main/readpix.c | 14 +++--- > 1 files changed, 11 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c > index 8550618..0b41de6 100644 > --- a/src/mesa/main/readpix.c > +++ b/src/mesa/main/readpix.c > @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct > gl_context *ctx, > GLubyte *depthMap, *stencilMap; > int depthStride, stencilStride, j; > > + /* The depth and stencil buffers might be separate, or a single buffer. > +* If one buffer, only map it once. > +*/ > ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height, > GL_MAP_READ_BIT, &depthMap, &depthStride); > - ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, > -GL_MAP_READ_BIT, &stencilMap, &stencilStride); > + if (stencilRb != depthRb) { > + ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, > + GL_MAP_READ_BIT, &stencilMap, > + &stencilStride); > + } > Probably we want the following here: else stencilMap = depthMap; Vadim > for (j = 0; j < height; j++) { >GLubyte stencilVals[MAX_WIDTH]; > @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct gl_context > *ctx, > } > > ctx->Driver.UnmapRenderbuffer(ctx, depthRb); > - ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > + if (stencilRb != depthRb) { > + ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > + } > } > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] read_rgba_pixels: Don't force clamping if the renderbuffer is already clamped.
From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/mesa/main/readpix.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 8550618..582adc3 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -285,7 +285,8 @@ read_rgba_pixels( struct gl_context *ctx, return; if ((ctx->Color._ClampReadColor == GL_TRUE || type != GL_FLOAT) && - !_mesa_is_integer_format(format)) { + !_mesa_is_integer_format(format) && + _mesa_get_format_datatype(rb->Format) != GL_UNSIGNED_NORMALIZED) { transferOps |= IMAGE_CLAMP_BIT; } -- 1.7.7.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
On Wed, 2011-11-16 at 20:41 +0400, Vadim Girlin wrote: > On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote: > > In slow_read_depth_stencil_pixels_separate() we might have separate > > depth and stencil buffers or a combined buffer. In the later case, > > don't map the buffer twice. This function is used when the depth > > scale/bias pixel transfer values are not the defaults. > > > > Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963 > > --- > > src/mesa/main/readpix.c | 14 +++--- > > 1 files changed, 11 insertions(+), 3 deletions(-) > > > > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c > > index 8550618..0b41de6 100644 > > --- a/src/mesa/main/readpix.c > > +++ b/src/mesa/main/readpix.c > > @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct > > gl_context *ctx, > > GLubyte *depthMap, *stencilMap; > > int depthStride, stencilStride, j; > > > > + /* The depth and stencil buffers might be separate, or a single buffer. > > +* If one buffer, only map it once. > > +*/ > > ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height, > >GL_MAP_READ_BIT, &depthMap, &depthStride); > > - ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, > > - GL_MAP_READ_BIT, &stencilMap, &stencilStride); > > + if (stencilRb != depthRb) { > > + ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, > > + GL_MAP_READ_BIT, &stencilMap, > > + &stencilStride); > > + } > > > > Probably we want the following here: > >else > stencilMap = depthMap; > > And I also forgot stencilStride. Vadim > Vadim > > > for (j = 0; j < height; j++) { > >GLubyte stencilVals[MAX_WIDTH]; > > @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct > > gl_context *ctx, > > } > > > > ctx->Driver.UnmapRenderbuffer(ctx, depthRb); > > - ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > > + if (stencilRb != depthRb) { > > + ctx->Driver.UnmapRenderbuffer(ctx, stencilRb); > > + } > > } > > > > > > > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()
On 11/16/2011 09:54 AM, Vadim Girlin wrote: On Wed, 2011-11-16 at 20:41 +0400, Vadim Girlin wrote: On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote: In slow_read_depth_stencil_pixels_separate() we might have separate depth and stencil buffers or a combined buffer. In the later case, don't map the buffer twice. This function is used when the depth scale/bias pixel transfer values are not the defaults. Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963 --- src/mesa/main/readpix.c | 14 +++--- 1 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 8550618..0b41de6 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, GLubyte *depthMap, *stencilMap; int depthStride, stencilStride, j; + /* The depth and stencil buffers might be separate, or a single buffer. +* If one buffer, only map it once. +*/ ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height, GL_MAP_READ_BIT,&depthMap,&depthStride); - ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, - GL_MAP_READ_BIT,&stencilMap,&stencilStride); + if (stencilRb != depthRb) { + ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height, + GL_MAP_READ_BIT,&stencilMap, +&stencilStride); + } Probably we want the following here: else stencilMap = depthMap; And I also forgot stencilStride. Vadim Thanks for catching that. I'll post a new patch. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: initialize stencilMap, Stride if stencilRb==depthRb
--- src/mesa/main/readpix.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 0b41de6..86b8753 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -412,6 +412,10 @@ slow_read_depth_stencil_pixels_separate(struct gl_context *ctx, GL_MAP_READ_BIT, &stencilMap, &stencilStride); } + else { + stencilMap = depthMap; + stencilStride = depthStride; + } for (j = 0; j < height; j++) { GLubyte stencilVals[MAX_WIDTH]; -- 1.7.3.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
Am Mittwoch, 16. November 2011 15:58 CET, Brian Paul schrieb: > On 11/16/2011 01:38 AM, Theiss, Ingo wrote: > > Dear devs, > > > > I am getting segmentation faults when running piglit r600.tests with latest > > mesa build from git: > > > > Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: > > object_init failed for (1431699456, 0x0006) > > Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] > > *ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12) > > Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault > > at 0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in > > r600_dri.so[7fb2716c8000+dba000] > > > > --- > > Core was generated by > > `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels > > defau'. > > Program terminated with signal 11, Segmentation fault. > > #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at > > util/u_resource.c:63 > > 63 ur->vtbl->transfer_unmap(pipe, transfer); > > (gdb) bt > > #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at > > util/u_resource.c:63 > > #1 0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, > > context=0x2441df0) > > at ../../src/gallium/auxiliary/util/u_inlines.h:398 > > #2 st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at > > state_tracker/st_cb_fbo.c:704 > > #3 0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate > > (ctx=0x254b720, x=, y=, width=123, > > height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", > > dstStride=984) at main/readpix.c:427 > > #4 0x7f4cb36601d6 in read_depth_stencil_pixels > > (packing=0x7fff653d0750, pixels=, type=36269, > > height=, > > width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469 > > #5 _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, > > format=, type=36269, packing=0x255afb8, > > pixels=) at main/readpix.c:508 > > #6 0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, > > type=36269, format=34041, height=123, width=123, y=0, x=0, > > bufSize=) at main/readpix.c:753 > > #7 _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, > > type=36269, pixels=0x7fff653d0870) at main/readpix.c:761 > > #8 0x0042eb4a in read_32f_8 () > > #9 0x0042ec0b in compare () > > #10 0x0042ef73 in test_readpixels () > > #11 0x0042f7aa in piglit_display () > > #12 0x00430241 in display () > > #13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3 > > #14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3 > > #15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3 > > #16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3 > > #17 0x00430967 in main () > > --- > > > > My glxinfo: > > > > OpenGL vendor string: X.Org > > OpenGL renderer string: Gallium 0.4 on AMD BARTS > > OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca) > > OpenGL shading language version string: 1.20 > > > > > > A general question: I am interested in running regular piglit tests on my > > hardware and would like to help by reporting problems/bugs. What´s the > > correct way of doing so? Should I post any problems to mesa-dev or should I > > open a bug. > > > > Thanks and keep up the good work! > > I think that this will be fixed by the patch I just posted (mesa: > don't map depth+stencil buffer twice in glReadPixels()) Can you try it? > > -Brian > > Hi Brian, I have applied the patch but still getting segfaults from the following piglit tests. The initial segfault I have posted seems to be gone. 1. fbo-depthstencil -auto drawpixels GL_DEPTH24_STENCIL8 32F_24_8_REV fbo-depthstencil -auto readpixels GL_DEPTH24_STENCIL8 32F_24_8_REV Here is the backtrace: Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels GL_DE'. Program terminated with signal 11, Segmentation fault. #0 unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896 1896 dst[i] = src32[i] >> 24; (gdb) bt #0 unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896 #1 _mesa_unpack_ubyte_stencil_row (format=, n=123, src=0x7fff609b3c79, dst=0x7fff60977d30 "") at main/format_unpack.c:1918 #2 0x7fdfa294e8cd in slow_read_depth_stencil_pixels_separate (ctx=0x1963720, x=, y=, width=123, height=123, type=36269, packing=0x7fff6097bdf0, dst=0x7fff6097d9f8 "", dstStride=984) at main/readpix.c:421 #3 0x7fdfa294f1f6 in read_depth_stencil_pixels (packing=0x7fff6097bdf0, pixels=, type=36269, height=, width=123, y=0, x=0, ctx=0x1963720) at main/readpix.c:477 #4 _mesa_readpixels (ctx=0x1963720, x=0, y=0, width=123, height=123, format=, type=36269, packing=0x1972fb8, pixels=) at main/readpix.c:516 #5 0x7fdfa294fbdd in _mesa_ReadnPixelsARB (pixels=0x7fff6097bf10, type=36269, format=34041, height=123, width=123, y=0, x=0, bufSize=) at main/readpix.c
Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register
Vadim Girlin [2011-11-13 22:08]: > Emit MOVA* instruction only when AR is used. > > Signed-off-by: Vadim Girlin > --- > > Tested on evergreen: no regressions, fixes some variable-indexing tests. On my rv730 this patch causes broken rendering in the game Heroes of Newerth. I have an apitrace file that reproduces the issue but unfortunately that file is 3.x GB. I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad) available at http://files.code-monkey.de/piglit.tar.bz2 . Regards, Tilman -- A: Because it messes up the order in which people normally read text. Q: Why is top-posting such a bad thing? A: Top-posting. Q: What is the most annoying thing on usenet and in e-mail? pgp5VzF609PDf.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register
On Wed, 2011-11-16 at 18:57 +0100, Tilman Sauerbeck wrote: > Vadim Girlin [2011-11-13 22:08]: > > Emit MOVA* instruction only when AR is used. > > > > Signed-off-by: Vadim Girlin > > --- > > > > Tested on evergreen: no regressions, fixes some variable-indexing tests. > > On my rv730 this patch causes broken rendering in the game Heroes of > Newerth. I have an apitrace file that reproduces the issue but > unfortunately that file is 3.x GB. > > I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad) > available at http://files.code-monkey.de/piglit.tar.bz2 . > Please create the shader dump for one of the broken tests, e.g. : R600_DUMP_SHADERS=1 bin/shader_runner tests/spec/glsl-1.10/execution/variable-indexing/fs-temp-array-mat2-col-row-wr.shader_test -auto -fbo Vadim > Regards, > Tilman > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
On 11/16/2011 10:09 AM, Theiss, Ingo wrote: Am Mittwoch, 16. November 2011 15:58 CET, Brian Paul schrieb: On 11/16/2011 01:38 AM, Theiss, Ingo wrote: Dear devs, I am getting segmentation faults when running piglit r600.tests with latest mesa build from git: Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init failed for (1431699456, 0x0006) Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] *ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12) Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in r600_dri.so[7fb2716c8000+dba000] --- Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels defau'. Program terminated with signal 11, Segmentation fault. #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 63 ur->vtbl->transfer_unmap(pipe, transfer); (gdb) bt #0 u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63 #1 0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, context=0x2441df0) at ../../src/gallium/auxiliary/util/u_inlines.h:398 #2 st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at state_tracker/st_cb_fbo.c:704 #3 0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate (ctx=0x254b720, x=, y=, width=123, height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", dstStride=984) at main/readpix.c:427 #4 0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, pixels=, type=36269, height=, width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469 #5 _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, format=, type=36269, packing=0x255afb8, pixels=) at main/readpix.c:508 #6 0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, type=36269, format=34041, height=123, width=123, y=0, x=0, bufSize=) at main/readpix.c:753 #7 _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, type=36269, pixels=0x7fff653d0870) at main/readpix.c:761 #8 0x0042eb4a in read_32f_8 () #9 0x0042ec0b in compare () #10 0x0042ef73 in test_readpixels () #11 0x0042f7aa in piglit_display () #12 0x00430241 in display () #13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3 #14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3 #15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3 #16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3 #17 0x00430967 in main () --- My glxinfo: OpenGL vendor string: X.Org OpenGL renderer string: Gallium 0.4 on AMD BARTS OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca) OpenGL shading language version string: 1.20 A general question: I am interested in running regular piglit tests on my hardware and would like to help by reporting problems/bugs. What´s the correct way of doing so? Should I post any problems to mesa-dev or should I open a bug. Thanks and keep up the good work! I think that this will be fixed by the patch I just posted (mesa: don't map depth+stencil buffer twice in glReadPixels()) Can you try it? -Brian Hi Brian, I have applied the patch but still getting segfaults from the following piglit tests. The initial segfault I have posted seems to be gone. 1. fbo-depthstencil -auto drawpixels GL_DEPTH24_STENCIL8 32F_24_8_REV fbo-depthstencil -auto readpixels GL_DEPTH24_STENCIL8 32F_24_8_REV Here is the backtrace: Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels GL_DE'. Program terminated with signal 11, Segmentation fault. #0 unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896 1896 dst[i] = src32[i]>> 24; (gdb) bt #0 unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896 #1 _mesa_unpack_ubyte_stencil_row (format=, n=123, src=0x7fff609b3c79, dst=0x7fff60977d30 "") at main/format_unpack.c:1918 #2 0x7fdfa294e8cd in slow_read_depth_stencil_pixels_separate (ctx=0x1963720, x=, y=, width=123, height=123, type=36269, packing=0x7fff6097bdf0, dst=0x7fff6097d9f8 "", dstStride=984) at main/readpix.c:421 #3 0x7fdfa294f1f6 in read_depth_stencil_pixels (packing=0x7fff6097bdf0, pixels=, type=36269, height=, width=123, y=0, x=0, ctx=0x1963720) at main/readpix.c:477 #4 _mesa_readpixels (ctx=0x1963720, x=0, y=0, width=123, height=123, format=, type=36269, packing=0x1972fb8, pixels=) at main/readpix.c:516 #5 0x7fdfa294fbdd in _mesa_ReadnPixelsARB (pixels=0x7fff6097bf10, type=36269, format=34041, height=123, width=123, y=0, x=0, bufSize=) at main/readpix.c:761 #6 _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, type=36269, pixels=0x7fff6097bf10) at main/readpix.c:769 #7 0x0042eb4a in read_32f_8 () #8 0x0042ec0b in compare () #9 0x0042ef73 in test_read
[Mesa-dev] [PATCH 00/13] i965 vs: Split vec4_visitor into two classes.
In order to implement transform feedback on Gen6, we need to create a geometry shader program that will stream out the transformed vertices. It makes sense to re-use parts of the vec4_visitor infrastructure to do this, because (a) VS and GS handle data very similarly, (b) it allows us to take advantage of the optimization and register allocation code in vec4_visitor, and (c) in the long run when we add support for GLSL geometry shaders, we are going to need to use the vec4_visitor infrastructure to compile them. This patch series gets us ready for re-using parts of vec4_visitor, by breaking it into a base class containing the parts we are likely to re-use, and a derived class containing the parts we aren't. The base class, vec4_generator, is responsible for code generation, register allocation, and optimization; it is independent of GLSL IR (with one trivial exception), and mostly independent of the type of shader being compiled. The derived class, vec4_visitor, is responsible for visiting the GLSL IR and performing vertex-shader-specific actions. This split was already implicitly present in the code; this patch series merely makes it more explicit. The idea is that for now, we can make a class that derives from vec4_generator to do transform feedback, and we won't have to worry too much about interactions with the GLSL IR visiting code in vec4_visitor. Later, when we add support for GLSL geometry shaders, we'll have to do further work on vec4_visitor, but we won't have to touch vec4_generator very much. Patches 01-11 are all preparatory refactoring: they make small adjustments to the functioning of vec4_visitor so that the separation between its IR visiting parts and its code generation parts is a little more strict. Patch 12 splits the classes up, and then patch 13 reformats the class declarations to clarify the relationship between the two classes. The split isn't perfect yet; there are a few places where vec4_generator still makes implicit assumptions that are vertex-shader specific (particularly in register allocation), and there are a few places where vec4_visitor still does things that arguably should be part of code generation (such as setting up scratch registers). But I think this is close enough that we can clean those up with later patches on an as-needed basis. Note: To ease reading the diffs, I didn't make any effort to move code between .cpp files. As a result, vec4_visitor and vec4_generator code are intermingled in brw_vec4_visitor.cpp, brw_vec4_emit.cpp, brw_vec4.cpp, brw_vec4_copy_propagation.cpp, and brw_vec4_reg_allocate.cpp. Once this patch is reviewed and we've settled on how we want the classes to be split up, I'll make a follow-up patch that rearranges the .cpp files to match the changes to the class structure. [PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class. [PATCH 02/13] i965 vs: Make reg_allocate() return total_grf. [PATCH 03/13] i965 vs: Return last_scratch from move_grf_array_access_to_scratch() [PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function arguments. [PATCH 05/13] i965 vs: Move brw_set_access_mode() call into generate_code(). [PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of generate_code(). [PATCH 07/13] i965 vs: Streamline the way fail is tracked. [PATCH 08/13] i965 vs: Add get_debug_flag() [PATCH 09/13] i965 vs: Add get_debug_name(). [PATCH 10/13] i965 vs: Make a separate optimize() method. [PATCH 11/13] i965 vs: Make get_assignment_lhs() a method [PATCH 12/13] i965 vs: Separate IR visiting from code generation in vec4_visitor; [PATCH 13/13] i965 vs: Rearrange class declarations. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class.
This patch removes two unused functions from vec4_visitor: implied_mrf_writes() and emit_bool_comparison(). implied_mrf_writes() is part of instruction scheduling, which has not been implemented for vec4_visitor yet. emit_bool_comparison() has never been used. It also removes two declarations that did't have any functions associated with them: reladdr_to_temp() and process_move_condition(). These functions appear to be a copy-paste error from when the vec4_visitor class was first introduced. Since later patches in this series refactor the vec4_visitor class into two classes, it seems sensible to eliminate dead code first, so that the refactoring job will be smaller. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 37 src/mesa/drivers/dri/i965/brw_vec4.h |7 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 - 3 files changed, 0 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 54ef9b6..3971532 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -45,43 +45,6 @@ vec4_instruction::is_math() opcode == SHADER_OPCODE_INT_REMAINDER || opcode == SHADER_OPCODE_POW); } -/** - * Returns how many MRFs an opcode will write over. - * - * Note that this is not the 0 or 1 implied writes in an actual gen - * instruction -- the generate_* functions generate additional MOVs - * for setup. - */ -int -vec4_visitor::implied_mrf_writes(vec4_instruction *inst) -{ - if (inst->mlen == 0) - return 0; - - switch (inst->opcode) { - case SHADER_OPCODE_RCP: - case SHADER_OPCODE_RSQ: - case SHADER_OPCODE_SQRT: - case SHADER_OPCODE_EXP2: - case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_SIN: - case SHADER_OPCODE_COS: - return 1; - case SHADER_OPCODE_POW: - return 2; - case VS_OPCODE_URB_WRITE: - return 1; - case VS_OPCODE_PULL_CONSTANT_LOAD: - return 2; - case VS_OPCODE_SCRATCH_READ: - return 2; - case VS_OPCODE_SCRATCH_WRITE: - return 3; - default: - assert(!"not reached"); - return inst->mlen; - } -} bool src_reg::equals(src_reg *r) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 93ccda9..c03bcad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -338,8 +338,6 @@ public: dst_reg *variable_storage(ir_variable *var); - void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); - src_reg src_reg_for_float(float val); /** @@ -444,8 +442,6 @@ public: vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); - int implied_mrf_writes(vec4_instruction *inst); - bool try_rewrite_rhs_to_dst(ir_assignment *ir, dst_reg dst, src_reg src, @@ -456,7 +452,6 @@ public: void visit_instructions(const exec_list *list); void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate); - void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); void emit_if_gen6(ir_if *ir); void emit_block_move(dst_reg *dst, src_reg *src, @@ -512,8 +507,6 @@ public: bool try_emit_sat(ir_expression *ir); void resolve_ud_negate(src_reg *reg); - bool process_move_condition(ir_rvalue *ir); - void generate_code(); void generate_vs_instruction(vec4_instruction *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 853c3ee..3327eef 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1015,20 +1015,6 @@ vec4_visitor::try_emit_sat(ir_expression *ir) } void -vec4_visitor::emit_bool_comparison(unsigned int op, -dst_reg dst, src_reg src0, src_reg src1) -{ - /* original gen4 does destination conversion before comparison. */ - if (intel->gen < 5) - dst.type = src0.type; - - emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op))); - - dst.type = BRW_REGISTER_TYPE_D; - emit(AND(dst, src_reg(dst), src_reg(0x1))); -} - -void vec4_visitor::visit(ir_expression *ir) { unsigned int operand; -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/13] i965 vs: Make reg_allocate() return total_grf.
Previously, reg_allocate() (and reg_allocate_trivial()) stored the total number of registers allocated (including payload registers) in vec4_visitor::total_grf. Now, it returns this value to vec4_visitor::run(), which stores it in total_grf. This helps pave the way for separating the IR visiting parts of vec4_visitor (which include run()) from the code generation parts (which include reg_allocate()) by making the data flow explicit between them. --- src/mesa/drivers/dri/i965/brw_vec4.h |4 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|2 +- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 24 ++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c03bcad..b942284 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -390,8 +390,8 @@ public: int setup_attributes(int payload_reg); int setup_uniforms(int payload_reg); void setup_payload(); - void reg_allocate_trivial(); - void reg_allocate(); + int reg_allocate_trivial(); + int reg_allocate(); void move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 54bbe13..f2a100b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -657,7 +657,7 @@ vec4_visitor::run() return false; setup_payload(); - reg_allocate(); + prog_data->total_grf = reg_allocate(); if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 1ace91f..7848656 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -41,7 +41,7 @@ assign(int *reg_hw_locations, reg *reg) } } -void +int vec4_visitor::reg_allocate_trivial() { int hw_reg_mapping[this->virtual_grf_count]; @@ -76,7 +76,7 @@ vec4_visitor::reg_allocate_trivial() next += this->virtual_grf_sizes[i]; } } - prog_data->total_grf = next; + int total_grf = next; foreach_iter(exec_list_iterator, iter, this->instructions) { vec4_instruction *inst = (vec4_instruction *)iter.get(); @@ -87,10 +87,12 @@ vec4_visitor::reg_allocate_trivial() assign(hw_reg_mapping, &inst->src[2]); } - if (prog_data->total_grf > BRW_MAX_GRF) { + if (total_grf > BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - prog_data->total_grf, BRW_MAX_GRF); + total_grf, BRW_MAX_GRF); } + + return total_grf; } static void @@ -139,7 +141,7 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw, ra_set_finalize(brw->vs.regs); } -void +int vec4_visitor::reg_allocate() { int hw_reg_mapping[virtual_grf_count]; @@ -152,8 +154,7 @@ vec4_visitor::reg_allocate() * register access as a result of broken optimization passes. */ if (0) { - reg_allocate_trivial(); - return; + return reg_allocate_trivial(); } calculate_live_intervals(); @@ -205,20 +206,19 @@ vec4_visitor::reg_allocate() if (!ra_allocate_no_spills(g)) { ralloc_free(g); fail("No register spilling support yet\n"); - return; + return 0; } /* Get the chosen virtual registers for each node, and map virtual * regs in the register classes back down to real hardware reg * numbers. */ - prog_data->total_grf = first_assigned_grf; + int total_grf = first_assigned_grf; for (int i = 0; i < virtual_grf_count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; - prog_data->total_grf = MAX2(prog_data->total_grf, - hw_reg_mapping[i] + virtual_grf_sizes[i]); + total_grf = MAX2(total_grf, hw_reg_mapping[i] + virtual_grf_sizes[i]); } foreach_list(node, &this->instructions) { @@ -231,6 +231,8 @@ vec4_visitor::reg_allocate() } ralloc_free(g); + + return total_grf; } } /* namespace brw */ -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/13] i965 vs: Return last_scratch from move_grf_array_access_to_scratch()
Previously, vec4_visitor::move_grf_array_access_to_scratch() stored the value of last_scratch in the brw_vs_compile structure directly. Now it returns it to vec4_visitor::run(), which stores it. This helps pave the way for sharing vec4 code between the GS and the VS, by making move_grf_array_access_to_scratch() more independent of the kind of shader it is compiling for. --- src/mesa/drivers/dri/i965/brw_vec4.h |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|2 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 13 - 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index b942284..9c4495a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -392,7 +392,7 @@ public: void setup_payload(); int reg_allocate_trivial(); int reg_allocate(); - void move_grf_array_access_to_scratch(); + unsigned move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); void split_uniform_registers(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index f2a100b..061d487 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -638,7 +638,7 @@ vec4_visitor::run() * that we have reladdr computations available for CSE, since we'll * often do repeated subexpressions for those. */ - move_grf_array_access_to_scratch(); + c->last_scratch = move_grf_array_access_to_scratch(); move_uniform_array_access_to_pull_constants(); pack_uniform_registers(); move_push_constants_to_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3327eef..42214d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2181,10 +2181,11 @@ vec4_visitor::emit_scratch_write(vec4_instruction *inst, * registers. So, we send all GRF arrays that get variable index * access to scratch space. */ -void +unsigned vec4_visitor::move_grf_array_access_to_scratch() { int scratch_loc[this->virtual_grf_count]; + unsigned last_scratch = 0; for (int i = 0; i < this->virtual_grf_count; i++) { scratch_loc[i] = -1; @@ -2199,8 +2200,8 @@ vec4_visitor::move_grf_array_access_to_scratch() if (inst->dst.file == GRF && inst->dst.reladdr && scratch_loc[inst->dst.reg] == -1) { -scratch_loc[inst->dst.reg] = c->last_scratch; -c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; +scratch_loc[inst->dst.reg] = last_scratch; +last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; } for (int i = 0 ; i < 3; i++) { @@ -2208,8 +2209,8 @@ vec4_visitor::move_grf_array_access_to_scratch() if (src->file == GRF && src->reladdr && scratch_loc[src->reg] == -1) { - scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + scratch_loc[src->reg] = last_scratch; + last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; } } } @@ -2252,6 +2253,8 @@ vec4_visitor::move_grf_array_access_to_scratch() inst->src[i].reladdr = NULL; } } + + return last_scratch; } /** -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function argument.
Previously, vec4_visitor::setup_payload() stored the location of the first non-payload GRF in vec4_visitor::first_non_payload_grf, where it was later picked up by the register allocator. Now setup_payload() returns the location to the caller (vec4_visitor::run()), which passes it directly to the register allocator. This helps pave the way for separating the IR visiting parts of vec4_visitor (which include setup_payload()) and the code generation parts (which include the register allocator), by making the data flow between the two parts more explicit. --- src/mesa/drivers/dri/i965/brw_vec4.h |7 +++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|8 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 10 +- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 9c4495a..c546235 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -321,7 +321,6 @@ public: int *virtual_grf_sizes; int virtual_grf_count; int virtual_grf_array_size; - int first_non_payload_grf; int *virtual_grf_def; int *virtual_grf_use; dst_reg userplane[MAX_CLIP_PLANES]; @@ -389,9 +388,9 @@ public: void setup_builtin_uniform_values(ir_variable *ir); int setup_attributes(int payload_reg); int setup_uniforms(int payload_reg); - void setup_payload(); - int reg_allocate_trivial(); - int reg_allocate(); + int setup_payload(); + int reg_allocate_trivial(int first_non_payload_grf); + int reg_allocate(int first_non_payload_grf); unsigned move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 061d487..3219fe4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -127,7 +127,7 @@ vec4_visitor::setup_uniforms(int reg) return reg; } -void +int vec4_visitor::setup_payload(void) { int reg = 0; @@ -142,7 +142,7 @@ vec4_visitor::setup_payload(void) reg = setup_attributes(reg); - this->first_non_payload_grf = reg; + return reg; } struct brw_reg @@ -656,8 +656,8 @@ vec4_visitor::run() if (failed) return false; - setup_payload(); - prog_data->total_grf = reg_allocate(); + int first_non_payload_grf = setup_payload(); + prog_data->total_grf = reg_allocate(first_non_payload_grf); if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 7848656..f6db9b4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -42,7 +42,7 @@ assign(int *reg_hw_locations, reg *reg) } int -vec4_visitor::reg_allocate_trivial() +vec4_visitor::reg_allocate_trivial(int first_non_payload_grf) { int hw_reg_mapping[this->virtual_grf_count]; bool virtual_grf_used[this->virtual_grf_count]; @@ -68,7 +68,7 @@ vec4_visitor::reg_allocate_trivial() } } - hw_reg_mapping[0] = this->first_non_payload_grf; + hw_reg_mapping[0] = first_non_payload_grf; next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; for (i = 1; i < this->virtual_grf_count; i++) { if (virtual_grf_used[i]) { @@ -142,10 +142,10 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw, } int -vec4_visitor::reg_allocate() +vec4_visitor::reg_allocate(int first_non_payload_grf) { int hw_reg_mapping[virtual_grf_count]; - int first_assigned_grf = this->first_non_payload_grf; + int first_assigned_grf = first_non_payload_grf; int base_reg_count = BRW_MAX_GRF - first_assigned_grf; int class_sizes[base_reg_count]; int class_count = 0; @@ -154,7 +154,7 @@ vec4_visitor::reg_allocate() * register access as a result of broken optimization passes. */ if (0) { - return reg_allocate_trivial(); + return reg_allocate_trivial(first_non_payload_grf); } calculate_live_intervals(); -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/13] i965 vs: Move brw_set_access_mode() call into generate_code().
Previously, vec4_visitor::run() called brw_set_access_mode() right before calling vec4_visitor::generate_code(). It seems clearer to put the call inside generate_code(), since the purpose of brw_set_access_mode() is to set up the proper state for code generation. This helps pave the way for separating the IR visiting parts of vec4_visitor (which include run()) from the code generation parts (which include generate_code()), by avoiding the need for run() to access the brw_compile structure. --- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 3219fe4..81e7ab7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -662,8 +662,6 @@ vec4_visitor::run() if (failed) return false; - brw_set_access_mode(p, BRW_ALIGN_16); - generate_code(); return !failed; @@ -683,6 +681,7 @@ vec4_visitor::generate_code() int *if_depth_in_loop = rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + brw_set_access_mode(p, BRW_ALIGN_16); if (unlikely(INTEL_DEBUG & DEBUG_VS)) { printf("Native code for vertex shader %d:\n", prog->Name); -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of generate_code().
Previously, vec4_visitor::run() was responsible for calling reg_allocate() right before generate_code(). However, since there is no reason for run() to do any further manipulation of the instructions between calling reg_allocate() and generate_code(), it's just as simple to have generate_code() call reg_allocate(). This helps pave the way for separating the IR visiting parts of vec4_visitor 9which include run()) from the code generation parts (which include generate_code() and register_allocate()) by reducing the number of function calls between the two parts. --- src/mesa/drivers/dri/i965/brw_vec4.h|2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 18 ++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c546235..3e30514 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -506,7 +506,7 @@ public: bool try_emit_sat(ir_expression *ir); void resolve_ud_negate(src_reg *reg); - void generate_code(); + int generate_code(int first_non_payload_grf); void generate_vs_instruction(vec4_instruction *inst, struct brw_reg dst, struct brw_reg *src); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 81e7ab7..284cb86 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -657,18 +657,13 @@ vec4_visitor::run() return false; int first_non_payload_grf = setup_payload(); - prog_data->total_grf = reg_allocate(first_non_payload_grf); - - if (failed) - return false; - - generate_code(); + prog_data->total_grf = generate_code(first_non_payload_grf); return !failed; } -void -vec4_visitor::generate_code() +int +vec4_visitor::generate_code(int first_non_payload_grf) { int last_native_inst = 0; const char *last_annotation_string = NULL; @@ -683,6 +678,11 @@ vec4_visitor::generate_code() brw_set_access_mode(p, BRW_ALIGN_16); + int total_grf = reg_allocate(first_non_payload_grf); + + if (failed) + return 0; + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { printf("Native code for vertex shader %d:\n", prog->Name); } @@ -906,6 +906,8 @@ vec4_visitor::generate_code() } } } + + return total_grf; } extern "C" { -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/13] i965 vs: Streamline the way fail is tracked.
Previously, vec4_visitor flagged a failure by setting the "failed" boolean and storing a string in "fail_msg" (which, up until the point of failure, was undefined). This is redundant--it's just as easy to have fail_msg be NULL when there is no failure, and a string pointer when there is a failure. This patch also adds trivial inline functions failed() and get_fail_msg() so that this simplification doesn't make client code any less clear. --- src/mesa/drivers/dri/i965/brw_vec4.h | 13 +++-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 15 +++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |6 ++ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 3e30514..6fafc97 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -309,7 +309,6 @@ public: exec_list instructions; char *fail_msg; - bool failed; /** * GLSL IR currently being processed, which is associated with our @@ -337,6 +336,16 @@ public: dst_reg *variable_storage(ir_variable *var); + bool failed() const + { + return fail_msg != NULL; + } + + char *get_fail_msg() const + { + return fail_msg; + } + src_reg src_reg_for_float(float val); /** @@ -379,7 +388,7 @@ public: struct hash_table *variable_ht; - bool run(void); + void run(void); void fail(const char *msg, ...); int virtual_grf_alloc(int size); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 284cb86..e5dc763 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -619,7 +619,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, } } -bool +void vec4_visitor::run() { if (c->key.userclip_active && !c->key.uses_clip_distance) @@ -653,13 +653,11 @@ vec4_visitor::run() } while (progress); - if (failed) - return false; + if (failed()) + return; int first_non_payload_grf = setup_payload(); prog_data->total_grf = generate_code(first_non_payload_grf); - - return !failed; } int @@ -680,7 +678,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) int total_grf = reg_allocate(first_non_payload_grf); - if (failed) + if (failed()) return 0; if (unlikely(INTEL_DEBUG & DEBUG_VS)) { @@ -930,9 +928,10 @@ brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) } vec4_visitor v(c, prog, shader); - if (!v.run()) { + v.run(); + if (v.failed()) { prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.get_fail_msg()); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 42214d4..338106c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2375,7 +2375,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c, this->shader = shader; this->mem_ctx = ralloc_context(NULL); - this->failed = false; + this->fail_msg = NULL; this->base_ir = NULL; this->current_annotation = NULL; @@ -2418,11 +2418,9 @@ vec4_visitor::fail(const char *format, ...) va_list va; char *msg; - if (failed) + if (failed()) return; - failed = true; - va_start(va, format); msg = ralloc_vasprintf(mem_ctx, format, va); va_end(va); -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/13] i965 vs: Add get_debug_flag()
To determine whether a debug dump should be performed, vec4_visitor checks the DEBUG_VS bit in INTEL_DEBUG. This patch moves that check into a single common function, get_debug_flag(). This helps pave the way for sharing code between VS and GS code generation, by creating a function that can be made virtual. --- src/mesa/drivers/dri/i965/brw_vec4.h |2 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 21 - src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6fafc97..4e17d15 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -515,6 +515,8 @@ public: bool try_emit_sat(ir_expression *ir); void resolve_ud_negate(src_reg *reg); + bool get_debug_flag() const; + int generate_code(int first_non_payload_grf); void generate_vs_instruction(vec4_instruction *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index e5dc763..0ee6525 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -660,9 +660,20 @@ vec4_visitor::run() prog_data->total_grf = generate_code(first_non_payload_grf); } +/** + * Return a bool indicating whether debugging output should be generated + * for this shader. + */ +bool +vec4_visitor::get_debug_flag() const +{ + return INTEL_DEBUG & DEBUG_VS; +} + int vec4_visitor::generate_code(int first_non_payload_grf) { + bool debug_flag = get_debug_flag(); int last_native_inst = 0; const char *last_annotation_string = NULL; ir_instruction *last_annotation_ir = NULL; @@ -681,7 +692,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) if (failed()) return 0; - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (unlikely(debug_flag)) { printf("Native code for vertex shader %d:\n", prog->Name); } @@ -689,7 +700,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) vec4_instruction *inst = (vec4_instruction *)node; struct brw_reg src[3], dst; - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (unlikely(debug_flag)) { if (last_annotation_ir != inst->ir) { last_annotation_ir = inst->ir; if (last_annotation_ir) { @@ -862,7 +873,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) break; } - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (unlikely(debug_flag)) { for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { if (0) { printf("0x%08x 0x%08x 0x%08x 0x%08x ", @@ -878,7 +889,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) last_native_inst = p->nr_insn; } - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (unlikely(debug_flag)) { printf("\n"); } @@ -893,7 +904,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) * case you're doing that. */ if (0) { - if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (unlikely(debug_flag)) { for (unsigned int i = 0; i < p->nr_insn; i++) { printf("0x%08x 0x%08x 0x%08x 0x%08x ", ((uint32_t *)&p->store[i])[3], diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 338106c..6262093 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2428,7 +2428,7 @@ vec4_visitor::fail(const char *format, ...) this->fail_msg = msg; - if (INTEL_DEBUG & DEBUG_VS) { + if (get_debug_flag()) { fprintf(stderr, "%s", msg); } } -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/13] i965 vs: Add get_debug_name().
Previously, vec4_visitor contained several explicit references to vertex shading in debug messages. This patch moves all of those references into a single function, get_debug_name(), which returns a human-readable description of the shader being compiled. This helps pave the way for sharing code between VS and GS code generation, by creating a function that can be made virtual. --- src/mesa/drivers/dri/i965/brw_vec4.h |1 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 20 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |3 ++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 4e17d15..c0284fc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -516,6 +516,7 @@ public: void resolve_ud_negate(src_reg *reg); bool get_debug_flag() const; + const char *get_debug_name() const; int generate_code(int first_non_payload_grf); void generate_vs_instruction(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 0ee6525..dceacc7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -611,10 +611,12 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, default: if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { -fail("unsupported opcode in `%s' in VS\n", - brw_opcodes[inst->opcode].name); +fail("unsupported opcode in `%s' in %s\n", + brw_opcodes[inst->opcode].name, + get_debug_name()); } else { -fail("Unsupported opcode %d in VS", inst->opcode); +fail("Unsupported opcode %d in %s", inst->opcode, + get_debug_name()); } } } @@ -670,6 +672,16 @@ vec4_visitor::get_debug_flag() const return INTEL_DEBUG & DEBUG_VS; } +/** + * Return a string describing the program being compiled, for debugging + * purposes. Caller should not free this string. + */ +const char * +vec4_visitor::get_debug_name() const +{ + return ralloc_asprintf(mem_ctx, "vertex shader %d", prog->Name); +} + int vec4_visitor::generate_code(int first_non_payload_grf) { @@ -693,7 +705,7 @@ vec4_visitor::generate_code(int first_non_payload_grf) return 0; if (unlikely(debug_flag)) { - printf("Native code for vertex shader %d:\n", prog->Name); + printf("Native code for %s:\n", get_debug_name()); } foreach_list(node, &this->instructions) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6262093..5025115 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2424,7 +2424,8 @@ vec4_visitor::fail(const char *format, ...) va_start(va, format); msg = ralloc_vasprintf(mem_ctx, format, va); va_end(va); - msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + msg = ralloc_asprintf(mem_ctx, "compile failed for %s: %s\n", + get_debug_name(), msg); this->fail_msg = msg; -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/13] i965 vs: Make a separate optimize() method.
This patch moves vec4_visitor::run()'s optimization loop to its own method. This helps pave the way for separating the IR visiting parts of vec4_visitor from the code generation parts, by reducing the number of function calls between the two parts. --- src/mesa/drivers/dri/i965/brw_vec4.h|1 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 23 ++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c0284fc..5d2f120 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -411,6 +411,7 @@ public: bool opt_copy_propagation(); bool opt_algebraic(); bool opt_compute_to_mrf(); + void optimize(); vec4_instruction *emit(vec4_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index dceacc7..5724a29 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -622,6 +622,19 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, } void +vec4_visitor::optimize() +{ + bool progress; + do { + progress = false; + progress = dead_code_eliminate() || progress; + progress = opt_copy_propagation() || progress; + progress = opt_algebraic() || progress; + progress = opt_compute_to_mrf() || progress; + } while (progress); +} + +void vec4_visitor::run() { if (c->key.userclip_active && !c->key.uses_clip_distance) @@ -645,15 +658,7 @@ vec4_visitor::run() pack_uniform_registers(); move_push_constants_to_pull_constants(); - bool progress; - do { - progress = false; - progress = dead_code_eliminate() || progress; - progress = opt_copy_propagation() || progress; - progress = opt_algebraic() || progress; - progress = opt_compute_to_mrf() || progress; - } while (progress); - + optimize(); if (failed()) return; -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/13] i965 vs: Make get_assignment_lhs() a method
Previously, get_assignment_lhs() was a static function. But it makes just as much sense to make it a private member functions of vec4_visitor, because it is one of vec4_visitor's implementation details, just like the other functions in brw_vec4_visitor.cpp. This will allow the implementation details of vec4_visitor to be made private. --- src/mesa/drivers/dri/i965/brw_vec4.h |1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5d2f120..226413b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -556,6 +556,7 @@ public: void generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index); + dst_reg get_assignment_lhs(ir_dereference *ir); }; } /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 5025115..cf71ef4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1460,8 +1460,8 @@ vec4_visitor::visit(ir_dereference_record *ir) * instead of potentially using a temporary like we might with the * ir_dereference handler. */ -static dst_reg -get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +dst_reg +vec4_visitor::get_assignment_lhs(ir_dereference *ir) { /* The LHS must be a dereference. If the LHS is a variable indexed array * access of a vector, it must be separated into a series conditional moves @@ -1476,8 +1476,8 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) /* Use the rvalue deref handler for the most part. We'll ignore * swizzles in it and write swizzles using writemask, though. */ - ir->accept(v); - return dst_reg(v->result); + ir->accept(this); + return dst_reg(this->result); } void @@ -1590,7 +1590,7 @@ vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, void vec4_visitor::visit(ir_assignment *ir) { - dst_reg dst = get_assignment_lhs(ir->lhs, this); + dst_reg dst = get_assignment_lhs(ir->lhs); uint32_t predicate = BRW_PREDICATE_NONE; if (!ir->lhs->type->is_scalar() && -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/13] i965 vs: Separate IR visiting from code generation in vec4_visitor.
This patch separates vec4_visitor into a base class, vec4_generator, and a derived class, vec4_visitor. vec4_generator is responsible for code generation, register allocation, and optimization; it is independent of GLSL IR (with one trivial exception), and mostly independent of the type of shader being compiled. vec4_visitor is responsible for visiting the GLSL IR and performing vertex-shader-specific actions. This helps pave the way for sharing code between VS and GS code generation, by creating a new class for the parts of vec4_visitor that are (or should soon be) independent of the type of shader being compiled. It is also important that the base class be as independent of GLSL IR as possible, since our first use of the GS will be to create a GS thread that performs transform feedback on Gen6, and this thread will not be related to any GLSL IR. --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 10 +- src/mesa/drivers/dri/i965/brw_vec4.h | 199 +++- .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |2 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp| 80 - .../drivers/dri/i965/brw_vec4_reg_allocate.cpp |4 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 140 +++--- 6 files changed, 228 insertions(+), 207 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 3971532..8632b9f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -63,7 +63,7 @@ src_reg::equals(src_reg *r) } void -vec4_visitor::calculate_live_intervals() +vec4_generator::calculate_live_intervals() { int *def = ralloc_array(mem_ctx, int, virtual_grf_count); int *use = ralloc_array(mem_ctx, int, virtual_grf_count); @@ -139,7 +139,7 @@ vec4_visitor::calculate_live_intervals() } bool -vec4_visitor::virtual_grf_interferes(int a, int b) +vec4_generator::virtual_grf_interferes(int a, int b) { int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); @@ -165,7 +165,7 @@ vec4_visitor::virtual_grf_interferes(int a, int b) * interfere with other regs. */ bool -vec4_visitor::dead_code_eliminate() +vec4_generator::dead_code_eliminate() { bool progress = false; int pc = 0; @@ -347,7 +347,7 @@ src_reg::is_one() const * instructions involving 0. */ bool -vec4_visitor::opt_algebraic() +vec4_generator::opt_algebraic() { bool progress = false; @@ -491,7 +491,7 @@ vec4_visitor::move_push_constants_to_pull_constants() * the GRF write directly to the MRF instead. */ bool -vec4_visitor::opt_compute_to_mrf() +vec4_generator::opt_compute_to_mrf() { bool progress = false; int next_ip = 0; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 226413b..6eb6e5d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -164,7 +164,7 @@ public: bool is_zero() const; bool is_one() const; - src_reg(class vec4_visitor *v, const struct glsl_type *type); + src_reg(class vec4_generator *v, const struct glsl_type *type); explicit src_reg(dst_reg reg); @@ -218,7 +218,7 @@ public: this->fixed_hw_reg = reg; } - dst_reg(class vec4_visitor *v, const struct glsl_type *type); + dst_reg(class vec4_generator *v, const struct glsl_type *type); explicit dst_reg(src_reg reg); @@ -241,7 +241,7 @@ public: return node; } - vec4_instruction(vec4_visitor *v, enum opcode opcode, + vec4_instruction(vec4_generator *v, enum opcode opcode, dst_reg dst = dst_reg(), src_reg src0 = src_reg(), src_reg src1 = src_reg(), @@ -279,12 +279,11 @@ public: bool is_math(); }; -class vec4_visitor : public ir_visitor +class vec4_generator { public: - vec4_visitor(struct brw_vs_compile *c, - struct gl_shader_program *prog, struct brw_shader *shader); - ~vec4_visitor(); + vec4_generator(struct brw_compile *p); + ~vec4_generator(); dst_reg dst_null_f() { @@ -297,14 +296,9 @@ public: } struct brw_context *brw; - const struct gl_vertex_program *vp; struct intel_context *intel; struct gl_context *ctx; - struct brw_vs_compile *c; - struct brw_vs_prog_data *prog_data; struct brw_compile *p; - struct brw_shader *shader; - struct gl_shader_program *prog; void *mem_ctx; exec_list instructions; @@ -322,7 +316,6 @@ public: int virtual_grf_array_size; int *virtual_grf_def; int *virtual_grf_use; - dst_reg userplane[MAX_CLIP_PLANES]; /** * This is the size to be used for an array with an element per @@ -334,8 +327,6 @@ public: bool live_intervals_valid; - dst_reg *variable_storage(ir_variable *var); - bool failed() const { return fail_msg != NULL; @@ -348,63 +339
[Mesa-dev] [PATCH 13/13] i965 vs: Rearrange class declarations.
This patch rearranges the class declarations for vec4_generator and vec4_visitor, and adds "private" and "protected" modifiers, to clarify how the code is organized and how the two classes relate to each other. There is no functional change. --- src/mesa/drivers/dri/i965/brw_vec4.h | 310 +- 1 files changed, 189 insertions(+), 121 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6eb6e5d..3118c97 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -285,48 +285,6 @@ public: vec4_generator(struct brw_compile *p); ~vec4_generator(); - dst_reg dst_null_f() - { - return dst_reg(brw_null_reg()); - } - - dst_reg dst_null_d() - { - return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } - - struct brw_context *brw; - struct intel_context *intel; - struct gl_context *ctx; - struct brw_compile *p; - void *mem_ctx; - exec_list instructions; - - char *fail_msg; - - /** -* GLSL IR currently being processed, which is associated with our -* driver IR instructions for debugging purposes. -*/ - ir_instruction *base_ir; - const char *current_annotation; - - int *virtual_grf_sizes; - int virtual_grf_count; - int virtual_grf_array_size; - int *virtual_grf_def; - int *virtual_grf_use; - - /** -* This is the size to be used for an array with an element per -* reg_offset -*/ - int virtual_grf_reg_count; - /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */ - int *virtual_grf_reg_map; - - bool live_intervals_valid; - bool failed() const { return fail_msg != NULL; @@ -337,36 +295,26 @@ public: return fail_msg; } - src_reg src_reg_for_float(float val); - - void fail(const char *msg, ...); - - int virtual_grf_alloc(int size); - int reg_allocate_trivial(int first_non_payload_grf); - int reg_allocate(int first_non_payload_grf); - unsigned move_grf_array_access_to_scratch(); - void calculate_live_intervals(); - bool dead_code_eliminate(); - bool virtual_grf_interferes(int a, int b); - bool opt_copy_propagation(); - bool opt_algebraic(); - bool opt_compute_to_mrf(); - void optimize(); - - vec4_instruction *emit(vec4_instruction *inst); - - vec4_instruction *emit(enum opcode opcode); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); - - vec4_instruction *emit(enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1); +protected: + /** +* Return a bool indicating whether debugging output should be generated +* for this shader. +*/ + virtual bool get_debug_flag() const = 0; - vec4_instruction *emit(enum opcode opcode, dst_reg dst, - src_reg src0, src_reg src1, src_reg src2); + /** +* Return a string describing the program being compiled, for debugging +* purposes. Caller should not free this string. +*/ + virtual const char *get_debug_name() const = 0; - vec4_instruction *emit_before(vec4_instruction *inst, -vec4_instruction *new_inst); + /** +* \name Instruction constructors +* +* These functions create new vec4_instruction objects. +* +* @{ +*/ vec4_instruction *MOV(dst_reg dst, src_reg src0); vec4_instruction *NOT(dst_reg dst, src_reg src0); @@ -387,15 +335,32 @@ public: uint32_t condition); vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition); vec4_instruction *IF(uint32_t predicate); - vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index); - vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); - vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); - bool try_rewrite_rhs_to_dst(ir_assignment *ir, - dst_reg dst, - src_reg src, - vec4_instruction *pre_rhs_inst, - vec4_instruction *last_rhs_inst); + /** @} */ + + /** +* \name Emitter functions +* +* These functions place new vec4_instruction objects in the generator's +* instruction list. +* +* @{ +*/ + + vec4_instruction *emit(vec4_instruction *inst); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + vec4_instruction *emit_before(vec4_instruction *inst, +vec4_instruction *new_inst); /** * Emit the correct dot-product instruction for the type of arguments @@ -411,17 +376,9
[Mesa-dev] [PATCH 1/3] mesa: Use uniform interfaces in fixed-function fragment shader code
From: Ian Romanick Poking directly at the backing resources works only by luck. Core Mesa code should only know about the gl_uniform_storage structure. Soon other code that looks at samplers will use the gl_uniform_storage structures instead of the data in the gl_program. Signed-off-by: Ian Romanick Cc: Eric Anholt --- src/mesa/main/ff_fragment_shader.cpp | 29 - 1 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 2ce81fe..1806adc 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -42,6 +42,7 @@ extern "C" { #include "program/programopt.h" #include "texenvprogram.h" } +#include "main/uniforms.h" #include "../glsl/glsl_types.h" #include "../glsl/ir.h" #include "../glsl/glsl_symbol_table.h" @@ -1488,22 +1489,40 @@ create_new_program(struct gl_context *ctx, struct state_key *key) /* Set the sampler uniforms, and relink to get them into the linked * program. */ - struct gl_program *fp; - fp = p.shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct gl_shader *const fs = + p.shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]; + struct gl_program *const fp = fs->Program; + + _mesa_generate_parameters_list_for_uniforms(p.shader_program, fs, + fp->Parameters); + + _mesa_associate_uniform_storage(ctx, p.shader_program, fp->Parameters); for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) { char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i); int loc = _mesa_get_uniform_location(ctx, p.shader_program, name); if (loc != -1) { +unsigned base; +unsigned idx; + /* Avoid using _mesa_uniform() because it flags state * updates, so if we're generating this shader_program in a * state update, we end up recursing. Instead, just set the * value, which is picked up at re-link. */ -loc = (loc & 0x) + (loc >> 16); -int sampler = fp->Parameters->ParameterValues[loc][0].f; +_mesa_uniform_split_location_offset(loc, &base, &idx); +assert(idx == 0); + +struct gl_uniform_storage *const storage = + &p.shader_program->UniformStorage[base]; -fp->SamplerUnits[sampler] = i; +/* Update the storage, the SamplerUnits in the shader program, and + * the SamplerUnits in the assembly shader. + */ +storage->storage[idx].i = i; +fp->SamplerUnits[storage->sampler] = i; +p.shader_program->SamplerUnits[storage->sampler] = i; +_mesa_propagate_uniforms_to_driver_storage(storage, 0, 1); } } _mesa_update_shader_textures_used(fp); -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] mesa: Use static buffer for uniform name
From: Ian Romanick Signed-off-by: Ian Romanick Cc: Eric Anholt --- src/mesa/main/ff_fragment_shader.cpp |7 ++- 1 files changed, 6 insertions(+), 1 deletions(-) diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index 1806adc..4808855 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -1499,7 +1499,12 @@ create_new_program(struct gl_context *ctx, struct state_key *key) _mesa_associate_uniform_storage(ctx, p.shader_program, fp->Parameters); for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) { - char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i); + /* Enough space for 'sampler_999\0'. + */ + char name[12]; + + snprintf(name, sizeof(name), "sampler_%d", i); + int loc = _mesa_get_uniform_location(ctx, p.shader_program, name); if (loc != -1) { unsigned base; -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] mesa: Track fixed-function fragment shader as a shader
From: Ian Romanick Previously the fixed-function fragment shader was tracked as a gl_program. This means that it shows up in the driver as a Mesa IR program instead of as a GLSL IR program. If a driver doesn't generate Mesa IR from the GLSL IR, that program is empty. If the program is empty there is either no rendering or a GPU hang. Signed-off-by: Ian Romanick Cc: Eric Anholt --- src/mesa/drivers/dri/i965/brw_wm.c|2 +- src/mesa/drivers/dri/i965/brw_wm_state.c |4 ++-- src/mesa/drivers/dri/i965/gen6_wm_state.c |4 +++- src/mesa/drivers/dri/i965/gen7_wm_state.c |4 +++- src/mesa/main/mtypes.h|2 ++ src/mesa/main/shaderapi.c | 29 + src/mesa/main/state.c | 10 ++ 7 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 377b8ba..4eccea3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -520,7 +520,7 @@ brw_upload_wm_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_WM_PROG, &key, sizeof(key), &brw->wm.prog_offset, &brw->wm.prog_data)) { - bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, + bool success = do_wm_prog(brw, ctx->Shader._CurrentFragmentProgram, fp, &key); (void) success; assert(success); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index e1791c2..dd67795 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -169,9 +169,9 @@ brw_upload_wm_unit(struct brw_context *brw) * If using the fragment shader backend, the program is always * 8-wide. If not, it's always 16. */ - if (ctx->Shader.CurrentFragmentProgram) { + if (ctx->Shader._CurrentFragmentProgram) { struct brw_shader *shader = (struct brw_shader *) - ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; + ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL && shader->ir != NULL) { wm->wm5.enable_8_pix = 1; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 271a9ae..5312f21 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -135,7 +135,9 @@ upload_wm_state(struct brw_context *brw) dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; /* Use ALT floating point mode for ARB fragment programs, because they -* require 0^0 == 1. +* require 0^0 == 1. Even though _CurrentFragmentProgram is used for +* rendering, CurrentFragmentProgram is used for this check to +* differentiate between the GLSL and non-GLSL cases. */ if (ctx->Shader.CurrentFragmentProgram == NULL) dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index f38d2f1..3d34c51 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -147,7 +147,9 @@ upload_ps_state(struct brw_context *brw) dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Use ALT floating point mode for ARB fragment programs, because they -* require 0^0 == 1. +* require 0^0 == 1. Even though _CurrentFragmentProgram is used for +* rendering, CurrentFragmentProgram is used for this check to +* differentiate between the GLSL and non-GLSL cases. */ if (intel->ctx.Shader.CurrentFragmentProgram == NULL) dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 285ec07..3abc2b3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2301,6 +2301,8 @@ struct gl_shader_state struct gl_shader_program *CurrentGeometryProgram; struct gl_shader_program *CurrentFragmentProgram; + struct gl_shader_program *_CurrentFragmentProgram; + /** * Program used by glUniform calls. * diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index c4d01ab..85e3406 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -124,6 +124,8 @@ _mesa_free_shader_state(struct gl_context *ctx) NULL); _mesa_reference_shader_program(ctx, &ctx->Shader.CurrentFragmentProgram, NULL); + _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram, + NULL); _mesa_reference_shader_program(ctx, &ctx->Shader.ActiveProgram, NULL); } @@ -876,6 +878,33 @@ use_shader_program(struct gl_context *ctx, GLenum type, if (*target != shProg) { FLUSH_VERTICES(ctx, _NEW_P
[Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage
From: Ian Romanick If glUniform1i and friends are going to dump data directly in driver-allocated, the pointers have to be updated when the storage moves. This should fix the regressions seen with commit 7199096. I'm not sure if this is the only place that needs this treatment. I'm a little uncertain about the various functions in st_glsl_to_tgsi that modify the TGSI IR and try to propagate changes about that up to the gl_program. That seems sketchy to me. Signed-off-by: Ian Romanick Cc: Vadim Girlin Cc: Bryan Cain --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0bf6766..c8385bb 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4571,6 +4571,13 @@ st_translate_program( t->pointSizeOutIndex = -1; t->prevInstWrotePointSize = GL_FALSE; + for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { + struct gl_uniform_storage *const storage = +&program->shader_program->UniformStorage[i]; + + _mesa_uniform_detach_all_driver_storage(storage); + } + /* * Declare input attributes. */ @@ -4797,6 +4804,13 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } + /* This has to be done last. Any operation the can cause +* prog->ParameterValues to get reallocated (e.g., anything that adds a +* program constant) has to happen before creating this linkage. +*/ + _mesa_associate_uniform_storage(ctx, program->shader_program, + proginfo->Parameters); + out: if (t) { FREE(t->insn); -- 1.7.6.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/13] i965 vs: Split vec4_visitor into two classes.
On 16 November 2011 11:07, Paul Berry wrote: > In order to implement transform feedback on Gen6, we need to create a > geometry shader program that will stream out the transformed vertices. > It makes sense to re-use parts of the vec4_visitor infrastructure to > do this, because (a) VS and GS handle data very similarly, (b) it > allows us to take advantage of the optimization and register > allocation code in vec4_visitor, and (c) in the long run when we add > support for GLSL geometry shaders, we are going to need to use the > vec4_visitor infrastructure to compile them. > > This patch series gets us ready for re-using parts of vec4_visitor, by > breaking it into a base class containing the parts we are likely to > re-use, and a derived class containing the parts we aren't. The base > class, vec4_generator, is responsible for code generation, register > allocation, and optimization; it is independent of GLSL IR (with one > trivial exception), and mostly independent of the type of shader being > compiled. The derived class, vec4_visitor, is responsible for > visiting the GLSL IR and performing vertex-shader-specific actions. > > This split was already implicitly present in the code; this patch > series merely makes it more explicit. > > The idea is that for now, we can make a class that derives from > vec4_generator to do transform feedback, and we won't have to worry > too much about interactions with the GLSL IR visiting code in > vec4_visitor. Later, when we add support for GLSL geometry shaders, > we'll have to do further work on vec4_visitor, but we won't have to > touch vec4_generator very much. > > Patches 01-11 are all preparatory refactoring: they make small > adjustments to the functioning of vec4_visitor so that the separation > between its IR visiting parts and its code generation parts is a > little more strict. Patch 12 splits the classes up, and then patch 13 > reformats the class declarations to clarify the relationship between > the two classes. > > The split isn't perfect yet; there are a few places where > vec4_generator still makes implicit assumptions that are vertex-shader > specific (particularly in register allocation), and there are a few > places where vec4_visitor still does things that arguably should be > part of code generation (such as setting up scratch registers). But I > think this is close enough that we can clean those up with later > patches on an as-needed basis. > > Note: To ease reading the diffs, I didn't make any effort to move code > between .cpp files. As a result, vec4_visitor and vec4_generator code > are intermingled in brw_vec4_visitor.cpp, brw_vec4_emit.cpp, > brw_vec4.cpp, brw_vec4_copy_propagation.cpp, and > brw_vec4_reg_allocate.cpp. Once this patch is reviewed and we've > settled on how we want the classes to be split up, I'll make a > follow-up patch that rearranges the .cpp files to match the changes to > the class structure. > > [PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class. > [PATCH 02/13] i965 vs: Make reg_allocate() return total_grf. > [PATCH 03/13] i965 vs: Return last_scratch from > move_grf_array_access_to_scratch() > [PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function > arguments. > [PATCH 05/13] i965 vs: Move brw_set_access_mode() call into > generate_code(). > [PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of > generate_code(). > [PATCH 07/13] i965 vs: Streamline the way fail is tracked. > [PATCH 08/13] i965 vs: Add get_debug_flag() > [PATCH 09/13] i965 vs: Add get_debug_name(). > [PATCH 10/13] i965 vs: Make a separate optimize() method. > [PATCH 11/13] i965 vs: Make get_assignment_lhs() a method > [PATCH 12/13] i965 vs: Separate IR visiting from code generation in > vec4_visitor; > [PATCH 13/13] i965 vs: Rearrange class declarations. > I should also mention: if you'd like to look at these patches in context, you can pull them from my github repo (git:// github.com/stereotype441/mesa.git). The branch is called "vec4-split". If you'd like to see what the final vec4_generator and vec4_visitor classes look like, you can view them here: https://github.com/stereotype441/mesa/blob/vec4-split/src/mesa/drivers/dri/i965/brw_vec4.h ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.
blinking-teapot is an UBO demo. --- src/glsl/CMakeLists.txt |1 + src/glsl/Makefile.am |2 + src/glsl/blinking-teapot.c| 177 + src/glsl/blinking-teapot.frag | 31 +++ src/glsl/blinking-teapot.vert | 16 5 files changed, 227 insertions(+), 0 deletions(-) create mode 100644 src/glsl/blinking-teapot.c create mode 100644 src/glsl/blinking-teapot.frag create mode 100644 src/glsl/blinking-teapot.vert diff --git a/src/glsl/CMakeLists.txt b/src/glsl/CMakeLists.txt index 11f8e37..aeaf526 100644 --- a/src/glsl/CMakeLists.txt +++ b/src/glsl/CMakeLists.txt @@ -24,6 +24,7 @@ set (targets bitmap brick bump + blinking-teapot convolutions deriv fragcoord diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 5a9a802..d9b6f9b 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -37,6 +37,7 @@ bin_PROGRAMS = \ bitmap \ brick \ bump \ + blinking-teapot \ convolutions \ deriv \ fragcoord \ @@ -77,6 +78,7 @@ bitmap_LDADD = ../util/libutil.la bezier_LDADD = ../util/libutil.la brick_LDADD = ../util/libutil.la bump_LDADD = ../util/libutil.la +blinking_teapot_LDADD = ../util/libutil.la convolutions_LDADD = ../util/libutil.la deriv_LDADD = ../util/libutil.la geom_sprites_LDADD = ../util/libutil.la diff --git a/src/glsl/blinking-teapot.c b/src/glsl/blinking-teapot.c new file mode 100644 index 000..53b385b --- /dev/null +++ b/src/glsl/blinking-teapot.c @@ -0,0 +1,177 @@ +/** + * blinking-teapot demo. It displays a teapot whose color go from blue to pink. + * The color variation is handled by uniform buffer object. + * Sources mostly from http://www.jotschi.de/?p=427 which uses UBO SPEC example + * + * Vincent Lejeune 2011 + */ + +#include +#include +#include +#include +#include "glut_wrap.h" +#include "shaderutil.h" + + +static int mouse_old_x, mouse_old_y; +static int mouse_buttons = 0; +static float rotate_x = 0.0, rotate_y = 0.0; +static float translate_z = -2.0; + +static float delta = 0.01; +static GLfloat wf, hf; + + +static const GLchar *names[] = { "SurfaceColor", "WarmColor", "CoolColor", + "DiffuseWarm", "DiffuseCool" +}; + +static GLuint buffer_id, uniformBlockIndex, uindex, vshad_id, fshad_id, + prog_id; + +static GLsizei uniformBlockSize; +static GLint singleSize; +static GLint offset; + +static const GLfloat colors[] = + { 0.45, 0.45, 1, 1, 0.45, 0.45, 1, 1, 0.75, 0.75, 0.75, 1, + 0.0, 0.0, 1.0, 1, 0.0, 1.0, 0.0, 1, +}; + +static GLfloat teapot_color = 1; + +static void +reshape (int w, int h) +{ + wf = (GLfloat) w; + hf = (GLfloat) h; + glMatrixMode (GL_PROJECTION); + glLoadIdentity (); + gluPerspective (60.0, wf / hf, 0.1, 100.0); +} + +static void +init_opengl (void) +{ + + if (!ShadersSupported ()) +exit (1); + + vshad_id = CompileShaderFile (GL_VERTEX_SHADER, "blinking-teapot.vert"); + fshad_id = CompileShaderFile (GL_FRAGMENT_SHADER, "blinking-teapot.frag"); + prog_id = LinkShaders (vshad_id, fshad_id); + + UseProgram (prog_id); + + reshape (680, 400); + + glGenBuffers (1, &buffer_id); + + glBindBuffer (GL_UNIFORM_BUFFER, buffer_id); + glBufferData (GL_UNIFORM_BUFFER, uniformBlockSize, NULL, GL_DYNAMIC_DRAW); + + glBindBufferBase (GL_UNIFORM_BUFFER, 0, buffer_id); + glUniformBlockBinding (prog_id, uniformBlockIndex, 0); + + glGetUniformIndices (prog_id, 1, &names[2], &uindex); + + glGetActiveUniformsiv (prog_id, 1, &uindex, GL_UNIFORM_OFFSET, &offset); + glGetActiveUniformsiv (prog_id, 1, &uindex, GL_UNIFORM_SIZE, &singleSize); + + glViewport (0, 0, 680, 400); + glBufferData (GL_UNIFORM_BUFFER, 80, colors, GL_DYNAMIC_DRAW); +} + +static void +render (void) +{ + glClearColor (0.0, 0.0, 0.0, 0.0); + glClear (GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); + glUseProgram (prog_id); + glEnable (GL_DEPTH_TEST); + + glMatrixMode (GL_MODELVIEW); + glLoadIdentity (); + glTranslatef (0.0, 0.0, translate_z); + glRotatef (rotate_x, 1.0, 0.0, 0.0); + glRotatef (rotate_y, 0.0, 1.0, 0.0); + glColor3f (1.0, 1.0, 1.0); + + glBindBuffer (GL_UNIFORM_BUFFER, buffer_id); + glBufferSubData (GL_UNIFORM_BUFFER, offset, 4 * singleSize, &teapot_color); + + glFrontFace (GL_CW); + glutSolidTeapot (0.6); + glFrontFace (GL_CCW); + glutSwapBuffers (); + glutPostRedisplay (); + + teapot_color += delta; + + if (teapot_color > 1.0) +{ + delta = -0.01; +} + + if (teapot_color < 0.0) +{ + delta = +0.01; +} + +} + +static void +mouse (int button, int state, int x, int y) +{ + if (state == GLUT_DOWN) +{ + mouse_buttons |= 1 << button; +} + else if (state == GLUT_UP) +{ + mouse_buttons = 0; +} + + mouse_old_x = x; + mouse_old_y = y; + glutPostRedisplay (); +} + +static void +motion (int x, int y) +{ + float dx, dy; + dx = x - mouse_old_x; + dy = y - mouse_old_y; + + if (mouse_buttons & 1) +{ + rotate_x += dy * 0
Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage
This patch fixes memory corruption, thanks. Also there is another regression with Ligthsmark - incorrect rendering. I've bisected it and came to the same commit again. Most noticeable difference is in the very last scene - it should fade away, but it doesn't. I can make the screenshots of good and bad rendering if you need. Vadim On Wed, 2011-11-16 at 11:13 -0800, Ian Romanick wrote: > From: Ian Romanick > > If glUniform1i and friends are going to dump data directly in > driver-allocated, the pointers have to be updated when the storage > moves. This should fix the regressions seen with commit 7199096. > > I'm not sure if this is the only place that needs this treatment. I'm > a little uncertain about the various functions in st_glsl_to_tgsi that > modify the TGSI IR and try to propagate changes about that up to the > gl_program. That seems sketchy to me. > > Signed-off-by: Ian Romanick > Cc: Vadim Girlin > Cc: Bryan Cain > --- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++ > 1 files changed, 14 insertions(+), 0 deletions(-) > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 0bf6766..c8385bb 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -4571,6 +4571,13 @@ st_translate_program( > t->pointSizeOutIndex = -1; > t->prevInstWrotePointSize = GL_FALSE; > > + for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { > + struct gl_uniform_storage *const storage = > + &program->shader_program->UniformStorage[i]; > + > + _mesa_uniform_detach_all_driver_storage(storage); > + } > + > /* > * Declare input attributes. > */ > @@ -4797,6 +4804,13 @@ st_translate_program( > t->insn[t->labels[i].branch_target]); > } > > + /* This has to be done last. Any operation the can cause > +* prog->ParameterValues to get reallocated (e.g., anything that adds a > +* program constant) has to happen before creating this linkage. > +*/ > + _mesa_associate_uniform_storage(ctx, program->shader_program, > +proginfo->Parameters); > + > out: > if (t) { >FREE(t->insn); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
> > See the second patch I posted (mesa: initialize stencilMap, Stride if > stencilRb==depthRb) > > -Brian > > Hi Brian, the second patch did it. No more segfaults. Great. Thanks! Regards, Ingo ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage
On 11/16/2011 12:38 PM, Vadim Girlin wrote: This patch fixes memory corruption, thanks. Also there is another regression with Ligthsmark - incorrect rendering. I've bisected it and came to the same commit again. Most noticeable difference is in the very last scene - it should fade away, but it doesn't. I can make the screenshots of good and bad rendering if you need. I don't think screenshots will help. If there are errors that bisect to that commit, it likely means that some uniform isn't getting set to the right value. I'll poke around a bit more. Vadim On Wed, 2011-11-16 at 11:13 -0800, Ian Romanick wrote: From: Ian Romanick If glUniform1i and friends are going to dump data directly in driver-allocated, the pointers have to be updated when the storage moves. This should fix the regressions seen with commit 7199096. I'm not sure if this is the only place that needs this treatment. I'm a little uncertain about the various functions in st_glsl_to_tgsi that modify the TGSI IR and try to propagate changes about that up to the gl_program. That seems sketchy to me. Signed-off-by: Ian Romanick Cc: Vadim Girlin Cc: Bryan Cain --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 0bf6766..c8385bb 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4571,6 +4571,13 @@ st_translate_program( t->pointSizeOutIndex = -1; t->prevInstWrotePointSize = GL_FALSE; + for (i = 0; i< program->shader_program->NumUserUniformStorage; i++) { + struct gl_uniform_storage *const storage = + &program->shader_program->UniformStorage[i]; + + _mesa_uniform_detach_all_driver_storage(storage); + } + /* * Declare input attributes. */ @@ -4797,6 +4804,13 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } + /* This has to be done last. Any operation the can cause +* prog->ParameterValues to get reallocated (e.g., anything that adds a +* program constant) has to happen before creating this linkage. +*/ + _mesa_associate_uniform_storage(ctx, program->shader_program, + proginfo->Parameters); + out: if (t) { FREE(t->insn); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] ir_dereference_variable @ 0x1195e90 specifies undeclared variable
Hi there, running piglit test 'shader_runner tests/spec/glsl-1.10/execution/samplers/in-parameter-struct.shader_test -auto -fbo' the program terminated with signal 6, aborted. glxinfo: OpenGL renderer string: Gallium 0.4 on AMD BARTS OpenGL version string: 2.1 Mesa 7.12-devel (git-010dc29) OpenGL shading language version string: 1.20 Here is the backtrace: Core was generated by `bin/shader_runner tests/spec/glsl-1.10/execution/samplers/in-parameter-struct.s'. Program terminated with signal 6, Aborted. #0 0x7fd947363405 in *__GI_raise (sig=) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 64 ../nptl/sysdeps/unix/sysv/linux/raise.c: Datei oder Verzeichnis nicht gefunden. in ../nptl/sysdeps/unix/sysv/linux/raise.c (gdb) bt #0 0x7fd947363405 in *__GI_raise (sig=) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 #1 0x7fd947366680 in *__GI_abort () at abort.c:92 #2 0x7fd9449f1c5c in ir_validate::visit(ir_dereference_variable*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #3 0x7fd9449ebba5 in ir_dereference_record::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #4 0x7fd9449eb8ee in ir_texture::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #5 0x7fd9449eb858 in ir_expression::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #6 0x7fd9449ebc5e in ir_assignment::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #7 0x7fd9449eb700 in ir_function_signature::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #8 0x7fd9449eb7b4 in ir_function::accept(ir_hierarchical_visitor*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #9 0x7fd9449eb4e0 in visit_list_elements(ir_hierarchical_visitor*, exec_list*, bool) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #10 0x7fd9449f1eda in validate_ir_tree(exec_list*) () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #11 0x7fd9449c07e2 in st_link_shader () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #12 0x7fd9449a633a in _mesa_glsl_link_shader () from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so #13 0x00434a83 in link_and_use_shaders () #14 0x00437853 in piglit_init () #15 0x004386ec in main () Regards, Ingo ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] intel: Detile stencil buffer only if necessary
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 11/15/2011 06:16 PM, Eric Anholt wrote: > On Sun, 13 Nov 2011 22:32:15 -0800, Chad Versace > wrote: >> In intel_map_renderbuffer_s8(), detile and copy the stencil buffer into >> the temporary buffer only if the renderbuffer is mapped in read mode. If >> the caller never going to read the bits but just clobber them, then it's >> wasted effort to detile the stencil buffer. >> >> CC: Eric Anholt >> Signed-off-by: Chad Versace > > I don't think this is correct. > > Imagine something that isn't writing all of the pixels, but definitely > isn't reading data before writing. It might sensibly ask for > GL_WRITE_BIT but not GL_READ_BIT, and then all the pixels it doesn't > write get trashed when the whole rectangle is written back out. > > If the mode contained GL_MAP_INVALIDATE_RANGE_BIT (basically, > interpreting the mode argument like GL_ARB_map_buffer_range, which was > my intent if not Brian's), this would be reasonable. You're correct. This patch is nakd. - Chad Versace chad.vers...@linux.intel.com -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iQIcBAEBAgAGBQJOxCQDAAoJEAIvNt057x8iMnQQALJW9MK18IHdQ08RX0lxLDkW Tzy5U3zv+j8NxzdQl4aQyA2lXmpNZHzPB6L3AV0Yog+I3ey63A0ADNi94brbu4LS EX/tu+Vb0VsSTC9/aSazDDLQJbLecRo2ZdYueOn/3eKJONmHeCZz8XZrn5wgSE1W Uh4dSR7cORw1vBZBugB5vD55JlRrZtDM45d6s/VRzY2O2ujrBQvr/lNbTZ35HNd7 zIL+67y50RBWMYIiyyzyXHa8lWVwllo1K9CM0LBmebxcX5t6rCn5QyZcT5F0DQAn CyKpKBVDt2mkaOqgnFUiXSTETTOKCla9tzBjNS6OlI65wAmYRZZz0he1rrdl1g6Y kKkI61qWwCOyKMLdRxCWoXYY3QmenIEe4++idtlTauFgKtSZE2v4oAx2X1Z4FvY8 vbcpje3tbt7khMQnFVrVU/nHIyKw6Ko2r+WP32Cr2j044f1VAj+N1zIpCNdPTsd6 ZvJWdAUcPM+vN9fXacmWJAheE2sfzXCJ9o6ffCpRs7LYSv2ZuVlpyZ53a06D5dl/ ya9A9dMbjmFr0MGfELWHKgyRv+Cx/0XxrgvmQNxz6O4TUp6V+xiE6Qiyr5kj90wI bah5zurL3mRVTH0Dp/pQFOxiIIzovCTXGHi35ymRL1/rLWyhgApa2jngVqqeawEi 9Ea+IzVuGlyke/v2kocL =PH5l -END PGP SIGNATURE- ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default
On 11/16/2011 01:42 PM, Theiss, Ingo wrote: See the second patch I posted (mesa: initialize stencilMap, Stride if stencilRb==depthRb) -Brian Hi Brian, the second patch did it. No more segfaults. Great. Thanks! OK, I'll push the patch. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register
Vadim Girlin [2011-11-16 22:28]: > On Wed, 2011-11-16 at 18:57 +0100, Tilman Sauerbeck wrote: > > Vadim Girlin [2011-11-13 22:08]: > > > Emit MOVA* instruction only when AR is used. > > > > > > Signed-off-by: Vadim Girlin > > > --- > > > > > > Tested on evergreen: no regressions, fixes some variable-indexing tests. > > > > On my rv730 this patch causes broken rendering in the game Heroes of > > Newerth. I have an apitrace file that reproduces the issue but > > unfortunately that file is 3.x GB. > > > > I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad) > > available at http://files.code-monkey.de/piglit.tar.bz2 . > > > > Please create the shader dump for one of the broken tests, e.g. : > > R600_DUMP_SHADERS=1 bin/shader_runner > tests/spec/glsl-1.10/execution/variable-indexing/fs-temp-array-mat2-col-row-wr.shader_test > -auto -fbo Attached. Thanks, Tilman -- A: Because it messes up the order in which people normally read text. Q: Why is top-posting such a bad thing? A: Top-posting. Q: What is the most annoying thing on usenet and in e-mail? fs-temp-array-mat2-col-row-wr.dump.gz Description: application/gunzip pgpwGCOR0KfVQ.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] read_rgba_pixels: Don't force clamping if the renderbuffer is already clamped.
On 11/16/2011 09:44 AM, Michel Dänzer wrote: From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/mesa/main/readpix.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 8550618..582adc3 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -285,7 +285,8 @@ read_rgba_pixels( struct gl_context *ctx, return; if ((ctx->Color._ClampReadColor == GL_TRUE || type != GL_FLOAT)&& - !_mesa_is_integer_format(format)) { + !_mesa_is_integer_format(format)&& + _mesa_get_format_datatype(rb->Format) != GL_UNSIGNED_NORMALIZED) { transferOps |= IMAGE_CLAMP_BIT; } Looks good to me. Reviewed-by: Brian Paul ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] st/mesa: rewrite the primitive restart fallback code
Previously we were mapping/unmapping the index buffer each time we found the restart index in the buffer. This is bad when the restart index is frequently used. Now just map the index buffer once, scan it to produce a list of sub-primitives, unmap the buffer, then draw the sub-primitives. Also, clean up the logic of testing for indexed primitives and calling handle_fallback_primitive_restart(). Don't call it for non-indexed primitives. --- src/mesa/state_tracker/st_draw.c | 224 +- 1 files changed, 124 insertions(+), 100 deletions(-) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index cb518e1..bb7decf 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -648,45 +648,89 @@ check_uniforms(struct gl_context *ctx) } } -/** Helper code for primitive restart fallback */ -#define DO_DRAW(pipe, cur_start, cur_count) \ - do { \ - info.start = cur_start; \ - info.count = cur_count; \ - if (u_trim_pipe_prim(info.mode, &info.count)) { \ - if (transfer) \ -pipe_buffer_unmap(pipe, transfer); \ - pipe->draw_vbo(pipe, &info); \ - if (transfer) { \ -ptr = pipe_buffer_map(pipe, ibuffer->buffer, PIPE_TRANSFER_READ, &transfer); \ -assert(ptr != NULL); \ -ptr = ADD_POINTERS(ptr, ibuffer->offset); \ - } \ - } \ - } while(0) - -/** More helper code for primitive restart fallback */ -#define PRIM_RESTART_LOOP(elements) \ - do { \ - for (i = start; i < end; i++) { \ - if (elements[i] == info.restart_index) { \ -if (cur_count > 0) { \ - /* draw elts up to prev pos */ \ - DO_DRAW(pipe, cur_start, cur_count); \ -} \ -/* begin new prim at next elt */ \ -cur_start = i + 1; \ -cur_count = 0; \ - } \ - else { \ -cur_count++; \ + +struct sub_primitive +{ + unsigned start, count; +}; + + +/** + * Scan the elements array to find restart indexes. Return a list + * of primitive (start,count) pairs to indicate how to draw the sub- + * primitives delineated by the restart index. + */ +static struct sub_primitive * +find_sub_primitives(const void *elements, unsigned element_size, +unsigned start, unsigned end, unsigned restart_index, +unsigned *num_sub_prims) +{ + const unsigned max_prims = end - start; + struct sub_primitive *sub_prims; + unsigned i, cur_start, cur_count, num; + + sub_prims = (struct sub_primitive *) + malloc(max_prims * sizeof(struct sub_primitive)); + + if (!sub_prims) { + *num_sub_prims = 0; + return NULL; + } + + cur_start = start; + cur_count = 0; + num = 0; + +#define SCAN_ELEMENTS(TYPE) \ + for (i = start; i < end; i++) { \ + if (((const TYPE *) elements)[i] == restart_index) { \ + if (cur_count > 0) { \ +assert(num < max_prims); \ +sub_prims[num].start = cur_start; \ +sub_prims[num].count = cur_count; \ +num++; \ } \ + cur_start = i + 1; \ + cur_count = 0; \ } \ - if (cur_count > 0) { \ - DO_DRAW(pipe, cur_start, cur_count); \ + else { \ + cur_count++; \ } \ - } while (0) + } \ + if (cur_count > 0) { \ + assert(num < max_prims); \ + sub_prims[num].start = cur_start; \ + sub_prims[num].count = cur_count; \ + num++; \ + } + switch (element_size) { + case 1: + SCAN_ELEMENTS(ubyte); + break; + case 2: + SCAN_ELEMENTS(ushort); + break; + case 4: + SCAN_ELEMENTS(uint); + break; + default: + assert(0 && "bad index_size in find_sub_primitives()"); + } + +#undef SCAN_ELEMENTS + + *num_sub_prims = num; + + return sub_prims; +} + + +/** + * For gallium drivers that don't support the primitive restart + * feature, handle it here by breaking up the indexed primitive into + * sub-primitives. + */ static void handle_fallback_primitive_restart(struct pipe_context *pipe, const struct _mesa_index_buffer *ib, @@ -698,75 +742,57 @@ handle_fallback_primitive_restart(struct pipe_context *pipe, const unsigned end = start + count; struct pipe_draw_info info = *orig_info; struct pipe_transfer *transfer = NULL; - unsigned instance, i, cur_start, cur_count; - const void *ptr; - - info.primitive_restart = FALSE; + unsigned instance, i; + const void *ptr = NULL; + struct sub_primitive *sub_prims; + unsigned num_sub_prims; - if (!info.indexed) { - /* Splitting the draw arrays call is handled by the VBO module */ - if (u_trim_pipe_prim(info.mode, &info.count)) - pipe->draw_vbo(pipe, &info); + assert(info.indexed); + assert(ibuffer->buffer); + assert(ib); + if (!ibuffer->buffer || !ib) return; - } - /* info.i
Re: [Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.
On 11/16/2011 12:45 PM, vlj wrote: blinking-teapot is an UBO demo. --- src/glsl/CMakeLists.txt |1 + src/glsl/Makefile.am |2 + src/glsl/blinking-teapot.c| 177 + src/glsl/blinking-teapot.frag | 31 +++ src/glsl/blinking-teapot.vert | 16 5 files changed, 227 insertions(+), 0 deletions(-) create mode 100644 src/glsl/blinking-teapot.c create mode 100644 src/glsl/blinking-teapot.frag create mode 100644 src/glsl/blinking-teapot.vert This looks better. Two more suggestions: 1. Call glutExtensionSupported("GL_ARB_uniform_buffer_object") to check that the extension is supported. 2. It would be nice if hitting Esc would exit the demo, like the other demos. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] r600g: don't change the order of writes in merge_inst_group
Merge may produce incorrect order of operations for r600-eg: x: inst1 R0.x, ... ; //from current group ... t: inst0 R0.x, ... ; //from previous group, same destination Result of inst1 will be lost. So compare destinations and don't allow this. Signed-off-by: Vadim Girlin --- cc: Tilman Sauerbeck This should fix the regression with "r600g: lazy load for AR register" for r600/r700. src/gallium/drivers/r600/r600_asm.c | 15 ++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 6b1ad65..1ab16f2 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1010,6 +1010,11 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu result[i] = prev[i]; result[4] = slots[i]; } else if (is_alu_any_unit_inst(bc, prev[i])) { + if (slots[i]->dst.sel == prev[i]->dst.sel && + (slots[i]->dst.write == 1 || slots[i]->is_op3) && + (prev[i]->dst.write == 1 || prev[i]->is_op3)) + return 0; + result[i] = slots[i]; result[4] = prev[i]; } else @@ -1018,8 +1023,16 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu return 0; } else if(!slots[i]) { continue; - } else + } else { + if (max_slots == 5 && slots[i] && prev[4] && + slots[i]->dst.sel == prev[4]->dst.sel && + slots[i]->dst.chan == prev[4]->dst.chan && + (slots[i]->dst.write == 1 || slots[i]->is_op3) && + (prev[4]->dst.write == 1 || prev[4]->is_op3)) + return 0; + result[i] = slots[i]; + } alu = slots[i]; num_once_inst += is_alu_once_inst(bc, alu); -- 1.7.7.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] Make gl_array_object use an array of gl_client_attribs
On 11/11/2011 10:07 AM, Mathias Fröhlich wrote: Hi all, following a patch series to make gl_array_object use a VERT_ATTRIB_* indexed array of gl_client_array structs. Since we have already 33 client arrays in an array object the VERT_BIT_* and for vertex shader inputs bitmaps are extended to 64 bits. Drivers and shader code is updated to use the 64 bits bitmasks for the vertex array inputs. A first set of cleanup patches is included. But more is probably possible past this change. The patchset survives a r600 piglit run on my favourite rv670 development machine with no regressions. I get about 10% shorter draw stage times with OpenSceneGraph based applications. Please review. I did a quick read of these patches and they look pretty good. I'd like to go over them again though. Another set of eyes would be good too. I'll try to finish reviewing by the end of the week. -Brian ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 6/6] intel: Detile stencil buffer only if necessary
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 11/15/2011 06:17 PM, Eric Anholt wrote: > On Tue, 15 Nov 2011 08:06:34 -0800, Chad Versace > wrote: >> -BEGIN PGP SIGNED MESSAGE- >> Hash: SHA1 >> >> On 11/14/2011 04:03 PM, Eric Anholt wrote: >>> On Mon, 14 Nov 2011 14:29:51 -0800, Chad Versace >>> wrote: Without 3, there were lots of bugs. But now that you ask, I don't remember exactly why the bug happened. I'm digging into that now. (I wrote the patch at midnight, and I dont' remember much from that hour...) >>> >>> Ah! It all makes sense now. >>> >>> I didn't guess that, because my midnight patches never have such good >>> commit messages :) >> >> Do rb's for patches 5 and 6 come with your revelation? >> >> The patches really do fix bugs, and I'm feeling kinda lazy about splitting >> them up now. > > I thought the conclusion yesterday was that the lacks-explanation third > of patch 5 was likely not required. What happened, or am I > misremembering? Part 3 of patch 5 is required. I did some digging and discovered why. Let's carefully walk through the reason. Suppose that irb->wrapped_depth is installed at fb->_DepthBuffer and that irb->wrapped_stencil is installed at fb->_StencilBuffer; that is, this patch is applied without part 3. When swrast_render_start is called, in general it is impossible to know which buffers its caller will access. So we must map all three of fb->Attachment[BUFFER_DEPTH] fb->Attachment[BUFFER_DEPTH]->wrapped_depth (which is fb->_DepthBuffer) fb->Attachment[BUFFER_DEPTH]->wrapped_stencil (which is fb->_StencilBuffer) When swrast_render_finish() is called, in general it is impossible to know which buffers were accessed. So we must unmap all three of the above buffers. The order in which they are unmapped is signficant, because a scatter/gather operation occurs for intel_unmap_renderbuffer(fb->Attachment[BUFFER_DEPTH]) intel_unmap_renderbuffer(fb->Attachment[BUFFER_DEPTH]->wrapped_stencil) Now consider two cases. Case 1 - -- Suppose that swrast_render_finish's caller wrote to fb->_StencilBuffer and did not access fb->Attachment[BUFFER_DEPTH]. Then we must perform the unmaps in this order: 1. fb->Attachment[BUFFER_DEPTH] 2. fb->Attachment[BUFFER_DEPTH]->wrapped_stencil Why? Unmap 1 scatters the s8 bits of the s8z24 buffer into the stencil buffer Unnmap 2 copies the temporary untiled stencil buffer into the tiled stencil buffer. Since fb->_StencilBuffer now holds the real stencil buffer contents, unmap 2 needs to win. Case 2 - -- Suppose that swrast_render_finish's caller does not access fb->_StencilBuffer and writes to the stencil bits of fb->Attachment[BUFFER_DEPTH]. Then we must perform the unmaps in this order: 1. fb->Attachment[BUFFER_DEPTH]->wrapped_stencil 2. fb->Attachment[BUFFER_DEPTH] Since case 1 and 2 require conflicting orderings, intel_renderbuffer_unmap must be incorrect. The only way to escape the problem is by part 3 of my patch. - Chad Versace chad.vers...@linux.intel.com -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iQIcBAEBAgAGBQJOxE03AAoJEAIvNt057x8iDGYP/Rcd3uZRc18Vq+3Q4NK5UmMp mGcGW0XMFxNSKhSGk0yoBgDT+LuhOY8WgAYK/qUbwK5Y3cL+zG8/vGJKb67Qp29w 8qsqrz/ONbEufDr6er+srOIGhM91lg23yW1wRAJkcnpxcoujVm2N4ljWyWigbShf YywwbFbxVIIJq5+wskp4H1ENg/pjyCJGrCPKRsGF2B0+2k+Hnr/Cwnqr3ASI0lYt S4CtfXrEkx1PvH3s8ZxxmXS2UFe6fcbgChEnvUko6Q0xTKzurr3Zam+35HKRWK0k DmmF4KeGCgrOWL0fsdOaHYvtr2eB7S/4ZRZVN53L+mpJN+oRVv/JVDk2OnkWsx1n /pNsPrDxzUt4bxV+qsiHwNLDTdgRneWKCT6NccRtQrRGnboh67L/Ebgw3UvsyLWi xkMU3rJ2LeDVQVampGtzSZZl7KEZ8BjhgGUViGAxa2iG93TMRjHl0KwKHztvNLaT 7lIgmBE/A3jRSq7U8DqGscT7wkKJG7OBCwJZju1ezAqujdESfEYwrOayTLbiM9oz tG4F1gtNIjPwyefdyZbdnKlKJ1d8u90xaBvKcx/Qi6Q3jSDawS1NKYC0QLZnwNwH 23JYsDNCiHAz5IrQYr2yYV/FejadGNVCNhMt8ERCtgvXKw3EiaaafmRt6WhdLzWs Gm6Ncq/l64B0ykLVYWPm =5YD9 -END PGP SIGNATURE- ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Allow glTexImage2D with a depth component cube map
From: Anuj Phogat Hi, Here is a patch to allow glTexImage2D and glCopyTexImage2D with depth component cubemap. It is listed in mesa work queue with a label "CUBE1". I've tested the patch and output looks visually correct. Please review the fix and let me know if i'm missing something. Thanks Anuj --- src/mesa/main/teximage.c | 10 -- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index acf7187..81f75c8 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1596,7 +1596,7 @@ texture_error_check( struct gl_context *ctx, /* additional checks for depth textures */ if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT) { - /* Only 1D, 2D, rect and array textures supported, not 3D or cubes */ + /* Only 1D, 2D, rect, array and cube textures are supported, not 3D*/ if (target != GL_TEXTURE_1D && target != GL_PROXY_TEXTURE_1D && target != GL_TEXTURE_2D && @@ -1606,7 +1606,13 @@ texture_error_check( struct gl_context *ctx, target != GL_TEXTURE_2D_ARRAY && target != GL_PROXY_TEXTURE_2D_ARRAY && target != GL_TEXTURE_RECTANGLE_ARB && - target != GL_PROXY_TEXTURE_RECTANGLE_ARB) { + target != GL_PROXY_TEXTURE_RECTANGLE_ARB && + target != GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target != GL_TEXTURE_CUBE_MAP_NEGATIVE_X && + target != GL_TEXTURE_CUBE_MAP_POSITIVE_Y && + target != GL_TEXTURE_CUBE_MAP_NEGATIVE_Y && + target != GL_TEXTURE_CUBE_MAP_POSITIVE_Z && + target != GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) { if (!isProxy) _mesa_error(ctx, GL_INVALID_ENUM, "glTexImage(target/internalFormat)"); -- 1.7.7 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] Make gl_array_object use an array of gl_client_attribs
Hi, On Thursday, November 17, 2011 00:47:22 Brian Paul wrote: > I did a quick read of these patches and they look pretty good. I'd > like to go over them again though. Another set of eyes would be good > too. I'll try to finish reviewing by the end of the week. Thanks, I have a busy week too, so I won't get back to that before the weekend also. Greetings Mathias ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage
It's unfortunately been a while since I've done anything with glsl_to_tgsi. What are the "various functions that modify the TGSI IR and try to propagate changes about that up to the gl_program"? If I can see where it is in the code, I can probably remember if there's a reason it was done that way. Bryan On 11/16/2011 01:13 PM, Ian Romanick wrote: > From: Ian Romanick > > If glUniform1i and friends are going to dump data directly in > driver-allocated, the pointers have to be updated when the storage > moves. This should fix the regressions seen with commit 7199096. > > I'm not sure if this is the only place that needs this treatment. I'm > a little uncertain about the various functions in st_glsl_to_tgsi that > modify the TGSI IR and try to propagate changes about that up to the > gl_program. That seems sketchy to me. > > Signed-off-by: Ian Romanick > Cc: Vadim Girlin > Cc: Bryan Cain > --- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++ > 1 files changed, 14 insertions(+), 0 deletions(-) > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 0bf6766..c8385bb 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -4571,6 +4571,13 @@ st_translate_program( > t->pointSizeOutIndex = -1; > t->prevInstWrotePointSize = GL_FALSE; > > + for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { > + struct gl_uniform_storage *const storage = > + &program->shader_program->UniformStorage[i]; > + > + _mesa_uniform_detach_all_driver_storage(storage); > + } > + > /* > * Declare input attributes. > */ > @@ -4797,6 +4804,13 @@ st_translate_program( > t->insn[t->labels[i].branch_target]); > } > > + /* This has to be done last. Any operation the can cause > +* prog->ParameterValues to get reallocated (e.g., anything that adds a > +* program constant) has to happen before creating this linkage. > +*/ > + _mesa_associate_uniform_storage(ctx, program->shader_program, > +proginfo->Parameters); > + > out: > if (t) { >FREE(t->insn); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev