date:20111116

Re: [Mesa-dev] [PATCH 01/15] i965: Add a table of the surface format information from the PRM.

2011-11-16 Thread Kenneth Graunke

On 11/15/2011 05:55 PM, Eric Anholt wrote:
> This will be used to drive chosing formats and determining framebuffer
> completeness, instead of the bunch of ad-hoc checks we have had until
> now.
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  204 
> ++
>  1 files changed, 204 insertions(+), 0 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 0cc6201..f0df730 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -71,6 +71,210 @@ translate_tex_target(GLenum target)
> }
>  }
>  
> +struct surface_format_info {
> +   bool exists;
> +   int sampling;
> +   int filtering;
> +   int shadow_compare;
> +   int chroma_key;
> +   int render_target;
> +   int alpha_blend;
> +   int input_vb;
> +   int streamed_output_vb;
> +   int color_processing;
> +};
> +
> +/* This macro allows us to write the table almost as it appears in the PRM,
> + * while restructuring it to turn it into the C code we want.
> + */
> +#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
> +   [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color },
> +
> +#define Y 0
> +#define x 999
> +/**
> + * This is the table of support for surface (texture, renderbuffer, and 
> vertex
> + * buffer, but not depthbuffer) formats across the various hardware 
> generations.
> + *
> + * The table is formatted to match the documentation, except that the docs 
> have
> + * this ridiculous mapping of Y[*+~^#&] for "supported on DevWhatever".  To 
> put
> + * it in our table, here's the mapping:
> + *
> + * Y*: 45
> + * Y+: 45 (g45/gm45)
> + * Y~: 50 (gen5)
> + * Y^: 60 (gen6)
> + * Y#: 70 (gen7)
> + *
> + * See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
> + */
> +const struct surface_format_info surface_formats[] = {
> +/* smpl filt shad CK  RT  AB  VB  SO  color */
> +   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_UINT)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_UNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_SNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R64G64_FLOAT)
> +   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32X32_FLOAT)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_SSCALED)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32A32_USCALED)
> +   SF( Y, 50,  x,  x,  x,  x,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32B32_FLOAT)
> +   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32B32_SINT)
> +   SF( Y,  x,  x,  x,  x,  x,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32B32_UINT)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R32G32B32_UNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R32G32B32_SNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32_SSCALED)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R32G32B32_USCALED)
> +   SF( Y,  Y,  x,  x,  Y, 45,  Y,  x, 60, 
> BRW_SURFACEFORMAT_R16G16B16A16_UNORM)
> +   SF( Y,  Y,  x,  x,  Y, 60,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16A16_SNORM)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16A16_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16A16_UINT)
> +   SF( Y,  Y,  x,  x,  Y,  Y,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16A16_FLOAT)
> +   SF( Y, 50,  x,  x,  Y,  Y,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32_FLOAT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32_SINT)
> +   SF( Y,  x,  x,  x,  Y,  x,  Y,  Y,  x, BRW_SURFACEFORMAT_R32G32_UINT)
> +   SF( Y, 50,  Y,  x,  x,  x,  x,  x,  x, 
> BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS)
> +   SF( Y,  x,  x,  x,  x,  x,  x,  x,  x, 
> BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT)
> +   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, BRW_SURFACEFORMAT_L32A32_FLOAT)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R32G32_UNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R32G32_SNORM)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, BRW_SURFACEFORMAT_R64_FLOAT)
> +   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16X16_UNORM)
> +   SF( Y,  Y,  x,  x,  x,  x,  x,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16X16_FLOAT)
> +   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, BRW_SURFACEFORMAT_A32X32_FLOAT)
> +   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, BRW_SURFACEFORMAT_L32X32_FLOAT)
> +   SF( Y, 50,  x,  x,  x,  x,  x,  x,  x, BRW_SURFACEFORMAT_I32X32_FLOAT)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R16G16B16A16_SSCALED)
> +   SF( x,  x,  x,  x,  x,  x,  Y,  x,  x, 
> BRW_SURFACEFORMAT_R

[Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Theiss, Ingo

Dear devs,

I am getting segmentation faults when running piglit r600.tests with latest 
mesa build from git:

Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init 
failed for (1431699456, 0x0006)
Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] 
*ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12)
Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 
0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in 
r600_dri.so[7fb2716c8000+dba000]

---
Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil 
-auto readpixels defau'.
Program terminated with signal 11, Segmentation fault.
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
63 ur->vtbl->transfer_unmap(pipe, transfer);
(gdb) bt
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
#1  0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, 
context=0x2441df0)
at ../../src/gallium/auxiliary/util/u_inlines.h:398
#2  st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at 
state_tracker/st_cb_fbo.c:704
#3  0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate 
(ctx=0x254b720, x=, y=, width=123, 
height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", 
dstStride=984) at main/readpix.c:427
#4  0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, 
pixels=, type=36269, height=, 
width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469
#5  _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, 
format=, type=36269, packing=0x255afb8, 
pixels=) at main/readpix.c:508
#6  0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, 
type=36269, format=34041, height=123, width=123, y=0, x=0, 
bufSize=) at main/readpix.c:753
#7  _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, 
type=36269, pixels=0x7fff653d0870) at main/readpix.c:761
#8  0x0042eb4a in read_32f_8 ()
#9  0x0042ec0b in compare ()
#10 0x0042ef73 in test_readpixels ()
#11 0x0042f7aa in piglit_display ()
#12 0x00430241 in display ()
#13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3
#14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3
#15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3
#16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3
#17 0x00430967 in main ()
---

My glxinfo:

OpenGL vendor string: X.Org
OpenGL renderer string: Gallium 0.4 on AMD BARTS
OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca)
OpenGL shading language version string: 1.20


A general question: I am interested in running regular piglit tests on my 
hardware and would like to help by reporting problems/bugs. What´s the correct 
way of doing so? Should I post any problems to mesa-dev or should I open a bug.

Thanks and keep up the good work!

Regards,

Ingo
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/15] i965: Mark texture formats as supported using the surface formats table.

2011-11-16 Thread Kenneth Graunke

On 11/15/2011 05:55 PM, Eric Anholt wrote:
> This is currently duplicated with intel_context.c's setup of the
> formats table, and sets true for exactly the same set of formats on
> gen6.
> ---
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |   13 -
>  1 files changed, 12 insertions(+), 1 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index 28a40a2..e29a1dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -364,6 +364,7 @@ void
>  brw_init_surface_formats(struct brw_context *brw)
>  {
> struct intel_context *intel = &brw->intel;
> +   struct gl_context *ctx = &intel->ctx;
> int gen;
> gl_format format;
>  
> @@ -373,10 +374,11 @@ brw_init_surface_formats(struct brw_context *brw)
>  
> for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) 
> {
>uint32_t texture, render;
> -  const struct surface_format_info *rinfo;
> +  const struct surface_format_info *rinfo, *tinfo;
>bool is_integer = _mesa_is_format_integer_color(format);
>  
>render = texture = brw_format_for_mesa_format(format);
> +  tinfo = &surface_formats[texture];
>  
>/* The value of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so don't 
> skip
> * it.
> @@ -384,6 +386,9 @@ brw_init_surface_formats(struct brw_context *brw)
>if (texture == 0 && format != MESA_FORMAT_RGBA_FLOAT32)
>continue;
>  
> +  if (gen >= tinfo->sampling && (gen >= tinfo->filtering || is_integer))
> +  ctx->TextureFormatSupported[format] = true;
> +
>/* Re-map some render target formats to make them supported when they
> * wouldn't be using their format for texturing.
> */
> @@ -434,6 +439,12 @@ brw_init_surface_formats(struct brw_context *brw)
> brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true;
> brw->format_supported_as_render_target[MESA_FORMAT_S8] = true;
> brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true;
> +
> +   /* We remap depth formats to a supported texturing format in
> +* translate_tex_format().
> +*/
> +   ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true;
> +   ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
>  }
>  
>  bool

I think we can actually support MESA_FORMAT_Z16 as well, no?  Though
enabling that should definitely be a separate patch, in case it breaks
things.  (I'm a little wary about it because the Windows team apparently
had some issues with Z16 in GLBenchmark, but they weren't sure if it was
a hardware or software issue.)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] i965 texture formats rework

2011-11-16 Thread Kenneth Graunke

On 11/15/2011 05:55 PM, Eric Anholt wrote:
> There are a couple of core mesa fixes tucked in this series.

I like this!  Great work.

For the series:
Reviewed-by: Kenneth Graunke 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/15] i965: Mark texture formats as supported using the surface formats table.

2011-11-16 Thread Kenneth Graunke

On 11/16/2011 12:52 AM, Kenneth Graunke wrote:
> On 11/15/2011 05:55 PM, Eric Anholt wrote:
>> This is currently duplicated with intel_context.c's setup of the
>> formats table, and sets true for exactly the same set of formats on
>> gen6.
>> ---
>>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |   13 -
>>  1 files changed, 12 insertions(+), 1 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
>> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> index 28a40a2..e29a1dc 100644
>> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> @@ -364,6 +364,7 @@ void
>>  brw_init_surface_formats(struct brw_context *brw)
>>  {
>> struct intel_context *intel = &brw->intel;
>> +   struct gl_context *ctx = &intel->ctx;
>> int gen;
>> gl_format format;
>>  
>> @@ -373,10 +374,11 @@ brw_init_surface_formats(struct brw_context *brw)
>>  
>> for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; 
>> format++) {
>>uint32_t texture, render;
>> -  const struct surface_format_info *rinfo;
>> +  const struct surface_format_info *rinfo, *tinfo;
>>bool is_integer = _mesa_is_format_integer_color(format);
>>  
>>render = texture = brw_format_for_mesa_format(format);
>> +  tinfo = &surface_formats[texture];
>>  
>>/* The value of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so don't 
>> skip
>> * it.
>> @@ -384,6 +386,9 @@ brw_init_surface_formats(struct brw_context *brw)
>>if (texture == 0 && format != MESA_FORMAT_RGBA_FLOAT32)
>>   continue;
>>  
>> +  if (gen >= tinfo->sampling && (gen >= tinfo->filtering || is_integer))
>> + ctx->TextureFormatSupported[format] = true;
>> +
>>/* Re-map some render target formats to make them supported when they
>> * wouldn't be using their format for texturing.
>> */
>> @@ -434,6 +439,12 @@ brw_init_surface_formats(struct brw_context *brw)
>> brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true;
>> brw->format_supported_as_render_target[MESA_FORMAT_S8] = true;
>> brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true;
>> +
>> +   /* We remap depth formats to a supported texturing format in
>> +* translate_tex_format().
>> +*/
>> +   ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true;
>> +   ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
>>  }
>>  
>>  bool
> 
> I think we can actually support MESA_FORMAT_Z16 as well, no?  Though
> enabling that should definitely be a separate patch, in case it breaks
> things.  (I'm a little wary about it because the Windows team apparently
> had some issues with Z16 in GLBenchmark, but they weren't sure if it was
> a hardware or software issue.)

Nevermind. :)  I somehow missed the other message:

[PATCH 4/4] intel: Support native Z16 depth textures (including as
renderbuffer) on i965.

Which is especially amusing since I reviewed it.  Too much email.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/15] mesa: Add fallback from RGB_FLOAT16 to RGBA_FLOAT16 before RGBA_FLOAT32.

2011-11-16 Thread Jose Fonseca

Neither DX9 or DX10 expose equivalent to GL_RGB16F_ARB:
- http://msdn.microsoft.com/en-us/library/bb172558.aspx (D3D9)
- http://msdn.microsoft.com/en-us/library/bb173059.aspx (D3D10)

So probably no hardware ever will support them.

Even for software rendering it is a very nasty format to support, because it is 
not 32bits aligned (48bits).

Unfortunately it doesn't look like we can simply not support GL_RGB16F_ARB 
though.

- Original Message -
> The i965 driver can't do RGB float16 at all.
> ---
>  src/mesa/main/texformat.c |1 +
>  1 files changed, 1 insertions(+), 0 deletions(-)
> 
> diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
> index aebe38e..ea42ced 100644
> --- a/src/mesa/main/texformat.c
> +++ b/src/mesa/main/texformat.c
> @@ -357,6 +357,7 @@ _mesa_choose_tex_format( struct gl_context *ctx,
> GLint internalFormat,
>   case GL_RGB16F_ARB:
>   RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_FLOAT16);
>   RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_FLOAT32);
> + RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_FLOAT16);

RGB_FLOAT32 is 96bits where as RGBA_FLOAT16 is 64bits, so I think that 
RGBA_FLOAT16 should be tried before RGB_FLOAT32, as it is better from all POVs


>   RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_FLOAT32);
>   break;
>   case GL_RGB32F_ARB:

Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] TGSI declarations missing type info

2011-11-16 Thread Jose Fonseca



- Original Message -
> On 11/14/2011 10:13 PM, Jose Fonseca wrote:
> > 
> > 
> > - Original Message -
> >> On 11/14/2011 09:48 PM, Christoph Bumiller wrote:
> >>> On 11/14/2011 09:24 PM, Jose Fonseca wrote:
> 
> 
>  - Original Message -
> > On 11/14/2011 08:23 PM, Jose Fonseca wrote:
> >>
> >>
> >> - Original Message -
> >>> The next open question I have is whether this warrants
> >>> additions
> >>> to
> >>> the tgsi_opcode_info struct, mainly whether the return value
> >>> is
> >>> uint/int/float,
> >>> and whether then src regs are uint/int/float, I'm not 100%
> >>> that
> >>> all
> >>> opcodes always take all int/uint/float srcs, but my memory is
> >>> a
> >>> bit
> >>> hazy on what the outliers might be.
> >>
> >> I agree that adding this sort of info to tgsi_opcode_info
> >> would
> >> be
> >> a code typing saver for all drivers.
> >>
> >> However I never looked at the integer/double opcode set, but
> >> from
> >> quickly skimming d3d10/11 headers, there are three buckets:
> >> - arithmetic: (u)ints -> (u)int
> >> - comparison: (u)ints -> mask (and I don't know what's the
> >> semantic
> >> of these masks: 0/~0, 1.0f/0.0f, 0/1, or what)
> >> - conversion: float <-> (u)int
> >>
> >>
> >> And likewise to double, minus the unsigned vs signed split.
> >>
> >> So, probably we need tgsi_opcode_info to distuinguish the
> >> source
> >> type vs dest type, but we likely don't need per-source types.
> >>
> >>
> >>
> >> A much more intricate question, is how to represent boolean
> >> mask
> >> in
> >> a mixed type world. My guess is one of two:
> >>
> >> a) is that there are two we should not mandate the bit
> >> representation of a boolean (drivers are free to implement
> >> booleans as 1.0f/0.0f, 0/~0, etc). The only constrainsts is
> >> that
> >> the state tracker never tries to write a boolean directly to
> >> an
> >> output.
> >>
> >
> >
> > Booleans can be set by the user (CONST) and thus the state
> > tracker
> > has
> > know what drivers want.
> 
>  I forgot about those.
> 
>  One thing that's common to all representations is that
>  0x
>  is false on all of them. So we could establish that as false.
> 
>  Another approach, make cleaner, would be to disallow CONST
>  booleans: i.e., the state tracker would use floats (0.0f/1.0f)
>  or
>  ints, and then explicitely emit a comparison against the null
>  element to determine the boolean value:
> 
> CONST[0] // boolean as float
> CONST[1] // boolean as integer
> CONST[2] // boolean as double
> IMM[0] = {0, 1.0f}
> 
> // TEMP[0] == (bool)CONST[0]
> EQ TEMP[0], CONST[0], IMM[0].
> 
> // TEMP[1] == (bool)CONST[1]
> IEQ TEMP[1], CONST[1], IMM[0].
> 
> // TEMP[2] == (bool)CONST[2]
> DEQ TEMP[2], CONST[2], IMM[0].
>    
>  And the inverse for outputs.
> 
> // OUT[0] = TEMP[0] ? 1.0f : 0.0f
> SELECT OUT[0], TEMP[0], IMM[1], IMM[0]
> 
>  We need some way to encode integer/double immediates though.
> 
> >> b) if by some miracle, all hardware implements boolean vectors
> >> in
> >> the same way, then we could stick to that.  (For
> >> sse/avx/llvm-2.9,
> >> the natural way to represent a boolean vector is a bitmask
> >> (0/~0).
> >> llvm 3.0 finally added support for bit vectors.)
> >>
> >>
> >
> > Currently, classic mesa uses 1/0, gallium uses 1.0f or ~0
> > depending
> > on
> > whether the driver supports integers.
> >
> > Also, the float SET opcodes return 1.0f/0.0f so there has to be
> > conversion if the driver supports integers, and the integer SET
> > opcodes
> > are supposed to return ~0.
> 
>  This is confusing -- so the semantics of e.g., IF's argument,
>  depends on whether the pipe driver supports integers or not?
> 
> >>>
> >>> No, TGSI_OPCODE_IF only checks for zero/nonzero (thus NaN counts
> >>> as
> >>
> >> I mean to say float -0. Anyway, the example was confusing, I just
> >> meant
> >> we do an u32 comparison for drivers that support integers.
> >>
> >> (This is where TGSI_OPCODE_IF may yield different results for
> >> float-only
> >> cards, but in practice it's never actually used with anything
> >> other
> >> than
> >> the result of a SET operation.)
> > 
> > My other reply was about this particular case.
> > 
> > But I'm not convinced we can ignore this case at all. I'm fine with
> > drivers cutting corners. By I can't accept an interface that's
> > broken by design for all drivers.
> > 
> > The argument from TGSI_OPCODE_IF may very well come from the
> > application/state tr

[Mesa-dev] [PATCH 1/6] pipe_video: Add more mpeg1 fields to pipe_mpeg12_picture_desc

2011-11-16 Thread Maarten Lankhorst

Signed-off-by: Maarten Lankhorst 
---
 src/gallium/include/pipe/p_video_state.h  |3 +++
 src/gallium/state_trackers/vdpau/decode.c |3 +++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/src/gallium/include/pipe/p_video_state.h 
b/src/gallium/include/pipe/p_video_state.h
index 1940bf1..9a70eb7 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -134,6 +134,9 @@ struct pipe_mpeg12_picture_desc
unsigned concealment_motion_vectors;
unsigned intra_dc_precision;
unsigned f_code[2][2];
+   unsigned top_field_first;
+   unsigned full_pel_forward_vector;
+   unsigned full_pel_backward_vector;
unsigned num_slices;
 };
 
diff --git a/src/gallium/state_trackers/vdpau/decode.c 
b/src/gallium/state_trackers/vdpau/decode.c
index f135129..47212e3 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -245,6 +245,9 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
picture.f_code[1][1] = picture_info->f_code[1][1] - 1;
picture.num_slices = picture_info->slice_count;
+   picture.top_field_first = picture_info->top_field_first;
+   picture.full_pel_forward_vector = picture_info->full_pel_forward_vector;
+   picture.full_pel_backward_vector = picture_info->full_pel_backward_vector;
 
decoder->set_picture_parameters(decoder, &picture.base);
 
-- 
1.7.7.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/6] g3dvl: Remove get_sampler_view_planes, and replace with get/put bits

2011-11-16 Thread Maarten Lankhorst

Doesn't break nouveau_video, apparently a check already causes it to use
vl_video_buffer for vdpau.

Signed-off-by: Maarten Lankhorst 
---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c |   15 +++--
 src/gallium/auxiliary/vl/vl_video_buffer.c   |   92 +-
 src/gallium/auxiliary/vl/vl_video_buffer.h   |2 +
 src/gallium/drivers/nouveau/nouveau_video.c  |   37 --
 src/gallium/drivers/nouveau/nouveau_video.h  |1 -
 src/gallium/include/pipe/p_video_decoder.h   |   17 --
 src/gallium/state_trackers/vdpau/surface.c   |   55 +---
 7 files changed, 131 insertions(+), 88 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 2442d78..32501e0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -160,16 +160,18 @@ static bool
 init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer 
*buffer)
 {
struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
+   struct vl_video_buffer *mc_source = (struct vl_video_buffer*)dec->mc_source;
+   struct vl_video_buffer *idct_source = (struct 
vl_video_buffer*)dec->idct_source;
 
unsigned i;
 
-   assert(dec && buffer);
+   assert(buffer);
 
-   idct_source_sv = 
dec->idct_source->get_sampler_view_planes(dec->idct_source);
+   idct_source_sv = idct_source->get_sampler_view_planes(&idct_source->base);
if (!idct_source_sv)
   goto error_source_sv;
 
-   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
+   mc_source_sv = mc_source->get_sampler_view_planes(&mc_source->base);
if (!mc_source_sv)
   goto error_mc_source_sv;
 
@@ -553,7 +555,8 @@ vl_mpeg12_set_reference_frames(struct pipe_video_decoder 
*decoder,
assert(num_ref_frames <= VL_MAX_REF_FRAMES);
 
for (i = 0; i < num_ref_frames; ++i) {
-  sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]);
+  struct vl_video_buffer *ref = (struct vl_video_buffer *)ref_frames[i];
+  sv = ref->get_sampler_view_planes(&ref->base);
   for (j = 0; j < VL_MAX_PLANES; ++j)
  pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
}
@@ -713,6 +716,7 @@ static void
 vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
 {
struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct vl_video_buffer *mc_source;
struct pipe_sampler_view **mc_source_sv;
struct pipe_vertex_buffer vb[3];
struct vl_mpeg12_buffer *buf;
@@ -761,7 +765,8 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
  vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]);
}
 
-   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
+   mc_source = (struct vl_video_buffer *)dec->mc_source;
+   mc_source_sv = mc_source->get_sampler_view_planes(&mc_source->base);
for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
   if (!dec->target_surfaces[i]) continue;
 
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c 
b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 6d714d4..b982477 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -35,6 +35,7 @@
 #include "util/u_inlines.h"
 #include "util/u_sampler.h"
 #include "util/u_memory.h"
+#include "util/u_rect.h"
 
 #include "vl_video_buffer.h"
 
@@ -151,6 +152,93 @@ error:
return NULL;
 }
 
+static int
+vl_video_buffer_reformat(struct vl_video_buffer *buf, enum pipe_format format)
+{
+   struct pipe_resource templ, *newbuf, *newbuf2 = NULL;
+   struct pipe_screen *screen = buf->base.context->screen;
+   unsigned i;
+   templ = *buf->resources[1];
+   if (format == PIPE_FORMAT_NV12)
+  templ.format = PIPE_FORMAT_R8G8_UNORM;
+   else if (format == PIPE_FORMAT_YV12)
+  templ.format = PIPE_FORMAT_R8_UNORM;
+   else
+  assert(0);
+   newbuf = screen->resource_create(screen, &templ);
+   if (!newbuf)
+  return -1;
+   if (templ.format == PIPE_FORMAT_R8_UNORM) {
+  newbuf2 = screen->resource_create(screen, &templ);
+  if (!newbuf2) {
+ pipe_resource_reference(&newbuf, NULL);
+ return -1;
+  }
+  buf->num_planes = 3;
+   }
+   else
+  buf->num_planes = 2;
+   buf->base.buffer_format = format;
+   pipe_resource_reference(&buf->resources[1], newbuf);
+   pipe_resource_reference(&buf->resources[2], newbuf2);
+   for (i = 1; i < VL_MAX_PLANES; ++i) {
+  pipe_surface_reference(&buf->surfaces[i], NULL);
+  pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL);
+  pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
+   }
+   return 0;
+}
+
+static int
+vl_video_buffer_put_bits(struct pipe_video_buffer *buffer,
+ enum pipe_format format,
+ void const *const *source_data,
+ uint32_t const *source_pitches)
+{
+   struct vl_video_buffer *buf = (struct vl_video_buffer *)buffe

[Mesa-dev] [PATCH 3/6] pipe_video: Get rid of a few pipe_video_decoder members

2011-11-16 Thread Maarten Lankhorst

You won't be missed, functionality folded into picture_desc and
decode..

Signed-off-by: Maarten Lankhorst 
---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |   68 ---
 src/gallium/drivers/nouveau/nouveau_video.c|   26 ++--
 src/gallium/include/pipe/p_video_decoder.h |   21 +-
 src/gallium/include/pipe/p_video_state.h   |   15 +---
 src/gallium/state_trackers/vdpau/decode.c  |   86 ++-
 src/gallium/state_trackers/xorg/xvmc/surface.c |   19 ++
 7 files changed, 85 insertions(+), 154 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 936cf2c..b586a00 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -823,7 +823,7 @@ decode_slice(struct vl_mpg12_bs *bs)
   inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11);
   if (x != -1) {
  mb.num_skipped_macroblocks = inc - 1;
- bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+ bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1);
   }
   mb.x = x += inc;
 
@@ -927,7 +927,7 @@ decode_slice(struct vl_mpg12_bs *bs)
} while (vl_vlc_bits_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23));
 
mb.num_skipped_macroblocks = 0;
-   bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+   bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1);
return true;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 32501e0..18ba12d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -507,24 +507,38 @@ vl_mpeg12_set_picture_parameters(struct 
pipe_video_decoder *decoder,
 {
struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc 
*)picture;
+   struct pipe_sampler_view **sv;
+   struct vl_video_buffer *ref;
+   int i = 0, j;
 
assert(dec && pic);
 
dec->picture_desc = *pic;
-}
+   assert(2 <= VL_MAX_REF_FRAMES);
 
-static void
-vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder,
-   const struct pipe_quant_matrix *matrix)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
-   const struct pipe_mpeg12_quant_matrix *m = (const struct 
pipe_mpeg12_quant_matrix *)matrix;
+   ref = (struct vl_video_buffer *)pic->ref_forward;
+   if (ref) {
+  sv = ref->get_sampler_view_planes(&ref->base);
+  for (j = 0; j < VL_MAX_PLANES; ++j)
+ pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
+  i++;
+   }
+   ref = (struct vl_video_buffer *)pic->ref_backward;
+   if (ref) {
+  sv = ref->get_sampler_view_planes(&ref->base);
+  for (j = 0; j < VL_MAX_PLANES; ++j)
+ pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
+  i++;
+   }
 
-   assert(dec);
-   assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12);
+   for (; i < VL_MAX_REF_FRAMES; ++i)
+  for (j = 0; j < VL_MAX_PLANES; ++j)
+ pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL);
 
-   memcpy(dec->intra_matrix, m->intra_matrix, 64);
-   memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+  memcpy(dec->intra_matrix, pic->intra_matrix, 64);
+  memcpy(dec->non_intra_matrix, pic->non_intra_matrix, 64);
+   }
 }
 
 static void
@@ -543,30 +557,6 @@ vl_mpeg12_set_decode_target(struct pipe_video_decoder 
*decoder,
 }
 
 static void
-vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder,
-   struct pipe_video_buffer **ref_frames,
-   unsigned num_ref_frames)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
-   struct pipe_sampler_view **sv;
-   unsigned i,j;
-
-   assert(dec);
-   assert(num_ref_frames <= VL_MAX_REF_FRAMES);
-
-   for (i = 0; i < num_ref_frames; ++i) {
-  struct vl_video_buffer *ref = (struct vl_video_buffer *)ref_frames[i];
-  sv = ref->get_sampler_view_planes(&ref->base);
-  for (j = 0; j < VL_MAX_PLANES; ++j)
- pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
-   }
-
-   for (; i < VL_MAX_REF_FRAMES; ++i)
-  for (j = 0; j < VL_MAX_PLANES; ++j)
- pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL);
-}
-
-static void
 vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
 {
struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
@@ -626,6 +616,7 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
 
 static void
 vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder,
+struct pipe_video_buffer *target,
 const struct pipe_macroblock *macroblock

[Mesa-dev] [PATCH 4/6] xorg/xvmc: Remove recursion from RecursiveEndFrame

2011-11-16 Thread Maarten Lankhorst

There should be no way for endframe to ever need recursion..

Signed-off-by: Maarten Lankhorst 
---
 src/gallium/state_trackers/xorg/xvmc/surface.c |   23 +--
 1 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c 
b/src/gallium/state_trackers/xorg/xvmc/surface.c
index ffdac3d..3025f0e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -122,7 +122,7 @@ SetDecoderStatus(XvMCSurfacePrivate *surface)
 }
 
 static void
-RecursiveEndFrame(XvMCSurfacePrivate *surface)
+EndFrame(XvMCSurfacePrivate *surface)
 {
XvMCContextPrivate *context_priv;
unsigned i;
@@ -134,12 +134,11 @@ RecursiveEndFrame(XvMCSurfacePrivate *surface)
for ( i = 0; i < 2; ++i ) {
   if (surface->ref[i]) {
  XvMCSurface *ref = surface->ref[i];
+ XvMCSurfacePrivate *refpriv;
 
- assert(ref);
-
- surface->ref[i] = NULL;
- RecursiveEndFrame(ref->privData);
- surface->ref[i] = ref;
+ assert(ref && ref->privData);
+ refpriv = ref->privData;
+ assert(!refpriv->picture_structure); // There's just no sane way this 
can happen..
   }
}
 
@@ -260,10 +259,10 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext 
*context, unsigned int pictur
 
// call end frame on all referenced frames
if (past_surface)
-  RecursiveEndFrame(past_surface->privData);
+  EndFrame(past_surface->privData);
 
if (future_surface)
-  RecursiveEndFrame(future_surface->privData);
+  EndFrame(future_surface->privData);
 
xvmc_mb = macroblocks->macro_blocks + first_macroblock;
 
@@ -275,7 +274,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext 
*context, unsigned int pictur
(xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
   // If they change anyway we must assume that the current frame is ended
-  RecursiveEndFrame(target_surface_priv);
+  EndFrame(target_surface_priv);
}
 
target_surface_priv->ref[0] = past_surface;
@@ -390,7 +389,11 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, 
Drawable drawable,
assert(desty + desth - 1 < drawable_surface->height);
 */
 
-   RecursiveEndFrame(surface_priv);
+   if (surface_priv->ref[0])
+  EndFrame(surface_priv->ref[0]->privData);
+   if (surface_priv->ref[1])
+  EndFrame(surface_priv->ref[1]->privData);
+   EndFrame(surface_priv);
 
context_priv->decoder->flush(context_priv->decoder);
 
-- 
1.7.7.1


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/6] pipe_video: Get rid of unnecessary state tracking

2011-11-16 Thread Maarten Lankhorst

This requires some special care, vl_mpeg12_decoder was dumping some state
in the decode_buffer, and we can no longer control the order in which
decode_buffer/decoder is freed. As such, vl_idct cannot be used in cleanup.
Fortunately it's not needed. Also, flush will have to be called before
destroying a surface in XvMC, otherwise there exists the possibility
that the decoder still held a reference to it.

Signed-off-by: Maarten Lankhorst 

---
begin_frame was moved, end_frame was renamed to flush for vl_mpeg12_decoder 
(with some checks added)

 src/gallium/auxiliary/vl/vl_idct.c |   14 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  367 ++--
 src/gallium/auxiliary/vl/vl_video_buffer.c |4 +
 src/gallium/auxiliary/vl/vl_video_buffer.h |2 +
 src/gallium/drivers/nouveau/nouveau_video.c|   12 -
 src/gallium/include/pipe/p_video_decoder.h |   25 --
 src/gallium/state_trackers/vdpau/decode.c  |   40 +--
 src/gallium/state_trackers/xorg/xvmc/surface.c |   65 +
 .../state_trackers/xorg/xvmc/xvmc_private.h|1 -
 9 files changed, 193 insertions(+), 337 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c 
b/src/gallium/auxiliary/vl/vl_idct.c
index a2b3537..8394542 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -614,9 +614,9 @@ init_source(struct vl_idct *idct, struct vl_idct_buffer 
*buffer)
 }
 
 static void
-cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+cleanup_source(struct vl_idct_buffer *buffer)
 {
-   assert(idct && buffer);
+   assert(buffer);
 
pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
 
@@ -665,13 +665,13 @@ error_surfaces:
 }
 
 static void
-cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+cleanup_intermediate(struct vl_idct_buffer *buffer)
 {
unsigned i;
 
-   assert(idct && buffer);
+   assert(buffer);
 
-   for(i = 0; i < idct->nr_of_render_targets; ++i)
+   for(i = 0; i < buffer->fb_state.nr_cbufs; ++i)
   pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
 
pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, 
NULL);
@@ -823,8 +823,8 @@ vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
 {
assert(buffer);
 
-   cleanup_source(buffer->idct, buffer);
-   cleanup_intermediate(buffer->idct, buffer);
+   cleanup_source(buffer);
+   cleanup_intermediate(buffer);
 
pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, 
NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 18ba12d..4eaad16 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -389,7 +389,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 {
struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 
-   assert(decoder);
+   assert(dec);
 
/* Asserted in softpipe_delete_fs_state() for some reason */
dec->base.context->bind_vs_state(dec->base.context, NULL);
@@ -424,13 +424,164 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
FREE(dec);
 }
 
-static void *
-vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+static void
+vl_mpeg12_begin_frame(struct vl_mpeg12_decoder *dec)
+{
+   struct vl_mpeg12_buffer *buf;
+
+   struct pipe_resource *tex;
+   struct pipe_box rect = { 0, 0, 0, 1, 1, 1 };
+
+   unsigned i;
+
+   assert(dec);
+
+   buf = dec->current_buffer;
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+  dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+  vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true);
+  vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false);
+   }
+
+   vl_vb_map(&buf->vertex_stream, dec->base.context);
+
+   tex = buf->zscan_source->texture;
+   rect.width = tex->width0;
+   rect.height = tex->height0;
+
+   buf->tex_transfer = dec->base.context->get_transfer
+   (
+  dec->base.context, tex,
+  0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+  &rect
+   );
+
+   buf->block_num = 0;
+   buf->texels = dec->base.context->transfer_map(dec->base.context, 
buf->tex_transfer);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+  buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
+  buf->num_ycbcr_blocks[i] = 0;
+   }
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+  buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+  vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc);
+
+   } else {
+
+  for (i = 0; i < VL_MAX_PLANES; ++i)
+ vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+   }
+}
+
+static void

[Mesa-dev] [PATCH 6/6] pipe_video: Remove set_picture_parameters

2011-11-16 Thread Maarten Lankhorst

Signed-off-by: Maarten Lankhorst 

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |   12 +-
 src/gallium/drivers/nouveau/nouveau_video.c|   25 +--
 src/gallium/include/pipe/p_video_decoder.h |9 +-
 src/gallium/state_trackers/vdpau/decode.c  |  207 
 src/gallium/state_trackers/xorg/xvmc/surface.c |   38 +
 6 files changed, 129 insertions(+), 166 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index b586a00..5e44ede 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -823,7 +823,7 @@ decode_slice(struct vl_mpg12_bs *bs)
   inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11);
   if (x != -1) {
  mb.num_skipped_macroblocks = inc - 1;
- bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1);
+ bs->decoder->decode_macroblock(bs->decoder, NULL, NULL, &mb.base, 1);
   }
   mb.x = x += inc;
 
@@ -927,7 +927,7 @@ decode_slice(struct vl_mpg12_bs *bs)
} while (vl_vlc_bits_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23));
 
mb.num_skipped_macroblocks = 0;
-   bs->decoder->decode_macroblock(bs->decoder, NULL, &mb.base, 1);
+   bs->decoder->decode_macroblock(bs->decoder, NULL, NULL, &mb.base, 1);
return true;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c 
b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 4eaad16..c61e95c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -626,11 +626,9 @@ error_vertex_buffer:
 }
 
 static void
-vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder,
- struct pipe_picture_desc *picture)
+vl_mpeg12_set_picture_parameters(struct vl_mpeg12_decoder *dec,
+ struct pipe_mpeg12_picture_desc *pic)
 {
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
-   struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc 
*)picture;
struct pipe_sampler_view **sv;
struct vl_video_buffer *ref;
int i = 0, j;
@@ -691,6 +689,7 @@ vl_mpeg12_set_decode_target(struct vl_mpeg12_decoder *dec,
 static void
 vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder,
 struct pipe_video_buffer *target,
+struct pipe_picture_desc *desc,
 const struct pipe_macroblock *macroblocks,
 unsigned num_macroblocks)
 {
@@ -703,6 +702,8 @@ vl_mpeg12_decode_macroblock(struct pipe_video_decoder 
*decoder,
assert(dec);
assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
 
+   if (desc)
+  vl_mpeg12_set_picture_parameters(dec, (struct 
pipe_mpeg12_picture_desc*)desc);
if (target)
   vl_mpeg12_set_decode_target(dec, (struct vl_video_buffer*)target);
buf = dec->current_buffer;
@@ -761,6 +762,7 @@ vl_mpeg12_decode_macroblock(struct pipe_video_decoder 
*decoder,
 static void
 vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
struct pipe_video_buffer *target,
+   struct pipe_picture_desc *desc,
unsigned num_bytes, const void *data)
 {
struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
@@ -769,6 +771,7 @@ vl_mpeg12_decode_bitstream(struct pipe_video_decoder 
*decoder,
unsigned i;
 
assert(dec);
+   vl_mpeg12_set_picture_parameters(dec, (struct 
pipe_mpeg12_picture_desc*)desc);
vl_mpeg12_set_decode_target(dec, (struct vl_video_buffer*)target);
 
buf = dec->current_buffer;
@@ -1047,7 +1050,6 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
dec->base.max_references = max_references;
 
dec->base.destroy = vl_mpeg12_destroy;
-   dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters;
dec->base.decode_macroblock = vl_mpeg12_decode_macroblock;
dec->base.decode_bitstream = vl_mpeg12_decode_bitstream;
dec->base.flush = vl_mpeg12_flush;
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c 
b/src/gallium/drivers/nouveau/nouveau_video.c
index bf67d03..d84d9b3 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -424,32 +424,24 @@ nouveau_decoder_surface_index(struct nouveau_decoder *dec,
 }
 
 static void
-nouveau_decoder_set_picture_parameters(struct pipe_video_decoder *decoder,
-   struct pipe_picture_desc *picture_desc)
-{
-   struct nouveau_decoder *dec = (struct nouveau_decoder *)decoder;
-   struct pipe_mpeg12_picture_desc *desc;
-   desc = (struct pipe_mpeg12_picture_desc *)picture_desc;
-   dec->picture_structure = desc->picture_structure;
-
-   if (desc->ref_backward)
-  dec->past = nouveau_decoder_surface_index(dec, desc-

[Mesa-dev] [PATCH 7/6] pipe_video: remove PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED

2011-11-16 Thread Maarten Lankhorst

This is no longer used with the removal of decoder buffers.

Signed-off-by: Maarten Lankhorst 
---
 src/gallium/auxiliary/vl/vl_decoder.c|   13 -
 src/gallium/auxiliary/vl/vl_decoder.h|6 --
 src/gallium/drivers/nouveau/nouveau_video.c  |2 --
 src/gallium/drivers/nvfx/nvfx_screen.c   |2 --
 src/gallium/drivers/r300/r300_screen.c   |2 --
 src/gallium/drivers/r600/r600_pipe.c |2 --
 src/gallium/drivers/softpipe/sp_screen.c |2 --
 src/gallium/include/pipe/p_video_enums.h |3 +--
 src/gallium/state_trackers/vdpau/decode.c|6 --
 src/gallium/state_trackers/vdpau/vdpau_private.h |3 ---
 10 files changed, 1 insertions(+), 40 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_decoder.c 
b/src/gallium/auxiliary/vl/vl_decoder.c
index 383e02d..da905a6 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -44,19 +44,6 @@ vl_profile_supported(struct pipe_screen *screen, enum 
pipe_video_profile profile
}
 }
 
-unsigned
-vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile 
profile)
-{
-   assert(screen);
-   switch (u_reduce_video_profile(profile)) {
-  case PIPE_VIDEO_CODEC_MPEG12:
- return 4;
-
-  default:
- return 1;
-   }
-}
-
 struct pipe_video_decoder *
 vl_create_decoder(struct pipe_context *pipe,
   enum pipe_video_profile profile,
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h 
b/src/gallium/auxiliary/vl/vl_decoder.h
index a997516..a7abe9c 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -38,12 +38,6 @@ bool
 vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile 
profile);
 
 /**
- * the desired number of buffers for optimal operation
- */
-unsigned
-vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile 
profile);
-
-/**
  * standard implementation of pipe->create_video_decoder
  */
 struct pipe_video_decoder *
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c 
b/src/gallium/drivers/nouveau/nouveau_video.c
index d84d9b3..486210d 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -780,8 +780,6 @@ nouveau_screen_get_video_param(struct pipe_screen *pscreen,
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
   return vl_video_buffer_max_size(pscreen);
-   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
-  return vl_num_buffers_desired(pscreen, profile);
default:
   debug_printf("unknown video param: %d\n", param);
   return 0;
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c 
b/src/gallium/drivers/nvfx/nvfx_screen.c
index 6086d43..8f82829 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -224,8 +224,6 @@ nvfx_screen_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return vl_video_buffer_max_size(screen);
-   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
-   return vl_num_buffers_desired(screen, profile);
default:
return 0;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index ab1cec5..7b06235 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -322,8 +322,6 @@ static int r300_get_video_param(struct pipe_screen *screen,
   case PIPE_VIDEO_CAP_MAX_WIDTH:
   case PIPE_VIDEO_CAP_MAX_HEIGHT:
  return vl_video_buffer_max_size(screen);
-  case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
- return vl_num_buffers_desired(screen, profile);
   default:
  return 0;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 243de14..120f67c 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -530,8 +530,6 @@ static int r600_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return vl_video_buffer_max_size(screen);
-   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
-   return vl_num_buffers_desired(screen, profile);
default:
return 0;
}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c 
b/src/gallium/drivers/softpipe/sp_screen.c
index 4851049..623e915 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -190,8 +190,6 @@ softpipe_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
   return vl_video_buffer_max_size(screen);
-   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
-  return vl_num_buffers_desired(screen, profile);
default:
   return 0;
}

Re: [Mesa-dev] reworking pipe_video_decoder / pipe_video_buffer

2011-11-16 Thread Christian König


On 15.11.2011 17:52, Maarten Lankhorst wrote:

Deleted:
- begin_frame/end_frame: Was only useful for XvMC, should be folded into flush..


I'm not completely happy with the current interface also, but if you 
remove the state tracker ability to control how many buffers are used, 
which in turn lets the VDPAU and XvMC state tracker use the same 
buffering algorithm. This is quite bad, because for XvMC we are forced 
to use a buffer for each destination surface, while in the VDPAU/VAAPI 
case a much simpler method should be used.



- set_quant_matrix/set_reference_frames:
 they should become part of picture_desc,
 not all codecs deal with it in the same way,
 and some may not have all of the calls.
That is true, but also intended. The idea behind it is that it is not 
necessary for all codecs to set all buffer types, but instead inform the 
driver that a specific buffer type has changed. This gives the driver 
the ability to know which parameters has changed between calls to 
decode, and so only perform the (probably expensive) update of those 
parameters on the hardware side.



- set_picture_parameters: Can be passed to decode_bitstream/macroblock
Same as above, take a look at the enum VABufferType  in the VAAPI 
specification (http://cgit.freedesktop.org/libva/tree/va/va.h).

- set_decode_target: Idem
- create_decode_buffer/set_decode_buffer/destroy_decode_buffer:
 Even if a decoder wants it, the state tracker has no business knowing
 about it.
Unfortunately that isn't true, only VDPAU hasn't a concept of giving the 
application a certain control over the buffers. XvMC has the implicit 
association of internal decode buffers with destination surfaces, and 
VAAPI gives the application both an one shot copy and a map/unmap 
interface to provide buffer content.


When we want to avoid unnecessary coping around of buffer content then 
this interface has to become even more complicated, like new buffer 
types, using create user buffer, mapping/unmapping of to be decoded data 
buffers etc.



flush is changed to only flush a single pipe_video_buffer,
this should reduce the state that needs to be maintained for XvMC otherwise.
I can't really see how is that improving the situation, all it does is 
reducing the interface to what is needed for VDPAU, but totally ignoring 
requirements for VAAPI for example.

Note: internally you can still use those calls, as long as the *public* api
would be reduced to this, pipe_video_buffer is specific to each driver,
so you can put in a lot of state there if you choose to do so. For example,
quant matrix is not used in vc-1, and h264 will need a different
way for set_reference_frames, see struct VdpReferenceFrameH264. This is why
I want to remove those calls, and put them into picture_desc..
Correct, those calls need to be extended to support more codecs, but 
just stuffing everything into a single structure also doesn't seems to 
be a solution that will work for a wider view of different state 
trackers or hardware.

struct pipe_video_buffer I'm less sure about, especially wrt interlacing
and alignment it needs more thought. height and width are aligned to 64
on nvidia, and requires a special tiling flag. The compositor will need
to become aware of interlacing, to be able to play back interlaced videos.
Alignment isn't so much of an issue. Take a look at 
vl_video_buffer_create, it is already aligning the width/height to a 
power of two if the hardware needs that. So it is perfectly legal for a 
video buffer to be larger than the requested size, this is already 
tested with the current color conversion code and should work fine with 
both XvMC and VDPAU.



I honestly haven't read up on interlacing yet, and haven't found a video
to test it with, so that part is just speculative crap, and might change
when I find out more about it.
Regarding deinterlacing I'm also not really sure what we should do about 
it, but all interfaces (XvMC/VDPAU/VAAPI/DXVA) seems to treat surfaces 
as progressive. So I think the output of a decode call should already be 
in a progressive format (or can be at least handled as one for sampling).



Testing interlaced  videos that decode correctly with nvidia vdpau would help
a lot to figure out what the proper way to handle interlacing would be, so if
someone has a bunch that play with nvidia accelerated vdpau&  mplayer correctly,
could you please link them? ;)
I will try to look around some more, but at least for MPEG2 I haven't 
found any interlaced videos for testing also. I will leave you a note if 
I find something.

/**
  * output for decoding / input for displaying
  */
struct pipe_video_buffer
{
struct pipe_context *context;

enum pipe_format buffer_format;
// Note: buffer_format may change as a result of put_bits, or call to 
decode_bitstream
// afaict there is no guarantee a buffer filled with put_bits can be used 
as reference
// frame to decode_bitstream
Altering the video buffer format as a result t

Re: [Mesa-dev] Problem building current git with osmesa and no gallium

2011-11-16 Thread Matthew Cattell

Brian Paul  vmware.com> writes:

> 
> On 03/10/2011 08:30 PM, tom fogal wrote:
> > Kenneth Graunke whitecape.org>  writes:
> >> On Thursday, March 10, 2011 01:17:04 PM Alexander Neundorf wrote:
> >>> While at it (sorry for newbie questions), do I need gallium (maybe
> >>> swrast) when I want only osmesa rendering into a software buffer ?
> >>
> >> I don't think OSMesa requires Gallium, but I've never used it.
> >
> > Kenneth's correct.  We build OSMesa all the time without gallium.  In
> > this case OSMesa uses the swrast driver/backend/whatnot.  The 'xlib'
> > driver also uses this backend.
> >
> > I believe there is a way to build both of these on top of softpipe --
> > the software/reference gallium implementation -- but my project hasn't
> > jumped on that boat yet.  Someday.
> 
> OSMesa doesn't work with gallium yet.  I'd like to do that sometime 
> though.  llvmpipe would be much faster than swrast in many cases.
> 
> -Brian
> 

Hi,

I've been trying to build mesa with the following combination:
osmesa
llvm
egl
gles2
(with and without --with-drivers=fbdev,wayland)

I am puzzled about the terminology but when I got my build to succeed, using a 
sha1 from 14th July 2011, I tried testing it and found that eglInitialize was 
failing. This was apparently because it couldn't find the display. I am using 
and X-less environment and want to render directly to fb0 or to some shared 
memory, hence the inclusion of osmesa.

Am I on the right track here? The combinations of autoconf options seem to 
change a lot and I can't find much in the way of documentation to see which 
combinations and options are valid and correct for the particular sha1 I'm 
using.

Can you point me in the right direction?

Best Regards,

Matthew Cattell

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] reworking pipe_video_decoder / pipe_video_buffer

2011-11-16 Thread Maarten Lankhorst

Hey,

On 11/16/2011 02:47 PM, Christian König wrote:
> On 15.11.2011 17:52, Maarten Lankhorst wrote:
>> Deleted:
>> - begin_frame/end_frame: Was only useful for XvMC, should be folded into 
>> flush..
>
> I'm not completely happy with the current interface also, but if you remove 
> the state tracker ability to control how many buffers are used, which in turn 
> lets the VDPAU and XvMC state tracker use the same buffering algorithm. This 
> is quite bad, because for XvMC we are forced to use a buffer for each 
> destination surface, while in the VDPAU/VAAPI case a much simpler method 
> should be used.
I wasn't aware of that, the patch could easily be changed for entrypoint <= 
bitstream.
Only create a single decoder buffer, otherwise attach it to the surface.
I still think it should be removed from the state tracker though, and it would
still be possible..

>> - set_quant_matrix/set_reference_frames:
>>  they should become part of picture_desc,
>>  not all codecs deal with it in the same way,
>>  and some may not have all of the calls.
> That is true, but also intended. The idea behind it is that it is not 
> necessary for all codecs to set all buffer types, but instead inform the 
> driver that a specific buffer type has changed. This gives the driver the 
> ability to know which parameters has changed between calls to decode, and so 
> only perform the (probably expensive) update of those parameters on the 
> hardware side.
This is why I used the flush call for XvMC as a way to signal parameter 
changes. If you looked at the
decode_macroblock for g3dvl, it checks if (dec->current_buffer == 
target->decode_buffer) in
begin_frame, so it doesn't flush state.

For VA-API, I haven't had the .. pleasure(?) of playing around with it, and it 
seems
to be only stubs.

If the decode_bitstream interface is changed to get all bitstream buffers at 
the same time,
there wouldn't be overhead to doing it like this. For a single picture it's 
supposed to stay constant,
so for vdpau the sane way would be: set picture parameters for hardware 
(includes EVERYTHING),
write all bitstream buffers to a hardware bo, wait until magic is done. Afaict, 
there isn't even a sane
way to only submit partial buffers, so it's just a bunch of overhead for me.

nvidia doesn't support va-api, it handles the entire process from picture 
parameters
to a decoded buffer internally so it always convert the picture parameters into
something the hardware can understand, every frame.

>> - set_picture_parameters: Can be passed to decode_bitstream/macroblock
> Same as above, take a look at the enum VABufferType  in the VAAPI 
> specification (http://cgit.freedesktop.org/libva/tree/va/va.h).
>> - set_decode_target: Idem
>> - create_decode_buffer/set_decode_buffer/destroy_decode_buffer:
>>  Even if a decoder wants it, the state tracker has no business knowing
>>  about it.
> Unfortunately that isn't true, only VDPAU hasn't a concept of giving the 
> application a certain control over the buffers. XvMC has the implicit 
> association of internal decode buffers with destination surfaces, and VAAPI 
> gives the application both an one shot copy and a map/unmap interface to 
> provide buffer content.
>
> When we want to avoid unnecessary coping around of buffer content then this 
> interface has to become even more complicated, like new buffer types, using 
> create user buffer, mapping/unmapping of to be decoded data buffers etc.
There is no guarantee a user created video buffer can be used as reference 
frame, I honestly don't even know if it's possible.

Furthermore I don't see any code in state_trackers/va except some stubs, so I 
cant discuss an interface that's not even there to begin with.

>> flush is changed to only flush a single pipe_video_buffer,
>> this should reduce the state that needs to be maintained for XvMC otherwise.
> I can't really see how is that improving the situation, all it does is 
> reducing the interface to what is needed for VDPAU, but totally ignoring 
> requirements for VAAPI for example.
See above.

>> Note: internally you can still use those calls, as long as the *public* api
>> would be reduced to this, pipe_video_buffer is specific to each driver,
>> so you can put in a lot of state there if you choose to do so. For example,
>> quant matrix is not used in vc-1, and h264 will need a different
>> way for set_reference_frames, see struct VdpReferenceFrameH264. This is why
>> I want to remove those calls, and put them into picture_desc..
> Correct, those calls need to be extended to support more codecs, but just 
> stuffing everything into a single structure also doesn't seems to be a 
> solution that will work for a wider view of different state trackers or 
> hardware.
All hardware has its own unique approach with their own api, so it would make 
sense if there is a separate call for decode_va too,
with maybe some bits that tell what changed, so it only has to upload those 
specific parts..

>> struc

Re: [Mesa-dev] [PATCH] mesa: make sure all lighting tables are updated before the computation

2011-11-16 Thread Brian Paul


On 11/15/2011 08:18 PM, Yuanhan Liu wrote:

On Tue, Nov 15, 2011 at 07:26:52AM -0700, Brian Paul wrote:

On 11/15/2011 12:51 AM, Yuanhan Liu wrote:

Make sure all lighting tables are updated before using the table to
calculate something, say using _SpotExpTable to calculate
_VP_inf_spot_attenuation.

Signed-off-by: Yuanhan Liu
---
  src/mesa/main/light.c |3 +++
  1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c
index c27cf1d..60daa89 100644
--- a/src/mesa/main/light.c
+++ b/src/mesa/main/light.c
@@ -1138,6 +1138,9 @@ compute_light_positions( struct gl_context *ctx )
TRANSFORM_NORMAL( ctx->_EyeZDir, eye_z, 
ctx->ModelviewMatrixStack.Top->m );
 }

+   /* Make sure all the light tables are updated before the computation */
+   _mesa_validate_all_lighting_tables(ctx);
+
 foreach (light,&ctx->Light.EnabledList) {

if (ctx->_NeedEyeCoords) {


Does this fix a specific bug?

Nope(well, I didn't search the bugzilla). But it did fix an issue. I
wrote a piglit case for this issue.

---
 From 47796b40c7f300e289e3a82361164b62e9e3411c Mon Sep 17 00:00:00 2001
From: Yuanhan Liu
Date: Wed, 16 Nov 2011 11:16:18 +0800
Subject: [PATCH] Add an infinite spot light test case

Signed-off-by: Yuanhan Liu
---
  tests/all.tests |1 +
  tests/general/CMakeLists.gl.txt |1 +
  tests/general/infinite-spot-light.c |   99 +++
  3 files changed, 101 insertions(+), 0 deletions(-)
  create mode 100644 tests/general/infinite-spot-light.c

diff --git a/tests/all.tests b/tests/all.tests
index 2a85b7a..664a9b4 100644
--- a/tests/all.tests
+++ b/tests/all.tests
@@ -254,6 +254,7 @@ add_plain_test(general, 'geterror-invalid-enum')
  add_plain_test(general, 'geterror-inside-begin')
  add_plain_test(general, 'gl30basic')
  add_plain_test(general, 'hiz')
+add_plain_test(general, 'infinite-spot-light')
  add_plain_test(general, 'isbufferobj')
  add_plain_test(general, 'line-aa-width')
  add_plain_test(general, 'linestipple')
diff --git a/tests/general/CMakeLists.gl.txt b/tests/general/CMakeLists.gl.txt
index 8ff3771..2cfc7be 100644
--- a/tests/general/CMakeLists.gl.txt
+++ b/tests/general/CMakeLists.gl.txt
@@ -62,6 +62,7 @@ if (UNIX)
target_link_libraries (hiz m)
  endif (UNIX)
  add_executable (early-z early-z.c)
+add_executable (infinite-spot-light infinite-spot-light.c)
  add_executable (isbufferobj isbufferobj.c)
  add_executable (linestipple linestipple.c)
  add_executable (line-aa-width line-aa-width.c)
diff --git a/tests/general/infinite-spot-light.c 
b/tests/general/infinite-spot-light.c
new file mode 100644
index 000..cb8616f
--- /dev/null
+++ b/tests/general/infinite-spot-light.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Yuanhan Liu
+ */
+
+/** @file infinite-spot-light.c
+ *
+ * This is a case that sounds doesn't make sense, but it is allowed by glSpec
+ * (see section 2.14.1 Lightin of glspec 2.1.pdf).  While writing this case,
+ * it servers as two purposes:
+ *
+ *   1. Test if swrast is OK with this case.
+ *  The old mesa code would always compute a zero attenuation, thus always
+ *  get a black lighting color.
+ *
+ *   2. Test if hardware rendering(only i965 tested) is OK with this patch.
+ *  The old mesa code would skip the attenuation and spot computation while
+ *  infinite light is met. This is somehow not permitted by glSpec.
+ */
+
+#include "piglit-util.h"
+
+int piglit_width = 100, piglit_height = 100;
+int piglit_window_mode = GLUT_RGB | GLUT_DOUBLE;
+
+/* Already normalized, and 0.5 would be the expected color */
+static GLfloat dir[3] = {0.866025404, 0.0, 0.5};
+static GLfloat pos[4] = {0.0, 0.0, -1.0, 0.0}; /* infinite */
+static GLfloat light_ambient[3] = {1.0, 0.0, 0.0};
+
+enum pigl

[Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Brian Paul

In slow_read_depth_stencil_pixels_separate() we might have separate
depth and stencil buffers or a combined buffer.  In the later case,
don't map the buffer twice.  This function is used when the depth
scale/bias pixel transfer values are not the defaults.

Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963
---
 src/mesa/main/readpix.c |   14 +++---
 1 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 8550618..0b41de6 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct gl_context 
*ctx,
GLubyte *depthMap, *stencilMap;
int depthStride, stencilStride, j;
 
+   /* The depth and stencil buffers might be separate, or a single buffer.
+* If one buffer, only map it once.
+*/
ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
   GL_MAP_READ_BIT, &depthMap, &depthStride);
-   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
-  GL_MAP_READ_BIT, &stencilMap, &stencilStride);
+   if (stencilRb != depthRb) {
+  ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
+  GL_MAP_READ_BIT, &stencilMap,
+  &stencilStride);
+   }
 
for (j = 0; j < height; j++) {
   GLubyte stencilVals[MAX_WIDTH];
@@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct gl_context 
*ctx,
}
 
ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
-   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
+   if (stencilRb != depthRb) {
+  ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
+   }
 }
 
 
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Brian Paul


On 11/16/2011 01:38 AM, Theiss, Ingo wrote:

Dear devs,

I am getting segmentation faults when running piglit r600.tests with latest 
mesa build from git:

Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init 
failed for (1431699456, 0x0006)
Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] 
*ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12)
Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 
0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in 
r600_dri.so[7fb2716c8000+dba000]

---
Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil 
-auto readpixels defau'.
Program terminated with signal 11, Segmentation fault.
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
63 ur->vtbl->transfer_unmap(pipe, transfer);
(gdb) bt
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
#1  0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, 
context=0x2441df0)
 at ../../src/gallium/auxiliary/util/u_inlines.h:398
#2  st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at 
state_tracker/st_cb_fbo.c:704
#3  0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate (ctx=0x254b720, 
x=, y=, width=123,
 height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", 
dstStride=984) at main/readpix.c:427
#4  0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, 
pixels=, type=36269, height=,
 width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469
#5  _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, 
format=, type=36269, packing=0x255afb8,
 pixels=) at main/readpix.c:508
#6  0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, 
type=36269, format=34041, height=123, width=123, y=0, x=0,
 bufSize=) at main/readpix.c:753
#7  _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, 
type=36269, pixels=0x7fff653d0870) at main/readpix.c:761
#8  0x0042eb4a in read_32f_8 ()
#9  0x0042ec0b in compare ()
#10 0x0042ef73 in test_readpixels ()
#11 0x0042f7aa in piglit_display ()
#12 0x00430241 in display ()
#13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3
#14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3
#15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3
#16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3
#17 0x00430967 in main ()
---

My glxinfo:

OpenGL vendor string: X.Org
OpenGL renderer string: Gallium 0.4 on AMD BARTS
OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca)
OpenGL shading language version string: 1.20


A general question: I am interested in running regular piglit tests on my 
hardware and would like to help by reporting problems/bugs. What´s the correct 
way of doing so? Should I post any problems to mesa-dev or should I open a bug.

Thanks and keep up the good work!


I think that this will be fixed by the patch I just posted (mesa: 
don't map depth+stencil buffer twice in glReadPixels())  Can you try it?


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.

2011-11-16 Thread Brian Paul


On 11/15/2011 11:16 AM, vlj wrote:

blinking-teapot is an UBO demo.


I think you missed a bunch of other comments I made on the first 
version.  Can you go back and re-read my reply?  Thanks.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Jose Fonseca

Looks good Brian.

BTW, we should probably be more defensive about map failures on this code. This 
is not a regression, as st_cb_readpixels.c also did a poor job.

Jose

- Original Message -
> In slow_read_depth_stencil_pixels_separate() we might have separate
> depth and stencil buffers or a combined buffer.  In the later case,
> don't map the buffer twice.  This function is used when the depth
> scale/bias pixel transfer values are not the defaults.
> 
> Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963
> ---
>  src/mesa/main/readpix.c |   14 +++---
>  1 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> index 8550618..0b41de6 100644
> --- a/src/mesa/main/readpix.c
> +++ b/src/mesa/main/readpix.c
> @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct
> gl_context *ctx,
> GLubyte *depthMap, *stencilMap;
> int depthStride, stencilStride, j;
>  
> +   /* The depth and stencil buffers might be separate, or a single
> buffer.
> +* If one buffer, only map it once.
> +*/
> ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
>  GL_MAP_READ_BIT, &depthMap, &depthStride);
> -   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
> -GL_MAP_READ_BIT, &stencilMap, &stencilStride);
> +   if (stencilRb != depthRb) {
> +  ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width,
> height,
> +  GL_MAP_READ_BIT, &stencilMap,
> +  &stencilStride);
> +   }
>  
> for (j = 0; j < height; j++) {
>GLubyte stencilVals[MAX_WIDTH];
> @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct
> gl_context *ctx,
> }
>  
> ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
> -   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> +   if (stencilRb != depthRb) {
> +  ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> +   }
>  }

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Brian Paul


On 11/16/2011 08:43 AM, Jose Fonseca wrote:

Looks good Brian.

BTW, we should probably be more defensive about map failures on this code. This 
is not a regression, as st_cb_readpixels.c also did a poor job.


Yeah, it's only a matter of time until we find a case where 
MapRenderbuffer (or MapTextureImage) fails and we need to be more 
robust...


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Vadim Girlin

On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote:
> In slow_read_depth_stencil_pixels_separate() we might have separate
> depth and stencil buffers or a combined buffer.  In the later case,
> don't map the buffer twice.  This function is used when the depth
> scale/bias pixel transfer values are not the defaults.
> 
> Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963
> ---
>  src/mesa/main/readpix.c |   14 +++---
>  1 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> index 8550618..0b41de6 100644
> --- a/src/mesa/main/readpix.c
> +++ b/src/mesa/main/readpix.c
> @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct 
> gl_context *ctx,
> GLubyte *depthMap, *stencilMap;
> int depthStride, stencilStride, j;
>  
> +   /* The depth and stencil buffers might be separate, or a single buffer.
> +* If one buffer, only map it once.
> +*/
> ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
>  GL_MAP_READ_BIT, &depthMap, &depthStride);
> -   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
> -GL_MAP_READ_BIT, &stencilMap, &stencilStride);
> +   if (stencilRb != depthRb) {
> +  ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
> +  GL_MAP_READ_BIT, &stencilMap,
> +  &stencilStride);
> +   }
>  

Probably we want the following here:

   else
  stencilMap = depthMap;


Vadim

> for (j = 0; j < height; j++) {
>GLubyte stencilVals[MAX_WIDTH];
> @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct gl_context 
> *ctx,
> }
>  
> ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
> -   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> +   if (stencilRb != depthRb) {
> +  ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> +   }
>  }
>  
> 



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] read_rgba_pixels: Don't force clamping if the renderbuffer is already clamped.

2011-11-16 Thread Michel Dänzer

From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 src/mesa/main/readpix.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 8550618..582adc3 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -285,7 +285,8 @@ read_rgba_pixels( struct gl_context *ctx,
   return;
 
if ((ctx->Color._ClampReadColor == GL_TRUE || type != GL_FLOAT) &&
-   !_mesa_is_integer_format(format)) {
+   !_mesa_is_integer_format(format) &&
+   _mesa_get_format_datatype(rb->Format) != GL_UNSIGNED_NORMALIZED) {
   transferOps |= IMAGE_CLAMP_BIT;
}
 
-- 
1.7.7.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Vadim Girlin

On Wed, 2011-11-16 at 20:41 +0400, Vadim Girlin wrote:
> On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote:
> > In slow_read_depth_stencil_pixels_separate() we might have separate
> > depth and stencil buffers or a combined buffer.  In the later case,
> > don't map the buffer twice.  This function is used when the depth
> > scale/bias pixel transfer values are not the defaults.
> > 
> > Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963
> > ---
> >  src/mesa/main/readpix.c |   14 +++---
> >  1 files changed, 11 insertions(+), 3 deletions(-)
> > 
> > diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
> > index 8550618..0b41de6 100644
> > --- a/src/mesa/main/readpix.c
> > +++ b/src/mesa/main/readpix.c
> > @@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct 
> > gl_context *ctx,
> > GLubyte *depthMap, *stencilMap;
> > int depthStride, stencilStride, j;
> >  
> > +   /* The depth and stencil buffers might be separate, or a single buffer.
> > +* If one buffer, only map it once.
> > +*/
> > ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
> >GL_MAP_READ_BIT, &depthMap, &depthStride);
> > -   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
> > -  GL_MAP_READ_BIT, &stencilMap, &stencilStride);
> > +   if (stencilRb != depthRb) {
> > +  ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
> > +  GL_MAP_READ_BIT, &stencilMap,
> > +  &stencilStride);
> > +   }
> >  
> 
> Probably we want the following here:
> 
>else
>   stencilMap = depthMap;
> 
> 

And I also forgot stencilStride.

Vadim

> Vadim
> 
> > for (j = 0; j < height; j++) {
> >GLubyte stencilVals[MAX_WIDTH];
> > @@ -424,7 +430,9 @@ slow_read_depth_stencil_pixels_separate(struct 
> > gl_context *ctx,
> > }
> >  
> > ctx->Driver.UnmapRenderbuffer(ctx, depthRb);
> > -   ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> > +   if (stencilRb != depthRb) {
> > +  ctx->Driver.UnmapRenderbuffer(ctx, stencilRb);
> > +   }
> >  }
> >  
> > 
> 
> 
> 



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa: don't map depth+stencil buffer twice in glReadPixels()

2011-11-16 Thread Brian Paul


On 11/16/2011 09:54 AM, Vadim Girlin wrote:

On Wed, 2011-11-16 at 20:41 +0400, Vadim Girlin wrote:

On Wed, 2011-11-16 at 07:53 -0700, Brian Paul wrote:

In slow_read_depth_stencil_pixels_separate() we might have separate
depth and stencil buffers or a combined buffer.  In the later case,
don't map the buffer twice.  This function is used when the depth
scale/bias pixel transfer values are not the defaults.

Fixes http://bugs.freedesktop.org/show_bug.cgi?id=42963
---
  src/mesa/main/readpix.c |   14 +++---
  1 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 8550618..0b41de6 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -402,10 +402,16 @@ slow_read_depth_stencil_pixels_separate(struct gl_context 
*ctx,
 GLubyte *depthMap, *stencilMap;
 int depthStride, stencilStride, j;

+   /* The depth and stencil buffers might be separate, or a single buffer.
+* If one buffer, only map it once.
+*/
 ctx->Driver.MapRenderbuffer(ctx, depthRb, x, y, width, height,
   GL_MAP_READ_BIT,&depthMap,&depthStride);
-   ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
-  GL_MAP_READ_BIT,&stencilMap,&stencilStride);
+   if (stencilRb != depthRb) {
+  ctx->Driver.MapRenderbuffer(ctx, stencilRb, x, y, width, height,
+  GL_MAP_READ_BIT,&stencilMap,
+&stencilStride);
+   }



Probably we want the following here:

else
   stencilMap = depthMap;




And I also forgot stencilStride.

Vadim


Thanks for catching that.  I'll post a new patch.

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: initialize stencilMap, Stride if stencilRb==depthRb

2011-11-16 Thread Brian Paul

---
 src/mesa/main/readpix.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 0b41de6..86b8753 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -412,6 +412,10 @@ slow_read_depth_stencil_pixels_separate(struct gl_context 
*ctx,
   GL_MAP_READ_BIT, &stencilMap,
   &stencilStride);
}
+   else {
+  stencilMap = depthMap;
+  stencilStride = depthStride;
+   }
 
for (j = 0; j < height; j++) {
   GLubyte stencilVals[MAX_WIDTH];
-- 
1.7.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Theiss, Ingo

 
Am Mittwoch, 16. November 2011 15:58 CET, Brian Paul  
schrieb: 
 
> On 11/16/2011 01:38 AM, Theiss, Ingo wrote:
> > Dear devs,
> >
> > I am getting segmentation faults when running piglit r600.tests with latest 
> > mesa build from git:
> >
> > Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: 
> > object_init failed for (1431699456, 0x0006)
> > Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] 
> > *ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12)
> > Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault 
> > at 0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in 
> > r600_dri.so[7fb2716c8000+dba000]
> >
> > ---
> > Core was generated by 
> > `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil -auto readpixels 
> > defau'.
> > Program terminated with signal 11, Segmentation fault.
> > #0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at 
> > util/u_resource.c:63
> > 63 ur->vtbl->transfer_unmap(pipe, transfer);
> > (gdb) bt
> > #0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at 
> > util/u_resource.c:63
> > #1  0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, 
> > context=0x2441df0)
> >  at ../../src/gallium/auxiliary/util/u_inlines.h:398
> > #2  st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at 
> > state_tracker/st_cb_fbo.c:704
> > #3  0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate 
> > (ctx=0x254b720, x=, y=, width=123,
> >  height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", 
> > dstStride=984) at main/readpix.c:427
> > #4  0x7f4cb36601d6 in read_depth_stencil_pixels 
> > (packing=0x7fff653d0750, pixels=, type=36269, 
> > height=,
> >  width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469
> > #5  _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, 
> > format=, type=36269, packing=0x255afb8,
> >  pixels=) at main/readpix.c:508
> > #6  0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, 
> > type=36269, format=34041, height=123, width=123, y=0, x=0,
> >  bufSize=) at main/readpix.c:753
> > #7  _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, 
> > type=36269, pixels=0x7fff653d0870) at main/readpix.c:761
> > #8  0x0042eb4a in read_32f_8 ()
> > #9  0x0042ec0b in compare ()
> > #10 0x0042ef73 in test_readpixels ()
> > #11 0x0042f7aa in piglit_display ()
> > #12 0x00430241 in display ()
> > #13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3
> > #14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3
> > #15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3
> > #16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3
> > #17 0x00430967 in main ()
> > ---
> >
> > My glxinfo:
> >
> > OpenGL vendor string: X.Org
> > OpenGL renderer string: Gallium 0.4 on AMD BARTS
> > OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca)
> > OpenGL shading language version string: 1.20
> >
> >
> > A general question: I am interested in running regular piglit tests on my 
> > hardware and would like to help by reporting problems/bugs. What´s the 
> > correct way of doing so? Should I post any problems to mesa-dev or should I 
> > open a bug.
> >
> > Thanks and keep up the good work!
> 
> I think that this will be fixed by the patch I just posted (mesa: 
> don't map depth+stencil buffer twice in glReadPixels())  Can you try it?
> 
> -Brian
> 
> 

Hi Brian,

I have applied the patch but still getting segfaults from the following piglit 
tests. The initial segfault I have posted seems to be gone. 

1. fbo-depthstencil -auto drawpixels GL_DEPTH24_STENCIL8 32F_24_8_REV
fbo-depthstencil -auto readpixels GL_DEPTH24_STENCIL8 32F_24_8_REV

Here is the backtrace:

Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil 
-auto readpixels GL_DE'.
Program terminated with signal 11, Segmentation fault.
#0  unpack_ubyte_s_Z24_S8 (n=, dst=, 
src=) at main/format_unpack.c:1896
1896  dst[i] = src32[i] >> 24;
(gdb) bt
#0  unpack_ubyte_s_Z24_S8 (n=, dst=, 
src=) at main/format_unpack.c:1896
#1  _mesa_unpack_ubyte_stencil_row (format=, n=123, 
src=0x7fff609b3c79, dst=0x7fff60977d30 "") at main/format_unpack.c:1918
#2  0x7fdfa294e8cd in slow_read_depth_stencil_pixels_separate 
(ctx=0x1963720, x=, y=, width=123, 
height=123, type=36269, packing=0x7fff6097bdf0, dst=0x7fff6097d9f8 "", 
dstStride=984) at main/readpix.c:421
#3  0x7fdfa294f1f6 in read_depth_stencil_pixels (packing=0x7fff6097bdf0, 
pixels=, type=36269, height=, 
width=123, y=0, x=0, ctx=0x1963720) at main/readpix.c:477
#4  _mesa_readpixels (ctx=0x1963720, x=0, y=0, width=123, height=123, 
format=, type=36269, packing=0x1972fb8, 
pixels=) at main/readpix.c:516
#5  0x7fdfa294fbdd in _mesa_ReadnPixelsARB (pixels=0x7fff6097bf10, 
type=36269, format=34041, height=123, width=123, y=0, x=0, 
bufSize=) at main/readpix.c

Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register

2011-11-16 Thread Tilman Sauerbeck

Vadim Girlin [2011-11-13 22:08]:
> Emit MOVA* instruction only when AR is used.
> 
> Signed-off-by: Vadim Girlin 
> ---
> 
> Tested on evergreen: no regressions, fixes some variable-indexing tests.

On my rv730 this patch causes broken rendering in the game Heroes of
Newerth. I have an apitrace file that reproduces the issue but
unfortunately that file is 3.x GB.

I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad)
available at http://files.code-monkey.de/piglit.tar.bz2 .

Regards,
Tilman

-- 
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing on usenet and in e-mail?


pgp5VzF609PDf.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register

2011-11-16 Thread Vadim Girlin

On Wed, 2011-11-16 at 18:57 +0100, Tilman Sauerbeck wrote:
> Vadim Girlin [2011-11-13 22:08]:
> > Emit MOVA* instruction only when AR is used.
> > 
> > Signed-off-by: Vadim Girlin 
> > ---
> > 
> > Tested on evergreen: no regressions, fixes some variable-indexing tests.
> 
> On my rv730 this patch causes broken rendering in the game Heroes of
> Newerth. I have an apitrace file that reproduces the issue but
> unfortunately that file is 3.x GB.
> 
> I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad)
> available at http://files.code-monkey.de/piglit.tar.bz2 .
> 

Please create the shader dump for one of the broken tests, e.g. :

R600_DUMP_SHADERS=1 bin/shader_runner 
tests/spec/glsl-1.10/execution/variable-indexing/fs-temp-array-mat2-col-row-wr.shader_test
 -auto -fbo

Vadim

> Regards,
> Tilman
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Brian Paul


On 11/16/2011 10:09 AM, Theiss, Ingo wrote:


Am Mittwoch, 16. November 2011 15:58 CET, Brian Paul  
schrieb:


On 11/16/2011 01:38 AM, Theiss, Ingo wrote:

Dear devs,

I am getting segmentation faults when running piglit r600.tests with latest 
mesa build from git:

Nov 16 09:21:33 spoc kernel: [74538.198983] radeon :05:00.0: object_init 
failed for (1431699456, 0x0006)
Nov 16 09:21:33 spoc kernel: [74538.198987] [drm:radeon_gem_object_create] 
*ERROR* Failed to allocate GEM object (1431699456, 4, 4096, -12)
Nov 16 09:21:37 spoc kernel: [74542.266472] fbo-depthstenci[6867]: segfault at 
0 ip 7fb271b5414a sp 7fffbfdf8fd0 error 4 in 
r600_dri.so[7fb2716c8000+dba000]

---
Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil 
-auto readpixels defau'.
Program terminated with signal 11, Segmentation fault.
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
63 ur->vtbl->transfer_unmap(pipe, transfer);
(gdb) bt
#0  u_transfer_unmap_vtbl (pipe=0x2441df0, transfer=0x0) at util/u_resource.c:63
#1  0x7f4cb36cc0eb in pipe_transfer_unmap (transfer=, 
context=0x2441df0)
  at ../../src/gallium/auxiliary/util/u_inlines.h:398
#2  st_UnmapRenderbuffer (ctx=, rb=0x25b7fc0) at 
state_tracker/st_cb_fbo.c:704
#3  0x7f4cb365f925 in slow_read_depth_stencil_pixels_separate (ctx=0x254b720, 
x=, y=, width=123,
  height=123, type=36269, packing=0x7fff653d0750, dst=0x7fff653ee138 "", 
dstStride=984) at main/readpix.c:427
#4  0x7f4cb36601d6 in read_depth_stencil_pixels (packing=0x7fff653d0750, 
pixels=, type=36269, height=,
  width=123, y=0, x=0, ctx=0x254b720) at main/readpix.c:469
#5  _mesa_readpixels (ctx=0x254b720, x=0, y=0, width=123, height=123, 
format=, type=36269, packing=0x255afb8,
  pixels=) at main/readpix.c:508
#6  0x7f4cb3660bbd in _mesa_ReadnPixelsARB (pixels=0x7fff653d0870, 
type=36269, format=34041, height=123, width=123, y=0, x=0,
  bufSize=) at main/readpix.c:753
#7  _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, 
type=36269, pixels=0x7fff653d0870) at main/readpix.c:761
#8  0x0042eb4a in read_32f_8 ()
#9  0x0042ec0b in compare ()
#10 0x0042ef73 in test_readpixels ()
#11 0x0042f7aa in piglit_display ()
#12 0x00430241 in display ()
#13 0x7f4cb6f11808 in ?? () from /usr/lib/libglut.so.3
#14 0x7f4cb6f15339 in fgEnumWindows () from /usr/lib/libglut.so.3
#15 0x7f4cb6f11ca1 in glutMainLoopEvent () from /usr/lib/libglut.so.3
#16 0x7f4cb6f12540 in glutMainLoop () from /usr/lib/libglut.so.3
#17 0x00430967 in main ()
---

My glxinfo:

OpenGL vendor string: X.Org
OpenGL renderer string: Gallium 0.4 on AMD BARTS
OpenGL version string: 2.1 Mesa 7.12-devel (git-4f677ca)
OpenGL shading language version string: 1.20


A general question: I am interested in running regular piglit tests on my 
hardware and would like to help by reporting problems/bugs. What´s the correct 
way of doing so? Should I post any problems to mesa-dev or should I open a bug.

Thanks and keep up the good work!


I think that this will be fixed by the patch I just posted (mesa:
don't map depth+stencil buffer twice in glReadPixels())  Can you try it?

-Brian




Hi Brian,

I have applied the patch but still getting segfaults from the following piglit 
tests. The initial segfault I have posted seems to be gone.

1. fbo-depthstencil -auto drawpixels GL_DEPTH24_STENCIL8 32F_24_8_REV
 fbo-depthstencil -auto readpixels GL_DEPTH24_STENCIL8 32F_24_8_REV

Here is the backtrace:

Core was generated by `/home/itheiss/build/mesa.git/piglit/bin/fbo-depthstencil 
-auto readpixels GL_DE'.
Program terminated with signal 11, Segmentation fault.
#0  unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896
1896  dst[i] = src32[i]>>  24;
(gdb) bt
#0  unpack_ubyte_s_Z24_S8 (n=, dst=, src=) at main/format_unpack.c:1896
#1  _mesa_unpack_ubyte_stencil_row (format=, n=123, src=0x7fff609b3c79, 
dst=0x7fff60977d30 "") at main/format_unpack.c:1918
#2  0x7fdfa294e8cd in slow_read_depth_stencil_pixels_separate (ctx=0x1963720, 
x=, y=, width=123,
 height=123, type=36269, packing=0x7fff6097bdf0, dst=0x7fff6097d9f8 "", 
dstStride=984) at main/readpix.c:421
#3  0x7fdfa294f1f6 in read_depth_stencil_pixels (packing=0x7fff6097bdf0, 
pixels=, type=36269, height=,
 width=123, y=0, x=0, ctx=0x1963720) at main/readpix.c:477
#4  _mesa_readpixels (ctx=0x1963720, x=0, y=0, width=123, height=123, 
format=, type=36269, packing=0x1972fb8,
 pixels=) at main/readpix.c:516
#5  0x7fdfa294fbdd in _mesa_ReadnPixelsARB (pixels=0x7fff6097bf10, 
type=36269, format=34041, height=123, width=123, y=0, x=0,
 bufSize=) at main/readpix.c:761
#6  _mesa_ReadPixels (x=0, y=0, width=123, height=123, format=34041, 
type=36269, pixels=0x7fff6097bf10) at main/readpix.c:769
#7  0x0042eb4a in read_32f_8 ()
#8  0x0042ec0b in compare ()
#9  0x0042ef73 in test_read

[Mesa-dev] [PATCH 00/13] i965 vs: Split vec4_visitor into two classes.

2011-11-16 Thread Paul Berry

In order to implement transform feedback on Gen6, we need to create a
geometry shader program that will stream out the transformed vertices.
It makes sense to re-use parts of the vec4_visitor infrastructure to
do this, because (a) VS and GS handle data very similarly, (b) it
allows us to take advantage of the optimization and register
allocation code in vec4_visitor, and (c) in the long run when we add
support for GLSL geometry shaders, we are going to need to use the
vec4_visitor infrastructure to compile them.

This patch series gets us ready for re-using parts of vec4_visitor, by
breaking it into a base class containing the parts we are likely to
re-use, and a derived class containing the parts we aren't.  The base
class, vec4_generator, is responsible for code generation, register
allocation, and optimization; it is independent of GLSL IR (with one
trivial exception), and mostly independent of the type of shader being
compiled.  The derived class, vec4_visitor, is responsible for
visiting the GLSL IR and performing vertex-shader-specific actions.

This split was already implicitly present in the code; this patch
series merely makes it more explicit.

The idea is that for now, we can make a class that derives from
vec4_generator to do transform feedback, and we won't have to worry
too much about interactions with the GLSL IR visiting code in
vec4_visitor.  Later, when we add support for GLSL geometry shaders,
we'll have to do further work on vec4_visitor, but we won't have to
touch vec4_generator very much.

Patches 01-11 are all preparatory refactoring: they make small
adjustments to the functioning of vec4_visitor so that the separation
between its IR visiting parts and its code generation parts is a
little more strict.  Patch 12 splits the classes up, and then patch 13
reformats the class declarations to clarify the relationship between
the two classes.

The split isn't perfect yet; there are a few places where
vec4_generator still makes implicit assumptions that are vertex-shader
specific (particularly in register allocation), and there are a few
places where vec4_visitor still does things that arguably should be
part of code generation (such as setting up scratch registers).  But I
think this is close enough that we can clean those up with later
patches on an as-needed basis.

Note: To ease reading the diffs, I didn't make any effort to move code
between .cpp files.  As a result, vec4_visitor and vec4_generator code
are intermingled in brw_vec4_visitor.cpp, brw_vec4_emit.cpp,
brw_vec4.cpp, brw_vec4_copy_propagation.cpp, and
brw_vec4_reg_allocate.cpp.  Once this patch is reviewed and we've
settled on how we want the classes to be split up, I'll make a
follow-up patch that rearranges the .cpp files to match the changes to
the class structure.

[PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class.
[PATCH 02/13] i965 vs: Make reg_allocate() return total_grf.
[PATCH 03/13] i965 vs: Return last_scratch from 
move_grf_array_access_to_scratch()
[PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function 
arguments.
[PATCH 05/13] i965 vs: Move brw_set_access_mode() call into generate_code().
[PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of 
generate_code().
[PATCH 07/13] i965 vs: Streamline the way fail is tracked.
[PATCH 08/13] i965 vs: Add get_debug_flag()
[PATCH 09/13] i965 vs: Add get_debug_name().
[PATCH 10/13] i965 vs: Make a separate optimize() method.
[PATCH 11/13] i965 vs: Make get_assignment_lhs() a method
[PATCH 12/13] i965 vs: Separate IR visiting from code generation in 
vec4_visitor;
[PATCH 13/13] i965 vs: Rearrange class declarations.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class.

2011-11-16 Thread Paul Berry

This patch removes two unused functions from vec4_visitor:
implied_mrf_writes() and emit_bool_comparison().  implied_mrf_writes()
is part of instruction scheduling, which has not been implemented for
vec4_visitor yet.  emit_bool_comparison() has never been used.

It also removes two declarations that did't have any functions
associated with them: reladdr_to_temp() and process_move_condition().
These functions appear to be a copy-paste error from when the
vec4_visitor class was first introduced.

Since later patches in this series refactor the vec4_visitor class
into two classes, it seems sensible to eliminate dead code first, so
that the refactoring job will be smaller.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp |   37 
 src/mesa/drivers/dri/i965/brw_vec4.h   |7 
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   14 -
 3 files changed, 0 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 54ef9b6..3971532 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -45,43 +45,6 @@ vec4_instruction::is_math()
   opcode == SHADER_OPCODE_INT_REMAINDER ||
   opcode == SHADER_OPCODE_POW);
 }
-/**
- * Returns how many MRFs an opcode will write over.
- *
- * Note that this is not the 0 or 1 implied writes in an actual gen
- * instruction -- the generate_* functions generate additional MOVs
- * for setup.
- */
-int
-vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
-{
-   if (inst->mlen == 0)
-  return 0;
-
-   switch (inst->opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-  return 1;
-   case SHADER_OPCODE_POW:
-  return 2;
-   case VS_OPCODE_URB_WRITE:
-  return 1;
-   case VS_OPCODE_PULL_CONSTANT_LOAD:
-  return 2;
-   case VS_OPCODE_SCRATCH_READ:
-  return 2;
-   case VS_OPCODE_SCRATCH_WRITE:
-  return 3;
-   default:
-  assert(!"not reached");
-  return inst->mlen;
-   }
-}
 
 bool
 src_reg::equals(src_reg *r)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 93ccda9..c03bcad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -338,8 +338,6 @@ public:
 
dst_reg *variable_storage(ir_variable *var);
 
-   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
-
src_reg src_reg_for_float(float val);
 
/**
@@ -444,8 +442,6 @@ public:
vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
 
-   int implied_mrf_writes(vec4_instruction *inst);
-
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
   dst_reg dst,
   src_reg src,
@@ -456,7 +452,6 @@ public:
void visit_instructions(const exec_list *list);
 
void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
-   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, 
src_reg src1);
void emit_if_gen6(ir_if *ir);
 
void emit_block_move(dst_reg *dst, src_reg *src,
@@ -512,8 +507,6 @@ public:
bool try_emit_sat(ir_expression *ir);
void resolve_ud_negate(src_reg *reg);
 
-   bool process_move_condition(ir_rvalue *ir);
-
void generate_code();
void generate_vs_instruction(vec4_instruction *inst,
struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 853c3ee..3327eef 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1015,20 +1015,6 @@ vec4_visitor::try_emit_sat(ir_expression *ir)
 }
 
 void
-vec4_visitor::emit_bool_comparison(unsigned int op,
-dst_reg dst, src_reg src0, src_reg src1)
-{
-   /* original gen4 does destination conversion before comparison. */
-   if (intel->gen < 5)
-  dst.type = src0.type;
-
-   emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
-
-   dst.type = BRW_REGISTER_TYPE_D;
-   emit(AND(dst, src_reg(dst), src_reg(0x1)));
-}
-
-void
 vec4_visitor::visit(ir_expression *ir)
 {
unsigned int operand;
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/13] i965 vs: Make reg_allocate() return total_grf.

2011-11-16 Thread Paul Berry

Previously, reg_allocate() (and reg_allocate_trivial()) stored the
total number of registers allocated (including payload registers) in
vec4_visitor::total_grf.  Now, it returns this value to
vec4_visitor::run(), which stores it in total_grf.

This helps pave the way for separating the IR visiting parts of
vec4_visitor (which include run()) from the code generation parts
(which include reg_allocate()) by making the data flow explicit
between them.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |4 +-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|2 +-
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp |   24 ++-
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c03bcad..b942284 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -390,8 +390,8 @@ public:
int setup_attributes(int payload_reg);
int setup_uniforms(int payload_reg);
void setup_payload();
-   void reg_allocate_trivial();
-   void reg_allocate();
+   int reg_allocate_trivial();
+   int reg_allocate();
void move_grf_array_access_to_scratch();
void move_uniform_array_access_to_pull_constants();
void move_push_constants_to_pull_constants();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 54bbe13..f2a100b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -657,7 +657,7 @@ vec4_visitor::run()
   return false;
 
setup_payload();
-   reg_allocate();
+   prog_data->total_grf = reg_allocate();
 
if (failed)
   return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 1ace91f..7848656 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -41,7 +41,7 @@ assign(int *reg_hw_locations, reg *reg)
}
 }
 
-void
+int
 vec4_visitor::reg_allocate_trivial()
 {
int hw_reg_mapping[this->virtual_grf_count];
@@ -76,7 +76,7 @@ vec4_visitor::reg_allocate_trivial()
 next += this->virtual_grf_sizes[i];
   }
}
-   prog_data->total_grf = next;
+   int total_grf = next;
 
foreach_iter(exec_list_iterator, iter, this->instructions) {
   vec4_instruction *inst = (vec4_instruction *)iter.get();
@@ -87,10 +87,12 @@ vec4_visitor::reg_allocate_trivial()
   assign(hw_reg_mapping, &inst->src[2]);
}
 
-   if (prog_data->total_grf > BRW_MAX_GRF) {
+   if (total_grf > BRW_MAX_GRF) {
   fail("Ran out of regs on trivial allocator (%d/%d)\n",
-  prog_data->total_grf, BRW_MAX_GRF);
+  total_grf, BRW_MAX_GRF);
}
+
+   return total_grf;
 }
 
 static void
@@ -139,7 +141,7 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw,
ra_set_finalize(brw->vs.regs);
 }
 
-void
+int
 vec4_visitor::reg_allocate()
 {
int hw_reg_mapping[virtual_grf_count];
@@ -152,8 +154,7 @@ vec4_visitor::reg_allocate()
 * register access as a result of broken optimization passes.
 */
if (0) {
-  reg_allocate_trivial();
-  return;
+  return reg_allocate_trivial();
}
 
calculate_live_intervals();
@@ -205,20 +206,19 @@ vec4_visitor::reg_allocate()
if (!ra_allocate_no_spills(g)) {
   ralloc_free(g);
   fail("No register spilling support yet\n");
-  return;
+  return 0;
}
 
/* Get the chosen virtual registers for each node, and map virtual
 * regs in the register classes back down to real hardware reg
 * numbers.
 */
-   prog_data->total_grf = first_assigned_grf;
+   int total_grf = first_assigned_grf;
for (int i = 0; i < virtual_grf_count; i++) {
   int reg = ra_get_node_reg(g, i);
 
   hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
-  prog_data->total_grf = MAX2(prog_data->total_grf,
- hw_reg_mapping[i] + virtual_grf_sizes[i]);
+  total_grf = MAX2(total_grf, hw_reg_mapping[i] + virtual_grf_sizes[i]);
}
 
foreach_list(node, &this->instructions) {
@@ -231,6 +231,8 @@ vec4_visitor::reg_allocate()
}
 
ralloc_free(g);
+
+   return total_grf;
 }
 
 } /* namespace brw */
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/13] i965 vs: Return last_scratch from move_grf_array_access_to_scratch()

2011-11-16 Thread Paul Berry

Previously, vec4_visitor::move_grf_array_access_to_scratch() stored
the value of last_scratch in the brw_vs_compile structure directly.
Now it returns it to vec4_visitor::run(), which stores it.

This helps pave the way for sharing vec4 code between the GS and the
VS, by making move_grf_array_access_to_scratch() more independent of
the kind of shader it is compiling for.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |2 +-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   13 -
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index b942284..9c4495a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -392,7 +392,7 @@ public:
void setup_payload();
int reg_allocate_trivial();
int reg_allocate();
-   void move_grf_array_access_to_scratch();
+   unsigned move_grf_array_access_to_scratch();
void move_uniform_array_access_to_pull_constants();
void move_push_constants_to_pull_constants();
void split_uniform_registers();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index f2a100b..061d487 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -638,7 +638,7 @@ vec4_visitor::run()
 * that we have reladdr computations available for CSE, since we'll
 * often do repeated subexpressions for those.
 */
-   move_grf_array_access_to_scratch();
+   c->last_scratch = move_grf_array_access_to_scratch();
move_uniform_array_access_to_pull_constants();
pack_uniform_registers();
move_push_constants_to_pull_constants();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3327eef..42214d4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2181,10 +2181,11 @@ vec4_visitor::emit_scratch_write(vec4_instruction *inst,
  * registers.  So, we send all GRF arrays that get variable index
  * access to scratch space.
  */
-void
+unsigned
 vec4_visitor::move_grf_array_access_to_scratch()
 {
int scratch_loc[this->virtual_grf_count];
+   unsigned last_scratch = 0;
 
for (int i = 0; i < this->virtual_grf_count; i++) {
   scratch_loc[i] = -1;
@@ -2199,8 +2200,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
 
   if (inst->dst.file == GRF && inst->dst.reladdr &&
  scratch_loc[inst->dst.reg] == -1) {
-scratch_loc[inst->dst.reg] = c->last_scratch;
-c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+scratch_loc[inst->dst.reg] = last_scratch;
+last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
   }
 
   for (int i = 0 ; i < 3; i++) {
@@ -2208,8 +2209,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
 
 if (src->file == GRF && src->reladdr &&
 scratch_loc[src->reg] == -1) {
-   scratch_loc[src->reg] = c->last_scratch;
-   c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+   scratch_loc[src->reg] = last_scratch;
+   last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
 }
   }
}
@@ -2252,6 +2253,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
 inst->src[i].reladdr = NULL;
   }
}
+
+   return last_scratch;
 }
 
 /**
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function argument.

2011-11-16 Thread Paul Berry

Previously, vec4_visitor::setup_payload() stored the location of the
first non-payload GRF in vec4_visitor::first_non_payload_grf, where it
was later picked up by the register allocator.  Now setup_payload()
returns the location to the caller (vec4_visitor::run()), which passes
it directly to the register allocator.

This helps pave the way for separating the IR visiting parts of
vec4_visitor (which include setup_payload()) and the code generation
parts (which include the register allocator), by making the data flow
between the two parts more explicit.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |7 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|8 
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp |   10 +-
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 9c4495a..c546235 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -321,7 +321,6 @@ public:
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
-   int first_non_payload_grf;
int *virtual_grf_def;
int *virtual_grf_use;
dst_reg userplane[MAX_CLIP_PLANES];
@@ -389,9 +388,9 @@ public:
void setup_builtin_uniform_values(ir_variable *ir);
int setup_attributes(int payload_reg);
int setup_uniforms(int payload_reg);
-   void setup_payload();
-   int reg_allocate_trivial();
-   int reg_allocate();
+   int setup_payload();
+   int reg_allocate_trivial(int first_non_payload_grf);
+   int reg_allocate(int first_non_payload_grf);
unsigned move_grf_array_access_to_scratch();
void move_uniform_array_access_to_pull_constants();
void move_push_constants_to_pull_constants();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 061d487..3219fe4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -127,7 +127,7 @@ vec4_visitor::setup_uniforms(int reg)
return reg;
 }
 
-void
+int
 vec4_visitor::setup_payload(void)
 {
int reg = 0;
@@ -142,7 +142,7 @@ vec4_visitor::setup_payload(void)
 
reg = setup_attributes(reg);
 
-   this->first_non_payload_grf = reg;
+   return reg;
 }
 
 struct brw_reg
@@ -656,8 +656,8 @@ vec4_visitor::run()
if (failed)
   return false;
 
-   setup_payload();
-   prog_data->total_grf = reg_allocate();
+   int first_non_payload_grf = setup_payload();
+   prog_data->total_grf = reg_allocate(first_non_payload_grf);
 
if (failed)
   return false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 7848656..f6db9b4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -42,7 +42,7 @@ assign(int *reg_hw_locations, reg *reg)
 }
 
 int
-vec4_visitor::reg_allocate_trivial()
+vec4_visitor::reg_allocate_trivial(int first_non_payload_grf)
 {
int hw_reg_mapping[this->virtual_grf_count];
bool virtual_grf_used[this->virtual_grf_count];
@@ -68,7 +68,7 @@ vec4_visitor::reg_allocate_trivial()
   }
}
 
-   hw_reg_mapping[0] = this->first_non_payload_grf;
+   hw_reg_mapping[0] = first_non_payload_grf;
next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
for (i = 1; i < this->virtual_grf_count; i++) {
   if (virtual_grf_used[i]) {
@@ -142,10 +142,10 @@ brw_alloc_reg_set_for_classes(struct brw_context *brw,
 }
 
 int
-vec4_visitor::reg_allocate()
+vec4_visitor::reg_allocate(int first_non_payload_grf)
 {
int hw_reg_mapping[virtual_grf_count];
-   int first_assigned_grf = this->first_non_payload_grf;
+   int first_assigned_grf = first_non_payload_grf;
int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
int class_sizes[base_reg_count];
int class_count = 0;
@@ -154,7 +154,7 @@ vec4_visitor::reg_allocate()
 * register access as a result of broken optimization passes.
 */
if (0) {
-  return reg_allocate_trivial();
+  return reg_allocate_trivial(first_non_payload_grf);
}
 
calculate_live_intervals();
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/13] i965 vs: Move brw_set_access_mode() call into generate_code().

2011-11-16 Thread Paul Berry

Previously, vec4_visitor::run() called brw_set_access_mode() right
before calling vec4_visitor::generate_code().  It seems clearer to put
the call inside generate_code(), since the purpose of
brw_set_access_mode() is to set up the proper state for code
generation.

This helps pave the way for separating the IR visiting parts of
vec4_visitor (which include run()) from the code generation parts
(which include generate_code()), by avoiding the need for run() to
access the brw_compile structure.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 3219fe4..81e7ab7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -662,8 +662,6 @@ vec4_visitor::run()
if (failed)
   return false;
 
-   brw_set_access_mode(p, BRW_ALIGN_16);
-
generate_code();
 
return !failed;
@@ -683,6 +681,7 @@ vec4_visitor::generate_code()
int *if_depth_in_loop =
   rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
 
+   brw_set_access_mode(p, BRW_ALIGN_16);
 
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
   printf("Native code for vertex shader %d:\n", prog->Name);
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of generate_code().

2011-11-16 Thread Paul Berry

Previously, vec4_visitor::run() was responsible for calling
reg_allocate() right before generate_code().  However, since there is
no reason for run() to do any further manipulation of the instructions
between calling reg_allocate() and generate_code(), it's just as
simple to have generate_code() call reg_allocate().

This helps pave the way for separating the IR visiting parts of
vec4_visitor 9which include run()) from the code generation parts
(which include generate_code() and register_allocate()) by reducing
the number of function calls between the two parts.
---
 src/mesa/drivers/dri/i965/brw_vec4.h|2 +-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |   18 ++
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c546235..3e30514 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -506,7 +506,7 @@ public:
bool try_emit_sat(ir_expression *ir);
void resolve_ud_negate(src_reg *reg);
 
-   void generate_code();
+   int generate_code(int first_non_payload_grf);
void generate_vs_instruction(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg *src);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 81e7ab7..284cb86 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -657,18 +657,13 @@ vec4_visitor::run()
   return false;
 
int first_non_payload_grf = setup_payload();
-   prog_data->total_grf = reg_allocate(first_non_payload_grf);
-
-   if (failed)
-  return false;
-
-   generate_code();
+   prog_data->total_grf = generate_code(first_non_payload_grf);
 
return !failed;
 }
 
-void
-vec4_visitor::generate_code()
+int
+vec4_visitor::generate_code(int first_non_payload_grf)
 {
int last_native_inst = 0;
const char *last_annotation_string = NULL;
@@ -683,6 +678,11 @@ vec4_visitor::generate_code()
 
brw_set_access_mode(p, BRW_ALIGN_16);
 
+   int total_grf = reg_allocate(first_non_payload_grf);
+
+   if (failed)
+  return 0;
+
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
   printf("Native code for vertex shader %d:\n", prog->Name);
}
@@ -906,6 +906,8 @@ vec4_visitor::generate_code()
 }
   }
}
+
+   return total_grf;
 }
 
 extern "C" {
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/13] i965 vs: Streamline the way fail is tracked.

2011-11-16 Thread Paul Berry

Previously, vec4_visitor flagged a failure by setting the "failed"
boolean and storing a string in "fail_msg" (which, up until the point
of failure, was undefined).  This is redundant--it's just as easy to
have fail_msg be NULL when there is no failure, and a string pointer
when there is a failure.

This patch also adds trivial inline functions failed() and
get_fail_msg() so that this simplification doesn't make client code
any less clear.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |   13 +++--
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   15 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |6 ++
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3e30514..6fafc97 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -309,7 +309,6 @@ public:
exec_list instructions;
 
char *fail_msg;
-   bool failed;
 
/**
 * GLSL IR currently being processed, which is associated with our
@@ -337,6 +336,16 @@ public:
 
dst_reg *variable_storage(ir_variable *var);
 
+   bool failed() const
+   {
+  return fail_msg != NULL;
+   }
+
+   char *get_fail_msg() const
+   {
+  return fail_msg;
+   }
+
src_reg src_reg_for_float(float val);
 
/**
@@ -379,7 +388,7 @@ public:
 
struct hash_table *variable_ht;
 
-   bool run(void);
+   void run(void);
void fail(const char *msg, ...);
 
int virtual_grf_alloc(int size);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 284cb86..e5dc763 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -619,7 +619,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction 
*instruction,
}
 }
 
-bool
+void
 vec4_visitor::run()
 {
if (c->key.userclip_active && !c->key.uses_clip_distance)
@@ -653,13 +653,11 @@ vec4_visitor::run()
} while (progress);
 
 
-   if (failed)
-  return false;
+   if (failed())
+  return;
 
int first_non_payload_grf = setup_payload();
prog_data->total_grf = generate_code(first_non_payload_grf);
-
-   return !failed;
 }
 
 int
@@ -680,7 +678,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
 
int total_grf = reg_allocate(first_non_payload_grf);
 
-   if (failed)
+   if (failed())
   return 0;
 
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
@@ -930,9 +928,10 @@ brw_vs_emit(struct gl_shader_program *prog, struct 
brw_vs_compile *c)
}
 
vec4_visitor v(c, prog, shader);
-   if (!v.run()) {
+   v.run();
+   if (v.failed()) {
   prog->LinkStatus = false;
-  ralloc_strcat(&prog->InfoLog, v.fail_msg);
+  ralloc_strcat(&prog->InfoLog, v.get_fail_msg());
   return false;
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 42214d4..338106c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2375,7 +2375,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
this->shader = shader;
 
this->mem_ctx = ralloc_context(NULL);
-   this->failed = false;
+   this->fail_msg = NULL;
 
this->base_ir = NULL;
this->current_annotation = NULL;
@@ -2418,11 +2418,9 @@ vec4_visitor::fail(const char *format, ...)
va_list va;
char *msg;
 
-   if (failed)
+   if (failed())
   return;
 
-   failed = true;
-
va_start(va, format);
msg = ralloc_vasprintf(mem_ctx, format, va);
va_end(va);
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/13] i965 vs: Add get_debug_flag()

2011-11-16 Thread Paul Berry

To determine whether a debug dump should be performed, vec4_visitor
checks the DEBUG_VS bit in INTEL_DEBUG.  This patch moves that check
into a single common function, get_debug_flag().

This helps pave the way for sharing code between VS and GS code
generation, by creating a function that can be made virtual.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |2 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   21 -
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |2 +-
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6fafc97..4e17d15 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -515,6 +515,8 @@ public:
bool try_emit_sat(ir_expression *ir);
void resolve_ud_negate(src_reg *reg);
 
+   bool get_debug_flag() const;
+
int generate_code(int first_non_payload_grf);
void generate_vs_instruction(vec4_instruction *inst,
struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index e5dc763..0ee6525 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -660,9 +660,20 @@ vec4_visitor::run()
prog_data->total_grf = generate_code(first_non_payload_grf);
 }
 
+/**
+ * Return a bool indicating whether debugging output should be generated
+ * for this shader.
+ */
+bool
+vec4_visitor::get_debug_flag() const
+{
+   return INTEL_DEBUG & DEBUG_VS;
+}
+
 int
 vec4_visitor::generate_code(int first_non_payload_grf)
 {
+   bool debug_flag = get_debug_flag();
int last_native_inst = 0;
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
@@ -681,7 +692,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
if (failed())
   return 0;
 
-   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+   if (unlikely(debug_flag)) {
   printf("Native code for vertex shader %d:\n", prog->Name);
}
 
@@ -689,7 +700,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
   vec4_instruction *inst = (vec4_instruction *)node;
   struct brw_reg src[3], dst;
 
-  if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+  if (unlikely(debug_flag)) {
 if (last_annotation_ir != inst->ir) {
last_annotation_ir = inst->ir;
if (last_annotation_ir) {
@@ -862,7 +873,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
 break;
   }
 
-  if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+  if (unlikely(debug_flag)) {
 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
if (0) {
   printf("0x%08x 0x%08x 0x%08x 0x%08x ",
@@ -878,7 +889,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
   last_native_inst = p->nr_insn;
}
 
-   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+   if (unlikely(debug_flag)) {
   printf("\n");
}
 
@@ -893,7 +904,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
 * case you're doing that.
 */
if (0) {
-  if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+  if (unlikely(debug_flag)) {
 for (unsigned int i = 0; i < p->nr_insn; i++) {
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
   ((uint32_t *)&p->store[i])[3],
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 338106c..6262093 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2428,7 +2428,7 @@ vec4_visitor::fail(const char *format, ...)
 
this->fail_msg = msg;
 
-   if (INTEL_DEBUG & DEBUG_VS) {
+   if (get_debug_flag()) {
   fprintf(stderr, "%s",  msg);
}
 }
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/13] i965 vs: Add get_debug_name().

2011-11-16 Thread Paul Berry

Previously, vec4_visitor contained several explicit references to
vertex shading in debug messages.  This patch moves all of those
references into a single function, get_debug_name(), which returns a
human-readable description of the shader being compiled.

This helps pave the way for sharing code between VS and GS code
generation, by creating a function that can be made virtual.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   20 
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |3 ++-
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 4e17d15..c0284fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -516,6 +516,7 @@ public:
void resolve_ud_negate(src_reg *reg);
 
bool get_debug_flag() const;
+   const char *get_debug_name() const;
 
int generate_code(int first_non_payload_grf);
void generate_vs_instruction(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 0ee6525..dceacc7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -611,10 +611,12 @@ vec4_visitor::generate_vs_instruction(vec4_instruction 
*instruction,
 
default:
   if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
-fail("unsupported opcode in `%s' in VS\n",
- brw_opcodes[inst->opcode].name);
+fail("unsupported opcode in `%s' in %s\n",
+ brw_opcodes[inst->opcode].name,
+  get_debug_name());
   } else {
-fail("Unsupported opcode %d in VS", inst->opcode);
+fail("Unsupported opcode %d in %s", inst->opcode,
+  get_debug_name());
   }
}
 }
@@ -670,6 +672,16 @@ vec4_visitor::get_debug_flag() const
return INTEL_DEBUG & DEBUG_VS;
 }
 
+/**
+ * Return a string describing the program being compiled, for debugging
+ * purposes.  Caller should not free this string.
+ */
+const char *
+vec4_visitor::get_debug_name() const
+{
+   return ralloc_asprintf(mem_ctx, "vertex shader %d", prog->Name);
+}
+
 int
 vec4_visitor::generate_code(int first_non_payload_grf)
 {
@@ -693,7 +705,7 @@ vec4_visitor::generate_code(int first_non_payload_grf)
   return 0;
 
if (unlikely(debug_flag)) {
-  printf("Native code for vertex shader %d:\n", prog->Name);
+  printf("Native code for %s:\n", get_debug_name());
}
 
foreach_list(node, &this->instructions) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 6262093..5025115 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2424,7 +2424,8 @@ vec4_visitor::fail(const char *format, ...)
va_start(va, format);
msg = ralloc_vasprintf(mem_ctx, format, va);
va_end(va);
-   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+   msg = ralloc_asprintf(mem_ctx, "compile failed for %s: %s\n",
+ get_debug_name(), msg);
 
this->fail_msg = msg;
 
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/13] i965 vs: Make a separate optimize() method.

2011-11-16 Thread Paul Berry

This patch moves vec4_visitor::run()'s optimization loop to its own
method.

This helps pave the way for separating the IR visiting parts of
vec4_visitor from the code generation parts, by reducing the number of
function calls between the two parts.
---
 src/mesa/drivers/dri/i965/brw_vec4.h|1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |   23 ++-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c0284fc..5d2f120 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -411,6 +411,7 @@ public:
bool opt_copy_propagation();
bool opt_algebraic();
bool opt_compute_to_mrf();
+   void optimize();
 
vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index dceacc7..5724a29 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -622,6 +622,19 @@ vec4_visitor::generate_vs_instruction(vec4_instruction 
*instruction,
 }
 
 void
+vec4_visitor::optimize()
+{
+   bool progress;
+   do {
+  progress = false;
+  progress = dead_code_eliminate() || progress;
+  progress = opt_copy_propagation() || progress;
+  progress = opt_algebraic() || progress;
+  progress = opt_compute_to_mrf() || progress;
+   } while (progress);
+}
+
+void
 vec4_visitor::run()
 {
if (c->key.userclip_active && !c->key.uses_clip_distance)
@@ -645,15 +658,7 @@ vec4_visitor::run()
pack_uniform_registers();
move_push_constants_to_pull_constants();
 
-   bool progress;
-   do {
-  progress = false;
-  progress = dead_code_eliminate() || progress;
-  progress = opt_copy_propagation() || progress;
-  progress = opt_algebraic() || progress;
-  progress = opt_compute_to_mrf() || progress;
-   } while (progress);
-
+   optimize();
 
if (failed())
   return;
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/13] i965 vs: Make get_assignment_lhs() a method

2011-11-16 Thread Paul Berry

Previously, get_assignment_lhs() was a static function.  But it makes
just as much sense to make it a private member functions of
vec4_visitor, because it is one of vec4_visitor's implementation
details, just like the other functions in brw_vec4_visitor.cpp.

This will allow the implementation details of vec4_visitor to be made
private.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |1 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |   10 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5d2f120..226413b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -556,6 +556,7 @@ public:
void generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index);
+   dst_reg get_assignment_lhs(ir_dereference *ir);
 };
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5025115..cf71ef4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1460,8 +1460,8 @@ vec4_visitor::visit(ir_dereference_record *ir)
  * instead of potentially using a temporary like we might with the
  * ir_dereference handler.
  */
-static dst_reg
-get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+dst_reg
+vec4_visitor::get_assignment_lhs(ir_dereference *ir)
 {
/* The LHS must be a dereference.  If the LHS is a variable indexed array
 * access of a vector, it must be separated into a series conditional moves
@@ -1476,8 +1476,8 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
/* Use the rvalue deref handler for the most part.  We'll ignore
 * swizzles in it and write swizzles using writemask, though.
 */
-   ir->accept(v);
-   return dst_reg(v->result);
+   ir->accept(this);
+   return dst_reg(this->result);
 }
 
 void
@@ -1590,7 +1590,7 @@ vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
 void
 vec4_visitor::visit(ir_assignment *ir)
 {
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+   dst_reg dst = get_assignment_lhs(ir->lhs);
uint32_t predicate = BRW_PREDICATE_NONE;
 
if (!ir->lhs->type->is_scalar() &&
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/13] i965 vs: Separate IR visiting from code generation in vec4_visitor.

2011-11-16 Thread Paul Berry

This patch separates vec4_visitor into a base class, vec4_generator,
and a derived class, vec4_visitor.  vec4_generator is responsible for
code generation, register allocation, and optimization; it is
independent of GLSL IR (with one trivial exception), and mostly
independent of the type of shader being compiled.  vec4_visitor is
responsible for visiting the GLSL IR and performing
vertex-shader-specific actions.

This helps pave the way for sharing code between VS and GS code
generation, by creating a new class for the parts of vec4_visitor that
are (or should soon be) independent of the type of shader being
compiled.  It is also important that the base class be as independent
of GLSL IR as possible, since our first use of the GS will be to
create a GS thread that performs transform feedback on Gen6, and this
thread will not be related to any GLSL IR.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp |   10 +-
 src/mesa/drivers/dri/i965/brw_vec4.h   |  199 +++-
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |2 +-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp|   80 -
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp |4 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  140 +++---
 6 files changed, 228 insertions(+), 207 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3971532..8632b9f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -63,7 +63,7 @@ src_reg::equals(src_reg *r)
 }
 
 void
-vec4_visitor::calculate_live_intervals()
+vec4_generator::calculate_live_intervals()
 {
int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
@@ -139,7 +139,7 @@ vec4_visitor::calculate_live_intervals()
 }
 
 bool
-vec4_visitor::virtual_grf_interferes(int a, int b)
+vec4_generator::virtual_grf_interferes(int a, int b)
 {
int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
@@ -165,7 +165,7 @@ vec4_visitor::virtual_grf_interferes(int a, int b)
  * interfere with other regs.
  */
 bool
-vec4_visitor::dead_code_eliminate()
+vec4_generator::dead_code_eliminate()
 {
bool progress = false;
int pc = 0;
@@ -347,7 +347,7 @@ src_reg::is_one() const
  * instructions involving 0.
  */
 bool
-vec4_visitor::opt_algebraic()
+vec4_generator::opt_algebraic()
 {
bool progress = false;
 
@@ -491,7 +491,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
  * the GRF write directly to the MRF instead.
  */
 bool
-vec4_visitor::opt_compute_to_mrf()
+vec4_generator::opt_compute_to_mrf()
 {
bool progress = false;
int next_ip = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 226413b..6eb6e5d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -164,7 +164,7 @@ public:
bool is_zero() const;
bool is_one() const;
 
-   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+   src_reg(class vec4_generator *v, const struct glsl_type *type);
 
explicit src_reg(dst_reg reg);
 
@@ -218,7 +218,7 @@ public:
   this->fixed_hw_reg = reg;
}
 
-   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+   dst_reg(class vec4_generator *v, const struct glsl_type *type);
 
explicit dst_reg(src_reg reg);
 
@@ -241,7 +241,7 @@ public:
   return node;
}
 
-   vec4_instruction(vec4_visitor *v, enum opcode opcode,
+   vec4_instruction(vec4_generator *v, enum opcode opcode,
dst_reg dst = dst_reg(),
src_reg src0 = src_reg(),
src_reg src1 = src_reg(),
@@ -279,12 +279,11 @@ public:
bool is_math();
 };
 
-class vec4_visitor : public ir_visitor
+class vec4_generator
 {
 public:
-   vec4_visitor(struct brw_vs_compile *c,
-   struct gl_shader_program *prog, struct brw_shader *shader);
-   ~vec4_visitor();
+   vec4_generator(struct brw_compile *p);
+   ~vec4_generator();
 
dst_reg dst_null_f()
{
@@ -297,14 +296,9 @@ public:
}
 
struct brw_context *brw;
-   const struct gl_vertex_program *vp;
struct intel_context *intel;
struct gl_context *ctx;
-   struct brw_vs_compile *c;
-   struct brw_vs_prog_data *prog_data;
struct brw_compile *p;
-   struct brw_shader *shader;
-   struct gl_shader_program *prog;
void *mem_ctx;
exec_list instructions;
 
@@ -322,7 +316,6 @@ public:
int virtual_grf_array_size;
int *virtual_grf_def;
int *virtual_grf_use;
-   dst_reg userplane[MAX_CLIP_PLANES];
 
/**
 * This is the size to be used for an array with an element per
@@ -334,8 +327,6 @@ public:
 
bool live_intervals_valid;
 
-   dst_reg *variable_storage(ir_variable *var);
-
bool failed() const
{
   return fail_msg != NULL;
@@ -348,63 +339

[Mesa-dev] [PATCH 13/13] i965 vs: Rearrange class declarations.

2011-11-16 Thread Paul Berry

This patch rearranges the class declarations for vec4_generator and
vec4_visitor, and adds "private" and "protected" modifiers, to clarify
how the code is organized and how the two classes relate to each
other.  There is no functional change.
---
 src/mesa/drivers/dri/i965/brw_vec4.h |  310 +-
 1 files changed, 189 insertions(+), 121 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6eb6e5d..3118c97 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -285,48 +285,6 @@ public:
vec4_generator(struct brw_compile *p);
~vec4_generator();
 
-   dst_reg dst_null_f()
-   {
-  return dst_reg(brw_null_reg());
-   }
-
-   dst_reg dst_null_d()
-   {
-  return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   }
-
-   struct brw_context *brw;
-   struct intel_context *intel;
-   struct gl_context *ctx;
-   struct brw_compile *p;
-   void *mem_ctx;
-   exec_list instructions;
-
-   char *fail_msg;
-
-   /**
-* GLSL IR currently being processed, which is associated with our
-* driver IR instructions for debugging purposes.
-*/
-   ir_instruction *base_ir;
-   const char *current_annotation;
-
-   int *virtual_grf_sizes;
-   int virtual_grf_count;
-   int virtual_grf_array_size;
-   int *virtual_grf_def;
-   int *virtual_grf_use;
-
-   /**
-* This is the size to be used for an array with an element per
-* reg_offset
-*/
-   int virtual_grf_reg_count;
-   /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
-   int *virtual_grf_reg_map;
-
-   bool live_intervals_valid;
-
bool failed() const
{
   return fail_msg != NULL;
@@ -337,36 +295,26 @@ public:
   return fail_msg;
}
 
-   src_reg src_reg_for_float(float val);
-
-   void fail(const char *msg, ...);
-
-   int virtual_grf_alloc(int size);
-   int reg_allocate_trivial(int first_non_payload_grf);
-   int reg_allocate(int first_non_payload_grf);
-   unsigned move_grf_array_access_to_scratch();
-   void calculate_live_intervals();
-   bool dead_code_eliminate();
-   bool virtual_grf_interferes(int a, int b);
-   bool opt_copy_propagation();
-   bool opt_algebraic();
-   bool opt_compute_to_mrf();
-   void optimize();
-
-   vec4_instruction *emit(vec4_instruction *inst);
-
-   vec4_instruction *emit(enum opcode opcode);
-
-   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
-
-   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
- src_reg src0, src_reg src1);
+protected:
+   /**
+* Return a bool indicating whether debugging output should be generated
+* for this shader.
+*/
+   virtual bool get_debug_flag() const = 0;
 
-   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
- src_reg src0, src_reg src1, src_reg src2);
+   /**
+* Return a string describing the program being compiled, for debugging
+* purposes.  Caller should not free this string.
+*/
+   virtual const char *get_debug_name() const = 0;
 
-   vec4_instruction *emit_before(vec4_instruction *inst,
-vec4_instruction *new_inst);
+   /**
+* \name Instruction constructors
+*
+* These functions create new vec4_instruction objects.
+*
+* @{
+*/
 
vec4_instruction *MOV(dst_reg dst, src_reg src0);
vec4_instruction *NOT(dst_reg dst, src_reg src0);
@@ -387,15 +335,32 @@ public:
 uint32_t condition);
vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
vec4_instruction *IF(uint32_t predicate);
-   vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
 
-   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
-  dst_reg dst,
-  src_reg src,
-  vec4_instruction *pre_rhs_inst,
-  vec4_instruction *last_rhs_inst);
+   /** @} */
+
+   /**
+* \name Emitter functions
+*
+* These functions place new vec4_instruction objects in the generator's
+* instruction list.
+*
+* @{
+*/
+
+   vec4_instruction *emit(vec4_instruction *inst);
+
+   vec4_instruction *emit(enum opcode opcode);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2);
+
+   vec4_instruction *emit_before(vec4_instruction *inst,
+vec4_instruction *new_inst);
 
/**
 * Emit the correct dot-product instruction for the type of arguments
@@ -411,17 +376,9

[Mesa-dev] [PATCH 1/3] mesa: Use uniform interfaces in fixed-function fragment shader code

2011-11-16 Thread Ian Romanick

From: Ian Romanick 

Poking directly at the backing resources works only by luck.  Core
Mesa code should only know about the gl_uniform_storage structure.
Soon other code that looks at samplers will use the gl_uniform_storage
structures instead of the data in the gl_program.

Signed-off-by: Ian Romanick 
Cc: Eric Anholt 
---
 src/mesa/main/ff_fragment_shader.cpp |   29 -
 1 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 2ce81fe..1806adc 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -42,6 +42,7 @@ extern "C" {
 #include "program/programopt.h"
 #include "texenvprogram.h"
 }
+#include "main/uniforms.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir.h"
 #include "../glsl/glsl_symbol_table.h"
@@ -1488,22 +1489,40 @@ create_new_program(struct gl_context *ctx, struct 
state_key *key)
/* Set the sampler uniforms, and relink to get them into the linked
 * program.
 */
-   struct gl_program *fp;
-   fp = p.shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+   struct gl_shader *const fs =
+  p.shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT];
+   struct gl_program *const fp = fs->Program;
+
+   _mesa_generate_parameters_list_for_uniforms(p.shader_program, fs,
+  fp->Parameters);
+
+   _mesa_associate_uniform_storage(ctx, p.shader_program, fp->Parameters);
 
for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) {
   char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i);
   int loc = _mesa_get_uniform_location(ctx, p.shader_program, name);
   if (loc != -1) {
+unsigned base;
+unsigned idx;
+
 /* Avoid using _mesa_uniform() because it flags state
  * updates, so if we're generating this shader_program in a
  * state update, we end up recursing.  Instead, just set the
  * value, which is picked up at re-link.
  */
-loc = (loc & 0x) + (loc >> 16);
-int sampler = fp->Parameters->ParameterValues[loc][0].f;
+_mesa_uniform_split_location_offset(loc, &base, &idx);
+assert(idx == 0);
+
+struct gl_uniform_storage *const storage =
+   &p.shader_program->UniformStorage[base];
 
-fp->SamplerUnits[sampler] = i;
+/* Update the storage, the SamplerUnits in the shader program, and
+ * the SamplerUnits in the assembly shader.
+ */
+storage->storage[idx].i = i;
+fp->SamplerUnits[storage->sampler] = i;
+p.shader_program->SamplerUnits[storage->sampler] = i;
+_mesa_propagate_uniforms_to_driver_storage(storage, 0, 1);
   }
}
_mesa_update_shader_textures_used(fp);
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] mesa: Use static buffer for uniform name

2011-11-16 Thread Ian Romanick

From: Ian Romanick 

Signed-off-by: Ian Romanick 
Cc: Eric Anholt 
---
 src/mesa/main/ff_fragment_shader.cpp |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 1806adc..4808855 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -1499,7 +1499,12 @@ create_new_program(struct gl_context *ctx, struct 
state_key *key)
_mesa_associate_uniform_storage(ctx, p.shader_program, fp->Parameters);
 
for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) {
-  char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i);
+  /* Enough space for 'sampler_999\0'.
+   */
+  char name[12];
+
+  snprintf(name, sizeof(name), "sampler_%d", i);
+
   int loc = _mesa_get_uniform_location(ctx, p.shader_program, name);
   if (loc != -1) {
 unsigned base;
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] mesa: Track fixed-function fragment shader as a shader

2011-11-16 Thread Ian Romanick

From: Ian Romanick 

Previously the fixed-function fragment shader was tracked as a
gl_program.  This means that it shows up in the driver as a Mesa IR
program instead of as a GLSL IR program.  If a driver doesn't generate
Mesa IR from the GLSL IR, that program is empty.  If the program is
empty there is either no rendering or a GPU hang.

Signed-off-by: Ian Romanick 
Cc: Eric Anholt 
---
 src/mesa/drivers/dri/i965/brw_wm.c|2 +-
 src/mesa/drivers/dri/i965/brw_wm_state.c  |4 ++--
 src/mesa/drivers/dri/i965/gen6_wm_state.c |4 +++-
 src/mesa/drivers/dri/i965/gen7_wm_state.c |4 +++-
 src/mesa/main/mtypes.h|2 ++
 src/mesa/main/shaderapi.c |   29 +
 src/mesa/main/state.c |   10 ++
 7 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 377b8ba..4eccea3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -520,7 +520,7 @@ brw_upload_wm_prog(struct brw_context *brw)
if (!brw_search_cache(&brw->cache, BRW_WM_PROG,
 &key, sizeof(key),
 &brw->wm.prog_offset, &brw->wm.prog_data)) {
-  bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp,
+  bool success = do_wm_prog(brw, ctx->Shader._CurrentFragmentProgram, fp,
&key);
   (void) success;
   assert(success);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_state.c
index e1791c2..dd67795 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -169,9 +169,9 @@ brw_upload_wm_unit(struct brw_context *brw)
 * If using the fragment shader backend, the program is always
 * 8-wide.  If not, it's always 16.
 */
-   if (ctx->Shader.CurrentFragmentProgram) {
+   if (ctx->Shader._CurrentFragmentProgram) {
   struct brw_shader *shader = (struct brw_shader *)
-
ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
   if (shader != NULL && shader->ir != NULL) {
 wm->wm5.enable_8_pix = 1;
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c 
b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 271a9ae..5312f21 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -135,7 +135,9 @@ upload_wm_state(struct brw_context *brw)
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
 
/* Use ALT floating point mode for ARB fragment programs, because they
-* require 0^0 == 1.
+* require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
+* rendering, CurrentFragmentProgram is used for this check to
+* differentiate between the GLSL and non-GLSL cases.
 */
if (ctx->Shader.CurrentFragmentProgram == NULL)
   dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index f38d2f1..3d34c51 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -147,7 +147,9 @@ upload_ps_state(struct brw_context *brw)
dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 
/* Use ALT floating point mode for ARB fragment programs, because they
-* require 0^0 == 1.
+* require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
+* rendering, CurrentFragmentProgram is used for this check to
+* differentiate between the GLSL and non-GLSL cases.
 */
if (intel->ctx.Shader.CurrentFragmentProgram == NULL)
   dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 285ec07..3abc2b3 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2301,6 +2301,8 @@ struct gl_shader_state
struct gl_shader_program *CurrentGeometryProgram;
struct gl_shader_program *CurrentFragmentProgram;
 
+   struct gl_shader_program *_CurrentFragmentProgram;
+
/**
 * Program used by glUniform calls.
 *
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index c4d01ab..85e3406 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -124,6 +124,8 @@ _mesa_free_shader_state(struct gl_context *ctx)
  NULL);
_mesa_reference_shader_program(ctx, &ctx->Shader.CurrentFragmentProgram,
  NULL);
+   _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram,
+ NULL);
_mesa_reference_shader_program(ctx, &ctx->Shader.ActiveProgram, NULL);
 }
 
@@ -876,6 +878,33 @@ use_shader_program(struct gl_context *ctx, GLenum type,
 
if (*target != shProg) {
   FLUSH_VERTICES(ctx, _NEW_P

[Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage

2011-11-16 Thread Ian Romanick

From: Ian Romanick 

If glUniform1i and friends are going to dump data directly in
driver-allocated, the pointers have to be updated when the storage
moves.  This should fix the regressions seen with commit 7199096.

I'm not sure if this is the only place that needs this treatment.  I'm
a little uncertain about the various functions in st_glsl_to_tgsi that
modify the TGSI IR and try to propagate changes about that up to the
gl_program.  That seems sketchy to me.

Signed-off-by: Ian Romanick 
Cc: Vadim Girlin 
Cc: Bryan Cain 
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0bf6766..c8385bb 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4571,6 +4571,13 @@ st_translate_program(
t->pointSizeOutIndex = -1;
t->prevInstWrotePointSize = GL_FALSE;
 
+   for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
+  struct gl_uniform_storage *const storage =
+&program->shader_program->UniformStorage[i];
+
+  _mesa_uniform_detach_all_driver_storage(storage);
+   }
+
/*
 * Declare input attributes.
 */
@@ -4797,6 +4804,13 @@ st_translate_program(
t->insn[t->labels[i].branch_target]);
}
 
+   /* This has to be done last.  Any operation the can cause
+* prog->ParameterValues to get reallocated (e.g., anything that adds a
+* program constant) has to happen before creating this linkage.
+*/
+   _mesa_associate_uniform_storage(ctx, program->shader_program,
+  proginfo->Parameters);
+
 out:
if (t) {
   FREE(t->insn);
-- 
1.7.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/13] i965 vs: Split vec4_visitor into two classes.

2011-11-16 Thread Paul Berry

On 16 November 2011 11:07, Paul Berry  wrote:

> In order to implement transform feedback on Gen6, we need to create a
> geometry shader program that will stream out the transformed vertices.
> It makes sense to re-use parts of the vec4_visitor infrastructure to
> do this, because (a) VS and GS handle data very similarly, (b) it
> allows us to take advantage of the optimization and register
> allocation code in vec4_visitor, and (c) in the long run when we add
> support for GLSL geometry shaders, we are going to need to use the
> vec4_visitor infrastructure to compile them.
>
> This patch series gets us ready for re-using parts of vec4_visitor, by
> breaking it into a base class containing the parts we are likely to
> re-use, and a derived class containing the parts we aren't.  The base
> class, vec4_generator, is responsible for code generation, register
> allocation, and optimization; it is independent of GLSL IR (with one
> trivial exception), and mostly independent of the type of shader being
> compiled.  The derived class, vec4_visitor, is responsible for
> visiting the GLSL IR and performing vertex-shader-specific actions.
>
> This split was already implicitly present in the code; this patch
> series merely makes it more explicit.
>
> The idea is that for now, we can make a class that derives from
> vec4_generator to do transform feedback, and we won't have to worry
> too much about interactions with the GLSL IR visiting code in
> vec4_visitor.  Later, when we add support for GLSL geometry shaders,
> we'll have to do further work on vec4_visitor, but we won't have to
> touch vec4_generator very much.
>
> Patches 01-11 are all preparatory refactoring: they make small
> adjustments to the functioning of vec4_visitor so that the separation
> between its IR visiting parts and its code generation parts is a
> little more strict.  Patch 12 splits the classes up, and then patch 13
> reformats the class declarations to clarify the relationship between
> the two classes.
>
> The split isn't perfect yet; there are a few places where
> vec4_generator still makes implicit assumptions that are vertex-shader
> specific (particularly in register allocation), and there are a few
> places where vec4_visitor still does things that arguably should be
> part of code generation (such as setting up scratch registers).  But I
> think this is close enough that we can clean those up with later
> patches on an as-needed basis.
>
> Note: To ease reading the diffs, I didn't make any effort to move code
> between .cpp files.  As a result, vec4_visitor and vec4_generator code
> are intermingled in brw_vec4_visitor.cpp, brw_vec4_emit.cpp,
> brw_vec4.cpp, brw_vec4_copy_propagation.cpp, and
> brw_vec4_reg_allocate.cpp.  Once this patch is reviewed and we've
> settled on how we want the classes to be split up, I'll make a
> follow-up patch that rearranges the .cpp files to match the changes to
> the class structure.
>
> [PATCH 01/13] i965 vs: Remove dead code from the vec4_visitor class.
> [PATCH 02/13] i965 vs: Make reg_allocate() return total_grf.
> [PATCH 03/13] i965 vs: Return last_scratch from
> move_grf_array_access_to_scratch()
> [PATCH 04/13] i965 vs: Make first_non_payload_grf an explicit function
> arguments.
> [PATCH 05/13] i965 vs: Move brw_set_access_mode() call into
> generate_code().
> [PATCH 06/13] i965 vs: Move the call to reg_allocate() inside of
> generate_code().
> [PATCH 07/13] i965 vs: Streamline the way fail is tracked.
> [PATCH 08/13] i965 vs: Add get_debug_flag()
> [PATCH 09/13] i965 vs: Add get_debug_name().
> [PATCH 10/13] i965 vs: Make a separate optimize() method.
> [PATCH 11/13] i965 vs: Make get_assignment_lhs() a method
> [PATCH 12/13] i965 vs: Separate IR visiting from code generation in
> vec4_visitor;
> [PATCH 13/13] i965 vs: Rearrange class declarations.
>

I should also mention: if you'd like to look at these patches in context,
you can pull them from my github repo (git://
github.com/stereotype441/mesa.git).  The branch is called "vec4-split".

If you'd like to see what the final vec4_generator and vec4_visitor classes
look like, you can view them here:
https://github.com/stereotype441/mesa/blob/vec4-split/src/mesa/drivers/dri/i965/brw_vec4.h
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.

2011-11-16 Thread vlj

   blinking-teapot is an UBO demo.
---
 src/glsl/CMakeLists.txt   |1 +
 src/glsl/Makefile.am  |2 +
 src/glsl/blinking-teapot.c|  177 +
 src/glsl/blinking-teapot.frag |   31 +++
 src/glsl/blinking-teapot.vert |   16 
 5 files changed, 227 insertions(+), 0 deletions(-)
 create mode 100644 src/glsl/blinking-teapot.c
 create mode 100644 src/glsl/blinking-teapot.frag
 create mode 100644 src/glsl/blinking-teapot.vert

diff --git a/src/glsl/CMakeLists.txt b/src/glsl/CMakeLists.txt
index 11f8e37..aeaf526 100644
--- a/src/glsl/CMakeLists.txt
+++ b/src/glsl/CMakeLists.txt
@@ -24,6 +24,7 @@ set (targets
bitmap
brick
bump
+   blinking-teapot
convolutions
deriv
fragcoord
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 5a9a802..d9b6f9b 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -37,6 +37,7 @@ bin_PROGRAMS = \
bitmap \
brick \
bump \
+   blinking-teapot \
convolutions \
deriv \
fragcoord \
@@ -77,6 +78,7 @@ bitmap_LDADD = ../util/libutil.la
 bezier_LDADD = ../util/libutil.la
 brick_LDADD = ../util/libutil.la
 bump_LDADD = ../util/libutil.la
+blinking_teapot_LDADD = ../util/libutil.la
 convolutions_LDADD = ../util/libutil.la
 deriv_LDADD = ../util/libutil.la
 geom_sprites_LDADD = ../util/libutil.la
diff --git a/src/glsl/blinking-teapot.c b/src/glsl/blinking-teapot.c
new file mode 100644
index 000..53b385b
--- /dev/null
+++ b/src/glsl/blinking-teapot.c
@@ -0,0 +1,177 @@
+/**
+ * blinking-teapot demo. It displays a teapot whose color go from blue to pink.
+ * The color variation is handled by uniform buffer object.
+ * Sources mostly from http://www.jotschi.de/?p=427 which uses UBO SPEC example
+ *
+ * Vincent Lejeune 2011
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "glut_wrap.h"
+#include "shaderutil.h"
+
+
+static int mouse_old_x, mouse_old_y;
+static int mouse_buttons = 0;
+static float rotate_x = 0.0, rotate_y = 0.0;
+static float translate_z = -2.0;
+
+static float delta = 0.01;
+static GLfloat wf, hf;
+
+
+static const GLchar *names[] = { "SurfaceColor", "WarmColor", "CoolColor",
+  "DiffuseWarm", "DiffuseCool"
+};
+
+static GLuint buffer_id, uniformBlockIndex, uindex, vshad_id, fshad_id,
+  prog_id;
+
+static GLsizei uniformBlockSize;
+static GLint singleSize;
+static GLint offset;
+
+static const GLfloat colors[] =
+  { 0.45, 0.45, 1, 1, 0.45, 0.45, 1, 1, 0.75, 0.75, 0.75, 1,
+  0.0, 0.0, 1.0, 1, 0.0, 1.0, 0.0, 1,
+};
+
+static GLfloat teapot_color = 1;
+
+static void
+reshape (int w, int h)
+{
+  wf = (GLfloat) w;
+  hf = (GLfloat) h;
+  glMatrixMode (GL_PROJECTION);
+  glLoadIdentity ();
+  gluPerspective (60.0, wf / hf, 0.1, 100.0);
+}
+
+static void
+init_opengl (void)
+{
+
+  if (!ShadersSupported ())
+exit (1);
+
+  vshad_id = CompileShaderFile (GL_VERTEX_SHADER, "blinking-teapot.vert");
+  fshad_id = CompileShaderFile (GL_FRAGMENT_SHADER, "blinking-teapot.frag");
+  prog_id = LinkShaders (vshad_id, fshad_id);
+
+  UseProgram (prog_id);
+
+  reshape (680, 400);
+
+  glGenBuffers (1, &buffer_id);
+
+  glBindBuffer (GL_UNIFORM_BUFFER, buffer_id);
+  glBufferData (GL_UNIFORM_BUFFER, uniformBlockSize, NULL, GL_DYNAMIC_DRAW);
+
+  glBindBufferBase (GL_UNIFORM_BUFFER, 0, buffer_id);
+  glUniformBlockBinding (prog_id, uniformBlockIndex, 0);
+
+  glGetUniformIndices (prog_id, 1, &names[2], &uindex);
+
+  glGetActiveUniformsiv (prog_id, 1, &uindex, GL_UNIFORM_OFFSET, &offset);
+  glGetActiveUniformsiv (prog_id, 1, &uindex, GL_UNIFORM_SIZE, &singleSize);
+
+  glViewport (0, 0, 680, 400);
+  glBufferData (GL_UNIFORM_BUFFER, 80, colors, GL_DYNAMIC_DRAW);
+}
+
+static void
+render (void)
+{
+  glClearColor (0.0, 0.0, 0.0, 0.0);
+  glClear (GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
+  glUseProgram (prog_id);
+  glEnable (GL_DEPTH_TEST);
+
+  glMatrixMode (GL_MODELVIEW);
+  glLoadIdentity ();
+  glTranslatef (0.0, 0.0, translate_z);
+  glRotatef (rotate_x, 1.0, 0.0, 0.0);
+  glRotatef (rotate_y, 0.0, 1.0, 0.0);
+  glColor3f (1.0, 1.0, 1.0);
+
+  glBindBuffer (GL_UNIFORM_BUFFER, buffer_id);
+  glBufferSubData (GL_UNIFORM_BUFFER, offset, 4 * singleSize, &teapot_color);
+
+  glFrontFace (GL_CW);
+  glutSolidTeapot (0.6);
+  glFrontFace (GL_CCW);
+  glutSwapBuffers ();
+  glutPostRedisplay ();
+
+  teapot_color += delta;
+
+  if (teapot_color > 1.0)
+{
+  delta = -0.01;
+}
+
+  if (teapot_color < 0.0)
+{
+  delta = +0.01;
+}
+
+}
+
+static void
+mouse (int button, int state, int x, int y)
+{
+  if (state == GLUT_DOWN)
+{
+  mouse_buttons |= 1 << button;
+}
+  else if (state == GLUT_UP)
+{
+  mouse_buttons = 0;
+}
+
+  mouse_old_x = x;
+  mouse_old_y = y;
+  glutPostRedisplay ();
+}
+
+static void
+motion (int x, int y)
+{
+  float dx, dy;
+  dx = x - mouse_old_x;
+  dy = y - mouse_old_y;
+
+  if (mouse_buttons & 1)
+{
+  rotate_x += dy * 0

Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage

2011-11-16 Thread Vadim Girlin

This patch fixes memory corruption, thanks.

Also there is another regression with Ligthsmark - incorrect rendering.
I've bisected it and came to the same commit again. Most noticeable
difference is in the very last scene - it should fade away, but it
doesn't. I can make the screenshots of good and bad rendering if you
need.

Vadim

On Wed, 2011-11-16 at 11:13 -0800, Ian Romanick wrote:
> From: Ian Romanick 
> 
> If glUniform1i and friends are going to dump data directly in
> driver-allocated, the pointers have to be updated when the storage
> moves.  This should fix the regressions seen with commit 7199096.
> 
> I'm not sure if this is the only place that needs this treatment.  I'm
> a little uncertain about the various functions in st_glsl_to_tgsi that
> modify the TGSI IR and try to propagate changes about that up to the
> gl_program.  That seems sketchy to me.
> 
> Signed-off-by: Ian Romanick 
> Cc: Vadim Girlin 
> Cc: Bryan Cain 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   14 ++
>  1 files changed, 14 insertions(+), 0 deletions(-)
> 
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 0bf6766..c8385bb 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4571,6 +4571,13 @@ st_translate_program(
> t->pointSizeOutIndex = -1;
> t->prevInstWrotePointSize = GL_FALSE;
>  
> +   for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
> +  struct gl_uniform_storage *const storage =
> +  &program->shader_program->UniformStorage[i];
> +
> +  _mesa_uniform_detach_all_driver_storage(storage);
> +   }
> +
> /*
>  * Declare input attributes.
>  */
> @@ -4797,6 +4804,13 @@ st_translate_program(
> t->insn[t->labels[i].branch_target]);
> }
>  
> +   /* This has to be done last.  Any operation the can cause
> +* prog->ParameterValues to get reallocated (e.g., anything that adds a
> +* program constant) has to happen before creating this linkage.
> +*/
> +   _mesa_associate_uniform_storage(ctx, program->shader_program,
> +proginfo->Parameters);
> +
>  out:
> if (t) {
>FREE(t->insn);





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Theiss, Ingo


> 
> See the second patch I posted (mesa: initialize stencilMap, Stride if 
> stencilRb==depthRb)
> 
> -Brian
> 
> 

Hi Brian,

the second patch did it. No more segfaults.

Great. Thanks!

Regards,

Ingo


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage

2011-11-16 Thread Ian Romanick


On 11/16/2011 12:38 PM, Vadim Girlin wrote:

This patch fixes memory corruption, thanks.

Also there is another regression with Ligthsmark - incorrect rendering.
I've bisected it and came to the same commit again. Most noticeable
difference is in the very last scene - it should fade away, but it
doesn't. I can make the screenshots of good and bad rendering if you
need.


I don't think screenshots will help.  If there are errors that bisect to 
that commit, it likely means that some uniform isn't getting set to the 
right value.  I'll poke around a bit more.



Vadim

On Wed, 2011-11-16 at 11:13 -0800, Ian Romanick wrote:

From: Ian Romanick

If glUniform1i and friends are going to dump data directly in
driver-allocated, the pointers have to be updated when the storage
moves.  This should fix the regressions seen with commit 7199096.

I'm not sure if this is the only place that needs this treatment.  I'm
a little uncertain about the various functions in st_glsl_to_tgsi that
modify the TGSI IR and try to propagate changes about that up to the
gl_program.  That seems sketchy to me.

Signed-off-by: Ian Romanick
Cc: Vadim Girlin
Cc: Bryan Cain
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   14 ++
  1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0bf6766..c8385bb 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4571,6 +4571,13 @@ st_translate_program(
 t->pointSizeOutIndex = -1;
 t->prevInstWrotePointSize = GL_FALSE;

+   for (i = 0; i<  program->shader_program->NumUserUniformStorage; i++) {
+  struct gl_uniform_storage *const storage =
+   &program->shader_program->UniformStorage[i];
+
+  _mesa_uniform_detach_all_driver_storage(storage);
+   }
+
 /*
  * Declare input attributes.
  */
@@ -4797,6 +4804,13 @@ st_translate_program(
 t->insn[t->labels[i].branch_target]);
 }

+   /* This has to be done last.  Any operation the can cause
+* prog->ParameterValues to get reallocated (e.g., anything that adds a
+* program constant) has to happen before creating this linkage.
+*/
+   _mesa_associate_uniform_storage(ctx, program->shader_program,
+  proginfo->Parameters);
+
  out:
 if (t) {
FREE(t->insn);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] ir_dereference_variable @ 0x1195e90 specifies undeclared variable

2011-11-16 Thread Theiss, Ingo

Hi there,

running piglit test 'shader_runner 
tests/spec/glsl-1.10/execution/samplers/in-parameter-struct.shader_test -auto 
-fbo' the program terminated with signal 6, aborted.

glxinfo:

OpenGL renderer string: Gallium 0.4 on AMD BARTS
OpenGL version string: 2.1 Mesa 7.12-devel (git-010dc29)
OpenGL shading language version string: 1.20


Here is the backtrace:

Core was generated by `bin/shader_runner 
tests/spec/glsl-1.10/execution/samplers/in-parameter-struct.s'.
Program terminated with signal 6, Aborted.
#0  0x7fd947363405 in *__GI_raise (sig=) at 
../nptl/sysdeps/unix/sysv/linux/raise.c:64
64  ../nptl/sysdeps/unix/sysv/linux/raise.c: Datei oder Verzeichnis nicht 
gefunden.
in ../nptl/sysdeps/unix/sysv/linux/raise.c
(gdb) bt
#0  0x7fd947363405 in *__GI_raise (sig=) at 
../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1  0x7fd947366680 in *__GI_abort () at abort.c:92
#2  0x7fd9449f1c5c in ir_validate::visit(ir_dereference_variable*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#3  0x7fd9449ebba5 in 
ir_dereference_record::accept(ir_hierarchical_visitor*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#4  0x7fd9449eb8ee in ir_texture::accept(ir_hierarchical_visitor*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#5  0x7fd9449eb858 in ir_expression::accept(ir_hierarchical_visitor*) () 
from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#6  0x7fd9449ebc5e in ir_assignment::accept(ir_hierarchical_visitor*) () 
from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#7  0x7fd9449eb700 in 
ir_function_signature::accept(ir_hierarchical_visitor*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#8  0x7fd9449eb7b4 in ir_function::accept(ir_hierarchical_visitor*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#9  0x7fd9449eb4e0 in visit_list_elements(ir_hierarchical_visitor*, 
exec_list*, bool) ()
   from /opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#10 0x7fd9449f1eda in validate_ir_tree(exec_list*) () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#11 0x7fd9449c07e2 in st_link_shader () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#12 0x7fd9449a633a in _mesa_glsl_link_shader () from 
/opt/mesa/lib/x86_64-linux-gnu/dri/r600_dri.so
#13 0x00434a83 in link_and_use_shaders ()
#14 0x00437853 in piglit_init ()
#15 0x004386ec in main ()


Regards,

Ingo
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] intel: Detile stencil buffer only if necessary

2011-11-16 Thread Chad Versace

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 11/15/2011 06:16 PM, Eric Anholt wrote:
> On Sun, 13 Nov 2011 22:32:15 -0800, Chad Versace 
>  wrote:
>> In intel_map_renderbuffer_s8(), detile and copy the stencil buffer into
>> the temporary buffer only if the renderbuffer is mapped in read mode. If
>> the caller never going to read the bits but just clobber them, then it's
>> wasted effort to detile the stencil buffer.
>>
>> CC: Eric Anholt 
>> Signed-off-by: Chad Versace 
> 
> I don't think this is correct.
> 
> Imagine something that isn't writing all of the pixels, but definitely
> isn't reading data before writing.  It might sensibly ask for
> GL_WRITE_BIT but not GL_READ_BIT, and then all the pixels it doesn't
> write get trashed when the whole rectangle is written back out.
> 
> If the mode contained GL_MAP_INVALIDATE_RANGE_BIT (basically,
> interpreting the mode argument like GL_ARB_map_buffer_range, which was
> my intent if not Brian's), this would be reasonable.

You're correct. This patch is nakd.

- 
Chad Versace
chad.vers...@linux.intel.com
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQIcBAEBAgAGBQJOxCQDAAoJEAIvNt057x8iMnQQALJW9MK18IHdQ08RX0lxLDkW
Tzy5U3zv+j8NxzdQl4aQyA2lXmpNZHzPB6L3AV0Yog+I3ey63A0ADNi94brbu4LS
EX/tu+Vb0VsSTC9/aSazDDLQJbLecRo2ZdYueOn/3eKJONmHeCZz8XZrn5wgSE1W
Uh4dSR7cORw1vBZBugB5vD55JlRrZtDM45d6s/VRzY2O2ujrBQvr/lNbTZ35HNd7
zIL+67y50RBWMYIiyyzyXHa8lWVwllo1K9CM0LBmebxcX5t6rCn5QyZcT5F0DQAn
CyKpKBVDt2mkaOqgnFUiXSTETTOKCla9tzBjNS6OlI65wAmYRZZz0he1rrdl1g6Y
kKkI61qWwCOyKMLdRxCWoXYY3QmenIEe4++idtlTauFgKtSZE2v4oAx2X1Z4FvY8
vbcpje3tbt7khMQnFVrVU/nHIyKw6Ko2r+WP32Cr2j044f1VAj+N1zIpCNdPTsd6
ZvJWdAUcPM+vN9fXacmWJAheE2sfzXCJ9o6ffCpRs7LYSv2ZuVlpyZ53a06D5dl/
ya9A9dMbjmFr0MGfELWHKgyRv+Cx/0XxrgvmQNxz6O4TUp6V+xiE6Qiyr5kj90wI
bah5zurL3mRVTH0Dp/pQFOxiIIzovCTXGHi35ymRL1/rLWyhgApa2jngVqqeawEi
9Ea+IzVuGlyke/v2kocL
=PH5l
-END PGP SIGNATURE-
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] piglit: Segmentation fault running fbo-depthstencil -auto readpixels default

2011-11-16 Thread Brian Paul


On 11/16/2011 01:42 PM, Theiss, Ingo wrote:




See the second patch I posted (mesa: initialize stencilMap, Stride if
stencilRb==depthRb)

-Brian




Hi Brian,

the second patch did it. No more segfaults.

Great. Thanks!


OK, I'll push the patch.

-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] r600g: lazy load for AR register

2011-11-16 Thread Tilman Sauerbeck

Vadim Girlin [2011-11-16 22:28]:
> On Wed, 2011-11-16 at 18:57 +0100, Tilman Sauerbeck wrote:
> > Vadim Girlin [2011-11-13 22:08]:
> > > Emit MOVA* instruction only when AR is used.
> > > 
> > > Signed-off-by: Vadim Girlin 
> > > ---
> > > 
> > > Tested on evergreen: no regressions, fixes some variable-indexing tests.
> > 
> > On my rv730 this patch causes broken rendering in the game Heroes of
> > Newerth. I have an apitrace file that reproduces the issue but
> > unfortunately that file is 3.x GB.
> > 
> > I've made the results of a piglit run comparing 88a140c (good) 8e366dc (bad)
> > available at http://files.code-monkey.de/piglit.tar.bz2 .
> > 
> 
> Please create the shader dump for one of the broken tests, e.g. :
> 
> R600_DUMP_SHADERS=1 bin/shader_runner 
> tests/spec/glsl-1.10/execution/variable-indexing/fs-temp-array-mat2-col-row-wr.shader_test
>  -auto -fbo

Attached.

Thanks,
Tilman

-- 
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing on usenet and in e-mail?


fs-temp-array-mat2-col-row-wr.dump.gz
Description: application/gunzip


pgpwGCOR0KfVQ.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] read_rgba_pixels: Don't force clamping if the renderbuffer is already clamped.

2011-11-16 Thread Brian Paul


On 11/16/2011 09:44 AM, Michel Dänzer wrote:

From: Michel Dänzer

Signed-off-by: Michel Dänzer
---
  src/mesa/main/readpix.c |3 ++-
  1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index 8550618..582adc3 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -285,7 +285,8 @@ read_rgba_pixels( struct gl_context *ctx,
return;

 if ((ctx->Color._ClampReadColor == GL_TRUE || type != GL_FLOAT)&&
-   !_mesa_is_integer_format(format)) {
+   !_mesa_is_integer_format(format)&&
+   _mesa_get_format_datatype(rb->Format) != GL_UNSIGNED_NORMALIZED) {
transferOps |= IMAGE_CLAMP_BIT;
 }



Looks good to me.

Reviewed-by: Brian Paul 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/mesa: rewrite the primitive restart fallback code

2011-11-16 Thread Brian Paul

Previously we were mapping/unmapping the index buffer each time we
found the restart index in the buffer.  This is bad when the restart
index is frequently used.  Now just map the index buffer once, scan
it to produce a list of sub-primitives, unmap the buffer, then draw
the sub-primitives.

Also, clean up the logic of testing for indexed primitives and calling
handle_fallback_primitive_restart().  Don't call it for non-indexed
primitives.
---
 src/mesa/state_tracker/st_draw.c |  224 +-
 1 files changed, 124 insertions(+), 100 deletions(-)

diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index cb518e1..bb7decf 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -648,45 +648,89 @@ check_uniforms(struct gl_context *ctx)
}
 }
 
-/** Helper code for primitive restart fallback */
-#define DO_DRAW(pipe, cur_start, cur_count) \
-   do { \
-  info.start = cur_start; \
-  info.count = cur_count; \
-  if (u_trim_pipe_prim(info.mode, &info.count)) { \
- if (transfer) \
-pipe_buffer_unmap(pipe, transfer); \
- pipe->draw_vbo(pipe, &info); \
- if (transfer) { \
-ptr = pipe_buffer_map(pipe, ibuffer->buffer, PIPE_TRANSFER_READ, 
&transfer); \
-assert(ptr != NULL); \
-ptr = ADD_POINTERS(ptr, ibuffer->offset); \
- } \
-  } \
-   } while(0)
-  
-/** More helper code for primitive restart fallback */
-#define PRIM_RESTART_LOOP(elements) \
-   do { \
-  for (i = start; i < end; i++) { \
- if (elements[i] == info.restart_index) { \
-if (cur_count > 0) { \
-   /* draw elts up to prev pos */ \
-   DO_DRAW(pipe, cur_start, cur_count); \
-} \
-/* begin new prim at next elt */ \
-cur_start = i + 1; \
-cur_count = 0; \
- } \
- else { \
-cur_count++; \
+
+struct sub_primitive
+{
+   unsigned start, count;
+};
+
+
+/**
+ * Scan the elements array to find restart indexes.  Return a list
+ * of primitive (start,count) pairs to indicate how to draw the sub-
+ * primitives delineated by the restart index.
+ */
+static struct sub_primitive *
+find_sub_primitives(const void *elements, unsigned element_size,
+unsigned start, unsigned end, unsigned restart_index,
+unsigned *num_sub_prims)
+{
+   const unsigned max_prims = end - start;
+   struct sub_primitive *sub_prims;
+   unsigned i, cur_start, cur_count, num;
+
+   sub_prims = (struct sub_primitive *)
+  malloc(max_prims * sizeof(struct sub_primitive));
+
+   if (!sub_prims) {
+  *num_sub_prims = 0;
+  return NULL;
+   }
+
+   cur_start = start;
+   cur_count = 0;
+   num = 0;
+
+#define SCAN_ELEMENTS(TYPE) \
+   for (i = start; i < end; i++) { \
+  if (((const TYPE *) elements)[i] == restart_index) { \
+ if (cur_count > 0) { \
+assert(num < max_prims); \
+sub_prims[num].start = cur_start; \
+sub_prims[num].count = cur_count; \
+num++; \
  } \
+ cur_start = i + 1; \
+ cur_count = 0; \
   } \
-  if (cur_count > 0) { \
- DO_DRAW(pipe, cur_start, cur_count); \
+  else { \
+ cur_count++; \
   } \
-   } while (0)
+   } \
+   if (cur_count > 0) { \
+  assert(num < max_prims); \
+  sub_prims[num].start = cur_start; \
+  sub_prims[num].count = cur_count; \
+  num++; \
+   }
 
+   switch (element_size) {
+   case 1:
+  SCAN_ELEMENTS(ubyte);
+  break;
+   case 2:
+  SCAN_ELEMENTS(ushort);
+  break;
+   case 4:
+  SCAN_ELEMENTS(uint);
+  break;
+   default:
+  assert(0 && "bad index_size in find_sub_primitives()");
+   }
+
+#undef SCAN_ELEMENTS
+
+   *num_sub_prims = num;
+
+   return sub_prims;
+}
+
+
+/**
+ * For gallium drivers that don't support the primitive restart
+ * feature, handle it here by breaking up the indexed primitive into
+ * sub-primitives.
+ */
 static void
 handle_fallback_primitive_restart(struct pipe_context *pipe,
   const struct _mesa_index_buffer *ib,
@@ -698,75 +742,57 @@ handle_fallback_primitive_restart(struct pipe_context 
*pipe,
const unsigned end = start + count;
struct pipe_draw_info info = *orig_info;
struct pipe_transfer *transfer = NULL;
-   unsigned instance, i, cur_start, cur_count;
-   const void *ptr;
-
-   info.primitive_restart = FALSE;
+   unsigned instance, i;
+   const void *ptr = NULL;
+   struct sub_primitive *sub_prims;
+   unsigned num_sub_prims;
 
-   if (!info.indexed) {
-  /* Splitting the draw arrays call is handled by the VBO module */
-  if (u_trim_pipe_prim(info.mode, &info.count))
- pipe->draw_vbo(pipe, &info);
+   assert(info.indexed);
+   assert(ibuffer->buffer);
+   assert(ib);
 
+   if (!ibuffer->buffer || !ib)
   return;
-   }
 
-   /* info.i

Re: [Mesa-dev] [PATCH] mesa-demos: Add blinking-teapot demo.

2011-11-16 Thread Brian Paul


On 11/16/2011 12:45 PM, vlj wrote:

blinking-teapot is an UBO demo.
---
  src/glsl/CMakeLists.txt   |1 +
  src/glsl/Makefile.am  |2 +
  src/glsl/blinking-teapot.c|  177 +
  src/glsl/blinking-teapot.frag |   31 +++
  src/glsl/blinking-teapot.vert |   16 
  5 files changed, 227 insertions(+), 0 deletions(-)
  create mode 100644 src/glsl/blinking-teapot.c
  create mode 100644 src/glsl/blinking-teapot.frag
  create mode 100644 src/glsl/blinking-teapot.vert



This looks better.  Two more suggestions:

1. Call glutExtensionSupported("GL_ARB_uniform_buffer_object") to 
check that the extension is supported.


2. It would be nice if hitting Esc would exit the demo, like the other 
demos.


-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] r600g: don't change the order of writes in merge_inst_group

2011-11-16 Thread Vadim Girlin

Merge may produce incorrect order of operations for r600-eg:

x: inst1 R0.x, ... ;  //from current group
...
t: inst0 R0.x, ... ;  //from previous group, same destination

Result of inst1 will be lost.

So compare destinations and don't allow this.

Signed-off-by: Vadim Girlin 
---

cc: Tilman Sauerbeck 

This should fix the regression with "r600g: lazy load for AR register" for
r600/r700.

 src/gallium/drivers/r600/r600_asm.c |   15 ++-
 1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 6b1ad65..1ab16f2 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1010,6 +1010,11 @@ static int merge_inst_groups(struct r600_bytecode *bc, 
struct r600_bytecode_alu
result[i] = prev[i];
result[4] = slots[i];
} else if (is_alu_any_unit_inst(bc, prev[i])) {
+   if (slots[i]->dst.sel == 
prev[i]->dst.sel &&
+   (slots[i]->dst.write == 1 || 
slots[i]->is_op3) &&
+   (prev[i]->dst.write == 1 || 
prev[i]->is_op3))
+   return 0;
+
result[i] = slots[i];
result[4] = prev[i];
} else
@@ -1018,8 +1023,16 @@ static int merge_inst_groups(struct r600_bytecode *bc, 
struct r600_bytecode_alu
return 0;
} else if(!slots[i]) {
continue;
-   } else
+   } else {
+   if (max_slots == 5 && slots[i] && prev[4] &&
+   slots[i]->dst.sel == prev[4]->dst.sel &&
+   slots[i]->dst.chan == prev[4]->dst.chan 
&&
+   (slots[i]->dst.write == 1 || 
slots[i]->is_op3) &&
+   (prev[4]->dst.write == 1 || 
prev[4]->is_op3))
+   return 0;
+
result[i] = slots[i];
+   }
 
alu = slots[i];
num_once_inst += is_alu_once_inst(bc, alu);
-- 
1.7.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/10] Make gl_array_object use an array of gl_client_attribs

2011-11-16 Thread Brian Paul


On 11/11/2011 10:07 AM, Mathias Fröhlich wrote:


Hi all,

following a patch series to make gl_array_object use a VERT_ATTRIB_*
indexed array of gl_client_array structs.
Since we have already 33 client arrays in an array object the VERT_BIT_* and
for vertex shader inputs bitmaps are extended to 64 bits. Drivers
and shader code is updated to use the 64 bits bitmasks for
the vertex array inputs.
A first set of cleanup patches is included. But more is
probably possible past this change.

The patchset survives a r600 piglit run on my favourite rv670 development
machine with no regressions.

I get about 10% shorter draw stage times with OpenSceneGraph based
applications.

Please review.


I did a quick read of these patches and they look pretty good.  I'd 
like to go over them again though.  Another set of eyes would be good 
too.  I'll try to finish reviewing by the end of the week.


-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/6] intel: Detile stencil buffer only if necessary

2011-11-16 Thread Chad Versace

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 11/15/2011 06:17 PM, Eric Anholt wrote:
> On Tue, 15 Nov 2011 08:06:34 -0800, Chad Versace
>  wrote:
>> -BEGIN PGP SIGNED MESSAGE-
>> Hash: SHA1
>>
>> On 11/14/2011 04:03 PM, Eric Anholt wrote:
>>> On Mon, 14 Nov 2011 14:29:51 -0800, Chad Versace
>>>  wrote:
 Without 3, there were lots of bugs. But now that you ask, I don't
 remember
 exactly why the bug happened. I'm digging into that now.
 (I wrote the patch at midnight, and I dont' remember much from that
 hour...) 
>>>
>>> Ah!  It all makes sense now.
>>>
>>> I didn't guess that, because my midnight patches never have such good
>>> commit messages :)
>>
>> Do rb's for patches 5 and 6 come with your revelation?
>>
>> The patches really do fix bugs, and I'm feeling kinda lazy about splitting
>> them up now.
> 
> I thought the conclusion yesterday was that the lacks-explanation third
> of patch 5 was likely not required.  What happened, or am I
> misremembering?

Part 3 of patch 5 is required. I did some digging and discovered why. Let's
carefully walk through the reason.

Suppose that irb->wrapped_depth is installed at fb->_DepthBuffer and that
irb->wrapped_stencil is installed at fb->_StencilBuffer; that is, this patch
is applied without part 3.

When swrast_render_start is called, in general it is impossible to know which
buffers its caller will access. So we must map all three of
fb->Attachment[BUFFER_DEPTH]
fb->Attachment[BUFFER_DEPTH]->wrapped_depth   (which is fb->_DepthBuffer)
fb->Attachment[BUFFER_DEPTH]->wrapped_stencil (which is fb->_StencilBuffer)

When swrast_render_finish() is called, in general it is impossible to know
which buffers were accessed. So we must unmap  all three of the above buffers.
The order in which they are unmapped is signficant, because a scatter/gather
operation occurs for
intel_unmap_renderbuffer(fb->Attachment[BUFFER_DEPTH])
intel_unmap_renderbuffer(fb->Attachment[BUFFER_DEPTH]->wrapped_stencil)

Now consider two cases.

Case 1
- --
Suppose that swrast_render_finish's caller wrote to fb->_StencilBuffer and did
not access fb->Attachment[BUFFER_DEPTH]. Then we must perform the unmaps in
this order:
1. fb->Attachment[BUFFER_DEPTH]
2. fb->Attachment[BUFFER_DEPTH]->wrapped_stencil
Why? Unmap 1 scatters the s8 bits of the s8z24 buffer into the stencil buffer
Unnmap 2 copies the temporary untiled stencil buffer into the tiled stencil
buffer. Since fb->_StencilBuffer now holds the real stencil buffer contents,
unmap 2 needs to win.

Case 2
- --
Suppose that swrast_render_finish's caller does not access fb->_StencilBuffer
and writes to the stencil bits of fb->Attachment[BUFFER_DEPTH]. Then we must
perform the unmaps in this order:
1. fb->Attachment[BUFFER_DEPTH]->wrapped_stencil
2. fb->Attachment[BUFFER_DEPTH]


Since case 1 and 2 require conflicting orderings, intel_renderbuffer_unmap
must be incorrect. The only way to escape the problem is by part 3 of my
patch.

- 
Chad Versace
chad.vers...@linux.intel.com

-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQIcBAEBAgAGBQJOxE03AAoJEAIvNt057x8iDGYP/Rcd3uZRc18Vq+3Q4NK5UmMp
mGcGW0XMFxNSKhSGk0yoBgDT+LuhOY8WgAYK/qUbwK5Y3cL+zG8/vGJKb67Qp29w
8qsqrz/ONbEufDr6er+srOIGhM91lg23yW1wRAJkcnpxcoujVm2N4ljWyWigbShf
YywwbFbxVIIJq5+wskp4H1ENg/pjyCJGrCPKRsGF2B0+2k+Hnr/Cwnqr3ASI0lYt
S4CtfXrEkx1PvH3s8ZxxmXS2UFe6fcbgChEnvUko6Q0xTKzurr3Zam+35HKRWK0k
DmmF4KeGCgrOWL0fsdOaHYvtr2eB7S/4ZRZVN53L+mpJN+oRVv/JVDk2OnkWsx1n
/pNsPrDxzUt4bxV+qsiHwNLDTdgRneWKCT6NccRtQrRGnboh67L/Ebgw3UvsyLWi
xkMU3rJ2LeDVQVampGtzSZZl7KEZ8BjhgGUViGAxa2iG93TMRjHl0KwKHztvNLaT
7lIgmBE/A3jRSq7U8DqGscT7wkKJG7OBCwJZju1ezAqujdESfEYwrOayTLbiM9oz
tG4F1gtNIjPwyefdyZbdnKlKJ1d8u90xaBvKcx/Qi6Q3jSDawS1NKYC0QLZnwNwH
23JYsDNCiHAz5IrQYr2yYV/FejadGNVCNhMt8ERCtgvXKw3EiaaafmRt6WhdLzWs
Gm6Ncq/l64B0ykLVYWPm
=5YD9
-END PGP SIGNATURE-
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] Allow glTexImage2D with a depth component cube map

2011-11-16 Thread Anuj Phogat

From: Anuj Phogat 

Hi,

Here is a patch to allow glTexImage2D and glCopyTexImage2D with depth component 
cubemap.
It is listed in mesa work queue with a label "CUBE1". I've tested the patch and 
output looks visually correct. 
Please review the fix and let me know if i'm missing something.

Thanks
Anuj
---
 src/mesa/main/teximage.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index acf7187..81f75c8 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1596,7 +1596,7 @@ texture_error_check( struct gl_context *ctx,
 
/* additional checks for depth textures */
if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT) {
-  /* Only 1D, 2D, rect and array textures supported, not 3D or cubes */
+  /* Only 1D, 2D, rect, array and cube textures are supported, not 3D*/
   if (target != GL_TEXTURE_1D &&
   target != GL_PROXY_TEXTURE_1D &&
   target != GL_TEXTURE_2D &&
@@ -1606,7 +1606,13 @@ texture_error_check( struct gl_context *ctx,
   target != GL_TEXTURE_2D_ARRAY &&
   target != GL_PROXY_TEXTURE_2D_ARRAY &&
   target != GL_TEXTURE_RECTANGLE_ARB &&
-  target != GL_PROXY_TEXTURE_RECTANGLE_ARB) {
+  target != GL_PROXY_TEXTURE_RECTANGLE_ARB &&
+ target != GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+ target != GL_TEXTURE_CUBE_MAP_NEGATIVE_X &&
+ target != GL_TEXTURE_CUBE_MAP_POSITIVE_Y &&
+  target != GL_TEXTURE_CUBE_MAP_NEGATIVE_Y &&
+ target != GL_TEXTURE_CUBE_MAP_POSITIVE_Z &&
+ target != GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) {
  if (!isProxy)
 _mesa_error(ctx, GL_INVALID_ENUM,
 "glTexImage(target/internalFormat)");
-- 
1.7.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/10] Make gl_array_object use an array of gl_client_attribs

2011-11-16 Thread Mathias Fröhlich


Hi,

On Thursday, November 17, 2011 00:47:22 Brian Paul wrote:
> I did a quick read of these patches and they look pretty good.  I'd
> like to go over them again though.  Another set of eyes would be good
> too.  I'll try to finish reviewing by the end of the week.

Thanks, I have a busy week too, so I won't get back to that before the weekend 
also.

Greetings

Mathias
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl_to_tgsi: Invalidate and revalidate uniform backing storage

2011-11-16 Thread Bryan Cain

It's unfortunately been a while since I've done anything with
glsl_to_tgsi.  What are the "various functions that modify the TGSI IR
and try to propagate changes about that up to the gl_program"?  If I can
see where it is in the code, I can probably remember if there's a reason
it was done that way.

Bryan

On 11/16/2011 01:13 PM, Ian Romanick wrote:
> From: Ian Romanick 
>
> If glUniform1i and friends are going to dump data directly in
> driver-allocated, the pointers have to be updated when the storage
> moves.  This should fix the regressions seen with commit 7199096.
>
> I'm not sure if this is the only place that needs this treatment.  I'm
> a little uncertain about the various functions in st_glsl_to_tgsi that
> modify the TGSI IR and try to propagate changes about that up to the
> gl_program.  That seems sketchy to me.
>
> Signed-off-by: Ian Romanick 
> Cc: Vadim Girlin 
> Cc: Bryan Cain 
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |   14 ++
>  1 files changed, 14 insertions(+), 0 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 0bf6766..c8385bb 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4571,6 +4571,13 @@ st_translate_program(
> t->pointSizeOutIndex = -1;
> t->prevInstWrotePointSize = GL_FALSE;
>  
> +   for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
> +  struct gl_uniform_storage *const storage =
> +  &program->shader_program->UniformStorage[i];
> +
> +  _mesa_uniform_detach_all_driver_storage(storage);
> +   }
> +
> /*
>  * Declare input attributes.
>  */
> @@ -4797,6 +4804,13 @@ st_translate_program(
> t->insn[t->labels[i].branch_target]);
> }
>  
> +   /* This has to be done last.  Any operation the can cause
> +* prog->ParameterValues to get reallocated (e.g., anything that adds a
> +* program constant) has to happen before creating this linkage.
> +*/
> +   _mesa_associate_uniform_storage(ctx, program->shader_program,
> +proginfo->Parameters);
> +
>  out:
> if (t) {
>FREE(t->insn);

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

68 matches

Mail list logo