Looks good to me. Thanks, Roland. Jose
----- Original Message ----- > From: Roland Scheidegger <srol...@vmware.com> > > Some rounding errors could crop up when calculating a0. Use a more accurate > method (barycentric interpolation essentially) to fix this, though to fix > the REAL problem (which is that our interpolation will give very bad results > with small triangles far away from the origin when they have steep gradients) > this does absolutely nothing (actually makes it worse). (To fix the real > problem, we would either need to use a vertex corner (or some other point inside > the tri) as starting point value instead of fb origin and pass that down to > interpolation, or mimic what hw does and use barycentric interpolation (using > the coordinates extracted from the rasterizer edge functions) - maybe another > time.) > Some (silly) tests though really want high accuracy at fb origin and don't > care much about anything else (Just. Don't. Ask.). > --- > src/gallium/drivers/llvmpipe/lp_state_setup.c | 88 > +++++++++++++++++++++++-- > 1 file changed, 82 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c > b/src/gallium/drivers/llvmpipe/lp_state_setup.c > index 59ab467..ef000fb 100644 > --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c > +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c > @@ -49,6 +49,15 @@ > #include "lp_state_fs.h" > #include "lp_state_setup.h" > > +/* > + * Set if the start point for interpolation should be calculated with a > + * more accurate method (barycentric interpolation). > + * Unfortunately, actual interpolation results of small tris with steep > + * gradients far away from the origin are still very busted, this does > + * nothing to change that (in fact it may make it worse), but some tests > + * (don't ask) really want accurate values at origin (and ONLY origin). 
> + */ > +#define ACCURATE_A0 1 > > > /* currently organized to interpolate full float[4] attributes even > @@ -77,6 +86,9 @@ struct lp_setup_args > LLVMValueRef dy01_ooa; > LLVMValueRef dx20_ooa; > LLVMValueRef dx01_ooa; > + LLVMValueRef e01o; > + LLVMValueRef e20o; > + LLVMValueRef e12o; > struct lp_build_context bld; > }; > > @@ -376,6 +388,19 @@ load_attribute(struct gallivm_state *gallivm, > } > } > > +/* > + * FIXME: interpolation is always done wrt fb origin (0/0). > + * However, if some (small) tri is far away from the origin and gradients > + * are large, this can lead to HUGE errors, since the a0 value calculated > + * here can get very large (with the actual values inside the triangle way > + * smaller), leading to complete loss of accuracy. This could be prevented > + * by using some point inside (or at corner) of the tri as interpolation > + * origin, or just use barycentric interpolation (which GL suggests and is > + * what real hw does - you can get the barycentric coordinates from the > + * edge functions in rasterization in principle (though we skip these > + * sometimes completely in case of tris covering a block fully, > + * which obviously wouldn't work)). 
> + */ > static void > emit_coef4( struct gallivm_state *gallivm, > struct lp_setup_args *args, > @@ -385,6 +410,8 @@ emit_coef4( struct gallivm_state *gallivm, > LLVMValueRef a2) > { > LLVMBuilderRef b = gallivm->builder; > + bool accurate_a0 = ACCURATE_A0; > + LLVMValueRef attr_0; > LLVMValueRef dy20_ooa = args->dy20_ooa; > LLVMValueRef dy01_ooa = args->dy01_ooa; > LLVMValueRef dx20_ooa = args->dx20_ooa; > @@ -408,10 +435,19 @@ emit_coef4( struct gallivm_state *gallivm, > > /* Calculate a0 - the attribute value at the origin > */ > - LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, > "dadx_x0"); > - LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, > "dady_y0"); > - LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, > "attr_v0"); > - LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); > + if (!accurate_a0) { > + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, > "dadx_x0"); > + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, > "dady_y0"); > + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, > "attr_v0"); > + attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); > + } > + else { > + LLVMValueRef ao2 = LLVMBuildFMul(b, args->e01o, a2, ""); > + LLVMValueRef ao1 = LLVMBuildFMul(b, args->e20o, a1, ""); > + LLVMValueRef ao0 = LLVMBuildFMul(b, args->e12o, a0, ""); > + attr_0 = LLVMBuildFAdd(b, ao0, ao1, ""); > + attr_0 = LLVMBuildFAdd(b, attr_0, ao2, ""); > + } > > store_coef(gallivm, args, slot, attr_0, dadx, dady); > } > @@ -623,10 +659,11 @@ init_args(struct gallivm_state *gallivm, > LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); > LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; > LLVMValueRef e, f, ef, ooa; > - LLVMValueRef shuffles[4]; > + LLVMValueRef shuffles[4], shuf10; > LLVMValueRef attr_pos[3]; > struct lp_type typef4 = lp_type_float_vec(32, 128); > struct lp_build_context bld; > + bool accurate_a0 = ACCURATE_A0; > > lp_build_context_init(&bld, gallivm, typef4); > args->bld = bld; > @@ 
-651,8 +688,9 @@ init_args(struct gallivm_state *gallivm, > shuffles[1] = zeroi; > shuffles[2] = LLVMGetUndef(shuf_type); > shuffles[3] = LLVMGetUndef(shuf_type); > + shuf10 = LLVMConstVector(shuffles, 4); > > - dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, LLVMConstVector(shuffles, > 4), ""); > + dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); > > ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); > e = LLVMBuildExtractElement(b, ef, zeroi, ""); > @@ -670,6 +708,44 @@ init_args(struct gallivm_state *gallivm, > dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); > dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); > > + if (accurate_a0) { > + LLVMValueRef xy1xy2, xy1xy2_center, dxy12, dyx01, dyx12yx20; > + LLVMValueRef p0, p1p2, tmp0, tmp1, shuf0145, shuf1054, shuf1u3u; > + > + shuffles[0] = zeroi; > + shuffles[1] = onei; > + shuffles[2] = lp_build_const_int32(gallivm, 4); > + shuffles[3] = lp_build_const_int32(gallivm, 5); > + shuf0145 = LLVMConstVector(shuffles, 4); > + shuffles[0] = onei; > + shuffles[1] = zeroi; > + shuffles[2] = lp_build_const_int32(gallivm, 5); > + shuffles[3] = lp_build_const_int32(gallivm, 4); > + shuf1054 = LLVMConstVector(shuffles, 4); > + shuffles[0] = onei; > + shuffles[1] = LLVMGetUndef(shuf_type); > + shuffles[2] = lp_build_const_int32(gallivm, 3); > + shuffles[3] = LLVMGetUndef(shuf_type); > + shuf1u3u = LLVMConstVector(shuffles, 4); > + > + xy1xy2 = LLVMBuildShuffleVector(b, attr_pos[1], attr_pos[2], shuf0145, > ""); > + xy1xy2_center = LLVMBuildFSub(b, xy1xy2, pixel_center, ""); > + dxy12 = LLVMBuildFSub(b, attr_pos[1], attr_pos[2], "dxy12"); > + dxy12 = LLVMBuildFMul(b, dxy12, ooa, ""); > + dyx12yx20 = LLVMBuildShuffleVector(b, dxy12, dxy20, shuf1054, > "dyx12yx20"); > + dyx01 = LLVMBuildShuffleVector(b, dxy01, dxy01, shuf10, ""); > + p0 = LLVMBuildFMul(b, dyx01, xy0_center, ""); > + p1p2 = LLVMBuildFMul(b, dyx12yx20, xy1xy2_center, ""); > + tmp0 = LLVMBuildExtractElement(b, p0, zeroi, ""); > + tmp1 = LLVMBuildExtractElement(b, p0, onei, ""); 
> + args->e01o = lp_build_broadcast_scalar(&bld, LLVMBuildFSub(b, tmp0, > tmp1, "e01o")); > + tmp1 = LLVMBuildShuffleVector(b, p1p2, p1p2, shuf1u3u, ""); > + tmp0 = LLVMBuildFSub(b, p1p2, tmp1, "e12o20o"); > + args->e12o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0, > zeroi); > + args->e20o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0, > + lp_build_const_int32(gallivm, > 2)); > + } > + > args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, > dxy20, onei); > args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, > dxy01, onei); > > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev