Had some problems to figure out where the factor 2 came from, but in the end, this series is
Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> On Tue, Jan 10, 2017 at 5:35 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > As remarked by the comment in the original code, the old algorithm fails when > (tc + deriv) points at a different cube face. Instead, simply project the > derivative directly to the plane of the selected cube face. > > The new code is based on exactly differentiating (using the chain rule) > the projection onto a plane corresponding to a fixed cube map face (which > is still selected in the usual way based on the texture coordinate itself). > The computations end up fairly involved, but we do save two reciprocal > computations. > > Fixes GL45-CTS.texture_cube_map_array.sampling. > > v2: add 0.5 offset to tex coords only after derivative calculation > -- > A small change in si_prepare_cube_coords: we re-use coords[i] for the > derivative computation, which requires the scaled value (sc/ma) wihtout > the offset. I've locally rebased subsequent patches as well, and updated > the version at https://cgit.freedesktop.org/~nh/mesa/log/?h=cubemaps. > --- > src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 130 > ++++++++++++++++++---- > 1 file changed, 107 insertions(+), 23 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c > b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c > index c0d7220..bef1afe 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c > +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c > @@ -952,20 +952,76 @@ static void build_cube_intrinsic(struct gallivm_state > *gallivm, > lp_build_const_int32(gallivm, 0), ""); > out->stc[0] = LLVMBuildExtractElement(builder, tmp, > lp_build_const_int32(gallivm, 1), ""); > out->ma = LLVMBuildExtractElement(builder, tmp, > lp_build_const_int32(gallivm, 2), ""); > out->id = LLVMBuildExtractElement(builder, tmp, > lp_build_const_int32(gallivm, 3), ""); > } > } > > +/** > + * Build a manual selection sequence for cube face sc/tc coordinates and > + * major axis vector (multiplied by 2 for consistency) for the given > + * vec3 \p coords, for the face implied by \p selcoords. > + * > + * For the major axis, we always adjust the sign to be in the direction of > + * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards > + * the selcoords major axis. > + */ > +static void build_cube_select(LLVMBuilderRef builder, > + const struct cube_selection_coords *selcoords, > + const LLVMValueRef *coords, > + LLVMValueRef *out_st, > + LLVMValueRef *out_ma) > +{ > + LLVMTypeRef f32 = LLVMTypeOf(coords[0]); > + LLVMValueRef is_ma_positive; > + LLVMValueRef sgn_ma; > + LLVMValueRef is_ma_z, is_not_ma_z; > + LLVMValueRef is_ma_y; > + LLVMValueRef is_ma_x; > + LLVMValueRef sgn; > + LLVMValueRef tmp; > + > + is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, > + selcoords->ma, LLVMConstReal(f32, 0.0), ""); > + sgn_ma = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); > + > + is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 4.0), ""); > + is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); > + is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, > + LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, > LLVMConstReal(f32, 2.0), ""), ""); > + is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, > is_ma_y, ""), ""); > + > + /* Select sc */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), > + LLVMBuildSelect(builder, is_ma_x, sgn_ma, > + LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); > + out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select tc */ > + tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); > + sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, > sgn_ma, ""), > + LLVMConstReal(f32, -1.0), ""); > + out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); > + > + /* Select ma */ > + tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], > + LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), > ""); > + sgn = LLVMBuildSelect(builder, is_ma_positive, > + LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); > + *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); > +} > + > static void si_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base, > LLVMValueRef *in, LLVMValueRef *out) > { > struct gallivm_state *gallivm = bld_base->base.gallivm; > LLVMBuilderRef builder = gallivm->builder; > LLVMTypeRef type = bld_base->base.elem_type; > struct cube_selection_coords coords; > LLVMValueRef invma; > LLVMValueRef mad_args[3]; > > @@ -990,59 +1046,87 @@ static void si_llvm_cube_to_2d_coords(struct > lp_build_tgsi_context *bld_base, > void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, > struct lp_build_emit_data *emit_data, > LLVMValueRef *coords_arg, > LLVMValueRef *derivs_arg) > { > > unsigned target = emit_data->inst->Texture.Texture; > unsigned opcode = emit_data->inst->Instruction.Opcode; > struct gallivm_state *gallivm = bld_base->base.gallivm; > LLVMBuilderRef builder = gallivm->builder; > + LLVMTypeRef type = bld_base->base.elem_type; > + struct cube_selection_coords selcoords; > LLVMValueRef coords[4]; > - unsigned i; > + LLVMValueRef invma; > > - si_llvm_cube_to_2d_coords(bld_base, coords_arg, coords); > + build_cube_intrinsic(gallivm, coords_arg, &selcoords); > + > + invma = lp_build_intrinsic(builder, "llvm.fabs.f32", > + type, &selcoords.ma, 1, LP_FUNC_ATTR_READNONE); > + invma = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, invma); > + > + for (int i = 0; i < 2; ++i) > + coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, > ""); > + > + coords[2] = selcoords.id; > > if (opcode == TGSI_OPCODE_TXD && derivs_arg) { > LLVMValueRef derivs[4]; > int axis; > > /* Convert cube derivatives to 2D derivatives. */ > for (axis = 0; axis < 2; axis++) { > - LLVMValueRef shifted_cube_coords[4], > shifted_coords[4]; > - > - /* Shift the cube coordinates by the derivatives to > get > - * the cube coordinates of the "neighboring pixel". > - */ > - for (i = 0; i < 3; i++) > - shifted_cube_coords[i] = > - LLVMBuildFAdd(builder, coords_arg[i], > - derivs_arg[axis*3+i], > ""); > - shifted_cube_coords[3] = > LLVMGetUndef(bld_base->base.elem_type); > - > - /* Project the shifted cube coordinates onto the > face. */ > - si_llvm_cube_to_2d_coords(bld_base, > shifted_cube_coords, > - shifted_coords); > - > - /* Subtract both sets of 2D coordinates to get 2D > derivatives. > - * This won't work if the shifted coordinates ended up > - * in a different face. > + LLVMValueRef deriv_st[2]; > + LLVMValueRef deriv_ma; > + > + /* Transform the derivative alongside the texture > + * coordinate. Mathematically, the correct formula is > + * as follows. Assume we're projecting onto the +Z > face > + * and denote by dx/dh the derivative of the > (original) > + * X texture coordinate with respect to horizontal > + * window coordinates. The projection onto the +Z face > + * plane is: > + * > + * f(x,z) = x/z > + * > + * Then df/dh = df/dx * dx/dh + df/dz * dz/dh > + * = 1/z * dx/dh - x/z * 1/z * dz/dh. > + * > + * This motivatives the implementation below. > + * > + * Whether this actually gives the expected results > for > + * apps that might feed in derivatives obtained via > + * finite differences is anyone's guess. The OpenGL > spec > + * seems awfully quiet about how textureGrad for cube > + * maps should be handled. > */ > - for (i = 0; i < 2; i++) > + build_cube_select(builder, &selcoords, > &derivs_arg[axis * 3], > + deriv_st, &deriv_ma); > + > + deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, > ""); > + > + for (int i = 0; i < 2; ++i) > derivs[axis * 2 + i] = > - LLVMBuildFSub(builder, > shifted_coords[i], > - coords[i], ""); > + LLVMBuildFSub(builder, > + LLVMBuildFMul(builder, > deriv_st[i], invma, ""), > + LLVMBuildFMul(builder, > deriv_ma, coords[i], ""), ""); > } > > memcpy(derivs_arg, derivs, sizeof(derivs)); > } > > + /* Shift the texture coordinate. This must be applied after the > + * derivative calculation. > + */ > + for (int i = 0; i < 2; ++i) > + coords[i] = LLVMBuildFAdd(builder, coords[i], > LLVMConstReal(type, 0.5), ""); > + > if (target == TGSI_TEXTURE_CUBE_ARRAY || > target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { > /* for cube arrays coord.z = coord.w(array_index) * 8 + face > */ > /* coords_arg.w component - array_index for cube arrays */ > coords[2] = lp_build_emit_llvm_ternary(bld_base, > TGSI_OPCODE_MAD, > coords_arg[3], > lp_build_const_float(gallivm, 8.0), coords[2]); > } > > /* Preserve compare/lod/bias. Put it in coords.w. */ > if (opcode == TGSI_OPCODE_TEX2 || > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev