There's no SVGA3D_DECLTYPE that directly corresponds to PIPE_FORMAT_R8G8B8_SNORM. Previously, we used the swtnl fallback path to handle this but that's slow and causes invariance issues. Now we fetch the attribute as SVGA3D_DECLTYPE_UBYTE4N and insert some extra VS instructions to remap the attributes from the range [0,1] to the range [-1,1].
Fixes Sauerbraten sw fallback. Fixes piglit normal3b3s-invariance test. --- src/gallium/drivers/svga/svga_context.h | 1 + src/gallium/drivers/svga/svga_state_need_swtnl.c | 28 ++++- src/gallium/drivers/svga/svga_state_vs.c | 4 + src/gallium/drivers/svga/svga_tgsi.h | 1 + src/gallium/drivers/svga/svga_tgsi_emit.h | 2 +- src/gallium/drivers/svga/svga_tgsi_insn.c | 128 ++++++++++++++++++++-- 6 files changed, 150 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0daab0b..426995e 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -305,6 +305,7 @@ struct svga_hw_draw_state struct svga_sw_state { unsigned ve_format[PIPE_MAX_ATTRIBS]; /* NEW_VELEMENT */ + unsigned adjust_attrib_range; /* bitmask of attrs needing range adjustment */ /* which parts we need */ boolean need_swvfetch; diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index c0bfd2c..6cdd792 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -57,6 +57,9 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R16G16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_2; case PIPE_FORMAT_R16G16B16A16_FLOAT: return SVGA3D_DECLTYPE_FLOAT16_4; + /* See attrib_needs_range_adjustment() below */ + case PIPE_FORMAT_R8G8B8_SNORM: return SVGA3D_DECLTYPE_UBYTE4N; + default: /* There are many formats without hardware support. This case * will be hit regularly, meaning we'll need swvfetch. @@ -66,6 +69,23 @@ svga_translate_vertex_format(enum pipe_format format) } +/** + * Does the given vertex attrib format need range adjustment in the VS? + * Range adjustment scales and biases values from [0,1] to [-1,1]. + * This lets us avoid the swtnl path. 
+ */ +static boolean +attrib_needs_range_adjustment(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8_SNORM: + return TRUE; + default: + return FALSE; + } +} + + static enum pipe_error update_need_swvfetch(struct svga_context *svga, unsigned dirty) { @@ -77,13 +97,19 @@ update_need_swvfetch(struct svga_context *svga, unsigned dirty) return PIPE_OK; } + svga->state.sw.adjust_attrib_range = 0x0; + for (i = 0; i < svga->curr.velems->count; i++) { - svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format); + enum pipe_format attribFormat = svga->curr.velems->velem[i].src_format; + svga->state.sw.ve_format[i] = svga_translate_vertex_format(attribFormat); if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { /* Unsupported format - use software fetch */ need_swvfetch = TRUE; break; } + if (attrib_needs_range_adjustment(attribFormat)) { + svga->state.sw.adjust_attrib_range |= (1 << i); + } } if (need_swvfetch != svga->state.sw.need_swvfetch) { diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 2f130aec..e50cd93 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -159,6 +159,9 @@ make_vs_key(struct svga_context *svga, struct svga_vs_compile_key *key) /* SVGA_NEW_FS */ key->fs_generic_inputs = svga->curr.fs->generic_inputs; + + /* SVGA_NEW_VELEMENT */ + key->adjust_attrib_range = svga->state.sw.adjust_attrib_range; } @@ -248,6 +251,7 @@ struct svga_tracked_state svga_hw_vs = (SVGA_NEW_VS | SVGA_NEW_FS | SVGA_NEW_PRESCALE | + SVGA_NEW_VELEMENT | SVGA_NEW_NEED_SWTNL), emit_hw_vs }; diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h index 4fe88b3..cb40560 100644 --- a/src/gallium/drivers/svga/svga_tgsi.h +++ b/src/gallium/drivers/svga/svga_tgsi.h @@ -49,6 +49,7 @@ struct svga_vs_compile_key unsigned fs_generic_inputs; unsigned need_prescale:1; unsigned allow_psiz:1; + 
unsigned adjust_attrib_range:16; }; struct svga_fs_compile_key diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 1a9731f..1894296e 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -86,7 +86,7 @@ struct svga_shader_emitter boolean in_main_func; boolean created_common_immediate; - int common_immediate_idx; + int common_immediate_idx[2]; boolean created_loop_const; int loop_const_idx; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index d357058..e798b17 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -859,8 +859,20 @@ create_common_immediate( struct svga_shader_emitter *emit ) if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 0.0f, 0.5f, -1.0f, 1.0f )) return FALSE; + emit->common_immediate_idx[0] = idx; + idx++; + + /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ + if (emit->key.vkey.adjust_attrib_range) { + if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, + idx, 2.0f, 0.0f, 0.0f, 0.0f )) + return FALSE; + emit->common_immediate_idx[1] = idx; + } + else { + emit->common_immediate_idx[1] = -1; + } - emit->common_immediate_idx = idx; emit->created_common_immediate = TRUE; return TRUE; @@ -889,7 +901,7 @@ common_immediate_swizzle(float value) /** - * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5 + * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 */ static struct src_register get_immediate(struct svga_shader_emitter *emit, @@ -900,8 +912,8 @@ get_immediate(struct svga_shader_emitter *emit, unsigned sz = common_immediate_swizzle(z); unsigned sw = common_immediate_swizzle(w); assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + 
return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), sx, sy, sz, sw); } @@ -913,9 +925,9 @@ static struct src_register get_zero_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 0, 0, 0, 0); } @@ -927,9 +939,9 @@ static struct src_register get_one_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); + assert(emit->common_immediate_idx[0] >= 0); return swizzle(src_register( SVGA3DREG_CONST, - emit->common_immediate_idx), + emit->common_immediate_idx[0]), 3, 3, 3, 3); } @@ -941,13 +953,28 @@ static struct src_register get_half_immediate( struct svga_shader_emitter *emit ) { assert(emit->created_common_immediate); - assert(emit->common_immediate_idx >= 0); - return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx), + assert(emit->common_immediate_idx[0] >= 0); + return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 1, 1, 1, 1); } /** + * returns {2, 2, 2, 2} immediate + */ +static struct src_register +get_two_immediate( struct svga_shader_emitter *emit ) +{ + /* Note we use the second common immediate here */ + assert(emit->created_common_immediate); + assert(emit->common_immediate_idx[1] >= 0); + return swizzle(src_register( SVGA3DREG_CONST, + emit->common_immediate_idx[1]), + 0, 0, 0, 0); +} + + +/** * returns the loop const */ static struct src_register @@ -3498,6 +3525,74 @@ emit_inverted_texcoords(struct svga_shader_emitter *emit) /** + * Emit code to remap the vertex attributes flagged in the + * adjust_attrib_range mask of the VS compile key from the range [0,1] + * to the range [-1,1]. 
+ */ +static boolean +emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) +{ + unsigned adjust_attrib_range = emit->key.vkey.adjust_attrib_range; + + while (adjust_attrib_range) { + /* The vertex input/attribute is supposed to be a signed value in + * the range [-1,1] but we actually fetched/converted it to the + * range [0,1]. This most likely happens when the app specifies a + * signed byte attribute but we interpreted it as unsigned bytes. + * See also svga_translate_vertex_format(). + * + * Here, we emit some extra instructions to adjust + * the attribute values from [0,1] to [-1,1]. + * + * The adjustment we implement is: + * new_attrib = attrib * 2.0; + * if (attrib >= 0.5) + * new_attrib = new_attrib - 2.0; + * This isn't exactly right (it's off by a bit or so) but close enough. + */ + const unsigned index = u_bit_scan(&adjust_attrib_range); + struct src_register tmp; + + SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); + + /* allocate a temp reg */ + tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); + emit->nr_hw_temp++; + + /* tmp = attrib * 2.0 */ + if (!submit_op2(emit, + inst_token(SVGA3DOP_MUL), + dst(tmp), + emit->input_map[index], + get_two_immediate(emit))) + return FALSE; + + /* pred = (attrib >= 0.5) */ + if (!submit_op2(emit, + inst_token_setp(SVGA3DOPCOMP_GE), + pred_reg, + emit->input_map[index], /* vert attrib */ + get_half_immediate(emit))) /* 0.5 */ + return FALSE; + + /* sub(pred) tmp, tmp, 2.0 */ + if (!submit_op3(emit, + inst_token_predicated(SVGA3DOP_SUB), + dst(tmp), + src(pred_reg), + tmp, + get_two_immediate(emit))) + return FALSE; + + /* Reassign the input_map entry to the new tmp register */ + emit->input_map[index] = tmp; + } + + return TRUE; +} + + +/** * Determine if we need to create the "common" immediate value which is * used for generating useful vector constants such as {0,0,0,0} and * {1,1,1,1}. 
@@ -3542,10 +3637,11 @@ needs_to_create_common_immediate(const struct svga_shader_emitter *emit) return TRUE; } } - - if (emit->unit == PIPE_SHADER_VERTEX) { + else if (emit->unit == PIPE_SHADER_VERTEX) { if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) return TRUE; + if (emit->key.vkey.adjust_attrib_range) + return TRUE; } if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || @@ -3705,6 +3801,14 @@ svga_shader_emit_helpers(struct svga_shader_emitter *emit) return FALSE; } } + else { + assert(emit->unit == PIPE_SHADER_VERTEX); + if (emit->key.vkey.adjust_attrib_range) { + if (!emit_adjusted_vertex_attribs(emit)) + return FALSE; + } + } + return TRUE; } -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev