On Saturday, July 12, 2014 09:16:25 PM Chris Forbes wrote: > SIMD8-only for now. > > V5: - Fix style complaints > - Move prototype to be with other oddball emit functions > - Use unreachable() instead of assert() where possible > > Signed-off-by: Chris Forbes <chr...@ijw.co.nz> > --- > src/mesa/drivers/dri/i965/brw_fs.h | 2 + > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 133 ++++++++++++++++++++++++++- > 2 files changed, 133 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h > index 1d3f9d0..6f169dc 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -439,6 +439,8 @@ public: > void emit_untyped_surface_read(unsigned surf_index, fs_reg dst, > fs_reg offset); > > + void emit_interpolate_expression(ir_expression *ir); > + > bool try_rewrite_rhs_to_dst(ir_assignment *ir, > fs_reg dst, > fs_reg src, > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 8e8affa..fed5f28 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -344,6 +344,116 @@ fs_visitor::try_emit_mad(ir_expression *ir) > return true; > } > > +static int > +pack_pixel_offset(float x) > +{ > + int n = MIN2((int)(x * 16), 7); > + return n & 0xf; > +} > + > +void > +fs_visitor::emit_interpolate_expression(ir_expression *ir) > +{ > + /* in SIMD16 mode, the pixel interpolator returns coords interleaved > + * 8 channels at a time, same as the barycentric coords presented in > + * the FS payload. this requires a bit of extra work to support. > + */ > + no16("interpolate_at_* not yet supported in SIMD16 mode."); > + > + ir_dereference * deref = ir->operands[0]->as_dereference(); > + ir_swizzle * swiz = NULL; > + if (!deref) { > + /* the api does not allow a swizzle here, but the varying packing code > + * may have pushed one into here. > + */ > + swiz = ir->operands[0]->as_swizzle(); > + assert(swiz); > + deref = swiz->val->as_dereference(); > + } > + assert(deref); > + ir_variable * var = deref->variable_referenced(); > + assert(var); > + > + /* 1. collect interpolation factors */ > + > + fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1)); > + fs_reg dst_y = dst_x; > + dst_y.reg_offset++; > + > + /* for most messages, we need one reg of ignored data; the hardware requires mlen==1 > + * even when there is no payload. in the per-slot offset case, we'll replace this with > + * the proper source data. */ > + fs_reg src = fs_reg(this, glsl_type::float_type); > + int mlen = 1; /* one reg unless overriden */ > + fs_inst *inst; > + > + switch (ir->operation) { > + case ir_unop_interpolate_at_centroid: > + inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); > + break; > + > + case ir_binop_interpolate_at_sample: { > + ir_constant *sample_num = ir->operands[1]->as_constant(); > + assert(sample_num || !"nonconstant sample number should have been lowered."); > + > + unsigned msg_data = sample_num->value.i[0] << 4; > + inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data)); > + break; > + } > + > + case ir_binop_interpolate_at_offset: { > + ir_constant *const_offset = ir->operands[1]->as_constant(); > + if (const_offset) { > + unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | > + (pack_pixel_offset(const_offset->value.f[1]) << 4); > + inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, > + fs_reg(msg_data)); > + } else { > + /* pack the operands: hw wants offsets as 4 bit signed ints */ > + ir->operands[1]->accept(this); > + src = fs_reg(this, glsl_type::ivec2_type); > + fs_reg src2 = src; > + for (int i = 0; i < 2; i++) { > + fs_reg temp = fs_reg(this, glsl_type::float_type); > + emit(MUL(temp, this->result, fs_reg(16.0f))); > + emit(MOV(src2, temp)); /* float to int */ > + fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); > + inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
There's something unusual going on here that I think deserves a comment: GL_ARB_gpu_shader5 requires GL_MIN_FRAGMENT_INTERPOLATION_OFFSET and GL_MAX_FRAGMENT_INTERPOLATION_OFFSET to be -0.5 and 0.5, respectively; offsets outside of that range produce undefined values. However, our hardware actually only supports -8/16 to 7/16 = [-0.5, 0.4375]. So if the application used an offset of +0.5, the integer value would be +8 aka 0b1000, which when interpreted as an S4 2's complement number would be -8, aka -8/16 = -0.5. The exact opposite of what they wanted. So, the clamp here is not to try and deal with offsets that are out of the advertised range - it's to make +0.5 turn into +0.4375. Which I believe is legal due to the fuzzy quantization rules in ARB_gpu_shader5: "Not all values of <offset> may be supported; x and y offsets may be rounded to fixed-point values with the number of fraction bits given by the implementation-dependent constant FRAGMENT_INTERPOLATION_OFFSET_BITS." > + > + src2.reg_offset++; > + this->result.reg_offset++; > + } > + > + mlen = 2 * dispatch_width / 8; Normally we create "int reg_width = dispatch_width / 8" and use that in expressions like this. Might be a touch tidier. Either way's fine though. With some sort of comment added explaining the above, this series gets a: Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> and I think you're good to go on it. Nice work! > + inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, > + fs_reg(0u)); > + } > + break; > + } > + > + default: > + unreachable("not reached"); > + } > + > + inst->mlen = mlen; > + inst->regs_written = 2 * dispatch_width / 8; /* 2 floats per slot returned */ > + inst->pi_noperspective = var->determine_interpolation_mode(key- >flat_shade) == > + INTERP_QUALIFIER_NOPERSPECTIVE; > + > + /* 2. emit linterp */ > + > + fs_reg res(this, ir->type); > + this->result = res; > + > + for (int i = 0; i < ir->type->vector_elements; i++) { > + int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i; > + emit(FS_OPCODE_LINTERP, res, > + dst_x, dst_y, > + fs_reg(interp_reg(var->data.location, ch))); > + res.reg_offset++; > + } > +} > + > void > fs_visitor::visit(ir_expression *ir) > { > @@ -355,9 +465,22 @@ fs_visitor::visit(ir_expression *ir) > > if (try_emit_saturate(ir)) > return; > - if (ir->operation == ir_binop_add) { > + > + /* Deal with the real oddball stuff first */ > + switch (ir->operation) { > + case ir_binop_add: > if (try_emit_mad(ir)) > - return; > + return; > + break; > + > + case ir_unop_interpolate_at_centroid: > + case ir_binop_interpolate_at_offset: > + case ir_binop_interpolate_at_sample: > + emit_interpolate_expression(ir); > + return; > + > + default: > + break; > } > > for (operand = 0; operand < ir->get_num_operands(); operand++) { > @@ -815,6 +938,12 @@ fs_visitor::visit(ir_expression *ir) > inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]); > inst->predicate = BRW_PREDICATE_NORMAL; > break; > + > + case ir_unop_interpolate_at_centroid: > + case ir_binop_interpolate_at_offset: > + case ir_binop_interpolate_at_sample: > + unreachable("already handled above"); > + break; > } > } > >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev