Rebased ref, commits from common ancestor:
commit 3b48f6a4c06db57a7203d247994b05e55c9418c1
Author: Chris Forbes <chr...@ijw.co.nz>
Date: Sun Aug 3 19:55:55 2014 +1200
mesa: Add a new function for getting the nonconst sampler array index If the array index is not a constant expression, the existing support will assume a zero offset (giving us the sampler index of the base of the array). For dynamically uniform indexing of sampler arrays, we need both that and the indexing expression. Signed-off-by: Chris Forbes <chr...@ijw.co.nz> Reviewed-by: Matt Turner <matts...@gmail.com> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index e6532be..29a5408 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -134,3 +134,14 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler, return shader_program->UniformStorage[location].sampler[shader].index + getname.offset; } + + +extern "C" class ir_rvalue * +_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler) +{ + ir_dereference_array *deref_arr = sampler->as_dereference_array(); + if (!deref_arr || deref_arr->array_index->as_constant()) + return NULL; + + return deref_arr->array_index; +} diff --git a/src/mesa/program/sampler.h b/src/mesa/program/sampler.h index 22467e9..8b7c3b6 100644 --- a/src/mesa/program/sampler.h +++ b/src/mesa/program/sampler.h @@ -27,3 +27,6 @@ int _mesa_get_sampler_uniform_value(class ir_dereference *sampler, struct gl_shader_program *shader_program, const struct gl_program *prog); + +class ir_rvalue * +_mesa_get_sampler_array_nonconst_index(class ir_dereference *sampler); commit 1b4761bc27a50208dba2bc028c9835fed572e696 Author: Chris Forbes <chr...@ijw.co.nz> Date: Sun Aug 3 17:57:05 2014 +1200 glsl: Allow dynamically uniform sampler array indexing with 4.0/gs5 V2: Expand comment to explain what dynamically uniform expressions are about. 
Signed-off-by: Chris Forbes <chr...@ijw.co.nz> Reviewed-by: Matt Turner <matts...@gmail.com> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index 50f9987..5ca85f6 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -213,6 +213,13 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, * as using a loop counter as the index to an array of samplers. If the * loop in unrolled, the code should compile correctly. Instead, emit a * warning. + * + * In GLSL 4.00 / ARB_gpu_shader5, this requirement is relaxed again to allow + * indexing with dynamically uniform expressions. Note that these are not + * required to be uniforms or expressions based on them, but merely that the + * values must not diverge between shader invocations run together. If the + * values *do* diverge, then the behavior of the operation requiring a + * dynamically uniform expression is undefined. */ if (array->type->element_type()->is_sampler()) { if (!state->is_version(130, 100)) { @@ -227,7 +234,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, "expressions will be forbidden in GLSL 1.30 " "and later"); } - } else { + } else if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { _mesa_glsl_error(&loc, state, "sampler arrays indexed with non-constant " "expressions is forbidden in GLSL 1.30 and " commit f525bd01d1430a5e33f57805f50fe4e89aa86ae8 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Wed Aug 6 23:45:05 2014 -0400 nvc0/ir: describe the tex arguments for fermi/kepler Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index ade315d..7da9b0b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -567,6 +567,31 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) const int 
lyr = arg - (i->tex.target.isMS() ? 2 : 1); const int chipset = prog->getTarget()->getChipset(); + // Arguments to the TEX instruction are a little insane. Even though the + // encoding is identical between SM20 and SM30, the arguments mean + // different things between Fermi and Kepler+. A lot of arguments are + // optional based on flags passed to the instruction. This summarizes the + // order of things. + // + // Fermi: + // array/indirect + // coords + // sample + // lod bias + // depth compare + // offsets: + // - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg) + // - other: 4 bits each, single reg + // + // Kepler+: + // indirect handle + // array (+ offsets for txd in upper 16 bits) + // coords + // sample + // lod bias + // depth compare + // offsets (same as fermi, except txd which takes it with array) + if (chipset >= NVISA_GK104_CHIPSET) { if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { // XXX this ignores tsc, and assumes a 1:1 mapping commit b3cbd862242e0ff75584fef706f2b2a3da8e49f2 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Wed Jul 9 00:41:11 2014 -0400 nvc0/ir: add kepler+ support for indirect texture references Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 4a9e48f..ade315d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -569,9 +569,17 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) if (chipset >= NVISA_GK104_CHIPSET) { if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { - WARN("indirect TEX not implemented\n"); - } - if (i->tex.r == i->tex.s) { + // XXX this ignores tsc, and assumes a 1:1 mapping + assert(i->tex.rIndirectSrc >= 0); + Value *hnd = loadTexHandle( + bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), + i->getIndirectR(), bld.mkImm(2)), + i->tex.r); + i->tex.r = 
0xff; + i->tex.s = 0x1f; + i->setIndirectR(hnd); + i->setIndirectS(NULL); + } else if (i->tex.r == i->tex.s) { i->tex.r += prog->driver->io.texBindBase / 4; i->tex.s = 0; // only a single cX[] value possible here } else { @@ -595,6 +603,16 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) i->setSrc(s, i->getSrc(s - 1)); i->setSrc(0, layer); } + // Move the indirect reference to the first place + if (i->tex.rIndirectSrc >= 0) { + Value *hnd = i->getIndirectR(); + + i->setIndirectR(NULL); + i->moveSources(0, 1); + i->setSrc(0, hnd); + i->tex.rIndirectSrc = 0; + i->tex.sIndirectSrc = -1; + } } else // (nvc0) generate and move the tsc/tic/array source to the front if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { @@ -688,14 +706,14 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) // The offset goes into the upper 16 bits of the array index. So // create it if it's not already there, and INSBF it if it already // is. + s = (i->tex.rIndirectSrc >= 0) ? 1 : 0; if (i->tex.target.isArray()) { bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(0), bld.loadImm(NULL, imm), bld.mkImm(0xc10), - i->getSrc(0)); + i->getSrc(s)); } else { - for (int s = dim; s >= 1; --s) - i->setSrc(s, i->getSrc(s - 1)); - i->setSrc(0, bld.loadImm(NULL, imm << 16)); + i->moveSources(s, 1); + i->setSrc(s, bld.loadImm(NULL, imm << 16)); } } else { i->setSrc(s, bld.loadImm(NULL, imm)); @@ -792,6 +810,8 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd) if (chipset >= NVISA_GK104_CHIPSET) { if (!txd->tex.target.isArray() && txd->tex.useOffsets) expected_args++; + if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0) + expected_args++; } else { if (txd->tex.useOffsets) expected_args++; commit af3619e88043ce85560b8220dc16244f8898a926 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Wed Aug 6 01:22:49 2014 -0400 nvc0/ir: add base tex offset for fermi indirect tex case Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index f010767..4a9e48f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -603,10 +603,18 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) Value *ticRel = i->getIndirectR(); Value *tscRel = i->getIndirectS(); - if (ticRel) + if (ticRel) { i->setSrc(i->tex.rIndirectSrc, NULL); - if (tscRel) + if (i->tex.r) + ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), + ticRel, bld.mkImm(i->tex.r)); + } + if (tscRel) { i->setSrc(i->tex.sIndirectSrc, NULL); + if (i->tex.s) + tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), + tscRel, bld.mkImm(i->tex.s)); + } Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; for (int s = dim; s >= 1; --s) commit f73594778b0ef9804fc7839d5e2051d051d4ef48 Author: Kenneth Graunke <kenn...@whitecape.org> Date: Mon Aug 11 15:05:54 2014 -0700 i965: Revert part of f5cc3fdcf1680b116612fac7c39f1bd79f5e555e. Fixes non-termination in various Piglit tests. Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 1b8c987..29d2e02 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -82,7 +82,7 @@ is_expression(const vec4_instruction *const inst) case SHADER_OPCODE_COS: return inst->mlen == 0; default: - return !inst->has_side_effects(); + return false; } } commit 602a3f92d4d695e116794597db81623a8fd4c653 Author: Eric Anholt <e...@anholt.net> Date: Sat Aug 9 11:01:53 2014 -0700 vc4: Flip which primitives are considered front-facing. This mostly fixes glxgears rendering. 
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 69fd218..d2c53a5 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -101,7 +101,7 @@ vc4_create_rasterizer_state(struct pipe_context *pctx, /* XXX: per_vertex */ so->point_size = cso->point_size; - if (!cso->front_ccw) + if (cso->front_ccw) so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES; if (cso->offset_tri) commit f097516505daaaf5c25c919d56cbce54eb441a48 Author: Eric Anholt <e...@anholt.net> Date: Sat Aug 9 11:00:51 2014 -0700 vc4: Don't forget to set the depth clear value in the packet. This gets glxgears partially rendering again. diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index ffcbbb2..8ca4031 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -105,7 +105,7 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); cl_u32(&vc4->rcl, vc4->clear_color[0]); cl_u32(&vc4->rcl, vc4->clear_color[1]); - cl_u32(&vc4->rcl, 0); + cl_u32(&vc4->rcl, vc4->clear_depth); cl_u8(&vc4->rcl, 0); cl_start_reloc(&vc4->rcl, 1); commit e63598aecb5d1cc2a20b8db1ef85790e301f4241 Author: Eric Anholt <e...@anholt.net> Date: Tue Aug 5 14:24:29 2014 -0700 vc4: Add support for gl_FragCoord. This isn't passing all tests (glsl-fs-fragcoord-zw-ortho, for example), but it does get a bunch more tests passing. v2: Rebase on helpers change. 
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 8109f63..d871dcd 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -570,6 +570,20 @@ emit_vertex_input(struct tgsi_to_qir *trans, int attr) } static void +emit_fragcoord_input(struct tgsi_to_qir *trans, int attr) +{ + struct qcompile *c = trans->c; + + trans->inputs[attr * 4 + 0] = qir_FRAG_X(c); + trans->inputs[attr * 4 + 1] = qir_FRAG_Y(c); + trans->inputs[attr * 4 + 2] = + qir_FMUL(c, + qir_FRAG_Z(c), + qir_uniform_f(trans, 1.0 / 0xffffff)); + trans->inputs[attr * 4 + 3] = qir_FRAG_RCP_W(c); +} + +static void emit_fragment_input(struct tgsi_to_qir *trans, int attr) { struct qcompile *c = trans->c; @@ -599,7 +613,12 @@ emit_tgsi_declaration(struct tgsi_to_qir *trans, i <= decl->Range.Last; i++) { if (c->stage == QSTAGE_FRAG) { - emit_fragment_input(trans, i); + if (decl->Semantic.Name == + TGSI_SEMANTIC_POSITION) { + emit_fragcoord_input(trans, i); + } else { + emit_fragment_input(trans, i); + } } else { emit_vertex_input(trans, i); } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 9462da5..6509a2b 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -65,6 +65,11 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, + [QOP_FRAG_X] = { "frag_x", 1, 0 }, + [QOP_FRAG_Y] = { "frag_y", 1, 0 }, + [QOP_FRAG_Z] = { "frag_z", 1, 0 }, + [QOP_FRAG_RCP_W] = { "frag_rcp_w", 1, 0 }, + [QOP_TEX_S] = { "tex_s", 0, 2 }, [QOP_TEX_T] = { "tex_t", 0, 2 }, [QOP_TEX_R] = { "tex_r", 0, 2 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 5d1f088..7d98062 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -74,6 +74,11 @@ enum qop { QOP_TLB_COLOR_WRITE, QOP_VARY_ADD_C, + QOP_FRAG_X, + QOP_FRAG_Y, 
+ QOP_FRAG_Z, + QOP_FRAG_RCP_W, + /** Texture x coordinate parameter write */ QOP_TEX_S, /** Texture y coordinate parameter write */ @@ -204,6 +209,15 @@ bool qir_opt_algebraic(struct qcompile *c); bool qir_opt_copy_propagation(struct qcompile *c); bool qir_opt_dead_code(struct qcompile *c); +#define QIR_ALU0(name) \ +static inline struct qreg \ +qir_##name(struct qcompile *c) \ +{ \ + struct qreg t = qir_get_temp(c); \ + qir_emit(c, qir_inst(QOP_##name, t, c->undef, c->undef)); \ + return t; \ +} + #define QIR_ALU1(name) \ static inline struct qreg \ qir_##name(struct qcompile *c, struct qreg a) \ @@ -257,6 +271,10 @@ QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) QIR_NODST_2(TEX_R) QIR_NODST_2(TEX_B) +QIR_ALU0(FRAG_X) +QIR_ALU0(FRAG_Y) +QIR_ALU0(FRAG_Z) +QIR_ALU0(FRAG_RCP_W) static inline struct qreg qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 33abf6d..63f37dd 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -213,7 +213,8 @@ vc4_generate_code(struct qcompile *c) if (qinst->src[i].file == QFILE_TEMP) reg_uses_remaining[qinst->src[i].index]++; } - if (qinst->op == QOP_TLB_PASSTHROUGH_Z_WRITE) + if (qinst->op == QOP_TLB_PASSTHROUGH_Z_WRITE || + qinst->op == QOP_FRAG_Z) reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true; } @@ -460,6 +461,33 @@ vc4_generate_code(struct qcompile *c) break; + case QOP_FRAG_X: + queue(c, qpu_inst(qpu_a_ITOF(dst, + qpu_ra(QPU_R_XY_PIXEL_COORD)), + qpu_m_NOP())); + break; + + case QOP_FRAG_Y: + queue(c, qpu_inst(qpu_a_ITOF(dst, + qpu_rb(QPU_R_XY_PIXEL_COORD)), + qpu_m_NOP())); + break; + + case QOP_FRAG_Z: + queue(c, qpu_inst(qpu_a_ITOF(dst, + qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)), + qpu_m_NOP())); + break; + + case QOP_FRAG_RCP_W: + queue(c, qpu_inst(qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP), + qpu_ra(QPU_R_FRAG_PAYLOAD_ZW)), + qpu_m_NOP())); + + queue(c, qpu_inst(qpu_a_MOV(dst, 
qpu_r4()), + qpu_m_NOP())); + break; + case QOP_TLB_PASSTHROUGH_Z_WRITE: queue(c, qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), qpu_rb(QPU_R_FRAG_PAYLOAD_ZW)), diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 940596e..4ddc9ac 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -102,6 +102,10 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VIEWPORTS: return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + /* Unsupported features. */ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_ANISOTROPIC_FILTER: @@ -111,8 +115,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_TGSI_INSTANCEID: commit d34fbdda120b2b0e0a33a4a18fa112e58f8a2f4a Author: Eric Anholt <e...@anholt.net> Date: Tue Aug 5 14:23:40 2014 -0700 vc4: Refactor shader input setup again. This makes some space for handling special inputs like fragcoords. 
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 8252630..8109f63 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -570,6 +570,24 @@ emit_vertex_input(struct tgsi_to_qir *trans, int attr) } static void +emit_fragment_input(struct tgsi_to_qir *trans, int attr) +{ + struct qcompile *c = trans->c; + + for (int i = 0; i < 4; i++) { + struct qreg vary = { + QFILE_VARY, + attr * 4 + i + }; + + /* XXX: multiply by W */ + trans->inputs[attr * 4 + i] = + qir_VARY_ADD_C(c, qir_MOV(c, vary)); + c->num_inputs++; + } +} + +static void emit_tgsi_declaration(struct tgsi_to_qir *trans, struct tgsi_full_declaration *decl) { @@ -577,23 +595,12 @@ emit_tgsi_declaration(struct tgsi_to_qir *trans, switch (decl->Declaration.File) { case TGSI_FILE_INPUT: - if (c->stage == QSTAGE_FRAG) { - for (int i = decl->Range.First * 4; - i < (decl->Range.Last + 1) * 4; - i++) { - struct qreg vary = { - QFILE_VARY, - i - }; - trans->inputs[i] = - qir_VARY_ADD_C(c, qir_MOV(c, vary)); - - c->num_inputs++; - } - } else { - for (int i = decl->Range.First; - i <= decl->Range.Last; - i++) { + for (int i = decl->Range.First; + i <= decl->Range.Last; + i++) { + if (c->stage == QSTAGE_FRAG) { + emit_fragment_input(trans, i); + } else { emit_vertex_input(trans, i); } } commit a7faca5d2716c5f87f228c6f82eaf10373154852 Author: Eric Anholt <e...@anholt.net> Date: Tue Aug 5 11:00:51 2014 -0700 vc4: Clean up the tile alloc buffer size. This prevents some simulator assertion failures, but it does mean (since I've dropped the "* 16" padding) that on real hardware you need a kernel that does overflow memory management (currently, "drm/vc4: Add support for binner overflow memory allocation." in my kernel tree). 
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 2fb57aa..ec218d3 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -45,7 +45,15 @@ vc4_start_draw(struct vc4_context *vc4) uint32_t tilew = align(width, 64) / 64; uint32_t tileh = align(height, 64) / 64; - uint32_t tile_alloc_size = 32 * tilew * tileh * 16; + /* Tile alloc memory setup: We use an initial alloc size of 32b. The + * hardware then aligns that to 256b (we use 4096, because all of our + * BO allocations align to that anyway), then for some reason the + * simulator wants an extra page available, even if you have overflow + * memory set up. + */ + uint32_t tile_alloc_size = 32 * tilew * tileh; + tile_alloc_size = align(tile_alloc_size, 4096); + tile_alloc_size += 4096; uint32_t tile_state_size = 48 * tilew * tileh; if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) { vc4_bo_unreference(&vc4->tile_alloc); commit 7050ab510decce2606ffcd0298b3c7fb13a1401e Author: Eric Anholt <e...@anholt.net> Date: Tue Aug 5 11:00:08 2014 -0700 vc4: Clarify some values implicitly chosen for binning config. These #defines are 0, but it should help make math above make more sense. diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index a76880c..2fb57aa 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -67,7 +67,10 @@ vc4_start_draw(struct vc4_context *vc4) cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0); cl_u8(&vc4->bcl, tilew); cl_u8(&vc4->bcl, tileh); - cl_u8(&vc4->bcl, VC4_BIN_CONFIG_AUTO_INIT_TSDA); + cl_u8(&vc4->bcl, + VC4_BIN_CONFIG_AUTO_INIT_TSDA | + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 | + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32); cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING); commit ed5cb5d7d5d3d3cfeafcf67b2020044da9825abe Author: Eric Anholt <e...@anholt.net> Date: Tue Aug 5 10:54:56 2014 -0700 vc4: Improve simulator memory allocation. 
This should reduce a bunch of spurious failures in sim. diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index fc3d554..827d617 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -32,6 +32,8 @@ #include "vc4_simulator_validate.h" #include "simpenrose/simpenrose.h" +#define OVERFLOW_SIZE (32 * 1024 * 1024) + static struct drm_gem_cma_object * vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) { @@ -234,7 +236,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args, struct exec_info exec; struct drm_device local_dev = { .vc4 = vc4, - .simulator_mem_next = 0, + .simulator_mem_next = OVERFLOW_SIZE, }; struct drm_device *dev = &local_dev; int ret; @@ -290,9 +292,23 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args, void vc4_simulator_init(struct vc4_screen *screen) { - simpenrose_init_hardware(); - screen->simulator_mem_base = simpenrose_get_mem_start(); - screen->simulator_mem_size = simpenrose_get_mem_size(); + screen->simulator_mem_size = 256 * 1024 * 1024; + screen->simulator_mem_base = malloc(screen->simulator_mem_size); + + /* We supply our own memory so that we can have more aperture + * available (256MB instead of simpenrose's default 64MB). + */ + simpenrose_init_hardware_supply_mem(screen->simulator_mem_base, + screen->simulator_mem_size); + + /* Carve out low memory for tile allocation overflow. The kernel + * should be automatically handling overflow memory setup on real + * hardware, but for simulation we just get one shot to set up enough + * overflow memory before execution. This overflow mem will be used + * up over the whole lifetime of simpenrose (not reused on each + * flush), so it had better be big. 
+ */ + simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE); } #endif /* USE_VC4_SIMULATOR */ commit f5f8dd29c30e727c10ff087321d87c40c064de04 Author: Eric Anholt <e...@anholt.net> Date: Mon Aug 4 18:30:33 2014 -0700 vc4: Handle stride==0 in VBO validation diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c index 421107a..038de0f 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.c +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c @@ -942,18 +942,20 @@ validate_shader_rec(struct drm_device *dev, stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; if (vbo->base.size < offset || - vbo->base.size - offset < attr_size || - stride == 0) { + vbo->base.size - offset < attr_size) { DRM_ERROR("BO offset overflow (%d + %d > %d)\n", offset, attr_size, vbo->base.size); return -EINVAL; } - max_index = (vbo->base.size - offset - attr_size) / stride; - if (state->max_index > max_index) { - DRM_ERROR("primitives use index %d out of supplied %d\n", - state->max_index, max_index); - return -EINVAL; + if (stride != 0) { + max_index = ((vbo->base.size - offset - attr_size) / + stride); + if (state->max_index > max_index) { + DRM_ERROR("primitives use index %d out of supplied %d\n", + state->max_index, max_index); + return -EINVAL; + } } *(uint32_t *)(pkt_v + o) = vbo->paddr + offset; commit 0f034055f96b9dd7b1c54e8fa5422d22c26f2269 Author: Eric Anholt <e...@anholt.net> Date: Mon Aug 4 16:38:07 2014 -0700 vc4: Stash some debug code for looking at what BOs are at what hindex. When you're debugging validation, it's nice to know what the BOs are for. 
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index faec853..a87cdfa 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -106,6 +106,7 @@ vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, bo->screen = screen; bo->handle = o.handle; bo->size = o.size; + bo->name = "winsys"; #ifdef USE_VC4_SIMULATOR vc4_bo_map(bo); diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 8038fee5..fc3d554 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -75,6 +75,10 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec) struct vc4_bo *bo = bos[i]; struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo); +#if 0 + fprintf(stderr, "bo hindex %d: %s\n", i, bo->name); +#endif + vc4_bo_map(bo); memcpy(obj->vaddr, bo->map, bo->size); commit 8ebfa8fdb27bb5efaeda4fe567622d5de4779342 Author: Eric Anholt <e...@anholt.net> Date: Mon Aug 4 13:01:29 2014 -0700 vc4: Use GEM under simulation even for non-winsys BOs. In addition to reducing sim-specific code, it also avoids our local handle allocation conflicting with the host GEM's handle numbering, which was causing vc4_gem_hindex() to not distinguish between winsys BOs and the same-numbered non-winsys bo. 
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 581ba89..faec853 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -45,7 +45,6 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) bo->size = size; bo->name = name; -#ifndef USE_VC4_SIMULATOR struct drm_mode_create_dumb create; memset(&create, 0, sizeof(create)); @@ -59,12 +58,6 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) bo->handle = create.handle; assert(create.size >= size); -#else /* USE_VC4_SIMULATOR */ - static int next_handle = 0; - bo->handle = next_handle++; - - bo->map = malloc(size); -#endif /* USE_VC4_SIMULATOR */ return bo; } @@ -72,20 +65,23 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name) void vc4_bo_free(struct vc4_bo *bo) { -#ifndef USE_VC4_SIMULATOR struct vc4_screen *screen = bo->screen; - if (bo->map) + if (bo->map) { +#ifdef USE_VC4_SIMULATOR + if (bo->simulator_winsys_map) { + free(bo->map); + bo->map = bo->simulator_winsys_map; + } +#endif munmap(bo->map, bo->size); + } struct drm_gem_close c; c.handle = bo->handle; int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); if (ret != 0) fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno)); -#else - free(bo->map); -#endif free(bo); } @@ -137,7 +133,6 @@ vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size, bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name) { -#ifndef USE_VC4_SIMULATOR struct drm_gem_flink flink = { .handle = bo->handle, }; @@ -150,7 +145,6 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name) } *name = flink.name; -#endif /* USE_VC4_SIMULATOR */ return true; } diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 88eda4f..8038fee5 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -75,6 +75,7 @@ vc4_simulator_pin_bos(struct 
drm_device *dev, struct exec_info *exec) struct vc4_bo *bo = bos[i]; struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo); + vc4_bo_map(bo); memcpy(obj->vaddr, bo->map, bo->size); exec->bo[i].bo = obj; commit cdc208bdaf90017c2e1aaa54d2318b956e801ca0 Author: Eric Anholt <e...@anholt.net> Date: Mon Aug 4 13:00:56 2014 -0700 vc4: Don't forget to unmap the GEM BO when freeing. Otherwise it'll stick around forever. diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 653787e..581ba89 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -75,6 +75,9 @@ vc4_bo_free(struct vc4_bo *bo) #ifndef USE_VC4_SIMULATOR struct vc4_screen *screen = bo->screen; + if (bo->map) + munmap(bo->map, bo->size); + struct drm_gem_close c; c.handle = bo->handle; int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c); commit d2cc7f97df655bbca6486fbe81e35139215d7b72 Author: Eric Anholt <e...@anholt.net> Date: Sat Aug 2 21:28:34 2014 -0700 vc4: Add validation of raster-format textures. ... and reject everything else, for now. v2: Rebase on v2 of the rendering config validation change. 
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c index 241ca17..421107a 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.c +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c @@ -101,8 +101,9 @@ gl_shader_rec_size(uint32_t pointer_bits) } static bool -check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, - uint32_t offset, uint8_t tiling_format, uint8_t cpp) +check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp) { uint32_t width_align, height_align; uint32_t aligned_row_len, aligned_h, size; @@ -125,14 +126,14 @@ check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, return false; } - /* The values are limited by the packet bitfields, so we don't need to - * worry as much about integer overflow. + /* The values are limited by the packet/texture parameter bitfields, + * so we don't need to worry as much about integer overflow. 
*/ - BUG_ON(exec->fb_width > 65535); - BUG_ON(exec->fb_height > 65535); + BUG_ON(width > 65535); + BUG_ON(height > 65535); - aligned_row_len = roundup(exec->fb_width * cpp, width_align); - aligned_h = roundup(exec->fb_height, height_align); + aligned_row_len = roundup(width * cpp, width_align); + aligned_h = roundup(height, height_align); if (INT_MAX / aligned_row_len < aligned_h) { DRM_ERROR("Overflow in fbo size (%d * %d)\n", @@ -144,8 +145,7 @@ check_fbo_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, if (size + offset < size || size + offset > fbo->base.size) { DRM_ERROR("Overflow in %dx%d fbo size (%d + %d > %d)\n", - exec->fb_width, exec->fb_height, size, offset, - fbo->base.size); + width, height, size, offset, fbo->base.size); return false; } @@ -247,11 +247,11 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) offset = *(uint32_t *)(untrusted + 2); - if (!check_fbo_size(exec, fbo, offset, + if (!check_tex_size(exec, fbo, offset, ((packet_b0 & VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >> VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT), - cpp)) { + exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } @@ -499,11 +499,11 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) } offset = *(uint32_t *)untrusted; - if (!check_fbo_size(exec, fbo, offset, + if (!check_tex_size(exec, fbo, offset, ((flags & VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >> VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT), - cpp)) { + exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } @@ -699,14 +699,91 @@ reloc_tex(struct exec_info *exec, { struct drm_gem_cma_object *tex; - uint32_t unvalidated_p0 = *(uint32_t *)(uniform_data_u + - sample->p_offset[0]); + uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); + uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]); uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; + uint32_t offset = p0 & ~0xfff; + uint32_t miplevels = (p0 & 0x15); + uint32_t width = (p1 >> 8) & 2047; + uint32_t height = (p1 >> 
20) & 2047; + uint32_t type, cpp, tiling_format; + int i; + + if (width == 0) + width = 2048; + if (height == 0) + height = 2048; + + if (p0 & (1 << 9)) { + DRM_ERROR("Cube maps unsupported\n"); + return false; + } + + type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4); + + switch (type) { + case 0: /* RGBA8888 */ + case 1: /* RGBX8888 */ + case 16: /* RGBA32R */ + cpp = 4; + break; + case 2: /* RGBA4444 */ + case 3: /* RGBA5551 */ + case 4: /* RGB565 */ + case 7: /* LUMALPHA */ + case 9: /* S16F */ + case 11: /* S16 */ + cpp = 2; -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: https://lists.debian.org/e1xh9ut-0006hv...@moszumanska.debian.org