Just looking at the channel enables is not sufficient, at least not on Sky Lake. Channels that are disabled by the sample_mask may show up in the channel enable register as being enabled even if they are not executing. This can cause FIND_LIVE_CHANNEL to return a channel that isn't actually executing. In our handling of interpolateAtSample we do a clever trick with emit_uniformize to call the interpolator once for each unique sample id. Thanks to FIND_LIVE_CHANNEL returning a dead channel, we can get an infinite loop which hangs the GPU.
Signed-off-by: Jason Ekstrand <ja...@jlekstrand.net> --- src/mesa/drivers/dri/i965/brw_eu.h | 3 ++- src/mesa/drivers/dri/i965/brw_eu_emit.c | 22 +++++++++++++++------- src/mesa/drivers/dri/i965/brw_fs_builder.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 3e52764..9aaab78 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, void brw_find_live_channel(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + struct brw_reg sample_mask); void brw_broadcast(struct brw_codegen *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 3b12030..f593a8d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, } void -brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) +brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, + struct brw_reg sample_mask) { const struct gen_device_info *devinfo = p->devinfo; const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); @@ -3377,13 +3378,20 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) if (devinfo->gen >= 8) { /* Getting the first active channel index is easy on Gen8: Just find - * the first bit set in the mask register. The same register exists - * on HSW already but it reads back as all ones when the current - * instruction has execution masking disabled, so it's kind of - * useless. + * the first bit set in the mask register AND the sample mask. The + * same register exists on HSW already but it reads back as all ones + * when the current instruction has execution masking disabled, so + * it's kind of useless. */ - inst = brw_FBL(p, vec1(dst), - retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg mask_reg = retype(brw_mask_reg(0), + BRW_REGISTER_TYPE_UD); + if (sample_mask.file != BRW_IMMEDIATE_VALUE || + sample_mask.ud != 0xffffffff) { + brw_AND(p, vec1(dst), mask_reg, sample_mask); + mask_reg = vec1(dst); + } + + inst = brw_FBL(p, vec1(dst), mask_reg); /* Quarter control has the effect of magically shifting the value of * this register so you'll get the first active channel relative to diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 483672f..45b5f88 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -407,7 +407,8 @@ namespace brw { const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); const dst_reg dst = vgrf(src.type); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index, + sample_mask_reg()); ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); return src_reg(component(dst, 0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 2f4ba7b..d923b0b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2041,7 +2041,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst); + brw_find_live_channel(p, dst, src[0]); break; case SHADER_OPCODE_BROADCAST: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 256abae..63fca6f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1863,7 +1863,7 @@ generate_code(struct brw_codegen *p, break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst); + brw_find_live_channel(p, dst, brw_null_reg()); break; case SHADER_OPCODE_BROADCAST: -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev