--- Here's an idea for an SNB performance improvement from the specs. It says that on GT2 you should be able to use 80 threads if "WIZ Hashing Disable in GT_MODE register enabled". On my system (supposedly GT2), that bit (bit 6 of 0x20d0) is unset. In testing, with intel_reg_write 0x20d0 0x00400040 (it only successfully took once, I suspect due to FORCEWAKE, which also means that I can't necessarily trust that the bit was unset originally), I got only hangs from 3D.
src/mesa/drivers/dri/i965/brw_context.c | 6 +++++- src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +- src/mesa/drivers/dri/intel/intel_chipset.h | 10 ++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9483ec6..3555e44 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -184,7 +184,11 @@ GLboolean brwCreateContext( int api, if (intel->gen >= 6) { brw->urb.size = 1024; brw->vs_max_threads = 60; - brw->wm_max_threads = 80; + if (IS_GT2(intel->intelScreen->deviceID)) { + brw->wm_max_threads = 80; + } else { + brw->wm_max_threads = 40; + } } else if (intel->gen == 5) { brw->urb.size = 1024; brw->vs_max_threads = 72; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 78901ec..b91e1c1 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); - dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; + dw5 |= (brw->wm.max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->dispatch_width == 8) diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 4ff9140..f7dcf47 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -125,12 +125,14 @@ /* Compat macro for intel_decode.c */ #define IS_IRONLAKE(devid) IS_GEN5(devid) -#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ - devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ +#define IS_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ - devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS) + +#define IS_GEN6(devid) (IS_GT2(devid) || \ + devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ devid == PCI_CHIP_SANDYBRIDGE_S) #define IS_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ -- 1.7.4.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx