The previous calculation neglected to multiply by nr_vs_entries; instead, it checked whether twice the size of one VS URB entry was bigger than the entire URB space. Furthermore, it failed to take into account that vs_size is in units of 128-byte blocks, while urb_size is in bytes.
Despite the above problems, the calculations resulted in an acceptable programming of the URB in most cases. --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/gen6_urb.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 13 deletions(-) I tested this patch with: piglit-run.py -t glsl -x glsl-max-varyings tests/quick.tests results and also ran the Khronos ES2 Conformance suite, and found no regressions. diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d63e8a4..6be7188 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -560,7 +560,7 @@ struct brw_context * a number of 1024-bit (128-byte) units. Should be >= 1. */ GLuint vs_size; -/* GLuint gs_size; */ + GLuint gs_size; GLuint vs_start; GLuint gs_start; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index 38382f1..c9abc36 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -35,6 +35,7 @@ static void prepare_urb( struct brw_context *brw ) { int urb_size; /* total size of the URB, in bytes */ + int nr_vs_entries; int max_urb_entry; struct intel_context *intel = &brw->intel; @@ -49,23 +50,28 @@ prepare_urb( struct brw_context *brw ) max_urb_entry = 256; } + /* CACHE_NEW_VS_PROG */ + brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + + /* Calculate how many VS URB entries fit in the total URB size */ + nr_vs_entries = urb_size / (brw->urb.vs_size * 128); + /* According to volume 2a, nr_vs_entries must be a multiple of 4 in the range * [24, 256] on GT2 and [24, 128] on GT1. */ - brw->urb.nr_vs_entries = max_urb_entry; + if (nr_vs_entries > max_urb_entry) + nr_vs_entries = max_urb_entry; + + brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); /* According to volume 2a, nr_gs_entries must be a multiple of 4 in the range - * [0, 256] on GT2 and [0, 254] on GT1. 
This seems unlikely and is probably a typo. + * [0, 256] on GT2 and [0, 254] on GT1. We think 254 is likely a typo. + * + * Since we currently don't support Geometry Shaders, we always put the GS unit + * in passthrough mode and don't need to give it any URB space. */ - brw->urb.nr_gs_entries = max_urb_entry; - - /* CACHE_NEW_VS_PROG */ - brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); - - /* Try to divide the URB space in two. This code is currently incorrect. */ - if (2 * brw->urb.vs_size > urb_size) - brw->urb.nr_vs_entries = brw->urb.nr_gs_entries = - (urb_size ) / (2 * brw->urb.vs_size); + brw->urb.nr_gs_entries = 0; + brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */ } static void @@ -73,6 +79,7 @@ upload_urb(struct brw_context *brw) { struct intel_context *intel = &brw->intel; + assert(brw->urb.nr_vs_entries >= 24); assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ @@ -82,7 +89,7 @@ upload_urb(struct brw_context *brw) OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); - OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); ADVANCE_BATCH(); } -- 1.7.4.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev