This seems to be a much larger win than I expected. I wrote the following little benchmark:
#include <EGL/egl.h> int main(int argc, char *argv) { EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY); for (unsigned i = 0; i < 1000; i++) { eglInitialize(display, NULL, NULL); eglTerminate(display); } } This basically just triggers 1000 screen initializations so you can time it. With mesa built in debug mode, this patch takes it from 27.78s to 9.65s, a change of around 65%! With mesa built in release mode, it goes from 13.12s to 4.54s (again 65%). This is without getting rid of the (now unneeded) conflict list allocations. On Sat, Aug 15, 2015 at 11:31 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: > Instead of adding transitive conflicts as we go, we now add regular > conflicts and then make them all transitive at the end. This should be > more efficient. > --- > src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 ++++++++-- > src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 5 ++++- > 2 files changed, 12 insertions(+), 3 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > index b70895e..1d39858 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp > @@ -232,7 +232,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int > dispatch_width) > for (int base_reg = j; > base_reg < j + (class_sizes[i] + 1) / 2; > base_reg++) { > - ra_add_transitive_reg_conflict(regs, base_reg, reg); > + ra_add_reg_conflict(regs, base_reg, reg); > } > > reg++; > @@ -246,7 +246,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int > dispatch_width) > for (int base_reg = j; > base_reg < j + class_sizes[i]; > base_reg++) { > - ra_add_transitive_reg_conflict(regs, base_reg, reg); > + ra_add_reg_conflict(regs, base_reg, reg); > } > > reg++; > @@ -255,6 +255,12 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int > dispatch_width) > } > assert(reg == ra_reg_count); > > + /* Applying transitivity to all of the base 
registers gives us the > + * appropriate register conflict relationships everywhere. > + */ > + for (int reg = 0; reg < base_reg_count; reg++) > + ra_make_reg_conflicts_transitive(regs, reg); > + > /* Add a special class for aligned pairs, which we'll put delta_xy > * in on Gen <= 6 so that we can do PLN. > */ > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp > index 617c988..d7b6ad5 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp > @@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) > for (int base_reg = j; > base_reg < j + class_sizes[i]; > base_reg++) { > - ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, > base_reg, reg); > + ra_add_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); > } > > reg++; > @@ -158,6 +158,9 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) > } > assert(reg == ra_reg_count); > > + for (int reg = 0; reg < base_reg_count; reg++) > + ra_make_reg_conflicts_transitive(compiler->vec4_reg_set.regs, reg); > + > ra_set_finalize(compiler->vec4_reg_set.regs, q_values); > > for (int i = 0; i < MAX_VGRF_SIZE; i++) > -- > 2.4.3 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev