Avoid frequent use of reralloc() for tracking the conflicts list, and avoid walking that list every time we add a transitive conflict, by observing that the indirect conflicts can be applied in a second pass that merges in the conflicts of each conflicting register.
Reduces brw_compiler_create() from 18351.5us to 4787.1us on my ivb i7-3720QM (in context that 18ms represents about 50% of the time it takes to start X, though why X instantiates an intel_screen at all remains a mystery). Signed-off-by: Chris Wilson <[email protected]> Cc: Matt Turner <[email protected]> Cc: Jason Ekstrand <[email protected]> Cc: Martin Peres <[email protected] --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 18 +++++++- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 16 ++++++- src/util/register_allocate.c | 53 +++++++++++++--------- src/util/register_allocate.h | 2 + 4 files changed, 64 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 8e5621d..7f87221 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -223,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) for (int base_reg = j; base_reg < j + (class_sizes[i] + 1) / 2; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_mark_transitive_reg_conflict(regs, base_reg, reg); } reg++; @@ -237,7 +237,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(regs, base_reg, reg); + ra_mark_transitive_reg_conflict(regs, base_reg, reg); } reg++; @@ -246,6 +246,20 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) } assert(reg == ra_reg_count); + reg = 0; + for (int i = 0; i < class_count; i++) { + int class_size = class_sizes[i]; + int class_reg_count = base_reg_count - (class_size - 1); + if (devinfo->gen <= 5 && reg_width == 2) + class_size = (class_size + 1) / 2; + for (int j = 0; j < class_reg_count; j++) { + for (int base_reg = j; base_reg < j + class_size; base_reg++) + ra_add_transitive_reg_conflict(regs, base_reg, reg); + reg++; + } + } + 
assert(reg == ra_reg_count); + /* Add a special class for aligned pairs, which we'll put delta_xy * in on Gen <= 6 so that we can do PLN. */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 555c42e..93b7297 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -140,7 +140,7 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); + ra_mark_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); } reg++; @@ -158,6 +158,20 @@ brw_vec4_alloc_reg_set(struct brw_compiler *compiler) } assert(reg == ra_reg_count); + reg = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + for (int j = 0; j < class_reg_count; j++) { + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); + } + reg++; + } + } + assert(reg == ra_reg_count); + ra_set_finalize(compiler->vec4_reg_set.regs, q_values); for (int i = 0; i < MAX_VGRF_SIZE; i++) diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c index f5f7c04..2bbab7f 100644 --- a/src/util/register_allocate.c +++ b/src/util/register_allocate.c @@ -83,19 +83,17 @@ struct ra_reg { BITSET_WORD *conflicts; - unsigned int *conflict_list; - unsigned int conflict_list_size; - unsigned int num_conflicts; + unsigned int conflict_range[2]; }; struct ra_regs { struct ra_reg *regs; - unsigned int count; struct ra_class **classes; unsigned int class_count; bool round_robin; + unsigned int count; }; struct ra_class { @@ -200,11 +198,8 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count) conflicts += bitset_count; BITSET_SET(regs->regs[i].conflicts, i); - - regs->regs[i].conflict_list = 
ralloc_array(regs->regs, unsigned int, 4); - regs->regs[i].conflict_list_size = 4; - regs->regs[i].conflict_list[0] = i; - regs->regs[i].num_conflicts = 1; + regs->regs[i].conflict_range[0] = i; + regs->regs[i].conflict_range[1] = i; } return regs; @@ -231,13 +226,11 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) { struct ra_reg *reg1 = &regs->regs[r1]; - if (reg1->conflict_list_size == reg1->num_conflicts) { - reg1->conflict_list_size *= 2; - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, - unsigned int, reg1->conflict_list_size); - } - reg1->conflict_list[reg1->num_conflicts++] = r2; BITSET_SET(reg1->conflicts, r2); + if (r2 < reg1->conflict_range[0]) + reg1->conflict_range[0] = r2; + else if (r2 > reg1->conflict_range[1]) + reg1->conflict_range[1] = r2; } void @@ -261,13 +254,27 @@ void ra_add_transitive_reg_conflict(struct ra_regs *regs, unsigned int base_reg, unsigned int reg) { + struct ra_reg *b = &regs->regs[base_reg]; + struct ra_reg *r = &regs->regs[reg]; unsigned int i; - ra_add_reg_conflict(regs, reg, base_reg); + if (b->conflict_range[0] < r->conflict_range[0]) + r->conflict_range[0] = b->conflict_range[0]; - for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { - ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); - } + if (b->conflict_range[1] > r->conflict_range[1]) + r->conflict_range[1] = b->conflict_range[1]; + + for (i = BITSET_BITWORD(b->conflict_range[0]); + i <= BITSET_BITWORD(b->conflict_range[1]); + i++) + r->conflicts[i] |= b->conflicts[i]; +} + +void +ra_mark_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg) +{ + ra_add_conflict_list(regs, base_reg, reg); } unsigned int @@ -343,9 +350,11 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) if (!reg_belongs_to_class(rc, regs->classes[c])) continue; - for (i = 0; i < regs->regs[rc].num_conflicts; i++) { - unsigned int rb = regs->regs[rc].conflict_list[i]; - if 
(reg_belongs_to_class(rb, regs->classes[b])) + for (i = regs->regs[rc].conflict_range[0]; + i <= regs->regs[rc].conflict_range[1]; + i++) { + if (BITSET_TEST(regs->regs[rc].conflicts, i) && + reg_belongs_to_class(i, regs->classes[b])) conflicts++; } max_conflicts = MAX2(max_conflicts, conflicts); diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h index 61f182e..1ceea79 100644 --- a/src/util/register_allocate.h +++ b/src/util/register_allocate.h @@ -51,6 +51,8 @@ void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2); void ra_add_transitive_reg_conflict(struct ra_regs *regs, unsigned int base_reg, unsigned int reg); +void ra_mark_transitive_reg_conflict(struct ra_regs *regs, + unsigned int base_reg, unsigned int reg); void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg); void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a, unsigned int class_b, unsigned int num_conflicts); -- 2.1.4 _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
