--- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 2 + src/mesa/drivers/dri/i965/brw_fs.h | 1 + .../drivers/dri/i965/brw_fs_combine_constants.cpp | 222 +++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 26bf458..8dc89f4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -54,6 +54,7 @@ i965_FILES = \ brw_ff_gs_emit.c \ brw_fs.cpp \ brw_fs_channel_expressions.cpp \ + brw_fs_combine_constants.cpp \ brw_fs_copy_propagation.cpp \ brw_fs_cse.cpp \ brw_fs_dead_code_eliminate.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b594331..d763edf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3606,6 +3606,8 @@ fs_visitor::run() dead_code_eliminate(); } + opt_combine_constants(); + lower_uniform_pull_constant_loads(); assign_curb_setup(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2fa0949..f9cb99f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -454,6 +454,7 @@ public: void no16(const char *msg, ...); void lower_uniform_pull_constant_loads(); bool lower_load_payload(); + void opt_combine_constants(); void push_force_uncompressed(); void pop_force_uncompressed(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp new file mode 100644 index 0000000..eef6bb8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -0,0 +1,222 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_fs_combine_constants.cpp + * + * This file contains the opt_combine_constants() pass that runs after the + * regular optimization loop. It passes over the instruction list and + * selectively promotes immediate values to registers by emitting a mov(1) + * instruction. + * + * This is useful on Gen 7 particularly, because a few instructions can be + * coissued (i.e., issued in the same cycle as another thread on the same EU + * issues an instruction) under some circumstances, one of which is that they + * cannot use immediate values. + */ + +#include "brw_fs.h" +#include "brw_fs_live_variables.h" +#include "brw_cfg.h" + +static bool +could_coissue(const fs_inst *inst) +{ + /* MAD can coissue, but while the PRM lists various restrictions for Align1 + * instructions related to data alignment and regioning, it doesn't list + * similar restrictions for Align16 instructions. I don't expect that there + * are any restrictions, since Align16 doesn't allow the kinds of operations + * that are restricted in Align1 mode. + * + * Since MAD is an Align16 instruction we assume it can always coissue. + */ + switch (inst->opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_CMP: + case BRW_OPCODE_ADD: + case BRW_OPCODE_MUL: + return true; + default: + return false; + } +} + +struct imm { + bblock_t *block; + float val; + + uint8_t subreg_offset; + uint16_t reg; + + uint16_t uses_by_coissue; +}; + +struct table { + struct imm *imm; + int size; + int len; +}; + +static struct imm * +find_imm(struct table *table, float val) +{ + assert(signbit(val) == 0); + + for (int i = 0; i < table->len; i++) { + if (table->imm[i].val == val) { + return &table->imm[i]; + } + } + return NULL; +} + +static struct imm * +new_imm(struct table *table, void *mem_ctx) +{ + if (table->len == table->size) { + table->size *= 2; + table->imm = reralloc(mem_ctx, table->imm, struct imm, table->size); + } + return &table->imm[table->len++]; +} + +static int +compare(const void *a, const void *b) +{ + return ((struct imm *)a)->block->num - ((struct imm *)b)->block->num; +} + +void +fs_visitor::opt_combine_constants() +{ + void *const_ctx = ralloc_context(NULL); + + struct table table; + table.size = 8; + table.len = 0; + table.imm = ralloc_array(const_ctx, struct imm, table.size); + + cfg->calculate_idom(); + + /* Make a pass through all instructions and count the number of times each + * constant is used by coissueable instructions. + */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (!could_coissue(inst)) + continue; + + for (int i = 0; i < inst->sources; i++) { + if (inst->src[i].file != IMM) + continue; + + float val = fabsf(inst->src[i].fixed_hw_reg.dw1.f); + struct imm *imm = find_imm(&table, val); + + if (imm) { + imm->block = cfg_t::intersect(block, imm->block); + imm->uses_by_coissue += could_coissue(inst); + } else { + imm = new_imm(&table, const_ctx); + imm->block = block; + imm->val = val; + imm->uses_by_coissue = could_coissue(inst); + } + } + } + + /* Remove constants from the table that don't have enough uses to make them + * profitable to store in a register. + */ + for (int i = 0; i < table.len;) { + struct imm *imm = &table.imm[i]; + + if (imm->uses_by_coissue < 4) { + table.imm[i] = table.imm[table.len - 1]; + table.len--; + continue; + } + i++; + } + if (table.len == 0) { + ralloc_free(const_ctx); + return; + } + if (cfg->num_blocks != 1) + qsort(table.imm, table.len, sizeof(struct imm), compare); + + fs_reg reg(this, glsl_type::float_type); + reg.stride = 0; + int i = 0; + + foreach_block(block, cfg) { + backend_instruction *inst = block->first_non_control_flow_inst(); + + for (; i < table.len; i++) { + struct imm *imm = &table.imm[i]; + + if (imm->block->num != block->num) + break; + + fs_inst *mov = MOV(reg, fs_reg(imm->val)); + mov->exec_size = 1; + inst->insert_before(block, mov); + imm->reg = reg.reg; + imm->subreg_offset = reg.subreg_offset; + + reg.subreg_offset += sizeof(float); + if (reg.subreg_offset == 32) { + reg.reg = virtual_grf_alloc(1); + reg.subreg_offset = 0; + } + } + + foreach_inst_in_block(fs_inst, inst, block) { + if (inst->opcode == BRW_OPCODE_MOV && + inst->src[0].file == IMM && + inst->dst.stride == 0) + continue; + + /* Tell Matt privately if you actually read this far. */ + for (int i = 0; i < inst->sources; i++) { + if (inst->src[i].file != IMM || + inst->src[i].type != BRW_REGISTER_TYPE_F) + continue; + + float val = fabsf(inst->src[i].fixed_hw_reg.dw1.f); + struct imm *imm = find_imm(&table, val); + if (!imm) + continue; + + inst->src[i].file = GRF; + inst->src[i].reg = imm->reg; + inst->src[i].subreg_offset = imm->subreg_offset; + inst->src[i].stride = 0; + inst->src[i].negate = signbit(inst->src[i].fixed_hw_reg.dw1.f) != + signbit(imm->val); + assert(fabsf(inst->src[i].fixed_hw_reg.dw1.f) == imm->val); + } + } + } + + ralloc_free(const_ctx); + invalidate_live_intervals(); +} -- 2.0.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev