Add support for OpenCL global memory buffers. Note that this has only been tested with regular loads and stores, and it likely needs more work for other operations, e.g. atomic ops.
Signed-off-by: Hans de Goede <hdego...@redhat.com> --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 + .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++++++++----- .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 5 +++- .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 10 ++++--- .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 26 +++++++++++++----- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 14 +++++++--- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +++- .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../nouveau/codegen/nv50_ir_target_nv50.cpp | 1 + .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 + 10 files changed, 74 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index fdc2195..5141fc6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -333,6 +333,7 @@ enum DataFile FILE_SHADER_INPUT, FILE_SHADER_OUTPUT, FILE_MEMORY_BUFFER, + FILE_MEMORY_GLOBAL, FILE_MEMORY_SHARED, FILE_MEMORY_LOCAL, FILE_SYSTEM_VALUE, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 02a1101..62f1598 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1641,8 +1641,15 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) int32_t offset = SDATA(i->src(0)).offset; switch (i->src(0).getFile()) { - case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break; - case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + code[0] = 0x00000000; + code[1] = 0xe0000000; + break; + case FILE_MEMORY_LOCAL: + code[0] = 0x00000002; + code[1] = 0x7a800000; + break; case FILE_MEMORY_SHARED: code[0] = 0x00000002; if (i->subOp == 
NV50_IR_SUBOP_STORE_UNLOCKED) @@ -1678,7 +1685,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) srcId(i->src(1), 2); srcId(i->src(0).getIndirect(0), 10); - if (i->src(0).getFile() == FILE_MEMORY_BUFFER && + if ((i->src(0).getFile() == FILE_MEMORY_BUFFER || + i->src(0).getFile() == FILE_MEMORY_GLOBAL) && i->src(0).isIndirect(0) && i->getIndirect(0, 0)->reg.size == 8) code[1] |= 1 << 23; @@ -1690,8 +1698,15 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) int32_t offset = SDATA(i->src(0)).offset; switch (i->src(0).getFile()) { - case FILE_MEMORY_BUFFER: code[1] = 0xc0000000; code[0] = 0x00000000; break; - case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + code[0] = 0x00000000; + code[1] = 0xc0000000; + break; + case FILE_MEMORY_LOCAL: + code[0] = 0x00000002; + code[1] = 0x7a000000; + break; case FILE_MEMORY_SHARED: code[0] = 0x00000002; if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) @@ -1800,7 +1815,8 @@ CodeEmitterGK110::emitMOV(const Instruction *i) static inline bool uses64bitAddress(const Instruction *ldst) { - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && ldst->src(0).isIndirect(0) && ldst->getIndirect(0, 0)->reg.size == 8; } @@ -1862,7 +1878,8 @@ CodeEmitterGK110::emitCCTL(const Instruction *i) code[0] = 0x00000002 | (i->subOp << 2); - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { code[1] = 0x7b000000; } else { code[1] = 0x7c000000; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 27f287f..3fcdc55 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -2417,7 +2417,8 @@ void 
CodeEmitterGM107::emitCCTL() { unsigned width; - if (insn->src(0).getFile() == FILE_MEMORY_BUFFER) { + if (insn->src(0).getFile() == FILE_MEMORY_BUFFER || + insn->src(0).getFile() == FILE_MEMORY_GLOBAL) { emitInsn(0xef600000); width = 30; } else { @@ -2989,6 +2990,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case FILE_MEMORY_LOCAL : emitLDL(); break; case FILE_MEMORY_SHARED: emitLDS(); break; case FILE_MEMORY_BUFFER: emitLD(); break; + case FILE_MEMORY_GLOBAL: emitLD(); break; default: assert(!"invalid load"); emitNOP(); @@ -3000,6 +3002,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case FILE_MEMORY_LOCAL : emitSTL(); break; case FILE_MEMORY_SHARED: emitSTS(); break; case FILE_MEMORY_BUFFER: emitST(); break; + case FILE_MEMORY_GLOBAL: emitST(); break; default: assert(!"invalid load"); emitNOP(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 7476e21..2653c82 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -663,6 +663,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) code[1] = 0x40000000; break; case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); code[1] = 0x80000000; break; @@ -671,7 +672,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) break; } if (sf == FILE_MEMORY_LOCAL || - sf == FILE_MEMORY_BUFFER) + sf == FILE_MEMORY_BUFFER || + sf == FILE_MEMORY_GLOBAL) emitLoadStoreSizeLG(i->sType, 21 + 32); setDst(i, 0); @@ -679,7 +681,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) emitFlagsRd(i); emitFlagsWr(i); - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { srcId(*i->src(0).getIndirect(0), 9); } else { setAReg16(i, 0); @@ -700,6 +703,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) srcId(i->src(1), 
32 + 14); break; case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); code[1] = 0xa0000000; emitLoadStoreSizeLG(i->dType, 21 + 32); @@ -737,7 +741,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i) break; } - if (f == FILE_MEMORY_BUFFER) + if (f == FILE_MEMORY_BUFFER || f == FILE_MEMORY_GLOBAL) srcId(*i->src(0).getIndirect(0), 9); else setAReg16(i, 0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 6236659..ca475ce 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -281,6 +281,7 @@ CodeEmitterNVC0::setAddressByFile(const ValueRef& src) { switch (src.getFile()) { case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: srcAddr32(src, 26, 0); break; case FILE_MEMORY_LOCAL: @@ -1768,7 +1769,8 @@ CodeEmitterNVC0::emitCachingMode(CacheMode c) static inline bool uses64bitAddress(const Instruction *ldst) { - return ldst->src(0).getFile() == FILE_MEMORY_BUFFER && + return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER || + ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) && ldst->src(0).isIndirect(0) && ldst->getIndirect(0, 0)->reg.size == 8; } @@ -1779,8 +1781,13 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) uint32_t opc; switch (i->src(0).getFile()) { - case FILE_MEMORY_BUFFER: opc = 0x90000000; break; - case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; + case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + opc = 0x90000000; + break; + case FILE_MEMORY_LOCAL: + opc = 0xc8000000; + break; case FILE_MEMORY_SHARED: if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { if (targ->getChipset() >= NVISA_GK104_CHIPSET) @@ -1828,8 +1835,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) code[0] = 0x00000005; switch (i->src(0).getFile()) { - case FILE_MEMORY_BUFFER: opc = 0x80000000; break; - case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; + case 
FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: + opc = 0x80000000; + break; + case FILE_MEMORY_LOCAL: + opc = 0xc0000000; + break; case FILE_MEMORY_SHARED: if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { if (targ->getChipset() >= NVISA_GK104_CHIPSET) @@ -2090,7 +2102,8 @@ CodeEmitterNVC0::emitCCTL(const Instruction *i) { code[0] = 0x00000005 | (i->subOp << 5); - if (i->src(0).getFile() == FILE_MEMORY_BUFFER) { + if (i->src(0).getFile() == FILE_MEMORY_BUFFER || + i->src(0).getFile() == FILE_MEMORY_GLOBAL) { code[1] = 0x98000000; srcAddr32(i->src(0), 28, 2); } else { @@ -3122,6 +3135,7 @@ SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const case FILE_MEMORY_CONST: case FILE_MEMORY_SHARED: case FILE_MEMORY_BUFFER: + case FILE_MEMORY_GLOBAL: case FILE_SYSTEM_VALUE: // TODO: any restrictions here ? break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 91879e4..c167c4a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -374,7 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER; - case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_BUFFER; + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -1284,7 +1284,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (dst.isIndirect(0)) indirectTempArrays.insert(dst.getArrayId()); } else - if (dst.getFile() == TGSI_FILE_BUFFER) { + if (dst.getFile() == TGSI_FILE_BUFFER || + (dst.getFile() == TGSI_FILE_MEMORY && + memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= 0x2; } } @@ -1295,7 +1297,9 @@ bool 
Source::scanInstruction(const struct tgsi_full_instruction *inst) if (src.isIndirect(0)) indirectTempArrays.insert(src.getArrayId()); } else - if (src.getFile() == TGSI_FILE_BUFFER) { + if (src.getFile() == TGSI_FILE_BUFFER || + (src.getFile() == TGSI_FILE_MEMORY && + memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) { info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 0x1 : 0x2; } else @@ -1529,6 +1533,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) if (tgsiFile == TGSI_FILE_MEMORY) { switch (code->memoryFiles[fileIdx].mem_type) { + case TGSI_MEMORY_TYPE_GLOBAL: + /* No-op this is the default for TGSI_FILE_MEMORY */ + sym->setFile(FILE_MEMORY_GLOBAL); + break; case TGSI_MEMORY_TYPE_SHARED: sym->setFile(FILE_MEMORY_SHARED); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 4a96d04..84d2944 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2581,6 +2581,7 @@ MemoryOpt::runOpt(BasicBlock *bb) ldst->op == OP_MEMBAR) { purgeRecords(NULL, FILE_MEMORY_LOCAL); purgeRecords(NULL, FILE_MEMORY_BUFFER); + purgeRecords(NULL, FILE_MEMORY_GLOBAL); purgeRecords(NULL, FILE_MEMORY_SHARED); purgeRecords(NULL, FILE_SHADER_OUTPUT); } else @@ -2588,6 +2589,7 @@ MemoryOpt::runOpt(BasicBlock *bb) if (ldst->src(0).getFile() == FILE_MEMORY_BUFFER) { purgeRecords(NULL, FILE_MEMORY_LOCAL); purgeRecords(NULL, FILE_MEMORY_BUFFER); + purgeRecords(NULL, FILE_MEMORY_GLOBAL); purgeRecords(NULL, FILE_MEMORY_SHARED); } else { purgeRecords(NULL, ldst->src(0).getFile()); @@ -2607,7 +2609,8 @@ MemoryOpt::runOpt(BasicBlock *bb) DataFile file = ldst->src(0).getFile(); // if ld l[]/g[] look for previous store to eliminate the reload - if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL) { + if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL || 
+ file == FILE_MEMORY_GLOBAL) { // TODO: shared memory ? rec = findRecord(ldst, false, isAdjacent); if (rec && !isAdjacent) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 73ed753..3917768 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -456,6 +456,7 @@ int Symbol::print(char *buf, size_t size, case FILE_SHADER_INPUT: c = 'a'; break; case FILE_SHADER_OUTPUT: c = 'o'; break; case FILE_MEMORY_BUFFER: c = 'g'; break; + case FILE_MEMORY_GLOBAL: c = 'g'; break; case FILE_MEMORY_SHARED: c = 's'; break; case FILE_MEMORY_LOCAL: c = 'l'; break; default: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp index 1cd45a2..5c60b22 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp @@ -208,6 +208,7 @@ TargetNV50::getFileSize(DataFile file) const case FILE_SHADER_INPUT: return 0x200; case FILE_SHADER_OUTPUT: return 0x200; case FILE_MEMORY_BUFFER: return 0xffffffff; + case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; case FILE_SYSTEM_VALUE: return 16; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index bda59a5..9e1e7bf 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -249,6 +249,7 @@ TargetNVC0::getFileSize(DataFile file) const case FILE_SHADER_INPUT: return 0x400; case FILE_SHADER_OUTPUT: return 0x400; case FILE_MEMORY_BUFFER: return 0xffffffff; + case FILE_MEMORY_GLOBAL: return 0xffffffff; case FILE_MEMORY_SHARED: return 16 << 10; case FILE_MEMORY_LOCAL: return 48 << 10; case FILE_SYSTEM_VALUE: 
return 32; -- 2.7.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev