Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 2 + .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 7 + .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 91 ++++++++++-- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 + src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_resource.h | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 35 +++++ src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 12 +- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 3 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 +- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 11 ++ .../drivers/nouveau/nvc0/nvc0_state_validate.c | 155 +++++++++++++++++---- 13 files changed, 283 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 3d0782f86b..7c835ceab8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK20A_CHIPSET 0xea #define NVISA_GM107_CHIPSET 0x110 +#define NVISA_GM200_CHIPSET 0x120 struct nv50_ir_prog_info { @@ -145,6 +146,7 @@ struct nv50_ir_prog_info bool persampleInvocation; bool usesSampleMaskIn; bool readsFramebuffer; + bool readsSampleLocations; } fp; struct { uint32_t inputOffset; /* base address for user args */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3c5bad05fe..d7844d7381 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn, info->out[src.getIndex(0)].oread = 1; } } + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info->prop.fp.readsSampleLocations = true; + } if (src.getFile() != TGSI_FILE_INPUT) return; @@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) info->prop.fp.readsFramebuffer = true; + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) + info->prop.fp.readsSampleLocations = true; + if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 29f674b451..f64439dbdc 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i) ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; break; case SV_SAMPLE_POS: { - Value *off = new_LValue(func, FILE_GPR); - ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); + Value *sampleID = bld.getScratch(); + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0)); ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; - bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); - bld.mkLoad(TYPE_F32, - i->getDef(0), - bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, - TYPE_U32, prog->driver->io.sampleInfoBase + - 4 * sym->reg.data.sv.index), - off); + Value *offset = calculateSampleOffset(sampleID); + + assert(prog->driver->prop.fp.readsSampleLocations); + + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + offset); + bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), + bld.mkImm(0x040c + sym->reg.data.sv.index * 16)); + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f/16.0f)); + } else { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase + + 4 * sym->reg.data.sv.index), + offset); + } break; } case SV_SAMPLE_MASK: { @@ -2832,6 +2848,58 @@ NVC0LoweringPass::handleOUT(Instruction *i) return true; } +Value * +NVC0LoweringPass::calculateSampleOffset(Value *sampleID) +{ + Value *offset = bld.getScratch(); + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + // Add sample ID + bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0)); + + Symbol *xSym = bld.mkSysVal(SV_POSITION, 0); + Symbol *ySym = bld.mkSysVal(SV_POSITION, 1); + Value *coord = bld.getScratch(); + + // Add X coordinate + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset); + + // Add Y coordinate + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset); + } else { + bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3)); + } + return offset; +} + +// Handle programmable sample locations for GM20x+ +void +NVC0LoweringPass::handlePIXLD(Instruction *i) +{ + if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET) + return; + if (targ->getChipset() < NVISA_GM200_CHIPSET) + return; + + assert(prog->driver->prop.fp.readsSampleLocations); + + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + calculateSampleOffset(i->getSrc(0))); + + bld.getBB()->remove(i); +} + // Generate a binary predicate if an instruction is predicated by // e.g. an f32 value. void @@ -2931,6 +2999,9 @@ NVC0LoweringPass::visit(Instruction *i) case OP_BUFQ: handleBUFQ(i); break; + case OP_PIXLD: + handlePIXLD(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 1b2b36d3cc..91771fbf7e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -116,6 +116,7 @@ protected: void handleSharedATOMNVE4(Instruction *); void handleLDST(Instruction *); bool handleBUFQ(Instruction *); + void handlePIXLD(Instruction *); void checkPredicate(Instruction *); @@ -142,6 +143,7 @@ private: void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + Value *calculateSampleOffset(Value *sampleID); protected: Value *loadTexHandle(Value *ptr, unsigned int slot); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index f2e304fde6..ac76a9446b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -209,6 +209,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl = static inline bool nv50_miptree_init_ms_mode(struct nv50_miptree *mt) { + mt->multisampling = mt->base.base.nr_samples > 0; switch (mt->base.base.nr_samples) { case 8: mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h index 5d03925b0d..ba4fc0c64c 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h @@ -54,6 +54,7 @@ struct nv50_miptree { uint8_t ms_x; /* log2 of number of samples in x/y dimension */ uint8_t ms_y; uint8_t ms_mode; + bool multisampling; /* true if nr_samples > 0 on creation */ }; static inline struct nv50_miptree * diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 2e4490b8d9..7b97e8806d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -352,6 +352,10 @@ static void nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned, float *); +static void +nvc0_context_get_sample_pixel_grid(struct pipe_context *, unsigned, + unsigned *, unsigned *); + struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) { @@ -403,6 +407,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) pipe->texture_barrier = nvc0_texture_barrier; pipe->memory_barrier = nvc0_memory_barrier; pipe->get_sample_position = nvc0_context_get_sample_position; + pipe->get_sample_pixel_grid = nvc0_context_get_sample_pixel_grid; pipe->emit_string_marker = nvc0_emit_string_marker; nouveau_context_init(&nvc0->base); @@ -566,3 +571,33 @@ nvc0_context_get_sample_position(struct pipe_context *pipe, xy[0] = ptr[sample_index][0] * 0.0625f; xy[1] = ptr[sample_index][1] * 0.0625f; } + +static void +nvc0_context_get_sample_pixel_grid(struct pipe_context *pipe, + unsigned sample_count, + unsigned *width, unsigned *height) +{ + switch (sample_count) { + case 0: + case 1: + /* this could be 4x4, but the GL state tracker makes it difficult to + * create a 1x MSAA texture and smaller grids save CB space */ + *width = 2; + *height = 4; + break; + case 2: + *width = 2; + *height = 4; + break; + case 4: + *width = 2; + *height = 2; + break; + case 8: + *width = 1; + *height = 2; + break; + default: + assert(0); + } +} diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 0729c88dff..5396bdfca9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -134,20 +134,21 @@ #define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4) /* 8 sets of 32-bits integer pairs sample offsets */ #define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */ -#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2) +/* 256 bytes, though only 64 bytes used before GM200 */ +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4) /* draw parameters (index bais, base instance, drawid) */ #define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */ /* 32 user buffers, at 4 32-bits integers each */ -#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4 +#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4 #define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4) /* 8 surfaces, at 16 32-bits integers each */ -#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4 +#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4 #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4) /* 1 64-bits address and 1 32-bits sequence */ -#define NVC0_CB_AUX_MP_INFO 0x620 +#define NVC0_CB_AUX_MP_INFO 0x6a0 #define NVC0_CB_AUX_MP_SIZE 3 * 4 /* 512 64-byte blocks for bindless image handles */ -#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4 +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4 #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) @@ -229,6 +230,7 @@ struct nvc0_context { struct list_head img_head; struct pipe_framebuffer_state framebuffer; + struct pipe_sample_locations_state sample_locations; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; struct pipe_poly_stipple stipple; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 7983c40308..4607d53576 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -139,6 +139,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) static inline bool nvc0_miptree_init_ms_mode(struct nv50_miptree *mt) { + mt->multisampling = mt->base.base.nr_samples > 0; switch (mt->base.base.nr_samples) { case 8: mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 9520d984bb..57d98753f4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } } } + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) + fp->hdr[5] |= 0x30000000; for (i = 0; i < info->numOutputs; ++i) { if (info->out[i].sn == TGSI_SEMANTIC_COLOR) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index c6b1aa3ca8..7c2c581103 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -264,6 +264,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_POST_DEPTH_COVERAGE: + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: + return class_3d >= GM200_3D_CLASS; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_BINDLESS_TEXTURE: @@ -308,7 +310,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 3e204f09a4..0351c5644d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -843,6 +843,16 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe, nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } +static void +nvc0_set_sample_locations_state(struct pipe_context *pipe, + const struct pipe_sample_locations_state *locations) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_locations = *locations; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; +} + static void nvc0_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) @@ -1393,6 +1403,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_min_samples = nvc0_set_min_samples; pipe->set_constant_buffer = nvc0_set_constant_buffer; pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_sample_locations_state = nvc0_set_sample_locations_state; pipe->set_polygon_stipple = nvc0_set_polygon_stipple; pipe->set_scissor_states = nvc0_set_scissor_states; pipe->set_viewport_states = nvc0_set_viewport_states; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 8e2192d3de..fee612990b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -71,6 +71,126 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers) PUSH_DATA (push, 0); // base layer } +static uint32_t +nv120_encode_cb_sample_location(uint8_t x, uint8_t y) +{ + static const uint8_t lut[] = { + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}; + uint32_t result = 0; + /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */ + result |= lut[x]<<8 | lut[y]<<24; + /* fill in gaps with data in a representation for SV_SAMPLE_POS */ + result |= x<<12 | y<<28; + return result; +} + +static void +nv120_validate_sample_locations(struct nvc0_context *nvc0, + unsigned ms, bool multisampling) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned grid_width, grid_height, hw_grid_width; + uint8_t sample_locations[16][2]; + unsigned cb[64]; + unsigned i, pixel, pixel_y, pixel_x, sample; + + nvc0->base.pipe.get_sample_pixel_grid(&nvc0->base.pipe, ms, + &grid_width, &grid_height); + + hw_grid_width = grid_width; + if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */ + hw_grid_width = 4; + + if (!multisampling) { + memset(sample_locations, 8, sizeof(sample_locations)); + } else if (nvc0->sample_locations.enabled) { + struct pipe_sample_locations_state locs_state = nvc0->sample_locations; + util_sample_locations_flip_y(&nvc0->base.pipe, &locs_state, &nvc0->framebuffer); + + for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) { + for (sample = 0; sample < ms; sample++) { + unsigned pixel_x = pixel % hw_grid_width; + unsigned pixel_y = pixel / hw_grid_width; + unsigned wi = pixel * ms + sample; + unsigned ri = (pixel_y * grid_width + pixel_x % grid_width); + ri = ri * ms + sample; + sample_locations[wi][0] = locs_state.locations[ri] & 0xf; + sample_locations[wi][1] = 16 - (locs_state.locations[ri] >> 4); + } + } + } else { + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); + for (i = 0; i < 16; i++) { + sample_locations[i][0] = ptr[i%ms][0]; + sample_locations[i][1] = ptr[i%ms][1]; + } + } + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 65); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for (pixel_y = 0; pixel_y < 4; pixel_y++) { + for (pixel_x = 0; pixel_x < 2; pixel_x++) { + for (sample = 0; sample < ms; sample++) { + unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample; + unsigned read_index = (pixel_y % grid_height * hw_grid_width + pixel_x % grid_width) * ms + sample; + uint8_t x = sample_locations[read_index][0]; + uint8_t y = sample_locations[read_index][1]; + cb[write_index] = nv120_encode_cb_sample_location(x, y); + } + } + } + PUSH_DATAp(push, cb, 64); + + if (screen->base.class_3d >= GM200_3D_CLASS) { + uint32_t val[4] = {}; + + for (i = 0; i < 16; i++) { + val[i / 4] |= sample_locations[i][0] << ((i % 4) * 8); + val[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4); + } + + BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); + PUSH_DATAp(push, val, 4); + } +} + +static void +nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned i; + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for (i = 0; i < ms; i++) { + float xy[2]; + nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); + PUSH_DATAf(push, xy[0]); + PUSH_DATAf(push, xy[1]); + } + + if (screen->base.class_3d >= GM200_3D_CLASS) { + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); + uint32_t val[4] = {}; + + for (i = 0; i < 16; i++) { + val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0); + val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4); + } + } +} + static void nvc0_validate_fb(struct nvc0_context *nvc0) { @@ -81,6 +201,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; unsigned nr_cbufs = fb->nr_cbufs; bool serialize = false; + bool multisampling = false; nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); @@ -120,6 +241,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) PUSH_DATA(push, sf->base.u.tex.first_layer); ms_mode = mt->ms_mode; + multisampling = mt->multisampling; } else { if (res->base.target == PIPE_BUFFER) { PUSH_DATA(push, 262144); @@ -170,6 +292,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) PUSH_DATA (push, sf->base.u.tex.first_layer); ms_mode = mt->ms_mode; + multisampling = mt->multisampling; if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING) serialize = true; @@ -188,8 +311,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0) nvc0_fb_set_null_rt(push, 0, fb->layers); - if (fb->samples > 1) + if (fb->samples > 1) { ms_mode = ffs(fb->samples) - 1; + multisampling = true; + } nr_cbufs = 1; } @@ -198,31 +323,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0) IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); ms = 1 << ms_mode; - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); - PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); - for (i = 0; i < ms; i++) { - float xy[2]; - nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); - PUSH_DATAf(push, xy[0]); - PUSH_DATAf(push, xy[1]); - } - - if (screen->base.class_3d >= GM200_3D_CLASS) { - const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); - uint32_t val[4] = {}; - for (i = 0; i < 16; i++) { - val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0); - val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4); - } - - BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); - PUSH_DATAp(push, val, 4); - } + if (screen->base.class_3d>=GM200_3D_CLASS) + nv120_validate_sample_locations(nvc0, ms, multisampling); + else + nvc0_validate_sample_locations(nvc0, ms); if (serialize) IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev