Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 23 +----- src/mesa/drivers/dri/i965/brw_fs.h | 7 ++ src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 110 +++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 9745c28..d8dc49b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1177,28 +1177,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w) emit(OR(Yp, t1, t2)); SWAP_XY_AND_XPYP(); } else { - /* Applying the same logic as above, but in reverse, we obtain the - * formulas: - * - * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1 - * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2 - */ - emit(AND(t1, X, brw_imm_uw(0xfffa))); /* X & ~0b101 */ - emit(SHL(t1, t1, brw_imm_uw(1))); /* (X & ~0b101) << 1 */ - emit(AND(t2, Y, brw_imm_uw(2))); /* Y & 0b10 */ - emit(SHL(t2, t2, brw_imm_uw(2))); /* (Y & 0b10) << 2 */ - emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */ - emit(AND(t2, Y, brw_imm_uw(1))); /* Y & 0b1 */ - emit(SHL(t2, t2, brw_imm_uw(1))); /* (Y & 0b1) << 1 */ - emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 - | (Y & 0b1) << 1 */ - emit(AND(t2, X, brw_imm_uw(1))); /* X & 0b1 */ - emit(OR(Xp, t1, t2)); - emit(AND(t1, Y, brw_imm_uw(0xfffc))); /* Y & ~0b11 */ - emit(SHR(t1, t1, brw_imm_uw(1))); /* (Y & ~0b11) >> 1 */ - emit(AND(t2, X, brw_imm_uw(4))); /* X & 0b100 */ - emit(SHR(t2, t2, brw_imm_uw(2))); /* (X & 0b100) >> 2 */ - emit(OR(Yp, t1, t2)); + emit_translate_w_to_y_tiling(t1, t2, X, Y, Xp, Yp); SWAP_XY_AND_XPYP(); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5b0687e..e02c025 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -290,6 +290,13 @@ protected: fs_inst *emit(fs_inst *inst); void emit(exec_list list); + void emit_translate_w_to_y_tiling(const fs_reg& t1, + const fs_reg& t2, + const fs_reg& src_x, + const fs_reg& src_y, + const fs_reg& dst_x, + const fs_reg& dst_y); + void push_force_uncompressed(); void pop_force_uncompressed(); 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp index 6f1e2dd..6ba6516 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp @@ -194,6 +194,116 @@ fs_emitter::pop_force_uncompressed() assert(force_uncompressed_stack >= 0); } +/** + * Emit translation of pixel coordinates src_x and src_y in W-tiled layout + * to corresponding coordinates dst_x and dst_y in Y-tiled layout. + * The operation requires two temporary registers in addition to the source + * and destination. Note also that source and destination registers cannot + * overlap. + * + * Both W-tiling and Y-tiling have equal tile size of one page. The difference + * is in how pixels are organised within the page: W-tile has 64 rows each + * holding in turn two 32 byte sub-tiles whereas Y-tile has 32 rows each + * holding eight 16 byte sub-tiles. The sub-tiles in turn have different + * layout: W is 8x4 bytes and Y is 16x1. Now, in Y-tiling two subsequent tiles + * are on top of each other. If each pair is thought to form one tile instead + * one can think Y-tiling to consist of 16 rows and eight columns of 32-byte + * subtiles. + * + * This organisation is independent of the pixel format used and + * hence the number of pixels within a tile varies based on how many bytes + * per pixel are needed. + * + * The operation here is fixed to one-byte-per-pixel formats only - it + * assumes that Y-subtile holds 16 pixels per row (and W 8 respectively). + * + * First examine the X coordinate representing an address using W-tiling. + * The lowest six bits represent a column within a tile while the higher bits + * designate a tile number horizontally. 
+ * As a Y-tile can hold twice as many pixels horizontally as a W-tile, the + * tile number needs to be multiplied by two in order to move to the desired + * tile horizontally: + * + * (X & ~0b111) << 1 == (X & 0xFFF8) << 1 (1) + * + * The lowest six can be further divided in two parts - the subtile number + * and then the remaining coordinate within the subtile. These are three and + * three respectively for W-tiling. Then unlike Y-subtile W is further + * divided into 4x4 and again into 2x2 tiles. Hence the third lowest bit + * represents the 4x4-subtile number, the second lowest the 2x2-subtile + * number and finally the lowest the offset within the 2x2 block. + * + * 01 23 45 67 0123456789ABCDEF W Y + * ++==+==++==+==++ +---------------+ a: 3,1 7,0 + * 0 || | || | || 0 | a | b: 1,2 1,1 + * 1 || | a|| | || 1 | b c | c: 5,3 10,1 + * ++--+--++--+--++ +---------------+ + * 2 || b| || | || + * 3 || | ||c | || + * ++==+==++==+==++ + * + * Observing the W layout it can be seen that x-coordinates greater than or equal + * to four reside on the second half of the subtile - in Y-tile this + * corresponds to the second row calling for the following compensation in + * the vertical coordinate: + * + * (X & 0b100) >> 2 == (X & 0x4) >> 2 (2) + * + * The 2x2 subtile in turn results into two x-coordinates x and x + 2 in the + * same row (in the same 8x4 subtile) to be 4 bytes apart in linear memory. + * As addresses in Y-subtile itself are linear, the compensation in the + * horizontal coordinate is: + * + * (X & 0b10) << 1 (3) + * + * Combined with (1): + * + * (X & ~0b101) << 1 == (X & 0xFFFA) << 1 (4) + * + * Similarly in 4x4 W-subtile in the same column any two y-coordinates y and + * y + 2 are 8 bytes apart in linear memory addresses. In 2x2-subtile in turn + * y and y + 1 are two bytes apart.
This results into horizontal compensation + * in Y layout: + * + * (Y & 0b10) << 2 | (Y & 0b1) << 1 (5) + * + * Taking into account (4) and (5), one gets: + * + * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1 + * + * For y-coordinate one needs to consider full tiles and (2). As Y-layout + * has twice as many tiles as W horizontally, the number of tiles in vertical + * direction needs to be divided by two. Every two tiles on top of each other + * in W-layout are "re-layouted" horizontally. + * + * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2 + */ +void +fs_emitter::emit_translate_w_to_y_tiling(const fs_reg& t1, + const fs_reg& t2, + const fs_reg& src_x, + const fs_reg& src_y, + const fs_reg& dst_x, + const fs_reg& dst_y) +{ + emit(AND(t1, src_x, brw_imm_uw(0xfffa))); /* X & ~0b101 */ + emit(SHL(t1, t1, brw_imm_uw(1))); /* (X & ~0b101) << 1 */ + emit(AND(t2, src_y, brw_imm_uw(2))); /* Y & 0b10 */ + emit(SHL(t2, t2, brw_imm_uw(2))); /* (Y & 0b10) << 2 */ + emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */ + emit(AND(t2, src_y, brw_imm_uw(1))); /* Y & 0b1 */ + emit(SHL(t2, t2, brw_imm_uw(1))); /* (Y & 0b1) << 1 */ + emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 + | (Y & 0b1) << 1 */ + emit(AND(t2, src_x, brw_imm_uw(1))); /* X & 0b1 */ + emit(OR(dst_x, t1, t2)); + emit(AND(t1, src_y, brw_imm_uw(0xfffc))); /* Y & ~0b11 */ + emit(SHR(t1, t1, brw_imm_uw(1))); /* (Y & ~0b11) >> 1 */ + emit(AND(t2, src_x, brw_imm_uw(4))); /* X & 0b100 */ + emit(SHR(t2, t2, brw_imm_uw(2))); /* (X & 0b100) >> 2 */ + emit(OR(dst_y, t1, t2)); +} + fs_emitter::fs_emitter(struct brw_context *brw, struct brw_wm_compile *_c, unsigned _dispatch_width) -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev