On Thu, May 21, 2015 at 5:46 PM, Anuj Phogat <anuj.pho...@gmail.com> wrote: > On Thu, May 21, 2015 at 12:26 PM, Ben Widawsky <b...@bwidawsk.net> wrote: >> A lot of opinion stuff is below, feel free to ignore them if you don't think >> there are improvements. >> >> On Fri, Apr 17, 2015 at 04:51:38PM -0700, Anuj Phogat wrote: >>> This patch enables using XY_FAST_COPY_BLT only for Yf/Ys tiled buffers. >>> Later It can be turned on for other tiling patterns (X,Y). >>> >>> Signed-off-by: Anuj Phogat <anuj.pho...@gmail.com> >>> --- >>> src/mesa/drivers/dri/i965/intel_blit.c | 292 >>> +++++++++++++++++++++++---- >>> src/mesa/drivers/dri/i965/intel_blit.h | 3 + >>> src/mesa/drivers/dri/i965/intel_copy_image.c | 3 + >>> src/mesa/drivers/dri/i965/intel_reg.h | 33 +++ >>> 4 files changed, 291 insertions(+), 40 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/intel_blit.c >>> b/src/mesa/drivers/dri/i965/intel_blit.c >>> index 9500bd7..36746c4 100644 >>> --- a/src/mesa/drivers/dri/i965/intel_blit.c >>> +++ b/src/mesa/drivers/dri/i965/intel_blit.c >>> @@ -43,6 +43,23 @@ >>> >>> #define FILE_DEBUG_FLAG DEBUG_BLIT >>> >>> +#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \ >>> +({ \ >>> + switch (tiling) { \ >>> + case I915_TILING_X: \ >>> + CMD |= type ## _TILED_X; \ >>> + break; \ >>> + case I915_TILING_Y: \ >> >> assert(tr_mode != INTEL_MIPTREE_TRMODE_YF); ? >> > INTEL_MIPTREE_TRMODE_{YF, NONE} are allowed and covered in else case. > >>> + if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \ >>> + CMD |= type ## _TILED_64K; \ >>> + else \ >>> + CMD |= type ## _TILED_Y; \ >>> + break; \ >>> + default: \ >>> + unreachable("not reached"); \ >>> + } \ >>> +}) >>> + >>> static void >>> intel_miptree_set_alpha_to_one(struct brw_context *brw, >>> struct intel_mipmap_tree *mt, >>> @@ -75,6 +92,12 @@ static uint32_t >>> br13_for_cpp(int cpp) >>> { >>> switch (cpp) { >>> + case 16:nn >>> + return BR13_32323232; >>> + break; >>> + case 8: >>> + return BR13_16161616; >>> + break; >>> case 4: >>> return BR13_8888; >>> break; >>> @@ -89,6 +112,132 @@ br13_for_cpp(int cpp) >>> } >>> } >>> >>> +static uint32_t >>> +get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { >>> + /*Alignment tables for YF/YS tiled surfaces. */ >>> + const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16}; >>> + const uint32_t align_2d_ys[] = {256, 256, 128, 128, 64}; >>> + const uint32_t bpp = cpp * 8; >>> + uint32_t align; >>> + int i = 0; >>> + >>> + if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) >>> + return 0; >>> + >>> + /* Compute array index. */ >>> + assert (bpp >= 8 && bpp <= 128 && (bpp & (bpp - 1)) == 0); >> >> assert(bpp >= 8 && bpp <= 128 && _mesa_bitcount(bpp) == 1); >> >> (I couldn't find a is_pow2, but one must exist). >> > There is a is_power_of_two() in main/macros.h. I'll use that here. > >>> + while (bpp >> (i + 4)) >>> + i++; >>> + >> Since you just asserted this was a power of 2 above, isn't this just: >> ffs(bpp/8) - 1; >> > Yes, ffs() will work too. I'll use it. > >>> + if (tr_mode == INTEL_MIPTREE_TRMODE_YF) >>> + align = align_2d_yf[i]; >>> + else >>> + align = align_2d_ys[i]; >>> + >>> + switch(align) { >>> + case 512: >>> + return is_src ? XY_SRC_H_ALIGN_512 : XY_DST_H_ALIGN_512; >>> + case 256: >>> + return is_src ? XY_SRC_H_ALIGN_256 : XY_DST_H_ALIGN_256; >>> + case 128: >>> + return is_src ? XY_SRC_H_ALIGN_128 : XY_DST_H_ALIGN_128; >>> + case 64: >>> + return is_src ? XY_SRC_H_ALIGN_64 : XY_DST_H_ALIGN_64; >>> + case 32: >>> + /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */ >>> + case 16: >>> + return is_src ? XY_SRC_H_ALIGN_32 : XY_DST_H_ALIGN_32; >>> + default: >>> + unreachable("not reached"); >>> + } >>> +} >>> + >>> +static uint32_t >>> +get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { >>> + /* Vertical alignment tables for YF/YS tiled surfaces. */ >>> + const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; >>> + const unsigned align_2d_ys[] = {256, 128, 128, 64, 64}; >>> + const uint32_t bpp = cpp * 8; >>> + uint32_t align; >>> + int i = 0; >>> + >>> + if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) >>> + return 0; >>> + >>> + /* Compute array index. */ >>> + assert (bpp >= 8 && bpp <= 128 && (bpp & (bpp - 1)) == 0); >>> + while (bpp >> (i + 4)) >>> + i++; >>> + >>> + if (tr_mode == INTEL_MIPTREE_TRMODE_YF) >>> + align = align_2d_yf[i]; >>> + else >>> + align = align_2d_ys[i]; >>> + >> >> Comments above apply here too. Also, it looks easy enough to combine these >> two >> functions, but I'm okay with leaving them separate if^Wwhen things change in >> the >> future. >> >>> + switch(align) { >>> + case 256: >>> + return is_src ? XY_SRC_V_ALIGN_256 : XY_DST_V_ALIGN_256; >>> + case 128: >>> + return is_src ? XY_SRC_V_ALIGN_128 : XY_DST_V_ALIGN_128; >>> + case 64: >>> + /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */ >>> + case 32: >>> + case 16: >>> + return is_src ? XY_SRC_V_ALIGN_64 : XY_DST_V_ALIGN_64; >>> + default: >>> + unreachable("not reached"); >>> + } >>> +} >>> + >>> +static bool >>> +fast_copy_blit_error_check(uintptr_t src_addr, uint32_t src_pitch, >>> + uint32_t src_tiling, uintptr_t dst_addr, >>> + uint32_t dst_pitch, uint32_t dst_tiling, >>> + uint32_t cpp) >> >> I made some suggestions below on how I think you should use this function. >> >>> +{ >>> + const bool dst_tiling_none = dst_tiling == I915_TILING_NONE; >>> + const bool src_tiling_none = src_tiling == I915_TILING_NONE; >>> + >>> + /* When destination tiling is enabled, this address is 64Byte aligned. >>> */ >>> + if (!dst_tiling_none) { >>> + if (dst_addr & 63) >>> + return false; >>> + } >>> + >>> + /* When source tiling is enabled, this address should be 4 KB aligned. >>> */ >>> + if (!src_tiling_none) { >>> + if (src_addr & 4095) >>> + return false; >>> + } >>> + >>> + /* When either source or destination tiling is enabled, this address is >>> + * 16-byte aligned. >>> + */ >>> + if (!src_tiling_none || !dst_tiling_none) { >>> + if (src_addr & 15) >>> + return false; >>> + } >>> + >>> + assert(cpp <= 16); >>> + /* For Fast Copy Blits the pitch cannot be a negative number. */ >>> + assert(dst_pitch >= 0); >>> + >>> + /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. >>> */ >>> + if ((src_tiling_none && src_pitch % 16 != 0) || >>> + (dst_tiling_none && dst_pitch % 16 != 0)) >>> + return false; >>> + >>> + /* For Tiled surfaces, the pitch has to be a multiple of the Tile width >>> + * (X direction width of the Tile). This means the pitch value will >>> + * always be Cache Line aligned (64byte multiple). >>> + */ >>> + if ((!dst_tiling_none && dst_pitch % 64 != 0) || >>> + (!src_tiling_none && src_pitch % 64 != 0)) >>> + return false; >>> + >> >> Just FYI, Ken has added a patch since which has a similar check, beacause we >> need it for untiled surfaces with XY_SRC_COPY_BLT also. I have a comment on >> this >> below. >> > Yes. He added alignment_valid() to check alignment restrictions on > src_offset and dst_offset. Here I'm doing a check on {src, dst}_pitch > >>> + return true; >>> +} >>> + >>> /** >>> * Emits the packet for switching the blitter from X to Y tiled or back. >>> * >>> @@ -156,6 +305,7 @@ intel_miptree_blit(struct brw_context *brw, >>> uint32_t width, uint32_t height, >>> GLenum logicop) >>> { >>> + bool overlap = false; >>> /* The blitter doesn't understand multisampling at all. */ >>> if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) >>> return false; >>> @@ -216,6 +366,12 @@ intel_miptree_blit(struct brw_context *brw, >>> intel_miptree_resolve_color(brw, src_mt); >>> intel_miptree_resolve_color(brw, dst_mt); >>> >>> + /* Check for an overlap for SKL+ platforms. */ >>> + if (brw->gen >= 9 && >>> + ((src_x < dst_x && (src_x + width) > dst_x) || >>> + (src_x > dst_x && (dst_x + width) > src_x))) >>> + overlap = true; >>> + >> >> I was once doing something similar and noticed that gallium and swrast have a >> region_overlap which seems like something we should move into the helpers and >> start using. Not necessary to the series, just thinking outloud. >> > Yes. Good to make a helper out of it. >>> if (src_flip) >>> src_y = minify(src_mt->physical_height0, src_level - >>> src_mt->first_level) - src_y - height; >>> >>> @@ -251,12 +407,15 @@ intel_miptree_blit(struct brw_context *brw, >>> src_pitch, >>> src_mt->bo, src_mt->offset, >>> src_mt->tiling, >>> + src_mt->tr_mode, >>> dst_mt->pitch, >>> dst_mt->bo, dst_mt->offset, >>> dst_mt->tiling, >>> + dst_mt->tr_mode, >>> src_x, src_y, >>> dst_x, dst_y, >>> width, height, >>> + overlap, >>> logicop)) { >>> return false; >> >> I don't think overlap should be passed as an argument. The caller should >> check, >> the function should assert there is no overlap in the can_do_fast_copy... >> > Right. I'll move overlap computation inside intel_miptree_blit(). >>> } >>> @@ -280,36 +439,41 @@ intelEmitCopyBlit(struct brw_context *brw, >>> drm_intel_bo *src_buffer, >>> GLuint src_offset, >>> uint32_t src_tiling, >>> + uint32_t src_tr_mode, >>> GLshort dst_pitch, >>> drm_intel_bo *dst_buffer, >>> GLuint dst_offset, >>> uint32_t dst_tiling, >>> + uint32_t dst_tr_mode, >>> GLshort src_x, GLshort src_y, >>> GLshort dst_x, GLshort dst_y, >>> GLshort w, GLshort h, >>> + bool overlap, >>> GLenum logic_op) >> >> I need to go back in the series to check, but I'm surprised to see the tr >> mode >> as a separate argument from the tiling type. I was expecting, NONE, X, Y, >> YF, YS >> > I915_TILING_{NONE, X, Y} are defined in libdrm because it is making > use of tilinginformation. But, In case of Yf, Ys libdrm need not know > about it. See my libdrm patches on intel-gfx. > http://patchwork.freedesktop.org/patch/46944/ > http://patchwork.freedesktop.org/patch/46945/ I got distracted and left the explanation in middle. Here are the remaining bits: It doesn't make sense to add to the list of macros in libdrm where they have no use. So, I decided to keep them internal to mesa and name them differently to avoid any confusion. It also matches the way docs define these concepts: tiling and tr_mode.
>>> { >>> GLuint CMD, BR13, pass = 0; >>> int dst_y2 = dst_y + h; >>> int dst_x2 = dst_x + w; >>> drm_intel_bo *aper_array[3]; >>> - bool dst_y_tiled = dst_tiling == I915_TILING_Y; >>> - bool src_y_tiled = src_tiling == I915_TILING_Y; >>> + bool dst_tr_mode_none = dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE; >>> + bool src_tr_mode_none = src_tr_mode == INTEL_MIPTREE_TRMODE_NONE; >>> + >>> + bool dst_y_tiled = dst_tiling == I915_TILING_Y || !dst_tr_mode_none; >>> + bool src_y_tiled = src_tiling == I915_TILING_Y || !src_tr_mode_none; >>> + bool use_fast_copy_blit = brw->gen >= 9 && !overlap && >>> + (!src_tr_mode_none || !dst_tr_mode_none); >> >> How about? >> bool use_fast_copy_blit = >> can_fast_copy_blit(brw, src_offset, src_pitch, src_tiling, >> src_tr_mode, >> dst_offset, dst_pitch, dst_tiling, >> dst_tr_mode, >> cpp)); >> > I like it. >> I am really confused... The docs say the fast copy blit can be used for any >> surface type, however bits 31, and 30 and BR13 force you to define a Y tile >> type. Do you know if this actually works for linear surfaces? If it doesn't: >> assert((src_tiling != I915_TILING_Y) || src_tr_mode); >> assert((dst_tiling != I915_TILING_Y) || dst_tr_mode)); >> > Bits 30, 31 in BR13 are relevant only if you select Tile Y in bits 13, 14 in > BR00. Fast copy blit does work for linear surfaces. I just didn't switch it > ON in this patch. I'm planning to do it later after seeing the performance > impact. > >>> >>> - if (dst_tiling != I915_TILING_NONE) { >>> - if (dst_offset & 4095) >>> - return false; >>> - } >>> - if (src_tiling != I915_TILING_NONE) { >>> - if (src_offset & 4095) >>> - return false; >>> - } >> >> As I mentioned above wrt the CL alignment, I think it's safe to do for both >> modes. Something like: >> >> /* Check both are at least cacheline aligned */ >> if (brw->gen >= 8 && (src_offset | dst_offset) & 63) >> return false; >> > We do this check for SRC_COPY_BLT in alignemnet_valid(). I added > the restrictions for FAST_COPY_BLT in fast_copy_blit_error_check(). > Doing this check for both satisy all alignment restrictions of both modes > except one in fast copy blit: when src is untiled and dst is tiled, dst > should be 64 byte aligned and src should be 16 bytes aligned. See > fast_copy_blit_error_check(). > But we're adding a restriction for it to be 64 byte by the suggested > change. Is there any advantage of the suggested check for both? > >>> if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) >>> return false; >>> >>> assert(!dst_y_tiled || (dst_pitch % 128) == 0); >>> assert(!src_y_tiled || (src_pitch % 128) == 0); >>> >>> + if (use_fast_copy_blit == false && >>> + (src_tr_mode != INTEL_MIPTREE_TRMODE_NONE || >>> + dst_tr_mode != INTEL_MIPTREE_TRMODE_NONE)) >>> + return false; >>> + >> >> It seems like we should make it an error to use this improperly: >> >> /* It's an error for a caller to try to do a regular blit with a yf/ys >> region */ >> assert(use_fast_copy_blit && >> (src_tr_mode != INTEL_MIPTREE_TRMODE_NONE) && >> (dst_tr_mode != INTEL_MIPTREE_TRMODE_NONE)) >> > Agree. >>> /* do space check before going any further */ >>> do { >>> aper_array[0] = brw->batch.bo; >>> @@ -334,13 +498,6 @@ intelEmitCopyBlit(struct brw_context *brw, >>> src_buffer, src_pitch, src_offset, src_x, src_y, >>> dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); >>> >>> - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears >>> to drop >>> - * the low bits. Offsets must be naturally aligned. >>> - */ >>> - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || >>> - dst_pitch % 4 != 0 || dst_offset % cpp != 0) >>> - return false; >>> - >>> /* For big formats (such as floating point), do the copy using 16 or >>> 32bpp >>> * and multiply the coordinates. >>> */ >>> @@ -359,27 +516,76 @@ intelEmitCopyBlit(struct brw_context *brw, >>> } >>> } >>> >>> - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; >>> + if (use_fast_copy_blit && >>> + fast_copy_blit_error_check(src_offset, src_pitch, src_tiling, >>> + dst_offset, dst_pitch, dst_tiling, cpp)) >>> { >>> + BR13 = br13_for_cpp(cpp); >>> >> >> If you follow about you can remove the error check here as we know it's safe >> to >> proceed. >> > Right. As i understand you mean to move the fast_copy_blit_error_check() > inside > can_fast_copy_blit() helper ? > >>> - switch (cpp) { >>> - case 1: >>> - case 2: >>> - CMD = XY_SRC_COPY_BLT_CMD; >>> - break; >>> - case 4: >>> - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; >>> - break; >>> - default: >>> - return false; >>> - } >>> + if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF) >>> + BR13 |= XY_SRC_TRMODE_YF; >>> >>> - if (dst_tiling != I915_TILING_NONE) { >>> - CMD |= XY_DST_TILED; >>> - dst_pitch /= 4; >>> - } >>> - if (src_tiling != I915_TILING_NONE) { >>> - CMD |= XY_SRC_TILED; >>> - src_pitch /= 4; >>> + if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF) >>> + BR13 |= XY_DST_TRMODE_YF; >>> + >>> + CMD = XY_FAST_COPY_BLT_CMD; >>> + >>> + if (dst_tiling != I915_TILING_NONE) { >>> + SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_DST); >>> + /* Pitch value should be specified as a number of Dwords. */ >>> + dst_pitch /= 4; >>> + } >>> + if (src_tiling != I915_TILING_NONE) { >>> + SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_SRC); >>> + /* Pitch value should be specified as a number of Dwords. */ >>> + src_pitch /= 4; >>> + } >>> + >>> + CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */); >>> + CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */); >>> + >>> + CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */); >>> + CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */); >>> + >>> + } else { >>> + /* Source and destination base addresses should be 4 KB aligned. */ >>> + if (dst_tiling != I915_TILING_NONE) { >>> + if (dst_offset & 4095) >>> + return false; >>> + } >>> + if (src_tiling != I915_TILING_NONE) { >>> + if (src_offset & 4095) >>> + return false; >>> + } >>> + >>> + /* Blit pitch must be dword-aligned. Otherwise, the hardware >>> appears to drop >>> + * the low bits. Offsets must be naturally aligned. >>> + */ >>> + if (src_pitch % 4 != 0 || src_offset % cpp != 0 || >>> + dst_pitch % 4 != 0 || dst_offset % cpp != 0) >>> + return false; >>> + >>> + assert(cpp <= 4); >>> + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; >>> + switch (cpp) { >>> + case 1: >>> + case 2: >>> + CMD = XY_SRC_COPY_BLT_CMD; >>> + break; >>> + case 4: >>> + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; >>> + break; >>> + default: >>> + return false; >>> + } >>> + >>> + if (dst_tiling != I915_TILING_NONE) { >>> + CMD |= XY_DST_TILED; >>> + dst_pitch /= 4; >>> + } >>> + if (src_tiling != I915_TILING_NONE) { >>> + CMD |= XY_SRC_TILED; >>> + src_pitch /= 4; >>> + } >> >> I'd be in favor of extracting the CMD out into a helper to cut down the size >> of >> this function. Up to you. >> > I'll try to make a helper for CMD. > >> CMD = get_copy_blit_cmd() >> BR_13 = br13_for_cpp(cpp) >> if (!fast_copy) >> BR_13 |= translate_raster_op(logic_op) << 16; >> >> I think also the pitch division is common to both. >> >>> } >>> >>> if (dst_y2 <= dst_y || dst_x2 <= dst_x) { >>> @@ -533,11 +739,14 @@ intel_emit_linear_blit(struct brw_context *brw, >>> pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4); >>> height = (pitch == 0) ? 1 : size / pitch; >>> ok = intelEmitCopyBlit(brw, 1, >>> - pitch, src_bo, src_offset, I915_TILING_NONE, >>> - pitch, dst_bo, dst_offset, I915_TILING_NONE, >>> + pitch, src_bo, src_offset, >>> + I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE, >>> + pitch, dst_bo, dst_offset, >>> + I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE, >>> 0, 0, /* src x/y */ >>> 0, 0, /* dst x/y */ >>> pitch, height, /* w, h */ >>> + false, /* overlap */ >>> GL_COPY); >>> if (!ok) >>> _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height); >>> @@ -549,11 +758,14 @@ intel_emit_linear_blit(struct brw_context *brw, >>> pitch = ALIGN(size, 4); >>> if (size != 0) { >>> ok = intelEmitCopyBlit(brw, 1, >>> - pitch, src_bo, src_offset, I915_TILING_NONE, >>> - pitch, dst_bo, dst_offset, I915_TILING_NONE, >>> + pitch, src_bo, src_offset, >>> + I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE, >>> + pitch, dst_bo, dst_offset, >>> + I915_TILING_NONE, INTEL_MIPTREE_TRMODE_NONE, >>> 0, 0, /* src x/y */ >>> 0, 0, /* dst x/y */ >>> size, 1, /* w, h */ >>> + false, /* overlap */ >>> GL_COPY); >>> if (!ok) >>> _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1); >>> diff --git a/src/mesa/drivers/dri/i965/intel_blit.h >>> b/src/mesa/drivers/dri/i965/intel_blit.h >>> index f563939..29c9ead 100644 >>> --- a/src/mesa/drivers/dri/i965/intel_blit.h >>> +++ b/src/mesa/drivers/dri/i965/intel_blit.h >>> @@ -37,13 +37,16 @@ intelEmitCopyBlit(struct brw_context *brw, >>> drm_intel_bo *src_buffer, >>> GLuint src_offset, >>> uint32_t src_tiling, >>> + uint32_t src_tr_mode, >>> GLshort dst_pitch, >>> drm_intel_bo *dst_buffer, >>> GLuint dst_offset, >>> uint32_t dst_tiling, >>> + uint32_t dst_tr_mode, >>> GLshort srcx, GLshort srcy, >>> GLshort dstx, GLshort dsty, >>> GLshort w, GLshort h, >>> + bool overlap, >>> GLenum logicop ); >>> >>> bool intel_miptree_blit(struct brw_context *brw, >>> diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c >>> b/src/mesa/drivers/dri/i965/intel_copy_image.c >>> index f4c7eff..1e4a1e2 100644 >>> --- a/src/mesa/drivers/dri/i965/intel_copy_image.c >>> +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c >>> @@ -126,12 +126,15 @@ copy_image_with_blitter(struct brw_context *brw, >>> src_mt->pitch, >>> src_mt->bo, src_mt->offset, >>> src_mt->tiling, >>> + src_mt->tr_mode, >>> dst_mt->pitch, >>> dst_mt->bo, dst_mt->offset, >>> dst_mt->tiling, >>> + dst_mt->tr_mode, >>> src_x, src_y, >>> dst_x, dst_y, >>> src_width, src_height, >>> + false /* overlap */, >>> GL_COPY); >>> } >>> >>> diff --git a/src/mesa/drivers/dri/i965/intel_reg.h >>> b/src/mesa/drivers/dri/i965/intel_reg.h >>> index 488fb5b..6830148 100644 >>> --- a/src/mesa/drivers/dri/i965/intel_reg.h >>> +++ b/src/mesa/drivers/dri/i965/intel_reg.h >>> @@ -87,6 +87,8 @@ >>> >>> #define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22)) >>> >>> +#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22)) >>> + >>> #define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) >>> # define XY_TEXT_BYTE_PACKED (1 << 16) >>> >>> @@ -95,11 +97,42 @@ >>> #define XY_BLT_WRITE_RGB (1 << 20) >>> #define XY_SRC_TILED (1 << 15) >>> #define XY_DST_TILED (1 << 11) >>> +#define XY_SRC_TILED_X (1 << 20) >>> +#define XY_SRC_TILED_Y (2 << 20) >>> +#define XY_SRC_TILED_64K (3 << 20) >>> +#define XY_DST_TILED_X (1 << 13) >>> +#define XY_DST_TILED_Y (2 << 13) >>> +#define XY_DST_TILED_64K (3 << 13) >>> + >> >> You've mixed bits of two commands. Name these something like XY_FAST_*. Put >> them >> in a separate chunk of code. > OK >> Also, I'm OCD, but I would have reversed the order here, highest value on >> top, >> lowest on the bottom >> > I'll follow that order. >>> +#define XY_SRC_H_ALIGN_32 (0 << 17) >>> +#define XY_SRC_H_ALIGN_64 (1 << 17) >>> +#define XY_SRC_H_ALIGN_128 (2 << 17) >>> +#define XY_SRC_H_ALIGN_256 (3 << 17) >>> +#define XY_SRC_H_ALIGN_512 (4 << 17) >>> + >>> +#define XY_SRC_V_ALIGN_64 (0 << 15) >>> +#define XY_SRC_V_ALIGN_128 (1 << 15) >>> +#define XY_SRC_V_ALIGN_256 (2 << 15) >>> + >>> +#define XY_DST_H_ALIGN_32 (0 << 10) >>> +#define XY_DST_H_ALIGN_64 (1 << 10) >>> +#define XY_DST_H_ALIGN_128 (2 << 10) >>> +#define XY_DST_H_ALIGN_256 (3 << 10) >>> +#define XY_DST_H_ALIGN_512 (4 << 10) >>> + >>> +#define XY_DST_V_ALIGN_64 (0 << 8) >>> +#define XY_DST_V_ALIGN_128 (1 << 8) >>> +#define XY_DST_V_ALIGN_256 (2 << 8) >>> >>> /* BR13 */ >>> #define BR13_8 (0x0 << 24) >>> #define BR13_565 (0x1 << 24) >>> #define BR13_8888 (0x3 << 24) >>> +#define BR13_16161616 (0x4 << 24) >>> +#define BR13_32323232 (0x5 << 24) >>> + >>> +#define XY_SRC_TRMODE_YF (1 << 31) >>> +#define XY_DST_TRMODE_YF (1 << 30) >>> >>> /* Pipeline Statistics Counter Registers */ >>> #define IA_VERTICES_COUNT 0x2310 >> >> One thing I don't see addressed in this patch is >> >> "When two sequential fast copy blits have different source surfaces, but >> their >> destinations refer to the same destination surface and therefore destinations >> overlap, a Flush must be inserted between the two blits." > I didn't notice this. I'll take care of it in V3. >> >> >> -- >> Ben Widawsky, Intel Open Source Technology Center _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev