On Mon, Mar 9, 2015 at 9:43 PM, Ben Widawsky <benjamin.widaw...@intel.com> wrote: > The blit engine is limited to 32Kx32K transfer. In cases where we have to fall > back to the blitter, and when trying to blit a slice of a 2d texture array, or > face of a cube map, we don't need to transfer the entire texture. > > I doubt this patch will get exercised at this point since we'll always > allocate > a linear BO for huge buffers. The next patch changes that. > > v2: Fix NDEBUG warning > > v3: Rebased with new blit computation function. > Modify computation to account of tiling constraints (Jason, Jordan) > Use the new computation function in y adjust function (Jason, Jordan) > Dropped slice parameter from the y adjusting function (~Jason) > Add assert that adjusted y offset is within bounds > Renamed and moved the helper functions "public" in intel_blit.h > > v3.1: > Fixed assertion fail from v3 (Jordan) > Remove conditional y adjusted calculation, replace with comment (Jordan + > Jason) > > Signed-off-by: Ben Widawsky <b...@bwidawsk.net> > --- > src/mesa/drivers/dri/i965/intel_blit.c | 101 > +++++++++++++++++++++++++++++++-- > src/mesa/drivers/dri/i965/intel_blit.h | 24 +++++++- > 2 files changed, 118 insertions(+), 7 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_blit.c > b/src/mesa/drivers/dri/i965/intel_blit.c > index c7f4cf3..832dad1 100644 > --- a/src/mesa/drivers/dri/i965/intel_blit.c > +++ b/src/mesa/drivers/dri/i965/intel_blit.c > @@ -130,6 +130,92 @@ set_blitter_tiling(struct brw_context *brw, > ADVANCE_BATCH(); \ > } while (0) > > +/* This function returns the offset to be used by the blit operation. It may > + * modify the y if the texture would otherwise fail to be able to perform a > + * blit. The x offset will not need to change based on the computations made > by > + * this function. 
> + * > + * By the time we get to this function, the miptree creation code should have > + * already determined it's possible to blit the texture, so there should > never > + * be a case where this function fails. > + */ > +static GLuint > +intel_miptree_get_adjusted_y_offset(struct intel_mipmap_tree *mt, uint32_t > *y) > +{ > + GLuint offset = mt->offset; > + > + /* Convert an input number of rows: y into 2 values: an offset (page > aligned > + * in byte units), and the remaining rows of y. The resulting 2 values > will > + * be used as parameters for a blit operation [using the HW blit engine]. > + * They will therefore conform to whatever restrictions are needed. > + * > + * XXX: This code assumes that LOD0 is always guaranteed to be properly > + * aligned for the blit operation. The round down only mutates y if the > LOD > + * being adjusted isn't tile aligned. In other words, if input y is > pointing > + * to LOD0 of a slice, the adjusted y should always be 0. Similarly if > input > + * y is pointing to another LOD, and the offset happens to be tile > aligned, y > + * will again be 0. > + * > + * The following diagram shows how the blit parameters are modified. In > the > + * example, is is trying to blit with LOD1 from slice[x] as a surface, and It is trying > + * LOD1 is not properly tile aligned. "TA" means tile aligned. The > rectangle > + * is the BO that contains the mipmaps. There may be an offset from the > start > + * of the BO to the first slice. > + * > + * INPUT OUTPUT > + * 0 +---------------------------+ > + * | | > +---------------------------+ > + * offset | slice[0]...slice[x-2] | offset | +----------+ > | > + * | | | | lod0 | slice[x] > | > + * TA | +----------+ | | | | > | > + * | | lod0 | slice[x-1] | | +----------+ > | > + * | | | | y---> | +---+ +-+ > | > + * | +----------+ | | | | +-+ > | > + * | +---+ +-+ | | +---+ * > | > + * | | | +-+ | | > | > + * | +---+ * | | slice[x+1]... 
> | > + * | | > +---------------------------+ > + * | // qpitch padding | > + * | | > + * TA | +----------+ | > + * | | lod0 | slice[x] | > + * | | | | > + * | +----------+ | > + * y---> | +---+ +-+ | > + * | | | +-+ | > + * | +---+ * | > + * | | > + * | slice[x+1]... | > + * +---------------------------+ > + */ > + > + /* The following calculation looks fancy. In the common case, slice == 0 > + * and/or the full mipmap fits within blitter constraints, it should be > + * equivalent to the simple: > + * return offset; > + */ > + const long TILE_MASK = > + mt->tiling != I915_TILING_NONE ? sysconf(_SC_PAGE_SIZE) - 1 : 0; > + (void) TILE_MASK; > + > + /* Since we need to output a page aligned offset, the original offset must > + * also be page aligned. For tiled buffers, it always should be. */ > + assert((offset & TILE_MASK) == 0); > + > + /* Adjust the y value to pick the nearest tile aligned mipmap row */ > + unsigned tile_aligned_row = > + ROUND_DOWN_TO(*y, intel_blit_tile_height(mt->tiling)); > + *y -= tile_aligned_row; > + > + /* Convert tiled aligned row to a byte offset for use by the blitter */ > + tile_aligned_row *= mt->pitch; > + assert((tile_aligned_row & TILE_MASK) == 0); > + offset += tile_aligned_row; > + > + assert(*y < intel_blit_max_height(mt->tiling)); > + return offset; > +} > + > /** > * Implements a rectangular block transfer (blit) of pixels between two > * miptrees. 
> @@ -240,22 +326,27 @@ intel_miptree_blit(struct brw_context *brw, > dst_x += dst_image_x; > dst_y += dst_image_y; > > + GLuint src_offset = intel_miptree_get_adjusted_y_offset(src_mt, &src_y); > + GLuint dst_offset = intel_miptree_get_adjusted_y_offset(dst_mt, &dst_y); > + > if (src_x >= INTEL_MAX_BLIT_PITCH || dst_x >= INTEL_MAX_BLIT_PITCH || > - src_y >= intel_blit_max_height() || > - dst_y >= intel_blit_max_height()) { > + src_y >= intel_blit_max_height(src_mt->tiling) || > + dst_y >= intel_blit_max_height(dst_mt->tiling)) { > perf_debug("Falling back due to >=%dk offset [src(%d, %d) dst(%d, > %d)]\n", > src_x, src_y, dst_x, dst_y, > - MAX2(intel_blit_max_height(), INTEL_MAX_BLIT_PITCH) >> 20); > + MAX3(intel_blit_max_height(src_mt->tiling), > + intel_blit_max_height(dst_mt->tiling), > + INTEL_MAX_BLIT_PITCH) >> 20); This shift should be >> 10 rather than >> 20, so that the value printed for the "%dk" in the message comes out as 32.
> return false; > } > > if (!intelEmitCopyBlit(brw, > src_mt->cpp, > src_pitch, > - src_mt->bo, src_mt->offset, > + src_mt->bo, src_offset, > src_mt->tiling, > dst_mt->pitch, > - dst_mt->bo, dst_mt->offset, > + dst_mt->bo, dst_offset, > dst_mt->tiling, > src_x, src_y, > dst_x, dst_y, > diff --git a/src/mesa/drivers/dri/i965/intel_blit.h > b/src/mesa/drivers/dri/i965/intel_blit.h > index 52dd67c..aff2d58 100644 > --- a/src/mesa/drivers/dri/i965/intel_blit.h > +++ b/src/mesa/drivers/dri/i965/intel_blit.h > @@ -78,14 +78,34 @@ void intel_emit_linear_blit(struct brw_context *brw, > unsigned int src_offset, > unsigned int size); > > + > +/* Returns the height of the tiling format. This would be measured in > scanlines > + * (of pitch bytes) > + */ > +static inline uint32_t > +intel_blit_tile_height(uint32_t tiling) > +{ > + const long PAGE_SIZE = sysconf(_SC_PAGE_SIZE); > + switch (tiling) { > + case I915_TILING_X: > + return PAGE_SIZE / 512; > + case I915_TILING_Y: > + return PAGE_SIZE / 128; > + case I915_TILING_NONE: > + return 1; > + default: > + unreachable("Unknown tiling format\n"); > + } > +} > + > static inline uint32_t > -intel_blit_max_height(void) > +intel_blit_max_height(uint32_t tiling) > { > /* The docs say that the blitter is capable of transferring 65536 > scanlines > * per blit, however the commands we use only have a signed 16b value thus > * making the practical limit 15b. > */ > - return INTEL_MAX_BLIT_ROWS; > + return INTEL_MAX_BLIT_ROWS - intel_blit_tile_height(tiling); > } > > #endif > -- > 2.3.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev