At 1000x1000, the readpixels microbenchmark in mesa-demos goes from 47 Mpix/sec to 450 Mpix/sec. Performance at the 10x10 size stays about the same. --- src/mesa/drivers/dri/intel/intel_fbo.c | 89 +++++++++++++++++++++++++++----- src/mesa/drivers/dri/intel/intel_fbo.h | 2 +- 2 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index c9a1df5..f570339 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -41,6 +41,7 @@ #include "intel_context.h" #include "intel_batchbuffer.h" #include "intel_buffers.h" +#include "intel_blit.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" @@ -94,7 +95,7 @@ intel_map_renderbuffer(struct gl_context *ctx, struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLubyte *map; - int stride; + int stride, flip_stride; /* We sometimes get called with this by our intel_span.c usage. */ if (!irb->region) { @@ -103,28 +104,82 @@ intel_map_renderbuffer(struct gl_context *ctx, return; } - if (drm_intel_bo_references(intel->batch.bo, irb->region->bo)) { - intel_batchbuffer_flush(intel); - } + stride = irb->region->pitch * irb->region->cpp; - drm_intel_gem_bo_map_gtt(irb->region->bo); + /* On gen6+, we have LLC sharing, which means we can get high-performance + * access to linear-mapped buffers. So, blit out a tiled buffer (if + * possible, which it isn't really for Y tiling) to a temporary BO and return + * a map of that. + */ + if (intel->gen >= 6 && + !(mode & GL_MAP_WRITE_BIT) && + irb->region->tiling == I915_TILING_X) { + int dst_stride = w * irb->region->cpp; + int src_x, src_y; - map = irb->region->bo->virtual; - stride = irb->region->pitch * irb->region->cpp; + if (rb->Name) { + src_x = x + irb->draw_x; + src_y = y + irb->draw_y; + } else { + src_x = x; + src_y = irb->region->height - y - h; + } + + irb->map_bo = drm_intel_bo_alloc(intel->bufmgr, "MapRenderbuffer() temp", + dst_stride * h, 4096); + + /* We don't do the flip in the blit, because it's always so tricky to get + * right. 
+ */ + if (irb->map_bo && + intelEmitCopyBlit(intel, + irb->region->cpp, + irb->region->pitch, irb->region->bo, + 0, irb->region->tiling, + dst_stride / irb->region->cpp, irb->map_bo, + 0, I915_TILING_NONE, + src_x, src_y, + 0, 0, + w, h, + GL_COPY)) { + intel_batchbuffer_flush(intel); + drm_intel_bo_map(irb->map_bo, false); + + if (rb->Name) { + *out_map = irb->map_bo->virtual; + *out_stride = dst_stride; + } else { + *out_map = irb->map_bo->virtual + (h - 1) * dst_stride; + *out_stride = -dst_stride; + } + return; + } else { + drm_intel_bo_unreference(irb->map_bo); + irb->map_bo = NULL; + } + } if (rb->Name == 0) { - map += stride * (irb->region->height - 1); - stride = -stride; + y = irb->region->height - 1 - y; + flip_stride = -stride; } else { - map += irb->draw_x * irb->region->cpp; - map += (int)irb->draw_y * stride; + x += irb->draw_x; + y += irb->draw_y; + flip_stride = stride; } + if (drm_intel_bo_references(intel->batch.bo, irb->region->bo)) { + intel_batchbuffer_flush(intel); + } + + drm_intel_gem_bo_map_gtt(irb->region->bo); + + map = irb->region->bo->virtual; map += x * irb->region->cpp; map += (int)y * stride; *out_map = map; - *out_stride = stride; + *out_stride = flip_stride; } static void @@ -133,8 +188,14 @@ intel_unmap_renderbuffer(struct gl_context *ctx, { struct intel_renderbuffer *irb = intel_renderbuffer(rb); - if (irb->region) - drm_intel_gem_bo_unmap_gtt(irb->region->bo); + if (irb->map_bo) { + drm_intel_bo_unmap(irb->map_bo); + drm_intel_bo_unreference(irb->map_bo); + irb->map_bo = 0; + } else { + if (irb->region) + drm_intel_gem_bo_unmap_gtt(irb->region->bo); + } } /** diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index e12d0fd..0aafa0d 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -44,6 +44,7 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; + drm_intel_bo *map_bo; /** Only used by depth 
renderbuffers for which HiZ is enabled. */ struct intel_region *hiz_region; @@ -57,7 +58,6 @@ struct intel_renderbuffer */ struct gl_renderbuffer *wrapped_depth; struct gl_renderbuffer *wrapped_stencil; - /** \} */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ -- 1.7.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev