Thanks for implementing this, I just have one comment. On Thu, Mar 14, 2013 at 7:45 PM, Marek Olšák <mar...@gmail.com> wrote: > Initial version contributed by: Martin Andersson <g02ma...@gmail.com> > > This is only used if the memcpy path cannot be used and if no transfer ops > are needed. It's pretty similar to our TexImage and GetTexImage > implementations. > > The motivation behind this is to be able to use ReadPixels every frame and > still have at least 20 fps (or 60 fps with a powerful GPU and CPU) > instead of 0.5 fps. > --- > src/mesa/state_tracker/st_cb_readpixels.c | 184 > +++++++++++++++++++++++++++-- > src/mesa/state_tracker/st_cb_texture.c | 12 +- > src/mesa/state_tracker/st_cb_texture.h | 6 + > 3 files changed, 189 insertions(+), 13 deletions(-) > > diff --git a/src/mesa/state_tracker/st_cb_readpixels.c > b/src/mesa/state_tracker/st_cb_readpixels.c > index 6b824b1..b524738 100644 > --- a/src/mesa/state_tracker/st_cb_readpixels.c > +++ b/src/mesa/state_tracker/st_cb_readpixels.c > @@ -25,35 +25,205 @@ > * > **************************************************************************/ > > - > +#include "main/image.h" > +#include "main/pbo.h" > #include "main/imports.h" > #include "main/readpix.h" > +#include "main/enums.h" > +#include "main/framebuffer.h" > +#include "util/u_inlines.h" > +#include "util/u_format.h" > > +#include "st_cb_fbo.h" > #include "st_atom.h" > #include "st_context.h" > #include "st_cb_bitmap.h" > #include "st_cb_readpixels.h" > +#include "state_tracker/st_cb_texture.h" > +#include "state_tracker/st_format.h" > +#include "state_tracker/st_texture.h" > > > /** > - * The only special thing we need to do for the state tracker's > - * glReadPixels is to validate state (to be sure we have up-to-date > - * framebuffer surfaces) and flush the bitmap cache prior to reading. > + * This uses a blit to copy the read buffer to a texture format which matches > + * the format and type combo and then a fast read-back is done using memcpy. > + * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is > + * a format which matches the swizzling. > + * > + * If such a format isn't available, we fall back to _mesa_readpixels. > + * > + * NOTE: Some drivers use a blit to convert between tiled and linear > + * texture layouts during texture uploads/downloads, so the blit > + * we do here should be free in such cases. > */ > static void > st_readpixels(struct gl_context *ctx, GLint x, GLint y, > GLsizei width, GLsizei height, > GLenum format, GLenum type, > const struct gl_pixelstore_attrib *pack, > - GLvoid *dest) > + GLvoid *pixels) > { > struct st_context *st = st_context(ctx); > + struct gl_renderbuffer *rb = > + _mesa_get_read_renderbuffer_for_format(ctx, format); > + struct st_renderbuffer *strb = st_renderbuffer(rb); > + struct pipe_context *pipe = st->pipe; > + struct pipe_screen *screen = pipe->screen; > + struct pipe_resource *src; > + struct pipe_resource *dst = NULL; > + struct pipe_resource dst_templ; > + enum pipe_format dst_format, src_format; > + struct pipe_blit_info blit; > + unsigned bind = PIPE_BIND_TRANSFER_READ; > + struct pipe_transfer *tex_xfer; > + ubyte *map = NULL; > > + /* Validate state (to be sure we have up-to-date framebuffer surfaces) > + * and flush the bitmap cache prior to reading. */ > st_validate_state(st); > st_flush_bitmap_cache(st); > - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, dest); > -} > > + /* This must be done after state validation. */ > + src = strb->texture; > + > + /* XXX Fallback for depth-stencil formats due to an incomplete > + * stencil blit implementation in some drivers. */ > + if (format == GL_DEPTH_STENCIL) { > + goto fallback; > + } > + > + /* We are creating a texture of the size of the region being read back. > + * Need to check for NPOT texture support. */ > + if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) && > + (!util_is_power_of_two(width) || > + !util_is_power_of_two(height))) { > + goto fallback; > + } > + > + /* If the base internal format and the texture format don't match, we have > + * to use the slow path. */ > + if (rb->_BaseFormat != > + _mesa_get_format_base_format(rb->Format)) { > + goto fallback; > + } > + > + /* See if the texture format already matches the format and type, > + * in which case the memcpy-based fast path will likely be used and > + * we don't have to blit. */ > + if (_mesa_format_matches_format_and_type(rb->Format, format, > + type, pack->SwapBytes)) { > + goto fallback; > + }
On my system (Intel core i7 and AMD 6950) the memcpy fast-path takes around 210 milliseconds and the blit path takes around 9 milliseconds, for a 1920x1200 image. So it is much faster, at least on my system, to use the blit path even when the mesa format matches format and type. To test this I forced the use of the memcpy fast-path (the mesa format was MESA_FORMAT_XRGB8888, the format was GL_BGRA and the type was GL_UNSIGNED_BYTE). I visually inspected the resulting image and I could not see anything wrong with it. But perhaps forcing the use of the memcpy fast-path invalidates the results. Or there might be some other issues that I am missing, but I just wanted to say that on my system it is better to remove this check. > + > + if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) { > + goto fallback; > + } > + > + /* Convert the source format to what is expected by ReadPixels > + * and see if it's supported. */ > + src_format = util_format_linear(src->format); > + src_format = util_format_luminance_to_red(src_format); > + src_format = util_format_intensity_to_red(src_format); > + > + if (!src_format || > + !screen->is_format_supported(screen, src_format, src->target, > + src->nr_samples, > + PIPE_BIND_SAMPLER_VIEW)) { > + printf("fallback: src format unsupported %s\n", > util_format_short_name(src_format)); > + goto fallback; > + } > + > + if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL) > + bind |= PIPE_BIND_DEPTH_STENCIL; > + else > + bind |= PIPE_BIND_RENDER_TARGET; > + > + /* Choose the destination format by finding the best match > + * for the format+type combo. */ > + dst_format = st_choose_matching_format(screen, bind, format, type, > + pack->SwapBytes); > + if (dst_format == PIPE_FORMAT_NONE) { > + printf("fallback: no matching format for %s, %s\n", > + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type)); > + goto fallback; > + } > + > + /* create the destination texture */ > + memset(&dst_templ, 0, sizeof(dst_templ)); > + dst_templ.target = PIPE_TEXTURE_2D; > + dst_templ.format = dst_format; > + dst_templ.bind = bind; > + dst_templ.usage = PIPE_USAGE_STAGING; > + > + st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1, > + &dst_templ.width0, &dst_templ.height0, > + &dst_templ.depth0, &dst_templ.array_size); > + > + dst = screen->resource_create(screen, &dst_templ); > + if (!dst) { > + goto fallback; > + } > + > + blit.src.resource = src; > + blit.src.level = strb->rtt_level; > + blit.src.format = src_format; > + blit.dst.resource = dst; > + blit.dst.level = 0; > + blit.dst.format = dst->format; > + blit.src.box.x = x; > + blit.dst.box.x = 0; > + blit.src.box.y = y; > + blit.dst.box.y = 0; > + blit.src.box.z = strb->rtt_face + strb->rtt_slice; > + blit.dst.box.z = 0; > + blit.src.box.width = blit.dst.box.width = width; > + blit.src.box.height = blit.dst.box.height = height; > + blit.src.box.depth = blit.dst.box.depth = 1; > + blit.mask = st_get_blit_mask(rb->_BaseFormat, format); > + blit.filter = PIPE_TEX_FILTER_NEAREST; > + blit.scissor_enable = FALSE; > + > + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { > + blit.src.box.y = rb->Height - blit.src.box.y; > + blit.src.box.height = -blit.src.box.height; > + } > + > + /* blit */ > + st->pipe->blit(st->pipe, &blit); > + > + /* map resources */ > + pixels = _mesa_map_pbo_dest(ctx, pack, pixels); > + > + map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ, > + 0, 0, 0, width, height, 1, &tex_xfer); > + if (!map) { > + _mesa_unmap_pbo_dest(ctx, pack); > + pipe_resource_reference(&dst, NULL); > + goto fallback; > + } > + > + /* memcpy data into a user buffer */ > + { > + const uint bytesPerRow = width * util_format_get_blocksize(dst_format); > + GLuint row; > + > + for (row = 0; row < height; row++) { > + GLvoid *dest = _mesa_image_address3d(pack, pixels, > + width, height, format, > + type, 0, row, 0); > + memcpy(dest, map, bytesPerRow); > + map += tex_xfer->stride; > + } > + } > + > + pipe_transfer_unmap(pipe, tex_xfer); > + _mesa_unmap_pbo_dest(ctx, pack); > + pipe_resource_reference(&dst, NULL); > + return; > + > +fallback: > + _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); > +} > > void st_init_readpixels_functions(struct dd_function_table *functions) > { > diff --git a/src/mesa/state_tracker/st_cb_texture.c > b/src/mesa/state_tracker/st_cb_texture.c > index c922a31..7307c60 100644 > --- a/src/mesa/state_tracker/st_cb_texture.c > +++ b/src/mesa/state_tracker/st_cb_texture.c > @@ -68,7 +68,7 @@ > #define DBG if (0) printf > > > -static enum pipe_texture_target > +enum pipe_texture_target > gl_target_to_pipe(GLenum target) > { > switch (target) { > @@ -542,8 +542,8 @@ prep_teximage(struct gl_context *ctx, struct > gl_texture_image *texImage, > * Return a writemask for the gallium blit. The parameters can be base > * formats or "format" from glDrawPixels/glTexImage/glGetTexImage. > */ > -static unsigned > -get_blit_mask(GLenum srcFormat, GLenum dstFormat) > +unsigned > +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat) > { > switch (dstFormat) { > case GL_DEPTH_STENCIL: > @@ -769,7 +769,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, > blit.src.box.width = blit.dst.box.width = width; > blit.src.box.height = blit.dst.box.height = height; > blit.src.box.depth = blit.dst.box.depth = depth; > - blit.mask = get_blit_mask(format, texImage->_BaseFormat); > + blit.mask = st_get_blit_mask(format, texImage->_BaseFormat); > blit.filter = PIPE_TEX_FILTER_NEAREST; > blit.scissor_enable = FALSE; > > @@ -996,7 +996,7 @@ st_GetTexImage(struct gl_context * ctx, > blit.src.box.width = blit.dst.box.width = width; > blit.src.box.height = blit.dst.box.height = height; > blit.src.box.depth = blit.dst.box.depth = depth; > - blit.mask = get_blit_mask(texImage->_BaseFormat, format); > + blit.mask = st_get_blit_mask(texImage->_BaseFormat, format); > blit.filter = PIPE_TEX_FILTER_NEAREST; > blit.scissor_enable = FALSE; > > @@ -1370,7 +1370,7 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims, > blit.dst.box.width = width; > blit.dst.box.height = height; > blit.dst.box.depth = 1; > - blit.mask = get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat); > + blit.mask = st_get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat); > blit.filter = PIPE_TEX_FILTER_NEAREST; > > /* 1D array textures need special treatment. > diff --git a/src/mesa/state_tracker/st_cb_texture.h > b/src/mesa/state_tracker/st_cb_texture.h > index 27956bc..7f70d0b 100644 > --- a/src/mesa/state_tracker/st_cb_texture.h > +++ b/src/mesa/state_tracker/st_cb_texture.h > @@ -38,6 +38,12 @@ struct gl_texture_object; > struct pipe_context; > struct st_context; > > +extern enum pipe_texture_target > +gl_target_to_pipe(GLenum target); > + > +unsigned > +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat); > + > extern GLboolean > st_finalize_texture(struct gl_context *ctx, > struct pipe_context *pipe, > -- > 1.7.10.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev