On Tue, Mar 11, 2014 at 12:37:15PM -0400, Jan Vesely wrote:
> On Tue, 2014-03-11 at 11:37 -0400, Jan Vesely wrote:
> > On Tue, 2014-03-11 at 15:09 +0100, Marek Olšák wrote:
> > > From: Marek Olšák <marek.ol...@amd.com>
> > > 
> > > v2: - allow byte-aligned DMA buffer copies on Evergreen
> > >     - fix piglit/texsubimage regression
> > >     - use the fallback for 3D copies (depth > 1) as well
> > > ---
> > >  src/gallium/drivers/r600/evergreen_state.c      | 40 +++++++++--------
> > >  src/gallium/drivers/r600/r600_state.c           | 44 +++++++++++--------
> > >  src/gallium/drivers/radeon/r600_buffer_common.c | 58 
> > > +++++++++++--------------
> > >  src/gallium/drivers/radeon/r600_pipe_common.h   | 17 ++++----
> > >  src/gallium/drivers/radeon/r600_texture.c       | 18 +++-----
> > >  src/gallium/drivers/radeonsi/si_state.c         | 19 ++++----
> > >  6 files changed, 99 insertions(+), 97 deletions(-)
> > > 
> > > diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> > > b/src/gallium/drivers/r600/evergreen_state.c
> > > index dca7c58..1e14b01 100644
> > > --- a/src/gallium/drivers/r600/evergreen_state.c
> > > +++ b/src/gallium/drivers/r600/evergreen_state.c
> > > @@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct 
> > > r600_context *rctx,
> > >   }
> > >  }
> > >  
> > > -static boolean evergreen_dma_blit(struct pipe_context *ctx,
> > > -                           struct pipe_resource *dst,
> > > -                           unsigned dst_level,
> > > -                           unsigned dst_x, unsigned dst_y, unsigned 
> > > dst_z,
> > > -                           struct pipe_resource *src,
> > > -                           unsigned src_level,
> > > -                           const struct pipe_box *src_box)
> > > +static void evergreen_dma_blit(struct pipe_context *ctx,
> > > +                        struct pipe_resource *dst,
> > > +                        unsigned dst_level,
> > > +                        unsigned dstx, unsigned dsty, unsigned dstz,
> > > +                        struct pipe_resource *src,
> > > +                        unsigned src_level,
> > > +                        const struct pipe_box *src_box)
> > >  {
> > >   struct r600_context *rctx = (struct r600_context *)ctx;
> > >   struct r600_texture *rsrc = (struct r600_texture*)src;
> > > @@ -3343,22 +3343,22 @@ static boolean evergreen_dma_blit(struct 
> > > pipe_context *ctx,
> > >   unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
> > >   unsigned src_w, dst_w;
> > >   unsigned src_x, src_y;
> > > + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
> > >  
> > >   if (rctx->b.rings.dma.cs == NULL) {
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >  
> > >   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
> > >           evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, 
> > > src_box->width);
> > > -         return TRUE;
> > > +         return;
> > >   }
> > >  
> > > - if (src->format != dst->format) {
> > > -         return FALSE;
> > > - }
> > > - if (rdst->dirty_level_mask != 0) {
> > > -         return FALSE;
> > > + if (src->format != dst->format || src_box->depth > 1 ||
> > > +     rdst->dirty_level_mask != 0) {
> > > +         goto fallback;
> > >   }
> > > +
> > >   if (rsrc->dirty_level_mask) {
> > >           ctx->flush_resource(ctx, src);
> > >   }
> > > @@ -3383,13 +3383,13 @@ static boolean evergreen_dma_blit(struct 
> > > pipe_context *ctx,
> > >  
> > >   if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
> > >           /* FIXME evergreen can do partial blit */
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >   /* the x test here are currently useless (because we don't support 
> > > partial blit)
> > >    * but keep them around so we don't forget about those
> > >    */
> > >   if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || 
> > > (src_box->y & 0x7) || (dst_y & 0x7)) {
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >  
> > >   /* 128 bpp surfaces require non_disp_tiling for both
> > > @@ -3400,7 +3400,7 @@ static boolean evergreen_dma_blit(struct 
> > > pipe_context *ctx,
> > >   if ((rctx->b.chip_class == CAYMAN) &&
> > >       (src_mode != dst_mode) &&
> > >       (util_format_get_blocksize(src->format) >= 16)) {
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >  
> > >   if (src_mode == dst_mode) {
> > > @@ -3423,7 +3423,11 @@ static boolean evergreen_dma_blit(struct 
> > > pipe_context *ctx,
> > >                                   src, src_level, src_x, src_y, 
> > > src_box->z,
> > >                                   copy_height, dst_pitch, bpp);
> > >   }
> > > - return TRUE;
> > > + return;
> > > +
> > > +fallback:
> > > + ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
> > > +                           src, src_level, src_box);
> > >  }
> > >  
> > >  void evergreen_init_state_functions(struct r600_context *rctx)
> > > diff --git a/src/gallium/drivers/r600/r600_state.c 
> > > b/src/gallium/drivers/r600/r600_state.c
> > > index 6d89e6c..2a4813b 100644
> > > --- a/src/gallium/drivers/r600/r600_state.c
> > > +++ b/src/gallium/drivers/r600/r600_state.c
> > > @@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct 
> > > r600_context *rctx,
> > >   return TRUE;
> > >  }
> > >  
> > > -static boolean r600_dma_blit(struct pipe_context *ctx,
> > > -                      struct pipe_resource *dst,
> > > -                      unsigned dst_level,
> > > -                      unsigned dst_x, unsigned dst_y, unsigned dst_z,
> > > -                      struct pipe_resource *src,
> > > -                      unsigned src_level,
> > > -                      const struct pipe_box *src_box)
> > > +static void r600_dma_blit(struct pipe_context *ctx,
> > > +                   struct pipe_resource *dst,
> > > +                   unsigned dst_level,
> > > +                   unsigned dstx, unsigned dsty, unsigned dstz,
> > > +                   struct pipe_resource *src,
> > > +                   unsigned src_level,
> > > +                   const struct pipe_box *src_box)
> > >  {
> > >   struct r600_context *rctx = (struct r600_context *)ctx;
> > >   struct r600_texture *rsrc = (struct r600_texture*)src;
> > > @@ -2897,18 +2897,22 @@ static boolean r600_dma_blit(struct pipe_context 
> > > *ctx,
> > >   unsigned dst_pitch, src_pitch, bpp, dst_mode, src_mode, copy_height;
> > >   unsigned src_w, dst_w;
> > >   unsigned src_x, src_y;
> > > + unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
> > >  
> > >   if (rctx->b.rings.dma.cs == NULL) {
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >  
> > >   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
> > > +         if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
> > > +                 goto fallback;
> > > +
> > >           r600_dma_copy(rctx, dst, src, dst_x, src_box->x, 
> > > src_box->width);
> > > -         return TRUE;
> > > +         return;
> > >   }
> > >  
> > > - if (src->format != dst->format) {
> > > -         return FALSE;
> > > + if (src->format != dst->format || src_box->depth > 1) {
> > > +         goto fallback;
> > >   }
> > >  
> > >   src_x = util_format_get_nblocksx(src->format, src_box->x);
> > > @@ -2931,11 +2935,11 @@ static boolean r600_dma_blit(struct pipe_context 
> > > *ctx,
> > >  
> > >   if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
> > >           /* strick requirement on r6xx/r7xx */
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >   /* lot of constraint on alignment this should capture them all */
> > >   if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
> > > -         return FALSE;
> > > +         goto fallback;
> > >   }
> > >  
> > >   if (src_mode == dst_mode) {
> > > @@ -2955,15 +2959,21 @@ static boolean r600_dma_blit(struct pipe_context 
> > > *ctx,
> > >           size = src_box->height * src_pitch;
> > >           /* must be dw aligned */
> > >           if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
> > > -                 return FALSE;
> > > +                 goto fallback;
> > >           }
> > >           r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
> > >   } else {
> > > -         return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, 
> > > dst_z,
> > > +         if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, 
> > > dst_z,
> > >                                   src, src_level, src_x, src_y, 
> > > src_box->z,
> > > -                                 copy_height, dst_pitch, bpp);
> > > +                                 copy_height, dst_pitch, bpp)) {
> > > +                 goto fallback;
> > > +         }
> > >   }
> > > - return TRUE;
> > > + return;
> > > +
> > > +fallback:
> > > + ctx->resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
> > > +                           src, src_level, src_box);
> > >  }
> > >  
> > >  void r600_init_state_functions(struct r600_context *rctx)
> > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
> > > b/src/gallium/drivers/radeon/r600_buffer_common.c
> > > index 90ca8cb..a7ecfb3 100644
> > > --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> > > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> > > @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct 
> > > pipe_context *ctx,
> > >   return data;
> > >  }
> > >  
> > > +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
> > > +                              unsigned dstx, unsigned srcx, unsigned 
> > > size)
> > > +{
> > > + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
> > > +
> > > + return rctx->screen->has_cp_dma ||
> > > +        (dword_aligned && (rctx->rings.dma.cs ||
> > > +                           rctx->screen->has_streamout));
> > > +
> > > +}
> > > +
> > >  static void *r600_buffer_transfer_map(struct pipe_context *ctx,
> > >                                        struct pipe_resource *resource,
> > >                                        unsigned level,
> > > @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct 
> > > pipe_context *ctx,
> > >   else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
> > >            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
> > >            !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
> > > -          (rscreen->has_cp_dma ||
> > > -           (rscreen->has_streamout &&
> > > -            /* The buffer range must be aligned to 4 with streamout. */
> > > -            box->x % 4 == 0 && box->width % 4 == 0))) {
> > > +          r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
> > >           assert(usage & PIPE_TRANSFER_WRITE);
> > >  
> > >           /* Check if mapping this buffer would cause waiting for the 
> > > GPU. */
> > > @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct 
> > > pipe_context *ctx,
> > >           /* At this point, the buffer is always idle (we checked it 
> > > above). */
> > >           usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
> > >   }
> > > - /* Using DMA for larger reads is much faster */
> > > + /* Using a staging buffer in GTT for larger reads is much faster. */
> > >   else if ((usage & PIPE_TRANSFER_READ) &&
> > >            !(usage & PIPE_TRANSFER_WRITE) &&
> > > -          (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
> > > +          rbuffer->domains == RADEON_DOMAIN_VRAM &&
> > > +          r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
> > >           unsigned offset;
> > >           struct r600_resource *staging = NULL;
> > >  
> > > @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct 
> > > pipe_context *ctx,
> > >           if (staging) {
> > >                   data += box->x % R600_MAP_BUFFER_ALIGNMENT;
> > >  
> > > -                 /* Copy the staging buffer into the original one. */
> > > -                 if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 
> > > 0,
> > > -                                          box->x % 
> > > R600_MAP_BUFFER_ALIGNMENT,
> > > -                                          0, 0, resource, level, box)) {
> > > -                         rctx->rings.gfx.flush(rctx, 0);
> > > -                         if (rctx->rings.dma.cs)
> > > -                                 rctx->rings.dma.flush(rctx, 0);
> > > -
> > > -                         /* Wait for any offloaded CS flush to complete
> > > -                          * to avoid busy-waiting in the winsys. */
> > > -                         rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
> > > -                         if (rctx->rings.dma.cs)
> > > -                                 
> > > rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
> > > -
> > > -                         rctx->ws->buffer_wait(staging->buf, 
> > > RADEON_USAGE_WRITE);
> > > -                         return r600_buffer_get_transfer(ctx, resource, 
> > > level, usage, box,
> > > -                                                         ptransfer, 
> > > data, staging, offset);
> > > -                 } else {
> > > -                         pipe_resource_reference((struct 
> > > pipe_resource**)&staging, NULL);
> > > -                 }
> > > +                 /* Copy the VRAM buffer to the staging buffer. */
> > > +                 rctx->dma_copy(ctx, &staging->b.b, 0,
> > > +                                box->x % R600_MAP_BUFFER_ALIGNMENT,
> > > +                                0, 0, resource, level, box);
> > > +
> > > +                 /* Just do the synchronization. The buffer is mapped 
> > > already. */
> > > +                 r600_buffer_map_sync_with_rings(rctx, staging, 
> > > PIPE_TRANSFER_READ);
> > > +
> > > +                 return r600_buffer_get_transfer(ctx, resource, level, 
> > > usage, box,
> > > +                                                 ptransfer, data, 
> > > staging, offset);
> > >           }
> > >   }
> > >  
> > > @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct 
> > > pipe_context *ctx,
> > >                   u_box_1d(soffset, size, &box);
> > >  
> > >                   /* Copy the staging buffer into the original one. */
> > > -                 if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
> > > -                     rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, 
> > > &box)) {
> > > -                         /* DONE. */
> > > -                 } else {
> > > -                         ctx->resource_copy_region(ctx, dst, 0, doffset, 
> > > 0, 0, src, 0, &box);
> > > -                 }
> > > +                 rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, 
> > > &box);
> > >           }
> > >           pipe_resource_reference((struct 
> > > pipe_resource**)&rtransfer->staging, NULL);
> > >   }
> > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
> > > b/src/gallium/drivers/radeon/r600_pipe_common.h
> > > index 493a8fc..5e544c5 100644
> > > --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> > > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> > > @@ -344,15 +344,14 @@ struct r600_common_context {
> > >   boolean                         current_render_cond_cond;
> > >   boolean                         predicate_drawing;
> > >  
> > > - /* Copy one resource to another using async DMA.
> > > -  * False is returned if the copy couldn't be done. */
> > > - boolean (*dma_copy)(struct pipe_context *ctx,
> > > -                     struct pipe_resource *dst,
> > > -                     unsigned dst_level,
> > > -                     unsigned dst_x, unsigned dst_y, unsigned dst_z,
> > > -                     struct pipe_resource *src,
> > > -                     unsigned src_level,
> > > -                     const struct pipe_box *src_box);
> > > + /* Copy one resource to another using async DMA. */
> > > + void (*dma_copy)(struct pipe_context *ctx,
> > > +                  struct pipe_resource *dst,
> > > +                  unsigned dst_level,
> > > +                  unsigned dst_x, unsigned dst_y, unsigned dst_z,
> > > +                  struct pipe_resource *src,
> > > +                  unsigned src_level,
> > > +                  const struct pipe_box *src_box);
> > >  
> > >   void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource 
> > > *dst,
> > >                        unsigned offset, unsigned size, unsigned value);
> > > diff --git a/src/gallium/drivers/radeon/r600_texture.c 
> > > b/src/gallium/drivers/radeon/r600_texture.c
> > > index e3b918e..5c32a84 100644
> > > --- a/src/gallium/drivers/radeon/r600_texture.c
> > > +++ b/src/gallium/drivers/radeon/r600_texture.c
> > > @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct 
> > > pipe_context *ctx, struct r600_t
> > >           return;
> > >   }
> > >  
> > > - if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
> > > -                       src, transfer->level,
> > > -                       &transfer->box)) {
> > > -         ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
> > > -                                   src, transfer->level, &transfer->box);
> > > - }
> > > + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
> > > +                &transfer->box);
> > >  }
> > >  
> > >  /* Copy from a transfer's staging texture to a full GPU one. */
> > > @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct 
> > > pipe_context *ctx, struct r600
> > >           return;
> > >   }
> > >  
> > > - if (!rctx->dma_copy(ctx, dst, transfer->level,
> > > -                       transfer->box.x, transfer->box.y, transfer->box.z,
> > > -                       src, 0, &sbox)) {
> > > -         ctx->resource_copy_region(ctx, dst, transfer->level,
> > > -                                   transfer->box.x, transfer->box.y, 
> > > transfer->box.z,
> > > -                                   src, 0, &sbox);
> > > - }
> > > + rctx->dma_copy(ctx, dst, transfer->level,
> > > +                transfer->box.x, transfer->box.y, transfer->box.z,
> > > +                src, 0, &sbox);
> > >  }
> > >  
> > >  static unsigned r600_texture_get_offset(struct r600_texture *rtex, 
> > > unsigned level,
> > > diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> > > b/src/gallium/drivers/radeonsi/si_state.c
> > > index f3d7f21..3015611 100644
> > > --- a/src/gallium/drivers/radeonsi/si_state.c
> > > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > > @@ -2924,16 +2924,19 @@ static void *si_create_blend_custom(struct 
> > > si_context *sctx, unsigned mode)
> > >   return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
> > >  }
> > >  
> > > -static boolean si_dma_copy(struct pipe_context *ctx,
> > > -                    struct pipe_resource *dst,
> > > -                    unsigned dst_level,
> > > -                    unsigned dst_x, unsigned dst_y, unsigned dst_z,
> > > -                    struct pipe_resource *src,
> > > -                    unsigned src_level,
> > > -                    const struct pipe_box *src_box)
> > > +static void si_dma_copy(struct pipe_context *ctx,
> > > +                 struct pipe_resource *dst,
> > > +                 unsigned dst_level,
> > > +                 unsigned dst_x, unsigned dst_y, unsigned dst_z,
> > > +                 struct pipe_resource *src,
> > > +                 unsigned src_level,
> > > +                 const struct pipe_box *src_box)
> > >  {
> > >   /* XXX implement this or share evergreen_dma_blit with r600g */
> > > - return FALSE;
> > > +
> > > + /* Fallback: */
> > > + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
> > > +                           src, src_level, src_box);
> > >  }
> > >  
> > >  static void si_set_occlusion_query_state(struct pipe_context *ctx, bool 
> > > enable)
> > 
> > This patch together with 2/3 also fixes ~900 opencl piglit regressions
> > introduced by f112ba03bbd6072df9f8879bb231f7f0abb14d2e radeon: Use
> > upload manager for buffer downloads
> 
> well, almost all. two piglits still regress even with these patches:
> r600 create release buffer bug  (pass->crash)
> gegl-rgb-gamma-u8-to-ragabaf  (pass->fail)
> 

Does this patch fix these regressions:
http://lists.freedesktop.org/archives/mesa-dev/2014-March/055221.html

-Tom

> > 
> > you can add my tested by
> > 
> > regards,
> 
> -- 
> Jan Vesely <jan.ves...@rutgers.edu>



> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to