From: Jerome Glisse <jgli...@redhat.com>

This simplifies and cleans up the async DMA packet checking.
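
Instead of decoding the tiled, new-command and misc bits of the DMA
packet header separately, the checker now switches on a single
sub-command field. As a rough sketch (the evergreend.h hunk is beyond
this excerpt, so the exact mask is an assumption), the new helper
consolidates the old GET_DMA_T/GET_DMA_NEW/GET_DMA_MISC macros into
one 8-bit field:

	/* hypothetical sketch: read bits 20-27 of the async DMA packet
	 * header as one sub-command value (e.g. 0x00 linear write, 0x08
	 * tiled write, 0x40-0x4f the various copy variants) */
	#define GET_DMA_SUB_CMD(h) (((h) & 0x0ff00000) >> 20)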

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/evergreen.c    |  16 +-
 drivers/gpu/drm/radeon/evergreen_cs.c | 807 +++++++++++++++++-----------------
 drivers/gpu/drm/radeon/evergreend.h   |  29 +-
 3 files changed, 417 insertions(+), 435 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f92f6bb..28f8d4f 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3223,14 +3223,14 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
        /* write the fence */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
        radeon_ring_write(ring, fence->seq);
        /* generate an interrupt */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
        /* flush HDP */
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL);
        radeon_ring_write(ring, 1);
 }
@@ -3253,7 +3253,7 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) 
& 0xff);
                radeon_ring_write(ring, next_rptr);
@@ -3263,8 +3263,8 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 
0));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+       radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | 
(upper_32_bits(ib->gpu_addr) & 0xFF));

@@ -3323,7 +3323,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
                if (cur_size_in_dw > 0xFFFFF)
                        cur_size_in_dw = 0xFFFFF;
                size_in_dw -= cur_size_in_dw;
-               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, 
cur_size_in_dw));
+               radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 
cur_size_in_dw));
                radeon_ring_write(ring, dst_offset & 0xfffffffc);
                radeon_ring_write(ring, src_offset & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
@@ -3431,7 +3431,7 @@ static int evergreen_startup(struct radeon_device *rdev)
        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        r = radeon_ring_init(rdev, ring, ring->ring_size, 
R600_WB_DMA_RPTR_OFFSET,
                             DMA_RB_RPTR, DMA_RB_WPTR,
-                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+                            2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
        if (r)
                return r;

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 7a44566..32c07bb 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2858,16 +2858,6 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
        return 0;
 }

-/*
- *  DMA
- */
-
-#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
-#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
-#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
-#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
-#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
-
 /**
  * evergreen_dma_cs_parse() - parse the DMA IB
  * @p:         parser structure holding parsing context.
@@ -2881,9 +2871,9 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 {
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
-       u32 header, cmd, count, tiled, new_cmd, misc;
+       u32 header, cmd, count, sub_cmd;
        volatile u32 *ib = p->ib.ptr;
-       u32 idx, idx_value;
+       u32 idx;
        u64 src_offset, dst_offset, dst2_offset;
        int r;

@@ -2897,9 +2887,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                header = radeon_get_ib_value(p, idx);
                cmd = GET_DMA_CMD(header);
                count = GET_DMA_COUNT(header);
-               tiled = GET_DMA_T(header);
-               new_cmd = GET_DMA_NEW(header);
-               misc = GET_DMA_MISC(header);
+               sub_cmd = GET_DMA_SUB_CMD(header);

                switch (cmd) {
                case DMA_PACKET_WRITE:
@@ -2908,19 +2896,27 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                                DRM_ERROR("bad DMA_PACKET_WRITE\n");
                                return -EINVAL;
                        }
-                       if (tiled) {
+                       switch (sub_cmd) {
+                       /* tiled */
+                       case 8:
                                dst_offset = ib[idx+1];
                                dst_offset <<= 8;

                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset 
>> 8);
                                p->idx += count + 7;
-                       } else {
+                               break;
+                       /* linear */
+                       case 0:
                                dst_offset = ib[idx+1];
                                dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;

                                ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 
0xfffffffc);
                                ib[idx+2] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                p->idx += count + 3;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x 
sub cmd is not 0 or 8\n", idx, ib[idx+0]);
+                               return -EINVAL;
                        }
                        if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
                                dev_warn(p->dev, "DMA write buffer too small 
(%llu %lu)\n",
@@ -2939,338 +2935,330 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
                                DRM_ERROR("bad DMA_PACKET_COPY\n");
                                return -EINVAL;
                        }
-                       if (tiled) {
-                               idx_value = radeon_get_ib_value(p, idx + 2);
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2T, frame to fields */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, 
frame to fields DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, 
&dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, 
frame to fields DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] 
& 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, frame to fields src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, frame to fields buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) 
> radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, frame to fields buffer too small (%llu %lu)\n",
-                                                                dst2_offset + 
(count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += 
(u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       case 1:
-                                               /* L2T, T2L partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2T, T2L 
Partial is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear 
dst */
-                                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled 
dst */
-                                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               p->idx += 12;
-                                               break;
-                                       case 3:
-                                               /* L2T, broadcast */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, 
broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, 
&dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, 
broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] 
& 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) 
> radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + 
(count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += 
(u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       case 4:
-                                               /* L2T, T2L */
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear 
dst */
-                                                       src_offset = ib[idx+1];
-                                                       src_offset <<= 8;
-                                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       dst_offset = ib[idx+7];
-                                                       dst_offset |= 
((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled 
dst */
-                                                       src_offset = ib[idx+7];
-                                                       src_offset |= 
((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       dst_offset = ib[idx+1];
-                                                       dst_offset <<= 8;
-                                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, T2L src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, T2L dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               p->idx += 9;
-                                               break;
-                                       case 5:
-                                               /* T2T partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2T, T2L 
Partial is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+4] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               p->idx += 13;
-                                               break;
-                                       case 7:
-                                               /* L2T, broadcast */
-                                               if (idx_value & (1 << 31)) {
-                                                       DRM_ERROR("bad L2T, 
broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               r = r600_dma_cs_next_reloc(p, 
&dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2T, 
broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset <<= 8;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset <<= 8;
-                                               src_offset = ib[idx+8];
-                                               src_offset |= ((u64)(ib[idx+9] 
& 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) 
> radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + 
(count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+2] += 
(u32)(dst2_reloc->lobj.gpu_offset >> 8);
-                                               ib[idx+8] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 10;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY 
misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                       switch (sub_cmd) {
+                       /* Copy L2L, DW aligned */
+                       case 0x00:
+                               /* L2L, dw */
+                               src_offset = ib[idx+2];
+                               src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw src 
buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw dst 
buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+3] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 5;
+                               break;
+                       /* Copy L2T/T2L */
+                       case 0x08:
+                               /* detile bit */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       src_offset = ib[idx+1];
+                                       src_offset <<= 8;
+                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       dst_offset = radeon_get_ib_value(p, idx 
+ 7);
+                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) 
<< 32;
+                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                } else {
-                                       switch (misc) {
-                                       case 0:
-                                               /* detile bit */
-                                               if (idx_value & (1 << 31)) {
-                                                       /* tiled src, linear 
dst */
-                                                       src_offset = ib[idx+1];
-                                                       src_offset <<= 8;
-                                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
-
-                                                       dst_offset = ib[idx+7];
-                                                       dst_offset |= 
((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               } else {
-                                                       /* linear src, tiled 
dst */
-                                                       src_offset = ib[idx+7];
-                                                       src_offset |= 
((u64)(ib[idx+8] & 0xff)) << 32;
-                                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-
-                                                       dst_offset = ib[idx+1];
-                                                       dst_offset <<= 8;
-                                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
-                                               }
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2T, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               p->idx += 9;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY 
misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                                       /* linear src, tiled dst */
+                                       src_offset = ib[idx+7];
+                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) 
<< 32;
+                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+                                       dst_offset = ib[idx+1];
+                                       dst_offset <<= 8;
+                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
                                }
-                       } else {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2L, byte */
-                                               src_offset = ib[idx+2];
-                                               src_offset |= ((u64)(ib[idx+4] 
& 0xff)) << 32;
-                                               dst_offset = ib[idx+1];
-                                               dst_offset |= ((u64)(ib[idx+3] 
& 0xff)) << 32;
-                                               if ((src_offset + count) > 
radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2L, byte src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
count, radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + count) > 
radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2L, byte dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
count, radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+2] += 
(u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+3] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+4] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 5;
-                                               break;
-                                       case 1:
-                                               /* L2L, partial */
-                                               if (p->family < CHIP_CAYMAN) {
-                                                       DRM_ERROR("L2L Partial 
is cayman only !\n");
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+2] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+4] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-                                               ib[idx+5] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-
-                                               p->idx += 9;
-                                               break;
-                                       case 4:
-                                               /* L2L, dw, broadcast */
-                                               r = r600_dma_cs_next_reloc(p, 
&dst2_reloc);
-                                               if (r) {
-                                                       DRM_ERROR("bad L2L, dw, 
broadcast DMA_PACKET_COPY\n");
-                                                       return -EINVAL;
-                                               }
-                                               dst_offset = ib[idx+1];
-                                               dst_offset |= ((u64)(ib[idx+4] 
& 0xff)) << 32;
-                                               dst2_offset = ib[idx+2];
-                                               dst2_offset |= ((u64)(ib[idx+5] 
& 0xff)) << 32;
-                                               src_offset = ib[idx+3];
-                                               src_offset |= ((u64)(ib[idx+6] 
& 0xff)) << 32;
-                                               if ((src_offset + (count * 4)) 
> radeon_bo_size(src_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2L, dw, broadcast src buffer too small (%llu %lu)\n",
-                                                                src_offset + 
(count * 4), radeon_bo_size(src_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst_offset + (count * 4)) 
> radeon_bo_size(dst_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
-                                                                dst_offset + 
(count * 4), radeon_bo_size(dst_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               if ((dst2_offset + (count * 4)) 
> radeon_bo_size(dst2_reloc->robj)) {
-                                                       dev_warn(p->dev, "DMA 
L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
-                                                                dst2_offset + 
(count * 4), radeon_bo_size(dst2_reloc->robj));
-                                                       return -EINVAL;
-                                               }
-                                               ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+2] += 
(u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+3] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                               ib[idx+4] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+5] += 
upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
-                                               ib[idx+6] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                               p->idx += 7;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY 
misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, src buffer 
too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, dst buffer 
too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               p->idx += 9;
+                               break;
+                       /* Copy L2L, byte aligned */
+                       case 0x40:
+                               /* L2L, byte */
+                               src_offset = ib[idx+2];
+                               src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+                               if ((src_offset + count) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, byte src 
buffer too small (%llu %lu)\n",
+                                                       src_offset + count, 
radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + count) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, byte dst 
buffer too small (%llu %lu)\n",
+                                                       dst_offset + count, 
radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 
0xffffffff);
+                               ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 
0xffffffff);
+                               ib[idx+3] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 5;
+                               break;
+                       /* Copy L2L, partial */
+                       case 0x41:
+                               /* L2L, partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2L Partial is cayman only 
!\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 
0xffffffff);
+                               ib[idx+2] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 
0xffffffff);
+                               ib[idx+5] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+                               p->idx += 9;
+                               break;
+                       /* Copy L2L, DW aligned, broadcast */
+                       case 0x44:
+                               /* L2L, dw, broadcast */
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2L, dw, broadcast 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+                               src_offset = ib[idx+3];
+                               src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, 
broadcast src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, 
broadcast dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > 
radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2L, dw, 
broadcast dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 
4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset 
& 0xfffffffc);
+                               ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+4] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+5] += 
upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+                               ib[idx+6] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 7;
+                               break;
+                       /* Copy L2T Frame to Field */
+                       case 0x48:
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, frame to fields 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, frame to fields 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to 
fields src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to 
fields buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > 
radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, frame to 
fields buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 
4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       /* Copy L2T/T2L, partial */
+                       case 0x49:
+                               /* L2T, T2L partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2T, T2L Partial is cayman 
only !\n");
+                                       return -EINVAL;
+                               }
+                               /* detile bit */
+                               if (ib[idx + 2 ] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+                               } else {
+                                       /* linear src, tiled dst */
+                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
+                               }
+                               p->idx += 12;
+                               break;
+                       /* Copy L2T broadcast */
+                       case 0x4b:
+                               /* L2T, broadcast */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, broadcast 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, broadcast 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > 
radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 
4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       /* Copy L2T/T2L (tile units) */
+                       case 0x4c:
+                               /* L2T, T2L */
+                               /* detile bit */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       /* tiled src, linear dst */
+                                       src_offset = ib[idx+1];
+                                       src_offset <<= 8;
+                                       ib[idx+1] += 
(u32)(src_reloc->lobj.gpu_offset >> 8);
+
+                                       dst_offset = ib[idx+7];
+                                       dst_offset |= ((u64)(ib[idx+8] & 0xff)) 
<< 32;
+                                       ib[idx+7] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
                                } else {
-                                       /* L2L, dw */
-                                       src_offset = ib[idx+2];
-                                       src_offset |= ((u64)(ib[idx+4] & 0xff)) 
<< 32;
+                                       /* linear src, tiled dst */
+                                       src_offset = ib[idx+7];
+                                       src_offset |= ((u64)(ib[idx+8] & 0xff)) 
<< 32;
+                                       ib[idx+7] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+                                       ib[idx+8] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
                                        dst_offset = ib[idx+1];
-                                       dst_offset |= ((u64)(ib[idx+3] & 0xff)) 
<< 32;
-                                       if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
-                                               dev_warn(p->dev, "DMA L2L, dw 
src buffer too small (%llu %lu)\n",
-                                                        src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
-                                               return -EINVAL;
-                                       }
-                                       if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
-                                               dev_warn(p->dev, "DMA L2L, dw 
dst buffer too small (%llu %lu)\n",
-                                                        dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
-                                               return -EINVAL;
-                                       }
-                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-                                       ib[idx+2] += 
(u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-                                       ib[idx+3] += 
upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-                                       ib[idx+4] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-                                       p->idx += 5;
+                                       dst_offset <<= 8;
+                                       ib[idx+1] += 
(u32)(dst_reloc->lobj.gpu_offset >> 8);
                                }
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, T2L src 
buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, T2L dst 
buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               p->idx += 9;
+                               break;
+                       /* Copy T2T, partial (tile units) */
+                       case 0x4d:
+                               /* T2T partial */
+                               if (p->family < CHIP_CAYMAN) {
+                                       DRM_ERROR("L2T, T2L Partial is cayman 
only !\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset 
>> 8);
+                               p->idx += 13;
+                               break;
+                       /* Copy L2T broadcast (tile units) */
+                       case 0x4f:
+                               /* L2T, broadcast */
+                               if (ib[idx + 2] & (1 << 31)) {
+                                       DRM_ERROR("bad L2T, broadcast 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+                               if (r) {
+                                       DRM_ERROR("bad L2T, broadcast 
DMA_PACKET_COPY\n");
+                                       return -EINVAL;
+                               }
+                               dst_offset = ib[idx+1];
+                               dst_offset <<= 8;
+                               dst2_offset = ib[idx+2];
+                               dst2_offset <<= 8;
+                               src_offset = ib[idx+8];
+                               src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+                               if ((src_offset + (count * 4)) > 
radeon_bo_size(src_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
src buffer too small (%llu %lu)\n",
+                                                       src_offset + (count * 
4), radeon_bo_size(src_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst_offset + (count * 4)) > 
radeon_bo_size(dst_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
dst buffer too small (%llu %lu)\n",
+                                                       dst_offset + (count * 
4), radeon_bo_size(dst_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               if ((dst2_offset + (count * 4)) > 
radeon_bo_size(dst2_reloc->robj)) {
+                                       dev_warn(p->dev, "DMA L2T, broadcast 
dst2 buffer too small (%llu %lu)\n",
+                                                       dst2_offset + (count * 
4), radeon_bo_size(dst2_reloc->robj));
+                                       return -EINVAL;
+                               }
+                               ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset 
>> 8);
+                               ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 
0xfffffffc);
+                               ib[idx+9] += 
upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+                               p->idx += 10;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib[idx+0]);
+                               return -EINVAL;
                        }
                        break;
                case DMA_PACKET_CONSTANT_FILL:
@@ -3623,88 +3611,79 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        u32 idx = 0;
-       u32 header, cmd, count, tiled, new_cmd, misc;
+       u32 header, cmd, count, sub_cmd;

        do {
                header = ib->ptr[idx];
                cmd = GET_DMA_CMD(header);
                count = GET_DMA_COUNT(header);
-               tiled = GET_DMA_T(header);
-               new_cmd = GET_DMA_NEW(header);
-               misc = GET_DMA_MISC(header);
+               sub_cmd = GET_DMA_SUB_CMD(header);

                switch (cmd) {
                case DMA_PACKET_WRITE:
-                       if (tiled)
+                       switch (sub_cmd) {
+                       /* tiled */
+                       case 8:
                                idx += count + 7;
-                       else
+                               break;
+                       /* linear */
+                       case 0:
                                idx += count + 3;
+                               break;
+                       default:
+                       DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
+                               return -EINVAL;
+                       }
                        break;
                case DMA_PACKET_COPY:
-                       if (tiled) {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2T, frame to fields */
-                                               idx += 10;
-                                               break;
-                                       case 1:
-                                               /* L2T, T2L partial */
-                                               idx += 12;
-                                               break;
-                                       case 3:
-                                               /* L2T, broadcast */
-                                               idx += 10;
-                                               break;
-                                       case 4:
-                                               /* L2T, T2L */
-                                               idx += 9;
-                                               break;
-                                       case 5:
-                                               /* T2T partial */
-                                               idx += 13;
-                                               break;
-                                       case 7:
-                                               /* L2T, broadcast */
-                                               idx += 10;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       switch (misc) {
-                                       case 0:
-                                               idx += 9;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               }
-                       } else {
-                               if (new_cmd) {
-                                       switch (misc) {
-                                       case 0:
-                                               /* L2L, byte */
-                                               idx += 5;
-                                               break;
-                                       case 1:
-                                               /* L2L, partial */
-                                               idx += 9;
-                                               break;
-                                       case 4:
-                                               /* L2L, dw, broadcast */
-                                               idx += 7;
-                                               break;
-                                       default:
-                                               DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
-                                               return -EINVAL;
-                                       }
-                               } else {
-                                       /* L2L, dw */
-                                       idx += 5;
-                               }
+                       switch (sub_cmd) {
+                       /* Copy L2L, DW aligned */
+                       case 0x00:
+                               idx += 5;
+                               break;
+                       /* Copy L2T/T2L */
+                       case 0x08:
+                               idx += 9;
+                               break;
+                       /* Copy L2L, byte aligned */
+                       case 0x40:
+                               idx += 5;
+                               break;
+                       /* Copy L2L, partial */
+                       case 0x41:
+                               idx += 9;
+                               break;
+                       /* Copy L2L, DW aligned, broadcast */
+                       case 0x44:
+                               idx += 7;
+                               break;
+                       /* Copy L2T Frame to Field */
+                       case 0x48:
+                               idx += 10;
+                               break;
+                       /* Copy L2T/T2L, partial */
+                       case 0x49:
+                               idx += 12;
+                               break;
+                       /* Copy L2T broadcast */
+                       case 0x4b:
+                               idx += 10;
+                               break;
+                       /* Copy L2T/T2L (tile units) */
+                       case 0x4c:
+                               idx += 9;
+                               break;
+                       /* Copy T2T, partial (tile units) */
+                       case 0x4d:
+                               idx += 13;
+                               break;
+                       /* Copy L2T broadcast (tile units) */
+                       case 0x4f:
+                               idx += 10;
+                               break;
+                       default:
+                               DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
+                               return -EINVAL;
                        }
                        break;
                case DMA_PACKET_CONSTANT_FILL:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 5786a32..abb1571 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -924,20 +924,23 @@
 #define CAYMAN_DMA1_CNTL                                  0xd82c

 /* async DMA packets */
-#define DMA_PACKET(cmd, t, s, n)       ((((cmd) & 0xF) << 28) |        \
-                                        (((t) & 0x1) << 23) |          \
-                                        (((s) & 0x1) << 22) |          \
-                                        (((n) & 0xFFFFF) << 0))
+#define DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) |    \
+                                    (((sub_cmd) & 0xFF) << 20) |\
+                                    (((n) & 0xFFFFF) << 0))
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_SUB_CMD(h) (((h) & 0x0ff00000) >> 20)
+
 /* async DMA Packet types */
-#define        DMA_PACKET_WRITE                                  0x2
-#define        DMA_PACKET_COPY                                   0x3
-#define        DMA_PACKET_INDIRECT_BUFFER                        0x4
-#define        DMA_PACKET_SEMAPHORE                              0x5
-#define        DMA_PACKET_FENCE                                  0x6
-#define        DMA_PACKET_TRAP                                   0x7
-#define        DMA_PACKET_SRBM_WRITE                             0x9
-#define        DMA_PACKET_CONSTANT_FILL                          0xd
-#define        DMA_PACKET_NOP                                    0xf
+#define        DMA_PACKET_WRITE                        0x2
+#define        DMA_PACKET_COPY                         0x3
+#define        DMA_PACKET_INDIRECT_BUFFER              0x4
+#define        DMA_PACKET_SEMAPHORE                    0x5
+#define        DMA_PACKET_FENCE                        0x6
+#define        DMA_PACKET_TRAP                         0x7
+#define        DMA_PACKET_SRBM_WRITE                   0x9
+#define        DMA_PACKET_CONSTANT_FILL                0xd
+#define        DMA_PACKET_NOP                          0xf

 /* PCIE link stuff */
 #define PCIE_LC_TRAINING_CNTL                             0xa1 /* PCIE_P */
-- 
1.7.11.7
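
For reference (not part of the patch): a minimal sketch of how a DMA header is
built and decoded with the reworked macros from evergreend.h above, assuming
kernel types (u32) and the DMA_PACKET/GET_DMA_* definitions introduced here.
The values in the comments follow from the macro arithmetic.

	/* Illustrative only, not code from the patch. */
	static void example_dma_header(void)
	{
		/* encode an L2L-partial copy of 0x10 dwords with the new
		 * 8-bit sub command field */
		u32 header = DMA_PACKET(DMA_PACKET_COPY, 0x41, 0x10);

		/* decode it again with the matching helpers */
		u32 cmd     = GET_DMA_CMD(header);     /* 0x3  -> DMA_PACKET_COPY */
		u32 sub_cmd = GET_DMA_SUB_CMD(header); /* 0x41 -> L2L, partial    */
		u32 count   = GET_DMA_COUNT(header);   /* 0x10 dwords             */
	}

As the rewritten switch tables show, the old tiled+new misc values
0/1/3/4/5/7 reappear as sub commands 0x48/0x49/0x4b/0x4c/0x4d/0x4f, i.e. the
sub command byte packs the previous new/tiled/misc bits as
(new << 6) | (tiled << 3) | misc, which is why a single 8-bit field can
replace the three separate header fields.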
