[PATCH 5/7] drm/nouveau: do not map evicted vram buffers in nouveau_bo_vma_add
From: Maarten Lankhorst

Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 9ecb874..bb3734d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1549,7 +1549,8 @@ nouveau_bo_vma_add(struct nouveau_bo *nvbo, struct nouveau_vm *vm,
 
 	if (nvbo->bo.mem.mem_type == TTM_PL_VRAM)
 		nouveau_vm_map(vma, nvbo->bo.mem.mm_node);
-	else if (nvbo->bo.mem.mem_type == TTM_PL_TT) {
+	else if (nvbo->bo.mem.mem_type == TTM_PL_TT &&
+		 nvbo->page_shift == vma->vm->vmm->spg_shift) {
 		if (node->sg)
			nouveau_vm_map_sg_table(vma, 0, size, node);
 		else
-- 
1.8.4
[PATCH 4/7] drm/nvc0-/gr: shift wrapping bug in nvc0_grctx_generate_r406800
From: Dan Carpenter

We care about the upper 32 bits here so we have to use 1ULL instead of
1 to avoid a shift wrapping bug.

Signed-off-by: Dan Carpenter
Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c
index 64dca26..fe67415 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c
@@ -1039,7 +1039,7 @@ nvc0_grctx_generate_r406800(struct nvc0_graph_priv *priv)
 		} while (!tpcnr[gpc]);
 		tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--;
 
-		tpc_set |= 1 << ((gpc * 8) + tpc);
+		tpc_set |= 1ULL << ((gpc * 8) + tpc);
 	}
 
 	nv_wr32(priv, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
-- 
1.8.4
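The bug class is easy to demonstrate in plain userspace C: tpc_set is a
u64, but the literal 1 is a 32-bit int, so for bit indices above 31 the
shift is undefined behaviour and the high half of the mask is lost. A
minimal standalone sketch follows (the gpc/tpc values are hypothetical,
chosen so the bit index exceeds 31):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int gpc = 5, tpc = 3;	/* hypothetical indices: bit (5 * 8) + 3 = 43 */
	int shift = (gpc * 8) + tpc;

	/* '1' is a 32-bit int, so shifting it by 43 is undefined
	 * behaviour; typical x86 codegen masks the count to 5 bits and
	 * sets bit 11 instead of bit 43. */
	uint64_t wrong = 1 << shift;
	/* 1ULL makes the whole shift 64-bit, so bit 43 lands where
	 * intended. */
	uint64_t right = 1ULL << shift;

	printf("1    << %d = 0x%016llx\n", shift, (unsigned long long)wrong);
	printf("1ULL << %d = 0x%016llx\n", shift, (unsigned long long)right);
	return 0;
}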
[PATCH 7/7] drm/nouveau: use a single vma for display
From: Maarten Lankhorst

No need to map the same vma multiple times.

Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/nouveau_fence.h |  4 ++--
 drivers/gpu/drm/nouveau/nv50_display.c  | 13 ++++++-------
 drivers/gpu/drm/nouveau/nv50_display.h  |  2 +-
 drivers/gpu/drm/nouveau/nv50_fence.c    | 24 +---
 drivers/gpu/drm/nouveau/nv84_fence.c    | 21 +
 5 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index c57bb61..60ae4e7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -82,7 +82,7 @@ struct nv84_fence_chan {
 	struct nouveau_fence_chan base;
 	struct nouveau_vma vma;
 	struct nouveau_vma vma_gart;
-	struct nouveau_vma dispc_vma[4];
+	struct nouveau_vma dispc_vma;
 };
 
 struct nv84_fence_priv {
@@ -92,7 +92,7 @@ struct nv84_fence_priv {
 	u32 *suspend;
 };
 
-u64 nv84_fence_crtc(struct nouveau_channel *, int);
+u64 nv84_fence_crtc(struct nouveau_channel *);
 int nv84_fence_context_new(struct nouveau_channel *);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index f8e66c0..4153c8a 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -433,7 +433,7 @@ evo_kick(u32 *push, void *evoc)
 static bool
 evo_sync_wait(void *data)
 {
-	if (nouveau_bo_rd32(data, EVO_MAST_NTFY) != 0x00000000)
+	if (nouveau_bo_rd32(data, EVO_MAST_NTFY / 4) != 0x00000000)
 		return true;
 	usleep_range(1, 2);
 	return false;
@@ -447,7 +447,7 @@ evo_sync(struct drm_device *dev)
 	struct nv50_mast *mast = nv50_mast(dev);
 	u32 *push = evo_wait(mast, 8);
 	if (push) {
-		nouveau_bo_wr32(disp->sync, EVO_MAST_NTFY, 0x00000000);
+		nouveau_bo_wr32(disp->sync, EVO_MAST_NTFY / 4, 0x00000000);
 		evo_mthd(push, 0x0084, 1);
 		evo_data(push, 0x80000000 | EVO_MAST_NTFY);
 		evo_mthd(push, 0x0080, 2);
@@ -465,7 +465,7 @@ evo_sync(struct drm_device *dev)
  * Page flipping channel
  */
 struct nouveau_bo *
-nv50_display_crtc_sema(struct drm_device *dev, int crtc)
+nv50_display_crtc_sema(struct drm_device *dev)
 {
 	return nv50_disp(dev)->sync;
 }
@@ -517,7 +517,6 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		       struct nouveau_channel *chan, u32 swap_interval)
 {
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv50_head *head = nv50_head(crtc);
 	struct nv50_sync *sync = nv50_sync(crtc);
 	u32 *push;
@@ -539,7 +538,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			return ret;
 
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING (chan, NvEvoSema0 + nv_crtc->index);
+		OUT_RING (chan, NvEvoSema0);
 		OUT_RING (chan, sync->addr ^ 0x10);
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
 		OUT_RING (chan, sync->data + 1);
@@ -548,7 +547,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		OUT_RING (chan, sync->data);
 	} else
 	if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
-		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan) + sync->addr;
 		ret = RING_SPACE(chan, 12);
 		if (ret)
 			return ret;
@@ -567,7 +566,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 	} else
 	if (chan) {
-		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan) + sync->addr;
 		ret = RING_SPACE(chan, 10);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h
index 70da347..ea681be 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.h
+++ b/drivers/gpu/drm/nouveau/nv50_display.h
@@ -40,6 +40,6 @@ void nv50_display_flip_stop(struct drm_crtc *);
 int nv50_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
			    struct nouveau_channel *, u32 swap_interval);
 
-struct nouveau_bo *nv50_display_crtc_sema(struct drm_device *, int head);
+struct nouveau_bo *nv50_display_crtc_sema(struct drm_device *);
 
 #endif /* __NV50_DISPLAY_H__ */
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv
[PATCH 2/7] drm/nv50-: untile mmap'd bo's
From: Maarten Lankhorst

Map the GART to the BAR and use that mapping to hide all the tiling
details from users.

Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c |  5 ++++-
 drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c |  5 ++++-
 drivers/gpu/drm/nouveau/nouveau_bo.c           | 20 +++++++++++++++++---
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c
index 160d27f..9907a25 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/nv50.c
@@ -67,7 +67,10 @@ nv50_bar_umap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 	if (ret)
 		return ret;
 
-	nouveau_vm_map(vma, mem);
+	if (mem->pages)
+		nouveau_vm_map_sg(vma, 0, mem->size << 12, mem);
+	else
+		nouveau_vm_map(vma, mem);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
index b2ec741..badd835 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c
@@ -66,7 +66,10 @@ nvc0_bar_umap(struct nouveau_bar *bar, struct nouveau_mem *mem,
 	if (ret)
 		return ret;
 
-	nouveau_vm_map(vma, mem);
+	if (mem->pages)
+		nouveau_vm_map_sg(vma, 0, mem->size << 12, mem);
+	else
+		nouveau_vm_map(vma, mem);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 639d7cd..9ecb874 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1259,6 +1259,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
+	struct nouveau_mem *node = mem->mm_node;
 	struct drm_device *dev = drm->dev;
 	int ret;
 
@@ -1281,14 +1282,16 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 			mem->bus.is_iomem = !dev->agp->cant_use_aperture;
 		}
 #endif
-		break;
+		if (!node->memtype)
+			/* untiled */
+			break;
+		/* fallthrough, tiled memory */
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		mem->bus.base = pci_resource_start(dev->pdev, 1);
 		mem->bus.is_iomem = true;
 		if (nv_device(drm->device)->card_type >= NV_50) {
 			struct nouveau_bar *bar = nouveau_bar(drm->device);
-			struct nouveau_mem *node = mem->mm_node;
 
 			ret = bar->umap(bar, node, NV_MEM_ACCESS_RW,
					&node->bar_vma);
@@ -1324,6 +1327,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nouveau_device *device = nv_device(drm->device);
 	u32 mappable = pci_resource_len(device->pdev, 1) >> PAGE_SHIFT;
+	int ret;
 
 	/* as long as the bo isn't in vram, and isn't tiled, we've got
 	 * nothing to do here.
@@ -1332,10 +1336,20 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 		if (nv_device(drm->device)->card_type < NV_50 ||
		    !nouveau_bo_tile_layout(nvbo))
 			return 0;
+
+		if (bo->mem.mem_type == TTM_PL_SYSTEM) {
+			nouveau_bo_placement_set(nvbo, TTM_PL_TT, 0);
+
+			ret = nouveau_bo_validate(nvbo, false, false);
+			if (ret)
+				return ret;
+		}
+		return 0;
 	}
 
 	/* make sure bo is in mappable vram */
-	if (bo->mem.start + bo->mem.num_pages < mappable)
+	if (nv_device(drm->device)->card_type >= NV_50 ||
+	    bo->mem.start + bo->mem.num_pages < mappable)
 		return 0;
 
 	nvbo->placement.fpfn = 0;
-- 
1.8.4
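The subtle part of the nouveau_ttm_io_mem_reserve() change is the
deliberate case fallthrough: untiled GART keeps taking the linear
aperture path, while tiled GART now drops into the TTM_PL_VRAM branch
and gets a BAR vma, which is what hides the tiling from CPU mappings.
A toy sketch of that dispatch, with invented names standing in for the
real TTM types:

#include <stdio.h>

enum mem_type { PL_SYSTEM, PL_TT, PL_VRAM };	/* stand-ins for TTM_PL_* */

/* memtype != 0 plays the role of nouveau_mem.memtype: a tiled layout. */
static const char *reserve_path(enum mem_type type, int memtype)
{
	switch (type) {
	case PL_TT:
		if (!memtype)
			break;		/* untiled: linear GART aperture */
		/* fallthrough: tiled GART wants a BAR mapping too */
	case PL_VRAM:
		return "BAR vma (untiled view)";
	default:
		break;
	}
	return "direct/linear mapping";
}

int main(void)
{
	printf("TT untiled -> %s\n", reserve_path(PL_TT, 0x00));
	printf("TT tiled   -> %s\n", reserve_path(PL_TT, 0xfe));
	printf("VRAM       -> %s\n", reserve_path(PL_VRAM, 0x70));
	return 0;
}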
[PATCH 6/7] drm/nouveau: more paranoia in nouveau_bo_fixup_align
From: Maarten Lankhorst

Make sure that buffers are always aligned.

Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index bb3734d..635a192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -160,24 +160,20 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct nouveau_device *device = nv_device(drm->device);
 
-	if (device->card_type < NV_50) {
-		if (nvbo->tile_mode) {
-			if (device->chipset >= 0x40) {
-				*align = 65536;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
-
-			} else if (device->chipset >= 0x30) {
-				*align = 32768;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
-
-			} else if (device->chipset >= 0x20) {
-				*align = 16384;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
-
-			} else if (device->chipset >= 0x10) {
-				*align = 16384;
-				*size = roundup(*size, 32 * nvbo->tile_mode);
-			}
+	if (device->chipset >= 0x10 && device->card_type < NV_50 &&
+	    nvbo->tile_mode) {
+		if (device->chipset >= 0x40) {
+			*align = 65536;
+			*size = roundup(*size, 64 * nvbo->tile_mode);
+		} else if (device->chipset >= 0x30) {
+			*align = 32768;
+			*size = roundup(*size, 64 * nvbo->tile_mode);
+		} else if (device->chipset >= 0x20) {
+			*align = 16384;
+			*size = roundup(*size, 64 * nvbo->tile_mode);
+		} else {
+			*align = 16384;
+			*size = roundup(*size, 32 * nvbo->tile_mode);
 		}
 	} else {
 		*size = roundup(*size, (1 << nvbo->page_shift));
@@ -228,8 +224,14 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
 			nvbo->page_shift = drm->client.base.vm->vmm->lpg_shift;
 	}
-
 	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
+	if (size <= 0) {
+		nv_warn(drm, "invalid size %x after setting alignment %x\n",
+			size, align);
+		kfree(nvbo);
+		return -EINVAL;
+	}
+
 	nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
 	nouveau_bo_placement_set(nvbo, flags, 0);
-- 
1.8.4
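One way the new size <= 0 check can trigger: nouveau_bo_new() takes
size as a signed int, and rounding a value near INT_MAX up to the tile
alignment no longer fits in an int, so the rounded size comes back
negative. A userspace illustration (the arithmetic is kept unsigned so
the demo itself has no undefined behaviour; the negative value appears
on conversion back to int, which is how the kernel's int size would end
up):

#include <limits.h>
#include <stdio.h>

/* Kernel-style roundup(): smallest multiple of y that is >= x. */
static unsigned roundup_u(unsigned x, unsigned y)
{
	return (x + y - 1) / y * y;
}

int main(void)
{
	/* A well-behaved request: 100 KiB rounded to 64 KiB granularity
	 * stays positive. */
	int ok = (int)roundup_u(100 * 1024, 65536);

	/* A hostile request near INT_MAX rounds up to 2^31, which no
	 * longer fits in int; reinterpreted as int it is negative --
	 * the case the new check in nouveau_bo_new() rejects. */
	int bad = (int)roundup_u((unsigned)INT_MAX - 1000, 65536);

	printf("ok  = %d\n", ok);
	printf("bad = %d\n", bad);
	return 0;
}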
[PATCH 1/7] drm/nouveau: fix m2mf copy to tiled gart
From: Maarten Lankhorst

Commit de7b7d59d54852c introduced tiled GART, but a linear copy is
still performed. This may result in errors on eviction; fix it by
checking tiling from memtype.

Signed-off-by: Maarten Lankhorst
Cc: stable@vger.kernel.org #3.10+
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 33 ++++++++-------------------------
 1 file changed, 8 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index f5b0201..639d7cd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -803,25 +803,25 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_mem *node = old_mem->mm_node;
-	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	u64 length = (new_mem->num_pages << PAGE_SHIFT);
 	u64 src_offset = node->vma[0].offset;
 	u64 dst_offset = node->vma[1].offset;
+	int src_tiled = !!node->memtype;
+	int dst_tiled = !!((struct nouveau_mem *)new_mem->mm_node)->memtype;
 	int ret;
 
 	while (length) {
 		u32 amount, stride, height;
 
+		ret = RING_SPACE(chan, 18 + 6 * (src_tiled + dst_tiled));
+		if (ret)
+			return ret;
+
 		amount = min(length, (u64)(4 * 1024 * 1024));
 		stride = 16 * 4;
 		height = amount / stride;
 
-		if (old_mem->mem_type == TTM_PL_VRAM &&
-		    nouveau_bo_tile_layout(nvbo)) {
-			ret = RING_SPACE(chan, 8);
-			if (ret)
-				return ret;
-
+		if (src_tiled) {
 			BEGIN_NV04(chan, NvSubCopy, 0x0200, 7);
 			OUT_RING (chan, 0);
 			OUT_RING (chan, 0);
@@ -831,19 +831,10 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			OUT_RING (chan, 0);
 			OUT_RING (chan, 0);
 		} else {
-			ret = RING_SPACE(chan, 2);
-			if (ret)
-				return ret;
-
 			BEGIN_NV04(chan, NvSubCopy, 0x0200, 1);
 			OUT_RING (chan, 1);
 		}
-		if (new_mem->mem_type == TTM_PL_VRAM &&
-		    nouveau_bo_tile_layout(nvbo)) {
-			ret = RING_SPACE(chan, 8);
-			if (ret)
-				return ret;
-
+		if (dst_tiled) {
 			BEGIN_NV04(chan, NvSubCopy, 0x021c, 7);
 			OUT_RING (chan, 0);
 			OUT_RING (chan, 0);
@@ -853,18 +844,10 @@ nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 			OUT_RING (chan, 0);
 			OUT_RING (chan, 0);
 		} else {
-			ret = RING_SPACE(chan, 2);
-			if (ret)
-				return ret;
-
 			BEGIN_NV04(chan, NvSubCopy, 0x021c, 1);
 			OUT_RING (chan, 1);
 		}
 
-		ret = RING_SPACE(chan, 14);
-		if (ret)
-			return ret;
-
 		BEGIN_NV04(chan, NvSubCopy, 0x0238, 2);
 		OUT_RING (chan, upper_32_bits(src_offset));
 		OUT_RING (chan, upper_32_bits(dst_offset));
-- 
1.8.4
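As a sanity check, the single up-front reservation matches what the old
code requested piecewise (dword counts read off the BEGIN_NV04/OUT_RING
sequences above):

  src surface setup: 8 dwords if tiled (1 method header + 7 data), else 2
  dst surface setup: 8 dwords if tiled, else 2
  copy submission:   14 dwords

  old total = (2 or 8) + (2 or 8) + 14
  new total = 18 + 6 * (src_tiled + dst_tiled)
            = 18 linear-to-linear, 24 with one tiled side, 30 with both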
[PATCH 3/7] drm/nouveau: fixup locking inversion between mmap_sem and reservations
From: Maarten Lankhorst

Allocate and copy all kernel memory before doing reservations. This
prevents a locking inversion between mmap_sem and reservation_class,
and allows us to drop the trylocking in ttm_bo_vm_fault without
upsetting lockdep.

Relocations are handled by trying with __copy_from_user_inatomic
first. If that fails all validation will be undone, memory copied from
userspace and all validations retried.

Signed-off-by: Maarten Lankhorst
---
 drivers/gpu/drm/nouveau/nouveau_gem.c | 188 +-
 1 file changed, 119 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f32b712..41a4bf6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -464,8 +464,6 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 	      uint64_t user_pbbo_ptr)
 {
 	struct nouveau_drm *drm = chan->drm;
-	struct drm_nouveau_gem_pushbuf_bo __user *upbbo =
-		(void __force __user *)(uintptr_t)user_pbbo_ptr;
 	struct nouveau_bo *nvbo;
 	int ret, relocs = 0;
 
@@ -499,7 +497,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		if (nv_device(drm->device)->card_type < NV_50) {
+		if (nv_device(drm->device)->card_type < NV_50 && !relocs) {
 			if (nvbo->bo.offset == b->presumed.offset &&
			    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
			      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
@@ -507,32 +505,53 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
			      b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART)))
				continue;
 
-			if (nvbo->bo.mem.mem_type == TTM_PL_TT)
-				b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART;
-			else
-				b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM;
-			b->presumed.offset = nvbo->bo.offset;
-			b->presumed.valid = 0;
-			relocs++;
-
-			if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed,
-					     &b->presumed, sizeof(b->presumed)))
-				return -EFAULT;
+			relocs = 1;
 		}
 	}
 
 	return relocs;
 }
 
+static inline void *
+u_memcpya(uint64_t user, unsigned nmemb, unsigned size, unsigned inatomic)
+{
+	void *mem;
+	void __user *userptr = (void __force __user *)(uintptr_t)user;
+
+	mem = drm_malloc_ab(size, nmemb);
+	if (!mem)
+		return ERR_PTR(-ENOMEM);
+	size *= nmemb;
+
+	if (inatomic && (!access_ok(VERIFY_READ, userptr, size) ||
+	    __copy_from_user_inatomic(mem, userptr, size))) {
+		drm_free_large(mem);
+		return ERR_PTR(-EFAULT);
+	} else if (!inatomic && copy_from_user(mem, userptr, size)) {
+		drm_free_large(mem);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return mem;
+}
+
+static int
+nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
+				struct drm_nouveau_gem_pushbuf *req,
+				struct drm_nouveau_gem_pushbuf_bo *bo,
+				struct drm_nouveau_gem_pushbuf_reloc *reloc);
+
 static int
 nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
			     struct drm_file *file_priv,
			     struct drm_nouveau_gem_pushbuf_bo *pbbo,
+			     struct drm_nouveau_gem_pushbuf *req,
			     uint64_t user_buffers, int nr_buffers,
-			     struct validate_op *op, int *apply_relocs)
+			     struct validate_op *op, int *do_reloc)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	int ret, relocs = 0;
+	struct drm_nouveau_gem_pushbuf_reloc *reloc = NULL;
 
 	INIT_LIST_HEAD(&op->vram_list);
 	INIT_LIST_HEAD(&op->gart_list);
@@ -541,19 +560,19 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 	if (nr_buffers == 0)
 		return 0;
 
+restart:
 	ret = validate_init(chan, file_priv, pbbo, nr_buffers, op);
 	if (unlikely(ret)) {
 		if (ret != -ERESTARTSYS)
 			NV_ERROR(cli, "validate_init\n");
-		return ret;
+		goto err;
 	}
 
 	ret = validate_list(chan, cli, &op->vram_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
 		if (ret != -ERESTARTSYS)
 			NV_ERROR(cli, "validate vram_list\n");
-		validate_fini(op, NULL);
-		return ret;
+
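The retry scheme described in the commit message is easier to see in
miniature. Below is a toy userspace analog (all names invented; the
forced first failure stands in for a non-resident user page): while the
"reservations" are held the copy must not fault, so on failure
everything is unwound, the sleeping copy runs with no locks held, and
validation restarts.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static int atomic_attempts;

/* Stand-in for __copy_from_user_inatomic(): it may not fault or sleep,
 * so it can fail. The first call is forced to fail here to exercise
 * the fallback path. */
static bool copy_inatomic(char *dst, const char *src, size_t len)
{
	if (atomic_attempts++ == 0)
		return false;	/* pretend the user page was not resident */
	memcpy(dst, src, len);
	return true;
}

static void lock_reservations(void)   { puts("reserve buffers"); }
static void unlock_reservations(void) { puts("unreserve buffers"); }

int main(void)
{
	const char user_relocs[] = "reloc table";	/* "userspace" memory */
	char kcopy[sizeof(user_relocs)];
	bool copied = false;

restart:
	lock_reservations();
	if (!copied && !copy_inatomic(kcopy, user_relocs, sizeof(kcopy))) {
		/* Faulting with reservations held could deadlock against
		 * mmap_sem, so undo everything, take the fault-capable
		 * copy path (copy_from_user in the kernel), and retry. */
		unlock_reservations();
		memcpy(kcopy, user_relocs, sizeof(kcopy));
		copied = true;
		goto restart;
	}
	printf("apply relocations from: %s\n", kcopy);
	unlock_reservations();
	return 0;
}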