[PATCH] drm/radeon: fix cut and paste issue for hawaii.

2014-07-24 Thread j.gli...@gmail.com
From: Jerome Glisse 

This is a halfway fix for hawaii acceleration. More fixes to come
but hopefully isolated to userspace.

Signed-off-by: Jérôme Glisse 
Cc: stable at vger.kernel.org
---
 drivers/gpu/drm/radeon/cik.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index fc560b0..2b87edb 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -2521,6 +2521,7 @@ static void cik_tiling_mode_table_init(struct 
radeon_device *rdev)
gb_tile_moden = 0;
break;
}
+   rdev->config.cik.macrotile_mode_array[reg_offset] = 
gb_tile_moden;
WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), 
gb_tile_moden);
}
} else if (num_pipe_configs == 8) {
-- 
1.8.3.1



[PATCH] radeon: fix pll/ctrc mapping on dce2 and dce3 hardware

2012-11-27 Thread j.gli...@gmail.com
From: Jerome Glisse 

This fixes the black screen on resume issue that some people are
experiencing. There is a bug in the atombios code regarding
pll/crtc mapping: the atombios code reverses the logic for
the pll and crtc mapping.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/atombios_crtc.c | 54 +-
 1 file changed, 20 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c 
b/drivers/gpu/drm/radeon/atombios_crtc.c
index 3bce029..7c1f080 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1696,42 +1696,28 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
return ATOM_PPLL2;
DRM_ERROR("unable to allocate a PPLL\n");
return ATOM_PPLL_INVALID;
-   } else if (ASIC_IS_AVIVO(rdev)) {
-   /* in DP mode, the DP ref clock can come from either PPLL
-* depending on the asic:
-* DCE3: PPLL1 or PPLL2
-*/
-   if 
(ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) {
-   /* use the same PPLL for all DP monitors */
-   pll = radeon_get_shared_dp_ppll(crtc);
-   if (pll != ATOM_PPLL_INVALID)
-   return pll;
-   } else {
-   /* use the same PPLL for all monitors with the same 
clock */
-   pll = radeon_get_shared_nondp_ppll(crtc);
-   if (pll != ATOM_PPLL_INVALID)
-   return pll;
-   }
-   /* all other cases */
-   pll_in_use = radeon_get_pll_use_mask(crtc);
-   /* the order shouldn't matter here, but we probably
-* need this until we have atomic modeset
-*/
-   if (rdev->flags & RADEON_IS_IGP) {
-   if (!(pll_in_use & (1 << ATOM_PPLL1)))
-   return ATOM_PPLL1;
-   if (!(pll_in_use & (1 << ATOM_PPLL2)))
-   return ATOM_PPLL2;
-   } else {
-   if (!(pll_in_use & (1 << ATOM_PPLL2)))
-   return ATOM_PPLL2;
-   if (!(pll_in_use & (1 << ATOM_PPLL1)))
-   return ATOM_PPLL1;
-   }
-   DRM_ERROR("unable to allocate a PPLL\n");
-   return ATOM_PPLL_INVALID;
} else {
/* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
+   /* some atombios (observed in some DCE2/DCE3) code have a bug,
+* the matching btw pll and crtc is done through
+* PCLK_CRTC[1|2]_CNTL (0x480/0x484) but atombios code use the
+* pll (1 or 2) to select which register to write. ie if using
+* pll1 it will use PCLK_CRTC1_CNTL (0x480) and if using pll2
+* it will use PCLK_CRTC2_CNTL (0x484), it then use crtc id to
+* choose which value to write. Which is reverse order from
+* register logic. So only case that works is when pllid is
+* same as crtcid or when both pll and crtc are enabled and
+* both use same clock.
+*
+* So just return crtc id as if crtc and pll were hard linked
+* together even if they aren't
+*/
+   if (radeon_crtc->crtc_id > 1) {
+   /* crtc other than crtc1 and crtc2 can only be use for
+* DP those doesn't need a valid pll to work.
+*/
+   return ATOM_PPLL_INVALID;
+   }
return radeon_crtc->crtc_id;
}
 }
-- 
1.7.11.7



[PATCH 1/2] radeon: fix pll/ctrc mapping on dce2 and dce3 hardware v2

2012-11-27 Thread j.gli...@gmail.com
From: Jerome Glisse 

This fixes the black screen on resume issue that some people are
experiencing. There is a bug in the atombios code regarding
pll/crtc mapping: the atombios code reverses the logic for
the pll and crtc mapping.

v2: DCE3 or DCE2 only have 2 crtc

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/atombios_crtc.c | 48 ++
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c 
b/drivers/gpu/drm/radeon/atombios_crtc.c
index 3bce029..24d932f 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1696,42 +1696,22 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
return ATOM_PPLL2;
DRM_ERROR("unable to allocate a PPLL\n");
return ATOM_PPLL_INVALID;
-   } else if (ASIC_IS_AVIVO(rdev)) {
-   /* in DP mode, the DP ref clock can come from either PPLL
-* depending on the asic:
-* DCE3: PPLL1 or PPLL2
-*/
-   if 
(ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) {
-   /* use the same PPLL for all DP monitors */
-   pll = radeon_get_shared_dp_ppll(crtc);
-   if (pll != ATOM_PPLL_INVALID)
-   return pll;
-   } else {
-   /* use the same PPLL for all monitors with the same 
clock */
-   pll = radeon_get_shared_nondp_ppll(crtc);
-   if (pll != ATOM_PPLL_INVALID)
-   return pll;
-   }
-   /* all other cases */
-   pll_in_use = radeon_get_pll_use_mask(crtc);
-   /* the order shouldn't matter here, but we probably
-* need this until we have atomic modeset
-*/
-   if (rdev->flags & RADEON_IS_IGP) {
-   if (!(pll_in_use & (1 << ATOM_PPLL1)))
-   return ATOM_PPLL1;
-   if (!(pll_in_use & (1 << ATOM_PPLL2)))
-   return ATOM_PPLL2;
-   } else {
-   if (!(pll_in_use & (1 << ATOM_PPLL2)))
-   return ATOM_PPLL2;
-   if (!(pll_in_use & (1 << ATOM_PPLL1)))
-   return ATOM_PPLL1;
-   }
-   DRM_ERROR("unable to allocate a PPLL\n");
-   return ATOM_PPLL_INVALID;
} else {
/* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
+   /* some atombios (observed in some DCE2/DCE3) code have a bug,
+* the matching btw pll and crtc is done through
+* PCLK_CRTC[1|2]_CNTL (0x480/0x484) but atombios code use the
+* pll (1 or 2) to select which register to write. ie if using
+* pll1 it will use PCLK_CRTC1_CNTL (0x480) and if using pll2
+* it will use PCLK_CRTC2_CNTL (0x484), it then use crtc id to
+* choose which value to write. Which is reverse order from
+* register logic. So only case that works is when pllid is
+* same as crtcid or when both pll and crtc are enabled and
+* both use same clock.
+*
+* So just return crtc id as if crtc and pll were hard linked
+* together even if they aren't
+*/
return radeon_crtc->crtc_id;
}
 }
-- 
1.7.11.7



[PATCH 2/2] drm/radeon: fix deadlock when bo is associated to different handle

2012-11-27 Thread j.gli...@gmail.com
From: Jerome Glisse 

There is a rare case, which seems to happen only across suspend/resume
cycles, where a bo is associated with several different handles. This
leads to a deadlock in the ttm buffer reservation path. This can only
happen with flinked (globally exported) objects. Userspace should not
reopen a globally exported object multiple times.

However, the kernel should handle this corner case gracefully and not
keep rejecting the userspace command stream. That is the purpose of
this patch.

Fixes the suspend/resume issue where the user sees the following message:
[drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation -35!

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_cs.c | 53 ++
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc..064e64d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -54,39 +54,48 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser 
*p)
return -ENOMEM;
}
for (i = 0; i < p->nrelocs; i++) {
-   struct drm_radeon_cs_reloc *r;
-
+   struct drm_radeon_cs_reloc *reloc;
+
+   /* One bo could be associated with several different handle.
+* Only happen for flinked bo that are open several time.
+*
+* FIXME:
+* Maybe we should consider an alternative to idr for gem
+* object to insure a 1:1 uniq mapping btw handle and gem
+* object.
+*/
duplicate = false;
-   r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
+   reloc = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
+   p->relocs[i].handle = 0;
+   p->relocs[i].flags = reloc->flags;
+   p->relocs[i].gobj = drm_gem_object_lookup(ddev,
+ p->filp,
+ reloc->handle);
+   if (p->relocs[i].gobj == NULL) {
+   DRM_ERROR("gem object lookup failed 0x%x\n",
+ reloc->handle);
+   return -ENOENT;
+   }
+   p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
+   p->relocs[i].lobj.bo = p->relocs[i].robj;
+   p->relocs[i].lobj.wdomain = reloc->write_domain;
+   p->relocs[i].lobj.rdomain = reloc->read_domains;
+   p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+
for (j = 0; j < i; j++) {
-   if (r->handle == p->relocs[j].handle) {
+   if (p->relocs[i].lobj.bo == p->relocs[j].lobj.bo) {
p->relocs_ptr[i] = &p->relocs[j];
duplicate = true;
break;
}
}
+
if (!duplicate) {
-   p->relocs[i].gobj = drm_gem_object_lookup(ddev,
- p->filp,
- r->handle);
-   if (p->relocs[i].gobj == NULL) {
-   DRM_ERROR("gem object lookup failed 0x%x\n",
- r->handle);
-   return -ENOENT;
-   }
p->relocs_ptr[i] = &p->relocs[i];
-   p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
-   p->relocs[i].lobj.bo = p->relocs[i].robj;
-   p->relocs[i].lobj.wdomain = r->write_domain;
-   p->relocs[i].lobj.rdomain = r->read_domains;
-   p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
-   p->relocs[i].handle = r->handle;
-   p->relocs[i].flags = r->flags;
+   p->relocs[i].handle = reloc->handle;
radeon_bo_list_add_object(&p->relocs[i].lobj,
  &p->validated);
-
-   } else
-   p->relocs[i].handle = 0;
+   }
}
return radeon_bo_list_validate(&p->validated);
 }
-- 
1.7.11.7



[PATCH] drm/radeon: track global bo name and always return the same

2012-11-27 Thread j.gli...@gmail.com
From: Jerome Glisse 

To avoid the kernel rejecting a cs when we return different global names
for the same bo, keep track of the global name and always return the same one.
This seems to fix the issue with suspend/resume failing and repeatedly printing
the following message:
[drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation -35!

There might still be a way for a rogue program to trigger this issue.

Signed-off-by: Jerome Glisse 
---
 radeon/radeon_bo_gem.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index 265f177..fca0aaf 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -47,11 +47,11 @@
 #include "radeon_bo_gem.h"
 #include 
 struct radeon_bo_gem {
-struct radeon_bo_int base;
-uint32_tname;
-int map_count;
-atomic_treloc_in_cs;
-void *priv_ptr;
+struct radeon_bo_intbase;
+uint32_tname;
+int map_count;
+atomic_treloc_in_cs;
+void*priv_ptr;
 };

 struct bo_manager_gem {
@@ -320,15 +320,21 @@ void *radeon_gem_get_reloc_in_cs(struct radeon_bo *bo)

 int radeon_gem_get_kernel_name(struct radeon_bo *bo, uint32_t *name)
 {
+struct radeon_bo_gem *bo_gem = (struct radeon_bo_gem*)bo;
 struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
 struct drm_gem_flink flink;
 int r;

+if (bo_gem->name) {
+*name = bo_gem->name;
+return 0;
+}
 flink.handle = bo->handle;
 r = drmIoctl(boi->bom->fd, DRM_IOCTL_GEM_FLINK, &flink);
 if (r) {
 return r;
 }
+bo_gem->name = flink.name;
 *name = flink.name;
 return 0;
 }
-- 
1.7.11.7



[PATCH] drm/ttm: do not try to preserve caching state

2012-11-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

It makes no sense to preserve the caching state, especially when
moving from vram to system. It burdens the page allocator with
matching the vram caching (often WC), which just burns CPU cycles
for no good reason.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/ttm/ttm_bo.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index bf6e4b5..39dcc58 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -896,19 +896,12 @@ static int ttm_bo_mem_force_space(struct 
ttm_buffer_object *bo,
 }

 static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
- uint32_t cur_placement,
  uint32_t proposed_placement)
 {
uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING;
uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING;

-   /**
-* Keep current caching if possible.
-*/
-
-   if ((cur_placement & caching) != 0)
-   result |= (cur_placement & caching);
-   else if ((man->default_caching & caching) != 0)
+   if ((man->default_caching & caching) != 0)
result |= man->default_caching;
else if ((TTM_PL_FLAG_CACHED & caching) != 0)
result |= TTM_PL_FLAG_CACHED;
@@ -978,8 +971,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
if (!type_ok)
continue;

-   cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
- cur_flags);
+   cur_flags = ttm_bo_select_caching(man, cur_flags);
/*
 * Use the access and other non-mapping-related flag bits from
 * the memory placement flags to the current flags
@@ -1023,8 +1015,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
&cur_flags))
continue;

-   cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
- cur_flags);
+   cur_flags = ttm_bo_select_caching(man, cur_flags);
/*
 * Use the access and other non-mapping-related flag bits from
 * the memory placement flags to the current flags
-- 
1.7.11.7



[RFC] drm/ttm: add minimum residency constraint for bo eviction

2012-11-28 Thread j.gli...@gmail.com
So I spent the day looking at ttm and eviction. The first patch I sent
earlier is, I believe, something that should be merged. This patch, however,
is more about discussing whether other people are interested in a similar
mechanism to be shared among drivers through ttm. I could otherwise just move
its logic to the radeon driver.

So the idea of this patch is that we don't want to constantly move objects
in and out of certain memory pools, mostly VRAM. So it adds a minimum
residency time, and no object that has been in the given pool for less
than this residency time can be moved out. It nearly solves the regression
we have had with radeon since the gallium driver change and probably improves
some other workloads.

Statistics I gathered on xonotic/realquake showed that we can have as much
as 1GB in each direction (VRAM to system and system to VRAM) over a second.
So we are obviously not saturating the PCIE bandwidth. Profiling shows that
80-90% of the cost of this eviction is in memory allocation/deallocation for
the system memory (lots of irqlock, and mostly the kernel spending time
allocating pages; think 256 000 or more pages per second to allocate/deallocate).

I used this WIP patch to gather statistic and play with various combination :
http://people.freedesktop.org/~glisse/0001-TTM-EVICT-WIP.patch

Some numbers with xonotic :
17.369fps stock 3.7 kernel
27.883fps 3.7 kernel + do not preserve caching patch ~ +60%
49.292fps 3.7 kernel + WIP with 500ms residency for all pool and no bo wait
  for eviction
49.258fps 3.7 kernel + WIP with 500ms residency for all pool and bo wait
48.213fps 3.7 kernel always allowing GTT placement (basically revert the
  gallium patch effect)

Another design I am thinking of is changing the way radeon handles its memory
and to stop trying to revalidate objects to different memory pools at each cs;
instead I think we should keep a vram lru list, probably per process, and move
bos out of vram according to this lru and following some heuristic. So radeon
would only move a bo into vram when there is room.

Another improvement I am thinking of is to reuse the GTT memory of objects that
are moved in for objects that are evicted, as the statistics I gathered showed
that the amounts moving in and out are often close. But this would require true
dma, as it would mean scheduling in/out moves at page granularity or in groups
of pages (write 4 pages from vram to 4 scratch pages in sys, write 4 pages of
system memory bo to the 4 vram pages, write 4 pages of vram to the just moved
4 pages of system memory ...).

Cheers,
Jerome



[PATCH] drm/ttm: add minimum residency constraint for bo eviction

2012-11-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

This patch adds a configurable minimum residency time for each memory
pool (VRAM, GTT, ...). The intention is to avoid having so much memory
eviction from VRAM that the GPU pretty much spends all
its time moving things in and out.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 3 +++
 drivers/gpu/drm/ttm/ttm_bo.c| 7 +++
 include/drm/ttm/ttm_bo_api.h| 1 +
 include/drm/ttm/ttm_bo_driver.h | 1 +
 4 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 5ebe1b3..88722c4 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -129,11 +129,13 @@ static int radeon_init_mem_type(struct ttm_bo_device 
*bdev, uint32_t type,
switch (type) {
case TTM_PL_SYSTEM:
/* System memory */
+   man->minimum_residency_time_ms = 0;
man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_MASK_CACHING;
man->default_caching = TTM_PL_FLAG_CACHED;
break;
case TTM_PL_TT:
+   man->minimum_residency_time_ms = 0;
man->func = &ttm_bo_manager_func;
man->gpu_offset = rdev->mc.gtt_start;
man->available_caching = TTM_PL_MASK_CACHING;
@@ -156,6 +158,7 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, 
uint32_t type,
break;
case TTM_PL_VRAM:
/* "On-card" video ram */
+   man->minimum_residency_time_ms = 500;
man->func = &ttm_bo_manager_func;
man->gpu_offset = rdev->mc.vram_start;
man->flags = TTM_MEMTYPE_FLAG_FIXED |
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 39dcc58..40476121 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -452,6 +452,7 @@ moved:
bo->cur_placement = bo->mem.placement;
} else
bo->offset = 0;
+   bo->jiffies = jiffies;

return 0;

@@ -810,6 +811,12 @@ retry:
}

bo = list_first_entry(&man->lru, struct ttm_buffer_object, lru);
+
+   if (time_after(jiffies, bo->jiffies) && jiffies_to_msecs(jiffies - 
bo->jiffies) >= man->minimum_residency_time_ms) {
+   spin_unlock(&glob->lru_lock);
+   return -EBUSY;
+   }
+
kref_get(&bo->list_kref);

if (!list_empty(&bo->ddestroy)) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e8028ad..9e12313 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -275,6 +275,7 @@ struct ttm_buffer_object {

unsigned long offset;
uint32_t cur_placement;
+   unsigned long jiffies;

struct sg_table *sg;
 };
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d803b92..7f60a18e6 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -280,6 +280,7 @@ struct ttm_mem_type_manager {
struct mutex io_reserve_mutex;
bool use_io_reserve_lru;
bool io_reserve_fastpath;
+   unsigned long minimum_residency_time_ms;

/*
 * Protected by @io_reserve_mutex:
-- 
1.7.11.7



[PATCH] drm/radeon: use cached memory when evicting for vram on non agp

2012-11-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

Force the use of cached memory when evicting from vram on non-agp
hardware. Also force write combining on agp hw. This is to ensure
minimal cache type changes when allocating memory and to improve
memory eviction, especially on pci/pcie hw.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_object.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index b91118c..3f9f3bb 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -88,10 +88,20 @@ void radeon_ttm_placement_from_domain(struct radeon_bo 
*rbo, u32 domain)
if (domain & RADEON_GEM_DOMAIN_VRAM)
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
-   if (domain & RADEON_GEM_DOMAIN_GTT)
-   rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
-   if (domain & RADEON_GEM_DOMAIN_CPU)
-   rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+   if (domain & RADEON_GEM_DOMAIN_GTT) {
+   if (rbo->rdev->flags & RADEON_IS_AGP) {
+   rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+   } else {
+   rbo->placements[c++] = TTM_PL_FLAG_CACHED | 
TTM_PL_FLAG_TT;
+   }
+   }
+   if (domain & RADEON_GEM_DOMAIN_CPU) {
+   if (rbo->rdev->flags & RADEON_IS_AGP) {
+   rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+   } else {
+   rbo->placements[c++] = TTM_PL_FLAG_CACHED | 
TTM_PL_FLAG_TT;
+   }
+   }
if (!c)
rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
rbo->placement.num_placement = c;
-- 
1.7.11.7



[PATCH] drm/radeon: fix rare segfault after gpu lockup on r7xx

2012-11-29 Thread j.gli...@gmail.com
From: Jerome Glisse 

If GPU reset fails, the gart table ptr might be NULL; avoid a
kernel segfault in this rare event.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/r600.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cda280d..0e3a68a 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -843,7 +843,9 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
 * method for them.
 */
WREG32(HDP_DEBUG1, 0);
-   tmp = readl((void __iomem *)ptr);
+   if (ptr) {
+   tmp = readl((void __iomem *)ptr);
+   }
} else
WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

-- 
1.7.11.7



[RFC] improve memory placement for radeon

2012-11-29 Thread j.gli...@gmail.com
So as a followup here are 2 patches. The first one just stops trying to move
objects at each cs ioctl; I believe it could be included in 3.7 as it
improves performance (especially with the vram change from userspace).

The second one implements a vram eviction policy. It's a simple one:
buffers used for write operations are more important than buffers used
for read operations. Buffers get evicted from vram only if they haven't
been used in the last 50ms (so in the last few frames) and only if
there are buffers that have been recently used and that could be moved
into vram. This is mostly where I believe the discussion should be:
what kind of heuristic would work better than that.

So without first patch and with mesa master xonotic high is at 17fps,
with first patch it goes to 40fps, with second patch it goes to 48fps.

Cheers,
Jerome



[PATCH 1/2] drm/radeon: do not move bo to different placement at each cs

2012-11-29 Thread j.gli...@gmail.com
From: Jerome Glisse 

The bo creation placement is where the bo will be. Instead of trying
to move the bo at each command stream, leave this work to another worker
thread that will use a more advanced heuristic.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_object.c | 17 -
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54..0a2664c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -313,6 +313,7 @@ struct radeon_bo {
struct list_headlist;
/* Protected by tbo.reserved */
u32 placements[3];
+   u32 busy_placements[3];
struct ttm_placementplacement;
struct ttm_buffer_objecttbo;
struct ttm_bo_kmap_obj  kmap;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 3f9f3bb..e25ae20 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -84,7 +84,6 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, 
u32 domain)
rbo->placement.fpfn = 0;
rbo->placement.lpfn = 0;
rbo->placement.placement = rbo->placements;
-   rbo->placement.busy_placement = rbo->placements;
if (domain & RADEON_GEM_DOMAIN_VRAM)
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
@@ -105,6 +104,14 @@ void radeon_ttm_placement_from_domain(struct radeon_bo 
*rbo, u32 domain)
if (!c)
rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
rbo->placement.num_placement = c;
+
+   c = 0;
+   rbo->placement.busy_placement = rbo->busy_placements;
+   if (rbo->rdev->flags & RADEON_IS_AGP) {
+   rbo->busy_placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+   } else {
+   rbo->busy_placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+   }
rbo->placement.num_busy_placement = c;
 }

@@ -360,17 +367,9 @@ int radeon_bo_list_validate(struct list_head *head)
list_for_each_entry(lobj, head, tv.head) {
bo = lobj->bo;
if (!bo->pin_count) {
-   domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
-   
-   retry:
-   radeon_ttm_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement,
true, false, false);
if (unlikely(r)) {
-   if (r != -ERESTARTSYS && domain == 
RADEON_GEM_DOMAIN_VRAM) {
-   domain |= RADEON_GEM_DOMAIN_GTT;
-   goto retry;
-   }
return r;
}
}
-- 
1.7.11.7



[PATCH 2/2] drm/radeon: buffer memory placement work thread WIP

2012-11-29 Thread j.gli...@gmail.com
From: Jerome Glisse 

Use a delayed work thread to move buffers out of vram if they haven't
been used over some period of time. This makes room for
buffers that are actively used.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  13 ++
 drivers/gpu/drm/radeon/radeon_cs.c |   2 +-
 drivers/gpu/drm/radeon/radeon_device.c |   8 ++
 drivers/gpu/drm/radeon/radeon_object.c | 241 -
 drivers/gpu/drm/radeon/radeon_object.h |   3 +-
 5 files changed, 262 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0a2664c..a2e92da 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,6 +102,8 @@ extern int radeon_lockup_timeout;
  */
 #define RADEON_MAX_USEC_TIMEOUT10  /* 100 ms */
 #define RADEON_FENCE_JIFFIES_TIMEOUT   (HZ / 2)
+#define RADEON_PLACEMENT_WORK_MS   500
+#define RADEON_PLACEMENT_MAX_EVICTION  8
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE16
 #define RADEON_DEBUGFS_MAX_COMPONENTS  32
@@ -311,6 +313,10 @@ struct radeon_bo_va {
 struct radeon_bo {
/* Protected by gem.mutex */
struct list_headlist;
+   /* Protected by rdev->placement_mutex */
+   struct list_headplist;
+   struct list_head*head;
+   unsigned long   last_use_jiffies;
/* Protected by tbo.reserved */
u32 placements[3];
u32 busy_placements[3];
@@ -1523,6 +1529,13 @@ struct radeon_device {
struct drm_device   *ddev;
struct pci_dev  *pdev;
struct rw_semaphore exclusive_lock;
+   struct mutexplacement_mutex;
+   struct list_headwvram_in_list;
+   struct list_headrvram_in_list;
+   struct list_headwvram_out_list;
+   struct list_headrvram_out_list;
+   struct delayed_work placement_work;
+   unsigned long   vram_in_size;
/* ASIC */
union radeon_asic_configconfig;
enum radeon_family  family;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc..e9e90bc 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -88,7 +88,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
} else
p->relocs[i].handle = 0;
}
-   return radeon_bo_list_validate(&p->validated);
+   return radeon_bo_list_validate(p->rdev, &p->validated);
 }

 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 
priority)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..0c4c874 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1001,6 +1001,14 @@ int radeon_device_init(struct radeon_device *rdev,
init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
+
+   mutex_init(&rdev->placement_mutex);
+   INIT_LIST_HEAD(&rdev->wvram_in_list);
+   INIT_LIST_HEAD(&rdev->rvram_in_list);
+   INIT_LIST_HEAD(&rdev->wvram_out_list);
+   INIT_LIST_HEAD(&rdev->rvram_out_list);
+   INIT_DELAYED_WORK(&rdev->placement_work, radeon_placement_work_handler);
+
r = radeon_gem_init(rdev);
if (r)
return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index e25ae20..f2bcc5f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -64,6 +64,10 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object 
*tbo)
mutex_lock(&bo->rdev->gem.mutex);
list_del_init(&bo->list);
mutex_unlock(&bo->rdev->gem.mutex);
+   mutex_lock(&bo->rdev->placement_mutex);
+   list_del_init(&bo->plist);
+   bo->head = NULL;
+   mutex_unlock(&bo->rdev->placement_mutex);
radeon_bo_clear_surface_reg(bo);
radeon_bo_clear_va(bo);
drm_gem_object_release(&bo->gem_base);
@@ -153,6 +157,8 @@ int radeon_bo_create(struct radeon_device *rdev,
bo->surface_reg = -1;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
+   INIT_LIST_HEAD(&bo->plist);
+   bo->head = NULL;
radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocation are uninterruptible */
down_read(&rdev->pm.mclk_lock);
@@ -263,8 +269,14 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 
domain, u64 max_offset,
if (gpu_addr != NULL)
*g

[PATCH] drm/ttm: Pass the buffer object on backend creation

2011-10-10 Thread j.gli...@gmail.com
From: Jerome Glisse 

In case of multiple page tables for GART, the driver wants to know which
buffer object is being bound/unbound. This allows the driver to bind/unbind
a buffer object from several different GARTs.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   |3 ++-
 drivers/gpu/drm/radeon/radeon_ttm.c|   11 +++
 drivers/gpu/drm/ttm/ttm_bo.c   |4 ++--
 drivers/gpu/drm/ttm/ttm_tt.c   |9 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |3 ++-
 include/drm/ttm/ttm_bo_driver.h|5 -
 6 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 890d50e..9f65371 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -344,7 +344,8 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, 
u32 val)
 }

 static struct ttm_backend *
-nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev)
+nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev,
+   struct ttm_buffer_object *bo)
 {
struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
struct drm_device *dev = dev_priv->dev;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 0b5468b..0bad266 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -114,10 +114,12 @@ static void radeon_ttm_global_fini(struct radeon_device 
*rdev)
}
 }

-struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev);
+struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev,
+ struct ttm_buffer_object *bo);

 static struct ttm_backend*
-radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev)
+radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev,
+   struct ttm_buffer_object *bo)
 {
struct radeon_device *rdev;

@@ -128,7 +130,7 @@ radeon_create_ttm_backend_entry(struct ttm_bo_device *bdev)
} else
 #endif
{
-   return radeon_ttm_backend_create(rdev);
+   return radeon_ttm_backend_create(rdev, bo);
}
 }

@@ -778,7 +780,8 @@ static struct ttm_backend_func radeon_backend_func = {
.destroy = &radeon_ttm_backend_destroy,
 };

-struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev)
+struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev,
+ struct ttm_buffer_object *bo)
 {
struct radeon_ttm_backend *gtt;

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ef06194..fe957e7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -337,13 +337,13 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
if (zero_alloc)
page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC;
case ttm_bo_type_kernel:
-   bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
+   bo->ttm = ttm_tt_create(bdev, bo, bo->num_pages << PAGE_SHIFT,
page_flags, glob->dummy_read_page);
if (unlikely(bo->ttm == NULL))
ret = -ENOMEM;
break;
case ttm_bo_type_user:
-   bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
+   bo->ttm = ttm_tt_create(bdev, bo, bo->num_pages << PAGE_SHIFT,
page_flags | TTM_PAGE_FLAG_USER,
glob->dummy_read_page);
if (unlikely(bo->ttm == NULL)) {
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 58c271e..202e16e 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -379,8 +379,11 @@ int ttm_tt_set_user(struct ttm_tt *ttm,
return 0;
 }

-struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size,
-uint32_t page_flags, struct page *dummy_read_page)
+struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev,
+struct ttm_buffer_object *bo,
+unsigned long size,
+uint32_t page_flags,
+struct page *dummy_read_page)
 {
struct ttm_bo_driver *bo_driver = bdev->driver;
struct ttm_tt *ttm;
@@ -407,7 +410,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, 
unsigned long size,
printk(KERN_ERR TTM_PFX "Failed allocating page table\n");
return NULL;
}
-   ttm->be = bo_driver->create_ttm_backend_entry(bdev);
+   ttm->be = bo_driver->create_ttm_backend_entry(bdev, bo);
if (!ttm->be) {
ttm_tt_destroy(ttm);
printk(KERN_ERR TTM_PFX 

[PATCH] drm/radeon/kms: consolidate GART code, fix memory fault after GPU lockup

2011-10-13 Thread j.gli...@gmail.com
From: Jerome Glisse 

After a GPU lockup the VRAM GART table is unpinned and thus its pointer
becomes invalid. This patch moves the unpin code to a common helper
function and sets the pointer to NULL so that the page update code can
check whether it should update the GPU page table or not. That way a bo
still bound to GART can be unbound properly (pci_unmap_page for all its
pages) while there is no need to update the GPU page table.

Signed-off-by: Jerome Glisse 
cc: stable at kernel.org
---
 drivers/gpu/drm/radeon/evergreen.c   |   12 +-
 drivers/gpu/drm/radeon/ni.c  |   13 +--
 drivers/gpu/drm/radeon/r100.c|6 ++-
 drivers/gpu/drm/radeon/r300.c|   16 ++--
 drivers/gpu/drm/radeon/r600.c|   17 +++--
 drivers/gpu/drm/radeon/radeon.h  |   22 +++-
 drivers/gpu/drm/radeon/radeon_gart.c |   66 -
 drivers/gpu/drm/radeon/rs400.c   |5 ++-
 drivers/gpu/drm/radeon/rs600.c   |   16 ++--
 drivers/gpu/drm/radeon/rv770.c   |   13 ++-
 10 files changed, 72 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index c4ffa14f..fe5cf3e 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -893,7 +893,7 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
u32 tmp;
int r;

-   if (rdev->gart.table.vram.robj == NULL) {
+   if (rdev->gart.robj == NULL) {
dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
@@ -942,7 +942,6 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
 void evergreen_pcie_gart_disable(struct radeon_device *rdev)
 {
u32 tmp;
-   int r;

/* Disable all tables */
WREG32(VM_CONTEXT0_CNTL, 0);
@@ -962,14 +961,7 @@ void evergreen_pcie_gart_disable(struct radeon_device 
*rdev)
WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
-   if (rdev->gart.table.vram.robj) {
-   r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
-   if (likely(r == 0)) {
-   radeon_bo_kunmap(rdev->gart.table.vram.robj);
-   radeon_bo_unpin(rdev->gart.table.vram.robj);
-   radeon_bo_unreserve(rdev->gart.table.vram.robj);
-   }
-   }
+   radeon_gart_table_vram_unpin(rdev);
 }

 void evergreen_pcie_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8c79ca9..529aaee 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -931,7 +931,7 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
 {
int r;

-   if (rdev->gart.table.vram.robj == NULL) {
+   if (rdev->gart.robj == NULL) {
dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
@@ -973,8 +973,6 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)

 void cayman_pcie_gart_disable(struct radeon_device *rdev)
 {
-   int r;
-
/* Disable all tables */
WREG32(VM_CONTEXT0_CNTL, 0);
WREG32(VM_CONTEXT1_CNTL, 0);
@@ -990,14 +988,7 @@ void cayman_pcie_gart_disable(struct radeon_device *rdev)
WREG32(VM_L2_CNTL2, 0);
WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
   L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-   if (rdev->gart.table.vram.robj) {
-   r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
-   if (likely(r == 0)) {
-   radeon_bo_kunmap(rdev->gart.table.vram.robj);
-   radeon_bo_unpin(rdev->gart.table.vram.robj);
-   radeon_bo_unreserve(rdev->gart.table.vram.robj);
-   }
-   }
+   radeon_gart_table_vram_unpin(rdev);
 }

 void cayman_pcie_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7fcdbbb..8ad6769 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -474,7 +474,7 @@ int r100_pci_gart_init(struct radeon_device *rdev)
 {
int r;

-   if (rdev->gart.table.ram.ptr) {
+   if (rdev->gart.ptr) {
WARN(1, "R100 PCI GART already initialized\n");
return 0;
}
@@ -530,10 +530,12 @@ void r100_pci_gart_disable(struct radeon_device *rdev)

 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 {
+   u32 *gtt = rdev->gart.ptr;
+
if (i < 0 || i > rdev->gart.num_gpu_pages) {
return -EINVAL;
}
-   rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
+   gtt[i] = cpu_to_le32(lower_32_bits(addr));
return 0;
 }

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55a7f19..6c62d88 100644
--- a/drivers/gpu/d

[PATCH] drm/radeon: avoid bouncing connector status btw disconnected & unknown

2011-10-24 Thread j.gli...@gmail.com
From: Jerome Glisse 

Since the force handling rework of d0d0a225e6ad43314c9aa7ea081f76adc5098ad4
we could end up bouncing the connector status between disconnected and
unknown. When the connector status changes, a call to output_poll_changed
happens, which in turn asks for detect again but with force set.

So set the load detect flag whenever we report the connector as
connected or unknown; this avoids bouncing between disconnected and
unknown.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_connectors.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c 
b/drivers/gpu/drm/radeon/radeon_connectors.c
index dec6cbe..ff6a2e0 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -764,7 +764,7 @@ radeon_vga_detect(struct drm_connector *connector, bool 
force)
if (radeon_connector->dac_load_detect && encoder) {
encoder_funcs = encoder->helper_private;
ret = encoder_funcs->detect(encoder, connector);
-   if (ret == connector_status_connected)
+   if (ret != connector_status_disconnected)
radeon_connector->detected_by_load = true;
}
}
@@ -1005,8 +1005,9 @@ radeon_dvi_detect(struct drm_connector *connector, bool 
force)
ret = encoder_funcs->detect(encoder, 
connector);
if (ret == connector_status_connected) {
radeon_connector->use_digital = 
false;
-   
radeon_connector->detected_by_load = true;
}
+   if (ret != 
connector_status_disconnected)
+   
radeon_connector->detected_by_load = true;
}
break;
}
-- 
1.7.1



[PATCH] drm/radeon: flush read cache for gtt with fence on r6xx and newer GPU V2

2011-10-26 Thread j.gli...@gmail.com
From: Jerome Glisse 

Cayman seems to be particularly sensitive to the read cache returning
old data after bind/unbind to GTT. Flush the read cache for the GTT range
with each fence on all new hw. This should fix several rendering
glitches, such as those in the bug reports listed below.

V2 flush whole address space

https://bugs.freedesktop.org/show_bug.cgi?id=40221
https://bugs.freedesktop.org/show_bug.cgi?id=38022
https://bugzilla.redhat.com/show_bug.cgi?id=738790

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_blit_kms.c |4 ++--
 drivers/gpu/drm/radeon/r600.c   |   12 
 drivers/gpu/drm/radeon/r600_blit_kms.c  |4 ++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c 
b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index dcf11bb..e9aeeed 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -613,9 +613,9 @@ int evergreen_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

rdev->r600_blit.ring_size_per_loop = 74;

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 12470b0..983808a 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2331,6 +2331,12 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
if (rdev->wb.use_event) {
u64 addr = rdev->wb.gpu_addr + R600_WB_EVENT_OFFSET +
(u64)(rdev->fence_drv.scratch_reg - 
rdev->scratch.reg_base);
+   /* flush read cache over gart */
+   radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+   radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA | 
PACKET3_VC_ACTION_ENA);
+   radeon_ring_write(rdev, 0x);
+   radeon_ring_write(rdev, 0);
+   radeon_ring_write(rdev, 10); /* poll interval */
/* EVENT_WRITE_EOP - flush caches, send int */
radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
radeon_ring_write(rdev, 
EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
@@ -2339,6 +2345,12 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
radeon_ring_write(rdev, fence->seq);
radeon_ring_write(rdev, 0);
} else {
+   /* flush read cache over gart */
+   radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+   radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA | 
PACKET3_VC_ACTION_ENA);
+   radeon_ring_write(rdev, 0x);
+   radeon_ring_write(rdev, 0);
+   radeon_ring_write(rdev, 10); /* poll interval */
radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | 
EVENT_INDEX(0));
/* wait for 3D idle clean */
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c 
b/drivers/gpu/drm/radeon/r600_blit_kms.c
index c4cf130..36e62f2 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -500,9 +500,9 @@ int r600_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

rdev->r600_blit.ring_size_per_loop = 76;
/* set_render_target emits 2 extra dwords on rv6xx */
-- 
1.7.1



[PATCH] drm/radeon: flush read cache for gtt with fence on r6xx and newer GPU V3

2011-10-26 Thread j.gli...@gmail.com
From: Jerome Glisse 

Cayman seems to be particularly sensitive to the read cache returning
old data after bind/unbind to GTT. Flush the read cache for the GTT range
with each fence on all new hw. This should fix several rendering
glitches, such as those in the bug reports listed below.

V2 flush whole address space
V3 also flush shader read cache

https://bugs.freedesktop.org/show_bug.cgi?id=40221
https://bugs.freedesktop.org/show_bug.cgi?id=38022
https://bugzilla.redhat.com/show_bug.cgi?id=738790

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_blit_kms.c |4 ++--
 drivers/gpu/drm/radeon/r600.c   |   16 
 drivers/gpu/drm/radeon/r600_blit_kms.c  |4 ++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c 
b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index dcf11bb..e9aeeed 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -613,9 +613,9 @@ int evergreen_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

rdev->r600_blit.ring_size_per_loop = 74;

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 12470b0..1f007ad 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2331,6 +2331,14 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
if (rdev->wb.use_event) {
u64 addr = rdev->wb.gpu_addr + R600_WB_EVENT_OFFSET +
(u64)(rdev->fence_drv.scratch_reg - 
rdev->scratch.reg_base);
+   /* flush read cache over gart */
+   radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+   radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA |
+   PACKET3_VC_ACTION_ENA |
+   PACKET3_SH_ACTION_ENA);
+   radeon_ring_write(rdev, 0x);
+   radeon_ring_write(rdev, 0);
+   radeon_ring_write(rdev, 10); /* poll interval */
/* EVENT_WRITE_EOP - flush caches, send int */
radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
radeon_ring_write(rdev, 
EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
@@ -2339,6 +2347,14 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
radeon_ring_write(rdev, fence->seq);
radeon_ring_write(rdev, 0);
} else {
+   /* flush read cache over gart */
+   radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+   radeon_ring_write(rdev, PACKET3_TC_ACTION_ENA |
+   PACKET3_VC_ACTION_ENA |
+   PACKET3_SH_ACTION_ENA);
+   radeon_ring_write(rdev, 0x);
+   radeon_ring_write(rdev, 0);
+   radeon_ring_write(rdev, 10); /* poll interval */
radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
radeon_ring_write(rdev, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT) | 
EVENT_INDEX(0));
/* wait for 3D idle clean */
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c 
b/drivers/gpu/drm/radeon/r600_blit_kms.c
index c4cf130..36e62f2 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -500,9 +500,9 @@ int r600_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
-   rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+   rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

rdev->r600_blit.ring_size_per_loop = 76;
/* set_render_target emits 2 extra dwords on rv6xx */
-- 
1.7.1



[PATCH] drm/radeon: set hpd polarity at init time so hotplug detect works

2011-10-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

Polarity needs to be set according to the connector status (connected
or disconnected). Set it up at module init so the first hotplug works
reliably no matter what the initial set of connectors is.

Signed-off-by: Jerome Glisse 
cc: stable at kernel.org
---
 drivers/gpu/drm/radeon/radeon_connectors.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c 
b/drivers/gpu/drm/radeon/radeon_connectors.c
index dec6cbe..bfdd48b 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1789,6 +1789,7 @@ radeon_add_atom_connector(struct drm_device *dev,
connector->polled = DRM_CONNECTOR_POLL_CONNECT;
} else
connector->polled = DRM_CONNECTOR_POLL_HPD;
+   radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);

connector->display_info.subpixel_order = subpixel_order;
drm_sysfs_connector_add(connector);
-- 
1.7.6.4



[RFC] ttm merge move/move_notify/invalidate_caches callback

2012-04-05 Thread j.gli...@gmail.com
This was discussed previously :
http://lists.freedesktop.org/archives/dri-devel/2012-January/018463.html

The plat de resistance is in patch 4; the first 3 are simplifications
that I believe can go in as is.

The idea is that the driver needs to know whenever a bo move happens.
Instead of having 3 sets of callbacks to allow the driver to do things
between the common ttm move handling steps, move the ttm move handling
into each driver and let the driver customize it at the appropriate point.

I intentionally did not take full advantage of that in either radeon
or nouveau, i.e. I tried to keep the current code as much as possible so
one can see that I don't disrupt the old behavior. There is an opportunity
to simplify the move_notify driver function, I believe.

I tested it on radeon and nouveau. This time I limited the 2G vram nvidia
gpu I have to behave like a 64M vram or 96M vram one so I actually see the
vram eviction path being exercised while testing (this is why in my last
ttm patch set I missed the nouveau regression, again sorry about that).
So I am more confident that with this one I didn't regress anything.

Thomas, I tried to be careful with vmwgfx but I didn't test it; it is
only build tested.

Cheers,
Jerome



[PATCH 1/4] drm/ttm: simplify ttm_bo_move_ttm

2012-04-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

ttm_bo_move_ttm has no need to know whether the bo is being evicted or
whether it shouldn't wait (as the function never waits anyway).

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c  |4 ++--
 drivers/gpu/drm/ttm/ttm_bo.c |2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c|3 +--
 include/drm/ttm/ttm_bo_driver.h  |6 +-
 5 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index ec54364..edf745c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -767,7 +767,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
goto out;

-   ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+   ret = ttm_bo_move_ttm(bo, new_mem);
 out:
ttm_bo_mem_put(bo, &tmp_mem);
return ret;
@@ -793,7 +793,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool 
evict, bool intr,
if (ret)
return ret;

-   ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+   ret = ttm_bo_move_ttm(bo, &tmp_mem);
if (ret)
goto out;

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index f493c64..4bd519d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -343,7 +343,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
goto out_cleanup;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+   r = ttm_bo_move_ttm(bo, new_mem);
 out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
@@ -375,7 +375,7 @@ static int radeon_move_ram_vram(struct ttm_buffer_object 
*bo,
if (unlikely(r)) {
return r;
}
-   r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+   r = ttm_bo_move_ttm(bo, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1f5c67c..97b6ffc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -409,7 +409,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,

if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-   ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, 
mem);
+   ret = ttm_bo_move_ttm(bo, mem);
else if (bdev->driver->move)
ret = bdev->driver->move(bo, evict, interruptible,
 no_wait_reserve, no_wait_gpu, mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index f8187ea..ccfc30d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -43,8 +43,7 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 }

 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-   bool evict, bool no_wait_reserve,
-   bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+   struct ttm_mem_reg *new_mem)
 {
struct ttm_tt *ttm = bo->ttm;
struct ttm_mem_reg *old_mem = &bo->mem;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d43e892..69cd447 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -918,9 +918,6 @@ extern int ttm_bo_wait_unreserved(struct ttm_buffer_object 
*bo,
  * ttm_bo_move_ttm
  *
  * @bo: A pointer to a struct ttm_buffer_object.
- * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
- * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
  * Optimized move function for a buffer object with both old and
@@ -934,8 +931,7 @@ extern int ttm_bo_wait_unreserved(struct ttm_buffer_object 
*bo,
  */

 extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-  bool evict, bool no_wait_reserve,
-  bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+  struct ttm_mem_reg *new_mem);

 /**
  * ttm_bo_move_memcpy
-- 
1.7.7.6



[PATCH 2/4] drm/ttm: simplify ttm_bo_move_accel_cleanup remove useless arg

2012-04-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

ttm_bo_move_accel_cleanup has no need to know whether it can wait on
reserve or wait on gpu, as there is no such wait in this helper.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |3 +--
 drivers/gpu/drm/radeon/radeon_ttm.c  |3 +--
 drivers/gpu/drm/ttm/ttm_bo_util.c|3 +--
 include/drm/ttm/ttm_bo_driver.h  |5 +
 4 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index edf745c..6343862 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -477,8 +477,7 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
if (ret)
return ret;

-   ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL, evict,
-   no_wait_reserve, no_wait_gpu, new_mem);
+   ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL, evict, new_mem);
nouveau_fence_unref(&fence);
return ret;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 4bd519d..f261537 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -296,8 +296,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
new_mem->num_pages * (PAGE_SIZE / 
RADEON_GPU_PAGE_SIZE), /* GPU pages */
fence);
/* FIXME: handle copy error */
-   r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait_reserve, no_wait_gpu, 
new_mem);
+   r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, new_mem);
radeon_fence_unref(&fence);
return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ccfc30d..1013725 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -611,8 +611,7 @@ EXPORT_SYMBOL(ttm_bo_kunmap);
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
  void *sync_obj,
  void *sync_obj_arg,
- bool evict, bool no_wait_reserve,
- bool no_wait_gpu,
+ bool evict,
  struct ttm_mem_reg *new_mem)
 {
struct ttm_bo_device *bdev = bo->bdev;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 69cd447..0b12df2 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -973,8 +973,6 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object 
*bo);
  * @sync_obj_arg: An argument to pass to the sync object idle / wait
  * functions.
  * @evict: This is an evict move. Don't return until the buffer is idle.
- * @no_wait_reserve: Return immediately if other buffers are busy.
- * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
  * Accelerated move function to be called when an accelerated move
@@ -988,8 +986,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object 
*bo);
 extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 void *sync_obj,
 void *sync_obj_arg,
-bool evict, bool no_wait_reserve,
-bool no_wait_gpu,
+bool evict,
 struct ttm_mem_reg *new_mem);
 /**
  * ttm_io_prot
-- 
1.7.7.6



[PATCH 3/4] drm/ttm: simplify ttm_bo_move_memcpy remove useless arg

2012-04-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

ttm_bo_move_memcpy has no need to know whether it can wait on
reserve or wait on gpu, as there is no such wait in this helper.
It doesn't need to know whether it's an eviction either; memcpy
is not a GPU pipelineable case.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c  |2 +-
 drivers/gpu/drm/ttm/ttm_bo.c |2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c|1 -
 include/drm/ttm/ttm_bo_driver.h  |6 +-
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6343862..6f18c3b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -891,7 +891,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, 
bool intr,

/* Software copy if the card isn't up and running yet. */
if (!dev_priv->channel) {
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, new_mem);
+   ret = ttm_bo_move_memcpy(bo, new_mem);
goto out;
}

@@ -907,7 +907,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, 
bool intr,
goto out;

/* Fallback to software copy. */
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
+   ret = ttm_bo_move_memcpy(bo, new_mem);

 out:
if (dev_priv->card_type < NV_50) {
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index f261537..65b4d2f 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -429,7 +429,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,

if (r) {
 memcpy:
-   r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, 
new_mem);
+   r = ttm_bo_move_memcpy(bo, new_mem);
}
return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 97b6ffc..f2aa2e2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -414,7 +414,7 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
ret = bdev->driver->move(bo, evict, interruptible,
 no_wait_reserve, no_wait_gpu, mem);
else
-   ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, 
no_wait_gpu, mem);
+   ret = ttm_bo_move_memcpy(bo, mem);

if (ret) {
if (bdev->driver->move_notify) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 1013725..04e62e9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -313,7 +313,6 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void 
*dst,
 }

 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-  bool evict, bool no_wait_reserve, bool no_wait_gpu,
   struct ttm_mem_reg *new_mem)
 {
struct ttm_bo_device *bdev = bo->bdev;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 0b12df2..47f6f9d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -937,9 +937,6 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
  * ttm_bo_move_memcpy
  *
  * @bo: A pointer to a struct ttm_buffer_object.
- * @evict: 1: This is an eviction. Don't try to pipeline.
- * @no_wait_reserve: Return immediately if other buffers are busy.
- * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
  * Fallback move function for a mappable buffer object in mappable memory.
@@ -953,8 +950,7 @@ extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
  */

 extern int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
- bool evict, bool no_wait_reserve,
- bool no_wait_gpu, struct ttm_mem_reg *new_mem);
+ struct ttm_mem_reg *new_mem);

 /**
  * ttm_bo_free_old_node
-- 
1.7.7.6



[PATCH 4/4] drm/ttm: move bo move logic inside driver to avoid callback

2012-04-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

Move the buffer object move logic inside the driver callback so we don't
have complex move_notify and cache_invalidate callbacks in the error
path. This simplifies drivers at the expense of some code duplication
among drivers.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   |  318 
 drivers/gpu/drm/radeon/radeon_ttm.c|  277 +---
 drivers/gpu/drm/ttm/ttm_bo.c   |   87 ++
 drivers/gpu/drm/ttm/ttm_tt.c   |2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |   67 +++-
 include/drm/ttm/ttm_bo_driver.h|   37 ++--
 6 files changed, 472 insertions(+), 316 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 6f18c3b..2ddeb0f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -39,6 +39,18 @@
 #include 
 #include 

+/* gcc should kill that code */
+#if 1
+#define ASSERT(expr)   \
+   if (!(expr)) {  \
+   printk("radeon: assertion failed! %s[%d]: %s\n",\
+   __func__, __LINE__, #expr); \
+   panic("radeon: %s", __func__);  \
+   }
+#else
+#define ASSERT(expr) do {} while (0)
+#endif
+
 static void
 nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 {
@@ -376,13 +388,6 @@ nouveau_ttm_tt_create(struct ttm_bo_device *bdev,
 }

 static int
-nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-   /* We'll do this from user space. */
-   return 0;
-}
-
-static int
 nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 struct ttm_mem_type_manager *man)
 {
@@ -467,7 +472,6 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct 
ttm_placement *pl)
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
  struct nouveau_bo *nvbo, bool evict,
- bool no_wait_reserve, bool no_wait_gpu,
  struct ttm_mem_reg *new_mem)
 {
struct nouveau_fence *fence = NULL;
@@ -687,8 +691,7 @@ nouveau_vma_getmap(struct nouveau_channel *chan, struct 
nouveau_bo *nvbo,
 }

 static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-bool no_wait_reserve, bool no_wait_gpu,
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
 struct ttm_mem_reg *new_mem)
 {
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
@@ -727,9 +730,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict, bool intr,
else
ret = nvc0_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
if (ret == 0) {
-   ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
-   no_wait_reserve,
-   no_wait_gpu, new_mem);
+   ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict, new_mem);
}

 out:
@@ -738,73 +739,6 @@ out:
return ret;
 }

-static int
-nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
-{
-   u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
-   struct ttm_placement placement;
-   struct ttm_mem_reg tmp_mem;
-   int ret;
-
-   placement.fpfn = placement.lpfn = 0;
-   placement.num_placement = placement.num_busy_placement = 1;
-   placement.placement = placement.busy_placement = &placement_memtype;
-
-   tmp_mem = *new_mem;
-   tmp_mem.mm_node = NULL;
-   ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_reserve, 
no_wait_gpu);
-   if (ret)
-   return ret;
-
-   ret = ttm_tt_bind(bo->ttm, &tmp_mem);
-   if (ret)
-   goto out;
-
-   ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, 
no_wait_gpu, &tmp_mem);
-   if (ret)
-   goto out;
-
-   ret = ttm_bo_move_ttm(bo, new_mem);
-out:
-   ttm_bo_mem_put(bo, &tmp_mem);
-   return ret;
-}
-
-static int
-nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait_reserve, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem)
-{
-   u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
-   struct ttm_placement placement;
-   struct ttm_mem_reg tmp_mem;
-   int ret;
-
-   placement.fpfn = placement.lpfn = 0;
-   placement.num_placement = placement.num_busy_placement = 1;
-   placement.placement = placement.busy_placement = &placement_memtype;
-
-   tmp_mem = *new_mem;
-   tmp_mem.mm_node = NULL;
- 

[PATCH] radeon: fix r600/agp when vram is after AGP (v3)

2012-04-17 Thread j.gli...@gmail.com
From: Jerome Glisse 

If AGP is placed in the middle, size_af is off by one, which results
in VRAM being placed at 0x7fffffff instead of 0x80000000.

v2: fix the vram_start setup.
v3: also fix r7xx & newer ASIC

Reported-by: russiane39 on #radeon

Signed-off-by: Dave Airlie 
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/r600.c  |4 ++--
 drivers/gpu/drm/radeon/rv770.c |4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 391bd26..96e3fa3 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1135,7 +1135,7 @@ static void r600_vram_gtt_location(struct radeon_device 
*rdev, struct radeon_mc
}
if (rdev->flags & RADEON_IS_AGP) {
size_bf = mc->gtt_start;
-   size_af = 0xFFFFFFFF - mc->gtt_end + 1;
+   size_af = 0xFFFFFFFF - mc->gtt_end;
if (size_bf > size_af) {
if (mc->mc_vram_size > size_bf) {
dev_warn(rdev->dev, "limiting VRAM\n");
@@ -1149,7 +1149,7 @@ static void r600_vram_gtt_location(struct radeon_device 
*rdev, struct radeon_mc
mc->real_vram_size = size_af;
mc->mc_vram_size = size_af;
}
-   mc->vram_start = mc->gtt_end;
+   mc->vram_start = mc->gtt_end + 1;
}
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM 
used)\n",
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index c62ae4b..cdab1ae 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -969,7 +969,7 @@ void r700_vram_gtt_location(struct radeon_device *rdev, 
struct radeon_mc *mc)
}
if (rdev->flags & RADEON_IS_AGP) {
size_bf = mc->gtt_start;
-   size_af = 0xFFFFFFFF - mc->gtt_end + 1;
+   size_af = 0xFFFFFFFF - mc->gtt_end;
if (size_bf > size_af) {
if (mc->mc_vram_size > size_bf) {
dev_warn(rdev->dev, "limiting VRAM\n");
@@ -983,7 +983,7 @@ void r700_vram_gtt_location(struct radeon_device *rdev, 
struct radeon_mc *mc)
mc->real_vram_size = size_af;
mc->mc_vram_size = size_af;
}
-   mc->vram_start = mc->gtt_end;
+   mc->vram_start = mc->gtt_end + 1;
}
mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM 
used)\n",
-- 
1.7.9.3



Reworking of GPU reset logic + dumping

2012-04-25 Thread j.gli...@gmail.com
Patches also available at:
http://people.freedesktop.org/~glisse/debug/

So it's Christian's series minus all the debugfs code related to
ring/ib/mc. The last patch adds a new blob dumping facility
that dumps everything (pm4, relocs table, bo content). It's
just a proof of concept to show what I meant, because code
speaks more clearly on this kind of topic.

The blob format we dump could be different; I went with a
simple binary dword format:
type, id, size, [data (present if size > 0)]

Note that the benefit (simpler code, less code) of dropping the
current debugfs files seems to me greater than their usefulness.

Cheers,
Jerome



[PATCH 01/24] drm/radeon: remove fence/ring/ib debugfs files

2012-04-25 Thread j.gli...@gmail.com
From: Jerome Glisse 

Those files were never really helpful in debugging.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/r100.c |  186 -
 drivers/gpu/drm/radeon/r300.c |   50 -
 drivers/gpu/drm/radeon/r420.c |   45 
 drivers/gpu/drm/radeon/r520.c |1 -
 drivers/gpu/drm/radeon/r600.c |   35 --
 drivers/gpu/drm/radeon/radeon_asic.h  |5 -
 drivers/gpu/drm/radeon/radeon_fence.c |   47 
 drivers/gpu/drm/radeon/radeon_ring.c  |  107 ---
 drivers/gpu/drm/radeon/rs400.c|   88 
 drivers/gpu/drm/radeon/rs600.c|7 --
 drivers/gpu/drm/radeon/rs690.c|1 -
 drivers/gpu/drm/radeon/rv515.c|   77 --
 12 files changed, 0 insertions(+), 649 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index fe33d35..9e69a95 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1094,9 +1094,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned 
ring_size)
uint32_t tmp;
int r;

-   if (r100_debugfs_cp_init(rdev)) {
-   DRM_ERROR("Failed to register debugfs file for CP !\n");
-   }
if (!rdev->me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
@@ -2604,178 +2601,6 @@ void r100_set_safe_registers(struct radeon_device *rdev)
}
 }

-/*
- * Debugfs info
- */
-#if defined(CONFIG_DEBUG_FS)
-static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
-{
-   struct drm_info_node *node = (struct drm_info_node *) m->private;
-   struct drm_device *dev = node->minor->dev;
-   struct radeon_device *rdev = dev->dev_private;
-   uint32_t reg, value;
-   unsigned i;
-
-   seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
-   seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
-   seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
-   for (i = 0; i < 64; i++) {
-   WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
-   reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
-   WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
-   value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
-   seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
-   }
-   return 0;
-}
-
-static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
-{
-   struct drm_info_node *node = (struct drm_info_node *) m->private;
-   struct drm_device *dev = node->minor->dev;
-   struct radeon_device *rdev = dev->dev_private;
-   struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-   uint32_t rdp, wdp;
-   unsigned count, i, j;
-
-   radeon_ring_free_size(rdev, ring);
-   rdp = RREG32(RADEON_CP_RB_RPTR);
-   wdp = RREG32(RADEON_CP_RB_WPTR);
-   count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
-   seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
-   seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
-   seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
-   seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
-   seq_printf(m, "%u dwords in ring\n", count);
-   for (j = 0; j <= count; j++) {
-   i = (rdp + j) & ring->ptr_mask;
-   seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
-   }
-   return 0;
-}
-
-
-static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
-{
-   struct drm_info_node *node = (struct drm_info_node *) m->private;
-   struct drm_device *dev = node->minor->dev;
-   struct radeon_device *rdev = dev->dev_private;
-   uint32_t csq_stat, csq2_stat, tmp;
-   unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
-   unsigned i;
-
-   seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
-   seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
-   csq_stat = RREG32(RADEON_CP_CSQ_STAT);
-   csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
-   r_rptr = (csq_stat >> 0) & 0x3ff;
-   r_wptr = (csq_stat >> 10) & 0x3ff;
-   ib1_rptr = (csq_stat >> 20) & 0x3ff;
-   ib1_wptr = (csq2_stat >> 0) & 0x3ff;
-   ib2_rptr = (csq2_stat >> 10) & 0x3ff;
-   ib2_wptr = (csq2_stat >> 20) & 0x3ff;
-   seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
-   seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
-   seq_printf(m, "Ring rptr %u\n", r_rptr);
-   seq_printf(m, "Ring wptr %u\n", r_wptr);
-   seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
-   seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
-   seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
-   seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
-   /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
-* 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
-   seq_printf(m, "Ring fifo:\n");
-   for 

[PATCH 02/24] drm/radeon: make radeon_gpu_is_lockup a per ring function

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Different rings have different criteria to test
if they are stuck.

v2: rebased on current drm-next

Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/radeon/radeon.h   |4 +-
 drivers/gpu/drm/radeon/radeon_asic.c  |   44 ++--
 drivers/gpu/drm/radeon/radeon_fence.c |2 +-
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 138b952..bea99e3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1144,7 +1144,6 @@ struct radeon_asic {
int (*resume)(struct radeon_device *rdev);
int (*suspend)(struct radeon_device *rdev);
void (*vga_set_state)(struct radeon_device *rdev, bool state);
-   bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring 
*cp);
int (*asic_reset)(struct radeon_device *rdev);
/* ioctl hw specific callback. Some hw might want to perform special
 * operation on specific ioctl. For instance on wait idle some hw
@@ -1173,6 +1172,7 @@ struct radeon_asic {
void (*ring_start)(struct radeon_device *rdev, struct 
radeon_ring *cp);
int (*ring_test)(struct radeon_device *rdev, struct radeon_ring 
*cp);
int (*ib_test)(struct radeon_device *rdev, struct radeon_ring 
*cp);
+   bool (*is_lockup)(struct radeon_device *rdev, struct 
radeon_ring *cp);
} ring[RADEON_NUM_RINGS];
/* irqs */
struct {
@@ -1730,7 +1730,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t 
v);
 #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p))
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), 
(state))
-#define radeon_gpu_is_lockup(rdev, cp) (rdev)->asic->gpu_is_lockup((rdev), 
(cp))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), 
(i), (p))
@@ -1739,6 +1738,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t 
v);
 #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), 
(cp))
 #define radeon_ring_ib_execute(rdev, r, ib) 
(rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
 #define radeon_ring_ib_parse(rdev, r, ib) 
(rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
+#define radeon_ring_is_lockup(rdev, r, cp) 
(rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
 #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) 
(rdev)->asic->display.get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c
index be4dc2f..958b9ea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -134,7 +134,6 @@ static struct radeon_asic r100_asic = {
.suspend = &r100_suspend,
.resume = &r100_resume,
.vga_set_state = &r100_vga_set_state,
-   .gpu_is_lockup = &r100_gpu_is_lockup,
.asic_reset = &r100_asic_reset,
.ioctl_wait_idle = NULL,
.gui_idle = &r100_gui_idle,
@@ -152,6 +151,7 @@ static struct radeon_asic r100_asic = {
.ring_start = &r100_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -208,7 +208,6 @@ static struct radeon_asic r200_asic = {
.suspend = &r100_suspend,
.resume = &r100_resume,
.vga_set_state = &r100_vga_set_state,
-   .gpu_is_lockup = &r100_gpu_is_lockup,
.asic_reset = &r100_asic_reset,
.ioctl_wait_idle = NULL,
.gui_idle = &r100_gui_idle,
@@ -226,6 +225,7 @@ static struct radeon_asic r200_asic = {
.ring_start = &r100_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -282,7 +282,6 @@ static struct radeon_asic r300_asic = {
.suspend = &r300_suspend,
.resume = &r300_resume,
.vga_set_state = &r100_vga_set_state,
-   .gpu_is_lockup = &r300_gpu_is_lockup,
.asic_reset = &r300_asic_reset,
.ioctl_wait_idle = NULL,
.gui_idle = &r100_gui_idle,
@@ -300,6 +299,7 @@ static struct radeon_asic r300_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
+   .is_lockup = &r300_gpu_is_lockup,

[PATCH 03/24] drm/radeon: replace gpu_lockup with ring->ready flag

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

It makes no sense at all to have more than one flag.

Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/radeon/r100.c  |1 -
 drivers/gpu/drm/radeon/r300.c  |1 -
 drivers/gpu/drm/radeon/radeon.h|1 -
 drivers/gpu/drm/radeon/radeon_device.c |1 -
 drivers/gpu/drm/radeon/radeon_fence.c  |   36 +++
 drivers/gpu/drm/radeon/rs600.c |1 -
 6 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 9e69a95..01e597ad 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2293,7 +2293,6 @@ int r100_asic_reset(struct radeon_device *rdev)
if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
dev_err(rdev->dev, "failed to reset GPU\n");
-   rdev->gpu_lockup = true;
ret = -1;
} else
dev_info(rdev->dev, "GPU reset succeed\n");
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index c5237ab..40baaec 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -444,7 +444,6 @@ int r300_asic_reset(struct radeon_device *rdev)
/* Check if GPU is idle */
if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
dev_err(rdev->dev, "failed to reset GPU\n");
-   rdev->gpu_lockup = true;
ret = -1;
} else
dev_info(rdev->dev, "GPU reset succeed\n");
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index bea99e3..365334b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1529,7 +1529,6 @@ struct radeon_device {
struct radeon_mutex cs_mutex;
struct radeon_wbwb;
struct radeon_dummy_pagedummy_page;
-   boolgpu_lockup;
boolshutdown;
boolsuspend;
boolneed_dma32;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index ea7df16..eb63a06 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -714,7 +714,6 @@ int radeon_device_init(struct radeon_device *rdev,
rdev->is_atom_bios = false;
rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
-   rdev->gpu_lockup = false;
rdev->accel_working = false;

DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 
0x%04X:0x%04X).\n",
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 79ce829..c0ec19d 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -71,14 +71,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct 
radeon_fence *fence)
return 0;
}
fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
-   if (!rdev->ring[fence->ring].ready)
-   /* FIXME: cp is not running assume everythings is done right
-* away
-*/
-   radeon_fence_write(rdev, fence->seq, fence->ring);
-   else
-   radeon_fence_ring_emit(rdev, fence->ring, fence);
-
+   radeon_fence_ring_emit(rdev, fence->ring, fence);
trace_radeon_fence_emit(rdev->ddev, fence->seq);
fence->emitted = true;
list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
@@ -191,9 +184,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
if (!fence)
return true;

-   if (fence->rdev->gpu_lockup)
-   return true;
-
write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
signaled = fence->signaled;
/* if we are shuting down report all fence as signaled */
@@ -260,18 +250,16 @@ retry:
 */
if (seq == rdev->fence_drv[fence->ring].last_seq &&
radeon_ring_is_lockup(rdev, fence->ring, 
&rdev->ring[fence->ring])) {
+
/* good news we believe it's a lockup */
printk(KERN_WARNING "GPU lockup (waiting for 0x%08X 
last fence id 0x%08X)\n",
 fence->seq, seq);
-   /* FIXME: what should we do ? marking everyone
-* as signaled for now
-*/
-   rdev->gpu_lockup = true;
+
+   /* mark the ring as not ready any more */
+   rdev->ring[fence->ring].ready = false;
r = radeon_gpu_reset(rdev);
if (r)
  

[PATCH 04/24] drm/radeon: use central function for IB testing

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Removing all the different error messages and
having just one standard behaviour over all
chipset generations.

Signed-off-by: Christian König 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/radeon/evergreen.c   |7 ++-
 drivers/gpu/drm/radeon/ni.c  |7 ++-
 drivers/gpu/drm/radeon/r100.c|7 ++-
 drivers/gpu/drm/radeon/r300.c|7 ++-
 drivers/gpu/drm/radeon/r420.c|7 ++-
 drivers/gpu/drm/radeon/r520.c|8 +++-
 drivers/gpu/drm/radeon/r600.c|7 ++-
 drivers/gpu/drm/radeon/radeon.h  |1 +
 drivers/gpu/drm/radeon/radeon_ring.c |   30 ++
 drivers/gpu/drm/radeon/rs400.c   |7 ++-
 drivers/gpu/drm/radeon/rs600.c   |7 ++-
 drivers/gpu/drm/radeon/rs690.c   |7 ++-
 drivers/gpu/drm/radeon/rv515.c   |8 +++-
 drivers/gpu/drm/radeon/rv770.c   |7 ++-
 14 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index cfa372c..ca47f52 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3248,12 +3248,9 @@ static int evergreen_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }

r = r600_audio_init(rdev);
if (r) {
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index a48ca53..0146428 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1601,12 +1601,9 @@ static int cayman_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }

r = radeon_vm_manager_start(rdev);
if (r)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 01e597ad..be51f7b 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3780,12 +3780,9 @@ static int r100_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }

return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 40baaec..ae15c10 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1367,12 +1367,9 @@ static int r300_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }

return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 2eba44c..744348b 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -269,12 +269,9 @@ static int r420_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }

return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 2e5ff86..2ef0c34 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -207,12 +207,10 @@ static int r520_startup(struct radeon_device *rdev)
if (r)
return r;

-   r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-   if (r) {
-   dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-   rdev->accel_working = false;
+   r = radeon_ib_ring_tests(rdev);
+   if (r)
return r;
-   }
+
return 0;
 }

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/

[PATCH 05/24] drm/radeon: rework gpu lockup detection and processing

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Previously, multiple rings could trigger multiple GPU
resets at the same time.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h   |3 +-
 drivers/gpu/drm/radeon/radeon_fence.c |  150 +
 2 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8801657..85a3aa9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -255,8 +255,7 @@ struct radeon_fence_driver {
volatile uint32_t   *cpu_addr;
atomic_tseq;
uint32_tlast_seq;
-   unsigned long   last_jiffies;
-   unsigned long   last_timeout;
+   unsigned long   last_activity;
wait_queue_head_t   queue;
struct list_headcreated;
struct list_heademitted;
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index c0ec19d..5d2ca1d 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct 
radeon_fence *fence)
radeon_fence_ring_emit(rdev, fence->ring, fence);
trace_radeon_fence_emit(rdev->ddev, fence->seq);
fence->emitted = true;
+   /* are we the first fence on a previusly idle ring? */
+   if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
+   rdev->fence_drv[fence->ring].last_activity = jiffies;
+   }
list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
return 0;
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device 
*rdev, int ring)
struct list_head *i, *n;
uint32_t seq;
bool wake = false;
-   unsigned long cjiffies;

seq = radeon_fence_read(rdev, ring);
-   if (seq != rdev->fence_drv[ring].last_seq) {
-   rdev->fence_drv[ring].last_seq = seq;
-   rdev->fence_drv[ring].last_jiffies = jiffies;
-   rdev->fence_drv[ring].last_timeout = 
RADEON_FENCE_JIFFIES_TIMEOUT;
-   } else {
-   cjiffies = jiffies;
-   if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
-   cjiffies -= rdev->fence_drv[ring].last_jiffies;
-   if (time_after(rdev->fence_drv[ring].last_timeout, 
cjiffies)) {
-   /* update the timeout */
-   rdev->fence_drv[ring].last_timeout -= cjiffies;
-   } else {
-   /* the 500ms timeout is elapsed we should test
-* for GPU lockup
-*/
-   rdev->fence_drv[ring].last_timeout = 1;
-   }
-   } else {
-   /* wrap around update last jiffies, we will just wait
-* a little longer
-*/
-   rdev->fence_drv[ring].last_jiffies = cjiffies;
-   }
+   if (seq == rdev->fence_drv[ring].last_seq)
return false;
-   }
+
+   rdev->fence_drv[ring].last_seq = seq;
+   rdev->fence_drv[ring].last_activity = jiffies;
+
n = NULL;
list_for_each(i, &rdev->fence_drv[ring].emitted) {
fence = list_entry(i, struct radeon_fence, list);
@@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool 
intr)
struct radeon_device *rdev;
unsigned long irq_flags, timeout;
u32 seq;
-   int r;
+   int i, r;
+   bool signaled;

if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
-   return 0;
+   return -EINVAL;
}
+
rdev = fence->rdev;
-   if (radeon_fence_signaled(fence)) {
-   return 0;
-   }
-   timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
-   /* save current sequence used to check for GPU lockup */
-   seq = rdev->fence_drv[fence->ring].last_seq;
-   trace_radeon_fence_wait_begin(rdev->ddev, seq);
-   if (intr) {
+   signaled = radeon_fence_signaled(fence);
+   while (!signaled) {
+   read_lock_irqsave(&rdev->fence_lock, irq_flags);
+   timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+   if (time_after(rdev->fence_drv[fence->ring].last_activity, 
timeout)) {
+   /* the normal case, timeout is somewhere before 
last_activity */
+   timeout = rdev->fence_drv[fence->ring].last_activity - 
timeout;
+   } else {
+   /* either jiffies wrapped around,

[PATCH 06/24] drm/radeon: fix a critical bug in the SA code

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Aligning offset can make it bigger than tmp->offset
leading to an overrun bug in the following subtraction.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon_sa.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_sa.c 
b/drivers/gpu/drm/radeon/radeon_sa.c
index 4cce47e..8fbfe69 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -150,7 +150,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
offset = 0;
list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
/* room before this object ? */
-   if ((tmp->offset - offset) >= size) {
+   if (offset < tmp->offset && (tmp->offset - offset) >= size) {
head = tmp->list.prev;
goto out;
}
-- 
1.7.7.6



[PATCH 07/24] drm/radeon: add proper locking to the SA v2

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Make the suballocator self-contained with respect to locking.

v2: split the bugfix into a separate patch.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h|1 +
 drivers/gpu/drm/radeon/radeon_sa.c |   17 +++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 85a3aa9..1aefbd9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -381,6 +381,7 @@ struct radeon_bo_list {
  * alignment).
  */
 struct radeon_sa_manager {
+   spinlock_t  lock;
struct radeon_bo*bo;
struct list_headsa_bo;
unsignedsize;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c 
b/drivers/gpu/drm/radeon/radeon_sa.c
index 8fbfe69..4ce5c51 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -37,6 +37,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 {
int r;

+   spin_lock_init(&sa_manager->lock);
sa_manager->bo = NULL;
sa_manager->size = size;
sa_manager->domain = domain;
@@ -136,18 +137,19 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
struct radeon_sa_bo *tmp;
struct list_head *head;
unsigned offset = 0, wasted = 0;
+   unsigned long flags;

BUG_ON(align > RADEON_GPU_PAGE_SIZE);
BUG_ON(size > sa_manager->size);
+   spin_lock_irqsave(&sa_manager->lock, flags);

/* no one ? */
-   head = sa_manager->sa_bo.prev;
if (list_empty(&sa_manager->sa_bo)) {
+   head = &sa_manager->sa_bo;
goto out;
}

/* look for a hole big enough */
-   offset = 0;
list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
/* room before this object ? */
if (offset < tmp->offset && (tmp->offset - offset) >= size) {
@@ -157,9 +159,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
offset = tmp->offset + tmp->size;
wasted = offset % align;
if (wasted) {
-   wasted = align - wasted;
+   offset += align - wasted;
}
-   offset += wasted;
}
/* room at the end ? */
head = sa_manager->sa_bo.prev;
@@ -167,11 +168,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
offset = tmp->offset + tmp->size;
wasted = offset % align;
if (wasted) {
-   wasted = align - wasted;
+   offset += wasted = align - wasted;
}
-   offset += wasted;
if ((sa_manager->size - offset) < size) {
/* failed to find somethings big enough */
+   spin_unlock_irqrestore(&sa_manager->lock, flags);
return -ENOMEM;
}

@@ -180,10 +181,14 @@ out:
sa_bo->offset = offset;
sa_bo->size = size;
list_add(&sa_bo->list, head);
+   spin_unlock_irqrestore(&sa_manager->lock, flags);
return 0;
 }

 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
 {
+   unsigned long flags;
+   spin_lock_irqsave(&sa_bo->manager->lock, flags);
list_del_init(&sa_bo->list);
+   spin_unlock_irqrestore(&sa_bo->manager->lock, flags);
 }
-- 
1.7.7.6



[PATCH 08/24] drm/radeon: add biggest hole tracking and wakequeue to the sa v4

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

With that in place clients are automatically blocking
until their memory request can be handled.

v2: block only if the memory request can't be satisfied
in the first try, the first version actually lacked
a night of sleep.

v3: make blocking optional, update comments and fix
another bug with biggest hole tracking.

v4: drop debugfs portion

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h|5 +-
 drivers/gpu/drm/radeon/radeon_gart.c   |2 +-
 drivers/gpu/drm/radeon/radeon_object.h |2 +-
 drivers/gpu/drm/radeon/radeon_ring.c   |   20 ++--
 drivers/gpu/drm/radeon/radeon_sa.c |  178 ++--
 5 files changed, 138 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1aefbd9..415a496 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -381,17 +381,16 @@ struct radeon_bo_list {
  * alignment).
  */
 struct radeon_sa_manager {
-   spinlock_t  lock;
+   wait_queue_head_t   queue;
struct radeon_bo*bo;
struct list_headsa_bo;
unsignedsize;
+   struct list_head*biggest_hole;
uint64_tgpu_addr;
void*cpu_ptr;
uint32_tdomain;
 };

-struct radeon_sa_bo;
-
 /* sub-allocation buffer */
 struct radeon_sa_bo {
struct list_headlist;
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index c58a036..7af4ff9 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -395,7 +395,7 @@ int radeon_vm_bind(struct radeon_device *rdev, struct 
radeon_vm *vm)
 retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
 RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
-RADEON_GPU_PAGE_SIZE);
+RADEON_GPU_PAGE_SIZE, false);
if (r) {
if (list_empty(&rdev->vm_manager.lru_vm)) {
return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h 
b/drivers/gpu/drm/radeon/radeon_object.h
index f9104be..a181c2f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -158,7 +158,7 @@ extern int radeon_sa_bo_manager_suspend(struct 
radeon_device *rdev,
 extern int radeon_sa_bo_new(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager,
struct radeon_sa_bo *sa_bo,
-   unsigned size, unsigned align);
+   unsigned size, unsigned align, bool block);
 extern void radeon_sa_bo_free(struct radeon_device *rdev,
  struct radeon_sa_bo *sa_bo);

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index e394131..b06e04f 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -121,7 +121,7 @@ retry:
if (rdev->ib_pool.ibs[idx].fence == NULL) {
r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager,
 &rdev->ib_pool.ibs[idx].sa_bo,
-size, 256);
+size, 256, false);
if (!r) {
*ib = &rdev->ib_pool.ibs[idx];
(*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr;
@@ -202,10 +202,16 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct 
radeon_ib *ib)

 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-   struct radeon_sa_manager tmp;
int i, r;

-   r = radeon_sa_bo_manager_init(rdev, &tmp,
+   radeon_mutex_lock(&rdev->ib_pool.mutex);
+   if (rdev->ib_pool.ready) {
+   return 0;
+   }
+   rdev->ib_pool.ready = true;
+   radeon_mutex_unlock(&rdev->ib_pool.mutex);
+
+   r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
  RADEON_IB_POOL_SIZE*64*1024,
  RADEON_GEM_DOMAIN_GTT);
if (r) {
@@ -213,14 +219,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
}

radeon_mutex_lock(&rdev->ib_pool.mutex);
-   if (rdev->ib_pool.ready) {
-   radeon_mutex_unlock(&rdev->ib_pool.mutex);
-   radeon_sa_bo_manager_fini(rdev, &tmp);
-   return 0;
-   }
-
-   rdev->ib_pool.sa_manager = tmp;
-   INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
rdev->ib_pool.ibs[i].fence = NULL;
rdev->ib_pool.ibs[i].idx = i;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c 
b/drivers/gpu/drm/radeon/ra

[PATCH 09/24] drm/radeon: simplify semaphore handling

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

Directly use the suballocator to get small chunks
of memory. It's equally fast and doesn't crash when
we encounter a GPU reset.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/evergreen.c|1 -
 drivers/gpu/drm/radeon/ni.c   |1 -
 drivers/gpu/drm/radeon/r600.c |1 -
 drivers/gpu/drm/radeon/radeon.h   |   29 +--
 drivers/gpu/drm/radeon/radeon_device.c|2 -
 drivers/gpu/drm/radeon/radeon_semaphore.c |  137 +
 drivers/gpu/drm/radeon/rv770.c|1 -
 drivers/gpu/drm/radeon/si.c   |1 -
 8 files changed, 26 insertions(+), 147 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index ca47f52..a76389c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3431,7 +3431,6 @@ void evergreen_fini(struct radeon_device *rdev)
evergreen_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
-   radeon_semaphore_driver_fini(rdev);
radeon_fence_driver_fini(rdev);
radeon_agp_fini(rdev);
radeon_bo_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 0146428..c0b0956 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1773,7 +1773,6 @@ void cayman_fini(struct radeon_device *rdev)
cayman_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
-   radeon_semaphore_driver_fini(rdev);
radeon_fence_driver_fini(rdev);
radeon_bo_fini(rdev);
radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index bc3a2ef..24e68fd 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2667,7 +2667,6 @@ void r600_fini(struct radeon_device *rdev)
r600_vram_scratch_fini(rdev);
radeon_agp_fini(rdev);
radeon_gem_fini(rdev);
-   radeon_semaphore_driver_fini(rdev);
radeon_fence_driver_fini(rdev);
radeon_bo_fini(rdev);
radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 415a496..222939f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -427,34 +427,12 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 /*
  * Semaphores.
  */
-struct radeon_ring;
-
-#defineRADEON_SEMAPHORE_BO_SIZE256
-
-struct radeon_semaphore_driver {
-   rwlock_tlock;
-   struct list_headbo;
-};
-
-struct radeon_semaphore_bo;
-
-/* everything here is constant */
 struct radeon_semaphore {
-   struct list_headlist;
-   uint64_tgpu_addr;
-   uint32_t*cpu_ptr;
-   struct radeon_semaphore_bo  *bo;
-};
-
-struct radeon_semaphore_bo {
-   struct list_headlist;
-   struct radeon_ib*ib;
-   struct list_headfree;
-   struct radeon_semaphore semaphores[RADEON_SEMAPHORE_BO_SIZE/8];
-   unsignednused;
+   struct radeon_sa_bo sa_bo;
+   signed  waiters;
+   uint64_tgpu_addr;
 };

-void radeon_semaphore_driver_fini(struct radeon_device *rdev);
 int radeon_semaphore_create(struct radeon_device *rdev,
struct radeon_semaphore **semaphore);
 void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
@@ -1518,7 +1496,6 @@ struct radeon_device {
struct radeon_mman  mman;
rwlock_tfence_lock;
struct radeon_fence_driver  fence_drv[RADEON_NUM_RINGS];
-   struct radeon_semaphore_driver  semaphore_drv;
struct radeon_ring  ring[RADEON_NUM_RINGS];
struct radeon_ib_pool   ib_pool;
struct radeon_irq   irq;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index eb63a06..f314819 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -733,11 +733,9 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->pm.mutex);
mutex_init(&rdev->vram_mutex);
rwlock_init(&rdev->fence_lock);
-   rwlock_init(&rdev->semaphore_drv.lock);
INIT_LIST_HEAD(&rdev->gem.objects);
init_waitqueue_head(&rdev->irq.vblank_queue);
init_waitqueue_head(&rdev->irq.idle_queue);
-   INIT_LIST_HEAD(&rdev->semaphore_drv.bo);
/* initialize vm here */
rdev->vm_manager.use_bitmap = 1;
rdev->vm_manager.max_pfn = 1 << 20;
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c 
b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 61dd4e3..b67c259 100644
--- a/drive

[PATCH 10/24] drm/radeon: return -ENOENT in fence_wait_next v2

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

We should signal the caller that we haven't waited at all.

v2: only change fence_wait_next not fence_wait_last.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon_fence.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 5d2ca1d..b8db542 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int 
ring)
}
if (list_empty(&rdev->fence_drv[ring].emitted)) {
write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-   return 0;
+   return -ENOENT;
}
fence = list_entry(rdev->fence_drv[ring].emitted.next,
   struct radeon_fence, list);
-- 
1.7.7.6



[PATCH 11/24] drm/radeon: rename fence_wait_last to fence_wait_empty

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

As discussed with Michel, that name better
describes the behavior of this function.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h|2 +-
 drivers/gpu/drm/radeon/radeon_device.c |2 +-
 drivers/gpu/drm/radeon/radeon_fence.c  |4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 222939f..7f73f40 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -285,7 +285,7 @@ void radeon_fence_process(struct radeon_device *rdev, int 
ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
-int radeon_fence_wait_last(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index f314819..d77f19f 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -913,7 +913,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t 
state)
radeon_bo_evict_vram(rdev);
/* wait for gpu to finish processing current batch */
for (i = 0; i < RADEON_NUM_RINGS; i++)
-   radeon_fence_wait_last(rdev, i);
+   radeon_fence_wait_empty(rdev, i);

radeon_save_bios_scratch_regs(rdev);

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index b8db542..c96d112 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -297,7 +297,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int 
ring)
return r;
 }

-int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 {
unsigned long irq_flags;
struct radeon_fence *fence;
@@ -439,7 +439,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
if (!rdev->fence_drv[ring].initialized)
continue;
-   radeon_fence_wait_last(rdev, ring);
+   radeon_fence_wait_empty(rdev, ring);
wake_up_all(&rdev->fence_drv[ring].queue);
write_lock_irqsave(&rdev->fence_lock, irq_flags);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
-- 
1.7.7.6



[PATCH 12/24] drm/radeon: rip out the ib pool v2

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

It isn't necessary any more and the suballocator
seems to perform even better.

v2: drop debugfs

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h   |   22 +---
 drivers/gpu/drm/radeon/radeon_device.c|1 -
 drivers/gpu/drm/radeon/radeon_fence.c |   44 ++-
 drivers/gpu/drm/radeon/radeon_gart.c  |   12 +-
 drivers/gpu/drm/radeon/radeon_ring.c  |  187 ++---
 drivers/gpu/drm/radeon/radeon_semaphore.c |6 +-
 6 files changed, 116 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7f73f40..575dc05 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -249,6 +249,8 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, 
unsigned *bankw,
 /*
  * Fences.
  */
+struct radeon_ib;
+
 struct radeon_fence_driver {
uint32_tscratch_reg;
uint64_tgpu_addr;
@@ -259,7 +261,6 @@ struct radeon_fence_driver {
wait_queue_head_t   queue;
struct list_headcreated;
struct list_heademitted;
-   struct list_headsignaled;
boolinitialized;
 };

@@ -274,6 +275,7 @@ struct radeon_fence {
/* RB, DMA, etc. */
int ring;
struct radeon_semaphore *semaphore;
+   struct radeon_ib*ib;
 };

 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -289,6 +291,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int 
ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+bool radeon_fence_set_associated_ib(struct radeon_fence *fence, struct 
radeon_ib *ib);

 /*
  * Tiling registers
@@ -603,7 +606,6 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device 
*rdev, int crtc);

 struct radeon_ib {
struct radeon_sa_bo sa_bo;
-   unsignedidx;
uint32_tlength_dw;
uint64_tgpu_addr;
uint32_t*ptr;
@@ -612,18 +614,6 @@ struct radeon_ib {
boolis_const_ib;
 };

-/*
- * locking -
- * mutex protects scheduled_ibs, ready, alloc_bm
- */
-struct radeon_ib_pool {
-   struct radeon_mutex mutex;
-   struct radeon_sa_managersa_manager;
-   struct radeon_ibibs[RADEON_IB_POOL_SIZE];
-   boolready;
-   unsignedhead_id;
-};
-
 struct radeon_ring {
struct radeon_bo*ring_obj;
volatile uint32_t   *ring;
@@ -764,7 +754,6 @@ struct si_rlc {
 int radeon_ib_get(struct radeon_device *rdev, int ring,
  struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
-bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
@@ -1497,7 +1486,8 @@ struct radeon_device {
rwlock_tfence_lock;
struct radeon_fence_driver  fence_drv[RADEON_NUM_RINGS];
struct radeon_ring  ring[RADEON_NUM_RINGS];
-   struct radeon_ib_pool   ib_pool;
+   boolib_pool_ready;
+   struct radeon_sa_managersa_manager;
struct radeon_irq   irq;
struct radeon_asic  *asic;
struct radeon_gem   gem;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index d77f19f..26fb9da 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -723,7 +723,6 @@ int radeon_device_init(struct radeon_device *rdev,
/* mutex initialization are all done here so we
 * can recall function without having locking issues */
radeon_mutex_init(&rdev->cs_mutex);
-   radeon_mutex_init(&rdev->ib_pool.mutex);
for (i = 0; i < RADEON_NUM_RINGS; ++i)
mutex_init(&rdev->ring[i].mutex);
mutex_init(&rdev->dc_hw_i2c_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index c96d112..abd10f9 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -83,7 +83,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct 
radeon_fence *fence)
return 0;
 }

-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+static bool radeon_fence_poll_locked(struct radeon_device *rde

[PATCH 13/24] drm/radeon: fix a bug with the ring syncing code

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

Rings need to lock in order, otherwise
the ring subsystem can deadlock.

v2: fix error handling and number of locked doublewords.
v3: stop creating unneeded semaphores.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h   |4 ++
 drivers/gpu/drm/radeon/radeon_cs.c|   35 ++
 drivers/gpu/drm/radeon/radeon_semaphore.c |   56 +
 drivers/gpu/drm/radeon/radeon_ttm.c   |   48 +++--
 4 files changed, 93 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 575dc05..c7f14d7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -442,6 +442,10 @@ void radeon_semaphore_emit_signal(struct radeon_device 
*rdev, int ring,
  struct radeon_semaphore *semaphore);
 void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
+int radeon_semaphore_sync_rings(struct radeon_device *rdev,
+   struct radeon_semaphore *semaphore,
+   bool sync_to[RADEON_NUM_RINGS],
+   int dst_ring);
 void radeon_semaphore_free(struct radeon_device *rdev,
   struct radeon_semaphore *semaphore);

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 5cac832..64b86e7 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -118,6 +118,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, 
u32 ring, s32 priority
 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
bool sync_to_ring[RADEON_NUM_RINGS] = { };
+   bool need_sync = false;
int i, r;

for (i = 0; i < p->nrelocs; i++) {
@@ -126,36 +127,24 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser 
*p)

if (!(p->relocs[i].flags & RADEON_RELOC_DONT_SYNC)) {
struct radeon_fence *fence = 
p->relocs[i].robj->tbo.sync_obj;
-   if (!radeon_fence_signaled(fence)) {
+   if (fence->ring != p->ring && 
!radeon_fence_signaled(fence)) {
sync_to_ring[fence->ring] = true;
+   need_sync = true;
}
}
}

-   for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-   /* no need to sync to our own or unused rings */
-   if (i == p->ring || !sync_to_ring[i] || !p->rdev->ring[i].ready)
-   continue;
-
-   if (!p->ib->fence->semaphore) {
-   r = radeon_semaphore_create(p->rdev, 
&p->ib->fence->semaphore);
-   if (r)
-   return r;
-   }
-
-   r = radeon_ring_lock(p->rdev, &p->rdev->ring[i], 3);
-   if (r)
-   return r;
-   radeon_semaphore_emit_signal(p->rdev, i, 
p->ib->fence->semaphore);
-   radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[i]);
+   if (!need_sync) {
+   return 0;
+   }

-   r = radeon_ring_lock(p->rdev, &p->rdev->ring[p->ring], 3);
-   if (r)
-   return r;
-   radeon_semaphore_emit_wait(p->rdev, p->ring, 
p->ib->fence->semaphore);
-   radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[p->ring]);
+   r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore);
+   if (r) {
+   return r;
}
-   return 0;
+
+   return radeon_semaphore_sync_rings(p->rdev, p->ib->fence->semaphore,
+  sync_to_ring, p->ring);
 }

 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c 
b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 9e2cf40..042dbfe 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -75,6 +75,62 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, 
int ring,
radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, 
true);
 }

+int radeon_semaphore_sync_rings(struct radeon_device *rdev,
+   struct radeon_semaphore *semaphore,
+   bool sync_to[RADEON_NUM_RINGS],
+   int dst_ring)
+{
+   int i, r;
+
+   for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+   unsigned num_ops = i == dst_ring ? RADEON_NUM_RINGS : 1;
+
+   /* don't lock unused rings */
+   if (!sync_to[i] && i != dst_ring)
+   continue;
+
+   /* prevent GPU deadlocks */
+   if (!rdev->ring[i].ready) {
+   dev_err(rdev->dev, "

[PATCH 14/24] drm/radeon: rework recursive gpu reset handling

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

Instead of all this humpy pumpy with recursive
mutexes (which also fixes only half of the problem)
move the actual gpu reset out of the fence code,
return -EDEADLK and then reset the gpu in the
calling ioctl function.

v2: Split removal of radeon_mutex into separate patch.
Return -EAGAIN if reset is successful.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon_cs.c |   13 +
 drivers/gpu/drm/radeon/radeon_device.c |5 -
 drivers/gpu/drm/radeon/radeon_fence.c  |   10 +++---
 drivers/gpu/drm/radeon/radeon_gem.c|   16 
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 64b86e7..a0826bb 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -489,6 +489,16 @@ out:
return r;
 }

+static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
+{
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   r = -EAGAIN;
+   }
+   return r;
+}
+
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
struct radeon_device *rdev = dev->dev_private;
@@ -510,6 +520,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
radeon_cs_parser_fini(&parser, r);
+   r = radeon_cs_handle_lockup(rdev, r);
radeon_mutex_unlock(&rdev->cs_mutex);
return r;
}
@@ -518,6 +529,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to parse relocation %d!\n", r);
radeon_cs_parser_fini(&parser, r);
+   r = radeon_cs_handle_lockup(rdev, r);
radeon_mutex_unlock(&rdev->cs_mutex);
return r;
}
@@ -531,6 +543,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
}
 out:
radeon_cs_parser_fini(&parser, r);
+   r = radeon_cs_handle_lockup(rdev, r);
radeon_mutex_unlock(&rdev->cs_mutex);
return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 26fb9da..1dac27d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -984,9 +984,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
int r;
int resched;

-   /* Prevent CS ioctl from interfering */
-   radeon_mutex_lock(&rdev->cs_mutex);
-
radeon_save_bios_scratch_regs(rdev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1001,8 +998,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
}

-   radeon_mutex_unlock(&rdev->cs_mutex);
-
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(rdev->dev, "GPU reset failed\n");
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index abd10f9..2868eda 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -267,6 +267,8 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
/* change sequence value on all rings, so nobody else 
things there is a lockup */
for (i = 0; i < RADEON_NUM_RINGS; ++i)
rdev->fence_drv[i].last_seq -= 0x1;
+
+   rdev->fence_drv[fence->ring].last_activity = jiffies;
write_unlock_irqrestore(&rdev->fence_lock, irq_flags);

if (radeon_ring_is_lockup(rdev, fence->ring, 
&rdev->ring[fence->ring])) {
@@ -277,13 +279,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool 
intr)

/* mark the ring as not ready any more */
rdev->ring[fence->ring].ready = false;
-   r = radeon_gpu_reset(rdev);
-   if (r)
-   return r;
-
-   write_lock_irqsave(&rdev->fence_lock, 
irq_flags);
-   rdev->fence_drv[fence->ring].last_activity = 
jiffies;
-   write_unlock_irqrestore(&rdev->fence_lock, 
irq_flags);
+   return -EDEADLK;
}
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index c7008b5..e15cb1f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -154,6 +154,17 @@ void 

[PATCH 15/24] drm/radeon: remove recursive mutex implementation

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

Not needed anymore.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h|   44 +---
 drivers/gpu/drm/radeon/radeon_cs.c |   10 +++---
 drivers/gpu/drm/radeon/radeon_device.c |2 +-
 drivers/gpu/drm/radeon/radeon_gart.c   |   16 ++--
 drivers/gpu/drm/radeon/radeon_gem.c|4 +-
 5 files changed, 17 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c7f14d7..6ec4025 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -155,48 +155,6 @@ static inline int radeon_atrm_get_bios_chunk(uint8_t 
*bios, int offset, int len)
 #endif
 bool radeon_get_bios(struct radeon_device *rdev);

-
-/*
- * Mutex which allows recursive locking from the same process.
- */
-struct radeon_mutex {
-   struct mutexmutex;
-   struct task_struct  *owner;
-   int level;
-};
-
-static inline void radeon_mutex_init(struct radeon_mutex *mutex)
-{
-   mutex_init(&mutex->mutex);
-   mutex->owner = NULL;
-   mutex->level = 0;
-}
-
-static inline void radeon_mutex_lock(struct radeon_mutex *mutex)
-{
-   if (mutex_trylock(&mutex->mutex)) {
-   /* The mutex was unlocked before, so it's ours now */
-   mutex->owner = current;
-   } else if (mutex->owner != current) {
-   /* Another process locked the mutex, take it */
-   mutex_lock(&mutex->mutex);
-   mutex->owner = current;
-   }
-   /* Otherwise the mutex was already locked by this process */
-
-   mutex->level++;
-}
-
-static inline void radeon_mutex_unlock(struct radeon_mutex *mutex)
-{
-   if (--mutex->level > 0)
-   return;
-
-   mutex->owner = NULL;
-   mutex_unlock(&mutex->mutex);
-}
-
-
 /*
  * Dummy page
  */
@@ -1497,7 +1455,7 @@ struct radeon_device {
struct radeon_gem   gem;
struct radeon_pmpm;
uint32_tbios_scratch[RADEON_BIOS_NUM_SCRATCH];
-   struct radeon_mutex cs_mutex;
+   struct mutexcs_mutex;
struct radeon_wbwb;
struct radeon_dummy_pagedummy_page;
boolshutdown;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index a0826bb..38e1496 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -505,9 +505,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
struct radeon_cs_parser parser;
int r;

-   radeon_mutex_lock(&rdev->cs_mutex);
+   mutex_lock(&rdev->cs_mutex);
if (!rdev->accel_working) {
-   radeon_mutex_unlock(&rdev->cs_mutex);
+   mutex_unlock(&rdev->cs_mutex);
return -EBUSY;
}
/* initialize parser */
@@ -521,7 +521,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
DRM_ERROR("Failed to initialize parser !\n");
radeon_cs_parser_fini(&parser, r);
r = radeon_cs_handle_lockup(rdev, r);
-   radeon_mutex_unlock(&rdev->cs_mutex);
+   mutex_unlock(&rdev->cs_mutex);
return r;
}
r = radeon_cs_parser_relocs(&parser);
@@ -530,7 +530,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
DRM_ERROR("Failed to parse relocation %d!\n", r);
radeon_cs_parser_fini(&parser, r);
r = radeon_cs_handle_lockup(rdev, r);
-   radeon_mutex_unlock(&rdev->cs_mutex);
+   mutex_unlock(&rdev->cs_mutex);
return r;
}
r = radeon_cs_ib_chunk(rdev, &parser);
@@ -544,7 +544,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
 out:
radeon_cs_parser_fini(&parser, r);
r = radeon_cs_handle_lockup(rdev, r);
-   radeon_mutex_unlock(&rdev->cs_mutex);
+   mutex_unlock(&rdev->cs_mutex);
return r;
 }

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 1dac27d..5df53dd 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -722,7 +722,7 @@ int radeon_device_init(struct radeon_device *rdev,

/* mutex initialization are all done here so we
 * can recall function without having locking issues */
-   radeon_mutex_init(&rdev->cs_mutex);
+   mutex_init(&rdev->cs_mutex);
for (i = 0; i < RADEON_NUM_RINGS; ++i)
mutex_init(&rdev->ring[i].mutex);
mutex_init(&rdev->dc_hw_i2c_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index 220dbb8..b4a4982 100644
--- a/drivers/gpu/

[PATCH 16/24] drm/radeon: move lockup detection code into radeon_ring.c v2

2012-04-25 Thread j.gli...@gmail.com
From: Jerome Glisse 

It isn't chipset specific, so it makes no sense
to have that inside r100.c.

v2: rebase on debugfs removal

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/evergreen.c   |5 +--
 drivers/gpu/drm/radeon/ni.c  |5 +--
 drivers/gpu/drm/radeon/r100.c|   57 +
 drivers/gpu/drm/radeon/r300.c|4 +-
 drivers/gpu/drm/radeon/r600.c|   10 +-
 drivers/gpu/drm/radeon/radeon.h  |   16 ++---
 drivers/gpu/drm/radeon/radeon_asic.h |5 ---
 drivers/gpu/drm/radeon/radeon_ring.c |   53 +++
 drivers/gpu/drm/radeon/si.c  |5 +--
 9 files changed, 69 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index a76389c..353d10a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2424,7 +2424,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
u32 srbm_status;
u32 grbm_status;
u32 grbm_status_se0, grbm_status_se1;
-   struct r100_gpu_lockup *lockup = &rdev->config.evergreen.lockup;
int r;

srbm_status = RREG32(SRBM_STATUS);
@@ -2432,7 +2431,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
if (!(grbm_status & GUI_ACTIVE)) {
-   r100_gpu_lockup_update(lockup, ring);
+   radeon_ring_lockup_update(ring);
return false;
}
/* force CP activities */
@@ -2444,7 +2443,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
radeon_ring_unlock_commit(rdev, ring);
}
ring->rptr = RREG32(CP_RB_RPTR);
-   return r100_gpu_cp_is_lockup(rdev, lockup, ring);
+   return radeon_ring_test_lockup(rdev, ring);
 }

 static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index c0b0956..4327b32 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1397,7 +1397,6 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
u32 srbm_status;
u32 grbm_status;
u32 grbm_status_se0, grbm_status_se1;
-   struct r100_gpu_lockup *lockup = &rdev->config.cayman.lockup;
int r;

srbm_status = RREG32(SRBM_STATUS);
@@ -1405,7 +1404,7 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
if (!(grbm_status & GUI_ACTIVE)) {
-   r100_gpu_lockup_update(lockup, ring);
+   radeon_ring_lockup_update(ring);
return false;
}
/* force CP activities */
@@ -1418,7 +1417,7 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
}
/* XXX deal with CP0,1,2 */
ring->rptr = RREG32(ring->rptr_reg);
-   return r100_gpu_cp_is_lockup(rdev, lockup, ring);
+   return radeon_ring_test_lockup(rdev, ring);
 }

 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index be51f7b..0a62ada 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2152,59 +2152,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
return -1;
 }

-void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_ring 
*ring)
-{
-   lockup->last_cp_rptr = ring->rptr;
-   lockup->last_jiffies = jiffies;
-}
-
-/**
- * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
- * @rdev:  radeon device structure
- * @lockup:r100_gpu_lockup structure holding CP lockup tracking 
informations
- * @cp:radeon_cp structure holding CP information
- *
- * We don't need to initialize the lockup tracking information as we will 
either
- * have CP rptr to a different value of jiffies wrap around which will force
- * initialization of the lockup tracking informations.
- *
- * A possible false positivie is if we get call after while and last_cp_rptr ==
- * the current CP rptr, even if it's unlikely it might happen. To avoid this
- * if the elapsed time since last call is bigger than 2 second than we return
- * false and update the tracking information. Due to this the caller must call
- * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be 
reported
- * the fencing code should be cautious about that.
- *
- * Caller should write to the ring to force CP to do something so we don't get
- * false positive when CP is just gived nothing to do.
- *
- **/
-bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup 

[PATCH 17/24] drm/radeon: make lockup timeout a module param

2012-04-25 Thread j.gli...@gmail.com
From: Christian König

Don't hard-code the 10-second timeout. Compute jobs
can run much longer.

Signed-off-by: Christian König
---
 drivers/gpu/drm/radeon/radeon.h  |1 +
 drivers/gpu/drm/radeon/radeon_drv.c  |4 
 drivers/gpu/drm/radeon/radeon_ring.c |2 +-
 3 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c0ed077..fde5eaa 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -94,6 +94,7 @@ extern int radeon_disp_priority;
 extern int radeon_hw_i2c;
 extern int radeon_pcie_gen2;
 extern int radeon_msi;
+extern int radeon_lockup_timeout;

 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index ef7bb3f..e62e56a 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -128,6 +128,7 @@ int radeon_disp_priority = 0;
 int radeon_hw_i2c = 0;
 int radeon_pcie_gen2 = 0;
 int radeon_msi = -1;
+int radeon_lockup_timeout = 1;

 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -177,6 +178,9 @@ module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444);
 MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(msi, radeon_msi, int, 0444);

+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 1 = 10 
seconds, 0 = disable)");
+module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);
+
 static int radeon_suspend(struct drm_device *dev, pm_message_t state)
 {
drm_radeon_private_t *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index f2dfcbf..d7245db 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -370,7 +370,7 @@ bool radeon_ring_test_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
return false;
}
elapsed = jiffies_to_msecs(cjiffies - ring->last_activity);
-   if (elapsed >= 1) {
+   if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) {
dev_err(rdev->dev, "GPU lockup CP stall for more than 
%lumsec\n", elapsed);
return true;
}
-- 
1.7.7.6



[PATCH 18/24] drm/radeon: unlock the ring mutex while waiting for the next fence

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Fixing just another deadlock problem with gpu reset tests.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon_ring.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index d7245db..bb5715a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -275,7 +275,9 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct 
radeon_ring *ring, unsi
if (ndw < ring->ring_free_dw) {
break;
}
+   mutex_unlock(&ring->mutex);
r = radeon_fence_wait_next(rdev, radeon_ring_index(rdev, ring));
+   mutex_lock(&ring->mutex);
if (r)
return r;
}
-- 
1.7.7.6



[PATCH 19/24] drm/radeon: make forcing ring activity a common function

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Nothing is chipset or ring specific about it,
so also move it to radeon_ring.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/evergreen.c   |   10 +-
 drivers/gpu/drm/radeon/ni.c  |   11 +--
 drivers/gpu/drm/radeon/r100.c|   10 +-
 drivers/gpu/drm/radeon/r300.c|   10 +-
 drivers/gpu/drm/radeon/r600.c|   10 +-
 drivers/gpu/drm/radeon/radeon.h  |1 +
 drivers/gpu/drm/radeon/radeon_ring.c |   16 
 drivers/gpu/drm/radeon/si.c  |   11 +--
 8 files changed, 23 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 353d10a..ec61194 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2424,7 +2424,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
u32 srbm_status;
u32 grbm_status;
u32 grbm_status_se0, grbm_status_se1;
-   int r;

srbm_status = RREG32(SRBM_STATUS);
grbm_status = RREG32(GRBM_STATUS);
@@ -2435,14 +2434,7 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *rin
return false;
}
/* force CP activities */
-   r = radeon_ring_lock(rdev, ring, 2);
-   if (!r) {
-   /* PACKET2 NOP */
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_unlock_commit(rdev, ring);
-   }
-   ring->rptr = RREG32(CP_RB_RPTR);
+   radeon_ring_force_activity(rdev, ring);
return radeon_ring_test_lockup(rdev, ring);
 }

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 4327b32..8a9c85d 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1397,7 +1397,6 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
u32 srbm_status;
u32 grbm_status;
u32 grbm_status_se0, grbm_status_se1;
-   int r;

srbm_status = RREG32(SRBM_STATUS);
grbm_status = RREG32(GRBM_STATUS);
@@ -1408,15 +1407,7 @@ bool cayman_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
return false;
}
/* force CP activities */
-   r = radeon_ring_lock(rdev, ring, 2);
-   if (!r) {
-   /* PACKET2 NOP */
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_unlock_commit(rdev, ring);
-   }
-   /* XXX deal with CP0,1,2 */
-   ring->rptr = RREG32(ring->rptr_reg);
+   radeon_ring_force_activity(rdev, ring);
return radeon_ring_test_lockup(rdev, ring);
 }

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 0a62ada..68b1674 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2155,7 +2155,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
 bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
u32 rbbm_status;
-   int r;

rbbm_status = RREG32(R_000E40_RBBM_STATUS);
if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
@@ -2163,14 +2162,7 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, 
struct radeon_ring *ring)
return false;
}
/* force CP activities */
-   r = radeon_ring_lock(rdev, ring, 2);
-   if (!r) {
-   /* PACKET2 NOP */
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_unlock_commit(rdev, ring);
-   }
-   ring->rptr = RREG32(ring->rptr_reg);
+   radeon_ring_force_activity(rdev, ring);
return radeon_ring_test_lockup(rdev, ring);
 }

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index e46bbd0..c496778 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -375,7 +375,6 @@ void r300_gpu_init(struct radeon_device *rdev)
 bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
u32 rbbm_status;
-   int r;

rbbm_status = RREG32(R_000E40_RBBM_STATUS);
if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
@@ -383,14 +382,7 @@ bool r300_gpu_is_lockup(struct radeon_device *rdev, struct 
radeon_ring *ring)
return false;
}
/* force CP activities */
-   r = radeon_ring_lock(rdev, ring, 2);
-   if (!r) {
-   /* PACKET2 NOP */
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_write(ring, 0x8000);
-   radeon_ring_unlock_commit(rdev, ring);
-   }
-   ring->rptr = RREG32(RADEON_CP_RB_RPTR);
+   radeon_ring_force_activity(rdev, ring);
return radeon_ring_test_lockup(rdev, ring);
 }

diff --git a/drivers/gpu/drm/radeon/r600

[PATCH 20/24] drm/radeon: remove r300_gpu_is_lockup

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Since it is now identical to r100_gpu_is_lockup.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/r300.c|   14 --
 drivers/gpu/drm/radeon/radeon_asic.c |   16 
 drivers/gpu/drm/radeon/radeon_asic.h |1 -
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index c496778..7c53729 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -372,20 +372,6 @@ void r300_gpu_init(struct radeon_device *rdev)
 rdev->num_gb_pipes, rdev->num_z_pipes);
 }

-bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-   u32 rbbm_status;
-
-   rbbm_status = RREG32(R_000E40_RBBM_STATUS);
-   if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
-   radeon_ring_lockup_update(ring);
-   return false;
-   }
-   /* force CP activities */
-   radeon_ring_force_activity(rdev, ring);
-   return radeon_ring_test_lockup(rdev, ring);
-}
-
 int r300_asic_reset(struct radeon_device *rdev)
 {
struct r100_mc_save save;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c
index 958b9ea..5e5694e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -299,7 +299,7 @@ static struct radeon_asic r300_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -373,7 +373,7 @@ static struct radeon_asic r300_asic_pcie = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -447,7 +447,7 @@ static struct radeon_asic r420_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -521,7 +521,7 @@ static struct radeon_asic rs400_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -595,7 +595,7 @@ static struct radeon_asic rs600_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -669,7 +669,7 @@ static struct radeon_asic rs690_asic = {
.ring_start = &r300_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -743,7 +743,7 @@ static struct radeon_asic rv515_asic = {
.ring_start = &rv515_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
@@ -817,7 +817,7 @@ static struct radeon_asic r520_asic = {
.ring_start = &rv515_ring_start,
.ring_test = &r100_ring_test,
.ib_test = &r100_ib_test,
-   .is_lockup = &r300_gpu_is_lockup,
+   .is_lockup = &r100_gpu_is_lockup,
}
},
.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
b/drivers/gpu/drm/radeon/radeon_asic.h
index 84f3552..58454b4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -151,7 +151,6 @@ extern int r300_init(struct radeon_device *rdev);
 extern void r300_fini(struct radeon_device *rdev);
 extern int r300_suspend(struct radeon_device *rdev);
 extern int r300_resume(struct radeon_device *rdev);
-extern bool r300_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring 
*cp);
 extern int r300_asic_reset(struct radeon_de

[PATCH 21/24] drm/radeon: remove cayman_gpu_is_lockup

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Since it is now identical to evergreen_gpu_is_lockup.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/ni.c  |   19 ---
 drivers/gpu/drm/radeon/radeon_asic.c |   12 ++--
 drivers/gpu/drm/radeon/radeon_asic.h |1 -
 3 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8a9c85d..107b217 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1392,25 +1392,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
return 0;
 }

-bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-   u32 srbm_status;
-   u32 grbm_status;
-   u32 grbm_status_se0, grbm_status_se1;
-
-   srbm_status = RREG32(SRBM_STATUS);
-   grbm_status = RREG32(GRBM_STATUS);
-   grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
-   grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
-   if (!(grbm_status & GUI_ACTIVE)) {
-   radeon_ring_lockup_update(ring);
-   return false;
-   }
-   /* force CP activities */
-   radeon_ring_force_activity(rdev, ring);
-   return radeon_ring_test_lockup(rdev, ring);
-}
-
 static int cayman_gpu_soft_reset(struct radeon_device *rdev)
 {
struct evergreen_mc_save save;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c
index 5e5694e..f533df5 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1339,7 +1339,7 @@ static struct radeon_asic cayman_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
},
[CAYMAN_RING_TYPE_CP1_INDEX] = {
.ib_execute = &cayman_ring_ib_execute,
@@ -1349,7 +1349,7 @@ static struct radeon_asic cayman_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
},
[CAYMAN_RING_TYPE_CP2_INDEX] = {
.ib_execute = &cayman_ring_ib_execute,
@@ -1359,7 +1359,7 @@ static struct radeon_asic cayman_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
}
},
.irq = {
@@ -1433,7 +1433,7 @@ static struct radeon_asic trinity_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
},
[CAYMAN_RING_TYPE_CP1_INDEX] = {
.ib_execute = &cayman_ring_ib_execute,
@@ -1443,7 +1443,7 @@ static struct radeon_asic trinity_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
},
[CAYMAN_RING_TYPE_CP2_INDEX] = {
.ib_execute = &cayman_ring_ib_execute,
@@ -1453,7 +1453,7 @@ static struct radeon_asic trinity_asic = {
.cs_parse = &evergreen_cs_parse,
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
-   .is_lockup = &cayman_gpu_is_lockup,
+   .is_lockup = &evergreen_gpu_is_lockup,
}
},
.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
b/drivers/gpu/drm/radeon/radeon_asic.h
index 58454b4..ddb145d 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -435,7 +435,6 @@ int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
 int cayman_suspend(struct radeon_device *rdev);
 int cayman_resume(struct radeon_device *rdev);
-bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int cayman_asic_reset(struct radeon_device *rdev);
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int cayman_vm_init(struct radeon_device *rdev);
-- 
1.7.7.6



[PATCH 22/24] drm/radeon: extend ring debugfs files with fence info c2

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

That should aid in debugging multi ring lockups.

v2 rebase on top of debugfs removal

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h   |1 +
 drivers/gpu/drm/radeon/radeon_fence.c |1 +
 2 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 91b48ac..6377f8c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -233,6 +233,7 @@ struct radeon_fence {
boolsignaled;
/* RB, DMA, etc. */
int ring;
+   unsignedemitted_at;
struct radeon_semaphore *semaphore;
struct radeon_ib*ib;
 };
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 2868eda..2e56101 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -71,6 +71,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct 
radeon_fence *fence)
return 0;
}
fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
+   fence->emitted_at = rdev->ring[fence->ring].wptr;
radeon_fence_ring_emit(rdev, fence->ring, fence);
trace_radeon_fence_emit(rdev->ddev, fence->seq);
fence->emitted = true;
-- 
1.7.7.6



[PATCH 23/24] drm/radeon: keep the cs relocs inside the ib

2012-04-25 Thread j.gli...@gmail.com
From: Christian König 

Free them when the ib is freed, another
step towards better debugging.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/radeon/radeon.h  |3 +++
 drivers/gpu/drm/radeon/radeon_cs.c   |   14 --
 drivers/gpu/drm/radeon/radeon_ring.c |3 +++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 6377f8c..7b2125b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -567,6 +567,7 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device 
*rdev, int crtc);
 /*
  * CP & rings.
  */
+struct radeon_cs_reloc;

 struct radeon_ib {
struct radeon_sa_bo sa_bo;
@@ -576,6 +577,8 @@ struct radeon_ib {
struct radeon_fence *fence;
unsignedvm_id;
boolis_const_ib;
+   unsignednrelocs;
+   struct radeon_cs_reloc  *relocs;
 };

 struct radeon_ring {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 38e1496..ecef708 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -308,7 +308,6 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
}
}
kfree(parser->track);
-   kfree(parser->relocs);
kfree(parser->relocs_ptr);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
@@ -317,7 +316,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
}
kfree(parser->chunks);
kfree(parser->chunks_array);
-   radeon_ib_free(parser->rdev, &parser->ib);
+
+   if (parser->ib) {
+   /* keep the relocs for debugging */
+   parser->ib->nrelocs = parser->nrelocs;
+   parser->ib->relocs = parser->relocs;
+
+   /* even if we locally free it the ib stays
+  alive until it is processed */
+   radeon_ib_free(parser->rdev, &parser->ib);
+   } else {
+   kfree(parser->relocs);
+   }
 }

 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index f256eae..c635aad 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -95,6 +95,8 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
(*ib)->gpu_addr += (*ib)->sa_bo.offset;
(*ib)->vm_id = 0;
(*ib)->is_const_ib = false;
+   (*ib)->nrelocs = 0;
+   (*ib)->relocs = NULL;

return 0;
 }
@@ -116,6 +118,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct 
radeon_ib **ib)
if (destroy) {
radeon_sa_bo_free(rdev, &tmp->sa_bo);
radeon_fence_unref(&tmp->fence);
+   kfree(tmp->relocs);
kfree(tmp);
}
 }
-- 
1.7.7.6



[PATCH 24/24] drm/radeon: add faulty command buffer dump facilities

2012-04-25 Thread j.gli...@gmail.com
From: Jerome Glisse 

This adds a command buffer dumping facility that will
dump the command buffer and all associated bo that most likely
triggered a lockup.

The idea is that we go through the unsignaled fences and dump the
ib of the oldest unsignaled fence. Dumping is a 2 step process:
on lockup detection we try to allocate a big object that will
hold all the current state (ib pm4 packets, bo content,
relocation table). Upon reading the radeon_lockup_blob debugfs
file the user will get this big blob and the kernel will free the memory.

The kernel side tries to handle failures, such as failing to map a bo,
as gracefully as possible by not dumping such a bo. Userspace tools
thus need to have enough logic to handle such cases.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|   14 -
 drivers/gpu/drm/radeon/radeon_cs.c |   20 --
 drivers/gpu/drm/radeon/radeon_device.c |3 +
 drivers/gpu/drm/radeon/radeon_fence.c  |   19 +
 drivers/gpu/drm/radeon/radeon_gart.c   |   10 ++-
 drivers/gpu/drm/radeon/radeon_ring.c   |  118 
 6 files changed, 173 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7b2125b..c9f51be 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -241,6 +241,7 @@ struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_blob_faulty_ib(struct radeon_device *rdev, int ring);
 int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence 
**fence, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
@@ -569,6 +570,10 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device 
*rdev, int crtc);
  */
 struct radeon_cs_reloc;

+#define RADEON_IB_TYPE_NONE0
+#define RADEON_IB_TYPE_CS  1
+#define RADEON_IB_TYPE_CS_VM   2
+
 struct radeon_ib {
struct radeon_sa_bo sa_bo;
uint32_tlength_dw;
@@ -579,6 +584,7 @@ struct radeon_ib {
boolis_const_ib;
unsignednrelocs;
struct radeon_cs_reloc  *relocs;
+   unsignedtype;
 };

 struct radeon_ring {
@@ -745,6 +751,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct 
radeon_ring *cp, unsigne
 unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
 u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_lockup_build_blob(struct radeon_device *rdev, struct radeon_ib 
*ib);


 /*
@@ -756,6 +763,7 @@ struct radeon_cs_reloc {
struct radeon_bo_list   lobj;
uint32_thandle;
uint32_tflags;
+   uint64_tgpu_addr;
 };

 struct radeon_cs_chunk {
@@ -1496,6 +1504,9 @@ struct radeon_device {
unsigneddebugfs_count;
/* virtual memory */
struct radeon_vm_managervm_manager;
+   uint32_t*blob;
+   unsignedblob_size_dw;
+   struct mutexblob_mutex;
 };

 int radeon_device_init(struct radeon_device *rdev,
@@ -1742,7 +1753,8 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct 
radeon_vm *vm);
 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo,
-   struct ttm_mem_reg *mem);
+   struct ttm_mem_reg *mem,
+   uint64_t *gpu_addr);
 void radeon_vm_bo_invalidate(struct radeon_device *rdev,
 struct radeon_bo *bo);
 int radeon_vm_bo_add(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index ecef708..0c0bcaa 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -334,6 +334,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
  struct radeon_cs_parser *parser)
 {
struct radeon_cs_chunk *ib_chunk;
+   unsigned i;
int r;

if (parser->chunk_ib_idx == -1)
@@ -369,6 +370,10 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
DRM_ERROR("Failed to synchronize rings !\n");
}
parser->ib->vm_id = 0;
+   parser->ib->type = RADEON_IB_TYPE_CS;
+   for (i = 0; i < parser->nrelocs; ++i) {
+   parser->relocs[i].gpu_addr = parser->relocs[i].lobj.gpu_offset;
+   }
r = radeon_ib_schedule(rdev, parser->ib);
if (r) {
DRM_ERROR("

[PATCH] drm/radeon: fix virtual memory locking in case of reset

2012-08-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

Lock/unlock mutex in proper order to avoid deadlock in case
of GPU reset triggered from VM code path.

Cc: stable at vger.kernel.org [3.5]
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_gart.c |   11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b372005..7eabb59 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -508,14 +508,19 @@ static void radeon_vm_unbind_locked(struct radeon_device 
*rdev,
while (vm->fence) {
int r;
r = radeon_fence_wait(vm->fence, false);
-   if (r)
+   if (r) {
DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
if (r == -EDEADLK) {
+   /* release mutex and lock in right order */
mutex_unlock(&rdev->vm_manager.lock);
+   mutex_unlock(&vm->mutex);
r = radeon_gpu_reset(rdev);
mutex_lock(&rdev->vm_manager.lock);
-   if (!r)
+   mutex_lock(&vm->mutex);
+   if (!r) {
continue;
+   }
}
break;
}
@@ -551,7 +556,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
mutex_lock(&rdev->vm_manager.lock);
/* unbind all active vm */
list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
+   mutex_lock(&vm->mutex);
radeon_vm_unbind_locked(rdev, vm);
+   mutex_unlock(&vm->mutex);
}
rdev->vm_manager.funcs->fini(rdev);
mutex_unlock(&rdev->vm_manager.lock);
-- 
1.7.10.4



[PATCH] drm/radeon: fence virtual address and free it once idle v2

2012-08-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced to know when we can safely remove them.
This patch also properly clears the pagetable. Previously it was
seriously broken.

v2: Force update of the pagetable when unbinding a bo (don't bail out if
bo_va->valid is true).

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|1 +
 drivers/gpu/drm/radeon/radeon_cs.c |   32 +---
 drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c|   13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |6 +-
 5 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5431af2..8d75c65 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -300,6 +300,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 8a4c49e..995f3ab 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+   int r;
+
+   if (parser->chunk_ib_idx == -1)
+   return;
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+   return;
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+   return 0;
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b372005..9912182 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
mutex_unlock(&rdev->vm_manager.lock);

-   /* remove all bo */
+   /* remove all bo at this point non are busy any more because unbind
+* waited for the last vm fence to signal
+*/
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);

[PATCH] drm/radeon: fence virtual address and free it once idle [3.5] v2

2012-08-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced to know when we can safely remove them.
This patch also properly clears the pagetable. Previously it was
seriously broken.

v2: Force update of the pagetable when unbinding a bo (don't bail out if
bo_va->valid is true).

This version is for stable 3.5 only.

cc: stable at vger.kernel.org
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_cs.c | 32 +---
 drivers/gpu/drm/radeon/radeon_gart.c   | 24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c| 13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |  6 +-
 5 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index fefcca5..01d2a87 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -323,6 +323,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 142f894..70f6d08 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -294,6 +294,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+   int r;
+
+   if (parser->chunk_ib_idx == -1)
+   return;
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+   return;
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+   return 0;
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -306,11 +330,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -407,7 +434,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index 84b648a..f651f22 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -564,7 +564,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -597,11 +597,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
radeon_mutex_lock(&rdev->cs_mutex);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -661,12 +677,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
radeon_mutex_unlock(&rdev->cs_mutex);

-   /* remove all bo */
+   /* remove all bo at this point non are busy any more because unbind
+* waited for the last vm fence to signal
+*/
 

[PATCH] drm/radeon: fence virtual address and free it once idle v3

2012-08-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced so that we know when they can safely be
removed. This patch also properly clears the pagetable. Previously it was
seriously broken.

Kernels 3.5/3.4 need a similar patch, adapted for the differences in mutex locking.

v2: Force the pagetable update when unbinding a bo (don't bail out if
bo_va->valid is true).
v3: Add kernel 3.5/3.4 comment.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|1 +
 drivers/gpu/drm/radeon/radeon_cs.c |   32 +---
 drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c|   13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |6 +-
 5 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5431af2..8d75c65 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -300,6 +300,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 8a4c49e..995f3ab 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+   int r;
+
+   if (parser->chunk_ib_idx == -1)
+   return;
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+   return;
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+   return 0;
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b372005..9912182 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
mutex_unlock(&rdev->vm_manager.lock);

-   /* remove all bo */
+   /* remove all bo at this point non are busy any more because unbind
+  

[PATCH] drm/radeon: fence virtual address and free it once idle [3.5] v3

2012-08-06 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced so that we know when they can safely be
removed. This patch also properly clears the pagetable. Previously it was
seriously broken.

v2: Force the pagetable update when unbinding a bo (don't bail out if
bo_va->valid is true).
v3: Fix compilation warnings

This version is for stable 3.5 only.

cc: stable at vger.kernel.org
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_cs.c | 30 +++---
 drivers/gpu/drm/radeon/radeon_gart.c   | 24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c| 13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |  6 +-
 5 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index fefcca5..01d2a87 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -323,6 +323,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 142f894..3680df0 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -294,6 +294,28 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+
+   if (parser->chunk_ib_idx == -1)
+   return;
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+   return;
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -306,11 +328,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -407,7 +432,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index 84b648a..f651f22 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -564,7 +564,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -597,11 +597,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
radeon_mutex_lock(&rdev->cs_mutex);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -661,12 +677,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
radeon_mutex_unlock(&rdev->cs_mutex);

-   /* remove all bo */
+   /* remove all bo at this point non are busy any more because unbind
+* waited for the last vm fence to signal
+*/
   

[PATCH] drm/radeon: fence virtual address and free it once idle v4

2012-08-06 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced so that we know when they can safely be
removed. This patch also properly clears the pagetable. Previously it was
seriously broken.

Kernels 3.5/3.4 need a similar patch, adapted for the differences in mutex locking.

v2: Force the pagetable update when unbinding a bo (don't bail out if
bo_va->valid is true).
v3: Add kernel 3.5/3.4 comment.
v4: Fix compilation warnings.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_cs.c | 32 +---
 drivers/gpu/drm/radeon/radeon_gart.c   | 24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c| 13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |  6 +-
 5 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5431af2..8d75c65 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -300,6 +300,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 8a4c49e..b4a0db24 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+
+   if (parser->chunk_ib_idx == -1) {
+   return;
+   }
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) {
+   return;
+   }
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index b372005..9912182 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
mutex_unlock(&rdev->vm_manager.lock);

-   /* remove all bo */
+   /* remove all bo at this point non are busy any more because un

[PATCH] drm/radeon: fence virtual address and free it once idle [3.5] v4

2012-08-06 Thread j.gli...@gmail.com
From: Jerome Glisse 

Virtual addresses need to be fenced so that we know when they can safely be
removed. This patch also properly clears the pagetable. Previously it was
seriously broken.

v2: Force the pagetable update when unbinding a bo (don't bail out if
bo_va->valid is true).
v3: Fix compilation warnings
v4: We need a special version for 3.5 because the locking scheme
is different between 3.5 and 3.6. There is no longer a cs mutex in
3.6; instead there is a global vm mutex.

This version is for stable 3.5 only.

cc: stable at vger.kernel.org
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_cs.c | 30 +++---
 drivers/gpu/drm/radeon/radeon_gart.c   | 24 ++--
 drivers/gpu/drm/radeon/radeon_gem.c| 13 ++---
 drivers/gpu/drm/radeon/radeon_object.c |  6 +-
 5 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index fefcca5..01d2a87 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -323,6 +323,7 @@ struct radeon_bo_va {
uint64_tsoffset;
uint64_teoffset;
uint32_tflags;
+   struct radeon_fence *fence;
boolvalid;
 };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index 142f894..3680df0 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -294,6 +294,28 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void 
*data)
return 0;
 }

+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
+ struct radeon_fence *fence)
+{
+   struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+   struct radeon_vm *vm = &fpriv->vm;
+   struct radeon_bo_list *lobj;
+
+   if (parser->chunk_ib_idx == -1)
+   return;
+   if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
+   return;
+
+   list_for_each_entry(lobj, &parser->validated, tv.head) {
+   struct radeon_bo_va *bo_va;
+   struct radeon_bo *rbo = lobj->bo;
+
+   bo_va = radeon_bo_va(rbo, vm);
+   radeon_fence_unref(&bo_va->fence);
+   bo_va->fence = radeon_fence_ref(fence);
+   }
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:parser structure holding parsing context.
@@ -306,11 +328,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser 
*parser, int error)
 {
unsigned i;

-   if (!error)
+   if (!error) {
+   /* fence all bo va before ttm_eu_fence_buffer_objects so bo are 
still reserved */
+   radeon_bo_vm_fence_va(parser, parser->ib.fence);
ttm_eu_fence_buffer_objects(&parser->validated,
parser->ib.fence);
-   else
+   } else {
ttm_eu_backoff_reservation(&parser->validated);
+   }

if (parser->relocs != NULL) {
for (i = 0; i < parser->nrelocs; i++) {
@@ -407,7 +432,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,

if (parser->chunk_ib_idx == -1)
return 0;
-
if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
return 0;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index 84b648a..f651f22 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -564,7 +564,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
return -EINVAL;
}

-   if (bo_va->valid)
+   if (bo_va->valid && mem)
return 0;

ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -597,11 +597,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 struct radeon_bo *bo)
 {
struct radeon_bo_va *bo_va;
+   int r;

bo_va = radeon_bo_va(bo, vm);
if (bo_va == NULL)
return 0;

+   /* wait for va use to end */
+   while (bo_va->fence) {
+   r = radeon_fence_wait(bo_va->fence, false);
+   if (r) {
+   DRM_ERROR("error while waiting for fence: %d\n", r);
+   }
+   if (r == -EDEADLK) {
+   r = radeon_gpu_reset(rdev);
+   if (!r)
+   continue;
+   }
+   break;
+   }
+   radeon_fence_unref(&bo_va->fence);
+
radeon_mutex_lock(&rdev->cs_mutex);
mutex_lock(&vm->mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
@@ -661,12 +677,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct 
radeon_vm *vm)
radeon_vm_unbind_locked(rdev, vm);
radeon_mutex_unlock(&rdev->cs_mutex);

[PATCH] drm/radeon: delay virtual address destruction to bo destruction

2012-08-08 Thread j.gli...@gmail.com
From: Jerome Glisse 

Use the ttm bo delayed destruction queue so that we don't block
userspace when destroying a bo. The virtual address destruction
will happen at the same time as the real bo destruction, once everything
using the bo is done.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_drv.c |2 +-
 drivers/gpu/drm/radeon/radeon_gem.c |   20 
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index dcea6f0..38443e7 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -368,7 +368,7 @@ static struct drm_driver kms_driver = {
.gem_init_object = radeon_gem_object_init,
.gem_free_object = radeon_gem_object_free,
.gem_open_object = radeon_gem_object_open,
-   .gem_close_object = radeon_gem_object_close,
+   .gem_close_object = NULL,
.dma_ioctl = radeon_dma_ioctl_kms,
.dumb_create = radeon_mode_dumb_create,
.dumb_map_offset = radeon_mode_dumb_mmap,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index 1b57b00..b5835c8 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -127,26 +127,6 @@ int radeon_gem_object_open(struct drm_gem_object *obj, 
struct drm_file *file_pri
return 0;
 }

-void radeon_gem_object_close(struct drm_gem_object *obj,
-struct drm_file *file_priv)
-{
-   struct radeon_bo *rbo = gem_to_radeon_bo(obj);
-   struct radeon_device *rdev = rbo->rdev;
-   struct radeon_fpriv *fpriv = file_priv->driver_priv;
-   struct radeon_vm *vm = &fpriv->vm;
-
-   if (rdev->family < CHIP_CAYMAN) {
-   return;
-   }
-
-   if (radeon_bo_reserve(rbo, false)) {
-   dev_err(rdev->dev, "leaking bo va because we fail to reserve 
bo\n");
-   return;
-   }
-   radeon_vm_bo_rmv(rdev, vm, rbo);
-   radeon_bo_unreserve(rbo);
-}
-
 static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r)
 {
if (r == -EDEADLK) {
-- 
1.7.10.4



[PATCH] drm/edid: limit printk when facing bad edid

2012-08-09 Thread j.gli...@gmail.com
From: Jerome Glisse 

Limit printing of bad edid information to once per connector.
Connectors that are connected to a bad monitor/kvm will likely
stay connected to the same bad monitor/kvm, and it makes no
sense to keep printing the bad edid message.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/drm_edid.c  | 22 ++
 drivers/gpu/drm/drm_edid_load.c |  6 --
 include/drm/drm_crtc.h  |  3 ++-
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index a8743c3..7380ee3 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -158,7 +158,7 @@ MODULE_PARM_DESC(edid_fixup,
  * Sanity check the EDID block (base or extension).  Return 0 if the block
  * doesn't check out, or 1 if it's valid.
  */
-bool drm_edid_block_valid(u8 *raw_edid, int block)
+bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid)
 {
int i;
u8 csum = 0;
@@ -181,7 +181,9 @@ bool drm_edid_block_valid(u8 *raw_edid, int block)
for (i = 0; i < EDID_LENGTH; i++)
csum += raw_edid[i];
if (csum) {
-   DRM_ERROR("EDID checksum is invalid, remainder is %d\n", csum);
+   if (print_bad_edid) {
+   DRM_ERROR("EDID checksum is invalid, remainder is 
%d\n", csum);
+   }

/* allow CEA to slide through, switches mangle this */
if (raw_edid[0] != 0x02)
@@ -207,7 +209,7 @@ bool drm_edid_block_valid(u8 *raw_edid, int block)
return 1;

 bad:
-   if (raw_edid) {
+   if (raw_edid && print_bad_edid) {
printk(KERN_ERR "Raw EDID:\n");
print_hex_dump(KERN_ERR, " \t", DUMP_PREFIX_NONE, 16, 1,
   raw_edid, EDID_LENGTH, false);
@@ -231,7 +233,7 @@ bool drm_edid_is_valid(struct edid *edid)
return false;

for (i = 0; i <= edid->extensions; i++)
-   if (!drm_edid_block_valid(raw + i * EDID_LENGTH, i))
+   if (!drm_edid_block_valid(raw + i * EDID_LENGTH, i, true))
return false;

return true;
@@ -303,6 +305,7 @@ drm_do_get_edid(struct drm_connector *connector, struct 
i2c_adapter *adapter)
 {
int i, j = 0, valid_extensions = 0;
u8 *block, *new;
+   bool print_bad_edid = !connector->bad_edid_counter || (drm_debug & 
DRM_UT_KMS);

if ((block = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
return NULL;
@@ -311,7 +314,7 @@ drm_do_get_edid(struct drm_connector *connector, struct 
i2c_adapter *adapter)
for (i = 0; i < 4; i++) {
if (drm_do_probe_ddc_edid(adapter, block, 0, EDID_LENGTH))
goto out;
-   if (drm_edid_block_valid(block, 0))
+   if (drm_edid_block_valid(block, 0, print_bad_edid))
break;
if (i == 0 && drm_edid_is_zero(block, EDID_LENGTH)) {
connector->null_edid_counter++;
@@ -336,7 +339,7 @@ drm_do_get_edid(struct drm_connector *connector, struct 
i2c_adapter *adapter)
  block + (valid_extensions + 1) * EDID_LENGTH,
  j, EDID_LENGTH))
goto out;
-   if (drm_edid_block_valid(block + (valid_extensions + 1) 
* EDID_LENGTH, j)) {
+   if (drm_edid_block_valid(block + (valid_extensions + 1) 
* EDID_LENGTH, j, print_bad_edid)) {
valid_extensions++;
break;
}
@@ -359,8 +362,11 @@ drm_do_get_edid(struct drm_connector *connector, struct 
i2c_adapter *adapter)
return block;

 carp:
-   dev_warn(connector->dev->dev, "%s: EDID block %d invalid.\n",
-drm_get_connector_name(connector), j);
+   if (print_bad_edid) {
+   dev_warn(connector->dev->dev, "%s: EDID block %d invalid.\n",
+drm_get_connector_name(connector), j);
+   }
+   connector->bad_edid_counter++;

 out:
kfree(block);
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index 66d4a28..14f46dd 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -123,6 +123,7 @@ static int edid_load(struct drm_connector *connector, char 
*name,
int fwsize, expected;
int builtin = 0, err = 0;
int i, valid_extensions = 0;
+   bool print_bad_edid = !connector->bad_edid_counter || (drm_debug & 
DRM_UT_KMS);

pdev = platform_device_register_simple(connector_name, -1, NULL, 0);
if (IS_ERR(pdev)) {
@@ -173,7 +174,8 @@ static int edid_load(struct drm_connector *connector, char 
*name,
}
memcpy(edid, fwdata, fwsize);

-   if (!drm_edid_block_valid(edid, 0)) {
+   if (!drm_edid_block_valid(edid, 0, print_bad_edid)) {
+   

[PATCH] drm/radeon: avoid turning off spread spectrum for used pll

2012-08-17 Thread j.gli...@gmail.com
From: Jerome Glisse 

If spread spectrum is enabled and in use for a given pll, we
should not turn it off, as that will turn off the display
for any crtc that uses the pll (this behavior was observed on chelsea
edp).

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/atombios_crtc.c |   25 +
 1 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c 
b/drivers/gpu/drm/radeon/atombios_crtc.c
index c6fcb5b..cb18813 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -444,11 +444,28 @@ union atom_enable_ss {
 static void atombios_crtc_program_ss(struct radeon_device *rdev,
 int enable,
 int pll_id,
+int crtc_id,
 struct radeon_atom_ss *ss)
 {
+   unsigned i;
int index = GetIndexIntoMasterTable(COMMAND, 
EnableSpreadSpectrumOnPPLL);
union atom_enable_ss args;

+   if (!enable) {
+   for (i = 0; i < 6; i++) {
+   if (rdev->mode_info.crtcs[i] &&
+   rdev->mode_info.crtcs[i]->enabled &&
+   i != crtc_id &&
+   pll_id == rdev->mode_info.crtcs[i]->pll_id) {
+   /* one other crtc is using this pll don't turn
+* off spread spectrum as it might turn off
+* display on active crtc
+*/
+   return;
+   }
+   }
+   }
+
memset(&args, 0, sizeof(args));

if (ASIC_IS_DCE5(rdev)) {
@@ -1028,7 +1045,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, 
struct drm_display_mode
radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, 
&fb_div, &frac_fb_div,
  &ref_div, &post_div);

-   atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, &ss);
+   atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, 
radeon_crtc->crtc_id, &ss);

atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, 
radeon_crtc->pll_id,
  encoder_mode, radeon_encoder->encoder_id, 
mode->clock,
@@ -1051,7 +1068,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, 
struct drm_display_mode
ss.step = step_size;
}

-   atombios_crtc_program_ss(rdev, ATOM_ENABLE, 
radeon_crtc->pll_id, &ss);
+   atombios_crtc_program_ss(rdev, ATOM_ENABLE, 
radeon_crtc->pll_id, radeon_crtc->crtc_id, &ss);
}
 }

@@ -1572,11 +1589,11 @@ void radeon_atom_disp_eng_pll_init(struct radeon_device 
*rdev)
   
ASIC_INTERNAL_SS_ON_DCPLL,
   
rdev->clock.default_dispclk);
if (ss_enabled)
-   atombios_crtc_program_ss(rdev, ATOM_DISABLE, 
ATOM_DCPLL, &ss);
+   atombios_crtc_program_ss(rdev, ATOM_DISABLE, 
ATOM_DCPLL, -1, &ss);
/* XXX: DCE5, make sure voltage, dispclk is high enough */
atombios_crtc_set_disp_eng_pll(rdev, 
rdev->clock.default_dispclk);
if (ss_enabled)
-   atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, 
&ss);
+   atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, 
-1, &ss);
}

 }
-- 
1.7.1



[PATCH] drm/radeon: force dma32 on rs400, rs690, rs740 IGP

2012-08-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

It seems some of those IGPs dislike non-dma32 pages.

https://bugzilla.redhat.com/show_bug.cgi?id=785375

Signed-off-by: Jerome Glisse 
Cc: 
---
 drivers/gpu/drm/radeon/radeon_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 066c98b..8867400 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -774,7 +774,7 @@ int radeon_device_init(struct radeon_device *rdev,
if (rdev->flags & RADEON_IS_AGP)
rdev->need_dma32 = true;
if ((rdev->flags & RADEON_IS_PCI) &&
-   (rdev->family < CHIP_RS400))
+   (rdev->family <= CHIP_RS740))
rdev->need_dma32 = true;

dma_bits = rdev->need_dma32 ? 32 : 40;
-- 
1.7.11.2



[PATCH] drm/radeon: force dma32 to fix regression rs4xx,rs6xx,rs740

2012-08-28 Thread j.gli...@gmail.com
From: Jerome Glisse 

It seems some of those IGPs dislike non-dma32 pages despite what the
documentation says. Fix the regression introduced when we allowed non-dma32
pages. It seems to affect only some revisions of those IGP chips;
as we don't know which ones, just force dma32 for all of them.

https://bugzilla.redhat.com/show_bug.cgi?id=785375

Signed-off-by: Jerome Glisse 
Cc: 
---
 drivers/gpu/drm/radeon/radeon_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 066c98b..8867400 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -774,7 +774,7 @@ int radeon_device_init(struct radeon_device *rdev,
if (rdev->flags & RADEON_IS_AGP)
rdev->need_dma32 = true;
if ((rdev->flags & RADEON_IS_PCI) &&
-   (rdev->family < CHIP_RS400))
+   (rdev->family <= CHIP_RS740))
rdev->need_dma32 = true;

dma_bits = rdev->need_dma32 ? 32 : 40;
-- 
1.7.11.2



[PATCH] drm/radeon: fix amd afusion gpu setup aka sumo v2

2012-12-11 Thread j.gli...@gmail.com
From: Jerome Glisse 

Set the proper number of tile pipes, which should be a multiple of the
number of pipes, depending on the number of shader engines (se).

Fix:
https://bugs.freedesktop.org/show_bug.cgi?id=56405
https://bugs.freedesktop.org/show_bug.cgi?id=56720

v2: Don't change sumo2

Signed-off-by: Jerome Glisse 
Cc: stable at vger.kernel.org
---
 drivers/gpu/drm/radeon/evergreen.c  | 8 
 drivers/gpu/drm/radeon/evergreend.h | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 14313ad..b957de1 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1819,7 +1819,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
case CHIP_SUMO:
rdev->config.evergreen.num_ses = 1;
rdev->config.evergreen.max_pipes = 4;
-   rdev->config.evergreen.max_tile_pipes = 2;
+   rdev->config.evergreen.max_tile_pipes = 4;
if (rdev->pdev->device == 0x9648)
rdev->config.evergreen.max_simds = 3;
else if ((rdev->pdev->device == 0x9647) ||
@@ -1842,7 +1842,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
-   gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+   gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_SUMO2:
rdev->config.evergreen.num_ses = 1;
@@ -1864,7 +1864,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
-   gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+   gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_BARTS:
rdev->config.evergreen.num_ses = 2;
@@ -1912,7 +1912,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
break;
case CHIP_CAICOS:
rdev->config.evergreen.num_ses = 1;
-   rdev->config.evergreen.max_pipes = 4;
+   rdev->config.evergreen.max_pipes = 2;
rdev->config.evergreen.max_tile_pipes = 2;
rdev->config.evergreen.max_simds = 2;
rdev->config.evergreen.max_backends = 1 * 
rdev->config.evergreen.num_ses;
diff --git a/drivers/gpu/drm/radeon/evergreend.h 
b/drivers/gpu/drm/radeon/evergreend.h
index df542f1..52c89c9 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -45,6 +45,8 @@
 #define TURKS_GB_ADDR_CONFIG_GOLDEN  0x02010002
 #define CEDAR_GB_ADDR_CONFIG_GOLDEN  0x02010001
 #define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001
+#define SUMO_GB_ADDR_CONFIG_GOLDEN   0x02010002
+#define SUMO2_GB_ADDR_CONFIG_GOLDEN  0x02010002

 /* Registers */

-- 
1.7.11.7



[PATCH] drm/radeon: fix fence driver for dma ring when wb is disabled

2012-12-12 Thread j.gli...@gmail.com
From: Jerome Glisse 

The dma ring can't write to registers, so it has to write its fence
value to memory. This ensures that the fence driver doesn't try to use a
scratch register for the dma ring.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/r600.c | 3 ++-
 drivers/gpu/drm/radeon/radeon_fence.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index a76eca1..2aaf147 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2533,11 +2533,12 @@ void r600_dma_fence_ring_emit(struct radeon_device 
*rdev,
 {
struct radeon_ring *ring = &rdev->ring[fence->ring];
u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
/* write the fence */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
radeon_ring_write(ring, addr & 0xfffc);
radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
-   radeon_ring_write(ring, fence->seq);
+   radeon_ring_write(ring, lower_32_bits(fence->seq));
/* generate an interrupt */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2..410a975 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -772,7 +772,7 @@ int radeon_fence_driver_start_ring(struct radeon_device 
*rdev, int ring)
int r;

radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
-   if (rdev->wb.use_event) {
+   if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, 
&rdev->ring[ring])) {
rdev->fence_drv[ring].scratch_reg = 0;
index = R600_WB_EVENT_OFFSET + ring * 4;
} else {
-- 
1.8.0



[PATCH] drm/radeon: fix htile buffer size computation for command stream checker

2012-12-13 Thread j.gli...@gmail.com
From: Jerome Glisse 

Fix the size computation of the htile buffer.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_cs.c | 17 +--
 drivers/gpu/drm/radeon/r600_cs.c  | 92 ---
 drivers/gpu/drm/radeon/radeon_drv.c   |  3 +-
 3 files changed, 35 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c 
b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c2271..fc7e613 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -507,20 +507,28 @@ static int evergreen_cs_track_validate_htile(struct 
radeon_cs_parser *p,
/* height is npipes htiles aligned == npipes * 8 pixel aligned 
*/
nby = round_up(nby, track->npipes * 8);
} else {
+   /* always assume 8x8 htile */
+   /* align is htile align * 8, htile align vary according to
+* number of pipe and tile width and nby
+*/
switch (track->npipes) {
case 8:
+   /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 64 * 8);
break;
case 4:
+   /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 32 * 8);
break;
case 2:
+   /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
case 1:
+   /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
@@ -531,9 +539,10 @@ static int evergreen_cs_track_validate_htile(struct 
radeon_cs_parser *p,
}
}
/* compute number of htile */
-   nbx = nbx / 8;
-   nby = nby / 8;
-   size = nbx * nby * 4;
+   nbx = nbx >> 3;
+   nby = nby >> 3;
+   /* size must be aligned on npipes * 2K boundary */
+   size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;

if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1799,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser 
*p, u32 reg, u32 idx)
case DB_HTILE_SURFACE:
/* 8x8 only */
track->htile_surface = radeon_get_ib_value(p, idx);
+   /* force 8x8 htile width and height */
+   ib[idx] |= 3;
track->db_dirty = true;
break;
case CB_IMMED0_BASE:
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 5d6e7f9..0b4d833 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct 
radeon_cs_parser *p)
/* nby is npipes htiles aligned == npipes * 8 pixel 
aligned */
nby = round_up(nby, track->npipes * 8);
} else {
-   /* htile widht & nby (8 or 4) make 2 bits number */
-   tmp = track->htile_surface & 3;
+   /* always assume 8x8 htile */
/* align is htile align * 8, htile align vary according 
to
 * number of pipe and tile width and nby
 */
switch (track->npipes) {
case 8:
-   switch (tmp) {
-   case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-   nbx = round_up(nbx, 64 * 8);
-   nby = round_up(nby, 64 * 8);
-   break;
-   case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-   case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-   nbx = round_up(nbx, 64 * 8);
-   nby = round_up(nby, 32 * 8);
-   break;
-   case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-   nbx = round_up(nbx, 32 * 8);
-   nby = round_up(nby, 32 * 8);
-   break;
-   default:
-   return -EINVAL;
-   }
+   /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+   nbx = round_up(nbx, 64 * 8);
+   nby = round_up(nby, 64 * 8);
  

[PATCH] drm/radeon: resume fence driver to last sync sequence on lockup

2012-12-14 Thread j.gli...@gmail.com
From: Jerome Glisse 

After a lockup we need to resume the fence driver at the last sync
sequence and not the last received sequence, so that all threads waiting
on the command stream that locked up are resumed. Otherwise the GPU reset
will be ineffective in most cases.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2..38233e7 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -787,7 +787,7 @@ int radeon_fence_driver_start_ring(struct radeon_device 
*rdev, int ring)
}
rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-   radeon_fence_write(rdev, 
atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
+   radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
rdev->fence_drv[ring].initialized = true;
dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and 
cpu addr 0x%p\n",
 ring, rdev->fence_drv[ring].gpu_addr, 
rdev->fence_drv[ring].cpu_addr);
-- 
1.7.11.7



[PATCH] drm/radeon: restore modeset late in GPU reset path

2012-12-14 Thread j.gli...@gmail.com
From: Jerome Glisse 

The modeset path sometimes seems to conflict with memory management,
leading to a kernel deadlock. This moves the modeset restore to after
the GPU acceleration reset.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..ffd5534 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1337,7 +1337,6 @@ retry:
}

radeon_restore_bios_scratch_regs(rdev);
-   drm_helper_resume_force_mode(rdev->ddev);

if (!r) {
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1362,6 +1361,8 @@ retry:
}
}

+   drm_helper_resume_force_mode(rdev->ddev);
+
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
if (r) {
/* bad news, how to tell it to userspace ? */
-- 
1.7.11.7



[PATCH] drm/radeon: don't leave fence blocked process on failed GPU reset

2012-12-17 Thread j.gli...@gmail.com
From: Jerome Glisse 

Force all fences to signal if the GPU reset failed, so that no process
gets stuck waiting on a fence.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  1 +
 drivers/gpu/drm/radeon/radeon_device.c |  1 +
 drivers/gpu/drm/radeon/radeon_fence.c  | 19 +++
 3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5d68346..9c7625c 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -225,6 +225,7 @@ struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, 
int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f88..774fae7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1357,6 +1357,7 @@ retry:
}
}
} else {
+   radeon_fence_driver_force_completion(rdev);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
kfree(ring_data[i]);
}
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2..bf7b20e 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -868,6 +868,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
mutex_unlock(&rdev->ring_lock);
 }

+/**
+ * radeon_fence_driver_force_completion - force all fence waiter to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure make sure no process keep waiting on fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+   int ring;
+
+   for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+   if (!rdev->fence_drv[ring].initialized)
+   continue;
+   radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], 
ring);
+   }
+}
+

 /*
  * Fence debugfs
-- 
1.7.11.7



[PATCH] drm/radeon: avoid deadlock in pm path when waiting for fence

2012-12-17 Thread j.gli...@gmail.com
From: Jerome Glisse 

radeon_fence_wait_empty_locked should not trigger a GPU reset, as none
of the places it is called from would benefit from it, and it actually
leads to a kernel deadlock when the reset is triggered from the pm
codepath. Instead, force ring completion in the places where it makes
sense, or return early in the others.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|  2 +-
 drivers/gpu/drm/radeon/radeon_device.c | 13 +++--
 drivers/gpu/drm/radeon/radeon_fence.c  | 30 ++
 drivers/gpu/drm/radeon/radeon_pm.c | 15 ---
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9c7625c..071b2d7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -231,7 +231,7 @@ void radeon_fence_process(struct radeon_device *rdev, int 
ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_any(struct radeon_device *rdev,
  struct radeon_fence **fences,
  bool intr);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 774fae7..53a9223 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1163,6 +1163,7 @@ int radeon_suspend_kms(struct drm_device *dev, 
pm_message_t state)
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
+   bool force_completion = false;

if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1205,8 +1206,16 @@ int radeon_suspend_kms(struct drm_device *dev, 
pm_message_t state)

mutex_lock(&rdev->ring_lock);
/* wait for gpu to finish processing current batch */
-   for (i = 0; i < RADEON_NUM_RINGS; i++)
-   radeon_fence_wait_empty_locked(rdev, i);
+   for (i = 0; i < RADEON_NUM_RINGS; i++) {
+   r = radeon_fence_wait_empty_locked(rdev, i);
+   if (r) {
+   /* delay GPU reset to resume */
+   force_completion = true;
+   }
+   }
+   if (force_completion) {
+   radeon_fence_driver_force_completion(rdev);
+   }
mutex_unlock(&rdev->ring_lock);

radeon_save_bios_scratch_regs(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c 
b/drivers/gpu/drm/radeon/radeon_fence.c
index bf7b20e..28c09b6 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device 
*rdev, int ring)
  * Returns 0 if the fences have passed, error for all other cases.
  * Caller must hold ring lock.
  */
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+   int r;

-   while(1) {
-   int r;
-   r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+   r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+   if (r) {
if (r == -EDEADLK) {
-   mutex_unlock(&rdev->ring_lock);
-   r = radeon_gpu_reset(rdev);
-   mutex_lock(&rdev->ring_lock);
-   if (!r)
-   continue;
-   }
-   if (r) {
-   dev_err(rdev->dev, "error waiting for ring to become"
-   " idle (%d)\n", r);
+   return -EDEADLK;
}
-   return;
+   dev_err(rdev->dev, "error waiting for ring[%d] to become idle 
(%d)\n",
+   ring, r);
}
+   return 0;
 }

 /**
@@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
  */
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-   int ring;
+   int ring, r;

mutex_lock(&rdev->ring_lock);
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
if (!rdev->fence_drv[ring].initialized)
continue;
-   radeon_fence_wait_empty_locked(rdev, ring);
+   r = radeon_fence_wait_empty_locked(rdev, ring);
+   if (r) {
+   /* no need to trigger GPU reset as we are unloading */
+   radeon_fence_driver_force_completion(rdev);
+   }
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(

[PATCH] drm/radeon: add support for MEM_WRITE packet

2012-12-19 Thread j.gli...@gmail.com
From: Jerome Glisse 

To make it easier to debug some lockups from userspace, add support
for the MEM_WRITE packet.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_cs.c | 29 +
 drivers/gpu/drm/radeon/r600_cs.c  | 29 +
 drivers/gpu/drm/radeon/radeon_drv.c   |  3 ++-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c 
b/drivers/gpu/drm/radeon/evergreen_cs.c
index 74c6b42..5cea852 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2654,6 +2654,35 @@ static int evergreen_packet3_check(struct 
radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+   case PACKET3_MEM_WRITE:
+   {
+   u64 offset;
+
+   if (pkt->count != 3) {
+   DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+   return -EINVAL;
+   }
+   r = evergreen_cs_packet_next_reloc(p, &reloc);
+   if (r) {
+   DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+   return -EINVAL;
+   }
+   offset = radeon_get_ib_value(p, idx+0);
+   offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+   if (offset & 0x7) {
+   DRM_ERROR("bad MEM_WRITE (address not qwords 
aligned)\n");
+   return -EINVAL;
+   }
+   if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+   DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+   return -EINVAL;
+   }
+   offset += reloc->lobj.gpu_offset;
+   ib[idx+0] = offset;
+   ib[idx+1] = upper_32_bits(offset) & 0xff;
+   break;
+   }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0be768b..9ea13d0 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2294,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+   case PACKET3_MEM_WRITE:
+   {
+   u64 offset;
+
+   if (pkt->count != 3) {
+   DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+   return -EINVAL;
+   }
+   r = r600_cs_packet_next_reloc(p, &reloc);
+   if (r) {
+   DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+   return -EINVAL;
+   }
+   offset = radeon_get_ib_value(p, idx+0);
+   offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+   if (offset & 0x7) {
+   DRM_ERROR("bad MEM_WRITE (address not qwords 
aligned)\n");
+   return -EINVAL;
+   }
+   if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+   DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+   return -EINVAL;
+   }
+   offset += reloc->lobj.gpu_offset;
+   ib[idx+0] = offset;
+   ib[idx+1] = upper_32_bits(offset) & 0xff;
+   break;
+   }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c 
b/drivers/gpu/drm/radeon/radeon_drv.c
index 9b1a727..ff75934 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -68,9 +68,10 @@
  *   2.25.0 - eg+: new info request for num SE and num SH
  *   2.26.0 - r600-eg: fix htile size computation
  *   2.27.0 - r600-SI: Add CS ioctl support for async DMA
+ *   2.28.0 - r600-eg: Add MEM_WRITE packet support
  */
 #define KMS_DRIVER_MAJOR   2
-#define KMS_DRIVER_MINOR   27
+#define KMS_DRIVER_MINOR   28
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
-- 
1.7.11.7



[PATCH] drm/radeon: add support for evergreen/ni tiling informations v11

2012-02-10 Thread j.gli...@gmail.com
From: Jerome Glisse 

Evergreen and northern islands GPUs need more information for 2D tiling
than previous r6xx/r7xx. Add fields to the tiling ioctl to allow userspace
to provide it.

The v8 cs checking change to track color view on r6xx/r7xx doesn't
affect old userspace, as old userspace always emitted 0 for this register.

v2 fix r6xx/r7xx 2D tiling computation
v3 fix r6xx/r7xx height align for untiled surface & add support for
   tile split on evergreen and newer
v4 improve tiling debugging output
v5 fix tile split code for evergreen and newer
v6 set proper tile split for crtc register
v7 fix tile split limit value
v8 add COLOR_VIEW checking to r6xx/r7xx checker, add evergreen cs
   checking, update safe reg for r600, evergreen and cayman.
   Evergreen checking need some work around for stencil alignment
   issues
v9 fix tile split value range, fix compressed texture handling and
   mipmap calculation, allow the evergreen checker to stay silent when
   faced with current broken userspace (depth/stencil alignment issue)
v10 fix eg 3d texture and compressed texture, fix r600 depth array,
fix r600 color view computation, add support for evergreen stencil
split
v11 more verbose debugging in some case

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/atombios_crtc.c|   20 +-
 drivers/gpu/drm/radeon/evergreen.c|   31 ++
 drivers/gpu/drm/radeon/evergreen_cs.c |  777 +++--
 drivers/gpu/drm/radeon/evergreend.h   |  368 ++
 drivers/gpu/drm/radeon/r600_cs.c  |  103 +++--
 drivers/gpu/drm/radeon/r600d.h|   14 +
 drivers/gpu/drm/radeon/radeon.h   |   13 +
 drivers/gpu/drm/radeon/radeon_drv.c   |3 +-
 drivers/gpu/drm/radeon/radeon_object.c|   46 ++
 drivers/gpu/drm/radeon/reg_srcs/cayman|1 -
 drivers/gpu/drm/radeon/reg_srcs/evergreen |1 -
 drivers/gpu/drm/radeon/reg_srcs/r600  |8 -
 include/drm/radeon_drm.h  |   24 +-
 13 files changed, 1299 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c 
b/drivers/gpu/drm/radeon/atombios_crtc.c
index 8919352..955d2bc 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1031,6 +1031,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc,
struct radeon_bo *rbo;
uint64_t fb_location;
uint32_t fb_format, fb_pitch_pixels, tiling_flags;
+   unsigned bankw, bankh, mtaspect, tile_split;
u32 fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_NONE);
u32 tmp, viewport_w, viewport_h;
int r;
@@ -1121,20 +1122,13 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc,
break;
}

-   switch ((tmp & 0xf000) >> 12) {
-   case 0: /* 1KB rows */
-   default:
-   fb_format |= 
EVERGREEN_GRPH_TILE_SPLIT(EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB);
-   break;
-   case 1: /* 2KB rows */
-   fb_format |= 
EVERGREEN_GRPH_TILE_SPLIT(EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB);
-   break;
-   case 2: /* 4KB rows */
-   fb_format |= 
EVERGREEN_GRPH_TILE_SPLIT(EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB);
-   break;
-   }
-
fb_format |= 
EVERGREEN_GRPH_ARRAY_MODE(EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1);
+
+   evergreen_tiling_fields(tiling_flags, &bankw, &bankh, 
&mtaspect, &tile_split);
+   fb_format |= EVERGREEN_GRPH_TILE_SPLIT(tile_split);
+   fb_format |= EVERGREEN_GRPH_BANK_WIDTH(bankw);
+   fb_format |= EVERGREEN_GRPH_BANK_HEIGHT(bankh);
+   fb_format |= EVERGREEN_GRPH_MACRO_TILE_ASPECT(mtaspect);
} else if (tiling_flags & RADEON_TILING_MICRO)
fb_format |= 
EVERGREEN_GRPH_ARRAY_MODE(EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1);

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index ae09fe8..b7a7102 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -43,6 +43,37 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
 int ring, u32 cp_int_cntl);

+void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
+unsigned *bankh, unsigned *mtaspect,
+unsigned *tile_split)
+{
+   *bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & 
RADEON_TILING_EG_BANKW_MASK;
+   *bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & 
RADEON_TILING_EG_BANKH_MASK;
+   *mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) 
& RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
+   *tile_split = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & 
RADEON_TILING_EG_TILE_SPLIT_

[PATCH] drm/radeon/kms: add htile support to the cs checker

2012-02-10 Thread j.gli...@gmail.com
From: Jerome Glisse 

For 6xx+.  Required for mesa to use htile support for HiZ/HiS.
Userspace will check for radeon version 2.14, which is bumped either
by the tiling patch or the stream out patch.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
Signed-off-by: Alex Deucher 
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_cs.c |   56 ++-
 drivers/gpu/drm/radeon/evergreend.h   |7 +
 drivers/gpu/drm/radeon/r600_cs.c  |  292 ++---
 drivers/gpu/drm/radeon/r600d.h|7 +
 drivers/gpu/drm/radeon/reg_srcs/cayman|1 -
 drivers/gpu/drm/radeon/reg_srcs/evergreen |1 -
 drivers/gpu/drm/radeon/reg_srcs/r600  |1 -
 7 files changed, 249 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c 
b/drivers/gpu/drm/radeon/evergreen_cs.c
index 2ed17f7..6e269ff 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -85,6 +85,9 @@ struct evergreen_cs_track {
u32 db_s_write_offset;
struct radeon_bo*db_s_read_bo;
struct radeon_bo*db_s_write_bo;
+   u32 htile_offset;
+   u32 htile_surface;
+   struct radeon_bo*htile_bo;
 };

 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
@@ -155,6 +158,9 @@ static void evergreen_cs_track_init(struct 
evergreen_cs_track *track)
track->db_s_write_offset = 0x;
track->db_s_read_bo = NULL;
track->db_s_write_bo = NULL;
+   track->htile_bo = NULL;
+   track->htile_offset = 0x;
+   track->htile_surface = 0;

for (i = 0; i < 4; i++) {
track->vgt_strmout_size[i] = 0;
@@ -627,6 +633,40 @@ static int evergreen_cs_track_validate_depth(struct 
radeon_cs_parser *p)
return -EINVAL;
}

+   /* hyperz */
+   if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
+   unsigned long size;
+   unsigned nbx, nby;
+
+   if (track->htile_bo == NULL) {
+   dev_warn(p->dev, "%s:%d htile enabled without htile 
surface 0x%08x\n",
+__func__, __LINE__, track->db_z_info);
+   return -EINVAL;
+   }
+
+   nbx = surf.nbx;
+   nby = surf.nby;
+   if (G_028ABC_HTILE_WIDTH(track->htile_surface)) {
+   nbx = (nbx + 7) / 8;
+   } else {
+   nbx = (nbx + 3) / 4;
+   }
+   if (G_028ABC_HTILE_HEIGHT(track->htile_surface)) {
+   nby = (nby + 7) / 8;
+   } else {
+   nby = (nby + 3) / 4;
+   }
+   size = nbx * nby * 4;
+   size += track->htile_offset;
+
+   if (size > radeon_bo_size(track->htile_bo)) {
+   dev_warn(p->dev, "%s:%d htile surface too small %ld for 
%ld (%d %d)\n",
+__func__, __LINE__, 
radeon_bo_size(track->htile_bo),
+size, nbx, nby);
+   return -EINVAL;
+   }
+   }
+
return 0;
 }

@@ -1611,6 +1651,21 @@ static int evergreen_cs_check_reg(struct 
radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_base_last[tmp] = ib[idx];
track->cb_color_bo[tmp] = reloc->robj;
break;
+   case DB_HTILE_DATA_BASE:
+   r = evergreen_cs_packet_next_reloc(p, &reloc);
+   if (r) {
+   dev_warn(p->dev, "bad SET_CONTEXT_REG "
+   "0x%04X\n", reg);
+   return -EINVAL;
+   }
+   track->htile_offset = radeon_get_ib_value(p, idx);
+   ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0x);
+   track->htile_bo = reloc->robj;
+   break;
+   case DB_HTILE_SURFACE:
+   /* 8x8 only */
+   track->htile_surface = radeon_get_ib_value(p, idx);
+   break;
case CB_IMMED0_BASE:
case CB_IMMED1_BASE:
case CB_IMMED2_BASE:
@@ -1623,7 +1678,6 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser 
*p, u32 reg, u32 idx)
case CB_IMMED9_BASE:
case CB_IMMED10_BASE:
case CB_IMMED11_BASE:
-   case DB_HTILE_DATA_BASE:
case SQ_PGM_START_FS:
case SQ_PGM_START_ES:
case SQ_PGM_START_VS:
diff --git a/drivers/gpu/drm/radeon/evergreend.h 
b/drivers/gpu/drm/radeon/evergreend.h
index eb5708c..b4d1c42 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -991,6 +991,13 @@
 #define   G_028008_SLICE_MAX(x)(((x) >> 13) & 0x7FF)
 #define   C_028008_SLICE_MAX   0xFF001FFF
 #define DB_HTILE_DATA_BASE 0x28014
+#de

[PATCH] drm/radeon/kms: properly set accel working flag and bailout when false

2012-02-20 Thread j.gli...@gmail.com
From: Jerome Glisse 

If accel is not working, many subsystems such as the ib pool might not be
initialized properly, which can lead to a segfault inside the kernel when
the cs ioctl is called with non-working acceleration. To avoid this, make
sure the accel working flag is false when an error happens in GPU startup,
and return EBUSY from the cs ioctl if accel is not working.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen.c |1 +
 drivers/gpu/drm/radeon/ni.c|1 +
 drivers/gpu/drm/radeon/r100.c  |8 +++-
 drivers/gpu/drm/radeon/r300.c  |8 +++-
 drivers/gpu/drm/radeon/r420.c  |8 +++-
 drivers/gpu/drm/radeon/r520.c  |8 +++-
 drivers/gpu/drm/radeon/r600.c  |1 +
 drivers/gpu/drm/radeon/radeon_cs.c |4 
 drivers/gpu/drm/radeon/rs400.c |8 +++-
 drivers/gpu/drm/radeon/rs600.c |8 +++-
 drivers/gpu/drm/radeon/rs690.c |8 +++-
 drivers/gpu/drm/radeon/rv515.c |8 +++-
 drivers/gpu/drm/radeon/rv770.c |1 +
 13 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 1a816ea..cb86330 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3254,6 +3254,7 @@ int evergreen_resume(struct radeon_device *rdev)
r = evergreen_startup(rdev);
if (r) {
DRM_ERROR("evergreen startup failed on resume\n");
+   rdev->accel_working = false;
return r;
}

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index db09065..2509c50 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1547,6 +1547,7 @@ int cayman_resume(struct radeon_device *rdev)
r = cayman_startup(rdev);
if (r) {
DRM_ERROR("cayman startup failed on resume\n");
+   rdev->accel_working = false;
return r;
}
return r;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index bfd36ab..3563756 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3930,6 +3930,8 @@ static int r100_startup(struct radeon_device *rdev)

 int r100_resume(struct radeon_device *rdev)
 {
+   int r;
+
/* Make sur GART are not working */
if (rdev->flags & RADEON_IS_PCI)
r100_pci_gart_disable(rdev);
@@ -3949,7 +3951,11 @@ int r100_resume(struct radeon_device *rdev)
radeon_surface_init(rdev);

rdev->accel_working = true;
-   return r100_startup(rdev);
+   r = r100_startup(rdev);
+   if (r) {
+   rdev->accel_working = false;
+   }
+   return r;
 }

 int r100_suspend(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 3fc0d29..6829638 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1431,6 +1431,8 @@ static int r300_startup(struct radeon_device *rdev)

 int r300_resume(struct radeon_device *rdev)
 {
+   int r;
+
/* Make sur GART are not working */
if (rdev->flags & RADEON_IS_PCIE)
rv370_pcie_gart_disable(rdev);
@@ -1452,7 +1454,11 @@ int r300_resume(struct radeon_device *rdev)
radeon_surface_init(rdev);

rdev->accel_working = true;
-   return r300_startup(rdev);
+   r = r300_startup(rdev);
+   if (r) {
+   rdev->accel_working = false;
+   }
+   return r;
 }

 int r300_suspend(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 666e28f..b143230 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -291,6 +291,8 @@ static int r420_startup(struct radeon_device *rdev)

 int r420_resume(struct radeon_device *rdev)
 {
+   int r;
+
/* Make sur GART are not working */
if (rdev->flags & RADEON_IS_PCIE)
rv370_pcie_gart_disable(rdev);
@@ -316,7 +318,11 @@ int r420_resume(struct radeon_device *rdev)
radeon_surface_init(rdev);

rdev->accel_working = true;
-   return r420_startup(rdev);
+   r = r420_startup(rdev);
+   if (r) {
+   rdev->accel_working = false;
+   }
+   return r;
 }

 int r420_suspend(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 4ae1615..25084e8 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -218,6 +218,8 @@ static int r520_startup(struct radeon_device *rdev)

 int r520_resume(struct radeon_device *rdev)
 {
+   int r;
+
/* Make sur GART are not working */
if (rdev->flags & RADEON_IS_PCIE)
rv370_pcie_gart_disable(rdev);
@@ -237,7 +239,11 @@ int r520_resume(struct radeon_device *rdev)
radeon_surface_init(rdev);

rdev->accel_working = true;
-   return r520_st

[PATCH] drm/radeon/kms: add htile support to the cs checker v2

2012-02-22 Thread j.gli...@gmail.com
From: Jerome Glisse 

For 6xx+.  Required for mesa to use htile support for HiZ/HiS.
Userspace will check for radeon version 2.14, which is bumped either
by the tiling patch or the stream out patch. This patch only adds support
for htile relocation, which should be enough for any userspace
to implement the hyperz (using htile buffer) feature.

v2: Jerome: Fix size checking for htile buffer.

Signed-off-by: Pierre-Eric Pelloux-Prayer 
Signed-off-by: Alex Deucher 
Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen_cs.c |   78 ++-
 drivers/gpu/drm/radeon/evergreend.h   |8 +
 drivers/gpu/drm/radeon/r600_cs.c  |  379 -
 drivers/gpu/drm/radeon/r600d.h|8 +
 drivers/gpu/drm/radeon/reg_srcs/cayman|1 -
 drivers/gpu/drm/radeon/reg_srcs/evergreen |1 -
 drivers/gpu/drm/radeon/reg_srcs/r600  |1 -
 7 files changed, 360 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c 
b/drivers/gpu/drm/radeon/evergreen_cs.c
index 2ed17f7..54a320a 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -85,6 +85,9 @@ struct evergreen_cs_track {
u32 db_s_write_offset;
struct radeon_bo*db_s_read_bo;
struct radeon_bo*db_s_write_bo;
+   u32 htile_offset;
+   u32 htile_surface;
+   struct radeon_bo*htile_bo;
 };

 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
@@ -155,6 +158,9 @@ static void evergreen_cs_track_init(struct 
evergreen_cs_track *track)
track->db_s_write_offset = 0x;
track->db_s_read_bo = NULL;
track->db_s_write_bo = NULL;
+   track->htile_bo = NULL;
+   track->htile_offset = 0x;
+   track->htile_surface = 0;

for (i = 0; i < 4; i++) {
track->vgt_strmout_size[i] = 0;
@@ -627,6 +633,62 @@ static int evergreen_cs_track_validate_depth(struct 
radeon_cs_parser *p)
return -EINVAL;
}

+   /* hyperz */
+   if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
+   unsigned long size;
+   unsigned nbx, nby;
+
+   if (track->htile_bo == NULL) {
+   dev_warn(p->dev, "%s:%d htile enabled without htile 
surface 0x%08x\n",
+__func__, __LINE__, track->db_z_info);
+   return -EINVAL;
+   }
+
+   nbx = surf.nbx;
+   nby = surf.nby;
+   if (G_028ABC_LINEAR(track->htile_surface)) {
+   /* pitch must be 16 htiles aligned == 16 * 8 pixel 
aligned */
+   nbx = round_up(nbx, 16 * 8);
+   /* height is npipes htiles aligned == npipes * 8 pixel 
aligned */
+   nby = round_up(nby, track->npipes * 8);
+   } else {
+   switch (track->npipes) {
+   case 8:
+   nbx = round_up(nbx, 64 * 8);
+   nby = round_up(nby, 64 * 8);
+   break;
+   case 4:
+   nbx = round_up(nbx, 64 * 8);
+   nby = round_up(nby, 32 * 8);
+   break;
+   case 2:
+   nbx = round_up(nbx, 32 * 8);
+   nby = round_up(nby, 32 * 8);
+   break;
+   case 1:
+   nbx = round_up(nbx, 32 * 8);
+   nby = round_up(nby, 16 * 8);
+   break;
+   default:
+   dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
+__func__, __LINE__, track->npipes);
+   return -EINVAL;
+   }
+   }
+   /* compute number of htile */
+   nbx = nbx / 8;
+   nby = nby / 8;
+   size = nbx * nby * 4;
+   size += track->htile_offset;
+
+   if (size > radeon_bo_size(track->htile_bo)) {
+   dev_warn(p->dev, "%s:%d htile surface too small %ld for 
%ld (%d %d)\n",
+__func__, __LINE__, 
radeon_bo_size(track->htile_bo),
+size, nbx, nby);
+   return -EINVAL;
+   }
+   }
+
return 0;
 }

@@ -1611,6 +1673,21 @@ static int evergreen_cs_check_reg(struct 
radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_base_last[tmp] = ib[idx];
track->cb_color_bo[tmp] = reloc->robj;
break;
+   case DB_HTILE_DATA_BASE:
+   r = evergreen_cs_packet_next_reloc(p, &reloc);
+   if (r) {

[PATCH] drm/nouveau: fix ttm move notify callback

2012-01-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

ttm might call move notify with a NULL new mem placement;
properly handle this case inside the nouveau move notify callback.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index f12dd0f..65f5b0b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -808,9 +808,8 @@ out:
 }

 static void
-nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg 
*new_mem)
 {
-   struct nouveau_mem *node = new_mem->mm_node;
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct nouveau_vma *vma;

@@ -820,6 +819,7 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct 
ttm_mem_reg *new_mem)
} else
if (new_mem && new_mem->mem_type == TTM_PL_TT &&
nvbo->page_shift == vma->vm->spg_shift) {
+   struct nouveau_mem *node = new_mem->mm_node;
nouveau_vm_map_sg(vma, 0, new_mem->
  num_pages << PAGE_SHIFT,
  node, node->pages);
@@ -1131,7 +1131,7 @@ struct ttm_bo_driver nouveau_bo_driver = {
.invalidate_caches = nouveau_bo_invalidate_caches,
.init_mem_type = nouveau_bo_init_mem_type,
.evict_flags = nouveau_bo_evict_flags,
-   .move_notify = nouveau_bo_move_ntfy,
+   .move_notify = nouveau_bo_move_notify,
.move = nouveau_bo_move,
.verify_access = nouveau_bo_verify_access,
.sync_obj_signaled = __nouveau_fence_signalled,
-- 
1.7.7.1



[PATCH] drm/radeon: silence out possible lock dependency warning

2012-01-24 Thread j.gli...@gmail.com
From: Jerome Glisse 

Silence the lock dependency warning by moving the bo allocation out
of the ib mutex protected section. This might lead to a useless temporary
allocation, but it's not harmful as such things only happen at
initialization.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_ring.c |   19 +++
 1 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index e8bc709..1cb4b94 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -204,22 +204,25 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct 
radeon_ib *ib)

 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
+   struct radeon_sa_manager tmp;
int i, r;

-   mutex_lock(&rdev->ib_pool.mutex);
-   if (rdev->ib_pool.ready) {
-   mutex_unlock(&rdev->ib_pool.mutex);
-   return 0;
-   }
-
-   r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+   r = radeon_sa_bo_manager_init(rdev, &tmp,
  RADEON_IB_POOL_SIZE*64*1024,
  RADEON_GEM_DOMAIN_GTT);
if (r) {
-   mutex_unlock(&rdev->ib_pool.mutex);
return r;
}

+   mutex_lock(&rdev->ib_pool.mutex);
+   if (rdev->ib_pool.ready) {
+   mutex_unlock(&rdev->ib_pool.mutex);
+   radeon_sa_bo_manager_fini(rdev, &tmp);
+   return 0;
+   }
+
+   rdev->ib_pool.sa_manager = tmp;
+   INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
rdev->ib_pool.ibs[i].fence = NULL;
rdev->ib_pool.ibs[i].idx = i;
-- 
1.7.7.1



[PATCH] drm/radeon: add an exclusive lock for GPU reset

2012-07-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

GPU reset needs to be exclusive, with only one happening at a time. For
this, add a rw semaphore so that any path that triggers GPU activity
has to take the semaphore as a reader, thus allowing concurrency.

The GPU reset path takes the semaphore as a writer, ensuring that
no concurrent reset takes place.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|1 +
 drivers/gpu/drm/radeon/radeon_cs.c |5 +
 drivers/gpu/drm/radeon/radeon_device.c |2 ++
 drivers/gpu/drm/radeon/radeon_gem.c|7 +++
 4 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 77b4519b..29d6986 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1446,6 +1446,7 @@ struct radeon_device {
struct device   *dev;
struct drm_device   *ddev;
struct pci_dev  *pdev;
+   struct rw_semaphore exclusive_lock;
/* ASIC */
union radeon_asic_configconfig;
enum radeon_family  family;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index f1b7527..7ee6491 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -499,7 +499,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
struct radeon_cs_parser parser;
int r;

+   down_read(&rdev->exclusive_lock);
if (!rdev->accel_working) {
+   up_read(&rdev->exclusive_lock);
return -EBUSY;
}
/* initialize parser */
@@ -512,6 +514,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
}
@@ -520,6 +523,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to parse relocation %d!\n", r);
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
}
@@ -533,6 +537,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
}
 out:
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index f654ba8..c8fdb40 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -988,6 +988,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
int r;
int resched;

+   down_write(&rdev->exclusive_lock);
radeon_save_bios_scratch_regs(rdev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1007,6 +1008,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
dev_info(rdev->dev, "GPU reset failed\n");
}

+   up_write(&rdev->exclusive_lock);
return r;
 }

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index d9b0809..f99db63 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -215,12 +215,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
uint32_t handle;
int r;

+   down_read(&rdev->exclusive_lock);
/* create a gem object to contain this object in */
args->size = roundup(args->size, PAGE_SIZE);
r = radeon_gem_object_create(rdev, args->size, args->alignment,
args->initial_domain, false,
false, &gobj);
if (r) {
+   up_read(&rdev->exclusive_lock);
r = radeon_gem_handle_lockup(rdev, r);
return r;
}
@@ -228,10 +230,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(gobj);
if (r) {
+   up_read(&rdev->exclusive_lock);
r = radeon_gem_handle_lockup(rdev, r);
return r;
}
args->handle = handle;
+   up_read(&rdev->exclusive_lock);
return 0;
 }

@@ -240,6 +244,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, 
void *data,
 {
/* transition the BO to a domain -
 * just validate the BO into a certain domain */
+   struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_set_dom

[PATCH] drm/radeon: fix rare segfault

2012-07-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

In the gem idle/busy ioctls the radeon object was dereferenced after
drm_gem_object_unreference_unlocked, which, in case the object
has been destroyed, leads to use of a possibly freed pointer with
possibly wrong data.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon_gem.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index 74176c5..c8838fc 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -325,6 +325,7 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void 
*data,
 int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
  struct drm_file *filp)
 {
+   struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_busy *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -350,13 +351,14 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void 
*data,
break;
}
drm_gem_object_unreference_unlocked(gobj);
-   r = radeon_gem_handle_lockup(robj->rdev, r);
+   r = radeon_gem_handle_lockup(rdev, r);
return r;
 }

 int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
  struct drm_file *filp)
 {
+   struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -369,10 +371,10 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, 
void *data,
robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, NULL, false);
/* callback hw specific functions if any */
-   if (robj->rdev->asic->ioctl_wait_idle)
-   robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
+   if (rdev->asic->ioctl_wait_idle)
+   robj->rdev->asic->ioctl_wait_idle(rdev, robj);
drm_gem_object_unreference_unlocked(gobj);
-   r = radeon_gem_handle_lockup(robj->rdev, r);
+   r = radeon_gem_handle_lockup(rdev, r);
return r;
 }

-- 
1.7.10.2



[PATCH] drm/radeon: add an exclusive lock for GPU reset v2

2012-07-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

GPU reset needs to be exclusive, with only one happening at a time. For
this, add a rw semaphore so that any path that triggers GPU activity
has to take the semaphore as a reader, thus allowing concurrency.

The GPU reset path takes the semaphore as a writer, ensuring that
no concurrent reset takes place.

v2: init rw semaphore

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h|1 +
 drivers/gpu/drm/radeon/radeon_cs.c |5 +
 drivers/gpu/drm/radeon/radeon_device.c |3 +++
 drivers/gpu/drm/radeon/radeon_gem.c|8 
 4 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 77b4519b..29d6986 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1446,6 +1446,7 @@ struct radeon_device {
struct device   *dev;
struct drm_device   *ddev;
struct pci_dev  *pdev;
+   struct rw_semaphore exclusive_lock;
/* ASIC */
union radeon_asic_configconfig;
enum radeon_family  family;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index f1b7527..7ee6491 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -499,7 +499,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
struct radeon_cs_parser parser;
int r;

+   down_read(&rdev->exclusive_lock);
if (!rdev->accel_working) {
+   up_read(&rdev->exclusive_lock);
return -EBUSY;
}
/* initialize parser */
@@ -512,6 +514,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
}
@@ -520,6 +523,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to parse relocation %d!\n", r);
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
}
@@ -533,6 +537,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
}
 out:
radeon_cs_parser_fini(&parser, r);
+   up_read(&rdev->exclusive_lock);
r = radeon_cs_handle_lockup(rdev, r);
return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index f654ba8..254fdb4 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -734,6 +734,7 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->gem.mutex);
mutex_init(&rdev->pm.mutex);
init_rwsem(&rdev->pm.mclk_lock);
+   init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue);
init_waitqueue_head(&rdev->irq.idle_queue);
r = radeon_gem_init(rdev);
@@ -988,6 +989,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
int r;
int resched;

+   down_write(&rdev->exclusive_lock);
radeon_save_bios_scratch_regs(rdev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1007,6 +1009,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
dev_info(rdev->dev, "GPU reset failed\n");
}

+   up_write(&rdev->exclusive_lock);
return r;
 }

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
b/drivers/gpu/drm/radeon/radeon_gem.c
index d9b0809..b0be9c4 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -215,12 +215,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
uint32_t handle;
int r;

+   down_read(&rdev->exclusive_lock);
/* create a gem object to contain this object in */
args->size = roundup(args->size, PAGE_SIZE);
r = radeon_gem_object_create(rdev, args->size, args->alignment,
args->initial_domain, false,
false, &gobj);
if (r) {
+   up_read(&rdev->exclusive_lock);
r = radeon_gem_handle_lockup(rdev, r);
return r;
}
@@ -228,10 +230,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void 
*data,
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(gobj);
if (r) {
+   up_read(&rdev->exclusive_lock);
r = radeon_gem_handle_lockup(rdev, r);
r

[PATCH] drm/ttm: ensure ttm for new node is bound before calling move_notify()

2011-08-23 Thread j.gli...@gmail.com
From: Ben Skeggs 

This was true for new TTM_PL_SYSTEM and new TTM_PL_TT cases, but wasn't
the case on TTM_PL_SYSTEM<->TTM_PL_TT moves, which causes trouble on some
paths as nouveau's move_notify() hook requires that the dma addresses be
valid at this point.

Signed-off-by: Ben Skeggs 
Reviewed-by: Jerome Glisse 
---
 drivers/gpu/drm/ttm/ttm_bo.c |   10 ++
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 56619f6..ac332de 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -390,10 +390,12 @@ static int ttm_bo_handle_move_mem(struct 
ttm_buffer_object *bo,
 * Create and bind a ttm if required.
 */

-   if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
-   ret = ttm_bo_add_ttm(bo, false);
-   if (ret)
-   goto out_err;
+   if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+   if (bo->ttm == NULL) {
+   ret = ttm_bo_add_ttm(bo, false);
+   if (ret)
+   goto out_err;
+   }

ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
if (ret)
-- 
1.7.4.4



[PATCH] drm/radeon/kms: evergreen & ni reset SPI block on CP resume

2011-08-24 Thread j.gli...@gmail.com
From: Jerome Glisse 

For some reason the SPI block is in a broken state after module
unloading. This leads to broken rendering after reloading the
module. Fix this by resetting the SPI block in the CP resume function.

Signed-off-by: Jerome Glisse 

Make ib allocation size function of cs size

2011-12-05 Thread j.gli...@gmail.com
The two following patches are on top of
http://cgit.freedesktop.org/~glisse/linux

They make the ib allocation size a function of the cs size. This
avoids wasting pool space and avoids triggering fence_wait
in ib_get. I am still evaluating how many fence_wait calls we avoid
with this.

Cheers,
Jerome



[PATCH 1/2] drm/radeon: make ib size variable

2011-12-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

This avoids wasting ib pool space and avoids a bunch of waits for
previous ibs to finish.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/r100.c  |2 +-
 drivers/gpu/drm/radeon/r600.c  |2 +-
 drivers/gpu/drm/radeon/r600_blit_kms.c |   16 +---
 drivers/gpu/drm/radeon/radeon.h|3 ++-
 drivers/gpu/drm/radeon/radeon_cs.c |6 --
 drivers/gpu/drm/radeon/radeon_ring.c   |7 +--
 6 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 657040b..947ba22 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3708,7 +3708,7 @@ int r100_ib_test(struct radeon_device *rdev)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
-   r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib);
+   r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, 256);
if (r) {
return r;
}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 951566f..4f08e5e 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2711,7 +2711,7 @@ int r600_ib_test(struct radeon_device *rdev, int ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
-   r = radeon_ib_get(rdev, ring, &ib);
+   r = radeon_ib_get(rdev, ring, &ib, 256);
if (r) {
DRM_ERROR("radeon: failed to get ib (%d).\n", r);
return r;
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c 
b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 02a7574..d996f43 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -619,16 +619,17 @@ void r600_blit_fini(struct radeon_device *rdev)
radeon_bo_unref(&rdev->r600_blit.shader_obj);
 }

-static int r600_vb_ib_get(struct radeon_device *rdev)
+static int r600_vb_ib_get(struct radeon_device *rdev, unsigned size)
 {
int r;
-   r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, 
&rdev->r600_blit.vb_ib);
+   r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX,
+ &rdev->r600_blit.vb_ib, size);
if (r) {
DRM_ERROR("failed to get IB for vertex buffer\n");
return r;
}

-   rdev->r600_blit.vb_total = 64*1024;
+   rdev->r600_blit.vb_total = size;
rdev->r600_blit.vb_used = 0;
return 0;
 }
@@ -693,10 +694,6 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, 
unsigned num_gpu_pages)
int num_loops = 0;
int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;

-   r = r600_vb_ib_get(rdev);
-   if (r)
-   return r;
-
/* num loops */
while (num_gpu_pages) {
num_gpu_pages -=
@@ -705,6 +702,11 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, 
unsigned num_gpu_pages)
num_loops++;
}

+   /* 48 bytes for vertex per loop */
+   r = r600_vb_ib_get(rdev, (num_loops*48)+256);
+   if (r)
+   return r;
+
/* calculate number of loops correctly */
ring_size = num_loops * dwords_per_loop;
ring_size += rdev->r600_blit.ring_size_common;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 6673f19..8cb6a58 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -713,7 +713,8 @@ struct r600_blit {

 void r600_blit_suspend(struct radeon_device *rdev);

-int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib **ib);
+int radeon_ib_get(struct radeon_device *rdev, int ring,
+ struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c
index b3bbf37..fdfc31b 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -288,7 +288,8 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 * input memory (cached) and write to the IB (which can be
 * uncached).
 */
-   r =  radeon_ib_get(rdev, parser->ring, &parser->ib);
+   r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
+  ib_chunk->length_dw * 4);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -348,7 +349,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
return -EINVAL;
}
-   r =  radeon_ib_get(rdev, parser->ring, &parser->ib);
+   r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
+  ib_chunk->length_dw * 4);
  

[PATCH 2/2] drm/radeon: allocate semaphore from the ib pool

2011-12-05 Thread j.gli...@gmail.com
From: Jerome Glisse 

This allows sharing the ib pool with semaphores and avoids
having more bos around.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/radeon.h   |   67 -
 drivers/gpu/drm/radeon/radeon_device.c|2 +-
 drivers/gpu/drm/radeon/radeon_ring.c  |5 +-
 drivers/gpu/drm/radeon/radeon_semaphore.c |  157 -
 4 files changed, 131 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8cb6a58..5e35423 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -247,32 +247,6 @@ void radeon_fence_unref(struct radeon_fence **fence);
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);

 /*
- * Semaphores.
- */
-struct radeon_ring;
-
-struct radeon_semaphore_driver {
-   rwlock_tlock;
-   struct list_headfree;
-};
-
-struct radeon_semaphore {
-   struct radeon_bo*robj;
-   struct list_headlist;
-   uint64_tgpu_addr;
-};
-
-void radeon_semaphore_driver_fini(struct radeon_device *rdev);
-int radeon_semaphore_create(struct radeon_device *rdev,
-   struct radeon_semaphore **semaphore);
-void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
- struct radeon_semaphore *semaphore);
-void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
-   struct radeon_semaphore *semaphore);
-void radeon_semaphore_free(struct radeon_device *rdev,
-  struct radeon_semaphore *semaphore);
-
-/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -410,6 +384,46 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 uint32_t handle);

 /*
+ * Semaphores.
+ */
+struct radeon_ring;
+
+#defineRADEON_SEMAPHORE_BO_SIZE256
+
+struct radeon_semaphore_driver {
+   rwlock_tlock;
+   struct list_headbo;
+};
+
+struct radeon_semaphore_bo;
+
+/* everything here is constant */
+struct radeon_semaphore {
+   struct list_headlist;
+   uint64_tgpu_addr;
+   uint32_t*cpu_ptr;
+   struct radeon_semaphore_bo  *bo;
+};
+
+struct radeon_semaphore_bo {
+   struct list_headlist;
+   struct radeon_ib*ib;
+   struct list_headfree;
+   struct radeon_semaphore semaphores[RADEON_SEMAPHORE_BO_SIZE/8];
+   unsignednused;
+};
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev);
+int radeon_semaphore_create(struct radeon_device *rdev,
+   struct radeon_semaphore **semaphore);
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+ struct radeon_semaphore *semaphore);
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+   struct radeon_semaphore *semaphore);
+void radeon_semaphore_free(struct radeon_device *rdev,
+  struct radeon_semaphore *semaphore);
+
+/*
  * GART structures, functions & helpers
  */
 struct radeon_mc;
@@ -716,6 +730,7 @@ void r600_blit_suspend(struct radeon_device *rdev);
 int radeon_ib_get(struct radeon_device *rdev, int ring,
  struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
+bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c 
b/drivers/gpu/drm/radeon/radeon_device.c
index 6566860..aa9a11e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -731,7 +731,7 @@ int radeon_device_init(struct radeon_device *rdev,
INIT_LIST_HEAD(&rdev->gem.objects);
init_waitqueue_head(&rdev->irq.vblank_queue);
init_waitqueue_head(&rdev->irq.idle_queue);
-   INIT_LIST_HEAD(&rdev->semaphore_drv.free);
+   INIT_LIST_HEAD(&rdev->semaphore_drv.bo);
/* initialize vm here */
rdev->vm_manager.use_bitmap = 1;
rdev->vm_manager.max_pfn = 1 << 20;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index 5f9edea..4fe320f 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -77,8 +77,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 /*
  * IB.
  */
-static bool radeon_ib_try_free(struct radeon_device *rdev,
-  struct radeon_ib *ib)
+bool radeon_ib_try_free(struct radeon_device *rdev, struct 

[RFC] ttm merge ttm_backend & ttm_t V2

2011-11-02 Thread j.gli...@gmail.com
Hi,

So attached is the last batch of patches. I split out the ttm put page
fix and I fixed a bug in the pages alloc when the clear flag
wasn't set. I tested them on a bunch of radeons and everything
seems fine (several gl apps, firefox, compositor ...). I will
do more testing on agp and nouveau tomorrow.

The last patch adds callbacks for populating and unpopulating
(better names, if any, are welcome) a ttm_tt, allowing the driver
to choose between different options. The idea is that Konrad's dma
allocator would provide helper functions the driver can
call.

I chose to allocate all pages at once because ttm_tt objects
are meant to be bound and thus to be fully populated in their
lifetime (vmwgfx might be different in this regard). It
simplifies the code in several places. I didn't see any performance
impact in the few gl benchmarks I ran.

Konrad, I am planning on rebasing the last 4 patches of your
patchset on top of that. They will likely shrink in size a
bit.

Cheers,
Jerome Glisse



[PATCH 1/8] drm/ttm: remove userspace backed ttm object support

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

This was never used in any of the drivers; properly using userspace
pages for bos would need more code (vma interaction mostly). Remove
this dead code in preparation for the ttm_tt & backend merge.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_bo.c|   22 
 drivers/gpu/drm/ttm/ttm_tt.c|  105 +--
 include/drm/ttm/ttm_bo_api.h|5 --
 include/drm/ttm/ttm_bo_driver.h |   24 -
 4 files changed, 1 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 617b646..4bde335 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -342,22 +342,6 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
if (unlikely(bo->ttm == NULL))
ret = -ENOMEM;
break;
-   case ttm_bo_type_user:
-   bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
-   page_flags | TTM_PAGE_FLAG_USER,
-   glob->dummy_read_page);
-   if (unlikely(bo->ttm == NULL)) {
-   ret = -ENOMEM;
-   break;
-   }
-
-   ret = ttm_tt_set_user(bo->ttm, current,
- bo->buffer_start, bo->num_pages);
-   if (unlikely(ret != 0)) {
-   ttm_tt_destroy(bo->ttm);
-   bo->ttm = NULL;
-   }
-   break;
default:
printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
ret = -EINVAL;
@@ -907,16 +891,12 @@ static uint32_t ttm_bo_select_caching(struct 
ttm_mem_type_manager *man,
 }

 static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
-bool disallow_fixed,
 uint32_t mem_type,
 uint32_t proposed_placement,
 uint32_t *masked_placement)
 {
uint32_t cur_flags = ttm_bo_type_flags(mem_type);

-   if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed)
-   return false;
-
if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0)
return false;

@@ -961,7 +941,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
man = &bdev->man[mem_type];

type_ok = ttm_bo_mt_compatible(man,
-   bo->type == ttm_bo_type_user,
mem_type,
placement->placement[i],
&cur_flags);
@@ -1009,7 +988,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
if (!man->has_type)
continue;
if (!ttm_bo_mt_compatible(man,
-   bo->type == ttm_bo_type_user,
mem_type,
placement->busy_placement[i],
&cur_flags))
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 58c271e..82a1161 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -62,43 +62,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
ttm->dma_address = NULL;
 }

-static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
-{
-   int write;
-   int dirty;
-   struct page *page;
-   int i;
-   struct ttm_backend *be = ttm->be;
-
-   BUG_ON(!(ttm->page_flags & TTM_PAGE_FLAG_USER));
-   write = ((ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0);
-   dirty = ((ttm->page_flags & TTM_PAGE_FLAG_USER_DIRTY) != 0);
-
-   if (be)
-   be->func->clear(be);
-
-   for (i = 0; i < ttm->num_pages; ++i) {
-   page = ttm->pages[i];
-   if (page == NULL)
-   continue;
-
-   if (page == ttm->dummy_read_page) {
-   BUG_ON(write);
-   continue;
-   }
-
-   if (write && dirty && !PageReserved(page))
-   set_page_dirty_lock(page);
-
-   ttm->pages[i] = NULL;
-   ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE);
-   put_page(page);
-   }
-   ttm->state = tt_unpopulated;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
-}
-
 static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 {
struct page *p;
@@ -325,10 +288,7 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
}

if (likely(ttm->pages != NULL)) {
-   if (ttm->page_flags & TTM_PAGE_FLAG_USER)
-   t

[PATCH 2/8] drm/ttm: remove split btw highmen and lowmem page

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

The split between highmem and lowmem pages was rendered useless by the
pool code. Remove it. Note that further cleanup would change the
ttm page allocation helper to actually take an array instead
of relying on a list; this could drastically reduce the number of
function calls in the common case of allocating a whole buffer.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_tt.c|   11 ++-
 include/drm/ttm/ttm_bo_driver.h |7 ---
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 82a1161..8b7a6d0 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -69,7 +69,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int 
index)
struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
int ret;

-   while (NULL == (p = ttm->pages[index])) {
+   if (NULL == (p = ttm->pages[index])) {

INIT_LIST_HEAD(&h);

@@ -85,10 +85,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, 
int index)
if (unlikely(ret != 0))
goto out_err;

-   if (PageHighMem(p))
-   ttm->pages[--ttm->first_himem_page] = p;
-   else
-   ttm->pages[++ttm->last_lomem_page] = p;
+   ttm->pages[index] = p;
}
return p;
 out_err:
@@ -270,8 +267,6 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
  ttm->dma_address);
ttm->state = tt_unpopulated;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
 }

 void ttm_tt_destroy(struct ttm_tt *ttm)
@@ -315,8 +310,6 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, 
unsigned long size,

ttm->glob = bdev->glob;
ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
ttm->caching_state = tt_cached;
ttm->page_flags = page_flags;

diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 37527d6..9da182b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -136,11 +136,6 @@ enum ttm_caching_state {
  * @dummy_read_page: Page to map where the ttm_tt page array contains a NULL
  * pointer.
  * @pages: Array of pages backing the data.
- * @first_himem_page: Himem pages are put last in the page array, which
- * enables us to run caching attribute changes on only the first part
- * of the page array containing lomem pages. This is the index of the
- * first himem page.
- * @last_lomem_page: Index of the last lomem page in the page array.
  * @num_pages: Number of pages in the page array.
  * @bdev: Pointer to the current struct ttm_bo_device.
  * @be: Pointer to the ttm backend.
@@ -157,8 +152,6 @@ enum ttm_caching_state {
 struct ttm_tt {
struct page *dummy_read_page;
struct page **pages;
-   long first_himem_page;
-   long last_lomem_page;
uint32_t page_flags;
unsigned long num_pages;
struct ttm_bo_global *glob;
-- 
1.7.1



[PATCH 3/8] drm/ttm: remove unused backend flags field

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

This field is not used by any of the drivers, so just drop it.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/radeon/radeon_ttm.c |1 -
 include/drm/ttm/ttm_bo_driver.h |2 --
 2 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 0b5468b..97c76ae 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -787,7 +787,6 @@ struct ttm_backend *radeon_ttm_backend_create(struct 
radeon_device *rdev)
return NULL;
}
gtt->backend.bdev = &rdev->mman.bdev;
-   gtt->backend.flags = 0;
gtt->backend.func = &radeon_backend_func;
gtt->rdev = rdev;
gtt->pages = NULL;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 9da182b..6d17140 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -106,7 +106,6 @@ struct ttm_backend_func {
  * struct ttm_backend
  *
  * @bdev: Pointer to a struct ttm_bo_device.
- * @flags: For driver use.
  * @func: Pointer to a struct ttm_backend_func that describes
  * the backend methods.
  *
@@ -114,7 +113,6 @@ struct ttm_backend_func {

 struct ttm_backend {
struct ttm_bo_device *bdev;
-   uint32_t flags;
struct ttm_backend_func *func;
 };

-- 
1.7.1



[PATCH 4/8] drm/ttm: use ttm put pages function to properly restore cache attribute

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

On failure we need to make sure the page we free has the wb cache
attribute. Do this by calling the proper ttm page helper function.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/ttm/ttm_tt.c |5 -
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8b7a6d0..3fb4c6d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -89,7 +89,10 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, 
int index)
}
return p;
 out_err:
-   put_page(p);
+   INIT_LIST_HEAD(&h);
+   list_add(&p->lru, &h);
+   ttm_put_pages(&h, 1, ttm->page_flags,
+ ttm->caching_state, &ttm->dma_address[index]);
return NULL;
 }

-- 
1.7.1



[PATCH 5/8] drm/ttm: convert page allocation to use page ptr array instead of list V2

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

Use the ttm_tt page pointer array for page allocation, and move the
list-to-array unwinding into the page allocation functions.

V2: Split the fix to use ttm put pages into a separate patch;
    properly fill the pages array when TTM_PAGE_FLAG_ZERO_ALLOC is not
    set.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/ttm/ttm_memory.c |   44 +
 drivers/gpu/drm/ttm/ttm_page_alloc.c |   70 +++---
 drivers/gpu/drm/ttm/ttm_tt.c |   61 ++
 include/drm/ttm/ttm_memory.h |   11 +++--
 include/drm/ttm/ttm_page_alloc.h |   17 
 5 files changed, 101 insertions(+), 102 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index e70ddd8..3a3a58b 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -543,41 +543,53 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, 
uint64_t memory,
 }
 EXPORT_SYMBOL(ttm_mem_global_alloc);

-int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
- struct page *page,
- bool no_wait, bool interruptible)
+int ttm_mem_global_alloc_pages(struct ttm_mem_global *glob,
+  struct page **pages,
+  unsigned npages,
+  bool no_wait, bool interruptible)
 {

struct ttm_mem_zone *zone = NULL;
+   unsigned i;
+   int r;

/**
 * Page allocations may be registed in a single zone
 * only if highmem or !dma32.
 */
-
+   for (i = 0; i < npages; i++) {
 #ifdef CONFIG_HIGHMEM
-   if (PageHighMem(page) && glob->zone_highmem != NULL)
-   zone = glob->zone_highmem;
+   if (PageHighMem(pages[i]) && glob->zone_highmem != NULL)
+   zone = glob->zone_highmem;
 #else
-   if (glob->zone_dma32 && page_to_pfn(page) > 0x0010UL)
-   zone = glob->zone_kernel;
+   if (glob->zone_dma32 && page_to_pfn(pages[i]) > 0x0010UL)
+   zone = glob->zone_kernel;
 #endif
-   return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
-interruptible);
+   r = ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
+ interruptible);
+   if (r) {
+   return r;
+   }
+   }
+   return 0;
 }

-void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page)
+void ttm_mem_global_free_pages(struct ttm_mem_global *glob,
+  struct page **pages, unsigned npages)
 {
struct ttm_mem_zone *zone = NULL;
+   unsigned i;

+   for (i = 0; i < npages; i++) {
 #ifdef CONFIG_HIGHMEM
-   if (PageHighMem(page) && glob->zone_highmem != NULL)
-   zone = glob->zone_highmem;
+   if (PageHighMem(pages[i]) && glob->zone_highmem != NULL)
+   zone = glob->zone_highmem;
 #else
-   if (glob->zone_dma32 && page_to_pfn(page) > 0x0010UL)
-   zone = glob->zone_kernel;
+   if (glob->zone_dma32 && page_to_pfn(pages[i]) > 0x0010UL)
+   zone = glob->zone_kernel;
 #endif
-   ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
+   ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
+   }
 }


diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 727e93d..e94ff12 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -619,8 +619,10 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool 
*pool,
  * @return count of pages still required to fulfill the request.
  */
 static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool,
-   struct list_head *pages, int ttm_flags,
-   enum ttm_caching_state cstate, unsigned count)
+   struct list_head *pages,
+   int ttm_flags,
+   enum ttm_caching_state cstate,
+   unsigned count)
 {
unsigned long irq_flags;
struct list_head *p;
@@ -664,13 +666,14 @@ out:
  * On success pages list will hold count number of correctly
  * cached pages.
  */
-int ttm_get_pages(struct list_head *pages, int flags,
- enum ttm_caching_state cstate, unsigned count,
- dma_addr_t *dma_address)
+int ttm_get_pages(struct page **pages, unsigned npages, int flags,
+ enum ttm_caching_state cstate, dma_addr_t *dma_address)
 {
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct page *p = NULL;
+   struct list_head plist;
gfp_t gfp_flags = GFP_USER;
+   unsigned count = 0;
int r;

/* set zer

[PATCH 6/8] drm/ttm: test for dma_address array allocation failure

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/ttm/ttm_tt.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2dd45ca..58ea7dc 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -298,7 +298,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, 
unsigned long size,
ttm->dummy_read_page = dummy_read_page;

ttm_tt_alloc_page_directory(ttm);
-   if (!ttm->pages) {
+   if (!ttm->pages || !ttm->dma_address) {
ttm_tt_destroy(ttm);
printk(KERN_ERR TTM_PFX "Failed allocating page table\n");
return NULL;
-- 
1.7.1



[PATCH 7/8] drm/ttm: merge ttm_backend and ttm_tt

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

A ttm_backend only ever exists together with a ttm_tt, and a ttm_tt
is only of real use when bound to a backend. Thus, to avoid code and
data duplication between the two, merge them.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c|   14 ++-
 drivers/gpu/drm/nouveau/nouveau_drv.h   |5 +-
 drivers/gpu/drm/nouveau/nouveau_sgdma.c |  188 --
 drivers/gpu/drm/radeon/radeon_ttm.c |  222 ---
 drivers/gpu/drm/ttm/ttm_agp_backend.c   |   88 +
 drivers/gpu/drm/ttm/ttm_bo.c|9 +-
 drivers/gpu/drm/ttm/ttm_tt.c|   59 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  |   66 +++--
 include/drm/ttm/ttm_bo_driver.h |  104 ++-
 9 files changed, 295 insertions(+), 460 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7226f41..b060fa4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -343,8 +343,10 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, 
u32 val)
*mem = val;
 }

-static struct ttm_backend *
-nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device *bdev)
+static struct ttm_tt *
+nouveau_ttm_tt_create(struct ttm_bo_device *bdev,
+ unsigned long size, uint32_t page_flags,
+ struct page *dummy_read_page)
 {
struct drm_nouveau_private *dev_priv = nouveau_bdev(bdev);
struct drm_device *dev = dev_priv->dev;
@@ -352,11 +354,13 @@ nouveau_bo_create_ttm_backend_entry(struct ttm_bo_device 
*bdev)
switch (dev_priv->gart_info.type) {
 #if __OS_HAS_AGP
case NOUVEAU_GART_AGP:
-   return ttm_agp_backend_init(bdev, dev->agp->bridge);
+   return ttm_agp_tt_create(bdev, dev->agp->bridge,
+size, page_flags, dummy_read_page);
 #endif
case NOUVEAU_GART_PDMA:
case NOUVEAU_GART_HW:
-   return nouveau_sgdma_init_ttm(dev);
+   return nouveau_sgdma_create_ttm(bdev, size, page_flags,
+   dummy_read_page);
default:
NV_ERROR(dev, "Unknown GART type %d\n",
 dev_priv->gart_info.type);
@@ -1045,7 +1049,7 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct 
nouveau_fence *fence)
 }

 struct ttm_bo_driver nouveau_bo_driver = {
-   .create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
+   .ttm_tt_create = &nouveau_ttm_tt_create,
.invalidate_caches = nouveau_bo_invalidate_caches,
.init_mem_type = nouveau_bo_init_mem_type,
.evict_flags = nouveau_bo_evict_flags,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 29837da..0c53e39 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -1000,7 +1000,10 @@ extern int nouveau_sgdma_init(struct drm_device *);
 extern void nouveau_sgdma_takedown(struct drm_device *);
 extern uint32_t nouveau_sgdma_get_physical(struct drm_device *,
   uint32_t offset);
-extern struct ttm_backend *nouveau_sgdma_init_ttm(struct drm_device *);
+extern struct ttm_tt *nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
+  unsigned long size,
+  uint32_t page_flags,
+  struct page *dummy_read_page);

 /* nouveau_debugfs.c */
 #if defined(CONFIG_DRM_NOUVEAU_DEBUG)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c 
b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index b75258a..bc2ab90 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -8,44 +8,23 @@
 #define NV_CTXDMA_PAGE_MASK  (NV_CTXDMA_PAGE_SIZE - 1)

 struct nouveau_sgdma_be {
-   struct ttm_backend backend;
+   struct ttm_tt ttm;
struct drm_device *dev;
-
-   dma_addr_t *pages;
-   unsigned nr_pages;
-   bool unmap_pages;
-
u64 offset;
-   bool bound;
 };

 static int
-nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
-  struct page **pages, struct page *dummy_read_page,
-  dma_addr_t *dma_addrs)
+nouveau_sgdma_dma_map(struct ttm_tt *ttm)
 {
-   struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
+   struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
struct drm_device *dev = nvbe->dev;
int i;

-   NV_DEBUG(nvbe->dev, "num_pages = %ld\n", num_pages);
-
-   nvbe->pages = dma_addrs;
-   nvbe->nr_pages = num_pages;
-   nvbe->unmap_pages = true;
-
-   /* this code path isn't called and is incorrect anyways */
-   if (0) { /* dma_addrs[0] != DMA_ERROR_CODE) { */
-   nvbe->unmap_pages = false;
-

[PATCH 8/8] drm/ttm: introduce callback for ttm_tt populate & unpopulate

2011-11-02 Thread j.gli...@gmail.com
From: Jerome Glisse 

Move the page allocation and freeing to driver callbacks and
provide ttm helper functions for those.

The most intrusive change is the fact that we now only fully
populate an object; this simplifies some of the code designed around
the page fault design.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   |3 +
 drivers/gpu/drm/radeon/radeon_ttm.c|2 +
 drivers/gpu/drm/ttm/ttm_bo_util.c  |   31 ++-
 drivers/gpu/drm/ttm/ttm_bo_vm.c|   13 ++--
 drivers/gpu/drm/ttm/ttm_page_alloc.c   |   42 ++
 drivers/gpu/drm/ttm/ttm_tt.c   |   97 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c |3 +
 include/drm/ttm/ttm_bo_driver.h|   41 --
 include/drm/ttm/ttm_page_alloc.h   |   18 ++
 9 files changed, 125 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index b060fa4..7e5ca3f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -28,6 +28,7 @@
  */

 #include "drmP.h"
+#include "ttm/ttm_page_alloc.h"

 #include "nouveau_drm.h"
 #include "nouveau_drv.h"
@@ -1050,6 +1051,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct 
nouveau_fence *fence)

 struct ttm_bo_driver nouveau_bo_driver = {
.ttm_tt_create = &nouveau_ttm_tt_create,
+   .ttm_tt_populate = &ttm_page_alloc_ttm_tt_populate,
+   .ttm_tt_unpopulate = &ttm_page_alloc_ttm_tt_unpopulate,
.invalidate_caches = nouveau_bo_invalidate_caches,
.init_mem_type = nouveau_bo_init_mem_type,
.evict_flags = nouveau_bo_evict_flags,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 53ff62b..490afce 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -584,6 +584,8 @@ struct ttm_tt *radeon_ttm_tt_create(struct ttm_bo_device 
*bdev,

 static struct ttm_bo_driver radeon_bo_driver = {
.ttm_tt_create = &radeon_ttm_tt_create,
+   .ttm_tt_populate = &ttm_page_alloc_ttm_tt_populate,
+   .ttm_tt_unpopulate = &ttm_page_alloc_ttm_tt_unpopulate,
.invalidate_caches = &radeon_invalidate_caches,
.init_mem_type = &radeon_init_mem_type,
.evict_flags = &radeon_evict_flags,
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 082fcae..60f204d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -244,7 +244,7 @@ static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void 
*src,
unsigned long page,
pgprot_t prot)
 {
-   struct page *d = ttm_tt_get_page(ttm, page);
+   struct page *d = ttm->pages[page];
void *dst;

if (!d)
@@ -281,7 +281,7 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void 
*dst,
unsigned long page,
pgprot_t prot)
 {
-   struct page *s = ttm_tt_get_page(ttm, page);
+   struct page *s = ttm->pages[page];
void *src;

if (!s)
@@ -342,6 +342,12 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
if (old_iomap == NULL && ttm == NULL)
goto out2;

+   if (ttm->state == tt_unpopulated) {
+   ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+   if (ret)
+   goto out1;
+   }
+
add = 0;
dir = 1;

@@ -502,10 +508,16 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 {
struct ttm_mem_reg *mem = &bo->mem; pgprot_t prot;
struct ttm_tt *ttm = bo->ttm;
-   struct page *d;
-   int i;
+   int ret;

BUG_ON(!ttm);
+
+   if (ttm->state == tt_unpopulated) {
+   ret = ttm->bdev->driver->ttm_tt_populate(ttm);
+   if (ret)
+   return ret;
+   }
+
if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) {
/*
 * We're mapping a single page, and the desired
@@ -513,18 +525,9 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo,
 */

map->bo_kmap_type = ttm_bo_map_kmap;
-   map->page = ttm_tt_get_page(ttm, start_page);
+   map->page = ttm->pages[start_page];
map->virtual = kmap(map->page);
} else {
-   /*
-* Populate the part we're mapping;
-*/
-   for (i = start_page; i < start_page + num_pages; ++i) {
-   d = ttm_tt_get_page(ttm, i);
-   if (!d)
-   return -ENOMEM;
-   }
-
/*
 * We need to use vmap to get the desired page protection
 * or to make the buffer object look contiguous.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
inde

[PATCH] drm/radeon/kms: consolidate GART code, fix segfault after GPU lockup V2

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

After a GPU lockup the VRAM gart table is unpinned and thus its pointer
becomes invalid. This patch moves the unpin code to a common helper
function and sets the pointer to NULL so that the page update code can
check whether it should update the GPU page table or not. That way a bo
still bound to GART can be unbound (pci_unmap_page for all of its pages)
properly while there is no need to update the GPU page table.

V2: Move the test for a NULL gart out of the loop, a small optimization.

Signed-off-by: Jerome Glisse 
---
 drivers/gpu/drm/radeon/evergreen.c   |   12 +-
 drivers/gpu/drm/radeon/ni.c  |   13 +-
 drivers/gpu/drm/radeon/r100.c|6 ++-
 drivers/gpu/drm/radeon/r300.c|   16 ++--
 drivers/gpu/drm/radeon/r600.c|   17 ++--
 drivers/gpu/drm/radeon/radeon.h  |   22 ++
 drivers/gpu/drm/radeon/radeon_gart.c |   71 -
 drivers/gpu/drm/radeon/rs400.c   |5 +-
 drivers/gpu/drm/radeon/rs600.c   |   16 ++--
 drivers/gpu/drm/radeon/rv770.c   |   13 +-
 10 files changed, 75 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index ed406e8..ebd2092 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -893,7 +893,7 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
u32 tmp;
int r;

-   if (rdev->gart.table.vram.robj == NULL) {
+   if (rdev->gart.robj == NULL) {
dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
@@ -945,7 +945,6 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
 void evergreen_pcie_gart_disable(struct radeon_device *rdev)
 {
u32 tmp;
-   int r;

/* Disable all tables */
WREG32(VM_CONTEXT0_CNTL, 0);
@@ -965,14 +964,7 @@ void evergreen_pcie_gart_disable(struct radeon_device 
*rdev)
WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
-   if (rdev->gart.table.vram.robj) {
-   r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
-   if (likely(r == 0)) {
-   radeon_bo_kunmap(rdev->gart.table.vram.robj);
-   radeon_bo_unpin(rdev->gart.table.vram.robj);
-   radeon_bo_unreserve(rdev->gart.table.vram.robj);
-   }
-   }
+   radeon_gart_table_vram_unpin(rdev);
 }

 void evergreen_pcie_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 556b7bc..927af99 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -932,7 +932,7 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
 {
int r;

-   if (rdev->gart.table.vram.robj == NULL) {
+   if (rdev->gart.robj == NULL) {
dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
@@ -977,8 +977,6 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)

 void cayman_pcie_gart_disable(struct radeon_device *rdev)
 {
-   int r;
-
/* Disable all tables */
WREG32(VM_CONTEXT0_CNTL, 0);
WREG32(VM_CONTEXT1_CNTL, 0);
@@ -994,14 +992,7 @@ void cayman_pcie_gart_disable(struct radeon_device *rdev)
WREG32(VM_L2_CNTL2, 0);
WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
   L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-   if (rdev->gart.table.vram.robj) {
-   r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
-   if (likely(r == 0)) {
-   radeon_bo_kunmap(rdev->gart.table.vram.robj);
-   radeon_bo_unpin(rdev->gart.table.vram.robj);
-   radeon_bo_unreserve(rdev->gart.table.vram.robj);
-   }
-   }
+   radeon_gart_table_vram_unpin(rdev);
 }

 void cayman_pcie_gart_fini(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 8f8b8fa..00d2fa9 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -576,7 +576,7 @@ int r100_pci_gart_init(struct radeon_device *rdev)
 {
int r;

-   if (rdev->gart.table.ram.ptr) {
+   if (rdev->gart.ptr) {
WARN(1, "R100 PCI GART already initialized\n");
return 0;
}
@@ -635,10 +635,12 @@ void r100_pci_gart_disable(struct radeon_device *rdev)

 int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
 {
+   u32 *gtt = rdev->gart.ptr;
+
if (i < 0 || i > rdev->gart.num_gpu_pages) {
return -EINVAL;
}
-   rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
+   gtt[i] = cpu_to_le32(lower_32_bits(addr));
return 0;
 }

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 33f2b68

ttm: merge ttm_backend & ttm_tt, introduce ttm dma allocator

2011-11-03 Thread j.gli...@gmail.com
Hi,

So here is the updated patchset; only patch 5 has changed since the last set.
The last 3 patches are from your patchset, modified on top of mine.

Konrad, I added your dma pool allocator on top of that
and added support for it to radeon. All in all it's slightly
smaller than your patchset.

The biggest change is the use of a list_head in ttm_tt to keep the
dma_page list inside the ttm_tt object, allowing faster and
much simpler deallocation of pages.

I only briefly tested this code; it seems ok so far. Did you
test booting a kernel with swiotlb=force and with your patchset?
Because here it doesn't work. I still don't understand why
swiotlb wants to create a bounce page when the page supplied
fits the constraint. I need to dig into the kernel history to see if
there is any good reason for that.

Otherwise I believe this whole patchset makes things cleaner
and simpler for ttm.

Cheers,
Jerome Glisse



[PATCH 01/11] drm/ttm: remove userspace backed ttm object support

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

This was never used in any of the drivers; properly using userspace
pages for a bo would need more code (vma interaction mostly). Remove
this dead code in preparation for the ttm_tt & backend merge.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_bo.c|   22 
 drivers/gpu/drm/ttm/ttm_tt.c|  105 +--
 include/drm/ttm/ttm_bo_api.h|5 --
 include/drm/ttm/ttm_bo_driver.h |   24 -
 4 files changed, 1 insertions(+), 155 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 617b646..4bde335 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -342,22 +342,6 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, 
bool zero_alloc)
if (unlikely(bo->ttm == NULL))
ret = -ENOMEM;
break;
-   case ttm_bo_type_user:
-   bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT,
-   page_flags | TTM_PAGE_FLAG_USER,
-   glob->dummy_read_page);
-   if (unlikely(bo->ttm == NULL)) {
-   ret = -ENOMEM;
-   break;
-   }
-
-   ret = ttm_tt_set_user(bo->ttm, current,
- bo->buffer_start, bo->num_pages);
-   if (unlikely(ret != 0)) {
-   ttm_tt_destroy(bo->ttm);
-   bo->ttm = NULL;
-   }
-   break;
default:
printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
ret = -EINVAL;
@@ -907,16 +891,12 @@ static uint32_t ttm_bo_select_caching(struct 
ttm_mem_type_manager *man,
 }

 static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
-bool disallow_fixed,
 uint32_t mem_type,
 uint32_t proposed_placement,
 uint32_t *masked_placement)
 {
uint32_t cur_flags = ttm_bo_type_flags(mem_type);

-   if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && disallow_fixed)
-   return false;
-
if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0)
return false;

@@ -961,7 +941,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
man = &bdev->man[mem_type];

type_ok = ttm_bo_mt_compatible(man,
-   bo->type == ttm_bo_type_user,
mem_type,
placement->placement[i],
&cur_flags);
@@ -1009,7 +988,6 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
if (!man->has_type)
continue;
if (!ttm_bo_mt_compatible(man,
-   bo->type == ttm_bo_type_user,
mem_type,
placement->busy_placement[i],
&cur_flags))
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 58c271e..82a1161 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -62,43 +62,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
ttm->dma_address = NULL;
 }

-static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
-{
-   int write;
-   int dirty;
-   struct page *page;
-   int i;
-   struct ttm_backend *be = ttm->be;
-
-   BUG_ON(!(ttm->page_flags & TTM_PAGE_FLAG_USER));
-   write = ((ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0);
-   dirty = ((ttm->page_flags & TTM_PAGE_FLAG_USER_DIRTY) != 0);
-
-   if (be)
-   be->func->clear(be);
-
-   for (i = 0; i < ttm->num_pages; ++i) {
-   page = ttm->pages[i];
-   if (page == NULL)
-   continue;
-
-   if (page == ttm->dummy_read_page) {
-   BUG_ON(write);
-   continue;
-   }
-
-   if (write && dirty && !PageReserved(page))
-   set_page_dirty_lock(page);
-
-   ttm->pages[i] = NULL;
-   ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE);
-   put_page(page);
-   }
-   ttm->state = tt_unpopulated;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
-}
-
 static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 {
struct page *p;
@@ -325,10 +288,7 @@ void ttm_tt_destroy(struct ttm_tt *ttm)
}

if (likely(ttm->pages != NULL)) {
-   if (ttm->page_flags & TTM_PAGE_FLAG_USER)
-   t

[PATCH 02/11] drm/ttm: remove split btw highmem and lowmem page

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

The split between highmem and lowmem pages was rendered useless by the
pool code. Remove it. Note that further cleanup would change the
ttm page allocation helper to actually take an array instead
of relying on a list; this could drastically reduce the number of
function calls in the common case of allocating a whole buffer.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_tt.c|   11 ++-
 include/drm/ttm/ttm_bo_driver.h |7 ---
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 82a1161..8b7a6d0 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -69,7 +69,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int 
index)
struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
int ret;

-   while (NULL == (p = ttm->pages[index])) {
+   if (NULL == (p = ttm->pages[index])) {

INIT_LIST_HEAD(&h);

@@ -85,10 +85,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, 
int index)
if (unlikely(ret != 0))
goto out_err;

-   if (PageHighMem(p))
-   ttm->pages[--ttm->first_himem_page] = p;
-   else
-   ttm->pages[++ttm->last_lomem_page] = p;
+   ttm->pages[index] = p;
}
return p;
 out_err:
@@ -270,8 +267,6 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
  ttm->dma_address);
ttm->state = tt_unpopulated;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
 }

 void ttm_tt_destroy(struct ttm_tt *ttm)
@@ -315,8 +310,6 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, 
unsigned long size,

ttm->glob = bdev->glob;
ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-   ttm->first_himem_page = ttm->num_pages;
-   ttm->last_lomem_page = -1;
ttm->caching_state = tt_cached;
ttm->page_flags = page_flags;

diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 37527d6..9da182b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -136,11 +136,6 @@ enum ttm_caching_state {
  * @dummy_read_page: Page to map where the ttm_tt page array contains a NULL
  * pointer.
  * @pages: Array of pages backing the data.
- * @first_himem_page: Himem pages are put last in the page array, which
- * enables us to run caching attribute changes on only the first part
- * of the page array containing lomem pages. This is the index of the
- * first himem page.
- * @last_lomem_page: Index of the last lomem page in the page array.
  * @num_pages: Number of pages in the page array.
  * @bdev: Pointer to the current struct ttm_bo_device.
  * @be: Pointer to the ttm backend.
@@ -157,8 +152,6 @@ enum ttm_caching_state {
 struct ttm_tt {
struct page *dummy_read_page;
struct page **pages;
-   long first_himem_page;
-   long last_lomem_page;
uint32_t page_flags;
unsigned long num_pages;
struct ttm_bo_global *glob;
-- 
1.7.7.1



[PATCH 03/11] drm/ttm: remove unused backend flags field

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

This field is not used by any of the drivers, so just drop it.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/radeon/radeon_ttm.c |1 -
 include/drm/ttm/ttm_bo_driver.h |2 --
 2 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index 0b5468b..97c76ae 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -787,7 +787,6 @@ struct ttm_backend *radeon_ttm_backend_create(struct 
radeon_device *rdev)
return NULL;
}
gtt->backend.bdev = &rdev->mman.bdev;
-   gtt->backend.flags = 0;
gtt->backend.func = &radeon_backend_func;
gtt->rdev = rdev;
gtt->pages = NULL;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 9da182b..6d17140 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -106,7 +106,6 @@ struct ttm_backend_func {
  * struct ttm_backend
  *
  * @bdev: Pointer to a struct ttm_bo_device.
- * @flags: For driver use.
  * @func: Pointer to a struct ttm_backend_func that describes
  * the backend methods.
  *
@@ -114,7 +113,6 @@ struct ttm_backend_func {

 struct ttm_backend {
struct ttm_bo_device *bdev;
-   uint32_t flags;
struct ttm_backend_func *func;
 };

-- 
1.7.7.1



[PATCH 04/11] drm/ttm: use ttm put pages function to properly restore cache attribute

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

On failure we need to make sure the page we free has the wb cache
attribute. Do this by calling the proper ttm page helper function.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_tt.c |5 -
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 8b7a6d0..3fb4c6d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -89,7 +89,10 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, 
int index)
}
return p;
 out_err:
-   put_page(p);
+   INIT_LIST_HEAD(&h);
+   list_add(&p->lru, &h);
+   ttm_put_pages(&h, 1, ttm->page_flags,
+ ttm->caching_state, &ttm->dma_address[index]);
return NULL;
 }

-- 
1.7.7.1



[PATCH 05/11] drm/ttm: convert page allocation to use page ptr array instead of list V3

2011-11-03 Thread j.gli...@gmail.com
From: Jerome Glisse 

Use the ttm_tt page ptr array for page allocation, move the list to
array unwinding into the page allocation functions.

V2: Split the fix to use ttm put page into a separate fix; properly
    fill the pages array when TTM_PAGE_FLAG_ZERO_ALLOC is not set.
V3: Added back the page_count()==1 check when freeing a page.

Signed-off-by: Jerome Glisse 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 drivers/gpu/drm/ttm/ttm_memory.c |   44 +++--
 drivers/gpu/drm/ttm/ttm_page_alloc.c |   90 --
 drivers/gpu/drm/ttm/ttm_tt.c |   61 ---
 include/drm/ttm/ttm_memory.h |   11 ++--
 include/drm/ttm/ttm_page_alloc.h |   17 +++---
 5 files changed, 115 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index e70ddd8..3a3a58b 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -543,41 +543,53 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, 
uint64_t memory,
 }
 EXPORT_SYMBOL(ttm_mem_global_alloc);

-int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
- struct page *page,
- bool no_wait, bool interruptible)
+int ttm_mem_global_alloc_pages(struct ttm_mem_global *glob,
+  struct page **pages,
+  unsigned npages,
+  bool no_wait, bool interruptible)
 {

struct ttm_mem_zone *zone = NULL;
+   unsigned i;
+   int r;

/**
 * Page allocations may be registed in a single zone
 * only if highmem or !dma32.
 */
-
+   for (i = 0; i < npages; i++) {
 #ifdef CONFIG_HIGHMEM
-   if (PageHighMem(page) && glob->zone_highmem != NULL)
-   zone = glob->zone_highmem;
+   if (PageHighMem(pages[i]) && glob->zone_highmem != NULL)
+   zone = glob->zone_highmem;
 #else
-   if (glob->zone_dma32 && page_to_pfn(page) > 0x0010UL)
-   zone = glob->zone_kernel;
+   if (glob->zone_dma32 && page_to_pfn(pages[i]) > 0x0010UL)
+   zone = glob->zone_kernel;
 #endif
-   return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
-interruptible);
+   r = ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
+ interruptible);
+   if (r) {
+   return r;
+   }
+   }
+   return 0;
 }

-void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page)
+void ttm_mem_global_free_pages(struct ttm_mem_global *glob,
+  struct page **pages, unsigned npages)
 {
struct ttm_mem_zone *zone = NULL;
+   unsigned i;

+   for (i = 0; i < npages; i++) {
 #ifdef CONFIG_HIGHMEM
-   if (PageHighMem(page) && glob->zone_highmem != NULL)
-   zone = glob->zone_highmem;
+   if (PageHighMem(pages[i]) && glob->zone_highmem != NULL)
+   zone = glob->zone_highmem;
 #else
-   if (glob->zone_dma32 && page_to_pfn(page) > 0x0010UL)
-   zone = glob->zone_kernel;
+   if (glob->zone_dma32 && page_to_pfn(pages[i]) > 0x0010UL)
+   zone = glob->zone_kernel;
 #endif
-   ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
+   ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
+   }
 }


diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 727e93d..c4f18b9 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -619,8 +619,10 @@ static void ttm_page_pool_fill_locked(struct ttm_page_pool 
*pool,
  * @return count of pages still required to fulfill the request.
  */
 static unsigned ttm_page_pool_get_pages(struct ttm_page_pool *pool,
-   struct list_head *pages, int ttm_flags,
-   enum ttm_caching_state cstate, unsigned count)
+   struct list_head *pages,
+   int ttm_flags,
+   enum ttm_caching_state cstate,
+   unsigned count)
 {
unsigned long irq_flags;
struct list_head *p;
@@ -664,13 +666,14 @@ out:
  * On success pages list will hold count number of correctly
  * cached pages.
  */
-int ttm_get_pages(struct list_head *pages, int flags,
- enum ttm_caching_state cstate, unsigned count,
- dma_addr_t *dma_address)
+int ttm_get_pages(struct page **pages, unsigned npages, int flags,
+ enum ttm_caching_state cstate, dma_addr_t *dma_address)
 {
struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
struct page *p = NULL;
+   struct list_head plist;
gfp_t gfp_flag

  1   2   3   4   >