On Mon, May 7, 2012 at 7:42 AM, Christian König <deathsim...@vodafone.de> wrote: > From: Jerome Glisse <jgli...@redhat.com> > > This convert fence to use uint64_t sequence number intention is > to use the fact that uin64_t is big enough that we don't need to > care about wrap around. > > Tested with and without writeback using 0xFFFFF000 as initial > fence sequence and thus allowing to test the wrap around from > 32bits to 64bits. > > v2: Add comment about possible race btw CPU & GPU, add comment > stressing that we need 2 dword aligned for R600_WB_EVENT_OFFSET > Read fence sequenc in reverse order of GPU write them so we > mitigate the race btw CPU and GPU. > > v3: Drop the need for ring to emit the 64bits fence, and just have > each ring emit the lower 32bits of the fence sequence. We > handle the wrap over 32bits in fence_process. > > v4: Just a small optimization: Don't reread the last_seq value > if loop restarts, since we already know its value anyway. > Also start at zero not one for seq value and use pre instead > of post increment in emmit, otherwise wait_empty will deadlock.
Why changing that v3 was already good no deadlock. I started at 1 especialy for that, a signaled fence is set to 0 so it always compare as signaled. Just using preincrement is exactly like starting at one. I don't see the need for this change but if it makes you happy. Cheers, Jerome > > Signed-off-by: Jerome Glisse <jgli...@redhat.com> > Signed-off-by: Christian König <deathsim...@vodafone.de> > --- > drivers/gpu/drm/radeon/radeon.h | 39 ++++++----- > drivers/gpu/drm/radeon/radeon_fence.c | 116 > +++++++++++++++++++++++---------- > drivers/gpu/drm/radeon/radeon_ring.c | 9 ++- > 3 files changed, 107 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index e99ea81..cdf46bc 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -100,28 +100,32 @@ extern int radeon_lockup_timeout; > * Copy from radeon_drv.h so we don't have to include both and have > conflicting > * symbol; > */ > -#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ > -#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) > +#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ > +#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) > /* RADEON_IB_POOL_SIZE must be a power of 2 */ > -#define RADEON_IB_POOL_SIZE 16 > -#define RADEON_DEBUGFS_MAX_COMPONENTS 32 > -#define RADEONFB_CONN_LIMIT 4 > -#define RADEON_BIOS_NUM_SCRATCH 8 > +#define RADEON_IB_POOL_SIZE 16 > +#define RADEON_DEBUGFS_MAX_COMPONENTS 32 > +#define RADEONFB_CONN_LIMIT 4 > +#define RADEON_BIOS_NUM_SCRATCH 8 > > /* max number of rings */ > -#define RADEON_NUM_RINGS 3 > +#define RADEON_NUM_RINGS 3 > + > +/* fence seq are set to this number when signaled */ > +#define RADEON_FENCE_SIGNALED_SEQ 0LL > +#define RADEON_FENCE_NOTEMITED_SEQ (~0LL) > > /* internal ring indices */ > /* r1xx+ has gfx CP ring */ > -#define RADEON_RING_TYPE_GFX_INDEX 0 > +#define RADEON_RING_TYPE_GFX_INDEX 0 > > /* cayman has 2 compute CP rings */ > -#define CAYMAN_RING_TYPE_CP1_INDEX 1 > -#define CAYMAN_RING_TYPE_CP2_INDEX 2 > +#define CAYMAN_RING_TYPE_CP1_INDEX 1 > +#define CAYMAN_RING_TYPE_CP2_INDEX 2 > > /* hardcode those limit for now */ > -#define RADEON_VA_RESERVED_SIZE (8 << 20) > -#define RADEON_IB_VM_MAX_SIZE (64 << 10) > +#define RADEON_VA_RESERVED_SIZE (8 << 20) > +#define RADEON_IB_VM_MAX_SIZE (64 << 10) > > /* > * Errata workarounds. > @@ -254,8 +258,9 @@ struct radeon_fence_driver { > uint32_t scratch_reg; > uint64_t gpu_addr; > volatile uint32_t *cpu_addr; > - atomic_t seq; > - uint32_t last_seq; > + /* seq is protected by ring emission lock */ > + uint64_t seq; > + atomic64_t last_seq; > unsigned long last_activity; > wait_queue_head_t queue; > struct list_head emitted; > @@ -268,11 +273,9 @@ struct radeon_fence { > struct kref kref; > struct list_head list; > /* protected by radeon_fence.lock */ > - uint32_t seq; > - bool emitted; > - bool signaled; > + uint64_t seq; > /* RB, DMA, etc. */ > - int ring; > + unsigned ring; > struct radeon_semaphore *semaphore; > }; > > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c > b/drivers/gpu/drm/radeon/radeon_fence.c > index 5bb78bf..feb2bbc 100644 > --- a/drivers/gpu/drm/radeon/radeon_fence.c > +++ b/drivers/gpu/drm/radeon/radeon_fence.c > @@ -66,14 +66,14 @@ int radeon_fence_emit(struct radeon_device *rdev, struct > radeon_fence *fence) > unsigned long irq_flags; > > write_lock_irqsave(&rdev->fence_lock, irq_flags); > - if (fence->emitted) { > + if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { > write_unlock_irqrestore(&rdev->fence_lock, irq_flags); > return 0; > } > - fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq); > + /* we are protected by the ring emission mutex */ > + fence->seq = ++rdev->fence_drv[fence->ring].seq; > radeon_fence_ring_emit(rdev, fence->ring, fence); > trace_radeon_fence_emit(rdev->ddev, fence->seq); > - fence->emitted = true; > /* are we the first fence on a previusly idle ring? */ > if (list_empty(&rdev->fence_drv[fence->ring].emitted)) { > rdev->fence_drv[fence->ring].last_activity = jiffies; > @@ -87,14 +87,60 @@ static bool radeon_fence_poll_locked(struct radeon_device > *rdev, int ring) > { > struct radeon_fence *fence; > struct list_head *i, *n; > - uint32_t seq; > + uint64_t seq, last_seq; > + unsigned count_loop = 0; > bool wake = false; > > - seq = radeon_fence_read(rdev, ring); > - if (seq == rdev->fence_drv[ring].last_seq) > - return false; > + /* Note there is a scenario here for an infinite loop but it's > + * very unlikely to happen. For it to happen, the current polling > + * process need to be interrupted by another process and another > + * process needs to update the last_seq btw the atomic read and > + * xchg of the current process. > + * > + * More over for this to go in infinite loop there need to be > + * continuously new fence signaled ie radeon_fence_read needs > + * to return a different value each time for both the currently > + * polling process and the other process that xchg the last_seq > + * btw atomic read and xchg of the current process. And the > + * value the other process set as last seq must be higher than > + * the seq value we just read. Which means that current process > + * need to be interrupted after radeon_fence_read and before > + * atomic xchg. > + * > + * To be even more safe we count the number of time we loop and > + * we bail after 10 loop just accepting the fact that we might > + * have temporarly set the last_seq not to the true real last > + * seq but to an older one. > + */ > + last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq); > + do { > + seq = radeon_fence_read(rdev, ring); > + seq |= last_seq & 0xffffffff00000000LL; > + if (seq < last_seq) { > + seq += 0x100000000LL; > + } > > - rdev->fence_drv[ring].last_seq = seq; > + if (!wake && seq == last_seq) { > + return false; > + } > + /* If we loop over we don't want to return without > + * checking if a fence is signaled as it means that the > + * seq we just read is different from the previous on. > + */ > + wake = true; > + if ((count_loop++) > 10) { > + /* We looped over too many time leave with the > + * fact that we might have set an older fence > + * seq then the current real last seq as signaled > + * by the hw. > + */ > + break; > + } > + last_seq = seq; > + } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq); > + > + /* reset wake to false */ > + wake = false; > rdev->fence_drv[ring].last_activity = jiffies; > > n = NULL; > @@ -112,7 +158,7 @@ static bool radeon_fence_poll_locked(struct radeon_device > *rdev, int ring) > n = i->prev; > list_move_tail(i, &rdev->fence_drv[ring].signaled); > fence = list_entry(i, struct radeon_fence, list); > - fence->signaled = true; > + fence->seq = RADEON_FENCE_SIGNALED_SEQ; > i = n; > } while (i != &rdev->fence_drv[ring].emitted); > wake = true; > @@ -128,7 +174,7 @@ static void radeon_fence_destroy(struct kref *kref) > fence = container_of(kref, struct radeon_fence, kref); > write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); > list_del(&fence->list); > - fence->emitted = false; > + fence->seq = RADEON_FENCE_NOTEMITED_SEQ; > write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); > if (fence->semaphore) > radeon_semaphore_free(fence->rdev, fence->semaphore); > @@ -145,9 +191,7 @@ int radeon_fence_create(struct radeon_device *rdev, > } > kref_init(&((*fence)->kref)); > (*fence)->rdev = rdev; > - (*fence)->emitted = false; > - (*fence)->signaled = false; > - (*fence)->seq = 0; > + (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; > (*fence)->ring = ring; > (*fence)->semaphore = NULL; > INIT_LIST_HEAD(&(*fence)->list); > @@ -163,18 +207,18 @@ bool radeon_fence_signaled(struct radeon_fence *fence) > return true; > > write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); > - signaled = fence->signaled; > + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ); > /* if we are shuting down report all fence as signaled */ > if (fence->rdev->shutdown) { > signaled = true; > } > - if (!fence->emitted) { > + if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { > WARN(1, "Querying an unemitted fence : %p !\n", fence); > signaled = true; > } > if (!signaled) { > radeon_fence_poll_locked(fence->rdev, fence->ring); > - signaled = fence->signaled; > + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ); > } > write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); > return signaled; > @@ -183,8 +227,8 @@ bool radeon_fence_signaled(struct radeon_fence *fence) > int radeon_fence_wait(struct radeon_fence *fence, bool intr) > { > struct radeon_device *rdev; > - unsigned long irq_flags, timeout; > - u32 seq; > + unsigned long irq_flags, timeout, last_activity; > + uint64_t seq; > int i, r; > bool signaled; > > @@ -207,7 +251,9 @@ int radeon_fence_wait(struct radeon_fence *fence, bool > intr) > timeout = 1; > } > /* save current sequence value used to check for GPU lockups */ > - seq = rdev->fence_drv[fence->ring].last_seq; > + seq = atomic64_read(&rdev->fence_drv[fence->ring].last_seq); > + /* Save current last activity valuee, used to check for GPU > lockups */ > + last_activity = rdev->fence_drv[fence->ring].last_activity; > read_unlock_irqrestore(&rdev->fence_lock, irq_flags); > > trace_radeon_fence_wait_begin(rdev->ddev, seq); > @@ -235,24 +281,23 @@ int radeon_fence_wait(struct radeon_fence *fence, bool > intr) > } > > write_lock_irqsave(&rdev->fence_lock, irq_flags); > - /* check if sequence value has changed since > last_activity */ > - if (seq != rdev->fence_drv[fence->ring].last_seq) { > + /* test if somebody else has already decided that > this is a lockup */ > + if (last_activity != > rdev->fence_drv[fence->ring].last_activity) { > write_unlock_irqrestore(&rdev->fence_lock, > irq_flags); > continue; > } > > - /* change sequence value on all rings, so nobody else > things there is a lockup */ > - for (i = 0; i < RADEON_NUM_RINGS; ++i) > - rdev->fence_drv[i].last_seq -= 0x10000; > - > - rdev->fence_drv[fence->ring].last_activity = jiffies; > write_unlock_irqrestore(&rdev->fence_lock, irq_flags); > > if (radeon_ring_is_lockup(rdev, fence->ring, > &rdev->ring[fence->ring])) { > - > /* good news we believe it's a lockup */ > - printk(KERN_WARNING "GPU lockup (waiting for > 0x%08X last fence id 0x%08X)\n", > - fence->seq, seq); > + dev_warn(rdev->dev, "GPU lockup (waiting for > 0x%016llx last fence id 0x%016llx)\n", > + fence->seq, seq); > + > + /* change last activity so nobody else think > there is a lockup */ > + for (i = 0; i < RADEON_NUM_RINGS; ++i) { > + rdev->fence_drv[i].last_activity = > jiffies; > + } > > /* mark the ring as not ready any more */ > rdev->ring[fence->ring].ready = false; > @@ -387,9 +432,9 @@ int radeon_fence_driver_start_ring(struct radeon_device > *rdev, int ring) > } > rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; > rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; > - radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), > ring); > + radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); > rdev->fence_drv[ring].initialized = true; > - DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr > 0x%p\n", > + DRM_INFO("fence driver on ring %d use gpu addr 0x%016llx and cpu addr > 0x%p\n", > ring, rdev->fence_drv[ring].gpu_addr, > rdev->fence_drv[ring].cpu_addr); > write_unlock_irqrestore(&rdev->fence_lock, irq_flags); > return 0; > @@ -400,7 +445,8 @@ static void radeon_fence_driver_init_ring(struct > radeon_device *rdev, int ring) > rdev->fence_drv[ring].scratch_reg = -1; > rdev->fence_drv[ring].cpu_addr = NULL; > rdev->fence_drv[ring].gpu_addr = 0; > - atomic_set(&rdev->fence_drv[ring].seq, 0); > + rdev->fence_drv[ring].seq = 0; > + atomic64_set(&rdev->fence_drv[ring].last_seq, 0); > INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted); > INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled); > init_waitqueue_head(&rdev->fence_drv[ring].queue); > @@ -458,12 +504,12 @@ static int radeon_debugfs_fence_info(struct seq_file > *m, void *data) > continue; > > seq_printf(m, "--- ring %d ---\n", i); > - seq_printf(m, "Last signaled fence 0x%08X\n", > - radeon_fence_read(rdev, i)); > + seq_printf(m, "Last signaled fence 0x%016lx\n", > + atomic64_read(&rdev->fence_drv[i].last_seq)); > if (!list_empty(&rdev->fence_drv[i].emitted)) { > fence = list_entry(rdev->fence_drv[i].emitted.prev, > struct radeon_fence, list); > - seq_printf(m, "Last emitted fence %p with 0x%08X\n", > + seq_printf(m, "Last emitted fence %p with > 0x%016llx\n", > fence, fence->seq); > } > } > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c > b/drivers/gpu/drm/radeon/radeon_ring.c > index a4d60ae..4ae222b 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -82,7 +82,7 @@ bool radeon_ib_try_free(struct radeon_device *rdev, struct > radeon_ib *ib) > bool done = false; > > /* only free ib which have been emited */ > - if (ib->fence && ib->fence->emitted) { > + if (ib->fence && ib->fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { > if (radeon_fence_signaled(ib->fence)) { > radeon_fence_unref(&ib->fence); > radeon_sa_bo_free(rdev, &ib->sa_bo); > @@ -149,8 +149,9 @@ retry: > /* this should be rare event, ie all ib scheduled none signaled yet. > */ > for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { > - if (rdev->ib_pool.ibs[idx].fence && > rdev->ib_pool.ibs[idx].fence->emitted) { > - r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, > false); > + struct radeon_fence *fence = rdev->ib_pool.ibs[idx].fence; > + if (fence && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { > + r = radeon_fence_wait(fence, false); > if (!r) { > goto retry; > } > @@ -173,7 +174,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct > radeon_ib **ib) > return; > } > radeon_mutex_lock(&rdev->ib_pool.mutex); > - if (tmp->fence && !tmp->fence->emitted) { > + if (tmp->fence && tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { > radeon_sa_bo_free(rdev, &tmp->sa_bo); > radeon_fence_unref(&tmp->fence); > } > -- > 1.7.5.4 > _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel