On Fri, Jul 13, 2012 at 11:35 PM, Maarten Lankhorst <m.b.lankho...@gmail.com> wrote: > A way to trigger an irq will be needed for optimus support since > cpu-waiting isn't always viable there. This could also be nice for > power saving, since the cpu would no longer have to spin, and > performance might improve slightly on cpu-limited workloads. > > Some way to quantify these effects would be nice, even if the > end result would be 'no performance regression'. An earlier > version always emitted an interrupt, resulting in glxgears going > from 8k fps to 7k. However this is no longer the case, as I'm > using the kernel submission channel for generating irqs as > needed now. > > On nv84 I'm using NOTIFY_INTR, but that might have been > removed on fermi, so instead I'm using invalid command > 0x0058 now as a way to signal completion.
Out of curiosity, isn't this like a handcoded version of software methods? If so, why handcoded? Or are software methods not supported on NVC0? > > Signed-off-by: Maarten Lankhorst <maarten.lankho...@canonical.com> > > --- > drivers/gpu/drm/nouveau/nouveau_drv.h | 2 + > drivers/gpu/drm/nouveau/nouveau_fence.c | 49 > ++++++++++++++++++++++++++++--- > drivers/gpu/drm/nouveau/nouveau_fifo.h | 1 + > drivers/gpu/drm/nouveau/nouveau_state.c | 1 + > drivers/gpu/drm/nouveau/nv04_fifo.c | 25 ++++++++++++++++ > drivers/gpu/drm/nouveau/nv84_fence.c | 18 +++++++++-- > drivers/gpu/drm/nouveau/nvc0_fence.c | 12 ++++++-- > drivers/gpu/drm/nouveau/nvc0_fifo.c | 3 +- > drivers/gpu/drm/nouveau/nve0_fifo.c | 15 +++++++-- > 9 files changed, 110 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h > b/drivers/gpu/drm/nouveau/nouveau_drv.h > index f97a1a7..d9d274d 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_drv.h > +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h > @@ -707,6 +707,7 @@ struct drm_nouveau_private { > struct drm_mm heap; > struct nouveau_bo *bo; > } fence; > + wait_queue_head_t fence_wq; > > struct { > spinlock_t lock; > @@ -1656,6 +1657,7 @@ nv44_graph_class(struct drm_device *dev) > #define NV84_SUBCHAN_WRCACHE_FLUSH > 0x00000024 > #define NV10_SUBCHAN_REF_CNT > 0x00000050 > #define NVSW_SUBCHAN_PAGE_FLIP > 0x00000054 > +#define NVSW_SUBCHAN_FENCE_WAKE > 0x00000058 > #define NV11_SUBCHAN_DMA_SEMAPHORE > 0x00000060 > #define NV11_SUBCHAN_SEMAPHORE_OFFSET > 0x00000064 > #define NV11_SUBCHAN_SEMAPHORE_ACQUIRE > 0x00000068 > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c > b/drivers/gpu/drm/nouveau/nouveau_fence.c > index 3c18049..3ba8dee 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c > @@ -68,7 +68,7 @@ nouveau_fence_update(struct nouveau_channel *chan) > > spin_lock(&fctx->lock); > list_for_each_entry_safe(fence, fnext, &fctx->pending, head) { > - if (priv->read(chan) < 
fence->sequence) > + if (priv->read(chan) - fence->sequence >= 0x80000000U) > break; > > if (fence->work) > @@ -111,11 +111,9 @@ nouveau_fence_done(struct nouveau_fence *fence) > return !fence->channel; > } > > -int > -nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) > +static int nouveau_fence_wait_busy(struct nouveau_fence *fence, bool lazy, > bool intr) > { > unsigned long sleep_time = NSEC_PER_MSEC / 1000; > - ktime_t t; > int ret = 0; > > while (!nouveau_fence_done(fence)) { > @@ -127,7 +125,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool > lazy, bool intr) > __set_current_state(intr ? TASK_INTERRUPTIBLE : > TASK_UNINTERRUPTIBLE); > if (lazy) { > - t = ktime_set(0, sleep_time); > + ktime_t t = ktime_set(0, sleep_time); > schedule_hrtimeout(&t, HRTIMER_MODE_REL); > sleep_time *= 2; > if (sleep_time > NSEC_PER_MSEC) > @@ -144,6 +142,47 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool > lazy, bool intr) > return ret; > } > > +static int nouveau_fence_wait_event(struct nouveau_fence *fence, bool intr) > +{ > + struct drm_nouveau_private *dev_priv = > fence->channel->dev->dev_private; > + unsigned long timeout = fence->timeout; > + int ret = 0; > + struct nouveau_channel *chan = dev_priv->channel; > + struct nouveau_channel *prev = fence->channel; > + struct nouveau_fence_priv *priv = nv_engine(chan->dev, > NVOBJ_ENGINE_FENCE); > + > + if (nouveau_fence_done(fence)) > + return 0; > + > + if (!timeout) > + timeout = jiffies + 3 * DRM_HZ; > + > + if (prev != chan) > + ret = priv->sync(fence, prev, chan); > + if (ret) > + goto busy; > + > + if (intr) > + ret = wait_event_interruptible_timeout(dev_priv->fence_wq, > nouveau_fence_done(fence), timeout); > + else > + ret = wait_event_timeout(dev_priv->fence_wq, > nouveau_fence_done(fence), timeout); > + > + return ret; > + > +busy: > + return nouveau_fence_wait_busy(fence, true, intr); > +} > + > +int > +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) > +{ > + 
struct drm_nouveau_private *dev_priv = > fence->channel->dev->dev_private; > + if (dev_priv->chipset >= 0x84 && dev_priv->channel && lazy) > + return nouveau_fence_wait_event(fence, intr); > + else > + return nouveau_fence_wait_busy(fence, lazy, intr); > +} > + > int > nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan) > { > diff --git a/drivers/gpu/drm/nouveau/nouveau_fifo.h > b/drivers/gpu/drm/nouveau/nouveau_fifo.h > index ce99cab..942e211 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_fifo.h > +++ b/drivers/gpu/drm/nouveau/nouveau_fifo.h > @@ -15,6 +15,7 @@ int nv04_fifo_fini(struct drm_device *, int, bool); > int nv04_fifo_init(struct drm_device *, int); > void nv04_fifo_isr(struct drm_device *); > void nv04_fifo_destroy(struct drm_device *, int); > +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid); > > void nv50_fifo_playlist_update(struct drm_device *); > void nv50_fifo_destroy(struct drm_device *, int); > diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c > b/drivers/gpu/drm/nouveau/nouveau_state.c > index 19706f0..6cdfacb 100644 > --- a/drivers/gpu/drm/nouveau/nouveau_state.c > +++ b/drivers/gpu/drm/nouveau/nouveau_state.c > @@ -515,6 +515,7 @@ nouveau_card_channel_init(struct drm_device *dev) > dev_priv->channel = chan; > if (ret) > return ret; > + init_waitqueue_head(&dev_priv->fence_wq); > mutex_unlock(&dev_priv->channel->mutex); > > nouveau_bo_move_init(chan); > diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c > b/drivers/gpu/drm/nouveau/nv04_fifo.c > index a6295cd..1665a82 100644 > --- a/drivers/gpu/drm/nouveau/nv04_fifo.c > +++ b/drivers/gpu/drm/nouveau/nv04_fifo.c > @@ -307,6 +307,26 @@ out: > return handled; > } > > +bool nouveau_fifo_wakeup(struct drm_device *dev, u32 chid) > +{ > + struct nouveau_fifo_priv *pfifo = nv_engine(dev, NVOBJ_ENGINE_FIFO); > + struct drm_nouveau_private *dev_priv = dev->dev_private; > + struct nouveau_channel *chan = NULL; > + bool handled = false; > + unsigned long flags; > + > + 
spin_lock_irqsave(&dev_priv->channels.lock, flags); > + if (likely(chid >= 0 && chid < pfifo->channels)) { > + chan = dev_priv->channel; > + if (chan->id == chid) { > + wake_up_all(&dev_priv->fence_wq); > + handled = true; > + } > + } > + spin_unlock_irqrestore(&dev_priv->channels.lock, flags); > + return handled; > +} > + > static const char *nv_dma_state_err(u32 state) > { > static const char * const desc[] = { > @@ -448,6 +468,11 @@ nv04_fifo_isr(struct drm_device *dev) > status &= ~0x00000010; > nv_wr32(dev, 0x002100, 0x00000010); > } > + if (status & 0x80000000) { > + nouveau_fifo_wakeup(dev, chid); > + status &= ~0x80000000; > + nv_wr32(dev, 0x002100, 0x80000000); > + } > } > > if (status) { > diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c > b/drivers/gpu/drm/nouveau/nv84_fence.c > index c2f889b..2b8c3e5 100644 > --- a/drivers/gpu/drm/nouveau/nv84_fence.c > +++ b/drivers/gpu/drm/nouveau/nv84_fence.c > @@ -42,15 +42,20 @@ static int > nv84_fence_emit(struct nouveau_fence *fence) > { > struct nouveau_channel *chan = fence->channel; > - int ret = RING_SPACE(chan, 7); > + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; > + bool intr = dev_priv->channel == chan; > + > + int ret = RING_SPACE(chan, 7 + intr); > if (ret == 0) { > BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); > OUT_RING (chan, NvSema); > - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); > + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + > intr); > OUT_RING (chan, upper_32_bits(chan->id * 16)); > OUT_RING (chan, lower_32_bits(chan->id * 16)); > OUT_RING (chan, fence->sequence); > OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); > + if (intr) > + OUT_RING (chan, 0); > FIRE_RING (chan); > } > return ret; > @@ -61,15 +66,20 @@ static int > nv84_fence_sync(struct nouveau_fence *fence, > struct nouveau_channel *prev, struct nouveau_channel *chan) > { > - int ret = RING_SPACE(chan, 7); > + struct drm_nouveau_private *dev_priv = 
chan->dev->dev_private; > + bool intr = dev_priv->channel == chan; > + > + int ret = RING_SPACE(chan, 7 + intr); > if (ret == 0) { > BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1); > OUT_RING (chan, NvSema); > - BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); > + BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4 + > intr); > OUT_RING (chan, upper_32_bits(prev->id * 16)); > OUT_RING (chan, lower_32_bits(prev->id * 16)); > OUT_RING (chan, fence->sequence); > OUT_RING (chan, > NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL); > + if (intr) > + OUT_RING (chan, 0); > FIRE_RING (chan); > } > return ret; > diff --git a/drivers/gpu/drm/nouveau/nvc0_fence.c > b/drivers/gpu/drm/nouveau/nvc0_fence.c > index 47ab388..25805ef 100644 > --- a/drivers/gpu/drm/nouveau/nvc0_fence.c > +++ b/drivers/gpu/drm/nouveau/nvc0_fence.c > @@ -45,15 +45,19 @@ nvc0_fence_emit(struct nouveau_fence *fence) > struct nouveau_channel *chan = fence->channel; > struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; > u64 addr = fctx->vma.offset + chan->id * 16; > + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; > + bool intr = dev_priv->channel == chan; > int ret; > > - ret = RING_SPACE(chan, 5); > + ret = RING_SPACE(chan, 5 + intr); > if (ret == 0) { > BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); > OUT_RING (chan, upper_32_bits(addr)); > OUT_RING (chan, lower_32_bits(addr)); > OUT_RING (chan, fence->sequence); > OUT_RING (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG); > + if (intr) > + BEGIN_IMC0(chan, 0, 0x058, 0); > FIRE_RING (chan); > } > > @@ -64,11 +68,13 @@ static int > nvc0_fence_sync(struct nouveau_fence *fence, > struct nouveau_channel *prev, struct nouveau_channel *chan) > { > + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; > struct nvc0_fence_chan *fctx = chan->engctx[NVOBJ_ENGINE_FENCE]; > u64 addr = fctx->vma.offset + prev->id * 16; > + bool intr = dev_priv->channel == chan; > int ret; > > - ret = 
RING_SPACE(chan, 5); > + ret = RING_SPACE(chan, 5 + intr); > if (ret == 0) { > BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4); > OUT_RING (chan, upper_32_bits(addr)); > @@ -76,6 +82,8 @@ nvc0_fence_sync(struct nouveau_fence *fence, > OUT_RING (chan, fence->sequence); > OUT_RING (chan, > NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL | > NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD); > + if (intr) > + BEGIN_IMC0(chan, 0, NVSW_SUBCHAN_FENCE_WAKE, 0); > FIRE_RING (chan); > } > > diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c > b/drivers/gpu/drm/nouveau/nvc0_fifo.c > index 7d85553..b3930c7 100644 > --- a/drivers/gpu/drm/nouveau/nvc0_fifo.c > +++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c > @@ -356,7 +356,8 @@ nvc0_fifo_isr_subfifo_intr(struct drm_device *dev, int > unit) > if (mthd == 0x0054) { > if (!nvc0_fifo_page_flip(dev, chid)) > show &= ~0x00200000; > - } > + } else if (mthd == 0x0058 && nouveau_fifo_wakeup(dev, chid)) > + show &= ~0x00200000; > } > > if (show) { > diff --git a/drivers/gpu/drm/nouveau/nve0_fifo.c > b/drivers/gpu/drm/nouveau/nve0_fifo.c > index 1855ecb..33e41cd 100644 > --- a/drivers/gpu/drm/nouveau/nve0_fifo.c > +++ b/drivers/gpu/drm/nouveau/nve0_fifo.c > @@ -303,11 +303,18 @@ nve0_fifo_isr_subfifo_intr(struct drm_device *dev, int > unit) > u32 chid = nv_rd32(dev, 0x040120 + (unit * 0x2000)) & 0x7f; > u32 subc = (addr & 0x00070000); > u32 mthd = (addr & 0x00003ffc); > + u32 show = stat; > > - NV_INFO(dev, "PSUBFIFO %d:", unit); > - nouveau_bitfield_print(nve0_fifo_subfifo_intr, stat); > - NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data 0x%08x\n", > - unit, chid, subc, mthd, data); > + if (stat & 0x00200000 && mthd == 0x0058 && > + nouveau_fifo_wakeup(dev, chid)) > + show &= ~0x00200000; > + > + if (show) { > + NV_INFO(dev, "PSUBFIFO %d:", unit); > + nouveau_bitfield_print(nve0_fifo_subfifo_intr, show); > + NV_INFO(dev, "PSUBFIFO %d: ch %d subc %d mthd 0x%04x data > 0x%08x\n", > + unit, chid, subc, mthd, data); > + } > > nv_wr32(dev, 
0x0400c0 + (unit * 0x2000), 0x80600008); > nv_wr32(dev, 0x040108 + (unit * 0x2000), stat); > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel -- Far away from the primal instinct, the song seems to fade away, the river get wider between your thoughts and the things we do and say. _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel