From: Mikko Perttunen <mperttu...@nvidia.com> Optimize performance of syncpoint interrupt handling by reading the status register in 64-bit chunks when possible, and skipping processing when the read value is zero.
Signed-off-by: Mikko Perttunen <mperttu...@nvidia.com> --- drivers/gpu/host1x/dev.c | 9 +++++++++ drivers/gpu/host1x/dev.h | 3 +++ drivers/gpu/host1x/hw/intr_hw.c | 40 ++++++++++++++++++++++++++++++---------- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 1f93e5e276c0835eac2f713ffcd60a9db8db2c21..80380b6138276877be9709048c15da85d079f977 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -71,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r) return readl(sync_regs + r); } +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + return readq(sync_regs + r); +} +#endif + void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) { writel(v, ch->regs + r); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index d3855a1c6b472a9bd289c753d79906463e6bcdb4..ef44618ed88a128bae9ab712bf49f8abc0f3b778 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -179,6 +179,9 @@ void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_sync_readl(struct host1x *host1x, u32 r); +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r); +#endif void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index 415f8d7e42021b791550ca719adafa088cd34101..fe45890a9bfb1dfcbc0354f76d625e78e72ee548 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -11,26 +11,46 @@ #include "../intr.h" #include "../dev.h" +static void process_32_syncpts(struct host1x *host, u32 val, u32 reg_offset) +{ + unsigned int id; + + if (!val) + return; + + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset)); + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset)); + + for_each_set_bit(id, (unsigned long *)&val, 32) + host1x_intr_handle_interrupt(host, reg_offset * 32 + id); +} + static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) { struct host1x_intr_irq_data *irq_data = dev_id; struct host1x *host = irq_data->host; unsigned long reg; - unsigned int i, id; + unsigned int i; +#if !defined(CONFIG_64BIT) for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32); i += host->num_syncpt_irqs) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - host1x_sync_writel(host, reg, - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); - host1x_sync_writel(host, reg, - HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + process_32_syncpts(host, reg, i); + } +#else + /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */ + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64); + i += host->num_syncpt_irqs) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i*2)); - for_each_set_bit(id, ®, 32) - host1x_intr_handle_interrupt(host, i * 32 + id); + process_32_syncpts(host, lower_32_bits(reg), i*2+0); + process_32_syncpts(host, upper_32_bits(reg), i*2+1); } +#endif return IRQ_HANDLED; } @@ -68,12 +88,12 @@ host1x_intr_init_host_sync(struct host1x *host, u32 cpm) /* * Program threshold interrupt destination among 8 lines per VM, - * per syncpoint. For each group of 32 syncpoints (corresponding to one - * interrupt status register), direct to one interrupt line, going + * per syncpoint. For each group of 64 syncpoints (corresponding to two + * interrupt status registers), direct to one interrupt line, going * around in a round robin fashion. */ for (id = 0; id < host->info->nb_pts; id++) { - u32 reg_offset = id / 32; + u32 reg_offset = id / 64; u32 irq_index = reg_offset % host->num_syncpt_irqs; host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); --- base-commit: 2aeda9592360c200085898a258c4754bfe879921 change-id: 20250707-host1x-syncpt-irq-perf-a10b1d9313df