Am 20.04.2014 19:29, schrieb Lauri Kasanen: > This was originally un-inlined by Andi Kleen in 2011 citing size concerns. > Indeed, a first attempt at inlining it grew radeon.ko by 7%. > > However, 2% of cpu is spent in this function. Simply inlining it gave 1% more > fps > in Urban Terror. > > v2: We know the minimum MMIO size. Adding it to the if allows the compiler to > optimize the branch out, improving both performance and size. > > The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but > common sense > says perf is now more than 1% better. > > v3: Also change _wreg, make the threshold a define. > > Inlining _wreg increased the size a bit compared to v2, so now radeon.ko > is only 1% smaller. > > Signed-off-by: Lauri Kasanen <cand at gmx.com>
Reviewed-by: Christian König <christian.koenig at amd.com> > --- > drivers/gpu/drm/radeon/r100.c | 33 --------------------------------- > drivers/gpu/drm/radeon/radeon.h | 40 > ++++++++++++++++++++++++++++++++++++---- > 2 files changed, 36 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c > index b6c3264..a4e7871 100644 > --- a/drivers/gpu/drm/radeon/r100.c > +++ b/drivers/gpu/drm/radeon/r100.c > @@ -4086,39 +4086,6 @@ int r100_init(struct radeon_device *rdev) > return 0; > } > > -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, > - bool always_indirect) > -{ > - if (reg < rdev->rmmio_size && !always_indirect) > - return readl(((void __iomem *)rdev->rmmio) + reg); > - else { > - unsigned long flags; > - uint32_t ret; > - > - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); > - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); > - ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); > - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); > - > - return ret; > - } > -} > - > -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, > - bool always_indirect) > -{ > - if (reg < rdev->rmmio_size && !always_indirect) > - writel(v, ((void __iomem *)rdev->rmmio) + reg); > - else { > - unsigned long flags; > - > - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); > - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); > - writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); > - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); > - } > -} > - > u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) > { > if (reg < rdev->rio_mem_size) > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index f21db7a..a749b6c 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -2328,10 +2328,42 @@ int radeon_device_init(struct radeon_device *rdev, > void radeon_device_fini(struct radeon_device 
*rdev); > int radeon_gpu_wait_for_idle(struct radeon_device *rdev); > > -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, > - bool always_indirect); > -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, > - bool always_indirect); > +#define RADEON_MIN_MMIO_SIZE 0x10000 > + > +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, > + bool always_indirect) > +{ > + /* The mmio size is 64kb at minimum. Allows the if to be optimized out. > */ > + if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && > !always_indirect) > + return readl(((void __iomem *)rdev->rmmio) + reg); > + else { > + unsigned long flags; > + uint32_t ret; > + > + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); > + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); > + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); > + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); > + > + return ret; > + } > +} > + > +static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, > uint32_t v, > + bool always_indirect) > +{ > + if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && > !always_indirect) > + writel(v, ((void __iomem *)rdev->rmmio) + reg); > + else { > + unsigned long flags; > + > + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); > + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); > + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); > + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); > + } > +} > + > u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); > void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); >