The bandwidth between system memory and VRAM is very limited
on G200.
So when using a 32-bit framebuffer in system memory, convert it to 24-bit
when copying the frame to VRAM; this makes the transfer about 33% faster.
The cost of converting the format on the fly is negligible, even on a
low-end CPU.

Small benchmark on my Dell T310:
1280x1024, 32 bits: ~125 ms to transfer a single frame.
1280x1024, 24 bits: ~95 ms to transfer a single frame.

Signed-off-by: Jocelyn Falempe <jfale...@redhat.com>
---
 drivers/gpu/drm/mgag200/mgag200_mode.c | 28 ++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index e3f0da338b95..a8d6b08bf959 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -289,6 +289,8 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod
 static u32 mgag200_calculate_offset(struct mga_device *mdev,
                                    const struct drm_framebuffer *fb)
 {
+       if (fb->format->format == DRM_FORMAT_XRGB8888)
+               return (fb->pitches[0] * 3) >> 6;
        return fb->pitches[0] >> 4;
 }
 
@@ -314,17 +316,16 @@ void mgag200_set_format_regs(struct mga_device *mdev, const struct drm_format_in
        struct drm_device *dev = &mdev->base;
        unsigned int scale;
        u8 crtcext3, xmulctrl;
+       u8 cpp;
 
        switch (format->format) {
        case DRM_FORMAT_RGB565:
                xmulctrl = MGA1064_MUL_CTL_16bits;
                break;
+       case DRM_FORMAT_XRGB8888: /* use 24bit format in VRAM */
        case DRM_FORMAT_RGB888:
                xmulctrl = MGA1064_MUL_CTL_24bits;
                break;
-       case DRM_FORMAT_XRGB8888:
-               xmulctrl = MGA1064_MUL_CTL_32_24bits;
-               break;
        default:
                /* BUG: We should have caught this problem already. */
                drm_WARN_ON(dev, "invalid drm format\n");
@@ -346,8 +347,12 @@ void mgag200_set_format_regs(struct mga_device *mdev, const struct drm_format_in
        WREG_GFX(7, 0x0f);
        WREG_GFX(8, 0x0f);
 
+       cpp = format->cpp[0];
+       if (cpp == 4) /* use 24 bit format in VRAM */
+               cpp = 3;
+
        /* scale is the number of bytes per pixels - 1 */
-       scale = format->cpp[0] - 1;
+       scale = cpp - 1;
 
        RREG_ECRT(3, crtcext3);
        crtcext3 &= ~GENMASK(2, 0);
@@ -403,8 +408,19 @@ static void mgag200_handle_damage(struct mga_device *mdev, const struct iosys_ma
 {
        struct iosys_map dst = IOSYS_MAP_INIT_VADDR_IOMEM(mdev->vram);
 
-       iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, clip));
-       drm_fb_memcpy(&dst, fb->pitches, vmap, fb, clip);
+       if (fb->format->format == DRM_FORMAT_XRGB8888) {
+               /* use 24 bit format for VRAM, to save memory bandwidth,
+                * converting on the fly is much faster than sending the bytes
+                */
+               u32 dst_pitch[3] = {(fb->pitches[0] * 3) / 4,
+                                   (fb->pitches[1] * 3) / 4,
+                                   (fb->pitches[2] * 3) / 4};
+               iosys_map_incr(&dst, clip->y1 * dst_pitch[0] + clip->x1 * 3);
+               drm_fb_xrgb8888_to_rgb888(&dst, dst_pitch, vmap, fb, clip);
+       } else {
+               iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, clip));
+               drm_fb_memcpy(&dst, fb->pitches, vmap, fb, clip);
+       }
 }
 
 /*
-- 
2.39.2

Reply via email to