Module Name: src Committed By: rin Date: Fri Jul 26 05:24:04 UTC 2019
Modified Files: src/sys/dev/rasops: rasops.c Log Message: Replace manually unrolled loops with memcpy/memmove or simple loops. Modern compilers are smart enough; there are no measurable changes in performance even on MC68040 with optimization level -Os. Also, convert a loop of byte-wise copy into memset. To generate a diff of this commit: cvs rdiff -u -r1.89 -r1.90 src/sys/dev/rasops/rasops.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/rasops/rasops.c diff -u src/sys/dev/rasops/rasops.c:1.89 src/sys/dev/rasops/rasops.c:1.90 --- src/sys/dev/rasops/rasops.c:1.89 Fri Jul 26 05:15:47 2019 +++ src/sys/dev/rasops/rasops.c Fri Jul 26 05:24:04 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $ */ +/* $NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.89 2019/07/26 05:15:47 rin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.90 2019/07/26 05:24:04 rin Exp $"); #include "opt_rasops.h" #include "rasops_glue.h" @@ -617,12 +617,11 @@ rasops_allocattr_mono(void *cookie, int static void rasops_copyrows(void *cookie, int src, int dst, int num) { - uint32_t *sp, *dp, *hp, *srp, *drp, *hrp; - struct rasops_info *ri; - int n8, n1, cnt, delta; + struct rasops_info *ri = (struct rasops_info *)cookie; + uint8_t *sp, *dp, *hp; + int n; - ri = (struct rasops_info *)cookie; - hp = hrp = NULL; + hp = NULL; /* XXX GCC */ #ifdef RASOPS_CLIPPING if (dst == src) @@ -649,72 +648,21 @@ rasops_copyrows(void *cookie, int src, i #endif num *= ri->ri_font->fontheight; - n8 = ri->ri_emustride >> 5; - n1 = (ri->ri_emustride >> 2) & 7; + n = ri->ri_emustride; - if (dst < src) { - srp = (uint32_t *)(ri->ri_bits + src * ri->ri_yscale); - drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_yscale); - if (ri->ri_hwbits) - hrp = (uint32_t *)(ri->ri_hwbits + dst * - ri->ri_yscale); - delta = ri->ri_stride; - } else { - src = ri->ri_font->fontheight * src + num - 1; - dst = ri->ri_font->fontheight * dst + num - 1; - srp = (uint32_t *)(ri->ri_bits + src * ri->ri_stride); - drp = (uint32_t *)(ri->ri_bits + dst * ri->ri_stride); - if (ri->ri_hwbits) - hrp = (uint32_t *)(ri->ri_hwbits + dst * - ri->ri_stride); - - delta = -ri->ri_stride; - } + sp = ri->ri_bits + src * ri->ri_yscale; + dp = ri->ri_bits + dst * ri->ri_yscale; + if 
(ri->ri_hwbits) + hp = ri->ri_hwbits + dst * ri->ri_yscale; while (num--) { - dp = drp; - sp = srp; - if (ri->ri_hwbits) - hp = hrp; - - DELTA(drp, delta, uint32_t *); - DELTA(srp, delta, uint32_t *); - if (ri->ri_hwbits) - DELTA(hrp, delta, uint32_t *); - - for (cnt = n8; cnt; cnt--) { - dp[0] = sp[0]; - dp[1] = sp[1]; - dp[2] = sp[2]; - dp[3] = sp[3]; - dp[4] = sp[4]; - dp[5] = sp[5]; - dp[6] = sp[6]; - dp[7] = sp[7]; - dp += 8; - sp += 8; - } + memmove(dp, sp, n); + dp += n; if (ri->ri_hwbits) { - sp -= (8 * n8); - for (cnt = n8; cnt; cnt--) { - hp[0] = sp[0]; - hp[1] = sp[1]; - hp[2] = sp[2]; - hp[3] = sp[3]; - hp[4] = sp[4]; - hp[5] = sp[5]; - hp[6] = sp[6]; - hp[7] = sp[7]; - hp += 8; - sp += 8; - } - } - - for (cnt = n1; cnt; cnt--) { - *dp++ = *sp++; - if (ri->ri_hwbits) - *hp++ = *(sp - 1); + memcpy(hp, sp, n); + hp += n; } + sp += n; } } @@ -916,13 +864,11 @@ rasops_unpack_attr(long attr, int *fg, i void rasops_eraserows(void *cookie, int row, int num, long attr) { - struct rasops_info *ri; - int np, nw, cnt, delta; + struct rasops_info *ri = (struct rasops_info *)cookie; uint32_t *dp, *hp, clr; - int i; + int n, cnt, delta; - ri = (struct rasops_info *)cookie; - hp = NULL; + hp = NULL; /* XXX GCC */ #ifdef RASOPS_CLIPPING if (row < 0) { @@ -946,16 +892,14 @@ rasops_eraserows(void *cookie, int row, * the RI_FULLCLEAR flag is set, clear the entire display. 
*/ if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) { - np = ri->ri_stride >> 5; - nw = (ri->ri_stride >> 2) & 7; + n = ri->ri_stride >> 2; num = ri->ri_height; dp = (uint32_t *)ri->ri_origbits; if (ri->ri_hwbits) hp = (uint32_t *)ri->ri_hworigbits; delta = 0; } else { - np = ri->ri_emustride >> 5; - nw = (ri->ri_emustride >> 2) & 7; + n = ri->ri_emustride >> 2; num *= ri->ri_font->fontheight; dp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale); if (ri->ri_hwbits) @@ -965,26 +909,11 @@ rasops_eraserows(void *cookie, int row, } while (num--) { - for (cnt = np; cnt; cnt--) { - for (i = 0; i < 8; i++) { - dp[i] = clr; - if (ri->ri_hwbits) - hp[i] = clr; - } - dp += 8; + for (cnt = n; cnt; cnt--) { + *dp++ = clr; if (ri->ri_hwbits) - hp += 8; - } - - for (cnt = nw; cnt; cnt--) { - *(uint32_t *)dp = clr; - DELTA(dp, 4, uint32_t *); - if (ri->ri_hwbits) { - *(uint32_t *)hp = clr; - DELTA(hp, 4, uint32_t *); - } + *hp++ = clr; } - DELTA(dp, delta, uint32_t *); if (ri->ri_hwbits) DELTA(hp, delta, uint32_t *); @@ -1112,13 +1041,11 @@ rasops_do_cursor(struct rasops_info *ri) void rasops_erasecols(void *cookie, int row, int col, int num, long attr) { - int n8, height, cnt, slop1, slop2, clr; - struct rasops_info *ri; + struct rasops_info *ri = (struct rasops_info *)cookie; + int height, cnt, slop1, slop2, clr; uint32_t *rp, *dp, *hrp, *hp; - int i; - ri = (struct rasops_info *)cookie; - hrp = hp = NULL; + hrp = hp = NULL; /* XXX GCC */ #ifdef RASOPS_CLIPPING if ((unsigned)row >= (unsigned)ri->ri_rows) @@ -1136,7 +1063,7 @@ rasops_erasecols(void *cookie, int row, return; #endif - num = num * ri->ri_xscale; + num *= ri->ri_xscale; rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale); if (ri->ri_hwbits) hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale + @@ -1190,32 +1117,21 @@ rasops_erasecols(void *cookie, int row, } } else { while (height--) { - dp = rp; + memset(rp, clr, num); DELTA(rp, ri->ri_stride, uint32_t *); if (ri->ri_hwbits) { - hp = 
hrp; + memset(hrp, clr, num); DELTA(hrp, ri->ri_stride, uint32_t *); } - - for (cnt = num; cnt; cnt--) { - *(uint8_t *)dp = clr; - DELTA(dp, 1, uint32_t *); - if (ri->ri_hwbits) { - *(uint8_t *)hp = clr; - DELTA(hp, 1, uint32_t *); - } - } } } return; } - slop1 = (4 - ((long)rp & 3)) & 3; + slop1 = (4 - ((uintptr_t)rp & 3)) & 3; slop2 = (num - slop1) & 3; - num -= slop1 + slop2; - n8 = num >> 5; - num = (num >> 2) & 7; + num = (num - slop1 /* - slop2 */) >> 2; while (height--) { dp = rp; @@ -1244,18 +1160,6 @@ rasops_erasecols(void *cookie, int row, } } - /* Write 32 bytes per loop */ - for (cnt = n8; cnt; cnt--) { - for (i = 0; i < 8; i++) { - dp[i] = clr; - if (ri->ri_hwbits) - hp[i] = clr; - } - dp += 8; - if (ri->ri_hwbits) - hp += 8; - } - /* Write 4 bytes per loop */ for (cnt = num; cnt; cnt--) { *dp++ = clr;