Module Name: src Committed By: rin Date: Thu Jul 25 15:12:47 UTC 2019
Modified Files: src/sys/dev/rasops: rasops32.c Log Message: Add width-optimized putchar functions for 32-bpp, that work fine on both little- and big-endian machines. WIP genfb(4) driver for mac68k becomes 1.5 times faster! To generate a diff of this commit: cvs rdiff -u -r1.35 -r1.36 src/sys/dev/rasops/rasops32.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/rasops/rasops32.c diff -u src/sys/dev/rasops/rasops32.c:1.35 src/sys/dev/rasops/rasops32.c:1.36 --- src/sys/dev/rasops/rasops32.c:1.35 Thu Jul 25 03:02:44 2019 +++ src/sys/dev/rasops/rasops32.c Thu Jul 25 15:12:47 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: rasops32.c,v 1.35 2019/07/25 03:02:44 rin Exp $ */ +/* $NetBSD: rasops32.c,v 1.36 2019/07/25 15:12:47 rin Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.35 2019/07/25 03:02:44 rin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.36 2019/07/25 15:12:47 rin Exp $"); #include "opt_rasops.h" @@ -44,6 +44,30 @@ __KERNEL_RCSID(0, "$NetBSD: rasops32.c,v static void rasops32_putchar(void *, int, int, u_int, long attr); static void rasops32_putchar_aa(void *, int, int, u_int, long attr); +#ifndef RASOPS_SMALL +static void rasops32_putchar8(void *, int, int, u_int, long); +static void rasops32_putchar12(void *, int, int, u_int, long); +static void rasops32_putchar16(void *, int, int, u_int, long); +static void rasops32_makestamp(struct rasops_info *, long); + +/* + * 4x1 stamp for optimized character blitting + */ +static uint32_t stamp[64]; +static long stamp_attr; +static int stamp_mutex; /* XXX see note in readme */ +#endif + +/* + * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK + * destination uint32_t[0] = STAMP_READ(offset) + * destination uint32_t[1] = STAMP_READ(offset + 4) + * destination uint32_t[2] = STAMP_READ(offset + 8) + * destination uint32_t[3] = STAMP_READ(offset + 12) + */ +#define STAMP_SHIFT(fb, n) ((n) ? (fb) : (fb) << 4) +#define STAMP_MASK (0xf << 4) +#define STAMP_READ(o) (*(uint32_t *)((char *)stamp + (o))) /* * Initialize a 'rasops_info' descriptor for this depth. @@ -63,8 +87,25 @@ rasops32_init(struct rasops_info *ri) if (FONT_IS_ALPHA(ri->ri_font)) { ri->ri_ops.putchar = rasops32_putchar_aa; - } else + return; + } + + switch (ri->ri_font->fontwidth) { +#ifndef RASOPS_SMALL + case 8: + ri->ri_ops.putchar = rasops32_putchar8; + break; + case 12: + ri->ri_ops.putchar = rasops32_putchar12; + break; + case 16: + ri->ri_ops.putchar = rasops32_putchar16; + break; +#endif + default: ri->ri_ops.putchar = rasops32_putchar; + break; + } } /* @@ -243,3 +284,433 @@ rasops32_putchar_aa(void *cookie, int ro *rp++ = clr[1]; } } + +#ifndef RASOPS_SMALL +/* + * Recompute the blitting stamp. + */ +static void +rasops32_makestamp(struct rasops_info *ri, long attr) +{ + uint32_t fg, bg; + int i; + + fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf]; + bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf]; + stamp_attr = attr; + + for (i = 0; i < 64; i += 4) { + stamp[i + 0] = (i & 32 ? fg : bg); + stamp[i + 1] = (i & 16 ? fg : bg); + stamp[i + 2] = (i & 8 ? fg : bg); + stamp[i + 3] = (i & 4 ? fg : bg); + } +} + +/* + * Put a single character. This is for 8-pixel wide fonts. + */ +static void +rasops32_putchar8(void *cookie, int row, int col, u_int uc, long attr) +{ + struct rasops_info *ri = (struct rasops_info *)cookie; + struct wsdisplay_font *font = PICK_FONT(ri, uc); + int height, so, fs; + uint32_t *rp, *hrp = NULL; + uint8_t *fr; + + hrp = NULL; /* XXX GCC */ + +#ifdef RASOPS_CLIPPING + /* Catches 'row < 0' case too */ + if ((unsigned)row >= (unsigned)ri->ri_rows) + return; + + if ((unsigned)col >= (unsigned)ri->ri_cols) + return; +#endif + + /* check if character fits into font limits */ + if (!CHAR_IN_FONT(uc, font)) + return; + + /* Can't risk remaking the stamp if it's already in use */ + if (stamp_mutex++) { + stamp_mutex--; + rasops32_putchar(cookie, row, col, uc, attr); + return; + } + + /* Recompute stamp? */ + if (attr != stamp_attr) + rasops32_makestamp(ri, attr); + + rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale); + if (ri->ri_hwbits) + hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale + + col*ri->ri_xscale); + + height = font->fontheight; + + if (uc == ' ') { + while (height--) { + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = stamp[0]; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) { + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = stamp[0]; + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + } else { + fr = FONT_GLYPH(uc, font, ri); + fs = font->stride; + + while (height--) { + uint32_t tmp0, tmp1, tmp2, tmp3; + + so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[0] = tmp0; + rp[1] = tmp1; + rp[2] = tmp2; + rp[3] = tmp3; + if (ri->ri_hwbits) { + hrp[0] = tmp0; + hrp[1] = tmp1; + hrp[2] = tmp2; + hrp[3] = tmp3; + } + + so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[4] = tmp0; + rp[5] = tmp1; + rp[6] = tmp2; + rp[7] = tmp3; + if (ri->ri_hwbits) { + hrp[4] = tmp0; + hrp[5] = tmp1; + hrp[6] = tmp2; + hrp[7] = tmp3; + } + + fr += fs; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + + /* Do underline */ + if ((attr & WSATTR_UNDERLINE) != 0) { + DELTA(rp, -(ri->ri_stride << 1), uint32_t *); + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = stamp[60]; + if (ri->ri_hwbits) + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = stamp[60]; + } + + stamp_mutex--; +} + +/* + * Put a single character. This is for 12-pixel wide fonts. + */ +static void +rasops32_putchar12(void *cookie, int row, int col, u_int uc, long attr) +{ + struct rasops_info *ri = (struct rasops_info *)cookie; + struct wsdisplay_font *font = PICK_FONT(ri, uc); + int height, so, fs; + uint32_t *rp, *hrp = NULL; + uint8_t *fr; + + hrp = NULL; /* XXX GCC */ + +#ifdef RASOPS_CLIPPING + /* Catches 'row < 0' case too */ + if ((unsigned)row >= (unsigned)ri->ri_rows) + return; + + if ((unsigned)col >= (unsigned)ri->ri_cols) + return; +#endif + + /* check if character fits into font limits */ + if (!CHAR_IN_FONT(uc, font)) + return; + + /* Can't risk remaking the stamp if it's already in use */ + if (stamp_mutex++) { + stamp_mutex--; + rasops32_putchar(cookie, row, col, uc, attr); + return; + } + + /* Recompute stamp? */ + if (attr != stamp_attr) + rasops32_makestamp(ri, attr); + + rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale); + if (ri->ri_hwbits) + hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale + + col*ri->ri_xscale); + + height = font->fontheight; + + if (uc == ' ') { + while (height--) { + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = + rp[8] = rp[9] = rp[10] = rp[11] = stamp[0]; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) { + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = + hrp[8] = hrp[9] = hrp[10] = hrp[11] = + stamp[0]; + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + } else { + fr = FONT_GLYPH(uc, font, ri); + fs = font->stride; + + while (height--) { + uint32_t tmp0, tmp1, tmp2, tmp3; + + so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[0] = tmp0; + rp[1] = tmp1; + rp[2] = tmp2; + rp[3] = tmp3; + if (ri->ri_hwbits) { + hrp[0] = tmp0; + hrp[1] = tmp1; + hrp[2] = tmp2; + hrp[3] = tmp3; + } + + so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[4] = tmp0; + rp[5] = tmp1; + rp[6] = tmp2; + rp[7] = tmp3; + if (ri->ri_hwbits) { + hrp[4] = tmp0; + hrp[5] = tmp1; + hrp[6] = tmp2; + hrp[7] = tmp3; + } + + so = STAMP_SHIFT(fr[1], 1) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[8] = tmp0; + rp[9] = tmp1; + rp[10] = tmp2; + rp[11] = tmp3; + if (ri->ri_hwbits) { + hrp[8] = tmp0; + hrp[9] = tmp1; + hrp[10] = tmp2; + hrp[11] = tmp3; + } + + fr += fs; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + + /* Do underline */ + if ((attr & WSATTR_UNDERLINE) != 0) { + DELTA(rp, -(ri->ri_stride << 1), uint32_t *); + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = + rp[8] = rp[9] = rp[10] = rp[11] = stamp[60]; + if (ri->ri_hwbits) + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = + hrp[8] = hrp[9] = hrp[10] = hrp[11] = stamp[60]; + } + + stamp_mutex--; +} + +/* + * Put a single character. This is for 16-pixel wide fonts. + */ +static void +rasops32_putchar16(void *cookie, int row, int col, u_int uc, long attr) +{ + struct rasops_info *ri = (struct rasops_info *)cookie; + struct wsdisplay_font *font = PICK_FONT(ri, uc); + int height, so, fs; + uint32_t *rp, *hrp = NULL; + uint8_t *fr; + + hrp = NULL; /* XXX GCC */ + +#ifdef RASOPS_CLIPPING + /* Catches 'row < 0' case too */ + if ((unsigned)row >= (unsigned)ri->ri_rows) + return; + + if ((unsigned)col >= (unsigned)ri->ri_cols) + return; +#endif + + /* check if character fits into font limits */ + if (!CHAR_IN_FONT(uc, font)) + return; + + /* Can't risk remaking the stamp if it's already in use */ + if (stamp_mutex++) { + stamp_mutex--; + rasops32_putchar(cookie, row, col, uc, attr); + return; + } + + /* Recompute stamp? */ + if (attr != stamp_attr) + rasops32_makestamp(ri, attr); + + rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale); + if (ri->ri_hwbits) + hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale + + col*ri->ri_xscale); + + height = font->fontheight; + + if (uc == ' ') { + while (height--) { + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = + rp[8] = rp[9] = rp[10] = rp[11] = + rp[12] = rp[13] = rp[14] = rp[15] = stamp[0]; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) { + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = + hrp[8] = hrp[9] = hrp[10] = hrp[11] = + hrp[12] = hrp[13] = hrp[14] = hrp[15] = + stamp[0]; + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + } else { + fr = FONT_GLYPH(uc, font, ri); + fs = font->stride; + + while (height--) { + uint32_t tmp0, tmp1, tmp2, tmp3; + + so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[0] = tmp0; + rp[1] = tmp1; + rp[2] = tmp2; + rp[3] = tmp3; + if (ri->ri_hwbits) { + hrp[0] = tmp0; + hrp[1] = tmp1; + hrp[2] = tmp2; + hrp[3] = tmp3; + } + + so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[4] = tmp0; + rp[5] = tmp1; + rp[6] = tmp2; + rp[7] = tmp3; + if (ri->ri_hwbits) { + hrp[4] = tmp0; + hrp[5] = tmp1; + hrp[6] = tmp2; + hrp[7] = tmp3; + } + + so = STAMP_SHIFT(fr[1], 1) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[8] = tmp0; + rp[9] = tmp1; + rp[10] = tmp2; + rp[11] = tmp3; + if (ri->ri_hwbits) { + hrp[8] = tmp0; + hrp[9] = tmp1; + hrp[10] = tmp2; + hrp[11] = tmp3; + } + + so = STAMP_SHIFT(fr[1], 0) & STAMP_MASK; + tmp0 = STAMP_READ(so); + tmp1 = STAMP_READ(so + 4); + tmp2 = STAMP_READ(so + 8); + tmp3 = STAMP_READ(so + 12); + rp[12] = tmp0; + rp[13] = tmp1; + rp[14] = tmp2; + rp[15] = tmp3; + if (ri->ri_hwbits) { + hrp[12] = tmp0; + hrp[13] = tmp1; + hrp[14] = tmp2; + hrp[15] = tmp3; + } + + fr += fs; + DELTA(rp, ri->ri_stride, uint32_t *); + if (ri->ri_hwbits) + DELTA(hrp, ri->ri_stride, uint32_t *); + } + } + + /* Do underline */ + if ((attr & WSATTR_UNDERLINE) != 0) { + DELTA(rp, -(ri->ri_stride << 1), uint32_t *); + rp[0] = rp[1] = rp[2] = rp[3] = + rp[4] = rp[5] = rp[6] = rp[7] = + rp[8] = rp[9] = rp[10] = rp[11] = + rp[12] = rp[13] = rp[14] = rp[15] = stamp[60]; + if (ri->ri_hwbits) + hrp[0] = hrp[1] = hrp[2] = hrp[3] = + hrp[4] = hrp[5] = hrp[6] = hrp[7] = + hrp[8] = hrp[9] = hrp[10] = hrp[11] = + hrp[12] = hrp[13] = hrp[14] = hrp[15] = stamp[60]; + } + + stamp_mutex--; +} +#endif