Module Name:    src
Committed By:   rin
Date:           Thu Jul 25 15:12:47 UTC 2019

Modified Files:
        src/sys/dev/rasops: rasops32.c

Log Message:
Add width-optimized putchar functions for 32 bpp that work correctly on
both little- and big-endian machines.

WIP genfb(4) driver for mac68k becomes 1.5 times faster!


To generate a diff of this commit:
cvs rdiff -u -r1.35 -r1.36 src/sys/dev/rasops/rasops32.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/rasops/rasops32.c
diff -u src/sys/dev/rasops/rasops32.c:1.35 src/sys/dev/rasops/rasops32.c:1.36
--- src/sys/dev/rasops/rasops32.c:1.35	Thu Jul 25 03:02:44 2019
+++ src/sys/dev/rasops/rasops32.c	Thu Jul 25 15:12:47 2019
@@ -1,4 +1,4 @@
-/*	 $NetBSD: rasops32.c,v 1.35 2019/07/25 03:02:44 rin Exp $	*/
+/*	 $NetBSD: rasops32.c,v 1.36 2019/07/25 15:12:47 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.35 2019/07/25 03:02:44 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops32.c,v 1.36 2019/07/25 15:12:47 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -44,6 +44,30 @@ __KERNEL_RCSID(0, "$NetBSD: rasops32.c,v
 
 static void 	rasops32_putchar(void *, int, int, u_int, long attr);
 static void 	rasops32_putchar_aa(void *, int, int, u_int, long attr);
+#ifndef RASOPS_SMALL
+static void	rasops32_putchar8(void *, int, int, u_int, long);
+static void	rasops32_putchar12(void *, int, int, u_int, long);
+static void	rasops32_putchar16(void *, int, int, u_int, long);
+static void	rasops32_makestamp(struct rasops_info *, long);
+
+/*
+ * 4x1 stamp for optimized character blitting
+ *
+ * The table holds 16 groups of four pixels, one group per 4-bit font
+ * pattern.  It is filled by rasops32_makestamp() and rebuilt by the
+ * width-optimized putchar functions whenever the requested attribute
+ * differs from stamp_attr.
+ *
+ * NOTE(review): the cache is keyed on the attribute only, while the
+ * colors come from the caller's ri_devcmap; presumably all users share
+ * one color map -- confirm.
+ */
+static uint32_t	stamp[64];
+static long	stamp_attr;	/* attribute the stamp was last built for */
+static int	stamp_mutex;	/* nonzero while a putchar is using the stamp; XXX see note in readme */
+#endif
+
+/*
+ * offset = STAMP_SHIFT(fontbits, nibble #) & STAMP_MASK
+ * destination uint32_t[0] = STAMP_READ(offset)
+ * destination uint32_t[1] = STAMP_READ(offset +  4)
+ * destination uint32_t[2] = STAMP_READ(offset +  8)
+ * destination uint32_t[3] = STAMP_READ(offset + 12)
+ *
+ * Nibble # 1 selects the high nibble (bits 7-4) of the font byte and
+ * nibble # 0 the low nibble (bits 3-0).  After masking, the selected
+ * nibble sits in bits 7-4, i.e. it is already multiplied by 16, the
+ * size in bytes of one four-pixel stamp group.  The offset is therefore
+ * a byte offset, which is why STAMP_READ() indexes the stamp through a
+ * char pointer.
+ */
+#define	STAMP_SHIFT(fb, n)	((n) ? (fb) : (fb) << 4)
+#define	STAMP_MASK		(0xf << 4)
+#define	STAMP_READ(o)		(*(uint32_t *)((char *)stamp + (o)))
 
 /*
  * Initialize a 'rasops_info' descriptor for this depth.
@@ -63,8 +87,25 @@ rasops32_init(struct rasops_info *ri)
 
 	if (FONT_IS_ALPHA(ri->ri_font)) {
 		ri->ri_ops.putchar = rasops32_putchar_aa;
-	} else
+		return;
+	}
+
+	switch (ri->ri_font->fontwidth) {
+#ifndef RASOPS_SMALL
+	case 8:
+		ri->ri_ops.putchar = rasops32_putchar8;
+		break;
+	case 12:
+		ri->ri_ops.putchar = rasops32_putchar12;
+		break;
+	case 16:
+		ri->ri_ops.putchar = rasops32_putchar16;
+		break;
+#endif
+	default:
 		ri->ri_ops.putchar = rasops32_putchar;
+		break;
+	}
 }
 
 /*
@@ -243,3 +284,433 @@ rasops32_putchar_aa(void *cookie, int ro
 			*rp++ = clr[1];
 	}
 }
+
+#ifndef RASOPS_SMALL
+/*
+ * Rebuild the blitting stamp for the colors selected by attr.
+ *
+ * Bits 27-24 and 19-16 of attr index the foreground and background
+ * colors in ri->ri_devcmap.  The stamp holds 16 groups of four pixels;
+ * in group g, pixel k (0 = first) gets the foreground color when bit
+ * (3 - k) of g is set and the background color otherwise.
+ */
+static void
+rasops32_makestamp(struct rasops_info *ri, long attr)
+{
+	const uint32_t fg = ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf];
+	const uint32_t bg = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
+	uint32_t *entry = stamp;
+	int pattern, mask;
+
+	/* Remember which attribute the stamp now corresponds to. */
+	stamp_attr = attr;
+
+	for (pattern = 0; pattern < 16; pattern++) {
+		/* The most significant bit of the nibble is the first pixel. */
+		for (mask = 8; mask != 0; mask >>= 1)
+			*entry++ = (pattern & mask) ? fg : bg;
+	}
+}
+
+/*
+ * Put a single character. This is for 8-pixel wide fonts.
+ *
+ * cookie is the struct rasops_info of the display, row/col the character
+ * cell, uc the character code and attr the attribute (colors as decoded
+ * by rasops32_makestamp(), plus WSATTR_UNDERLINE).
+ *
+ * The font byte of each glyph row is split into two nibbles; each nibble
+ * selects four ready-made pixels from the stamp.  Everything written to
+ * ri_bits is mirrored to ri_hwbits when the latter is set.  If the stamp
+ * is already in use, the generic rasops32_putchar() is used instead.
+ */
+static void
+rasops32_putchar8(void *cookie, int row, int col, u_int uc, long attr)
+{
+	struct rasops_info *ri = (struct rasops_info *)cookie;
+	struct wsdisplay_font *font = PICK_FONT(ri, uc);
+	int height, so, fs;
+	uint32_t *rp, *hrp = NULL;	/* hrp stays NULL unless ri_hwbits is set */
+	uint8_t *fr;
+
+#ifdef RASOPS_CLIPPING
+	/* Catches 'row < 0' case too */
+	if ((unsigned)row >= (unsigned)ri->ri_rows)
+		return;
+
+	if ((unsigned)col >= (unsigned)ri->ri_cols)
+		return;
+#endif
+
+	/* check if character fits into font limits */
+	if (!CHAR_IN_FONT(uc, font))
+		return;
+
+	/* Can't risk remaking the stamp if it's already in use */
+	if (stamp_mutex++) {
+		stamp_mutex--;
+		rasops32_putchar(cookie, row, col, uc, attr);
+		return;
+	}
+
+	/* Recompute stamp? */
+	if (attr != stamp_attr)
+		rasops32_makestamp(ri, attr);
+
+	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
+	if (ri->ri_hwbits)
+		hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
+		    col*ri->ri_xscale);
+
+	height = font->fontheight;
+
+	if (uc == ' ') {
+		/* Blank cell: stamp[0] is the background color. */
+		while (height--) {
+			rp[0] = rp[1] = rp[2] = rp[3] =
+			rp[4] = rp[5] = rp[6] = rp[7] = stamp[0];
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits) {
+				hrp[0] = hrp[1] = hrp[2] = hrp[3] =
+				hrp[4] = hrp[5] = hrp[6] = hrp[7] = stamp[0];
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+			}
+		}
+	} else {
+		fr = FONT_GLYPH(uc, font, ri);
+		fs = font->stride;
+
+		while (height--) {
+			uint32_t tmp0, tmp1, tmp2, tmp3;
+
+			/* High nibble of the font byte: pixels 0-3 */
+			so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[0] = tmp0;
+			rp[1] = tmp1;
+			rp[2] = tmp2;
+			rp[3] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[0] = tmp0;
+				hrp[1] = tmp1;
+				hrp[2] = tmp2;
+				hrp[3] = tmp3;
+			}
+
+			/* Low nibble of the font byte: pixels 4-7 */
+			so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[4] = tmp0;
+			rp[5] = tmp1;
+			rp[6] = tmp2;
+			rp[7] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[4] = tmp0;
+				hrp[5] = tmp1;
+				hrp[6] = tmp2;
+				hrp[7] = tmp3;
+			}
+
+			fr += fs;
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits)
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+		}
+	}
+
+	/* Do underline */
+	if ((attr & WSATTR_UNDERLINE) != 0) {
+		/*
+		 * Both pointers are one row past the cell here; step each
+		 * of them back two rows.  stamp[60] is the foreground color.
+		 */
+		DELTA(rp, -(ri->ri_stride << 1), uint32_t *);
+		rp[0] = rp[1] = rp[2] = rp[3] =
+		rp[4] = rp[5] = rp[6] = rp[7] = stamp[60];
+		if (ri->ri_hwbits) {
+			DELTA(hrp, -(ri->ri_stride << 1), uint32_t *);
+			hrp[0] = hrp[1] = hrp[2] = hrp[3] =
+			hrp[4] = hrp[5] = hrp[6] = hrp[7] = stamp[60];
+		}
+	}
+
+	stamp_mutex--;
+}
+
+/*
+ * Put a single character. This is for 12-pixel wide fonts.
+ *
+ * cookie is the struct rasops_info of the display, row/col the character
+ * cell, uc the character code and attr the attribute (colors as decoded
+ * by rasops32_makestamp(), plus WSATTR_UNDERLINE).
+ *
+ * Each glyph row uses both nibbles of the first font byte and the high
+ * nibble of the second; each nibble selects four ready-made pixels from
+ * the stamp.  Everything written to ri_bits is mirrored to ri_hwbits
+ * when the latter is set.  If the stamp is already in use, the generic
+ * rasops32_putchar() is used instead.
+ */
+static void
+rasops32_putchar12(void *cookie, int row, int col, u_int uc, long attr)
+{
+	struct rasops_info *ri = (struct rasops_info *)cookie;
+	struct wsdisplay_font *font = PICK_FONT(ri, uc);
+	int height, so, fs;
+	uint32_t *rp, *hrp = NULL;	/* hrp stays NULL unless ri_hwbits is set */
+	uint8_t *fr;
+
+#ifdef RASOPS_CLIPPING
+	/* Catches 'row < 0' case too */
+	if ((unsigned)row >= (unsigned)ri->ri_rows)
+		return;
+
+	if ((unsigned)col >= (unsigned)ri->ri_cols)
+		return;
+#endif
+
+	/* check if character fits into font limits */
+	if (!CHAR_IN_FONT(uc, font))
+		return;
+
+	/* Can't risk remaking the stamp if it's already in use */
+	if (stamp_mutex++) {
+		stamp_mutex--;
+		rasops32_putchar(cookie, row, col, uc, attr);
+		return;
+	}
+
+	/* Recompute stamp? */
+	if (attr != stamp_attr)
+		rasops32_makestamp(ri, attr);
+
+	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
+	if (ri->ri_hwbits)
+		hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
+		    col*ri->ri_xscale);
+
+	height = font->fontheight;
+
+	if (uc == ' ') {
+		/* Blank cell: stamp[0] is the background color. */
+		while (height--) {
+			rp[0]  = rp[1]  = rp[2]  = rp[3]  =
+			rp[4]  = rp[5]  = rp[6]  = rp[7]  =
+			rp[8]  = rp[9]  = rp[10] = rp[11] = stamp[0];
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits) {
+				hrp[0]  = hrp[1]  = hrp[2]  = hrp[3]  =
+				hrp[4]  = hrp[5]  = hrp[6]  = hrp[7]  =
+				hrp[8]  = hrp[9]  = hrp[10] = hrp[11] =
+				    stamp[0];
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+			}
+		}
+	} else {
+		fr = FONT_GLYPH(uc, font, ri);
+		fs = font->stride;
+
+		while (height--) {
+			uint32_t tmp0, tmp1, tmp2, tmp3;
+
+			/* High nibble of the first font byte: pixels 0-3 */
+			so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[0] = tmp0;
+			rp[1] = tmp1;
+			rp[2] = tmp2;
+			rp[3] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[0] = tmp0;
+				hrp[1] = tmp1;
+				hrp[2] = tmp2;
+				hrp[3] = tmp3;
+			}
+
+			/* Low nibble of the first font byte: pixels 4-7 */
+			so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[4] = tmp0;
+			rp[5] = tmp1;
+			rp[6] = tmp2;
+			rp[7] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[4] = tmp0;
+				hrp[5] = tmp1;
+				hrp[6] = tmp2;
+				hrp[7] = tmp3;
+			}
+
+			/* High nibble of the second font byte: pixels 8-11 */
+			so = STAMP_SHIFT(fr[1], 1) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[8] = tmp0;
+			rp[9] = tmp1;
+			rp[10] = tmp2;
+			rp[11] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[8] = tmp0;
+				hrp[9] = tmp1;
+				hrp[10] = tmp2;
+				hrp[11] = tmp3;
+			}
+
+			fr += fs;
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits)
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+		}
+	}
+
+	/* Do underline */
+	if ((attr & WSATTR_UNDERLINE) != 0) {
+		/*
+		 * Both pointers are one row past the cell here; step each
+		 * of them back two rows.  stamp[60] is the foreground color.
+		 */
+		DELTA(rp, -(ri->ri_stride << 1), uint32_t *);
+		rp[0]  = rp[1]  = rp[2]  = rp[3]  =
+		rp[4]  = rp[5]  = rp[6]  = rp[7]  =
+		rp[8]  = rp[9]  = rp[10] = rp[11] = stamp[60];
+		if (ri->ri_hwbits) {
+			DELTA(hrp, -(ri->ri_stride << 1), uint32_t *);
+			hrp[0]  = hrp[1]  = hrp[2]  = hrp[3]  =
+			hrp[4]  = hrp[5]  = hrp[6]  = hrp[7]  =
+			hrp[8]  = hrp[9]  = hrp[10] = hrp[11] = stamp[60];
+		}
+	}
+
+	stamp_mutex--;
+}
+
+/*
+ * Put a single character. This is for 16-pixel wide fonts.
+ *
+ * cookie is the struct rasops_info of the display, row/col the character
+ * cell, uc the character code and attr the attribute (colors as decoded
+ * by rasops32_makestamp(), plus WSATTR_UNDERLINE).
+ *
+ * Each glyph row uses both nibbles of the first two font bytes; each
+ * nibble selects four ready-made pixels from the stamp.  Everything
+ * written to ri_bits is mirrored to ri_hwbits when the latter is set.
+ * If the stamp is already in use, the generic rasops32_putchar() is
+ * used instead.
+ */
+static void
+rasops32_putchar16(void *cookie, int row, int col, u_int uc, long attr)
+{
+	struct rasops_info *ri = (struct rasops_info *)cookie;
+	struct wsdisplay_font *font = PICK_FONT(ri, uc);
+	int height, so, fs;
+	uint32_t *rp, *hrp = NULL;	/* hrp stays NULL unless ri_hwbits is set */
+	uint8_t *fr;
+
+#ifdef RASOPS_CLIPPING
+	/* Catches 'row < 0' case too */
+	if ((unsigned)row >= (unsigned)ri->ri_rows)
+		return;
+
+	if ((unsigned)col >= (unsigned)ri->ri_cols)
+		return;
+#endif
+
+	/* check if character fits into font limits */
+	if (!CHAR_IN_FONT(uc, font))
+		return;
+
+	/* Can't risk remaking the stamp if it's already in use */
+	if (stamp_mutex++) {
+		stamp_mutex--;
+		rasops32_putchar(cookie, row, col, uc, attr);
+		return;
+	}
+
+	/* Recompute stamp? */
+	if (attr != stamp_attr)
+		rasops32_makestamp(ri, attr);
+
+	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
+	if (ri->ri_hwbits)
+		hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
+		    col*ri->ri_xscale);
+
+	height = font->fontheight;
+
+	if (uc == ' ') {
+		/* Blank cell: stamp[0] is the background color. */
+		while (height--) {
+			rp[0]  = rp[1]  = rp[2]  = rp[3]  =
+			rp[4]  = rp[5]  = rp[6]  = rp[7]  =
+			rp[8]  = rp[9]  = rp[10] = rp[11] =
+			rp[12] = rp[13] = rp[14] = rp[15] = stamp[0];
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits) {
+				hrp[0]  = hrp[1]  = hrp[2]  = hrp[3]  =
+				hrp[4]  = hrp[5]  = hrp[6]  = hrp[7]  =
+				hrp[8]  = hrp[9]  = hrp[10] = hrp[11] =
+				hrp[12] = hrp[13] = hrp[14] = hrp[15] =
+				    stamp[0];
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+			}
+		}
+	} else {
+		fr = FONT_GLYPH(uc, font, ri);
+		fs = font->stride;
+
+		while (height--) {
+			uint32_t tmp0, tmp1, tmp2, tmp3;
+
+			/* High nibble of the first font byte: pixels 0-3 */
+			so = STAMP_SHIFT(fr[0], 1) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[0] = tmp0;
+			rp[1] = tmp1;
+			rp[2] = tmp2;
+			rp[3] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[0] = tmp0;
+				hrp[1] = tmp1;
+				hrp[2] = tmp2;
+				hrp[3] = tmp3;
+			}
+
+			/* Low nibble of the first font byte: pixels 4-7 */
+			so = STAMP_SHIFT(fr[0], 0) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[4] = tmp0;
+			rp[5] = tmp1;
+			rp[6] = tmp2;
+			rp[7] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[4] = tmp0;
+				hrp[5] = tmp1;
+				hrp[6] = tmp2;
+				hrp[7] = tmp3;
+			}
+
+			/* High nibble of the second font byte: pixels 8-11 */
+			so = STAMP_SHIFT(fr[1], 1) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[8] = tmp0;
+			rp[9] = tmp1;
+			rp[10] = tmp2;
+			rp[11] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[8] = tmp0;
+				hrp[9] = tmp1;
+				hrp[10] = tmp2;
+				hrp[11] = tmp3;
+			}
+
+			/* Low nibble of the second font byte: pixels 12-15 */
+			so = STAMP_SHIFT(fr[1], 0) & STAMP_MASK;
+			tmp0 = STAMP_READ(so);
+			tmp1 = STAMP_READ(so +  4);
+			tmp2 = STAMP_READ(so +  8);
+			tmp3 = STAMP_READ(so + 12);
+			rp[12] = tmp0;
+			rp[13] = tmp1;
+			rp[14] = tmp2;
+			rp[15] = tmp3;
+			if (ri->ri_hwbits) {
+				hrp[12] = tmp0;
+				hrp[13] = tmp1;
+				hrp[14] = tmp2;
+				hrp[15] = tmp3;
+			}
+
+			fr += fs;
+			DELTA(rp, ri->ri_stride, uint32_t *);
+			if (ri->ri_hwbits)
+				DELTA(hrp, ri->ri_stride, uint32_t *);
+		}
+	}
+
+	/* Do underline */
+	if ((attr & WSATTR_UNDERLINE) != 0) {
+		/*
+		 * Both pointers are one row past the cell here; step each
+		 * of them back two rows.  stamp[60] is the foreground color.
+		 */
+		DELTA(rp, -(ri->ri_stride << 1), uint32_t *);
+		rp[0]  = rp[1]  = rp[2]  = rp[3]  =
+		rp[4]  = rp[5]  = rp[6]  = rp[7]  =
+		rp[8]  = rp[9]  = rp[10] = rp[11] =
+		rp[12] = rp[13] = rp[14] = rp[15] = stamp[60];
+		if (ri->ri_hwbits) {
+			DELTA(hrp, -(ri->ri_stride << 1), uint32_t *);
+			hrp[0]  = hrp[1]  = hrp[2]  = hrp[3]  =
+			hrp[4]  = hrp[5]  = hrp[6]  = hrp[7]  =
+			hrp[8]  = hrp[9]  = hrp[10] = hrp[11] =
+			hrp[12] = hrp[13] = hrp[14] = hrp[15] = stamp[60];
+		}
+	}
+
+	stamp_mutex--;
+}
+#endif

Reply via email to