Module Name:    src
Committed By:   rin
Date:           Tue Jul 30 15:29:40 UTC 2019

Modified Files:
        src/sys/dev/rasops: rasops.c rasops1.c rasops_bitops.h rasops_putchar.h
            rasops_putchar_aa.h rasops_putchar_width.h

Log Message:
Try to improve performance when a shadow framebuffer is present:
use a block copy from the shadow fb to the real fb, instead of
repeating the same operations on both fb's.


To generate a diff of this commit:
cvs rdiff -u -r1.100 -r1.101 src/sys/dev/rasops/rasops.c
cvs rdiff -u -r1.30 -r1.31 src/sys/dev/rasops/rasops1.c
cvs rdiff -u -r1.17 -r1.18 src/sys/dev/rasops/rasops_bitops.h
cvs rdiff -u -r1.4 -r1.5 src/sys/dev/rasops/rasops_putchar.h
cvs rdiff -u -r1.3 -r1.4 src/sys/dev/rasops/rasops_putchar_aa.h
cvs rdiff -u -r1.8 -r1.9 src/sys/dev/rasops/rasops_putchar_width.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/rasops/rasops.c
diff -u src/sys/dev/rasops/rasops.c:1.100 src/sys/dev/rasops/rasops.c:1.101
--- src/sys/dev/rasops/rasops.c:1.100	Tue Jul 30 14:41:10 2019
+++ src/sys/dev/rasops/rasops.c	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/*	 $NetBSD: rasops.c,v 1.100 2019/07/30 14:41:10 rin Exp $	*/
+/*	 $NetBSD: rasops.c,v 1.101 2019/07/30 15:29:40 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.100 2019/07/30 14:41:10 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops.c,v 1.101 2019/07/30 15:29:40 rin Exp $");
 
 #include "opt_rasops.h"
 #include "rasops_glue.h"
@@ -950,8 +950,8 @@ void
 rasops_eraserows(void *cookie, int row, int num, long attr)
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
-	uint32_t *dp, *hp, clr;
-	int n, cnt, delta;
+	uint32_t *rp, *dp, *hp, clr;
+	int n, cnt;
 
 	hp = NULL;	/* XXX GCC */
 
@@ -979,29 +979,26 @@ rasops_eraserows(void *cookie, int row, 
 	if (num == ri->ri_rows && (ri->ri_flg & RI_FULLCLEAR) != 0) {
 		n = ri->ri_stride >> 2;
 		num = ri->ri_height;
-		dp = (uint32_t *)ri->ri_origbits;
+		rp = (uint32_t *)ri->ri_origbits;
 		if (ri->ri_hwbits)
 			hp = (uint32_t *)ri->ri_hworigbits;
-		delta = 0;
 	} else {
 		n = ri->ri_emustride >> 2;
 		num *= ri->ri_font->fontheight;
-		dp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
+		rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale);
 		if (ri->ri_hwbits)
-			hp = (uint32_t *)(ri->ri_hwbits + row *
-			    ri->ri_yscale);
-		delta = ri->ri_delta;
+			hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale);
 	}
 
 	while (num--) {
-		for (cnt = n; cnt; cnt--) {
+		dp = rp;
+		for (cnt = n; cnt; cnt--)
 			*dp++ = clr;
-			if (ri->ri_hwbits)
-				*hp++ = clr;
+		if (ri->ri_hwbits) {
+			memcpy(hp, rp, n << 2);
+			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
-		DELTA(dp, delta, uint32_t *);
-		if (ri->ri_hwbits)
-			DELTA(hp, delta, uint32_t *);
+		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 
@@ -1015,9 +1012,9 @@ rasops_do_cursor(struct rasops_info *ri)
 	int full, height, cnt, slop1, slop2, row, col;
 	uint32_t tmp32, msk1, msk2;
 	uint8_t tmp8;
-	uint8_t *dp, *rp, *hrp, *hp;
+	uint8_t *dp, *rp, *hp;
 
-	hrp = hp = NULL;	/* XXX GCC */
+	hp = NULL;	/* XXX GCC */
 
 #if NRASOPS_ROTATION > 0
 	if (ri->ri_flg & RI_ROTATE_MASK) {
@@ -1042,8 +1039,7 @@ rasops_do_cursor(struct rasops_info *ri)
 
 	rp = ri->ri_bits + row * ri->ri_yscale + col * ri->ri_xscale;
 	if (ri->ri_hwbits)
-		hrp = ri->ri_hwbits + row * ri->ri_yscale + col
-		    * ri->ri_xscale;
+		hp = ri->ri_hwbits + row * ri->ri_yscale + col * ri->ri_xscale;
 	height = ri->ri_font->fontheight;
 
 	/*
@@ -1060,8 +1056,8 @@ rasops_do_cursor(struct rasops_info *ri)
 			rp += ri->ri_stride;
 
 			if (ri->ri_hwbits) {
-				*hrp = tmp8;
-				hrp += ri->ri_stride;
+				*hp = tmp8;
+				hp += ri->ri_stride;
 			}
 		}
 		return;
@@ -1078,45 +1074,37 @@ rasops_do_cursor(struct rasops_info *ri)
 	full = (ri->ri_xscale - slop1 /* - slop2 */) >> 2;
 
 	rp = (uint8_t *)((uintptr_t)rp & ~3);
-	hrp = (uint8_t *)((uintptr_t)hrp & ~3);
+	hp = (uint8_t *)((uintptr_t)hp & ~3);
 
 	msk1 = !slop1 ? 0 : be32toh(0xffffffffU >> (32 - (8 * slop1)));
 	msk2 = !slop2 ? 0 : be32toh(0xffffffffU << (32 - (8 * slop2)));
 
 	while (height--) {
 		dp = rp;
-		rp += ri->ri_stride;
-		if (ri->ri_hwbits) {
-			hp = hrp;
-			hrp += ri->ri_stride;
-		}
 
 		if (slop1) {
 			tmp32 = *(uint32_t *)dp ^ msk1;
 			*(uint32_t *)dp = tmp32;
 			dp += 4;
-			if (ri->ri_hwbits) {
-				*(uint32_t *)hp = tmp32;
-				hp += 4;
-			}
 		}
 
 		for (cnt = full; cnt; cnt--) {
 			tmp32 = ~*(uint32_t *)dp;
 			*(uint32_t *)dp = tmp32;
 			dp += 4;
-			if (ri->ri_hwbits) {
-				*(uint32_t *)hp = tmp32;
-				hp += 4;
-			}
 		}
 
 		if (slop2) {
 			tmp32 = *(uint32_t *)dp ^ msk2;
 			*(uint32_t *)dp = tmp32;
-			if (ri->ri_hwbits)
-				*(uint32_t *)hp = tmp32;
 		}
+
+		if (ri->ri_hwbits) {
+			memcpy(hp, rp, ((slop1 != 0) + full +
+			    (slop2 != 0)) << 2);
+			hp += ri->ri_stride;
+		}
+		rp += ri->ri_stride;
 	}
 }
 
@@ -1128,9 +1116,9 @@ rasops_erasecols(void *cookie, int row, 
 {
 	struct rasops_info *ri = (struct rasops_info *)cookie;
 	int height, cnt, slop1, slop2, clr;
-	uint32_t *rp, *dp, *hrp, *hp;
+	uint32_t *rp, *dp, *hp;
 
-	hrp = hp = NULL;	/* XXX GCC */
+	hp = NULL;	/* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
 	if ((unsigned)row >= (unsigned)ri->ri_rows)
@@ -1141,7 +1129,7 @@ rasops_erasecols(void *cookie, int row, 
 		col = 0;
 	}
 
-	if ((col + num) > ri->ri_cols)
+	if (col + num > ri->ri_cols)
 		num = ri->ri_cols - col;
 
 	if (num <= 0)
@@ -1151,7 +1139,7 @@ rasops_erasecols(void *cookie, int row, 
 	num *= ri->ri_xscale;
 	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + col*ri->ri_xscale);
 	if (ri->ri_hwbits)
-		hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
+		hp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
 		    col*ri->ri_xscale);
 	height = ri->ri_font->fontheight;
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
@@ -1164,17 +1152,13 @@ rasops_erasecols(void *cookie, int row, 
 
 			while (height--) {
 				dp = rp;
-				DELTA(rp, ri->ri_stride, uint32_t *);
-				if (ri->ri_hwbits) {
-					hp = hrp;
-					DELTA(hrp, ri->ri_stride, uint32_t *);
-				}
-
-				for (cnt = num; cnt; cnt--) {
+				for (cnt = num; cnt; cnt--)
 					*dp++ = clr;
-					if (ri->ri_hwbits)
-						*hp++ = clr;
+				if (ri->ri_hwbits) {
+					memcpy(hp, rp, num << 2);
+					DELTA(hp, ri->ri_stride, uint32_t *);
 				}
+				DELTA(rp, ri->ri_stride, uint32_t *);
 			}
 		} else if (((num | ri->ri_xscale) & 1) == 0) {
 			/*
@@ -1185,38 +1169,28 @@ rasops_erasecols(void *cookie, int row, 
 
 			while (height--) {
 				dp = rp;
-				DELTA(rp, ri->ri_stride, uint32_t *);
-				if (ri->ri_hwbits) {
-					hp = hrp;
-					DELTA(hrp, ri->ri_stride, uint32_t *);
-				}
-
 				for (cnt = num; cnt; cnt--) {
 					*(uint16_t *)dp = clr;
 					DELTA(dp, 2, uint32_t *);
-					if (ri->ri_hwbits) {
-						*(uint16_t *)hp = clr;
-						DELTA(hp, 2, uint32_t *);
-					}
 				}
+				if (ri->ri_hwbits) {
+					memcpy(hp, rp, num << 1);
+					DELTA(hp, ri->ri_stride, uint32_t *);
+				}
+				DELTA(rp, ri->ri_stride, uint32_t *);
 			}
 		} else {
 			while (height--) {
 				dp = rp;
-				DELTA(rp, ri->ri_stride, uint32_t *);
-				if (ri->ri_hwbits) {
-					hp = hrp;
-					DELTA(hrp, ri->ri_stride, uint32_t *);
-				}
-
 				for (cnt = num; cnt; cnt--) {
 					*(uint8_t *)dp = clr;
 					DELTA(dp, 1, uint32_t *);
-					if (ri->ri_hwbits) {
-						*(uint8_t *)hp = clr;
-						DELTA(hp, 1, uint32_t *);
-					}
 				}
+				if (ri->ri_hwbits) {
+					memcpy(hp, rp, num);
+					DELTA(hp, ri->ri_stride, uint32_t *);
+				}
+				DELTA(rp, ri->ri_stride, uint32_t *);
 			}
 		}
 
@@ -1229,53 +1203,36 @@ rasops_erasecols(void *cookie, int row, 
 
 	while (height--) {
 		dp = rp;
-		DELTA(rp, ri->ri_stride, uint32_t *);
-		if (ri->ri_hwbits) {
-			hp = hrp;
-			DELTA(hrp, ri->ri_stride, uint32_t *);
-		}
 
 		/* Align span to 4 bytes */
 		if (slop1 & 1) {
 			*(uint8_t *)dp = clr;
 			DELTA(dp, 1, uint32_t *);
-			if (ri->ri_hwbits) {
-				*(uint8_t *)hp = clr;
-				DELTA(hp, 1, uint32_t *);
-			}
 		}
 
 		if (slop1 & 2) {
 			*(uint16_t *)dp = clr;
 			DELTA(dp, 2, uint32_t *);
-			if (ri->ri_hwbits) {
-				*(uint16_t *)hp = clr;
-				DELTA(hp, 2, uint32_t *);
-			}
 		}
 
 		/* Write 4 bytes per loop */
-		for (cnt = num; cnt; cnt--) {
+		for (cnt = num; cnt; cnt--)
 			*dp++ = clr;
-			if (ri->ri_hwbits)
-				*hp++ = clr;
-		}
 
 		/* Write unaligned trailing slop */
 		if (slop2 & 1) {
 			*(uint8_t *)dp = clr;
 			DELTA(dp, 1, uint32_t *);
-			if (ri->ri_hwbits) {
-				*(uint8_t *)hp = clr;
-				DELTA(hp, 1, uint32_t *);
-			}
 		}
 
-		if (slop2 & 2) {
+		if (slop2 & 2)
 			*(uint16_t *)dp = clr;
-			if (ri->ri_hwbits)
-				*(uint16_t *)hp = clr;
+
+		if (ri->ri_hwbits) {
+			memcpy(hp, rp, slop1 + (num << 2) + slop2);
+			DELTA(hp, ri->ri_stride, uint32_t *);
 		}
+		DELTA(rp, ri->ri_stride, uint32_t *);
 	}
 }
 

Index: src/sys/dev/rasops/rasops1.c
diff -u src/sys/dev/rasops/rasops1.c:1.30 src/sys/dev/rasops/rasops1.c:1.31
--- src/sys/dev/rasops/rasops1.c:1.30	Mon Jul 29 08:13:50 2019
+++ src/sys/dev/rasops/rasops1.c	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops1.c,v 1.30 2019/07/29 08:13:50 rin Exp $	*/
+/* 	$NetBSD: rasops1.c,v 1.31 2019/07/30 15:29:40 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rasops1.c,v 1.30 2019/07/29 08:13:50 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rasops1.c,v 1.31 2019/07/30 15:29:40 rin Exp $");
 
 #include "opt_rasops.h"
 
@@ -91,11 +91,11 @@ rasops1_putchar(void *cookie, int row, i
 	struct wsdisplay_font *font = PICK_FONT(ri, uc);
 	uint32_t fs, rs, fb, bg, fg, lmask, rmask;
 	uint32_t height, width;
-	uint32_t *rp, *hrp, tmp, tmp0, tmp1;
+	uint32_t *rp, *hp, tmp, tmp0, tmp1;
 	uint8_t *fr;
 	bool space;
 
-	hrp = NULL;	/* XXX GCC */
+	hp = NULL;	/* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
 	/* Catches 'row < 0' case too */
@@ -110,7 +110,7 @@ rasops1_putchar(void *cookie, int row, i
 	rp = (uint32_t *)(ri->ri_bits + row * ri->ri_yscale +
 	    ((col >> 3) & ~3));
 	if (ri->ri_hwbits)
-		hrp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
+		hp = (uint32_t *)(ri->ri_hwbits + row * ri->ri_yscale +
 		    ((col >> 3) & ~3));
 	height = font->fontheight;
 	width = font->fontwidth;
@@ -144,8 +144,8 @@ rasops1_putchar(void *cookie, int row, i
 				*rp = tmp;
 				DELTA(rp, rs, uint32_t *);
 				if (ri->ri_hwbits) {
-					*hrp = tmp;
-					DELTA(hrp, rs, uint32_t *);
+					*hp = tmp;
+					DELTA(hp, rs, uint32_t *);
 				}
 			}
 		} else {
@@ -159,8 +159,8 @@ rasops1_putchar(void *cookie, int row, i
 				*rp = tmp;
 				DELTA(rp, rs, uint32_t *);
 				if (ri->ri_hwbits) {
-					*hrp = tmp;
-					DELTA(hrp, rs, uint32_t *);
+					*hp = tmp;
+					DELTA(hp, rs, uint32_t *);
 				}
 			}
 		}
@@ -171,8 +171,8 @@ rasops1_putchar(void *cookie, int row, i
 			tmp = (*rp & lmask) | (fg & rmask);
 			*rp = tmp;
 			if (ri->ri_hwbits) {
-				DELTA(hrp, -(ri->ri_stride << 1), uint32_t *);
-				*hrp = tmp;
+				DELTA(hp, -(ri->ri_stride << 1), uint32_t *);
+				*hp = tmp;
 			}
 		}
 	} else {
@@ -191,9 +191,9 @@ rasops1_putchar(void *cookie, int row, i
 				rp[1] = tmp1;
 				DELTA(rp, rs, uint32_t *);
 				if (ri->ri_hwbits) {
-					hrp[0] = tmp0;
-					hrp[1] = tmp1;
-					DELTA(hrp, rs, uint32_t *);
+					hp[0] = tmp0;
+					hp[1] = tmp1;
+					DELTA(hp, rs, uint32_t *);
 				}
 			}
 		} else {
@@ -211,9 +211,9 @@ rasops1_putchar(void *cookie, int row, i
 				rp[1] = tmp1;
 				DELTA(rp, rs, uint32_t *);
 				if (ri->ri_hwbits) {
-					hrp[0] = tmp0;
-					hrp[1] = tmp1;
-					DELTA(hrp, rs, uint32_t *);
+					hp[0] = tmp0;
+					hp[1] = tmp1;
+					DELTA(hp, rs, uint32_t *);
 				}
 			}
 		}
@@ -226,9 +226,9 @@ rasops1_putchar(void *cookie, int row, i
 			rp[0] = tmp0;
 			rp[1] = tmp1;
 			if (ri->ri_hwbits) {
-				DELTA(hrp, -(ri->ri_stride << 1), uint32_t *);
-				hrp[0] = tmp0;
-				hrp[1] = tmp1;
+				DELTA(hp, -(ri->ri_stride << 1), uint32_t *);
+				hp[0] = tmp0;
+				hp[1] = tmp1;
 			}
 		}
 	}

Index: src/sys/dev/rasops/rasops_bitops.h
diff -u src/sys/dev/rasops/rasops_bitops.h:1.17 src/sys/dev/rasops/rasops_bitops.h:1.18
--- src/sys/dev/rasops/rasops_bitops.h:1.17	Sun Jul 28 12:06:10 2019
+++ src/sys/dev/rasops/rasops_bitops.h	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/* 	$NetBSD: rasops_bitops.h,v 1.17 2019/07/28 12:06:10 rin Exp $	*/
+/* 	$NetBSD: rasops_bitops.h,v 1.18 2019/07/30 15:29:40 rin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -40,9 +40,11 @@ NAME(erasecols)(void *cookie, int row, i
 {
 	int lclr, rclr, clr;
 	struct rasops_info *ri;
-	uint32_t *dp, *rp, *hrp = NULL, *hp = NULL, tmp, lmask, rmask;
+	uint32_t *dp, *rp, *hp, tmp, lmask, rmask;
 	int height, cnt;
 
+	hp = NULL;	/* XXX GCC */
+
 	ri = (struct rasops_info *)cookie;
 
 #ifdef RASOPS_CLIPPING
@@ -66,7 +68,7 @@ NAME(erasecols)(void *cookie, int row, i
 	clr = ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
 	rp = (uint32_t *)(ri->ri_bits + row*ri->ri_yscale + ((col >> 3) & ~3));
 	if (ri->ri_hwbits)
-		hrp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
+		hp = (uint32_t *)(ri->ri_hwbits + row*ri->ri_yscale +
 		    ((col >> 3) & ~3));
 	if ((col & 31) + num <= 32) {
 		lmask = ~rasops_pmask[col & 31][num];
@@ -79,8 +81,8 @@ NAME(erasecols)(void *cookie, int row, i
 			tmp = (*dp & lmask) | lclr;
 			*dp = tmp;
 			if (ri->ri_hwbits) {
-				*hrp = tmp;
-				DELTA(hrp, ri->ri_stride, uint32_t *);
+				*hp = tmp;
+				DELTA(hp, ri->ri_stride, uint32_t *);
 			}
 		}
 	} else {
@@ -97,35 +99,24 @@ NAME(erasecols)(void *cookie, int row, i
 
 		while (height--) {
 			dp = rp;
-			DELTA(rp, ri->ri_stride, uint32_t *);
-			if (ri->ri_hwbits) {
-				hp = hrp;
-				DELTA(hrp, ri->ri_stride, uint32_t *);
-			}
 
 			if (lmask) {
-				tmp = (*dp & lmask) | lclr;
-				*dp = tmp;
+				*dp = (*dp & lmask) | lclr;
 				dp++;
-				if (ri->ri_hwbits) {
-					*hp = tmp;
-					hp++;
-				}
 			}
 
 			for (cnt = num; cnt > 0; cnt--)
 				*dp++ = clr;
-			if (ri->ri_hwbits) {
-				for (cnt = num; cnt > 0; cnt--)
-					*hp++ = clr;
-			}
 
-			if (rmask) {
-				tmp = (*dp & rmask) | rclr;
-				*dp = tmp;
-				if (ri->ri_hwbits)
-					*hp = tmp;
+			if (rmask)
+				*dp = (*dp & rmask) | rclr;
+
+			if (ri->ri_hwbits) {
+				memcpy(hp, rp, ((lmask != 0) + num +
+				    (rmask != 0)) << 2);
+				DELTA(hp, ri->ri_stride, uint32_t *);
 			}
+			DELTA(rp, ri->ri_stride, uint32_t *);
 		}
 	}
 }
@@ -137,7 +128,9 @@ static void
 NAME(do_cursor)(struct rasops_info *ri)
 {
 	int height, row, col, num;
-	uint32_t *dp, *rp, *hp = NULL, *hrp = NULL, tmp, lmask, rmask;
+	uint32_t *dp, *rp, *hrp, *hp, tmp, lmask, rmask;
+
+	hrp = hp = NULL;	/* XXX GCC */
 
 	row = ri->ri_crow;
 	col = ri->ri_ccol * ri->ri_font->fontwidth << PIXEL_SHIFT;
@@ -153,17 +146,13 @@ NAME(do_cursor)(struct rasops_info *ri)
 		lmask = rasops_pmask[col & 31][num];
 
 		while (height--) {
-			dp = rp;
-			DELTA(rp, ri->ri_stride, uint32_t *);
-			*dp ^= lmask;
-		}
-		if (ri->ri_hwbits) {
-			height = ri->ri_font->fontheight;
-			while (height--) {
-				hp = hrp;
+			tmp = *rp ^ lmask;
+			*rp = tmp;
+			if (ri->ri_hwbits) {
+				*hrp = tmp;
 				DELTA(hrp, ri->ri_stride, uint32_t *);
-				*hp ^= lmask;
 			}
+			DELTA(rp, ri->ri_stride, uint32_t *);
 		}
 	} else {
 		lmask = ~rasops_rmask[col & 31];
@@ -176,6 +165,7 @@ NAME(do_cursor)(struct rasops_info *ri)
 				hp = hrp;
 				DELTA(hrp, ri->ri_stride, uint32_t *);
 			}
+
 			if (lmask != -1) {
 				tmp = *dp ^ lmask;
 				*dp = tmp;
@@ -202,12 +192,12 @@ NAME(do_cursor)(struct rasops_info *ri)
 static void
 NAME(copycols)(void *cookie, int row, int src, int dst, int num)
 {
+	struct rasops_info *ri = (struct rasops_info *)cookie;
 	int height, lnum, rnum, sb, db, cnt, full;
 	uint32_t tmp, lmask, rmask;
-	uint32_t *sp, *dp, *srp, *drp, *dhp = NULL, *hp = NULL;
-	struct rasops_info *ri;
+	uint32_t *sp, *dp, *srp, *drp, *dhp, *hp;
 
-	ri = (struct rasops_info *)cookie;
+	dhp = hp = NULL;	/* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
 	if (dst == src)
@@ -222,7 +212,7 @@ NAME(copycols)(void *cookie, int row, in
 		src = 0;
 	}
 
-	if ((src + num) > ri->ri_cols)
+	if (src + num > ri->ri_cols)
 		num = ri->ri_cols - src;
 
 	if (dst < 0) {
@@ -230,7 +220,7 @@ NAME(copycols)(void *cookie, int row, in
 		dst = 0;
 	}
 
-	if ((dst + num) > ri->ri_cols)
+	if (dst + num > ri->ri_cols)
 		num = ri->ri_cols - dst;
 
 	if (num <= 0)
@@ -302,19 +292,12 @@ NAME(copycols)(void *cookie, int row, in
 		while (height--) {
 			sp = srp;
 			dp = drp;
-			if (ri->ri_hwbits) {
-				hp = dhp;
-				DELTA(dhp, ri->ri_stride, uint32_t *);
-			}
 			DELTA(srp, ri->ri_stride, uint32_t *);
 			DELTA(drp, ri->ri_stride, uint32_t *);
 
 			if (rnum) {
 				GETBITS(sp, sboff, rnum, tmp);
 				PUTBITS(tmp, 0, rnum, dp);
-				if (ri->ri_hwbits) {
-					PUTBITS(tmp, 0, rnum, hp);
-				}
 			}
 
 			/* Now aligned to 32-bits wrt dp */
@@ -323,10 +306,6 @@ NAME(copycols)(void *cookie, int row, in
 				--sp;
 				GETBITS(sp, sboff, 32, tmp);
 				*dp = tmp;
-				if (ri->ri_hwbits) {
-					--hp;
-					*hp = tmp;
-				}
 			}
 
 			if (lmask) {
@@ -335,9 +314,15 @@ NAME(copycols)(void *cookie, int row, in
 				--dp;
 				GETBITS(sp, sb, lnum, tmp);
 				PUTBITS(tmp, db, lnum, dp);
-				if (ri->ri_hwbits)
-					PUTBITS(tmp, db, lnum, hp);
  			}
+
+			if (ri->ri_hwbits) {
+				/* Count full words, as the hp offset does. */
+				hp = dhp - (full + (lmask != 0));
+				memcpy(hp, dp, ((rmask != 0) + full +
+				    (lmask != 0)) << 2);
+				DELTA(dhp, ri->ri_stride, uint32_t *);
+			}
  		}
 	} else {
 		/* Copy left-to-right */
@@ -352,23 +337,13 @@ NAME(copycols)(void *cookie, int row, in
 			sb = src & 31;
 			sp = srp;
 			dp = drp;
-			if (ri->ri_hwbits) {
-				hp = dhp;
-				DELTA(dhp, ri->ri_stride, uint32_t *);
-			}
-			DELTA(srp, ri->ri_stride, uint32_t *);
-			DELTA(drp, ri->ri_stride, uint32_t *);
 
 			if (lmask) {
 				GETBITS(sp, sb, lnum, tmp);
 				PUTBITS(tmp, db, lnum, dp);
 				dp++;
-				if (ri->ri_hwbits) {
-					PUTBITS(tmp, db, lnum, hp);
-					hp++;
-				}	
 
-				if ((sb += lnum) > 31) {
+				if ((sb += lnum) > 31) { /* add, then test */
 					sp++;
 					sb -= 32;
 				}
@@ -378,16 +353,21 @@ NAME(copycols)(void *cookie, int row, in
 			for (cnt = full; cnt; cnt--, sp++) {
 				GETBITS(sp, sb, 32, tmp);
 				*dp++ = tmp;
-				if (ri->ri_hwbits)
-					*hp++ = tmp;
 			}
 
 			if (rmask) {
 				GETBITS(sp, sb, rnum, tmp);
 				PUTBITS(tmp, 0, rnum, dp);
-				if (ri->ri_hwbits)
-					PUTBITS(tmp, 0, rnum, hp);
  			}
+
+			if (ri->ri_hwbits) {
+				memcpy(dhp, drp, ((lmask != 0) + full +
+				    (rmask != 0)) << 2);
+				DELTA(dhp, ri->ri_stride, uint32_t *);
+			}
+
+			DELTA(srp, ri->ri_stride, uint32_t *);
+			DELTA(drp, ri->ri_stride, uint32_t *);
  		}
  	}
 }

Index: src/sys/dev/rasops/rasops_putchar.h
diff -u src/sys/dev/rasops/rasops_putchar.h:1.4 src/sys/dev/rasops/rasops_putchar.h:1.5
--- src/sys/dev/rasops/rasops_putchar.h:1.4	Mon Jul 29 08:13:50 2019
+++ src/sys/dev/rasops/rasops_putchar.h	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: rasops_putchar.h,v 1.4 2019/07/29 08:13:50 rin Exp $ */
+/* $NetBSD: rasops_putchar.h,v 1.5 2019/07/30 15:29:40 rin Exp $ */
 
 /* NetBSD: rasops8.c,v 1.41 2019/07/25 03:02:44 rin Exp  */
 /*-
@@ -38,28 +38,32 @@
 #define PUTCHAR(depth)	PUTCHAR1(depth)
 #define PUTCHAR1(depth)	rasops ## depth ## _putchar
 
-#if RASOPS_DEPTH == 8
-#define	CLR_TYPE	uint8_t
+#if   RASOPS_DEPTH == 8
+#define	COLOR_TYPE		uint8_t
 #elif RASOPS_DEPTH ==  15
-#define	CLR_TYPE	uint16_t
+#define	COLOR_TYPE		uint16_t
 #else
-#define	CLR_TYPE	uint32_t
+#define	COLOR_TYPE		uint32_t
+#endif
+
+#if RASOPS_DEPTH != 24
+#define	PIXEL_BYTES		sizeof(COLOR_TYPE)
+#define	SET_PIXEL(p, index)						\
+	do {								\
+		*(COLOR_TYPE *)(p) = clr[index];			\
+		(p) += sizeof(COLOR_TYPE);				\
+	} while (0 /* CONSTCOND */)
 #endif
 
 #if RASOPS_DEPTH == 24
-#define	SUBST_CLR(p, index)						\
+#define	PIXEL_BYTES		3
+#define	SET_PIXEL(p, index)						\
 	do {								\
-		CLR_TYPE c = clr[index];				\
+		COLOR_TYPE c = clr[index];				\
 		*(p)++ = c >> 16;					\
 		*(p)++ = c >> 8;					\
 		*(p)++ = c;						\
 	} while (0 /* CONSTCOND */)
-#else
-#define	SUBST_CLR(p, index)						\
-	do {								\
-		*(CLR_TYPE *)(p) = clr[index];				\
-		(p) += sizeof(CLR_TYPE);				\
-	} while (0 /* CONSTCOND */)
 #endif
 
 /*
@@ -72,10 +76,10 @@ PUTCHAR(RASOPS_DEPTH)(void *cookie, int 
 	struct wsdisplay_font *font = PICK_FONT(ri, uc);
 	int width, height, cnt, fs;
 	uint32_t fb;
-	uint8_t *dp, *rp, *hp, *hrp, *fr;
-	CLR_TYPE clr[2];
+	uint8_t *dp, *rp, *hp, *fr;
+	COLOR_TYPE clr[2];
 
-	hp = hrp = NULL;	/* XXX GCC */
+	hp = NULL;	/* XXX GCC */
 
 	if (!CHAR_IN_FONT(uc, font))
 		return;
@@ -91,28 +95,26 @@ PUTCHAR(RASOPS_DEPTH)(void *cookie, int 
 
 	rp = ri->ri_bits + row * ri->ri_yscale + col * ri->ri_xscale;
 	if (ri->ri_hwbits)
-		hrp = ri->ri_hwbits + row * ri->ri_yscale + col *
-		    ri->ri_xscale;
+		hp = ri->ri_hwbits + row * ri->ri_yscale + col * ri->ri_xscale;
 
 	height = font->fontheight;
 	width = font->fontwidth;
 
-	clr[0] = (CLR_TYPE)ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
-	clr[1] = (CLR_TYPE)ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf];
+	clr[0] = (COLOR_TYPE)ri->ri_devcmap[((uint32_t)attr >> 16) & 0xf];
+	clr[1] = (COLOR_TYPE)ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf];
 
 	if (uc == ' ') {
 		while (height--) {
 			dp = rp;
-			if (ri->ri_hwbits)
-				hp = hrp;
-			for (cnt = width; cnt; cnt--) {
-				SUBST_CLR(dp, 0);
-				if (ri->ri_hwbits)
-					SUBST_CLR(hp, 0);
+			for (cnt = width; cnt; cnt--)
+				SET_PIXEL(dp, 0);
+			if (ri->ri_hwbits) {
+				uint16_t bytes = width * PIXEL_BYTES;
+					/* XXX GCC */
+				memcpy(hp, rp, bytes);
+				hp += ri->ri_stride;
 			}
 			rp += ri->ri_stride;
-			if (ri->ri_hwbits)
-				hrp += ri->ri_stride;
 		}
 	} else {
 		fr = FONT_GLYPH(uc, font, ri);
@@ -120,35 +122,37 @@ PUTCHAR(RASOPS_DEPTH)(void *cookie, int 
 
 		while (height--) {
 			dp = rp;
-			rp += ri->ri_stride;
-			if (ri->ri_hwbits) {
-				hp = hrp;
-				hrp += ri->ri_stride;
-			}
 			fb = be32uatoh(fr);
 			fr += fs;
 			for (cnt = width; cnt; cnt--) {
-				SUBST_CLR(dp, (fb >> 31) & 1);
-				if (ri->ri_hwbits)
-					SUBST_CLR(hp, (fb >> 31) & 1);
+				SET_PIXEL(dp, (fb >> 31) & 1);
 				fb <<= 1;
 			}
+			if (ri->ri_hwbits) {
+				uint16_t bytes = width * PIXEL_BYTES;
+					/* XXX GCC */
+				memcpy(hp, rp, bytes);
+				hp += ri->ri_stride;
+			}
+			rp += ri->ri_stride;
 		}
 	}
 
 	/* Do underline */
 	if ((attr & WSATTR_UNDERLINE) != 0) {
 		rp -= (ri->ri_stride << 1);
-		if (ri->ri_hwbits)
-			hrp -= (ri->ri_stride << 1);
-		while (width--) {
-			SUBST_CLR(rp, 1);
-			if (ri->ri_hwbits)
-				SUBST_CLR(hrp, 1);
+		dp = rp;
+		for (cnt = width; cnt; cnt--)	/* keep width for memcpy */
+			SET_PIXEL(dp, 1);
+		if (ri->ri_hwbits) {
+			hp -= (ri->ri_stride << 1);
+			uint16_t bytes = width * PIXEL_BYTES; /* XXX GCC */
+			memcpy(hp, rp, bytes);
 		}
 	}
 }
 
 #undef	PUTCHAR
-#undef	CLR_TYPE
-#undef	SUBST_CLR
+#undef	COLOR_TYPE
+#undef	PIXEL_BYTES
+#undef	SET_PIXEL

Index: src/sys/dev/rasops/rasops_putchar_aa.h
diff -u src/sys/dev/rasops/rasops_putchar_aa.h:1.3 src/sys/dev/rasops/rasops_putchar_aa.h:1.4
--- src/sys/dev/rasops/rasops_putchar_aa.h:1.3	Mon Jul 29 14:59:25 2019
+++ src/sys/dev/rasops/rasops_putchar_aa.h	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: rasops_putchar_aa.h,v 1.3 2019/07/29 14:59:25 rin Exp $ */
+/* $NetBSD: rasops_putchar_aa.h,v 1.4 2019/07/30 15:29:40 rin Exp $ */
 
 /* NetBSD: rasops8.c,v 1.43 2019/07/28 12:06:10 rin Exp */
 /*-
@@ -38,41 +38,41 @@
 #define	PUTCHAR_AA(depth)	PUTCHAR_AA1(depth)
 #define	PUTCHAR_AA1(depth)	rasops ## depth ## _putchar_aa
 
-#define	MAX_WIDTH	64	/* XXX */
+#define	MAX_WIDTH		64	/* XXX */
 
 #if   RASOPS_DEPTH == 8
-#define	PIXEL_TYPE	uint8_t
+#define	PIXEL_TYPE		uint8_t
 #elif RASOPS_DEPTH == 15
-#define	PIXEL_TYPE	uint16_t
+#define	PIXEL_TYPE		uint16_t
 #elif RASOPS_DEPTH == 24
-#define	PIXEL_TYPE	uint8_t
+#define	PIXEL_TYPE		uint8_t
 #elif RASOPS_DEPTH == 32
-#define	PIXEL_TYPE	uint32_t
+#define	PIXEL_TYPE		uint32_t
 #endif
 
 #if RASOPS_DEPTH != 24
-#define	COLOR_TYPE	PIXEL_TYPE
-#define	PIXEL_LEN	sizeof(PIXEL_TYPE)
-#define	BUF_LEN		MAX_WIDTH
-#define	SET_PIXEL(x, c)	buf[x] = clr[c]
+#define	COLOR_TYPE		PIXEL_TYPE
+#define	PIXEL_BYTES		sizeof(PIXEL_TYPE)
+#define	BUF_LEN			MAX_WIDTH
+#define	SET_PIXEL(p, x, c)	(p)[x] = clr[c]
 #endif
 
 #if RASOPS_DEPTH == 24
-#define	COLOR_TYPE	uint32_t
-#define	PIXEL_LEN	3
-#define	BUF_LEN		(MAX_WIDTH * 3)
-#define	SET_PIXEL(x, c)				\
+#define	COLOR_TYPE		uint32_t
+#define	PIXEL_BYTES		3
+#define	BUF_LEN			(MAX_WIDTH * 3)
+#define	SET_PIXEL(p, x, c)			\
 	do {					\
-		buf[3 * x + 0] = clr[c] >> 16;	\
-		buf[3 * x + 1] = clr[c] >> 8;	\
-		buf[3 * x + 2] = clr[c];	\
+		(p)[3 * x + 0] = clr[c] >> 16;	\
+		(p)[3 * x + 1] = clr[c] >> 8;	\
+		(p)[3 * x + 2] = clr[c];	\
 	} while (0 /* CONSTCOND */)
 #endif
 
 #if RASOPS_DEPTH != 8
-#define	SET_BUF(c)	for (x = 0; x < width; x++) { SET_PIXEL(x, c); }
+#define	SET_WIDTH(p, c)	for (x = 0; x < width; x++) { SET_PIXEL(p, x, c); }
 #else
-#define	SET_BUF(c)	memset(buf, clr[c], width)
+#define	SET_WIDTH(p, c)	memset(p, clr[c], width)
 #endif
 
 static void
@@ -118,12 +118,12 @@ PUTCHAR_AA(RASOPS_DEPTH)(void *cookie, i
 	clr[1] = (COLOR_TYPE)ri->ri_devcmap[((uint32_t)attr >> 24) & 0xf];
 
 	if (uc == ' ') {
-		SET_BUF(0);
+		SET_WIDTH(buf, 0);
 		while (height--) {
-			memcpy(rp, buf, width * PIXEL_LEN);
+			memcpy(rp, buf, width * PIXEL_BYTES);
 			DELTA(rp, ri->ri_stride, PIXEL_TYPE *);
 			if (ri->ri_hwbits) {
-				memcpy(hp, buf, width * PIXEL_LEN);
+				memcpy(hp, buf, width * PIXEL_BYTES);
 				DELTA(hp, ri->ri_stride, PIXEL_TYPE *);
 			}
 		}
@@ -148,9 +148,9 @@ PUTCHAR_AA(RASOPS_DEPTH)(void *cookie, i
 				aval = *fr;
 				fr++;
 				if (aval == 0)
-					SET_PIXEL(x, 0);
+					SET_PIXEL(buf, x, 0);
 				else if (aval == 255)
-					SET_PIXEL(x, 1);
+					SET_PIXEL(buf, x, 1);
 				else {
 #define	AVERAGE(p, w)	((w * p[1] + (0xff - w) * p[0]) >> 8)
 					R = AVERAGE(r, aval);
@@ -186,10 +186,10 @@ PUTCHAR_AA(RASOPS_DEPTH)(void *cookie, i
 #endif
 				}
 			}
-			memcpy(rp, buf, width * PIXEL_LEN);
+			memcpy(rp, buf, width * PIXEL_BYTES);
 			DELTA(rp, ri->ri_stride, PIXEL_TYPE *);
 			if (ri->ri_hwbits) {
-				memcpy(hp, buf, width * PIXEL_LEN);
+				memcpy(hp, buf, width * PIXEL_BYTES);
 				DELTA(hp, ri->ri_stride, PIXEL_TYPE *);
 			}
 		}
@@ -197,13 +197,12 @@ PUTCHAR_AA(RASOPS_DEPTH)(void *cookie, i
 
 	/* Do underline */
 	if ((attr & WSATTR_UNDERLINE) != 0) {
-		SET_BUF(1);
 		DELTA(rp, -(ri->ri_stride << 1), PIXEL_TYPE *);
-		if (ri->ri_hwbits)
+		SET_WIDTH(rp, 1);
+		if (ri->ri_hwbits) {
 			DELTA(hp, -(ri->ri_stride << 1), PIXEL_TYPE *);
-		memcpy(rp, buf, width * PIXEL_LEN);
-		if (ri->ri_hwbits)
-			memcpy(hp, buf, width * PIXEL_LEN);
+			memcpy(hp, rp, width * PIXEL_BYTES);
+		}
 	}
 }
 
@@ -214,6 +213,6 @@ PUTCHAR_AA(RASOPS_DEPTH)(void *cookie, i
 
 #undef	PIXEL_TYPE
 #undef	COLOR_TYPE
-#undef	PIXEL_LEN
+#undef	PIXEL_BYTES
 #undef	SET_PIXEL
-#undef	SET_BUF
+#undef	SET_WIDTH

Index: src/sys/dev/rasops/rasops_putchar_width.h
diff -u src/sys/dev/rasops/rasops_putchar_width.h:1.8 src/sys/dev/rasops/rasops_putchar_width.h:1.9
--- src/sys/dev/rasops/rasops_putchar_width.h:1.8	Mon Jul 29 17:22:19 2019
+++ src/sys/dev/rasops/rasops_putchar_width.h	Tue Jul 30 15:29:40 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: rasops_putchar_width.h,v 1.8 2019/07/29 17:22:19 rin Exp $ */
+/* $NetBSD: rasops_putchar_width.h,v 1.9 2019/07/30 15:29:40 rin Exp $ */
 
 /* NetBSD: rasops8.c,v 1.41 2019/07/25 03:02:44 rin Exp  */
 /*-
@@ -57,6 +57,18 @@
 #endif
 
 #if   RASOPS_DEPTH <= 8
+#define	SUBST_UNIT	1
+#elif RASOPS_DEPTH == 15
+#define	SUBST_UNIT	2
+#elif RASOPS_DEPTH == 24
+#define	SUBST_UNIT	3
+#elif RASOPS_DEPTH == 32
+#define	SUBST_UNIT	4
+#endif
+
+#define	SUBST_BYTES	(SUBST_UNIT * (RASOPS_WIDTH / 4) * sizeof(STAMP_TYPE))
+
+#if   RASOPS_DEPTH <= 8
 #define	FILLED_STAMP	15
 #elif RASOPS_DEPTH == 15
 #define	FILLED_STAMP	30
@@ -68,16 +80,13 @@
 
 #if RASOPS_DEPTH <= 8
 
-#define	SUBST_STAMP1(p, off, base)					\
-	(p)[(off) * 1 + 0] = stamp[base]
+#define	SUBST_STAMP1(off, base)						\
+	rp[(off) * 1 + 0] = stamp[base]
 
 #define	SUBST_GLYPH1(index, nibble, off)				\
 	do {								\
 		int so = STAMP_SHIFT(fr[index], nibble) & STAMP_MASK;	\
 		rp[(off) * 1 + 0] = STAMP_READ(so);			\
-		if (ri->ri_hwbits) {					\
-			hrp[(off) * 1 + 0] = STAMP_READ(so);		\
-		}							\
 	} while (0 /* CONSTCOND */)
 
 #endif /* RASOPS_DEPTH <= 8 */
@@ -86,18 +95,14 @@
 
 #if RASOPS_DEPTH == 15
 
-#define	SUBST_STAMP1(p, off, base)					\
-	(p)[(off) * 2 + 0] = (p)[(off) * 2 + 1] = stamp[base]
+#define	SUBST_STAMP1(off, base)						\
+	rp[(off) * 2 + 0] = rp[(off) * 2 + 1] = stamp[base]
 
 #define	SUBST_GLYPH1(index, nibble, off)				\
 	do {								\
 		int so = STAMP_SHIFT(fr[index], nibble) & STAMP_MASK;	\
 		rp[(off) * 2 + 0] = STAMP_READ(so);			\
 		rp[(off) * 2 + 1] = STAMP_READ(so +  4);		\
-		if (ri->ri_hwbits) {					\
-			hrp[(off) * 2 + 0] = STAMP_READ(so);		\
-			hrp[(off) * 2 + 1] = STAMP_READ(so +  4);	\
-		}							\
 	} while (0 /* CONSTCOND */)
 
 #endif /* RASOPS_DEPTH == 15 */
@@ -106,11 +111,11 @@
 
 #if RASOPS_DEPTH == 24
 
-#define	SUBST_STAMP1(p, off, base)					\
+#define	SUBST_STAMP1(off, base)						\
 	do {								\
-		(p)[(off) * 3 + 0] = stamp[(base) + 0];			\
-		(p)[(off) * 3 + 1] = stamp[(base) + 1];			\
-		(p)[(off) * 3 + 2] = stamp[(base) + 2];			\
+		rp[(off) * 3 + 0] = stamp[(base) + 0];			\
+		rp[(off) * 3 + 1] = stamp[(base) + 1];			\
+		rp[(off) * 3 + 2] = stamp[(base) + 2];			\
 	} while (0 /* CONSTCOND */)
 
 #define	SUBST_GLYPH1(index, nibble, off)				\
@@ -119,11 +124,6 @@
 		rp[(off) * 3 + 0] = STAMP_READ(so);			\
 		rp[(off) * 3 + 1] = STAMP_READ(so +  4);		\
 		rp[(off) * 3 + 2] = STAMP_READ(so +  8);		\
-		if (ri->ri_hwbits) {					\
-			hrp[(off) * 3 + 0] = STAMP_READ(so);		\
-			hrp[(off) * 3 + 1] = STAMP_READ(so +  4);	\
-			hrp[(off) * 3 + 2] = STAMP_READ(so +  8);	\
-		}							\
 	} while (0 /* CONSTCOND */)
 
 #endif /* RASOPS_DEPTH == 24 */
@@ -132,9 +132,9 @@
 
 #if RASOPS_DEPTH == 32
 
-#define	SUBST_STAMP1(p, off, base)					\
-	(p)[(off) * 4 + 0] = (p)[(off) * 4 + 1] =			\
-	(p)[(off) * 4 + 2] = (p)[(off) * 4 + 3] = stamp[base]
+#define	SUBST_STAMP1(off, base)						\
+	rp[(off) * 4 + 0] = rp[(off) * 4 + 1] =				\
+	rp[(off) * 4 + 2] = rp[(off) * 4 + 3] = stamp[base]
 
 #define	SUBST_GLYPH1(index, nibble, off)				\
 	do {								\
@@ -143,12 +143,6 @@
 		rp[(off) * 4 + 1] = STAMP_READ(so +  4);		\
 		rp[(off) * 4 + 2] = STAMP_READ(so +  8);		\
 		rp[(off) * 4 + 3] = STAMP_READ(so + 12);		\
-		if (ri->ri_hwbits) {					\
-			hrp[(off) * 4 + 0] = STAMP_READ(so);		\
-			hrp[(off) * 4 + 1] = STAMP_READ(so +  4);	\
-			hrp[(off) * 4 + 2] = STAMP_READ(so +  8);	\
-			hrp[(off) * 4 + 3] = STAMP_READ(so + 12);	\
-		}							\
 	} while (0 /* CONSTCOND */)
 
 #endif /* RASOPS_DEPTH == 32 */
@@ -156,25 +150,25 @@
 /* ################################################################### */
 
 #if   RASOPS_WIDTH == 8
-#define	SUBST_STAMP(p, base) 			\
+#define	SUBST_STAMP(base) 			\
 	do {					\
-		SUBST_STAMP1(p, 0, base);	\
-		SUBST_STAMP1(p, 1, base);	\
+		SUBST_STAMP1(0, base);		\
+		SUBST_STAMP1(1, base);		\
 	} while (0 /* CONSTCOND */)
 #elif RASOPS_WIDTH == 12
-#define	SUBST_STAMP(p, base)			\
+#define	SUBST_STAMP(base)			\
 	do {					\
-		SUBST_STAMP1(p, 0, base);	\
-		SUBST_STAMP1(p, 1, base);	\
-		SUBST_STAMP1(p, 2, base);	\
+		SUBST_STAMP1(0, base);		\
+		SUBST_STAMP1(1, base);		\
+		SUBST_STAMP1(2, base);		\
 	} while (0 /* CONSTCOND */)
 #elif RASOPS_WIDTH == 16
-#define	SUBST_STAMP(p, base)			\
+#define	SUBST_STAMP(base)			\
 	do {					\
-		SUBST_STAMP1(p, 0, base);	\
-		SUBST_STAMP1(p, 1, base);	\
-		SUBST_STAMP1(p, 2, base);	\
-		SUBST_STAMP1(p, 3, base);	\
+		SUBST_STAMP1(0, base);		\
+		SUBST_STAMP1(1, base);		\
+		SUBST_STAMP1(2, base);		\
+		SUBST_STAMP1(3, base);		\
 	} while (0 /* CONSTCOND */)
 #endif
 
@@ -211,10 +205,10 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 	struct rasops_info *ri = (struct rasops_info *)cookie;
 	struct wsdisplay_font *font = PICK_FONT(ri, uc);
 	int height, fs;
-	STAMP_TYPE *rp, *hrp;
+	STAMP_TYPE *rp, *hp;
 	uint8_t *fr;
 
-	hrp = NULL; /* XXX GCC */
+	hp = NULL; /* XXX GCC */
 
 #ifdef RASOPS_CLIPPING
 	/* Catches 'row < 0' case too */
@@ -243,19 +237,19 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 	rp = (STAMP_TYPE *)(ri->ri_bits + row * ri->ri_yscale +
 	    col * ri->ri_xscale);
 	if (ri->ri_hwbits)
-		hrp = (STAMP_TYPE *)(ri->ri_hwbits + row * ri->ri_yscale +
+		hp = (STAMP_TYPE *)(ri->ri_hwbits + row * ri->ri_yscale +
 		    col * ri->ri_xscale);
 
 	height = font->fontheight;
 
 	if (uc == ' ') {
 		while (height--) {
-			SUBST_STAMP(rp, 0);
-			DELTA(rp, ri->ri_stride, STAMP_TYPE *);
+			SUBST_STAMP(0);
 			if (ri->ri_hwbits) {
-				SUBST_STAMP(hrp, 0);
-				DELTA(hrp, ri->ri_stride, STAMP_TYPE *);
+				memcpy(hp, rp, SUBST_BYTES);
+				DELTA(hp, ri->ri_stride, STAMP_TYPE *);
 			}
+			DELTA(rp, ri->ri_stride, STAMP_TYPE *);
 		}
 	} else {
 		fr = FONT_GLYPH(uc, font, ri);
@@ -263,21 +257,22 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 
 		while (height--) {
 			SUBST_GLYPH;
-
 			fr += fs;
+			if (ri->ri_hwbits) {
+				memcpy(hp, rp, SUBST_BYTES);
+				DELTA(hp, ri->ri_stride, STAMP_TYPE *);
+			}
 			DELTA(rp, ri->ri_stride, STAMP_TYPE *);
-			if (ri->ri_hwbits)
-				DELTA(hrp, ri->ri_stride, STAMP_TYPE *);
 		}
 	}
 
 	/* Do underline */
 	if ((attr & WSATTR_UNDERLINE) != 0) {
 		DELTA(rp, -(ri->ri_stride << 1), STAMP_TYPE *);
-		SUBST_STAMP(rp, FILLED_STAMP);
+		SUBST_STAMP(FILLED_STAMP);
 		if (ri->ri_hwbits) {
-			DELTA(hrp, -(ri->ri_stride << 1), STAMP_TYPE *);
-			SUBST_STAMP(hrp, FILLED_STAMP);
+			DELTA(hp, -(ri->ri_stride << 1), STAMP_TYPE *);
+			memcpy(hp, rp, SUBST_BYTES);
 		}
 	}
 
@@ -286,6 +281,9 @@ PUTCHAR_WIDTH(RASOPS_DEPTH, RASOPS_WIDTH
 
 #undef	STAMP_TYPE
 
+#undef	SUBST_UNIT
+#undef	SUBST_BYTES
+
 #undef	FILLED_STAMP
 
 #undef	PUTCHAR_WIDTH1

Reply via email to