Module Name:    src
Committed By:   macallan
Date:           Fri Dec 17 18:51:02 UTC 2021

Modified Files:
        src/sys/arch/sparc/dev: cgfourteen.c

Log Message:
cg14_invert():
- use only 32-bit accesses
- use byte masks / ROPs for partial writes


To generate a diff of this commit:
cvs rdiff -u -r1.90 -r1.91 src/sys/arch/sparc/dev/cgfourteen.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc/dev/cgfourteen.c
diff -u src/sys/arch/sparc/dev/cgfourteen.c:1.90 src/sys/arch/sparc/dev/cgfourteen.c:1.91
--- src/sys/arch/sparc/dev/cgfourteen.c:1.90	Sat Aug  7 16:19:05 2021
+++ src/sys/arch/sparc/dev/cgfourteen.c	Fri Dec 17 18:51:02 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cgfourteen.c,v 1.90 2021/08/07 16:19:05 thorpej Exp $ */
+/*	$NetBSD: cgfourteen.c,v 1.91 2021/12/17 18:51:02 macallan Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -1226,50 +1226,64 @@ cg14_rectfill_a(void *cookie, int dstx, 
 	    sc->sc_vd.active->scr_ri.ri_devcmap[(attr >> 24 & 0xf)]);
 }
 
+/*
+ * invert a rectangle, used only to (un)draw the cursor.
+ * - does a scanline at a time
+ * - does not handle wi > 64 or wi < 4, not that we need it for our fonts
+ * - uses all 32bit accesses
+ */ 
 static void
 cg14_invert(struct cgfourteen_softc *sc, int x, int y, int wi, int he)
 {
-	uint32_t addr, pptr;
-	int line, cnt, pre, words;
+	uint32_t addr, pptr, lmask, rmask;
+	int line, cnt, pre, words, pwrds = 0, post, reg;
 	int stride = sc->sc_fb.fb_type.fb_width;
 
-	addr = sc->sc_fb_paddr + x + stride * y;
-	sx_write(sc->sc_sx, SX_ROP_CONTROL, 0x33); /* ~src a */
+	addr = (sc->sc_fb_paddr + x + stride * y) & ~3;
+	sx_write(sc->sc_sx, SX_ROP_CONTROL, 0x3C); /* ~src a / src a */
 	/*
-	 * Calculate the number of pixels we need to do one by one
-	 * until we're 32bit aligned, then do the rest in 32bit
-	 * mode. Assumes that stride is always a multiple of 4. 
-	 */ 
-	/* TODO: use 32bit writes with byte mask instead */
-	pre = addr & 3;
-	if (pre != 0) pre = 4 - pre;
+	 * Calculate the number of pixels we need to mask on each end of the
+	 * scanline and how many we can do without mask, if any
+	 */
+	pre = x & 3;
+	if (pre != 0) {
+		lmask = 0xffffffff >> pre;
+		pre = 4 - pre;
+		pwrds++;
+	}
+	post = (x + wi) & 3;
+	if (post != 0) {
+		rmask = ~(0xffffffff >> post);
+		pwrds++;
+	}
+	words = (wi + pre + 3) >> 2;
+	cnt = words - pwrds;
+	sx_write(sc->sc_sx, SX_QUEUED(7), 0xe0e0e0e0); /* four red pixels */
 	for (line = 0; line < he; line++) {
 		pptr = addr;
-		cnt = wi;
+		/* load a whole scanline */
+		sta(pptr & ~7, ASI_SX, SX_LD(8, words - 1, pptr & 7));
+		reg = 8;
 		if (pre) {
-			sta(pptr & ~7, ASI_SX, SX_LDB(8, pre - 1, pptr & 7));
+			sx_write(sc->sc_sx, SX_QUEUED(R_MASK), lmask);
 			sx_write(sc->sc_sx, SX_INSTRUCTIONS,
-			    SX_ROP(8, 8, 32, pre - 1));
-			sta(pptr & ~7, ASI_SX, SX_STB(32, pre - 1, pptr & 7));
-			pptr += pre;
-			cnt -= pre;
+			    SX_ROPB(8, 8, 40, 0));
+			reg++;
 		}
-		/* now do the aligned pixels in 32bit chunks */
-		while(cnt > 15) {
-			words = uimin(16, cnt >> 2);
-			sta(pptr & ~7, ASI_SX, SX_LD(8, words - 1, pptr & 7));
+		if (cnt > 0) {
+			sx_write(sc->sc_sx, SX_QUEUED(R_MASK), 0xffffffff);
+			/* XXX handle cnt > 16 */
 			sx_write(sc->sc_sx, SX_INSTRUCTIONS,
-			    SX_ROP(8, 8, 32, words - 1));
-			sta(pptr & ~7, ASI_SX, SX_ST(32, words - 1, pptr & 7));
-			pptr += words << 2;
-			cnt -= words << 2;
+			    SX_ROP(reg, reg, reg + 32, cnt - 1));
+			reg += cnt;
 		}
-		/* do any remaining pixels byte-wise again */
-		if (cnt > 0)
-			sta(pptr & ~7, ASI_SX, SX_LDB(8, cnt - 1, pptr & 7));
+		if (post) {
+			sx_write(sc->sc_sx, SX_QUEUED(R_MASK), rmask);
 			sx_write(sc->sc_sx, SX_INSTRUCTIONS,
-			    SX_ROP(8, 8, 32, cnt - 1));
-			sta(pptr & ~7, ASI_SX, SX_STB(32, cnt - 1, pptr & 7));
+			    SX_ROPB(reg, 7, reg + 32, 0));
+			reg++;
+		}
+		sta(pptr & ~7, ASI_SX, SX_ST(40, words - 1, pptr & 7));		
 		addr += stride;
 	}
 }

Reply via email to