Module Name: xsrc Committed By: macallan Date: Fri Dec 10 19:42:07 UTC 2021
Modified Files: xsrc/external/mit/xf86-video-suncg14/dist/src: cg14_accel.c Log Message: Skip reading the destination fb if we're going to overwrite it; only read the words we need to partially write. While there, write the mask registers only if we're actually going to need them. Another 20% speedup. To generate a diff of this commit: cvs rdiff -u -r1.23 -r1.24 \ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.23 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.24 --- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.23 Fri Dec 10 19:09:56 2021 +++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Fri Dec 10 19:42:07 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: cg14_accel.c,v 1.23 2021/12/10 19:09:56 macallan Exp $ */ +/* $NetBSD: cg14_accel.c,v 1.24 2021/12/10 19:42:07 macallan Exp $ */ /* * Copyright (c) 2013 Michael Lorenz * All rights reserved. @@ -439,25 +439,24 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst * mask out trailing pixels to avoid partial writes */ post = (dststart + w) & 3; - rmask = ~(0xffffffff >> (post * 8)); - write_sx_reg(p, SX_QUEUED(7), rmask); - write_sx_reg(p, SX_QUEUED(6), ~rmask); - + if (post != 0) { + rmask = ~(0xffffffff >> (post * 8)); + write_sx_reg(p, SX_QUEUED(7), rmask); + write_sx_reg(p, SX_QUEUED(6), ~rmask); + } + DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); /* mask out the leading pixels in dst by using a mask and ROP */ - write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); - write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); + if (pre != 0) { + write_sx_reg(p, SX_ROP_CONTROL, (p->last_rop & 0xf0) | 0xa); + write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); + } saddr = srcstart & ~3; daddr = dststart & ~3; - - /* TODO: - * - skip reading the fb where we can get away with it, for example - * GXcopy, where we only need to read the destination for partials, - * everything in between is straight copy - */ + while (h > 0) { write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7)); write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); @@ -517,6 +516,111 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst } } +/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */ 
+static void +CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) +{ + int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; + int ssreg; +#ifdef DEBUG + int taddr = 4 + dstpitch * 50; +#endif + uint32_t lmask, rmask; + ENTER; + + pre = dststart & 3; + lmask = 0xffffffff >> pre; + spre = srcstart & 3; + /* + * make sure we count all the words needed to cover the destination + * line, covering potential partials on both ends + */ + wrds = (w + pre + 3) >> 2; + swrds = (w + spre + 3) >> 2; + + if (spre < pre) { + dist = 32 - (pre - spre) * 8; + sreg = 9; + } else { + dist = (spre - pre) * 8; + sreg = 8; + } + + /* + * mask out trailing pixels to avoid partial writes + */ + post = (dststart + w) & 3; + if (post != 0) { + rmask = ~(0xffffffff >> (post * 8)); + write_sx_reg(p, SX_QUEUED(7), rmask); + write_sx_reg(p, SX_QUEUED(6), ~rmask); + } + + DPRINTF(X_ERROR, "%s %d %d, %d %d %08x %d %d %d %d %08x\n", __func__, + w, h, spre, pre, lmask, dist, sreg, wrds, post, rmask); + + /* mask out the leading pixels in dst by using a mask and ROP */ + if (pre != 0) { + write_sx_reg(p, SX_ROP_CONTROL, 0xca); + write_sx_reg(p, SX_QUEUED(R_MASK), lmask); + } + + saddr = srcstart & ~3; + daddr = dststart & ~3; + + while (h > 0) { + write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); + if (wrds > 15) { + if (dist != 0) { + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); + /* shifted source pixels are now at register 40+ */ + ssreg = 40; + } else ssreg = 8; + if (pre != 0) { + /* read only the first word */ + write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); + /* mask out leading junk */ + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); + } + } else { + if (dist != 0) { + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); + ssreg = 40; + } else ssreg = 8; + if (pre != 0) { + /* read only 
the first word */ + write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7)); + /* mask out leading junk */ + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0)); + } + } + if (post != 0) { + int laddr = daddr + ((wrds - 1) << 2); + /* + * if the last word to be written out is a partial we + * mask out the leftovers and replace them with + * background pixels + * we could pull the same ROP * mask trick as we do on + * the left end but it's less annoying this way and + * the instruction count is the same + */ + write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0)); + } +#ifdef DEBUG + write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7)); + taddr += dstpitch; +#endif + write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7)); + saddr += srcpitch; + daddr += dstpitch; + h--; + } +} + static void CG14Copy8(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int w, int h) @@ -555,7 +659,13 @@ CG14Copy8(PixmapPtr pDstPixmap, * uses all 32bit accesses and funnel shifter for unaligned copies */ if ((w < 125) && (w > 8)) { - CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc); + switch (p->last_rop) { + case 0xcc: + CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc); + break; + default: + CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc); + } return; }