Module Name: xsrc Committed By: macallan Date: Fri Dec 10 19:09:56 UTC 2021
Modified Files:
	xsrc/external/mit/xf86-video-suncg14/dist/src: cg14_accel.c

Log Message:
CG14Copy8_short_rop(): skip the funnel shifter if source and destination
are aligned with each other (dist == 0).
Small but measurable speedup.

To generate a diff of this commit:
cvs rdiff -u -r1.22 -r1.23 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.22 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.23 --- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.22 Fri Dec 10 18:25:43 2021 +++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Fri Dec 10 19:09:56 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: cg14_accel.c,v 1.22 2021/12/10 18:25:43 macallan Exp $ */ +/* $NetBSD: cg14_accel.c,v 1.23 2021/12/10 19:09:56 macallan Exp $ */ /* * Copyright (c) 2013 Michael Lorenz * All rights reserved. @@ -410,6 +410,7 @@ static void CG14Copy8_short_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch) { int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post; + int ssreg; #ifdef DEBUG int taddr = 4 + dstpitch * 50; #endif @@ -453,8 +454,6 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst daddr = dststart & ~3; /* TODO: - * - special case dist == 0 where we can skip the funnel shifter - * and only need to deal with leading / trailing garbage * - skip reading the fb where we can get away with it, for example * GXcopy, where we only need to read the destination for partials, * everything in between is straight copy @@ -463,30 +462,35 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7)); write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7)); if (wrds > 15) { - write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); - write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); - /* shifted source pixels are now at register 40+ */ + if (dist != 0) { + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16)); + /* shifted source pixels are now at register 40+ */ + ssreg = 40; + } else ssreg = 8; if (pre != 0) { /* mask out leading junk */ write_sx_reg(p, 
SX_QUEUED(R_MASK), lmask); - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 0)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(41, 81, 9, 14)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14)); } else { - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 15)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15)); } - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(56, 96, 24, wrds - 16)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16)); } else { - write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); - + if (dist != 0) { + write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds)); + ssreg = 40; + } else ssreg = 8; if (pre != 0) { /* mask out leading junk */ write_sx_reg(p, SX_QUEUED(R_MASK), lmask); - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, 0)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0)); write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff); - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(41, 81, 9, wrds)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds)); } else { - write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(40, 80, 8, wrds)); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds)); } } if (post != 0) {