Author: marius
Date: Wed Sep 22 20:03:59 2010
New Revision: 213020
URL: http://svn.freebsd.org/changeset/base/213020

Log:
  MFC: r212709, r212730
  
  Add a VIS-based block copy function for SPARC64 V and later, which
  additionally takes advantage of the prefetch cache of these CPUs.
  Unlike the uncommitted US-III version, which provided no measurable
  speedup or even resulted in a slight slowdown on certain CPU models
  compared to using the US-I version with these, the SPARC64 version
  actually results in a slight improvement.

Modified:
  stable/8/sys/sparc64/include/md_var.h
  stable/8/sys/sparc64/sparc64/machdep.c
  stable/8/sys/sparc64/sparc64/support.S
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/sparc64/include/md_var.h
==============================================================================
--- stable/8/sys/sparc64/include/md_var.h       Wed Sep 22 20:01:35 2010        (r213019)
+++ stable/8/sys/sparc64/include/md_var.h       Wed Sep 22 20:03:59 2010        (r213020)
@@ -58,6 +58,8 @@ struct md_utrap *utrap_hold(struct md_ut
 
 cpu_block_copy_t spitfire_block_copy;
 cpu_block_zero_t spitfire_block_zero;
+cpu_block_copy_t zeus_block_copy;
+cpu_block_zero_t zeus_block_zero;
 
 extern cpu_block_copy_t *cpu_block_copy;
 extern cpu_block_zero_t *cpu_block_zero;

Modified: stable/8/sys/sparc64/sparc64/machdep.c
==============================================================================
--- stable/8/sys/sparc64/sparc64/machdep.c      Wed Sep 22 20:01:35 2010        (r213019)
+++ stable/8/sys/sparc64/sparc64/machdep.c      Wed Sep 22 20:03:59 2010        (r213020)
@@ -494,7 +494,6 @@ sparc64_init(caddr_t mdp, u_long o1, u_l
        if (cpu_use_vis) {
                switch (cpu_impl) {
                case CPU_IMPL_SPARC64:
-               case CPU_IMPL_SPARC64V:
                case CPU_IMPL_ULTRASPARCI:
                case CPU_IMPL_ULTRASPARCII:
                case CPU_IMPL_ULTRASPARCIIi:
@@ -508,6 +507,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_l
                        cpu_block_copy = spitfire_block_copy;
                        cpu_block_zero = spitfire_block_zero;
                        break;
+               case CPU_IMPL_SPARC64V:
+                       cpu_block_copy = zeus_block_copy;
+                       cpu_block_zero = zeus_block_zero;
+                       break;
                }
        }
 

Modified: stable/8/sys/sparc64/sparc64/support.S
==============================================================================
--- stable/8/sys/sparc64/sparc64/support.S      Wed Sep 22 20:01:35 2010        (r213019)
+++ stable/8/sys/sparc64/sparc64/support.S      Wed Sep 22 20:03:59 2010        (r213020)
@@ -661,8 +661,121 @@ ENTRY(spitfire_block_copy)
 END(spitfire_block_copy)
 
 /*
+ * void zeus_block_copy(void *src, void *dst, size_t len)
+ */
+ENTRY(zeus_block_copy)
+       prefetch [%o0 + (0 * 64)], 0
+
+       rdpr    %pil, %o3
+       wrpr    %g0, PIL_TICK, %pil
+
+       wr      %g0, ASI_BLK_S, %asi
+       wr      %g0, FPRS_FEF, %fprs
+
+       sub     PCB_REG, TF_SIZEOF, %o4
+       ldx     [%o4 + TF_FPRS], %o5
+       andcc   %o5, FPRS_FEF, %g0
+       bz,a,pt %xcc, 1f
+        nop
+       stda    %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi
+       stda    %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi
+       stda    %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi
+       stda    %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi
+       membar  #Sync
+
+       andn    %o5, FPRS_FEF, %o5
+       stx     %o5, [%o4 + TF_FPRS]
+       ldx     [PCB_REG + PCB_FLAGS], %o4
+       or      %o4, PCB_FEF, %o4
+       stx     %o4, [PCB_REG + PCB_FLAGS]
+
+1:     wrpr    %o3, 0, %pil
+
+       ldd     [%o0 + (0 * 8)], %f0
+       prefetch [%o0 + (1 * 64)], 0
+       ldd     [%o0 + (1 * 8)], %f2
+       prefetch [%o0 + (2 * 64)], 0
+       fmovd   %f0, %f32
+       ldd     [%o0 + (2 * 8)], %f4
+       prefetch [%o0 + (3 * 64)], 0
+       fmovd   %f2, %f34
+       ldd     [%o0 + (3 * 8)], %f6
+       prefetch [%o0 + (4 * 64)], 1
+       fmovd   %f4, %f36
+       ldd     [%o0 + (4 * 8)], %f8
+       prefetch [%o0 + (8 * 64)], 1
+       fmovd   %f6, %f38
+       ldd     [%o0 + (5 * 8)], %f10
+       prefetch [%o0 + (12 * 64)], 1
+       fmovd   %f8, %f40
+       ldd     [%o0 + (6 * 8)], %f12
+       prefetch [%o0 + (16 * 64)], 1
+       fmovd   %f10, %f42
+       ldd     [%o0 + (7 * 8)], %f14
+       ldd     [%o0 + (8 * 8)], %f0
+       sub     %o2, 64, %o2
+       add     %o0, 64, %o0
+       prefetch [%o0 + (19 * 64)], 1
+       ba,pt   %xcc, 2f
+        prefetch [%o0 + (23 * 64)], 1
+       .align  32
+
+2:     ldd     [%o0 + (1 * 8)], %f2
+       fmovd   %f12, %f44
+       ldd     [%o0 + (2 * 8)], %f4
+       fmovd   %f14, %f46
+       stda    %f32, [%o1] %asi
+       ldd     [%o0 + (3 * 8)], %f6
+       fmovd   %f0, %f32
+       ldd     [%o0 + (4 * 8)], %f8
+       fmovd   %f2, %f34
+       ldd     [%o0 + (5 * 8)], %f10
+       fmovd   %f4, %f36
+       ldd     [%o0 + (6 * 8)], %f12
+       fmovd   %f6, %f38
+       ldd     [%o0 + (7 * 8)], %f14
+       fmovd   %f8, %f40
+       ldd     [%o0 + (8 * 8)], %f0
+       fmovd   %f10, %f42
+       sub     %o2, 64, %o2
+       prefetch [%o0 + (3 * 64)], 0
+       add     %o1, 64, %o1
+       prefetch [%o0 + (24 * 64)], 1
+       add     %o0, 64, %o0
+       cmp     %o2, 64 + 8
+       bgu,pt  %xcc, 2b
+        prefetch [%o0 + (12 * 64)], 1
+       ldd     [%o0 + (1 * 8)], %f2
+       fsrc1   %f12, %f44
+       ldd     [%o0 + (2 * 8)], %f4
+       fsrc1   %f14, %f46
+       stda    %f32, [%o1] %asi
+       ldd     [%o0 + (3 * 8)], %f6
+       fsrc1   %f0, %f32
+       ldd     [%o0 + (4 * 8)], %f8
+       fsrc1   %f2, %f34
+       ldd     [%o0 + (5 * 8)], %f10
+       fsrc1   %f4, %f36
+       ldd     [%o0 + (6 * 8)], %f12
+       fsrc1   %f6, %f38
+       ldd     [%o0 + (7 * 8)], %f14
+       fsrc1   %f8, %f40
+       add     %o1, 64, %o1
+       fsrc1   %f10, %f42
+       fsrc1   %f12, %f44
+       fsrc1   %f14, %f46
+       stda    %f32, [%o1] %asi
+       membar  #Sync
+
+       retl
+        wr     %g0, 0, %fprs
+END(zeus_block_copy)
+
+/*
  * void spitfire_block_zero(void *dst, size_t len)
+ * void zeus_block_zero(void *dst, size_t len)
  */
+ALTENTRY(zeus_block_zero)
 ENTRY(spitfire_block_zero)
        rdpr    %pil, %o3
        wrpr    %g0, PIL_TICK, %pil
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to