.gitignore                            |    7 
 Makefile.win32                        |   14 
 Makefile.win32.common                 |    4 
 TODO                                  |  271 ---
 configure.ac                          |   45 
 demos/Makefile.am                     |    8 
 demos/composite-test.c                |    2 
 demos/gtk-utils.c                     |   51 
 demos/gtk-utils.h                     |    1 
 demos/radial-test.c                   |    2 
 demos/srgb-test.c                     |   87 +
 demos/srgb-trap-test.c                |  119 +
 pixman-1.pc.in                        |    4 
 pixman/Makefile.am                    |   47 
 pixman/Makefile.sources               |   26 
 pixman/Makefile.win32                 |    2 
 pixman/loongson-mmintrin.h            |  137 +
 pixman/make-combine.pl                |   86 -
 pixman/make-srgb.pl                   |  115 +
 pixman/pixman-access.c                |  714 ++++++---
 pixman/pixman-arm-common.h            |   24 
 pixman/pixman-arm-neon-asm-bilinear.S |  119 -
 pixman/pixman-arm-neon-asm.S          |  159 +-
 pixman/pixman-arm-neon-asm.h          |   45 
 pixman/pixman-arm-neon.c              |   89 -
 pixman/pixman-arm-simd-asm.S          |   47 
 pixman/pixman-arm.c                   |  225 +++
 pixman/pixman-bits-image.c            |  377 ++---
 pixman/pixman-combine-float.c         | 1010 +++++++++++++
 pixman/pixman-combine.c.template      | 2461 ----------------------------------
 pixman/pixman-combine.h.template      |  226 ---
 pixman/pixman-combine32.c             | 2460 +++++++++++++++++++++++++++++++++
 pixman/pixman-combine32.h             |  225 +++
 pixman/pixman-compiler.h              |    4 
 pixman/pixman-conical-gradient.c      |    7 
 pixman/pixman-cpu.c                   |  815 -----------
 pixman/pixman-fast-path.c             |   76 -
 pixman/pixman-general.c               |   70 
 pixman/pixman-glyph.c                 |  676 +++++++++
 pixman/pixman-image.c                 |   40 
 pixman/pixman-implementation.c        |  339 +++-
 pixman/pixman-inlines.h               |   98 -
 pixman/pixman-linear-gradient.c       |   11 
 pixman/pixman-matrix.c                |    6 
 pixman/pixman-mips-dspr2-asm.S        | 1916 ++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h        |  112 +
 pixman/pixman-mips-dspr2.c            |  226 ++-
 pixman/pixman-mips-dspr2.h            |  184 ++
 pixman/pixman-mips.c                  |   94 +
 pixman/pixman-mmx.c                   |  741 ++++++++--
 pixman/pixman-noop.c                  |   34 
 pixman/pixman-ppc.c                   |  155 ++
 pixman/pixman-private.h               |  236 ++-
 pixman/pixman-radial-gradient.c       |    9 
 pixman/pixman-region.c                |   69 
 pixman/pixman-solid-fill.c            |   29 
 pixman/pixman-sse2.c                  |  372 +++--
 pixman/pixman-trap.c                  |  142 +
 pixman/pixman-utils.c                 |  297 +---
 pixman/pixman-x86.c                   |  237 +++
 pixman/pixman.c                       |   56 
 pixman/pixman.h                       |   86 +
 test/Makefile.sources                 |    4 
 test/Makefile.win32                   |   10 
 test/affine-test.c                    |   28 
 test/blitters-test.c                  |   35 
 test/combiner-test.c                  |  151 ++
 test/composite-traps-test.c           |    2 
 test/composite.c                      |   88 -
 test/fuzzer-find-diff.pl              |    7 
 test/glyph-test.c                     |  338 ++++
 test/gradient-crash-test.c            |    2 
 test/infinite-loop.c                  |   39 
 test/lowlevel-blt-bench.c             |   64 
 test/pdf-op-test.c                    |    2 
 test/rotate-test.c                    |  113 +
 test/scaling-crash-test.c             |    7 
 test/scaling-test.c                   |   46 
 test/stress-test.c                    |   95 -
 test/utils.c                          |  153 +-
 test/utils.h                          |   15 
 81 files changed, 11802 insertions(+), 5743 deletions(-)

New commits:
commit 8a2ff3e0ef0449921d962f8b9c093c2353ffd945
Author: Søren Sandmann Pedersen <s...@redhat.com>
Date:   Wed Nov 7 13:40:34 2012 -0500

    Pre-release version bump to 0.28.0

diff --git a/configure.ac b/configure.ac
index 44ae5bf..65f7162 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,8 +53,8 @@ AC_PREREQ([2.57])
 #
 
 m4_define([pixman_major], 0)
-m4_define([pixman_minor], 27)
-m4_define([pixman_micro], 5)
+m4_define([pixman_minor], 28)
+m4_define([pixman_micro], 0)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 4b91f6ca72db3e8cbd7e97e9ef44be2f8994040d
Author: Søren Sandmann Pedersen <s...@redhat.com>
Date:   Thu Oct 25 10:42:26 2012 -0400

    Post-release version bump to 0.27.5

diff --git a/configure.ac b/configure.ac
index 1132bcd..44ae5bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 27)
-m4_define([pixman_micro], 4)
+m4_define([pixman_micro], 5)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit 0de3f3344908757b61f9f51b59d4a39f7447451b
Author: Søren Sandmann Pedersen <s...@redhat.com>
Date:   Thu Oct 25 10:35:27 2012 -0400

    Pre-release version bump to 0.27.4

diff --git a/configure.ac b/configure.ac
index f9c0e02..1132bcd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,7 +54,7 @@ AC_PREREQ([2.57])
 
 m4_define([pixman_major], 0)
 m4_define([pixman_minor], 27)
-m4_define([pixman_micro], 3)
+m4_define([pixman_micro], 4)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 

commit f0750258459580bbc9f136710f8e5c551bd01a0f
Author: Nemanja Lukic <nemanja.lu...@rt-rk.com>
Date:   Sun Oct 14 11:58:52 2012 +0200

    MIPS: DSPr2: Added more fast-paths for ADD operation: - add_8888_8888_8888 - add_8_8 - add_8888_8888
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench results
    
    Referent (before):
            add_8888_8888_8888 =  L1:  17.55  L2:  13.35  M:  8.13 ( 93.95%)  HT:  6.60  VT:  6.64  R:  6.45  RT:  3.47 (  26Kops/s)
            add_8_8            =  L1:  86.07  L2:  84.89  M: 62.36 ( 90.11%)  HT: 36.36  VT: 34.74  R: 29.56  RT: 11.56 (  52Kops/s)
            add_8888_8888      =  L1:  95.59  L2:  73.05  M: 17.62 (101.84%)  HT: 15.46  VT: 15.01  R: 13.94  RT:  6.71 (  42Kops/s)
    
    Optimized:
            add_8888_8888_8888 =  L1:  41.52  L2:  33.21  M: 11.97 (138.45%)  HT: 10.47  VT: 10.19  R:  9.42  RT:  4.86 (  32Kops/s)
            add_8_8            =  L1: 135.06  L2: 104.82  M: 57.13 ( 82.58%)  HT: 34.79  VT: 36.60  R: 28.28  RT: 10.54 (  51Kops/s)
            add_8888_8888      =  L1: 176.36  L2:  67.82  M: 17.48 (101.06%)  HT: 15.16  VT: 14.62  R: 13.88  RT:  8.05 (  45Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 7c8ca30..b5cae16 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1631,6 +1631,208 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
 
 END(pixman_composite_add_8888_n_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - mask (a8r8g8b8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li       t4, 0x00ff00ff
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+1:
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+    lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
+    lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
+    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+    addiu    a1, a1, 8
+    addiu    a2, a2, 8
+    srl      t2, t2, 24
+    srl      t3, t3, 24
+
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+                                       t2, t3, \
+                                       t5, t6, \
+                                       t7, t8, \
+                                       t4, t9, s0, s1, s2, t0, t1
+
+    sw       t7, 0(a0)
+    sw       t8, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b
+     addiu   a0, a0, 8
+2:
+    beqz     a3, 3f
+     nop
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    srl      t1, t1, 24
+
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
+
+    sw       t3, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+    j        ra
+     nop
+
+END(pixman_composite_add_8888_8888_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
+/*
+ * a0 - dst  (a8)
+ * a1 - src  (a8)
+ * a2 - w
+ */
+
+    beqz              a2, 3f
+     nop
+    srl               t9, a2, 2   /* t9 = how many multiples of 4 dst pixels */
+    beqz              t9, 1f      /* branch if less than 4 src pixels */
+     nop
+
+0:
+    beqz              t9, 1f
+     addiu            t9, t9, -1
+    lbu               t0, 0(a1)
+    lbu               t1, 1(a1)
+    lbu               t2, 2(a1)
+    lbu               t3, 3(a1)
+    lbu               t4, 0(a0)
+    lbu               t5, 1(a0)
+    lbu               t6, 2(a0)
+    lbu               t7, 3(a0)
+
+    addiu             a1, a1, 4
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr_sra.ph.w    t5, t4, 0
+    precr_sra.ph.w    t7, t6, 0
+
+    precr.qb.ph       t0, t3, t1
+    precr.qb.ph       t1, t7, t5
+
+    addu_s.qb         t2, t0, t1
+
+    sb                t2, 0(a0)
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a2, a2, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a2, 3f
+     nop
+2:
+    lbu               t0, 0(a1)
+    lbu               t1, 0(a0)
+    addiu             a1, a1, 1
+
+    addu_s.qb         t2, t0, t1
+    sb                t2, 0(a0)
+    addiu             a2, a2, -1
+    bnez              a2, 2b
+     addiu            a0, a0, 1
+
+3:
+    j                 ra
+     nop
+
+END(pixman_composite_add_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+    beqz         a2, 4f
+     nop
+
+    srl          t9, a2, 2      /* t9 = how many multiples of 4 src pixels */
+    beqz         t9, 3f         /* branch if less than 4 src pixels */
+     nop
+1:
+    addiu        t9, t9, -1
+    beqz         t9, 2f
+     addiu       a2, a2, -4
+
+    lw           t0, 0(a1)
+    lw           t1, 4(a1)
+    lw           t2, 8(a1)
+    lw           t3, 12(a1)
+    lw           t4, 0(a0)
+    lw           t5, 4(a0)
+    lw           t6, 8(a0)
+    lw           t7, 12(a0)
+    addiu        a1, a1, 16
+
+    addu_s.qb    t4, t4, t0
+    addu_s.qb    t5, t5, t1
+    addu_s.qb    t6, t6, t2
+    addu_s.qb    t7, t7, t3
+
+    sw           t4, 0(a0)
+    sw           t5, 4(a0)
+    sw           t6, 8(a0)
+    sw           t7, 12(a0)
+    b            1b
+     addiu       a0, a0, 16
+2:
+    lw           t0, 0(a1)
+    lw           t1, 4(a1)
+    lw           t2, 8(a1)
+    lw           t3, 12(a1)
+    lw           t4, 0(a0)
+    lw           t5, 4(a0)
+    lw           t6, 8(a0)
+    lw           t7, 12(a0)
+    addiu        a1, a1, 16
+
+    addu_s.qb    t4, t4, t0
+    addu_s.qb    t5, t5, t1
+    addu_s.qb    t6, t6, t2
+    addu_s.qb    t7, t7, t3
+
+    sw           t4, 0(a0)
+    sw           t5, 4(a0)
+    sw           t6, 8(a0)
+    sw           t7, 12(a0)
+
+    beqz         a2, 4f
+     addiu       a0, a0, 16
+3:
+    lw           t0, 0(a1)
+    lw           t1, 0(a0)
+    addiu        a1, a1, 4
+    addiu        a2, a2, -1
+    addu_s.qb    t1, t1, t0
+    sw           t1, 0(a0)
+    bnez         a2, 3b
+     addiu       a0, a0, 4
+4:
+    jr           ra
+     nop
+
+END(pixman_composite_add_8888_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 1471750..9da636d 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -50,6 +50,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
                                     uint8_t, 3, uint8_t, 3)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
+                                    uint8_t, 1, uint8_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
+                                    uint32_t, 1, uint32_t, 1)
 
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
@@ -77,6 +81,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
                                          uint8_t,  1, uint8_t,  1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
                                          uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1,
+                                         uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1,
                                          uint8_t,  1, uint16_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
@@ -291,8 +297,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   mips_composite_add_0565_8_0565),
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, mips_composite_add_8888_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, mips_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, mips_composite_add_8888_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, mips_composite_add_8888_n_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, mips_composite_add_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       mips_composite_add_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, mips_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, mips_composite_add_8888_8888),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),

commit ca83717c63813b6f53f89dd94b5771bd32382a18
Author: Nemanja Lukic <nemanja.lu...@rt-rk.com>
Date:   Sun Oct 14 11:58:51 2012 +0200

    MIPS: DSPr2: Added more fast-paths for ADD operation: - add_0565_8_0565 - add_8888_8_8888 - add_8888_n_8888
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench results
    
    Referent (before):
            add_0565_8_0565 =  L1:   8.89  L2:   8.37  M:  7.35 ( 29.22%)  HT:  5.90  VT:  5.85  R:  5.67  RT:  3.31 (  26Kops/s)
            add_8888_8_8888 =  L1:  17.22  L2:  14.17  M:  9.89 ( 65.56%)  HT:  7.57  VT:  7.50  R:  7.36  RT:  4.10 (  30Kops/s)
            add_8888_n_8888 =  L1:  17.79  L2:  14.87  M: 10.35 ( 54.89%)  HT:  5.19  VT:  4.93  R:  4.92  RT:  1.90 (  19Kops/s)
    
    Optimized:
            add_0565_8_0565 =  L1:  21.72  L2:  20.01  M: 14.96 ( 59.54%)  HT: 12.03  VT: 11.81  R: 11.26  RT:  6.33 (  37Kops/s)
            add_8888_8_8888 =  L1:  47.42  L2:  38.64  M: 15.90 (105.48%)  HT: 13.34  VT: 13.03  R: 11.84  RT:  6.63 (  38Kops/s)
            add_8888_n_8888 =  L1:  54.83  L2:  42.66  M: 17.36 ( 92.11%)  HT: 15.20  VT: 14.82  R: 13.66  RT:  7.83 (  41Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 614c628..7c8ca30 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1461,6 +1461,176 @@ LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
 
 END(pixman_composite_add_n_8_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
+/*
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (r5g6b5)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+    li       t4, 0xf800f800
+    li       t5, 0x07e007e0
+    li       t6, 0x001F001F
+    li       t7, 0x00ff00ff
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+1:
+    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
+    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
+    lbu      t2, 0(a2) /* t2 = mask        (a8) */
+    lbu      t3, 1(a2) /* t3 = mask        (a8) */
+    lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
+    lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
+    addiu    a1, a1, 4
+    addiu    a2, a2, 2
+
+    CONVERT_2x0565_TO_2x8888  t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
+    CONVERT_2x0565_TO_2x8888  t8, t9, s2, s3, t5, t6, s4, s5, s6, s7
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4  s0, s1, \
+                                        t2, t3, \
+                                        s2, s3, \
+                                        t0, t1, \
+                                        t7, s4, s5, s6, s7, t8, t9
+    CONVERT_2x8888_TO_2x0565  t0, t1, s0, s1, t4, t5, t6, s2, s3
+
+    sh       s0, 0(a0)
+    sh       s1, 2(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b
+     addiu   a0, a0, 4
+2:
+    beqz     a3, 3f
+     nop
+    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
+    lbu      t1, 0(a2) /* t1 = mask        (a8) */
+    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888  t0, t3, t4, t5
+    CONVERT_1x0565_TO_1x8888  t2, t4, t5, t6
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4  t3, t1, t4, t0, t7, t2, t5, t6
+    CONVERT_1x8888_TO_1x0565  t0, t3, t4, t5
+
+    sh       t3, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+    j        ra
+     nop
+
+END(pixman_composite_add_0565_8_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li       t4, 0x00ff00ff
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+1:
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+    lbu      t2, 0(a2) /* t2 = mask        (a8) */
+    lbu      t3, 1(a2) /* t3 = mask        (a8) */
+    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+    addiu    a1, a1, 8
+    addiu    a2, a2, 2
+
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+                                       t2, t3, \
+                                       t5, t6, \
+                                       t7, t8, \
+                                       t4, t9, s0, s1, s2, t0, t1
+
+    sw       t7, 0(a0)
+    sw       t8, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b
+     addiu   a0, a0, 8
+2:
+    beqz     a3, 3f
+     nop
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lbu      t1, 0(a2) /* t1 = mask        (a8) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7
+
+    sw       t3, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+    j        ra
+     nop
+
+END(pixman_composite_add_8888_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - mask (32bit constant)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li       t4, 0x00ff00ff
+    beqz     a3, 3f
+     nop
+    srl      a2, a2, 24
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+1:
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+                       /* a2 = mask        (32bit constant) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+    addiu    a1, a1, 8
+
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
+                                       a2, a2, \
+                                       t2, t3, \
+                                       t5, t6, \
+                                       t4, t7, t8, t9, s0, s1, s2
+
+    sw       t5, 0(a0)
+    sw       t6, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b
+     addiu   a0, a0, 8
+2:
+    beqz     a3, 3f
+     nop
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+                       /* a2 = mask        (32bit constant) */
+    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7
+
+    sw       t3, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+    j        ra
+     nop
+
+END(pixman_composite_add_8888_n_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 30d2a85..1471750 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -70,9 +70,15 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
                                       uint32_t, 1, uint16_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
                                       uint16_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1)
 
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
                                          uint8_t,  1, uint8_t,  1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
+                                         uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1,
+                                         uint8_t,  1, uint16_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
                                          uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
@@ -281,6 +287,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, mips_composite_add_n_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, mips_composite_add_n_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       mips_composite_add_8_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   mips_composite_add_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   mips_composite_add_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, mips_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, mips_composite_add_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, mips_composite_add_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, mips_composite_add_8888_n_8888),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),

commit 52d20e692ebc605077448ab6f52fd257f83481b2
Author: Nemanja Lukic <nemanja.lu...@rt-rk.com>
Date:   Sun Oct 14 11:58:50 2012 +0200

    MIPS: DSPr2: Added fast-paths for ADD operation: - add_n_8_8 - add_n_8_8888 - add_8_8_8
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench results
    
    Referent (before):
            add_n_8_8    =  L1:  41.37  L2:  37.83  M: 30.38 ( 60.45%)  HT: 23.70  VT: 22.85  R: 21.51  RT: 10.32 (  45Kops/s)
            add_n_8_8888 =  L1:  16.01  L2:  14.46  M: 11.64 ( 46.32%)  HT:  5.50  VT:  5.18  R:  5.06  RT:  1.89 (  18Kops/s)
            add_8_8_8    =  L1:  13.26  L2:  12.47  M: 11.16 ( 29.61%)  HT:  8.09  VT:  8.04  R:  7.68  RT:  3.90 (  29Kops/s)
    
    Optimized:
            add_n_8_8    =  L1:  96.03  L2:  79.37  M: 51.89 (103.31%)  HT: 32.59  VT: 31.29  R: 28.52  RT: 11.08 (  46Kops/s)
            add_n_8_8888 =  L1:  53.61  L2:  46.92  M: 23.78 ( 94.70%)  HT: 19.06  VT: 18.64  R: 17.30  RT:  9.15 (  43Kops/s)
            add_8_8_8    =  L1:  89.65  L2:  66.82  M: 37.10 ( 98.48%)  HT: 22.10  VT: 21.74  R: 20.12  RT:  8.12 (  41Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 3a6b26a..614c628 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1209,6 +1209,258 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
 
 END(pixman_composite_over_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
+/*
+ * a0 - dst  (a8)
+ * a1 - src  (a8)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, v0, v1
+    li                t9, 0x00ff00ff
+    beqz              a3, 3f
+     nop
+
+    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
+    beqz              v0, 1f      /* branch if less than 4 src pixels */
+     nop
+
+0:
+    beqz              v0, 1f
+     addiu            v0, v0, -1
+    lbu               t0, 0(a2)
+    lbu               t1, 1(a2)
+    lbu               t2, 2(a2)
+    lbu               t3, 3(a2)
+    lbu               t4, 0(a0)
+    lbu               t5, 1(a0)
+    lbu               t6, 2(a0)
+    lbu               t7, 3(a0)
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr_sra.ph.w    t5, t4, 0
+    precr_sra.ph.w    t7, t6, 0
+
+    precr.qb.ph       t0, t3, t1
+    precr.qb.ph       t1, t7, t5
+
+    lbu               t4, 0(a1)
+    lbu               v1, 1(a1)
+    lbu               t7, 2(a1)
+    lbu               t8, 3(a1)
+
+    addiu             a1, a1, 4
+
+    precr_sra.ph.w    v1, t4, 0
+    precr_sra.ph.w    t8, t7, 0
+
+    muleu_s.ph.qbl    t2, t0, t8
+    muleu_s.ph.qbr    t3, t0, v1
+    shra_r.ph         t4, t2, 8
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t0, t2, t3
+
+    addu_s.qb         t2, t0, t1
+
+    sb                t2, 0(a0)
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a3, 3f
+     nop
+2:
+    lbu               t8, 0(a1)
+    lbu               t0, 0(a2)
+    lbu               t1, 0(a0)
+    addiu             a1, a1, 1
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0xff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+    andi              t2, t2, 0xff
+
+    addu_s.qb         t2, t2, t1
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0, v1
+    j                 ra
+     nop
+
+END(pixman_composite_add_8_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
+/*
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, v0
+    li                t9, 0x00ff00ff
+    beqz              a3, 3f
+     nop
+
+    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
+    beqz              v0, 1f      /* branch if less than 4 src pixels */
+     nop
+
+    srl               t8, a1, 24
+    replv.ph          t8, t8
+
+0:
+    beqz              v0, 1f
+     addiu            v0, v0, -1
+    lbu               t0, 0(a2)
+    lbu               t1, 1(a2)
+    lbu               t2, 2(a2)
+    lbu               t3, 3(a2)
+    lbu               t4, 0(a0)
+    lbu               t5, 1(a0)
+    lbu               t6, 2(a0)
+    lbu               t7, 3(a0)
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr_sra.ph.w    t5, t4, 0
+    precr_sra.ph.w    t7, t6, 0
+
+    precr.qb.ph       t0, t3, t1
+    precr.qb.ph       t1, t7, t5
+
+    muleu_s.ph.qbl    t2, t0, t8
+    muleu_s.ph.qbr    t3, t0, t8
+    shra_r.ph         t4, t2, 8
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t0, t2, t3
+
+    addu_s.qb         t2, t0, t1
+
+    sb                t2, 0(a0)
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a3, 3f
+     nop
+    srl               t8, a1, 24
+2:
+    lbu               t0, 0(a2)
+    lbu               t1, 0(a0)
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0xff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+    andi              t2, t2, 0xff
+
+    addu_s.qb         t2, t2, t1
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j                 ra
+     nop
+
+END(pixman_composite_add_n_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li       t4, 0x00ff00ff
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+1:
+                       /* a1 = source      (32bit constant) */
+    lbu      t0, 0(a2) /* t0 = mask        (a8) */
+    lbu      t1, 1(a2) /* t1 = mask        (a8) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+    addiu    a2, a2, 2
+
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
+                                       t0, t1, \
+                                       t2, t3, \
+                                       t5, t6, \
+                                       t4, t7, t8, t9, s0, s1, s2
+
+    sw       t5, 0(a0)
+    sw       t6, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b
+     addiu   a0, a0, 8
+2:
+    beqz     a3, 3f
+     nop
+                       /* a1 = source      (32bit constant) */
+    lbu      t0, 0(a2) /* t0 = mask        (a8) */
+    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6
+
+    sw       t2, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+    j        ra
+     nop
+
+END(pixman_composite_add_n_8_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 7327dc6..b330c0f 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -600,6 +600,28 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb          \out_8888, \out_8888, \d_8888
 .endm
 
+.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,   \
+                             s2_8888,   \
+                             m1_8,      \
+                             m2_8,      \
+                             d1_8888,   \
+                             d2_8888,   \
+                             out1_8888, \
+                             out2_8888, \
+                             maskLSR,   \
+                             scratch1,  scratch2, scratch3, \
+                             scratch4, scratch5, scratch6
+    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888,   \s2_8888, \
+                           \m1_8,      \m2_8, \
+                           \out1_8888, \out2_8888, \
+                           \maskLSR, \
+                           \scratch1,  \scratch2, \scratch3, \
+                           \scratch4,  \scratch5, \scratch6
+
+    addu_s.qb             \out1_8888, \out1_8888, \d1_8888
+    addu_s.qb             \out2_8888, \out2_8888, \d2_8888
+.endm
+
 .macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
                                          scratch1, scratch2,     \
                                          alpha, red, green, blue \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e80bbb6..30d2a85 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -59,6 +59,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
                                        uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
 
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
                                       uint32_t, 1, uint32_t, 1)
@@ -67,6 +71,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
                                       uint16_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
+                                         uint8_t,  1, uint8_t,  1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
                                          uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
@@ -271,6 +277,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, mips_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       mips_composite_add_n_8_8),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, mips_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, mips_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       mips_composite_add_8_8_8),


-- 
To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org
Archive: http://lists.debian.org/e1tdkvz-0000hv...@vasks.debian.org

Reply via email to