Modified: trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp (134402 => 134403)
--- trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp 2012-11-13 13:26:44 UTC (rev 134402)
+++ trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp 2012-11-13 13:44:31 UTC (rev 134403)
@@ -526,6 +526,16 @@
void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
{
+#if HAVE(ARM_NEON_INTRINSICS)
+ unsigned tailPixels = pixelsPerRow % 8;
+ unsigned pixelSize = pixelsPerRow - tailPixels;
+
+ ARM::unpackOneRowOfRGBA5551ToRGBA8NEON(source, destination, pixelSize);
+
+ source += pixelSize;
+ destination += pixelSize * 4;
+ pixelsPerRow = tailPixels;
+#endif
for (unsigned int i = 0; i < pixelsPerRow; ++i) {
uint16_t packedValue = source[0];
uint8_t r = packedValue >> 11;
@@ -569,6 +579,16 @@
void unpackOneRowOfRGB565ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
{
+#if HAVE(ARM_NEON_INTRINSICS)
+ unsigned tailPixels = pixelsPerRow % 8;
+ unsigned pixelSize = pixelsPerRow - tailPixels;
+
+ ARM::unpackOneRowOfRGB565ToRGBA8NEON(source, destination, pixelSize);
+
+ source += pixelSize;
+ destination += pixelSize * 4;
+ pixelsPerRow = tailPixels;
+#endif
for (unsigned int i = 0; i < pixelsPerRow; ++i) {
uint16_t packedValue = source[0];
uint8_t r = packedValue >> 11;
@@ -1014,6 +1034,17 @@
void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
{
+#if HAVE(ARM_NEON_INTRINSICS)
+ unsigned componentsPerRow = pixelsPerRow * 4;
+ unsigned tailComponents = componentsPerRow % 32;
+ unsigned componentsSize = componentsPerRow - tailComponents;
+
+ ARM::packOneRowOfRGBA8ToUnsignedShort5551NEON(source, destination, componentsSize);
+
+ source += componentsSize;
+ destination += componentsSize / 4;
+ pixelsPerRow = tailComponents / 4;
+#endif
for (unsigned int i = 0; i < pixelsPerRow; ++i) {
*destination = (((source[0] & 0xF8) << 8)
| ((source[1] & 0xF8) << 3)
@@ -1059,6 +1090,17 @@
void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
{
+#if HAVE(ARM_NEON_INTRINSICS)
+ unsigned componentsPerRow = pixelsPerRow * 4;
+ unsigned tailComponents = componentsPerRow % 32;
+ unsigned componentsSize = componentsPerRow - tailComponents;
+
+ ARM::packOneRowOfRGBA8ToUnsignedShort565NEON(source, destination, componentsSize);
+
+ source += componentsSize;
+ destination += componentsSize / 4;
+ pixelsPerRow = tailComponents / 4;
+#endif
for (unsigned int i = 0; i < pixelsPerRow; ++i) {
*destination = (((source[0] & 0xF8) << 8)
| ((source[1] & 0xFC) << 3)
Modified: trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h (134402 => 134403)
--- trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h 2012-11-13 13:26:44 UTC (rev 134402)
+++ trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h 2012-11-13 13:44:31 UTC (rev 134403)
@@ -36,14 +36,14 @@
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8NEON(const uint16_t* source, uint8_t* destination, unsigned pixelSize)
{
- uint16x8_t constant = vdupq_n_u16(0x0F);
+ uint16x8_t immediate0x0f = vdupq_n_u16(0x0F);
for (unsigned i = 0; i < pixelSize; i += 8) {
uint16x8_t eightPixels = vld1q_u16(source + i);
uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12));
- uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), constant));
- uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), constant));
- uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, constant));
+ uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), immediate0x0f));
+ uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), immediate0x0f));
+ uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f));
componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR);
componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG);
@@ -59,14 +59,14 @@
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444NEON(const uint8_t* source, uint16_t* destination, unsigned componentsSize)
{
uint8_t* dst = reinterpret_cast<uint8_t*>(destination);
- uint8x8_t constant = vdup_n_u8(0xF0);
+ uint8x8_t immediate0xf0 = vdup_n_u8(0xF0);
for (unsigned i = 0; i < componentsSize; i += 32) {
uint8x8x4_t RGBA8 = vld4_u8(source + i);
- uint8x8_t componentR = vand_u8(RGBA8.val[0], constant);
- uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], constant), 4);
- uint8x8_t componentB = vand_u8(RGBA8.val[2], constant);
- uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], constant), 4);
+ uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0);
+ uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4);
+ uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0);
+ uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4);
uint8x8x2_t RGBA4;
RGBA4.val[0] = vorr_u8(componentB, componentA);
@@ -76,6 +76,104 @@
}
}
+ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8NEON(const uint16_t* source, uint8_t* destination, unsigned pixelSize) // Expands packed 16-bit RGBA5551 pixels into four 8-bit components (R, G, B, A), eight pixels per iteration.
+{ // The loop advances by 8 and always loads/stores a full group of eight pixels, so pixelSize should be a multiple of 8.
+ uint8x8_t immediate0x7 = vdup_n_u8(0x7); // Mask selecting the low three bits of a component.
+ uint8x8_t immediate0xff = vdup_n_u8(0xFF); // Multiplier that turns the 1-bit alpha into 0x00 or 0xFF.
+ uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); // Mask for a 5-bit field.
+ uint16x8_t immediate0x1 = vdupq_n_u16(0x1); // Mask for the 1-bit alpha field.
+
+ for (unsigned i = 0; i < pixelSize; i += 8) {
+ uint16x8_t eightPixels = vld1q_u16(source + i); // Load eight 16-bit pixels.
+
+ uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); // Red: bits 15..11; the value is at most 0x1F, so the saturating narrow never clamps.
+ uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6), immediate0x1f)); // Green: bits 10..6.
+ uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1), immediate0x1f)); // Blue: bits 5..1.
+ uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1)); // Alpha: bit 0.
+
+ componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); // Widen 5 bits to 8: value in the top five bits, its own low three bits reused as the bottom three.
+ componentG = vorr_u8(vshl_n_u8(componentG, 3), vand_u8(componentG, immediate0x7)); // Same 5-to-8-bit widening for green.
+ componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); // Same 5-to-8-bit widening for blue.
+ componentA = vmul_u8(componentA, immediate0xff); // 0 stays 0x00, 1 becomes 0xFF.
+
+ uint8x8x4_t destComponents = {componentR, componentG, componentB, componentA};
+ vst4_u8(destination, destComponents); // Interleaving store: writes R, G, B, A for each of the eight pixels.
+ destination += 32; // Eight pixels times four output bytes.
+ }
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551NEON(const uint8_t* source, uint16_t* destination, unsigned componentsSize) // Packs 8-bit R, G, B, A components into 16-bit RGBA5551 pixels, eight pixels (32 components) per iteration.
+{ // The loop advances by 32 and always loads a full group of 32 bytes, so componentsSize should be a multiple of 32.
+ uint8_t* dst = reinterpret_cast<uint8_t*>(destination); // Each output pixel is written as two separate bytes.
+
+ uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); // Mask keeping the top five bits of a component.
+ uint8x8_t immediate0x18 = vdup_n_u8(0x18); // Mask selecting green bits 4..3.
+ for (unsigned i = 0; i < componentsSize; i += 32) {
+ uint8x8x4_t RGBA8 = vld4_u8(source + i); // De-interleaving load: val[0..3] hold R, G, B, A of eight pixels.
+
+ uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); // Top five red bits, already in position for the second byte.
+ uint8x8_t componentG3bit = vshr_n_u8(RGBA8.val[1], 5); // Green bits 7..5 moved to bits 2..0 of the second byte.
+
+ uint8x8_t componentG2bit = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x18), 3); // Green bits 4..3 moved to bits 7..6 of the first byte.
+ uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 2); // Top five blue bits moved to bits 5..1 of the first byte.
+ uint8x8_t componentA = vshr_n_u8(RGBA8.val[3], 7); // Top alpha bit moved to bit 0 of the first byte.
+
+ uint8x8x2_t RGBA5551;
+ RGBA5551.val[0] = vorr_u8(vorr_u8(componentG2bit, componentB), componentA); // First byte of each pair (the low byte of the uint16_t on a little-endian target).
+ RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); // Second byte of each pair (the high byte on a little-endian target).
+ vst2_u8(dst, RGBA5551); // Interleaving store: val[0] then val[1] for each of the eight pixels.
+ dst += 16; // Eight pixels times two output bytes.
+ }
+}
+
+ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8NEON(const uint16_t* source, uint8_t* destination, unsigned pixelSize) // Expands packed 16-bit RGB565 pixels into 8-bit R, G, B plus a constant 0xFF alpha, eight pixels per iteration.
+{ // The loop advances by 8 and always loads/stores a full group of eight pixels, so pixelSize should be a multiple of 8.
+ uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); // Mask for the 6-bit green field.
+ uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); // Mask for a 5-bit field.
+ uint8x8_t immediate0x3 = vdup_n_u8(0x3); // Mask selecting the low two bits of a component.
+ uint8x8_t immediate0x7 = vdup_n_u8(0x7); // Mask selecting the low three bits of a component.
+
+ uint8x8_t componentA = vdup_n_u8(0xFF); // The source has no alpha bits; every output alpha is 0xFF.
+
+ for (unsigned i = 0; i < pixelSize; i += 8) {
+ uint16x8_t eightPixels = vld1q_u16(source + i); // Load eight 16-bit pixels.
+
+ uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); // Red: bits 15..11; the value is at most 0x1F, so the saturating narrow never clamps.
+ uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5), immediate0x3f)); // Green: bits 10..5.
+ uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f)); // Blue: bits 4..0.
+
+ componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immediate0x7)); // Widen 5 bits to 8: value in the top five bits, its own low three bits reused as the bottom three.
+ componentG = vorr_u8(vshl_n_u8(componentG, 2), vand_u8(componentG, immediate0x3)); // Widen 6 bits to 8: value in the top six bits, its own low two bits reused as the bottom two.
+ componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immediate0x7)); // Same 5-to-8-bit widening for blue.
+
+ uint8x8x4_t destComponents = {componentR, componentG, componentB, componentA};
+ vst4_u8(destination, destComponents); // Interleaving store: writes R, G, B, A for each of the eight pixels.
+ destination += 32; // Eight pixels times four output bytes.
+ }
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565NEON(const uint8_t* source, uint16_t* destination, unsigned componentsSize) // Packs 8-bit R, G, B components into 16-bit RGB565 pixels, eight pixels (32 components) per iteration; the loaded alpha is not used.
+{ // The loop advances by 32 and always loads a full group of 32 bytes, so componentsSize should be a multiple of 32.
+ uint8_t* dst = reinterpret_cast<uint8_t*>(destination); // Each output pixel is written as two separate bytes.
+
+ uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); // Mask keeping the top five bits of a component.
+ uint8x8_t immediate0x1c = vdup_n_u8(0x1C); // Mask selecting green bits 4..2.
+ for (unsigned i = 0; i < componentsSize; i += 32) {
+ uint8x8x4_t RGBA8 = vld4_u8(source + i); // De-interleaving load: val[0..3] hold R, G, B, A of eight pixels.
+
+ uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); // Top five red bits, already in position for the second byte.
+ uint8x8_t componentGLeft = vshr_n_u8(RGBA8.val[1], 5); // Green bits 7..5 moved to bits 2..0 of the second byte.
+ uint8x8_t componentGRight = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x1c), 3); // Green bits 4..2 moved to bits 7..5 of the first byte.
+ uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 3); // Top five blue bits moved to bits 4..0 of the first byte.
+
+ uint8x8x2_t RGB565;
+ RGB565.val[0] = vorr_u8(componentGRight, componentB); // First byte of each pair (the low byte of the uint16_t on a little-endian target).
+ RGB565.val[1] = vorr_u8(componentR, componentGLeft); // Second byte of each pair (the high byte on a little-endian target).
+ vst2_u8(dst, RGB565); // Interleaving store: val[0] then val[1] for each of the eight pixels.
+ dst += 16; // Eight pixels times two output bytes.
+ }
+}
+
} // namespace ARM
} // namespace WebCore