Modified: trunk/Source/WebCore/ChangeLog (102701 => 102702)
--- trunk/Source/WebCore/ChangeLog 2011-12-13 21:27:15 UTC (rev 102701)
+++ trunk/Source/WebCore/ChangeLog 2011-12-13 21:42:37 UTC (rev 102702)
@@ -1,3 +1,22 @@
+2011-12-13 Xingnan Wang <[email protected]>
+
+ Implement an SSE2-optimized vector multiply function (vmul) in VectorMath.cpp.
+ https://bugs.webkit.org/show_bug.cgi?id=74048
+
+ Reviewed by Benjamin Poulain.
+
+ vmul performs an element-by-element multiply of two float vectors. With the SSE2
+ optimization it is about 3.4x faster than the plain scalar
+ multiply loop.
+
+ Use vmul in AudioBus::copyWithSampleAccurateGainValuesFrom().
+
+ * platform/audio/AudioBus.cpp:
+ (WebCore::AudioBus::copyWithSampleAccurateGainValuesFrom):
+ * platform/audio/VectorMath.cpp:
+ (WebCore::VectorMath::vmul):
+ * platform/audio/VectorMath.h:
+
2011-12-13 Vsevolod Vlasov <[email protected]>
Web Inspector: [Regression] ResourceHeadersView sections should be expanded by default.
Modified: trunk/Source/WebCore/platform/audio/AudioBus.cpp (102701 => 102702)
--- trunk/Source/WebCore/platform/audio/AudioBus.cpp 2011-12-13 21:27:15 UTC (rev 102701)
+++ trunk/Source/WebCore/platform/audio/AudioBus.cpp 2011-12-13 21:42:37 UTC (rev 102702)
@@ -382,15 +382,13 @@
return;
}
- // FIXME: this can potentially use SIMD optimizations with vector libraries.
// We handle both the 1 -> N and N -> N case here.
const float* source = sourceBus.channel(0)->data();
for (unsigned channelIndex = 0; channelIndex < numberOfChannels(); ++channelIndex) {
if (sourceBus.numberOfChannels() == numberOfChannels())
source = sourceBus.channel(channelIndex)->data();
float* destination = channel(channelIndex)->data();
- for (unsigned i = 0; i < numberOfGainValues; ++i)
- destination[i] = source[i] * gainValues[i];
+ vmul(source, 1, gainValues, 1, destination, 1, numberOfGainValues);
}
}
Modified: trunk/Source/WebCore/platform/audio/VectorMath.cpp (102701 => 102702)
--- trunk/Source/WebCore/platform/audio/VectorMath.cpp 2011-12-13 21:27:15 UTC (rev 102701)
+++ trunk/Source/WebCore/platform/audio/VectorMath.cpp 2011-12-13 21:42:37 UTC (rev 102702)
@@ -63,8 +63,17 @@
#endif
}
+void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
+{
+#if defined(__ppc__) || defined(__i386__)
+ ::vmul(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
#else
+ vDSP_vmul(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
+#endif
+}
+#else
+
void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
{
#ifdef __SSE2__
@@ -229,6 +238,66 @@
#endif
}
+void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
+{
+
+ int n = framesToProcess;
+
+#ifdef __SSE2__
+ if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
+
+ // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
+ while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) {
+ *destP = *source1P * *source2P;
+ source1P++;
+ source2P++;
+ destP++;
+ n--;
+ }
+
+ // Now the source1P address is aligned, so start applying SSE.
+ int tailFrames = n % 4;
+ float* endP = destP + n - tailFrames;
+ __m128 pSource1;
+ __m128 pSource2;
+ __m128 dest;
+
+ bool source2Aligned = !(reinterpret_cast<uintptr_t>(source2P) & 0x0F);
+ bool destAligned = !(reinterpret_cast<uintptr_t>(destP) & 0x0F);
+
+#define SSE2_MULT(loadInstr, storeInstr) \
+ while (destP < endP) \
+ { \
+ pSource1 = _mm_load_ps(source1P); \
+ pSource2 = _mm_##loadInstr##_ps(source2P); \
+ dest = _mm_mul_ps(pSource1, pSource2); \
+ _mm_##storeInstr##_ps(destP, dest); \
+ source1P += 4; \
+ source2P += 4; \
+ destP += 4; \
+ }
+
+ if (source2Aligned && destAligned) // Both aligned.
+ SSE2_MULT(load, store)
+ else if (source2Aligned && !destAligned) // Source2 is aligned but dest not.
+ SSE2_MULT(load, storeu)
+ else if (!source2Aligned && destAligned) // Dest is aligned but source2 not.
+ SSE2_MULT(loadu, store)
+ else // Neither aligned.
+ SSE2_MULT(loadu, storeu)
+
+ n = tailFrames;
+ }
+#endif
+ while (n) {
+ *destP = *source1P * *source2P;
+ source1P += sourceStride1;
+ source2P += sourceStride2;
+ destP += destStride;
+ n--;
+ }
+}
+
#endif // OS(DARWIN)
} // namespace VectorMath
Modified: trunk/Source/WebCore/platform/audio/VectorMath.h (102701 => 102702)
--- trunk/Source/WebCore/platform/audio/VectorMath.h 2011-12-13 21:27:15 UTC (rev 102701)
+++ trunk/Source/WebCore/platform/audio/VectorMath.h 2011-12-13 21:42:37 UTC (rev 102702)
@@ -34,6 +34,9 @@
void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess);
void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess);
+// For an element-by-element multiply of two float vectors.
+void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess);
+
} // namespace VectorMath
} // namespace WebCore