include/tools/cpuid.hxx | 28 ++++++ sc/source/core/inc/arraysumfunctor.hxx | 141 +++++++++++++++++++++++++++++++++ sc/source/core/tool/interpr6.cxx | 71 +++++++++++----- tools/Library_tl.mk | 1 tools/source/misc/cpuid.cxx | 63 ++++++++++++++ 5 files changed, 282 insertions(+), 22 deletions(-)
New commits:
commit 599aab361bd44635386728a09452f53342419926
Author: László Németh <laszlo.nem...@collabora.com>
Date:   Sun Nov 8 20:52:39 2015 +0100

    Revert "Revert "Adapt FuncSum to vectorize better - potentially ...""

    This reverts commit 369a3f9cfa6738e8cd02fb41726f536694618ead.

    Revert "Revert "invalid array index when pCurrent pointer is incremented""

    This reverts commit 3395c3ed22519c62b091a5065e03862bda587f20.

diff --git a/include/tools/cpuid.hxx b/include/tools/cpuid.hxx
new file mode 100644
index 0000000..316e656
--- /dev/null
+++ b/include/tools/cpuid.hxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_TOOLS_CPUID_HXX
+#define INCLUDED_TOOLS_CPUID_HXX
+
+#include <sal/config.h>
+#include <tools/toolsdllapi.h>
+
+namespace tools
+{
+namespace cpuid
+{
+    TOOLS_DLLPUBLIC bool hasSSE();  ///< true when CPUID leaf 1 sets EDX bit 25; always false where the x86 CPUID path is compiled out
+    TOOLS_DLLPUBLIC bool hasSSE2(); ///< true when CPUID leaf 1 sets EDX bit 26; always false where the x86 CPUID path is compiled out
+}
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/inc/arraysumfunctor.hxx b/sc/source/core/inc/arraysumfunctor.hxx
new file mode 100644
index 0000000..776c514
--- /dev/null
+++ b/sc/source/core/inc/arraysumfunctor.hxx
@@ -0,0 +1,141 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
+#define INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
+
+#include <emmintrin.h>
+#include <tools/cpuid.hxx>
+
+namespace sc
+{
+
+/// Returns true when the address held by pointer is a multiple of N.
+template<typename T, unsigned int N>
+inline bool isAligned(const T* pointer)
+{
+    return 0 == (uintptr_t(pointer) % N);
+}
+
+/// Sums the nSize doubles starting at pArray. The array is only read and is
+/// not owned. Uses SSE2 packed additions when tools::cpuid::hasSSE2() reports
+/// support and a 4-way unrolled scalar loop otherwise.
+struct ArraySumFunctor
+{
+private:
+    const double* mpArray;
+    size_t mnSize;
+
+public:
+    ArraySumFunctor(const double* pArray, size_t nSize)
+        : mpArray(pArray)
+        , mnSize(nSize)
+    {
+    }
+
+    /// Returns the sum of all mnSize elements; 0.0 for an empty array.
+    double operator() ()
+    {
+        static bool hasSSE2 = tools::cpuid::hasSSE2();
+
+        double fSum = 0.0;
+        size_t i = 0;
+        const double* pCurrent = mpArray;
+
+        if (hasSSE2)
+        {
+            // _mm_load_pd requires 16-byte aligned addresses, so add leading
+            // elements one at a time - without ever running past the end of
+            // the array (short or empty input, or an address that never aligns).
+            while (i < mnSize && !isAligned<double, 16>(pCurrent))
+            {
+                fSum += *pCurrent++;
+                i++;
+            }
+            fSum += executeSSE2(i, pCurrent);
+        }
+        else
+            fSum += executeUnrolled(i, pCurrent);
+
+        // sum rest of the array
+        for (; i < mnSize; ++i)
+            fSum += mpArray[i];
+
+        return fSum;
+    }
+
+private:
+    /// Adds every whole group of 8 doubles in [i, mnSize) with SSE2 and moves
+    /// i past them. pCurrent must equal mpArray + i and be 16-byte aligned.
+    inline double executeSSE2(size_t& i, const double* pCurrent) const
+    {
+        double fSum = 0.0;
+        size_t nRealSize = mnSize - i;
+        size_t nUnrolledSize = nRealSize - (nRealSize % 8);
+
+        if (nUnrolledSize > 0)
+        {
+            __m128d sum1 = _mm_setzero_pd();
+            __m128d sum2 = _mm_setzero_pd();
+            __m128d sum3 = _mm_setzero_pd();
+            __m128d sum4 = _mm_setzero_pd();
+
+            // nUnrolledSize counts elements from the current i, not from index 0
+            for (const size_t nEnd = i + nUnrolledSize; i < nEnd; i += 8)
+            {
+                sum1 = _mm_add_pd(sum1, _mm_load_pd(pCurrent));
+                sum2 = _mm_add_pd(sum2, _mm_load_pd(pCurrent + 2));
+                sum3 = _mm_add_pd(sum3, _mm_load_pd(pCurrent + 4));
+                sum4 = _mm_add_pd(sum4, _mm_load_pd(pCurrent + 6));
+                pCurrent += 8;
+            }
+            sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
+
+            double temp;
+            _mm_storel_pd(&temp, sum1);
+            fSum += temp;
+            _mm_storeh_pd(&temp, sum1);
+            fSum += temp;
+        }
+        return fSum;
+    }
+
+    /// Adds every whole group of 4 doubles in [i, mnSize) using four independent
+    /// accumulators and moves i past them. pCurrent must equal mpArray + i.
+    inline double executeUnrolled(size_t& i, const double* pCurrent) const
+    {
+        size_t nRealSize = mnSize - i;
+        size_t nUnrolledSize = nRealSize - (nRealSize % 4);
+
+        if (nUnrolledSize > 0)
+        {
+            double sum0 = 0.0;
+            double sum1 = 0.0;
+            double sum2 = 0.0;
+            double sum3 = 0.0;
+
+            for (const size_t nEnd = i + nUnrolledSize; i < nEnd; i += 4)
+            {
+                sum0 += *pCurrent++;
+                sum1 += *pCurrent++;
+                sum2 += *pCurrent++;
+                sum3 += *pCurrent++;
+            }
+            return sum0 + sum1 + sum2 + sum3;
+        }
+        return 0.0;
+    }
+};
+
+} // end namespace sc
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/interpr6.cxx b/sc/source/core/tool/interpr6.cxx
index 694a403..c673fdb 100644
--- a/sc/source/core/tool/interpr6.cxx
+++ b/sc/source/core/tool/interpr6.cxx
@@ -26,8 +26,11 @@
 #include "mtvcellfunc.hxx"
 #include "scmatrix.hxx"
 
+#include "arraysumfunctor.hxx"
+
 #include <formula/token.hxx>
 
+
 using namespace formula;
 
 double const fHalfMachEps = 0.5 * ::std::numeric_limits<double>::epsilon();
@@ -203,6 +206,11 @@ double ScInterpreter::GetGammaDist( double fX, double fAlpha, double fLambda )
 
 namespace {
 
+// this is unpleasant - but ... we want raw access.
+struct puncture_mdds_encap : public sc::numeric_block {
+    const double *getPtr(size_t nOffset) const { return &m_array[nOffset]; }
+};
+
 class NumericCellAccumulator
 {
     double mfSum;
@@ -211,32 +219,51 @@ class NumericCellAccumulator
 public:
     NumericCellAccumulator() : mfSum(0.0), mnError(0) {}
 
-    void operator() (size_t, double fVal)
+    void operator() (const sc::CellStoreType::value_type& rNode, size_t nOffset, size_t nDataSize)
     {
-        mfSum += fVal;
-    }
+        switch (rNode.type)
+        {
+            case sc::element_type_numeric:
+            {
+                const puncture_mdds_encap *pBlock = static_cast<const puncture_mdds_encap *>(rNode.data);
+                const double *p = pBlock->getPtr(nOffset);
 
-    void operator() (size_t, const ScFormulaCell* pCell)
-    {
-        if (mnError)
-            // Skip all the rest if we have an error.
-            return;
+                sc::ArraySumFunctor functor(p, nDataSize);
 
-        double fVal = 0.0;
-        sal_uInt16 nErr = 0;
-        ScFormulaCell& rCell = const_cast<ScFormulaCell&>(*pCell);
-        if (!rCell.GetErrorOrValue(nErr, fVal))
-            // The cell has neither error nor value. Perhaps string result.
-            return;
+                mfSum += functor();
 
-        if (nErr)
-        {
-            // Cell has error.
-            mnError = nErr;
-            return;
-        }
+                break;
+            }
+
+            case sc::element_type_formula:
+            {
+                sc::formula_block::const_iterator it = sc::formula_block::begin(*rNode.data);
+                std::advance(it, nOffset);
+                sc::formula_block::const_iterator itEnd = it;
+                std::advance(itEnd, nDataSize);
+                for (; it != itEnd; ++it)
+                {
+                    double fVal = 0.0;
+                    sal_uInt16 nErr = 0;
+                    ScFormulaCell& rCell = const_cast<ScFormulaCell&>(*(*it));
+                    if (!rCell.GetErrorOrValue(nErr, fVal))
+                        // The cell has neither error nor value. Perhaps string result.
+                        continue;
 
-        mfSum += fVal;
+                    if (nErr)
+                    {
+                        // Cell has error - skip all the rest
+                        mnError = nErr;
+                        return;
+                    }
+
+                    mfSum += fVal;
+                }
+            }
+            break;
+            default:
+                ;
+        }
     }
 
     sal_uInt16 getError() const { return mnError; }
@@ -335,7 +362,7 @@ public:
             return;
 
         NumericCellAccumulator aFunc;
-        maPos.miCellPos = sc::ParseFormulaNumeric(maPos.miCellPos, mpCol->GetCellStore(), nRow1, nRow2, aFunc);
+        maPos.miCellPos = sc::ParseBlock(maPos.miCellPos, mpCol->GetCellStore(), aFunc, nRow1, nRow2);
         mnError = aFunc.getError();
         if (mnError)
             return;
diff --git a/tools/Library_tl.mk b/tools/Library_tl.mk
index 2d105cd..65ba17c 100644
--- a/tools/Library_tl.mk
+++ b/tools/Library_tl.mk
@@ -69,6 +69,7 @@ $(eval $(call gb_Library_add_exception_objects,tl,\
     tools/source/memtools/multisel \
     tools/source/memtools/unqidx \
     tools/source/misc/appendunixshellword \
+    tools/source/misc/cpuid \
     tools/source/misc/extendapplicationenvironment \
     tools/source/misc/getprocessworkingdir \
     tools/source/misc/solarmutex \
diff --git a/tools/source/misc/cpuid.cxx b/tools/source/misc/cpuid.cxx
new file mode 100644
index 0000000..1d0518c
--- /dev/null
+++
b/tools/source/misc/cpuid.cxx
@@ -0,0 +1,63 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <tools/cpuid.hxx>
+#include <cstdint>
+
+// First minimize to MSVC / GCC compat. compiler and x86 / x64 architecture
+#if (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+#define TOOLS_CPUID_X86 1
+#if defined(_MSC_VER)
+#include <intrin.h> // system headers are included at file scope, outside any namespace
+#else
+#include <cpuid.h>
+#endif
+#endif
+
+namespace tools
+{
+namespace cpuid
+{
+
+#if defined(TOOLS_CPUID_X86)
+namespace
+{
+/// Queries CPUID leaf 1 and stores EAX, EBX, ECX and EDX in array[0..3].
+/// The result of __get_cpuid is not checked; callers pass a zeroed array.
+void getCpuId(uint32_t array[4])
+{
+#if defined(_MSC_VER)
+    __cpuid(reinterpret_cast<int*>(array), 1);
+#else
+    __get_cpuid(1, array + 0, array + 1, array + 2, array + 3);
+#endif
+}
+
+/// Returns true when bit nBit of the EDX value reported by CPUID leaf 1 is set.
+bool hasEdxBit(unsigned int nBit)
+{
+    uint32_t cpuInfoArray[] = {0, 0, 0, 0};
+    getCpuId(cpuInfoArray);
+    return (cpuInfoArray[3] & (uint32_t(1) << nBit)) != 0;
+}
+}
+
+bool hasSSE() { return hasEdxBit(25); }
+bool hasSSE2() { return hasEdxBit(26); }
+#else
+bool hasSSE() { return false; }
+bool hasSSE2() { return false; }
+#endif
+
+}
+}
+
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
_______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits